Fixed management command to remove orphan files
All checks were successful
Build containers when image tags change / build-if-image-changed (., web, containers, main container, git.baumann.gr/adebaumann/labhelper) (push) Successful in 2m13s
Build containers when image tags change / build-if-image-changed (data-loader, loader, initContainers, init-container, git.baumann.gr/adebaumann/labhelper-data-loader) (push) Successful in 8s

This commit is contained in:
2026-01-17 14:46:45 +01:00
parent 2705f6c16e
commit 985460ff84
3 changed files with 112 additions and 60 deletions

View File

@@ -18,4 +18,4 @@ data:
LOGIN_REDIRECT_URL: "index"
LOGOUT_REDIRECT_URL: "login"
GUNICORN_OPTS: "--access-logfile -"
IMAGE_TAG: "0.063"
IMAGE_TAG: "0.064"

View File

@@ -27,7 +27,7 @@ spec:
mountPath: /data
containers:
- name: web
image: git.baumann.gr/adebaumann/labhelper:0.063
image: git.baumann.gr/adebaumann/labhelper:0.064
imagePullPolicy: Always
ports:
- containerPort: 8000

View File

@@ -4,144 +4,196 @@ from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models import F
from sorl.thumbnail.models import KVStore
from boxes.models import Thing
from boxes.models import Thing, ThingFile
class Command(BaseCommand):
    """Delete media files that no database row references any more.

    Two directories below ``settings.MEDIA_ROOT`` are cleaned:

    * ``things/`` - every file that is neither a ``Thing.picture`` nor a
      ``ThingFile.file`` is removed.
    * ``cache/`` - a thumbnail file is removed when its source image is
      orphaned, or when no sorl-thumbnail ``KVStore`` image entry names it.

    Directories found empty during the walk are removed as well.  With
    ``--dry-run`` nothing is touched; the command only prints what it would do.
    """

    help = "Clean up orphaned images, files, and thumbnails from deleted things"

    # Storage-name prefixes (always forward slashes) of the two managed trees.
    THINGS_PREFIX = "things/"
    CACHE_PREFIX = "cache/"

    def add_arguments(self, parser):
        """Register the ``--dry-run`` flag on the command's argument parser."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            dest="dry_run",
            help="Show what would be deleted without actually deleting",
        )

    def handle(self, *args, **options):
        """Run the cleanup and print a summary of what was (or would be) removed."""
        dry_run = options.get("dry_run", False)
        if dry_run:
            self.stdout.write(self.style.WARNING("DRY RUN - No files will be deleted"))
        self.stdout.write("Finding orphaned images and thumbnails...")

        media_root = settings.MEDIA_ROOT
        cache_root = os.path.join(media_root, "cache")
        things_root = os.path.join(media_root, "things")

        if not os.path.exists(things_root):
            self.stdout.write(self.style.WARNING("No things directory found"))
            return

        valid_paths = self._collect_valid_paths()
        self.stdout.write(
            f"Found {len(valid_paths)} valid images and files in database"
        )
        orphaned_thumbnail_paths, db_cache_paths = self._collect_thumbnail_paths(
            valid_paths
        )

        deleted_count, empty_dirs_removed = self._clean_things(
            things_root, valid_paths, dry_run
        )
        thumbnail_deleted_count = 0
        if os.path.exists(cache_root):
            thumbnail_deleted_count, cache_dirs_removed = self._clean_cache(
                cache_root,
                media_root,
                orphaned_thumbnail_paths,
                db_cache_paths,
                dry_run,
            )
            empty_dirs_removed += cache_dirs_removed

        if dry_run:
            self.stdout.write(
                self.style.WARNING(
                    f"\nDry run complete. Would delete {deleted_count} files and {thumbnail_deleted_count} thumbnails"
                )
            )
            self.stdout.write(f"Would remove {empty_dirs_removed} empty directories")
        else:
            self.stdout.write(
                self.style.SUCCESS(
                    f"\nCleanup complete! Deleted {deleted_count} files and {thumbnail_deleted_count} thumbnails"
                )
            )
            self.stdout.write(f"Removed {empty_dirs_removed} empty directories")

    @staticmethod
    def _storage_relpath(path, start):
        """Return ``path`` relative to ``start`` using forward slashes.

        The database stores names with ``/`` separators, so the filesystem
        separator is normalised before the result is compared against them.
        """
        return os.path.relpath(path, start).replace(os.sep, "/")

    def _collect_valid_paths(self):
        """Return the set of referenced files, keyed relative to ``things/``."""
        prefix = self.THINGS_PREFIX
        valid_paths = set()

        pictures = Thing.objects.exclude(picture__exact="").exclude(
            picture__isnull=True
        )
        for thing in pictures:
            if not thing.picture:
                continue
            name = thing.picture.name
            if name.startswith(prefix):
                # Keep sub-directories: the directory walk compares full
                # things-relative paths, so a bare basename would not protect
                # a picture stored below things/<subdir>/.
                valid_paths.add(name[len(prefix):])
            else:
                valid_paths.add(os.path.basename(name))

        for thing_file in ThingFile.objects.all():
            if thing_file.file and thing_file.file.name.startswith(prefix):
                valid_paths.add(thing_file.file.name[len(prefix):])

        return valid_paths

    def _collect_thumbnail_paths(self, valid_paths):
        """Scan sorl-thumbnail image entries in ``KVStore``.

        Returns a ``(orphaned_thumbnail_paths, db_cache_paths)`` tuple of sets:
        the ``cache/`` names of thumbnails whose ``things/`` source is not in
        ``valid_paths``, and every ``cache/`` name that has an image entry.
        """
        orphaned_thumbnail_paths = set()
        db_cache_paths = set()
        prefix = self.THINGS_PREFIX

        for kvstore in KVStore.objects.filter(
            key__startswith="sorl-thumbnail||image||"
        ):
            try:
                data = json.loads(kvstore.value)
                name = data.get("name", "")
                if name.startswith(prefix):
                    if name[len(prefix):] not in valid_paths:
                        image_hash = kvstore.key.split("||")[-1]
                        self._add_thumbnails_of(image_hash, orphaned_thumbnail_paths)
                elif name.startswith(self.CACHE_PREFIX):
                    db_cache_paths.add(name)
            except (json.JSONDecodeError, KeyError, AttributeError) as e:
                # Best effort: one unreadable entry must not abort the cleanup,
                # but it should not vanish silently either.
                self.stdout.write(
                    self.style.WARNING(
                        f"Skipping unreadable thumbnail entry {kvstore.key}: {e}"
                    )
                )

        return orphaned_thumbnail_paths, db_cache_paths

    def _add_thumbnails_of(self, image_hash, orphaned_thumbnail_paths):
        """Add the ``cache/`` names of all thumbnails of ``image_hash`` to the set."""
        thumbnail_kvstore = KVStore.objects.filter(
            key=f"sorl-thumbnail||thumbnails||{image_hash}"
        ).first()
        if not thumbnail_kvstore:
            return
        # The entry's value is a JSON list of thumbnail hashes; each hash has
        # its own image entry carrying the thumbnail's storage name.
        for thumbnail_hash in json.loads(thumbnail_kvstore.value):
            thumbnail_image_kvstore = KVStore.objects.filter(
                key=f"sorl-thumbnail||image||{thumbnail_hash}"
            ).first()
            if not thumbnail_image_kvstore:
                continue
            thumbnail_data = json.loads(thumbnail_image_kvstore.value)
            thumbnail_path = thumbnail_data.get("name", "")
            if thumbnail_path.startswith(self.CACHE_PREFIX):
                orphaned_thumbnail_paths.add(thumbnail_path)

    def _remove_file(self, file_path, dry_run, label=""):
        """Delete ``file_path`` or, in a dry run, announce the deletion.

        ``label`` is inserted after the verb in the message (with its own
        leading space).  Returns True when the file was deleted or would be,
        False when the deletion failed, so callers count only real removals.
        """
        if dry_run:
            self.stdout.write(f"Would delete{label}: {file_path}")
            return True
        try:
            os.remove(file_path)
        except OSError as e:
            self.stdout.write(self.style.ERROR(f"Failed to delete {file_path}: {e}"))
            return False
        self.stdout.write(f"Deleted{label}: {file_path}")
        return True

    def _remove_dir_if_empty(self, dir_path, dry_run, kind="directory"):
        """Remove ``dir_path`` if it is empty; return True if it was (or would be).

        In a dry run no file has been deleted, so a directory that would only
        become empty through this run's deletions is not reported.
        """
        if os.listdir(dir_path):
            return False
        if dry_run:
            self.stdout.write(f"Would remove empty {kind}: {dir_path}")
            return True
        try:
            os.rmdir(dir_path)
        except OSError as e:
            self.stdout.write(self.style.ERROR(f"Failed to remove {dir_path}: {e}"))
            return False
        self.stdout.write(f"Removed empty {kind}: {dir_path}")
        return True

    def _clean_things(self, things_root, valid_paths, dry_run):
        """Remove unreferenced files below ``things_root``.

        Returns ``(files_removed, empty_dirs_removed)``.
        """
        files_removed = 0
        dirs_removed = 0
        # Bottom-up, so a directory emptied by this walk is seen as empty when
        # its parent is processed.
        for root, dirs, files in os.walk(things_root, topdown=False):
            for filename in files:
                file_path = os.path.join(root, filename)
                if self._storage_relpath(file_path, things_root) in valid_paths:
                    continue
                if self._remove_file(file_path, dry_run):
                    files_removed += 1
            for dirname in dirs:
                if self._remove_dir_if_empty(os.path.join(root, dirname), dry_run):
                    dirs_removed += 1
        return files_removed, dirs_removed

    def _clean_cache(
        self, cache_root, media_root, orphaned_thumbnail_paths, db_cache_paths, dry_run
    ):
        """Remove orphaned or untracked thumbnails below ``cache_root``.

        Returns ``(thumbnails_removed, empty_dirs_removed)``.
        """
        thumbnails_removed = 0
        dirs_removed = 0
        for root, dirs, files in os.walk(cache_root, topdown=False):
            for filename in files:
                file_path = os.path.join(root, filename)
                # Thumbnail names in the database are relative to MEDIA_ROOT.
                relative_path = self._storage_relpath(file_path, media_root)
                if relative_path in orphaned_thumbnail_paths:
                    label = " thumbnail (orphaned image)"
                elif relative_path not in db_cache_paths:
                    label = " thumbnail (no db entry)"
                else:
                    continue
                if self._remove_file(file_path, dry_run, label):
                    thumbnails_removed += 1
            for dirname in dirs:
                if self._remove_dir_if_empty(
                    os.path.join(root, dirname), dry_run, "cache directory"
                ):
                    dirs_removed += 1
        return thumbnails_removed, dirs_removed