[videogames] Fix how we scan for new RA playlogs

2026-05-26 09:01:27 -04:00
parent f4b30ade70
commit 2fac5815b1
2 changed files with 68 additions and 39 deletions
--- a/vrobbler/apps/scrobbles/importers/webdav.py
+++ b/vrobbler/apps/scrobbles/importers/webdav.py
@ -20,7 +20,7 @@ DEFAULT_EBIRD_PATH = "var/ebird/"
 DEFAULT_SCALE_PATH = "var/scale/"


-def import_from_webdav_for_all_users(restart=False):
+def import_from_webdav_for_all_users(restart=False, update_retroarch_hash=False):
    """Iterate all WebDAV-enabled users, scanning each media-type directory."""
    webdav_enabled_user_ids = UserProfile.objects.filter(
        webdav_url__isnull=False,
@ -46,7 +46,9 @@ def import_from_webdav_for_all_users(restart=False):
        logger.info("Scanning WebDAV gpx for user %s", user_id)
        gpx_count += scan_webdav_for_gpx(client, user_id)
        logger.info("Scanning WebDAV retroarch for user %s", user_id)
-        retro_count += scan_webdav_for_retroarch(client, user_id)
+        retro_count += scan_webdav_for_retroarch(
+            client, user_id, update_hash_only=update_retroarch_hash
+        )
        logger.info("Scanning WebDAV bgstats for user %s", user_id)
        bgstats_count += scan_webdav_for_bgstats(client, user_id)
        logger.info("Scanning WebDAV ebird for user %s", user_id)
@ -182,8 +184,18 @@ def scan_webdav_for_gpx(webdav_client, user_id):
    return new_imports


-def scan_webdav_for_retroarch(webdav_client, user_id):
-    """Download all .lrtl files from WebDAV, zip them, queue one import."""
+def scan_webdav_for_retroarch(webdav_client, user_id, update_hash_only=False):
+    """Check for new .lrtl files on WebDAV, download+import if changed.
+
+    Uses ETags (or modified times as a fallback) from a single PROPFIND to detect
+    changes without downloading file content, making the no-change case very fast.
+
+    When *update_hash_only* is True, mismatched hashes update the last import's
+    stored hash without re-downloading — useful when migrating to a new hash
+    scheme to avoid a one-time re-import.
+    """
+    import hashlib
+    import shutil
    import zipfile
    from datetime import datetime

@ -198,7 +210,7 @@ def scan_webdav_for_retroarch(webdav_client, user_id):
        return 0

    try:
-        files = webdav_client.list(retroarch_path)
+        files = webdav_client.list(retroarch_path, get_info=True)
    except Exception as e:
        logger.warning(
            "Could not list var/retroarch/",
@ -206,10 +218,11 @@ def scan_webdav_for_retroarch(webdav_client, user_id):
        )
        return 0

-    lrtl_basenames = sorted(
-        os.path.basename(fname) for fname in files if fname.lower().endswith(".lrtl")
+    lrtl_files = sorted(
+        [f for f in files if f.get("name", "").lower().endswith(".lrtl")],
+        key=lambda x: x.get("name", ""),
    )
-    if not lrtl_basenames:
+    if not lrtl_files:
        logger.info("No .lrtl files found on webdav", extra={"user_id": user_id})
        return 0

@ -223,14 +236,48 @@ def scan_webdav_for_retroarch(webdav_client, user_id):
        )
        return 0

+    # Compute hash from filenames + ETags/modified times (no downloads needed)
+    hasher = hashlib.md5()
+    for f in lrtl_files:
+        hasher.update(f.get("name", "").encode())
+        hasher.update((f.get("etag") or f.get("modified") or "").encode())
+    content_hash = hasher.hexdigest()
+
+    # Skip if the last completed import already has this hash
+    last_import = (
+        RetroarchImport.objects.filter(
+            user_id=user_id, processed_finished__isnull=False
+        )
+        .order_by("-processed_finished")
+        .first()
+    )
+    if last_import and last_import.files_hash == content_hash:
+        logger.info(
+            "Retroarch lrtl files unchanged for user, skipping",
+            extra={"user_id": user_id},
+        )
+        return 0
+
+    if update_hash_only and last_import:
+        logger.info(
+            "Updating retroarch files_hash (%s) without re-import",
+            content_hash[:12],
+            extra={"user_id": user_id},
+        )
+        last_import.files_hash = content_hash
+        last_import.save(update_fields=["files_hash"])
+        return 0
+
+    # Something changed — download everything
    download_dir = tempfile.mkdtemp()
    try:
        downloaded = []
-        for basename in lrtl_basenames:
+        for f in lrtl_files:
+            basename = f.get("name")
            dst = os.path.join(download_dir, basename)
            try:
                webdav_client.download_sync(
-                    remote_path=f"{retroarch_path}/{basename}",
+                    remote_path=f.get("path"),
                    local_path=dst,
                )
                downloaded.append(basename)
@ -240,32 +287,6 @@ def scan_webdav_for_retroarch(webdav_client, user_id):
        if not downloaded:
            return 0

-        # Compute content hash of all downloaded files
-        import hashlib
-
-        hasher = hashlib.md5()
-        for basename in downloaded:
-            filepath = os.path.join(download_dir, basename)
-            hasher.update(basename.encode())
-            with open(filepath, "rb") as f:
-                hasher.update(f.read())
-        content_hash = hasher.hexdigest()
-
-        # Skip if the last completed import already has this hash
-        last_import = (
-            RetroarchImport.objects.filter(
-                user_id=user_id, processed_finished__isnull=False
-            )
-            .order_by("-processed_finished")
-            .first()
-        )
-        if last_import and last_import.files_hash == content_hash:
-            logger.info(
-                "Retroarch lrtl files unchanged for user, skipping",
-                extra={"user_id": user_id},
-            )
-            return 0
-
        zip_path = os.path.join(
            download_dir,
            f"retroarch-batch-{datetime.now().strftime('%Y%m%d%H%M%S')}.zip",
@ -290,8 +311,6 @@ def scan_webdav_for_retroarch(webdav_client, user_id):
        )
        return 1
    finally:
-        import shutil
-
        shutil.rmtree(download_dir, ignore_errors=True)


--- a/vrobbler/apps/scrobbles/management/commands/import_from_webdav.py
+++ b/vrobbler/apps/scrobbles/management/commands/import_from_webdav.py
@ -9,13 +9,23 @@ class Command(BaseCommand):
            action="store_true",
            help="Restart failed imports",
        )
+        parser.add_argument(
+            "--update-retroarch-hash",
+            action="store_true",
+            help="Update retroarch files_hash to new ETag-based scheme "
+            "without re-importing (migration helper)",
+        )

    def handle(self, *args, **options):
        restart = False
        if options["restart"]:
            restart = True
+        update_hash = options.get("update_retroarch_hash", False)
        ko_count, gpx_count, retro_count, bgstats_count, ebird_count, scale_count = (
-            webdav.import_from_webdav_for_all_users(restart=restart)
+            webdav.import_from_webdav_for_all_users(
+                restart=restart,
+                update_retroarch_hash=update_hash,
+            )
        )
        print(
            f"Started {ko_count} KOReader, {gpx_count} Trail GPX, "