[importers] Add flag to reimport already processed files
All checks were successful
build & deploy / test (push) Successful in 2m1s
build & deploy / build-and-deploy (push) Successful in 29s

This commit is contained in:
2026-05-28 17:19:08 -04:00
parent bea2b2d187
commit 62d8ffd794
3 changed files with 98 additions and 5 deletions

View File

@ -93,7 +93,7 @@ fetching and simple saving.
:LOGBOOK:
CLOCK: [2025-07-09 Wed 09:55]--[2025-07-09 Wed 10:15] => 0:20
:END:
* Backlog [26/42] :vrobbler:project:personal:
* Backlog [28/44] :vrobbler:project:personal:
** TODO [#C] Add sentiment parsing for Scrobbles with notes :vrobbler:project:scrobbles:sentiment:
:PROPERTIES:
:ID: 37781d6a-f3b0-48b2-bf98-33c2c791cf85
@ -496,7 +496,45 @@ or at least make it optional, and then require a M2M between Track and Artist.
Then this one would be a Track by both `Matt Sweeney` and `Bonnie "Prince"
Billy`
** TODO [#A] Allow special parameter to re-import already processed GPX files :imports:gpx:
** TODO [#A] Move imported eBird CSV files to processed/ directory on WebDAV :webdav:ebird:importers:
:PROPERTIES:
:ID: 445e1253-d353-4b55-b1d8-39d0a0dcdd34
:END:
- File: ~vrobbler/apps/scrobbles/importers/webdav.py~ (line 439)
- Same pattern as the GPX importer: after importing a =.csv= file from
WebDAV, move it to =var/ebird/processed/= with a timestamp appended.
** TODO [#A] Move imported Scale CSV files to processed/ directory on WebDAV :webdav:scale:importers:
:PROPERTIES:
:ID: 1a0de363-d1ea-466e-9966-e24941a6180b
:END:
- File: ~vrobbler/apps/scrobbles/importers/webdav.py~ (line 496)
- Same pattern as the GPX importer: after importing a =.csv= file from
WebDAV, move it to =var/scale/processed/= with a timestamp appended.
** DONE [#A] Allow special parameter to re-import already processed GPX files :imports:gpx:
:PROPERTIES:
:ID: 166c0809-c11a-4d02-8071-7a69dcb36e64
:END:
*** Description
Now that we stash imported GPX files in the processed/ subdirectory on import,
it would be nice to have a flag like --include-processed on the webdav importer
that would import files from both the root of gpx/ and the processed/
directory. This would aid in quickly testing imports in staging without
constantly moving files back and forth.
** DONE [#A] Move imported GPX files to processed/ directory on WebDAV :webdav:gpx:importers:
:PROPERTIES:
:ID: db2b02fc-817c-4c39-bd51-13f9b77c7888
:END:
- File: ~vrobbler/apps/scrobbles/importers/webdav.py~ (line 198)
- After importing a GPX/FIT file from WebDAV, move it to a =processed/=
subdirectory with a timestamp appended. This eliminates the DB lookup for
already-imported filenames — any file present in the top-level directory
is new. Also makes manual re-imports easy (just move a file back).
** DONE [#A] Add CSS Grid calendar view for scrobbles :vrobbler:personal:project:templates:feature:
:PROPERTIES:
:ID: be915acf-d803-466a-8770-823819ebf2a9

View File

@ -22,7 +22,10 @@ DEFAULT_SCALE_PATH = "var/scale/"
def import_from_webdav_for_all_users(
restart=False, update_retroarch_hash=False, update_koreader_etag=False
restart=False,
update_retroarch_hash=False,
update_koreader_etag=False,
include_processed=False,
):
"""Iterate all WebDAV-enabled users, scanning each media-type directory."""
webdav_enabled_user_ids = UserProfile.objects.filter(
@ -31,6 +34,12 @@ def import_from_webdav_for_all_users(
webdav_pass__isnull=False,
webdav_auto_import=True,
).values_list("user_id", flat=True)
if include_processed:
logger.warning(
"Re-importing previously-processed files from processed/ subdirectories "
"— this may create duplicate scrobbles."
)
logger.info(f"Start import of {webdav_enabled_user_ids.count()} webdav accounts")
ko_count = 0
@ -49,7 +58,9 @@ def import_from_webdav_for_all_users(
client, user_id, restart, update_etag_only=update_koreader_etag
)
logger.info("Scanning WebDAV gpx for user %s", user_id)
gpx_count += scan_webdav_for_gpx(client, user_id)
gpx_count += scan_webdav_for_gpx(
client, user_id, include_processed=include_processed
)
logger.info("Scanning WebDAV retroarch for user %s", user_id)
retro_count += scan_webdav_for_retroarch(
client, user_id, update_hash_only=update_retroarch_hash
@ -200,7 +211,7 @@ def scan_webdav_for_koreader(
return 1
def scan_webdav_for_gpx(webdav_client, user_id):
def scan_webdav_for_gpx(webdav_client, user_id, include_processed=False):
gpx_path = DEFAULT_GPX_PATH # TODO allow this to be configured in user settings
try:
webdav_client.info(DEFAULT_GPX_PATH)
@ -259,6 +270,41 @@ def scan_webdav_for_gpx(webdav_client, user_id):
finally:
os.unlink(tmp.name)
if include_processed:
try:
processed_files = webdav_client.list(processed_dir)
except Exception as e:
logger.warning(
"Could not list var/gpx/processed/",
extra={"user_id": user_id, "error": str(e)},
)
return new_imports
for fname in processed_files:
fname = os.path.basename(fname)
if not fname.lower().endswith(gpx_extensions):
continue
remote_path = f"{processed_dir}{fname}"
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=fname)
try:
webdav_client.download_sync(
remote_path=remote_path, local_path=tmp.name
)
imp = TrailGPXImport.objects.create(
user_id=user_id,
original_filename=fname,
)
with open(tmp.name, "rb") as f:
imp.gpx_file.save(fname, f, save=True)
process_trail_gpx_import.delay(imp.id)
new_imports += 1
except Exception as e:
logger.error(f"Failed to import processed GPX file {fname}: {e}")
finally:
os.unlink(tmp.name)
return new_imports

View File

@ -1,4 +1,5 @@
from django.core.management.base import BaseCommand
from vrobbler.apps.scrobbles.importers import webdav
@ -21,6 +22,12 @@ class Command(BaseCommand):
help="Store current WebDAV ETag on last KoReader import "
"without re-importing (migration helper)",
)
parser.add_argument(
"--include-processed",
action="store_true",
help="Also import files already moved to processed/ subdirectories "
"(may produce duplicate scrobbles; use with care on production)",
)
def handle(self, *args, **options):
restart = False
@ -28,8 +35,10 @@ class Command(BaseCommand):
restart = True
update_hash = options.get("update_retroarch_hash", False)
update_etag = options.get("update_koreader_etag", False)
include_processed = options.get("include_processed", False)
webdav.import_from_webdav_for_all_users(
restart=restart,
update_retroarch_hash=update_hash,
update_koreader_etag=update_etag,
include_processed=include_processed,
)