[importers] Add flag to reimport already processed files
This commit is contained in:
42
PROJECT.org
42
PROJECT.org
@ -93,7 +93,7 @@ fetching and simple saving.
|
||||
:LOGBOOK:
|
||||
CLOCK: [2025-07-09 Wed 09:55]--[2025-07-09 Wed 10:15] => 0:20
|
||||
:END:
|
||||
* Backlog [26/42] :vrobbler:project:personal:
|
||||
* Backlog [28/44] :vrobbler:project:personal:
|
||||
** TODO [#C] Add sentiment parsing for Scrobbles with notes :vrobbler:project:scrobbles:sentiment:
|
||||
:PROPERTIES:
|
||||
:ID: 37781d6a-f3b0-48b2-bf98-33c2c791cf85
|
||||
@ -496,7 +496,45 @@ or at least make it optional, and then require a M2M between Track and Artist.
|
||||
Then this one would be a Track by both `Matt Sweeney` and `Bonnie "Prince"
|
||||
Billy`
|
||||
|
||||
** TODO [#A] Allow special parameter to re-import already processed GPX files :imports:gpx:
|
||||
** TODO [#A] Move imported eBird CSV files to processed/ directory on WebDAV :webdav:ebird:importers:
|
||||
:PROPERTIES:
|
||||
:ID: 445e1253-d353-4b55-b1d8-39d0a0dcdd34
|
||||
:END:
|
||||
- File: ~vrobbler/apps/scrobbles/importers/webdav.py~ (line 439)
|
||||
- Same pattern as the GPX importer: after importing a =.csv= file from
|
||||
WebDAV, move it to =var/ebird/processed/= with a timestamp appended.
|
||||
|
||||
** TODO [#A] Move imported Scale CSV files to processed/ directory on WebDAV :webdav:scale:importers:
|
||||
:PROPERTIES:
|
||||
:ID: 1a0de363-d1ea-466e-9966-e24941a6180b
|
||||
:END:
|
||||
- File: ~vrobbler/apps/scrobbles/importers/webdav.py~ (line 496)
|
||||
- Same pattern as the GPX importer: after importing a =.csv= file from
|
||||
WebDAV, move it to =var/scale/processed/= with a timestamp appended.
|
||||
|
||||
** DONE [#A] Allow special parameter to re-import already processed GPX files :imports:gpx:
|
||||
:PROPERTIES:
|
||||
:ID: 166c0809-c11a-4d02-8071-7a69dcb36e64
|
||||
:END:
|
||||
|
||||
*** Description
|
||||
|
||||
Now that we stash imported GPX files in the processed/ subdirectory on import,
|
||||
it would be nice to have a flag like --include-processed on the webdav importer
|
||||
that would import files both in the root of gpx/ and in the processed
|
||||
directory. This would aid testing imports in staging quickly without constantly
|
||||
moving files back and forth.
|
||||
|
||||
** DONE [#A] Move imported GPX files to processed/ directory on WebDAV :webdav:gpx:importers:
|
||||
:PROPERTIES:
|
||||
:ID: db2b02fc-817c-4c39-bd51-13f9b77c7888
|
||||
:END:
|
||||
- File: ~vrobbler/apps/scrobbles/importers/webdav.py~ (line 198)
|
||||
- After importing a GPX/FIT file from WebDAV, move it to a =processed/=
|
||||
subdirectory with a timestamp appended. This eliminates the DB lookup for
|
||||
already-imported filenames — any file present in the top-level directory
|
||||
is new. Also makes manual re-imports easy (just move a file back).
|
||||
|
||||
** DONE [#A] Add CSS Grid calendar view for scrobbles :vrobbler:personal:project:templates:feature:
|
||||
:PROPERTIES:
|
||||
:ID: be915acf-d803-466a-8770-823819ebf2a9
|
||||
|
||||
@ -22,7 +22,10 @@ DEFAULT_SCALE_PATH = "var/scale/"
|
||||
|
||||
|
||||
def import_from_webdav_for_all_users(
|
||||
restart=False, update_retroarch_hash=False, update_koreader_etag=False
|
||||
restart=False,
|
||||
update_retroarch_hash=False,
|
||||
update_koreader_etag=False,
|
||||
include_processed=False,
|
||||
):
|
||||
"""Iterate all WebDAV-enabled users, scanning each media-type directory."""
|
||||
webdav_enabled_user_ids = UserProfile.objects.filter(
|
||||
@ -31,6 +34,12 @@ def import_from_webdav_for_all_users(
|
||||
webdav_pass__isnull=False,
|
||||
webdav_auto_import=True,
|
||||
).values_list("user_id", flat=True)
|
||||
if include_processed:
|
||||
logger.warning(
|
||||
"Re-importing previously-processed files from processed/ subdirectories "
|
||||
"— this may create duplicate scrobbles."
|
||||
)
|
||||
|
||||
logger.info(f"Start import of {webdav_enabled_user_ids.count()} webdav accounts")
|
||||
|
||||
ko_count = 0
|
||||
@ -49,7 +58,9 @@ def import_from_webdav_for_all_users(
|
||||
client, user_id, restart, update_etag_only=update_koreader_etag
|
||||
)
|
||||
logger.info("Scanning WebDAV gpx for user %s", user_id)
|
||||
gpx_count += scan_webdav_for_gpx(client, user_id)
|
||||
gpx_count += scan_webdav_for_gpx(
|
||||
client, user_id, include_processed=include_processed
|
||||
)
|
||||
logger.info("Scanning WebDAV retroarch for user %s", user_id)
|
||||
retro_count += scan_webdav_for_retroarch(
|
||||
client, user_id, update_hash_only=update_retroarch_hash
|
||||
@ -200,7 +211,7 @@ def scan_webdav_for_koreader(
|
||||
return 1
|
||||
|
||||
|
||||
def scan_webdav_for_gpx(webdav_client, user_id):
|
||||
def scan_webdav_for_gpx(webdav_client, user_id, include_processed=False):
|
||||
gpx_path = DEFAULT_GPX_PATH # TODO allow this to be configured in user settings
|
||||
try:
|
||||
webdav_client.info(DEFAULT_GPX_PATH)
|
||||
@ -259,6 +270,41 @@ def scan_webdav_for_gpx(webdav_client, user_id):
|
||||
finally:
|
||||
os.unlink(tmp.name)
|
||||
|
||||
if include_processed:
|
||||
try:
|
||||
processed_files = webdav_client.list(processed_dir)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Could not list var/gpx/processed/",
|
||||
extra={"user_id": user_id, "error": str(e)},
|
||||
)
|
||||
return new_imports
|
||||
|
||||
for fname in processed_files:
|
||||
fname = os.path.basename(fname)
|
||||
if not fname.lower().endswith(gpx_extensions):
|
||||
continue
|
||||
|
||||
remote_path = f"{processed_dir}{fname}"
|
||||
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=fname)
|
||||
try:
|
||||
webdav_client.download_sync(
|
||||
remote_path=remote_path, local_path=tmp.name
|
||||
)
|
||||
imp = TrailGPXImport.objects.create(
|
||||
user_id=user_id,
|
||||
original_filename=fname,
|
||||
)
|
||||
with open(tmp.name, "rb") as f:
|
||||
imp.gpx_file.save(fname, f, save=True)
|
||||
|
||||
process_trail_gpx_import.delay(imp.id)
|
||||
new_imports += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to import processed GPX file {fname}: {e}")
|
||||
finally:
|
||||
os.unlink(tmp.name)
|
||||
|
||||
return new_imports
|
||||
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from vrobbler.apps.scrobbles.importers import webdav
|
||||
|
||||
|
||||
@ -21,6 +22,12 @@ class Command(BaseCommand):
|
||||
help="Store current WebDAV ETag on last KoReader import "
|
||||
"without re-importing (migration helper)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--include-processed",
|
||||
action="store_true",
|
||||
help="Also import files already moved to processed/ subdirectories "
|
||||
"(may produce duplicate scrobbles; use with care on production)",
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
restart = False
|
||||
@ -28,8 +35,10 @@ class Command(BaseCommand):
|
||||
restart = True
|
||||
update_hash = options.get("update_retroarch_hash", False)
|
||||
update_etag = options.get("update_koreader_etag", False)
|
||||
include_processed = options.get("include_processed", False)
|
||||
webdav.import_from_webdav_for_all_users(
|
||||
restart=restart,
|
||||
update_retroarch_hash=update_hash,
|
||||
update_koreader_etag=update_etag,
|
||||
include_processed=include_processed,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user