From 1304a27408011a2bb5c9c1f096350db01cbc19cf Mon Sep 17 00:00:00 2001
From: Colin Powell
Date: Sun, 17 Nov 2024 20:48:24 -0500
Subject: [PATCH] [books] Add webdav koreader importer

---
 vrobbler/apps/beers/models.py                 |  5 +-
 vrobbler/apps/books/koreader.py               | 35 ++++++--
 .../management/commands/import_from_webdav.py | 18 ++++
 vrobbler/apps/scrobbles/models.py             | 15 ++++
 vrobbler/apps/scrobbles/utils.py              | 86 +++++++++++++++++++
 vrobbler/apps/scrobbles/views.py              |  2 +-
 vrobbler/apps/webdav/client.py                | 26 ++++--
 7 files changed, 169 insertions(+), 18 deletions(-)
 create mode 100644 vrobbler/apps/scrobbles/management/commands/import_from_webdav.py

diff --git a/vrobbler/apps/beers/models.py b/vrobbler/apps/beers/models.py
index 12c46c7..feb2d89 100644
--- a/vrobbler/apps/beers/models.py
+++ b/vrobbler/apps/beers/models.py
@@ -1,5 +1,6 @@
 from uuid import uuid4
 
+from beers.untappd import get_beer_from_untappd_id, get_rating_from_soup
 from django.apps import apps
 from django.db import models
 from django.urls import reverse
@@ -8,10 +9,6 @@ from imagekit.models import ImageSpecField
 from imagekit.processors import ResizeToFit
 from scrobbles.dataclasses import BeerLogData
 from scrobbles.mixins import ScrobblableConstants, ScrobblableMixin
-from vrobbler.apps.beers.untappd import (
-    get_beer_from_untappd_id,
-    get_rating_from_soup,
-)
 
 BNULL = {"blank": True, "null": True}
 
diff --git a/vrobbler/apps/books/koreader.py b/vrobbler/apps/books/koreader.py
index faa5b8a..19ceff8 100644
--- a/vrobbler/apps/books/koreader.py
+++ b/vrobbler/apps/books/koreader.py
@@ -1,4 +1,3 @@
-from collections import OrderedDict
 import logging
 import re
 import sqlite3
@@ -7,12 +6,11 @@ from enum import Enum
 
 import pytz
 import requests
-from books.models import Author, Book
-from books.openlibrary import get_author_openlibrary_id
+from books.constants import BOOKS_TITLES_TO_IGNORE
 from django.apps import apps
 from django.contrib.auth import get_user_model
 from stream_sqlite import stream_sqlite
-from vrobbler.apps.books.constants import BOOKS_TITLES_TO_IGNORE
+from webdav.client import get_webdav_client
 
 logger = logging.getLogger(__name__)
 User = get_user_model()
@@ -63,6 +61,8 @@ def lookup_or_create_authors_from_author_str(ko_author_str: str) -> list:
     """Takes a string of authors from KoReader and returns a list
     of Authors from our database
     """
+    from books.models import Author
+
     author_str_list = ko_author_str.split(", ")
     author_list = []
     for author_str in author_str_list:
@@ -83,6 +83,8 @@ def lookup_or_create_authors_from_author_str(ko_author_str: str) -> list:
 
 
 def create_book_from_row(row: list):
+    from books.models import Book
+
     # No KoReader book yet, create it
     author_str = get_author_str_from_row(row).replace("\x00", "")
     total_pages = row[KoReaderBookColumn.PAGES.value]
@@ -131,6 +133,8 @@ def build_book_map(rows) -> dict:
     primary key IDs for page creation.
 
     """
+    from books.models import Book
+
     book_id_map = {}
 
     for book_row in rows:
@@ -148,7 +152,12 @@ def build_book_map(rows) -> dict:
         ).first()
 
         if not book:
-            title = book_row[KoReaderBookColumn.TITLE.value].split(" - ")[0].lower().replace("\x00", "")
+            title = (
+                book_row[KoReaderBookColumn.TITLE.value]
+                .split(" - ")[0]
+                .lower()
+                .replace("\x00", "")
+            )
             book = Book.objects.filter(title=title).first()
 
         if not book:
@@ -438,3 +447,19 @@ def process_koreader_sqlite_file(file_path, user_id) -> list:
         extra={"created_scrobbles": created},
     )
     return created
+
+
+def fetch_file_from_webdav(user_id: int) -> str:
+    file_path = f"/tmp/{user_id}-koreader-import.sqlite3"
+    client = get_webdav_client(user_id)
+
+    if not client:
+        logger.warning("could not get webdav client for user")
+        # TODO maybe we raise an exception here?
+        return ""
+
+    client.download_sync(
+        remote_path="var/koreader/statistics.sqlite3",
+        local_path=file_path,
+    )
+    return file_path
diff --git a/vrobbler/apps/scrobbles/management/commands/import_from_webdav.py b/vrobbler/apps/scrobbles/management/commands/import_from_webdav.py
new file mode 100644
index 0000000..707d056
--- /dev/null
+++ b/vrobbler/apps/scrobbles/management/commands/import_from_webdav.py
@@ -0,0 +1,18 @@
+from django.core.management.base import BaseCommand
+from vrobbler.apps.scrobbles.utils import import_from_webdav_for_all_users
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--restart",
+            action="store_true",
+            help="Restart failed imports",
+        )
+
+    def handle(self, *args, **options):
+        restart = False
+        if options["restart"]:
+            restart = True
+        count = import_from_webdav_for_all_users(restart=restart)
+        print(f"Started {count} WebDAV imports")
diff --git a/vrobbler/apps/scrobbles/models.py b/vrobbler/apps/scrobbles/models.py
index 7659555..8579ee2 100644
--- a/vrobbler/apps/scrobbles/models.py
+++ b/vrobbler/apps/scrobbles/models.py
@@ -15,6 +15,7 @@ from books.models import Book
 from bricksets.models import BrickSet
 from django.conf import settings
 from django.contrib.auth import get_user_model
+from django.core.files import File
 from django.db import models
 from django.urls import reverse
 from django.utils import timezone
@@ -49,6 +50,7 @@ from videos.models import Series, Video
 from webpages.models import WebPage
 
 from vrobbler.apps.scrobbles.constants import MEDIA_END_PADDING_SECONDS
+from vrobbler.apps.scrobbles.utils import get_file_md5_hash
 
 logger = logging.getLogger(__name__)
 User = get_user_model()
@@ -186,6 +188,19 @@ class KoReaderImport(BaseFileImportMixin):
 
     sqlite_file = models.FileField(upload_to=get_path, **BNULL)
 
+    def save_sqlite_file_to_self(self, file_path):
+        with open(file_path, "rb") as f:
+            self.sqlite_file.save(
+                f"{self.user_id}-koreader-statistics.sqlite",
+                File(f),
+                save=True,
+            )
+
+    def file_md5_hash(self) -> str:
+        if self.sqlite_file:
+            return get_file_md5_hash(self.sqlite_file.path)
+        return ""
+
     def process(self, force=False):
 
         if self.processed_finished and not force:
diff --git a/vrobbler/apps/scrobbles/utils.py b/vrobbler/apps/scrobbles/utils.py
index 6c1cbe5..8e61d9d 100644
--- a/vrobbler/apps/scrobbles/utils.py
+++ b/vrobbler/apps/scrobbles/utils.py
@@ -1,8 +1,10 @@
+import hashlib
 import logging
 import re
 from datetime import datetime, timedelta, tzinfo
 
 import pytz
+from books.koreader import fetch_file_from_webdav
 from django.apps import apps
 from django.contrib.auth import get_user_model
 from django.db import models
@@ -11,6 +13,8 @@ from profiles.models import UserProfile
 from profiles.utils import now_user_timezone
 from scrobbles.constants import LONG_PLAY_MEDIA
 from scrobbles.tasks import process_lastfm_import, process_retroarch_import
+from vrobbler.apps.scrobbles.tasks import process_koreader_import
+from webdav.client import get_webdav_client
 
 logger = logging.getLogger(__name__)
 User = get_user_model()
@@ -178,5 +182,87 @@ def delete_zombie_scrobbles(dry_run=True):
     return zombies_found
 
 
+def import_from_webdav_for_all_users(restart=False):
+    """Grab a list of all users with WebDAV enabled and kickoff imports for them"""
+    from scrobbles.models import KoReaderImport
+
+    # LastFmImport = apps.get_model("scrobbles", "LastFMImport")
+    webdav_enabled_user_ids = UserProfile.objects.filter(
+        webdav_url__isnull=False,
+        webdav_user__isnull=False,
+        webdav_pass__isnull=False,
+        webdav_auto_import=True,
+    ).values_list("user_id", flat=True)
+    logger.info(
+        f"start import of {webdav_enabled_user_ids.count()} webdav accounts"
+    )
+
+    koreader_import_count = 0
+
+    for user_id in webdav_enabled_user_ids:
+        webdav_client = get_webdav_client(user_id)
+
+        try:
+            webdav_client.info("var/koreader/statistics.sqlite3")
+            koreader_found = True
+        except Exception:  # best-effort probe; a None client lands here too
+            koreader_found = False
+            logger.info(
+                "no koreader stats file found on webdav",
+                extra={"user_id": user_id},
+            )
+
+        if koreader_found:
+            last_import = (
+                KoReaderImport.objects.filter(
+                    user_id=user_id, processed_finished__isnull=False
+                )
+                .order_by("processed_finished")
+                .last()
+            )
+
+            koreader_file_path = fetch_file_from_webdav(user_id)  # this user's file, not user 1's
+            new_hash = get_file_md5_hash(koreader_file_path)
+            old_hash = None
+            if last_import:
+                old_hash = last_import.file_md5_hash()
+
+            if old_hash and new_hash == old_hash:
+                logger.info(
+                    "koreader stats file has not changed",
+                    extra={
+                        "user_id": user_id,
+                        "new_hash": new_hash,
+                        "old_hash": old_hash,
+                        "last_import_id": last_import.id,
+                    },
+                )
+                continue
+
+            koreader_import, created = KoReaderImport.objects.get_or_create(
+                user_id=user_id, processed_finished__isnull=True
+            )
+
+            if not created and not restart:
+                logger.info(
+                    f"Not resuming failed KoReader import {koreader_import.id} for user {user_id}, use restart=True to restart"
+                )
+                continue
+
+            koreader_import.save_sqlite_file_to_self(koreader_file_path)
+
+            process_koreader_import.delay(koreader_import.id)
+            koreader_import_count += 1
+    return koreader_import_count
+
+
 def media_class_to_foreign_key(media_class: str) -> str:
     return re.sub(r"(?<!^)(?=[A-Z])", "_", media_class).lower()
+
+
+def get_file_md5_hash(file_path: str) -> str:
+    with open(file_path, "rb") as f:
+        file_hash = hashlib.md5()
+        while chunk := f.read(8192):
+            file_hash.update(chunk)
+    return file_hash.hexdigest()
diff --git a/vrobbler/apps/scrobbles/views.py b/vrobbler/apps/scrobbles/views.py
index fbaeb74..0371fc9 100644
--- a/vrobbler/apps/scrobbles/views.py
+++ b/vrobbler/apps/scrobbles/views.py
@@ -25,7 +25,7 @@ from rest_framework.decorators import (
     permission_classes,
 )
 from rest_framework.parsers import MultiPartParser
-from rest_framework.permissions import IsAuthenticated
+from rest_framework.permissions import IsAuthenticated, AllowAny
 from rest_framework.response import Response
 from scrobbles.api import serializers
 from scrobbles.constants import (
diff --git a/vrobbler/apps/webdav/client.py b/vrobbler/apps/webdav/client.py
index f83c7d8..744370d 100644
--- a/vrobbler/apps/webdav/client.py
+++ b/vrobbler/apps/webdav/client.py
@@ -1,23 +1,33 @@
-from webdav3.client import Client
+import logging
+from typing import Optional
 
 from profiles.models import UserProfile
+from webdav3.client import Client
 
-def get_webdav_client(user_id):
-    client = None
+logger = logging.getLogger(__name__)
+
+
+def get_webdav_client(user_id) -> Optional[Client]:
     profile = UserProfile.objects.filter(user_id=user_id).first()
 
     if not profile:
-        logger.info("[get_webdav_client] no profile for user", extra={"user_id": user_id})
+        logger.info(
+            "[get_webdav_client] no profile for user",
+            extra={"user_id": user_id},
+        )
         return
 
     if not profile.webdav_user:
-        logger.info("[get_webdav_client] no webdave user for profile", extra={"user_id": user_id})
+        logger.info(
+            "[get_webdav_client] no webdave user for profile",
+            extra={"user_id": user_id},
+        )
         return
 
     return Client(
         {
-            'webdav_hostname': profile.webdav_url,
-            'webdav_login': profile.webdav_user,
-            'webdav_password': profile.webdav_pass,
+            "webdav_hostname": profile.webdav_url,
+            "webdav_login": profile.webdav_user,
+            "webdav_password": profile.webdav_pass,
         }
     )