[books] Add webdav koreader importer

This commit is contained in:
2024-11-17 20:48:24 -05:00
parent 2327b1f622
commit 1304a27408
7 changed files with 169 additions and 18 deletions

View File

@ -1,5 +1,6 @@
from uuid import uuid4 from uuid import uuid4
from beers.untappd import get_beer_from_untappd_id, get_rating_from_soup
from django.apps import apps from django.apps import apps
from django.db import models from django.db import models
from django.urls import reverse from django.urls import reverse
@ -8,10 +9,6 @@ from imagekit.models import ImageSpecField
from imagekit.processors import ResizeToFit from imagekit.processors import ResizeToFit
from scrobbles.dataclasses import BeerLogData from scrobbles.dataclasses import BeerLogData
from scrobbles.mixins import ScrobblableConstants, ScrobblableMixin from scrobbles.mixins import ScrobblableConstants, ScrobblableMixin
from vrobbler.apps.beers.untappd import (
get_beer_from_untappd_id,
get_rating_from_soup,
)
BNULL = {"blank": True, "null": True} BNULL = {"blank": True, "null": True}

View File

@ -1,4 +1,3 @@
from collections import OrderedDict
import logging import logging
import re import re
import sqlite3 import sqlite3
@ -7,12 +6,11 @@ from enum import Enum
import pytz import pytz
import requests import requests
from books.models import Author, Book from books.constants import BOOKS_TITLES_TO_IGNORE
from books.openlibrary import get_author_openlibrary_id
from django.apps import apps from django.apps import apps
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from stream_sqlite import stream_sqlite from stream_sqlite import stream_sqlite
from vrobbler.apps.books.constants import BOOKS_TITLES_TO_IGNORE from webdav.client import get_webdav_client
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
User = get_user_model() User = get_user_model()
@ -63,6 +61,8 @@ def lookup_or_create_authors_from_author_str(ko_author_str: str) -> list:
"""Takes a string of authors from KoReader and returns a list """Takes a string of authors from KoReader and returns a list
of Authors from our database of Authors from our database
""" """
from books.models import Author
author_str_list = ko_author_str.split(", ") author_str_list = ko_author_str.split(", ")
author_list = [] author_list = []
for author_str in author_str_list: for author_str in author_str_list:
@ -83,6 +83,8 @@ def lookup_or_create_authors_from_author_str(ko_author_str: str) -> list:
def create_book_from_row(row: list): def create_book_from_row(row: list):
from books.models import Book
# No KoReader book yet, create it # No KoReader book yet, create it
author_str = get_author_str_from_row(row).replace("\x00", "") author_str = get_author_str_from_row(row).replace("\x00", "")
total_pages = row[KoReaderBookColumn.PAGES.value] total_pages = row[KoReaderBookColumn.PAGES.value]
@ -131,6 +133,8 @@ def build_book_map(rows) -> dict:
primary key IDs for page creation. primary key IDs for page creation.
""" """
from books.models import Book
book_id_map = {} book_id_map = {}
for book_row in rows: for book_row in rows:
@ -148,7 +152,12 @@ def build_book_map(rows) -> dict:
).first() ).first()
if not book: if not book:
title = book_row[KoReaderBookColumn.TITLE.value].split(" - ")[0].lower().replace("\x00", "") title = (
book_row[KoReaderBookColumn.TITLE.value]
.split(" - ")[0]
.lower()
.replace("\x00", "")
)
book = Book.objects.filter(title=title).first() book = Book.objects.filter(title=title).first()
if not book: if not book:
@ -438,3 +447,19 @@ def process_koreader_sqlite_file(file_path, user_id) -> list:
extra={"created_scrobbles": created}, extra={"created_scrobbles": created},
) )
return created return created
def fetch_file_from_webdav(user_id: int) -> str:
    """Download a user's KoReader statistics database from WebDAV.

    The remote file ``var/koreader/statistics.sqlite3`` is written to
    ``/tmp/<user_id>-koreader-import.sqlite3``.

    Args:
        user_id: ID of the user whose WebDAV client should be used.

    Returns:
        The local path of the downloaded file, or an empty string when no
        WebDAV client could be built for the user.
    """
    file_path = f"/tmp/{user_id}-koreader-import.sqlite3"
    client = get_webdav_client(user_id)

    if not client:
        # Attach the user id so the warning can be traced, matching the
        # structured logging used by the other WebDAV helpers.
        logger.warning(
            "could not get webdav client for user",
            extra={"user_id": user_id},
        )
        # TODO maybe we raise an exception here?
        return ""

    client.download_sync(
        remote_path="var/koreader/statistics.sqlite3",
        local_path=file_path,
    )
    return file_path

View File

@ -0,0 +1,18 @@
from django.core.management.base import BaseCommand
from vrobbler.apps.scrobbles.utils import import_from_webdav_for_all_users
class Command(BaseCommand):
    """Management command that starts WebDAV imports via
    ``import_from_webdav_for_all_users``.
    """

    help = "Start WebDAV imports for all users"

    def add_arguments(self, parser):
        """Register the optional ``--restart`` flag."""
        parser.add_argument(
            "--restart",
            action="store_true",
            help="Restart failed imports",
        )

    def handle(self, *args, **options):
        """Start the imports and report how many were started."""
        # Coerce to a strict bool, as the original if/else did.
        restart = bool(options["restart"])
        count = import_from_webdav_for_all_users(restart=restart)
        # Write through the command's stdout wrapper instead of print().
        self.stdout.write(f"Started {count} WebDAV imports")

View File

@ -15,6 +15,7 @@ from books.models import Book
from bricksets.models import BrickSet from bricksets.models import BrickSet
from django.conf import settings from django.conf import settings
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.core.files import File
from django.db import models from django.db import models
from django.urls import reverse from django.urls import reverse
from django.utils import timezone from django.utils import timezone
@ -49,6 +50,7 @@ from videos.models import Series, Video
from webpages.models import WebPage from webpages.models import WebPage
from vrobbler.apps.scrobbles.constants import MEDIA_END_PADDING_SECONDS from vrobbler.apps.scrobbles.constants import MEDIA_END_PADDING_SECONDS
from vrobbler.apps.scrobbles.utils import get_file_md5_hash
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
User = get_user_model() User = get_user_model()
@ -186,6 +188,19 @@ class KoReaderImport(BaseFileImportMixin):
sqlite_file = models.FileField(upload_to=get_path, **BNULL) sqlite_file = models.FileField(upload_to=get_path, **BNULL)
def save_sqlite_file_to_self(self, file_path):
    """Attach the file at ``file_path`` to this import's ``sqlite_file`` field.

    The file is read in binary mode and stored under a name derived from
    ``self.user_id``; ``save=True`` is passed to the field's ``save`` call.
    """
    with open(file_path, "rb") as source:
        stored_name = f"{self.user_id}-koreader-statistics.sqlite"
        wrapped = File(source)
        self.sqlite_file.save(stored_name, wrapped, save=True)
def file_md5_hash(self) -> str:
    """Return the MD5 hex digest of the attached sqlite file.

    Returns an empty string when ``sqlite_file`` is falsy (no file attached).
    """
    if not self.sqlite_file:
        return ""
    return get_file_md5_hash(self.sqlite_file.path)
def process(self, force=False): def process(self, force=False):
if self.processed_finished and not force: if self.processed_finished and not force:

View File

@ -1,8 +1,10 @@
import hashlib
import logging import logging
import re import re
from datetime import datetime, timedelta, tzinfo from datetime import datetime, timedelta, tzinfo
import pytz import pytz
from books.koreader import fetch_file_from_webdav
from django.apps import apps from django.apps import apps
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.db import models from django.db import models
@ -11,6 +13,8 @@ from profiles.models import UserProfile
from profiles.utils import now_user_timezone from profiles.utils import now_user_timezone
from scrobbles.constants import LONG_PLAY_MEDIA from scrobbles.constants import LONG_PLAY_MEDIA
from scrobbles.tasks import process_lastfm_import, process_retroarch_import from scrobbles.tasks import process_lastfm_import, process_retroarch_import
from vrobbler.apps.scrobbles.tasks import process_koreader_import
from webdav.client import get_webdav_client
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
User = get_user_model() User = get_user_model()
@ -178,5 +182,87 @@ def delete_zombie_scrobbles(dry_run=True):
return zombies_found return zombies_found
def import_from_webdav_for_all_users(restart=False):
    """Kick off KoReader imports for every user with WebDAV auto-import on.

    For each matching user, the KoReader statistics file is looked up on
    WebDAV and downloaded. If its MD5 hash differs from the user's most
    recently finished import, an import task is queued.

    Args:
        restart: When True, an existing unfinished import for a user is
            reused and re-queued; when False such users are skipped.

    Returns:
        The number of KoReader import tasks that were queued.
    """
    # Imported locally; NOTE(review): presumably avoids a circular import,
    # since scrobbles.models imports from this utils module - confirm.
    from scrobbles.models import KoReaderImport

    webdav_enabled_user_ids = UserProfile.objects.filter(
        webdav_url__isnull=False,
        webdav_user__isnull=False,
        webdav_pass__isnull=False,
        webdav_auto_import=True,
    ).values_list("user_id", flat=True)
    logger.info(
        f"start import of {webdav_enabled_user_ids.count()} webdav accounts"
    )

    koreader_import_count = 0

    for user_id in webdav_enabled_user_ids:
        webdav_client = get_webdav_client(user_id)
        if not webdav_client:
            # Previously a None client raised AttributeError inside the bare
            # except below and was misreported as a missing stats file.
            logger.info(
                "could not get webdav client for user",
                extra={"user_id": user_id},
            )
            continue

        try:
            webdav_client.info("var/koreader/statistics.sqlite3")
        except Exception:
            # Best-effort probe: any failure means there is nothing to import
            # for this user. `Exception` (not a bare except) keeps
            # KeyboardInterrupt/SystemExit propagating.
            logger.info(
                "no koreader stats file found on webdav",
                extra={"user_id": user_id},
            )
            continue

        last_import = (
            KoReaderImport.objects.filter(
                user_id=user_id, processed_finished__isnull=False
            )
            .order_by("processed_finished")
            .last()
        )

        # Download the file for *this* user (was hard-coded to user id 1).
        koreader_file_path = fetch_file_from_webdav(user_id)
        if not koreader_file_path:
            # fetch_file_from_webdav returns "" when it cannot get a client;
            # hashing an empty path would raise.
            logger.info(
                "could not fetch koreader stats file from webdav",
                extra={"user_id": user_id},
            )
            continue

        new_hash = get_file_md5_hash(koreader_file_path)
        old_hash = last_import.file_md5_hash() if last_import else None

        if old_hash and new_hash == old_hash:
            logger.info(
                "koreader stats file has not changed",
                extra={
                    "user_id": user_id,
                    "new_hash": new_hash,
                    "old_hash": old_hash,
                    "last_import_id": last_import.id,
                },
            )
            continue

        koreader_import, created = KoReaderImport.objects.get_or_create(
            user_id=user_id, processed_finished__isnull=True
        )

        if not created and not restart:
            logger.info(
                f"Not resuming failed KoReader import {koreader_import.id} for user {user_id}, use restart=True to restart"
            )
            continue

        koreader_import.save_sqlite_file_to_self(koreader_file_path)

        process_koreader_import.delay(koreader_import.id)
        koreader_import_count += 1

    return koreader_import_count
def media_class_to_foreign_key(media_class: str) -> str:
    """Convert a CamelCase class name into lower-case snake_case.

    An underscore is inserted before every capital letter that is not at the
    start of the string, then the whole result is lower-cased.
    """
    with_underscores = re.sub(r"(?<!^)(?=[A-Z])", "_", media_class)
    return with_underscores.lower()
def get_file_md5_hash(file_path: str) -> str:
    """Return the MD5 hex digest of the file at ``file_path``.

    The file is read in 8192-byte chunks so it is never held in memory whole.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as handle:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: handle.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()

View File

@ -25,7 +25,7 @@ from rest_framework.decorators import (
permission_classes, permission_classes,
) )
from rest_framework.parsers import MultiPartParser from rest_framework.parsers import MultiPartParser
from rest_framework.permissions import IsAuthenticated from rest_framework.permissions import IsAuthenticated, AllowAny
from rest_framework.response import Response from rest_framework.response import Response
from scrobbles.api import serializers from scrobbles.api import serializers
from scrobbles.constants import ( from scrobbles.constants import (

View File

@ -1,23 +1,33 @@
from webdav3.client import Client import logging
from typing import Optional
from profiles.models import UserProfile from profiles.models import UserProfile
from webdav3.client import Client
# Use the module's real dotted name; the quoted "__name__" created a logger
# literally called "__name__", detached from the package logger hierarchy.
logger = logging.getLogger(__name__)


def get_webdav_client(user_id) -> Optional[Client]:
    """Build a WebDAV client from a user's profile settings.

    Args:
        user_id: ID of the user whose ``UserProfile`` holds the WebDAV
            URL, login and password.

    Returns:
        A ``Client`` configured from the profile, or ``None`` when the user
        has no profile or the profile has no WebDAV user set.
    """
    profile = UserProfile.objects.filter(user_id=user_id).first()

    if not profile:
        logger.info(
            "[get_webdav_client] no profile for user",
            extra={"user_id": user_id},
        )
        return None

    if not profile.webdav_user:
        logger.info(
            "[get_webdav_client] no webdav user for profile",
            extra={"user_id": user_id},
        )
        return None

    return Client(
        {
            "webdav_hostname": profile.webdav_url,
            "webdav_login": profile.webdav_user,
            "webdav_password": profile.webdav_pass,
        }
    )