[books] Add webdav koreader importer

This commit is contained in:
2024-11-17 20:48:24 -05:00
parent 2327b1f622
commit 1304a27408
7 changed files with 169 additions and 18 deletions

View File

@ -1,5 +1,6 @@
from uuid import uuid4
from beers.untappd import get_beer_from_untappd_id, get_rating_from_soup
from django.apps import apps
from django.db import models
from django.urls import reverse
@ -8,10 +9,6 @@ from imagekit.models import ImageSpecField
from imagekit.processors import ResizeToFit
from scrobbles.dataclasses import BeerLogData
from scrobbles.mixins import ScrobblableConstants, ScrobblableMixin
from vrobbler.apps.beers.untappd import (
get_beer_from_untappd_id,
get_rating_from_soup,
)
BNULL = {"blank": True, "null": True}

View File

@ -1,4 +1,3 @@
from collections import OrderedDict
import logging
import re
import sqlite3
@ -7,12 +6,11 @@ from enum import Enum
import pytz
import requests
from books.models import Author, Book
from books.openlibrary import get_author_openlibrary_id
from books.constants import BOOKS_TITLES_TO_IGNORE
from django.apps import apps
from django.contrib.auth import get_user_model
from stream_sqlite import stream_sqlite
from vrobbler.apps.books.constants import BOOKS_TITLES_TO_IGNORE
from webdav.client import get_webdav_client
logger = logging.getLogger(__name__)
User = get_user_model()
@ -63,6 +61,8 @@ def lookup_or_create_authors_from_author_str(ko_author_str: str) -> list:
"""Takes a string of authors from KoReader and returns a list
of Authors from our database
"""
from books.models import Author
author_str_list = ko_author_str.split(", ")
author_list = []
for author_str in author_str_list:
@ -83,6 +83,8 @@ def lookup_or_create_authors_from_author_str(ko_author_str: str) -> list:
def create_book_from_row(row: list):
from books.models import Book
# No KoReader book yet, create it
author_str = get_author_str_from_row(row).replace("\x00", "")
total_pages = row[KoReaderBookColumn.PAGES.value]
@ -131,6 +133,8 @@ def build_book_map(rows) -> dict:
primary key IDs for page creation.
"""
from books.models import Book
book_id_map = {}
for book_row in rows:
@ -148,7 +152,12 @@ def build_book_map(rows) -> dict:
).first()
if not book:
title = book_row[KoReaderBookColumn.TITLE.value].split(" - ")[0].lower().replace("\x00", "")
title = (
book_row[KoReaderBookColumn.TITLE.value]
.split(" - ")[0]
.lower()
.replace("\x00", "")
)
book = Book.objects.filter(title=title).first()
if not book:
@ -438,3 +447,19 @@ def process_koreader_sqlite_file(file_path, user_id) -> list:
extra={"created_scrobbles": created},
)
return created
def fetch_file_from_webdav(user_id: int) -> str:
    """Download a user's KoReader statistics database from WebDAV.

    The remote file ``var/koreader/statistics.sqlite3`` is downloaded to a
    per-user path under ``/tmp``.

    Args:
        user_id: ID of the user whose WebDAV settings should be used.

    Returns:
        The local path the file was downloaded to, or an empty string when
        no WebDAV client could be obtained for the user.
    """
    client = get_webdav_client(user_id)
    if not client:
        # Include the user id so the failure can be traced to an account.
        logger.warning(
            "could not get webdav client for user",
            extra={"user_id": user_id},
        )
        # TODO maybe we raise an exception here?
        return ""

    # NOTE(review): predictable file name in a shared temp directory;
    # consider a private directory if this host is multi-tenant.
    file_path = f"/tmp/{user_id}-koreader-import.sqlite3"
    client.download_sync(
        remote_path="var/koreader/statistics.sqlite3",
        local_path=file_path,
    )
    return file_path

View File

@ -0,0 +1,18 @@
from django.core.management.base import BaseCommand
from vrobbler.apps.scrobbles.utils import import_from_webdav_for_all_users
class Command(BaseCommand):
    """Management command that starts WebDAV-based imports for all users."""

    help = "Start KoReader imports from WebDAV for all eligible users"

    def add_arguments(self, parser):
        """Register the optional ``--restart`` flag."""
        parser.add_argument(
            "--restart",
            action="store_true",
            help="Restart failed imports",
        )

    def handle(self, *args, **options):
        """Kick off the imports and report how many were started."""
        # ``store_true`` already yields a bool, so pass it straight through.
        count = import_from_webdav_for_all_users(restart=options["restart"])
        # Write through the command's stdout wrapper rather than print().
        self.stdout.write(f"Started {count} WebDAV imports")

View File

@ -15,6 +15,7 @@ from books.models import Book
from bricksets.models import BrickSet
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.files import File
from django.db import models
from django.urls import reverse
from django.utils import timezone
@ -49,6 +50,7 @@ from videos.models import Series, Video
from webpages.models import WebPage
from vrobbler.apps.scrobbles.constants import MEDIA_END_PADDING_SECONDS
from vrobbler.apps.scrobbles.utils import get_file_md5_hash
logger = logging.getLogger(__name__)
User = get_user_model()
@ -186,6 +188,19 @@ class KoReaderImport(BaseFileImportMixin):
sqlite_file = models.FileField(upload_to=get_path, **BNULL)
def save_sqlite_file_to_self(self, file_path):
    """Store the file at ``file_path`` in this import's ``sqlite_file`` field.

    The file is saved under the name ``<user_id>-koreader-statistics.sqlite``
    and the model instance is saved as part of the same call.
    """
    stored_name = f"{self.user_id}-koreader-statistics.sqlite"
    with open(file_path, "rb") as source:
        self.sqlite_file.save(stored_name, File(source), save=True)
def file_md5_hash(self) -> str:
    """Return the MD5 hex digest of the attached sqlite file.

    Returns an empty string when no file is attached to this import.
    """
    if not self.sqlite_file:
        return ""
    return get_file_md5_hash(self.sqlite_file.path)
def process(self, force=False):
if self.processed_finished and not force:

View File

@ -1,8 +1,10 @@
import hashlib
import logging
import re
from datetime import datetime, timedelta, tzinfo
import pytz
from books.koreader import fetch_file_from_webdav
from django.apps import apps
from django.contrib.auth import get_user_model
from django.db import models
@ -11,6 +13,8 @@ from profiles.models import UserProfile
from profiles.utils import now_user_timezone
from scrobbles.constants import LONG_PLAY_MEDIA
from scrobbles.tasks import process_lastfm_import, process_retroarch_import
from vrobbler.apps.scrobbles.tasks import process_koreader_import
from webdav.client import get_webdav_client
logger = logging.getLogger(__name__)
User = get_user_model()
@ -178,5 +182,87 @@ def delete_zombie_scrobbles(dry_run=True):
return zombies_found
def import_from_webdav_for_all_users(restart=False):
    """Grab a list of all users with WebDAV enabled and kickoff imports for them.

    For each user whose profile has WebDAV URL, user and password set and
    auto-import enabled, the KoReader statistics file is looked up on the
    WebDAV share, downloaded, and compared by MD5 hash against the file of
    the user's most recently finished import. A new import task is queued
    only when the file has changed.

    Args:
        restart: When true, an existing unfinished import for a user is
            re-queued instead of being left alone.

    Returns:
        The number of KoReader import tasks queued.
    """
    # Imported here rather than at module level (scrobbles.models itself
    # imports from this module).
    from scrobbles.models import KoReaderImport

    webdav_enabled_user_ids = UserProfile.objects.filter(
        webdav_url__isnull=False,
        webdav_user__isnull=False,
        webdav_pass__isnull=False,
        webdav_auto_import=True,
    ).values_list("user_id", flat=True)
    logger.info(
        f"start import of {webdav_enabled_user_ids.count()} webdav accounts"
    )

    koreader_import_count = 0

    for user_id in webdav_enabled_user_ids:
        webdav_client = get_webdav_client(user_id)
        if webdav_client is None:
            # Previously this surfaced as an AttributeError swallowed by a
            # bare except and was misreported as a missing stats file.
            logger.info(
                "could not get webdav client for user",
                extra={"user_id": user_id},
            )
            continue

        try:
            webdav_client.info("var/koreader/statistics.sqlite3")
        except Exception:
            # Any failure probing the remote file is treated as "not found";
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logger.info(
                "no koreader stats file found on webdav",
                extra={"user_id": user_id},
            )
            continue

        last_import = (
            KoReaderImport.objects.filter(
                user_id=user_id, processed_finished__isnull=False
            )
            .order_by("processed_finished")
            .last()
        )

        # Download the file for *this* user (was hard-coded to user 1).
        koreader_file_path = fetch_file_from_webdav(user_id)
        if not koreader_file_path:
            # fetch_file_from_webdav returns "" when it cannot get a client;
            # hashing an empty path would raise.
            logger.warning(
                "could not download koreader stats file from webdav",
                extra={"user_id": user_id},
            )
            continue

        new_hash = get_file_md5_hash(koreader_file_path)
        old_hash = last_import.file_md5_hash() if last_import else None

        if old_hash and new_hash == old_hash:
            logger.info(
                "koreader stats file has not changed",
                extra={
                    "user_id": user_id,
                    "new_hash": new_hash,
                    "old_hash": old_hash,
                    "last_import_id": last_import.id,
                },
            )
            continue

        # Reuse the user's unfinished import if one exists, else create one.
        koreader_import, created = KoReaderImport.objects.get_or_create(
            user_id=user_id, processed_finished__isnull=True
        )

        if not created and not restart:
            logger.info(
                f"Not resuming failed KoReader import {koreader_import.id} for user {user_id}, use restart=True to restart"
            )
            continue

        koreader_import.save_sqlite_file_to_self(koreader_file_path)

        process_koreader_import.delay(koreader_import.id)
        koreader_import_count += 1

    return koreader_import_count
def media_class_to_foreign_key(media_class: str) -> str:
    """Convert a CamelCase media class name to its snake_case form.

    An underscore is inserted before every uppercase letter except one at
    the very start of the string, and the result is lowercased.
    """
    with_separators = re.sub(r"(?<!^)(?=[A-Z])", "_", media_class)
    return with_separators.lower()
def get_file_md5_hash(file_path: str) -> str:
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is read in 8192-byte chunks rather than all at once.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (EOF).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()

View File

@ -25,7 +25,7 @@ from rest_framework.decorators import (
permission_classes,
)
from rest_framework.parsers import MultiPartParser
from rest_framework.permissions import IsAuthenticated
from rest_framework.permissions import IsAuthenticated, AllowAny
from rest_framework.response import Response
from scrobbles.api import serializers
from scrobbles.constants import (

View File

@ -1,23 +1,33 @@
from webdav3.client import Client
import logging
from typing import Optional
from profiles.models import UserProfile
from webdav3.client import Client
def get_webdav_client(user_id):
client = None
# Name the logger after this module; the quoted "__name__" literal created
# a logger literally called "__name__".
logger = logging.getLogger(__name__)
def get_webdav_client(user_id) -> Optional[Client]:
    """Build a WebDAV client from the settings stored on a user's profile.

    Args:
        user_id: ID of the user whose ``UserProfile`` holds the WebDAV
            URL, user and password.

    Returns:
        A ``Client`` configured with the profile's WebDAV settings, or
        ``None`` when the user has no profile or the profile has no WebDAV
        user set.
    """
    profile = UserProfile.objects.filter(user_id=user_id).first()
    if not profile:
        logger.info(
            "[get_webdav_client] no profile for user",
            extra={"user_id": user_id},
        )
        return None

    if not profile.webdav_user:
        logger.info(
            "[get_webdav_client] no webdave user for profile",
            extra={"user_id": user_id},
        )
        return None

    return Client(
        {
            "webdav_hostname": profile.webdav_url,
            "webdav_login": profile.webdav_user,
            "webdav_password": profile.webdav_pass,
        }
    )