Fix book importing

2023-03-06 14:05:33 -05:00
parent 73c72ef465
commit 788e1ab9e9
11 changed files with 424 additions and 43 deletions
--- a/vrobbler/apps/books/admin.py
+++ b/vrobbler/apps/books/admin.py
@ -1,6 +1,6 @@
 from django.contrib import admin

-from books.models import Author, Book
+from books.models import Author, Book, Page

 from scrobbles.admin import ScrobbleInline

@ -8,11 +8,22 @@ from scrobbles.admin import ScrobbleInline
@admin.register(Author)
 class AuthorAdmin(admin.ModelAdmin):
+    # Django admin configuration for Author records.
    date_hierarchy = "created"
-    list_display = ("name", "openlibrary_id")
+    # Changelist columns; "bio" and "wikipedia_url" are the ones newly shown.
+    list_display = (
+        "name",
+        "openlibrary_id",
+        "bio",
+        "wikipedia_url",
+    )
    ordering = ("-created",)
    search_fields = ("name",)


+@admin.register(Page)
+class PageAdmin(admin.ModelAdmin):
+    """Admin for Page rows: filter by book, drill down by creation date."""
+
+    list_filter = ("book",)
+    date_hierarchy = "created"
+
+
@admin.register(Book)
 class BookAdmin(admin.ModelAdmin):
    date_hierarchy = "created"
--- a/vrobbler/apps/books/koreader.py
+++ b/vrobbler/apps/books/koreader.py
@ -5,9 +5,8 @@ from enum import Enum

 import pytz

-from books.models import Author, Book
+from books.models import Author, Book, Page
 from scrobbles.models import Scrobble
-from django.utils import timezone

 logger = logging.getLogger(__name__)

@ -35,6 +34,38 @@ class KoReaderPageStatColumn(Enum):
    TOTAL_PAGES = 4


+def process_pages_for_book(book_id, sqlite_file_path):
+    """Import per-page reading statistics for one KoReader book.
+
+    Reads the ``page_stat_data`` rows for ``book_id`` from the KoReader
+    sqlite file and creates a ``Page`` for every page number that is not
+    already stored for the matching ``Book``. Pages that already exist are
+    left untouched.
+
+    Args:
+        book_id: KoReader's id of the book (matched on ``Book.koreader_id``).
+        sqlite_file_path: Path of the KoReader sqlite file to read.
+
+    Returns:
+        The list of newly created ``Page`` objects, or ``None`` when no
+        ``Book`` with that KoReader id exists.
+    """
+    book = Book.objects.filter(koreader_id=book_id).first()
+    if not book:
+        logger.error(f"No book found with KoReader ID of {book_id}")
+        return
+
+    new_pages = []
+    con = sqlite3.connect(sqlite_file_path)
+    try:
+        cur = con.cursor()
+        # Bind book_id as a query parameter rather than formatting it into
+        # the SQL text.
+        page_table = cur.execute(
+            "SELECT * FROM page_stat_data where id_book=?", (book_id,)
+        )
+        for page_row in page_table:
+            page_number = page_row[KoReaderPageStatColumn.PAGE.value]
+            page, page_created = Page.objects.get_or_create(
+                book=book, number=page_number
+            )
+            if not page_created:
+                continue
+
+            # KoReader stores the start time as a unix timestamp; store it
+            # as a timezone-aware UTC datetime.
+            ts = page_row[KoReaderPageStatColumn.START_TIME.value]
+            page.start_time = datetime.utcfromtimestamp(ts).replace(
+                tzinfo=pytz.utc
+            )
+            page.duration_seconds = page_row[
+                KoReaderPageStatColumn.DURATION.value
+            ]
+            page.save()
+            new_pages.append(page)
+    finally:
+        # Always release the sqlite handle, even if a row fails to import.
+        con.close()
+
+    logger.info(f"Added {len(new_pages)} pages for book {book}")
+    return new_pages
+
+
 def process_koreader_sqlite_file(sqlite_file_path, user_id):
    """Given a sqlite file from KoReader, open the book table, iterate
    over rows creating scrobbles from each book found"""
@ -65,21 +96,21 @@ def process_koreader_sqlite_file(sqlite_file_path, user_id):
        )

        if created:
-            book.title = book_row[KoReaderBookColumn.TITLE.value]
-            book.pages = book_row[KoReaderBookColumn.PAGES.value]
-            book.koreader_md5 = book_row[KoReaderBookColumn.MD5.value]
-            book.koreader_id = int(book_row[KoReaderBookColumn.ID.value])
-            book.koreader_authors = book_row[KoReaderBookColumn.AUTHORS.value]
-            book.run_time_ticks = int(book_row[KoReaderBookColumn.PAGES.value])
-            book.save(
-                update_fields=[
-                    "pages",
-                    "koreader_md5",
-                    "koreader_id",
-                    "koreader_authors",
-                ]
-            )
+            pages = book_row[KoReaderBookColumn.PAGES.value]
+            run_time = pages * book.AVG_PAGE_READING_SECONDS
+            run_time_ticks = run_time * 1000
+            book_dict = {
+                "title": book_row[KoReaderBookColumn.TITLE.value],
+                "pages": book_row[KoReaderBookColumn.PAGES.value],
+                "koreader_md5": book_row[KoReaderBookColumn.MD5.value],
+                "koreader_id": int(book_row[KoReaderBookColumn.ID.value]),
+                "koreader_authors": book_row[KoReaderBookColumn.AUTHORS.value],
+                "run_time": run_time,
+                "run_time_ticks": run_time_ticks,
+            }
+            Book.objects.filter(pk=book.id).update(**book_dict)
            book.fix_metadata()
+
            if author_list:
                book.authors.add(*[a.id for a in author_list])

@ -91,6 +122,7 @@ def process_koreader_sqlite_file(sqlite_file_path, user_id):
        timestamp = datetime.utcfromtimestamp(
            book_row[KoReaderBookColumn.LAST_OPEN.value]
        ).replace(tzinfo=pytz.utc)
+        process_pages_for_book(book.koreader_id, sqlite_file_path)

        new_scrobble = Scrobble(
            book_id=book.id,
--- a/vrobbler/apps/books/migrations/0004_remove_book_author_name_and_more.py
+++ b/vrobbler/apps/books/migrations/0004_remove_book_author_name_and_more.py
@ -0,0 +1,69 @@
+# Generated by Django 4.1.5 on 2023-03-06 16:31
+
+from django.db import migrations, models
+import django.db.models.deletion
+import django_extensions.db.fields
+
+
+class Migration(migrations.Migration):
+    """Drop Book.author_name and Book.author_openlibrary_id, add
+    Author.headshot and create the Page model."""
+
+    dependencies = [
+        ("books", "0003_book_cover"),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name="book",
+            name="author_name",
+        ),
+        migrations.RemoveField(
+            model_name="book",
+            name="author_openlibrary_id",
+        ),
+        migrations.AddField(
+            model_name="author",
+            name="headshot",
+            field=models.ImageField(
+                blank=True, null=True, upload_to="books/authors/"
+            ),
+        ),
+        migrations.CreateModel(
+            name="Page",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "created",
+                    django_extensions.db.fields.CreationDateTimeField(
+                        auto_now_add=True, verbose_name="created"
+                    ),
+                ),
+                (
+                    "modified",
+                    django_extensions.db.fields.ModificationDateTimeField(
+                        auto_now=True, verbose_name="modified"
+                    ),
+                ),
+                ("number", models.IntegerField()),
+                ("start_time", models.DateTimeField()),
+                ("duration_seconds", models.IntegerField()),
+                (
+                    "book",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="books.book",
+                    ),
+                ),
+            ],
+            # A page number may appear only once per book.
+            options={
+                "unique_together": {("book", "number")},
+            },
+        ),
+    ]
--- a/vrobbler/apps/books/migrations/0005_author_uuid.py
+++ b/vrobbler/apps/books/migrations/0005_author_uuid.py
@ -0,0 +1,21 @@
+# Generated by Django 4.1.5 on 2023-03-06 16:34
+
+from django.db import migrations, models
+import uuid
+
+
+class Migration(migrations.Migration):
+    """Add a nullable, non-editable ``uuid`` field to Author."""
+
+    dependencies = [
+        ("books", "0004_remove_book_author_name_and_more"),
+    ]
+
+    operations = [
+        # NOTE(review): presumably every pre-existing Author row receives the
+        # same single uuid4 value when this column is added — confirm whether
+        # a per-row backfill is needed.
+        migrations.AddField(
+            model_name="author",
+            name="uuid",
+            field=models.UUIDField(
+                blank=True, default=uuid.uuid4, editable=False, null=True
+            ),
+        ),
+    ]
--- a/vrobbler/apps/books/migrations/0006_alter_page_duration_seconds_alter_page_start_time.py
+++ b/vrobbler/apps/books/migrations/0006_alter_page_duration_seconds_alter_page_start_time.py
@ -0,0 +1,23 @@
+# Generated by Django 4.1.5 on 2023-03-06 17:13
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Make Page.duration_seconds and Page.start_time optional
+    (blank and null allowed)."""
+
+    dependencies = [
+        ("books", "0005_author_uuid"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="page",
+            name="duration_seconds",
+            field=models.IntegerField(blank=True, null=True),
+        ),
+        migrations.AlterField(
+            model_name="page",
+            name="start_time",
+            field=models.DateTimeField(blank=True, null=True),
+        ),
+    ]
--- a/vrobbler/apps/books/migrations/0007_author_amazon_id_author_bio_author_goodreads_id_and_more.py
+++ b/vrobbler/apps/books/migrations/0007_author_amazon_id_author_bio_author_goodreads_id_and_more.py
@ -0,0 +1,48 @@
+# Generated by Django 4.1.5 on 2023-03-06 17:44
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add optional bio, Wikipedia URL and external-identifier fields
+    (amazon, goodreads, isni, librarything, wikidata) to Author."""
+
+    dependencies = [
+        ("books", "0006_alter_page_duration_seconds_alter_page_start_time"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="author",
+            name="amazon_id",
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+        migrations.AddField(
+            model_name="author",
+            name="bio",
+            field=models.TextField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="author",
+            name="goodreads_id",
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+        migrations.AddField(
+            model_name="author",
+            name="isni",
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+        migrations.AddField(
+            model_name="author",
+            name="librarything_id",
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+        migrations.AddField(
+            model_name="author",
+            name="wikidata_id",
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+        migrations.AddField(
+            model_name="author",
+            name="wikipedia_url",
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]
--- a/vrobbler/apps/books/migrations/0008_book_first_sentence.py
+++ b/vrobbler/apps/books/migrations/0008_book_first_sentence.py
@ -0,0 +1,21 @@
+# Generated by Django 4.1.5 on 2023-03-06 18:42
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add the optional ``first_sentence`` field to Book."""
+
+    dependencies = [
+        (
+            "books",
+            "0007_author_amazon_id_author_bio_author_goodreads_id_and_more",
+        ),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="book",
+            name="first_sentence",
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]
--- a/vrobbler/apps/books/models.py
+++ b/vrobbler/apps/books/models.py
@ -1,7 +1,11 @@
 import logging
+from uuid import uuid4

 import requests
-from books.openlibrary import lookup_book_from_openlibrary
+from books.openlibrary import (
+    lookup_author_from_openlibrary,
+    lookup_book_from_openlibrary,
+)
 from django.conf import settings
 from django.contrib.auth import get_user_model
 from django.core.files.base import ContentFile
@ -18,17 +22,44 @@ BNULL = {"blank": True, "null": True}

 class Author(TimeStampedModel):
+    """A book author, enriched from OpenLibrary when ``openlibrary_id``
+    is known."""
+
    name = models.CharField(max_length=255)
+    uuid = models.UUIDField(default=uuid4, editable=False, **BNULL)
    openlibrary_id = models.CharField(max_length=255, **BNULL)
+    headshot = models.ImageField(upload_to="books/authors/", **BNULL)
+    bio = models.TextField(**BNULL)
+    wikipedia_url = models.CharField(max_length=255, **BNULL)
+    isni = models.CharField(max_length=255, **BNULL)
+    wikidata_id = models.CharField(max_length=255, **BNULL)
+    goodreads_id = models.CharField(max_length=255, **BNULL)
+    librarything_id = models.CharField(max_length=255, **BNULL)
+    amazon_id = models.CharField(max_length=255, **BNULL)

    def __str__(self):
        return f"{self.name}"

-    def fix_metadata(self):
-        logger.warn("Not implemented yet")
+    def fix_metadata(self, data_dict: dict = None):
+        """Update this author from a metadata dict and fetch a headshot.
+
+        Args:
+            data_dict: Field values to write to this Author, plus an
+                optional ``author_headshot_url`` key. When empty and the
+                author has an ``openlibrary_id``, the data is looked up
+                from OpenLibrary instead.
+
+        Does nothing when no data is available or the data has no ``name``.
+        """
+        if not data_dict and self.openlibrary_id:
+            data_dict = lookup_author_from_openlibrary(self.openlibrary_id)
+
+        if not data_dict or not data_dict.get("name"):
+            return
+
+        # Copy before pop() so a dict handed in by the caller is not mutated.
+        data_dict = dict(data_dict)
+        headshot_url = data_dict.pop("author_headshot_url", "")
+
+        # update() writes straight to the database, so reload the instance
+        # afterwards to pick up the new values (name is used below).
+        Author.objects.filter(pk=self.id).update(**data_dict)
+        self.refresh_from_db()
+
+        if headshot_url:
+            r = requests.get(headshot_url)
+            if r.status_code == 200:
+                fname = f"{self.name}_{self.uuid}.jpg"
+                self.headshot.save(fname, ContentFile(r.content), save=True)


 class Book(ScrobblableMixin):
    COMPLETION_PERCENT = getattr(settings, "BOOK_COMPLETION_PERCENT", 95)
+    AVG_PAGE_READING_SECONDS = getattr(
+        settings, "AVERAGE_PAGE_READING_SECONDS", 60
+    )

    title = models.CharField(max_length=255)
    authors = models.ManyToManyField(Author)
@ -40,8 +71,7 @@ class Book(ScrobblableMixin):
    pages = models.IntegerField(**BNULL)
    language = models.CharField(max_length=4, **BNULL)
    first_publish_year = models.IntegerField(**BNULL)
-    author_name = models.CharField(max_length=255, **BNULL)
-    author_openlibrary_id = models.CharField(max_length=255, **BNULL)
+    first_sentence = models.CharField(max_length=255, **BNULL)
    openlibrary_id = models.CharField(max_length=255, **BNULL)
    cover = models.ImageField(upload_to="books/covers/", **BNULL)

@ -52,13 +82,47 @@ class Book(ScrobblableMixin):
        if not self.openlibrary_id or force_update:
            book_dict = lookup_book_from_openlibrary(self.title, self.author)

-            cover_url = book_dict.pop("cover_url")
-            Book.objects.filter(pk=self.id).update(**book_dict)
+            cover_url = book_dict.pop("cover_url", "")
+            ol_author_id = book_dict.pop("ol_author_id", "")
+            ol_author_name = book_dict.pop("ol_author_name", "")
+            # Don't want to overwrite KoReader's pages if OL is ignorant
+            if not book_dict.get("pages"):
+                book_dict.pop("pages")

-            r = requests.get(cover_url)
-            if r.status_code == 200:
-                fname = f"{self.title}_{self.uuid}.jpg"
-                self.cover.save(fname, ContentFile(r.content), save=True)
+            ol_title = book_dict.get("title")
+            if ol_title.lower() != self.title.lower():
+                logger.warn(
+                    f"OL and KoReader disagree on this book title {self.title} != {ol_title}"
+                )
+                return
+
+            Book.objects.filter(pk=self.id).update(**book_dict)
+            self.refresh_from_db()
+
+            # Process authors
+            author = None
+            author_created = False
+            if ol_author_id:
+                author, author_created = Author.objects.get_or_create(
+                    openlibrary_id=ol_author_id
+                )
+                if author_created or force_update:
+                    author.fix_metadata()
+            if not author and ol_author_name:
+                author, author_created = Author.objects.get_or_create(
+                    name=ol_author_name
+                )
+            self.authors.add(author)
+
+            if cover_url:
+                r = requests.get(cover_url)
+                if r.status_code == 200:
+                    fname = f"{self.title}_{self.uuid}.jpg"
+                    self.cover.save(fname, ContentFile(r.content), save=True)
+
+            if self.pages:
+                self.run_time = self.pages * int(self.AVG_PAGE_READING_SECONDS)
+                self.run_time_ticks = int(self.run_time) * 1000

            self.save()

@ -76,6 +140,11 @@ class Book(ScrobblableMixin):
            return 0
        return int(self.pages * (self.COMPLETION_PERCENT / 100))

+    def update_long_play_seconds(self):
+        """Check page timestamps and durations and update (still a stub)."""
+        pages = self.page_set.all()
+        if not pages:
+            return
+        # TODO: nothing is computed yet for books that do have pages.
+        ...
+
    def progress_for_user(self, user_id: int) -> int:
        """Used to keep track of whether the book is complete or not"""
        user = User.objects.get(id=user_id)
@ -86,6 +155,22 @@ class Book(ScrobblableMixin):
    def find_or_create(cls, data_dict: dict) -> "Game":
-        from books.utils import get_or_create_book
+        """Look up or create a Book from ``data_dict``'s "title" and
+        "author" keys via ``books.utils.update_or_create_book``."""
+        # NOTE(review): the "Game" return annotation above looks copied from
+        # another model; the helper is annotated as returning a Book.
+        # The import must name the function that is actually called below;
+        # the helper was renamed from get_or_create_book in books/utils.py.
+        from books.utils import update_or_create_book

-        return get_or_create_book(
+        return update_or_create_book(
            data_dict.get("title"), data_dict.get("author")
        )
+
+
+class Page(TimeStampedModel):
+    """A single page of a Book, with an optional start time and the
+    number of seconds spent on it."""
+
+    book = models.ForeignKey(Book, on_delete=models.CASCADE)
+    number = models.IntegerField()
+    start_time = models.DateTimeField(**BNULL)
+    duration_seconds = models.IntegerField(**BNULL)
+
+    class Meta:
+        # A page number may appear only once per book.
+        unique_together = ("book", "number")
+
+    def __str__(self):
+        return f"Page {self.number} of {self.book.pages} in {self.book.title}"
--- a/vrobbler/apps/books/openlibrary.py
+++ b/vrobbler/apps/books/openlibrary.py
@ -1,6 +1,8 @@
 import json
-import requests
 import logging
+import urllib
+
+import requests

 logger = logging.getLogger(__name__)

@ -8,6 +10,8 @@ SEARCH_URL = "https://openlibrary.org/search.json?title={title}"
 ISBN_URL = "https://openlibrary.org/isbn/{isbn}.json"
 SEARCH_URL = "https://openlibrary.org/search.json?title={title}"
 COVER_URL = "https://covers.openlibrary.org/b/olid/{id}-L.jpg"
+AUTHOR_URL = "https://openlibrary.org/authors/{id}.json"
+AUTHOR_IMAGE_URL = "https://covers.openlibrary.org/a/olid/{id}-L.jpg"


 def get_first(key: str, result: dict) -> str:
@ -17,8 +21,43 @@ def get_first(key: str, result: dict) -> str:
    return obj


+def lookup_author_from_openlibrary(olid: str) -> dict:
+    """Fetch metadata for one author from OpenLibrary.
+
+    Args:
+        olid: The author's OpenLibrary id, substituted into ``AUTHOR_URL``.
+
+    Returns:
+        A dict with the keys ``name``, ``wikipedia_url``, ``wikidata_id``,
+        ``isni``, ``goodreads_id``, ``librarything_id``, ``amazon_id``,
+        ``bio`` and ``author_headshot_url``; values missing from the
+        response are ``None``. An empty dict is returned when the request
+        does not answer 200 or the response body is empty.
+    """
+    author_url = AUTHOR_URL.format(id=olid)
+    response = requests.get(author_url)
+
+    if response.status_code != 200:
+        logger.warning(f"Bad response from OL: {response.status_code}")
+        return {}
+
+    results = json.loads(response.content)
+
+    if not results:
+        logger.warning(f"No author results found from OL for {olid}")
+        return {}
+
+    remote_ids = results.get("remote_ids", {})
+
+    # "bio" is handled as either a plain value or a mapping holding the text
+    # under "value".
+    bio = results.get("bio") or ""
+    if isinstance(bio, dict):
+        bio = bio.get("value")
+
+    return {
+        "name": results.get("name"),
+        "wikipedia_url": results.get("wikipedia"),
+        "wikidata_id": remote_ids.get("wikidata"),
+        "isni": remote_ids.get("isni"),
+        "goodreads_id": remote_ids.get("goodreads"),
+        "librarything_id": remote_ids.get("librarything"),
+        "amazon_id": remote_ids.get("amazon"),
+        "bio": bio,
+        "author_headshot_url": AUTHOR_IMAGE_URL.format(id=olid),
+    }
+
+
 def lookup_book_from_openlibrary(title: str, author: str = None) -> dict:
-    search_url = SEARCH_URL.format(title=title)
+    title_quoted = urllib.parse.quote(title)
+    search_url = SEARCH_URL.format(title=title_quoted)
    response = requests.get(search_url)

    if response.status_code != 200:
@ -37,14 +76,22 @@ def lookup_book_from_openlibrary(title: str, author: str = None) -> dict:
            f"Lookup for {title} found top result with mismatched author"
        )
    ol_id = top.get("cover_edition_key")
+    author_ol_id = get_first("author_key", top)
+    first_sentence = ""
+    if top.get("first_sentence"):
+        try:
+            first_sentence = top.get("first_sentence")[0].get("value")
+        except AttributeError:
+            first_sentence = top.get("first_sentence")[0]
+    print(top)
    return {
        "title": top.get("title"),
        "isbn": top.get("isbn")[0],
-        "openlibrary_id": top.get("cover_edition_key"),
-        "author_name": get_first("author_name", top),
-        "author_openlibrary_id": get_first("author_key", top),
+        "openlibrary_id": ol_id,
        "goodreads_id": get_first("id_goodreads", top),
        "first_publish_year": top.get("first_publish_year"),
-        "pages": top.get("number_of_pages_median"),
+        "first_sentence": first_sentence,
+        "pages": top.get("number_of_pages_median", None),
        "cover_url": COVER_URL.format(id=ol_id),
+        "ol_author_id": author_ol_id,
    }
--- a/vrobbler/apps/books/utils.py
+++ b/vrobbler/apps/books/utils.py
@ -2,22 +2,44 @@ from django.core.files.base import ContentFile
 import requests
 from typing import Optional
 from books.openlibrary import lookup_book_from_openlibrary
-from books.models import Book
+from books.models import Author, Book


-def get_or_create_book(title: str, author: Optional[str] = None) -> Book:
+def update_or_create_book(
+    title: str, author: Optional[str] = None, force_update=False
+) -> Book:
    book_dict = lookup_book_from_openlibrary(title, author)

    book, book_created = Book.objects.get_or_create(
        isbn=book_dict.get("isbn"),
    )

-    if book_created:
+    if book_created or force_update:
        cover_url = book_dict.pop("cover_url")
+        ol_author_id = book_dict.pop("ol_author_id")
+        ol_author_name = book_dict.pop("ol_author_name")
+        author_image_url = book_dict.pop("author_image_url")

        Book.objects.filter(pk=book.id).update(**book_dict)
        book.refresh_from_db()

+        # Process authors
+        if ol_author_name and ol_author_id:
+            author, author_created = Author.objects.get_or_create(
+                name=ol_author_name
+            )
+            if author_created or force_update:
+                author.openlibrary_id = ol_author_id
+                author.save()
+
+                r = requests.get(author_image_url)
+                if r.status_code == 200:
+                    fname = f"{author.name}_{author.uuid}.jpg"
+                    author.headshot.save(
+                        fname, ContentFile(r.content), save=True
+                    )
+
+        # Process cover URL
        r = requests.get(cover_url)
        if r.status_code == 200:
            fname = f"{book.title}_{book.uuid}.jpg"
--- a/vrobbler/apps/music/models.py
+++ b/vrobbler/apps/music/models.py
@ -71,10 +71,12 @@ class Artist(TimeStampedModel):
        self.theaudiodb_genre = tadb_info["genre"]
        self.theaudiodb_mood = tadb_info["mood"]

-        r = requests.get(tadb_info.get("thumb_url", ""))
-        if r.status_code == 200:
-            fname = f"{self.name}_{self.uuid}.jpg"
-            self.thumbnail.save(fname, ContentFile(r.content), save=True)
+        thumb_url = tadb_info.get("thumb_url", "")
+        if thumb_url:
+            r = requests.get(thumb_url)
+            if r.status_code == 200:
+                fname = f"{self.name}_{self.uuid}.jpg"
+                self.thumbnail.save(fname, ContentFile(r.content), save=True)

    @property
    def rym_link(self):