Start to add comicvine lookups and consolidate koreader data

2024-01-29 01:48:56 -05:00
parent 70c7eda415
commit 9e2d7a6bc0
10 changed files with 403 additions and 25 deletions
--- a/vrobbler.conf.example
+++ b/vrobbler.conf.example
@ -20,6 +20,7 @@ VROBBLER_THESPORTSDB_API_KEY="<key>"
 VROBBLER_THEAUDIODB_API_KEY="<key>"
 VROBBLER_IGDB_CLIENT_ID="<id>"
 VROBBLER_IGDB_CLIENT_SECRET="<key>"
+VROBBLER_COMICVINE_API_KEY="<key>"

 # Storages
 # VROBBLER_DATABASE_URL="postgres://USER:PASSWORD@HOST:PORT/NAME"
--- a/vrobbler/apps/books/comicvine.py
+++ b/vrobbler/apps/books/comicvine.py
@ -0,0 +1,232 @@
+"""
+ComicVine API Information & Documentation:
+https://comicvine.gamespot.com/api/
+https://comicvine.gamespot.com/api/documentation
+"""
+import json
+import logging
+from django.conf import settings
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+
+class ComicVineClient(object):
+    """
+    Interacts with the ``search`` resource of the ComicVine API. Requires an
+    account on https://comicvine.gamespot.com/ in order to obtain an API key.
+    """
+
+    # All API requests made by this client will be made to this URL.
+    API_URL = "https://www.comicvine.com/api/search/"
+
+    # A valid User-Agent header must be set in order for our API requests to
+    # be accepted, otherwise our request will be rejected with a
+    # **403 - Forbidden** error.
+    HEADERS = {
+        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:7.0) "
+        "Gecko/20130825 Firefox/36.0"
+    }
+
+    # A set of valid resource types to return in results.
+    RESOURCE_TYPES = {
+        "character",
+        "issue",
+        "location",
+        "object",
+        "person",
+        "publisher",
+        "story_arc",
+        "team",
+        "volume",
+    }
+
+    def __init__(self, api_key, expire_after=300):
+        """
+        Store the API key on the instance. The ``expire_after`` parameter is
+        accepted but currently unused; no requests cache is installed.
+
+        :param api_key: Your personal ComicVine API key.
+        :type  api_key: str
+        :param expire_after: The number of seconds to retain an entry in cache.
+        :type  expire_after: int or None
+        """
+
+        self.api_key = api_key
+
+    def search(self, query, offset=0, limit=10, resources=None):
+        """
+        Perform a search against the API, using the provided query term. If
+        required, a list of resource types to filter search results to can
+        be included.
+
+        Return the JSON decoded from the API response as-is; no wrapper
+        object is constructed around it.
+
+        :param query: The search query with which to make the request.
+        :type  query: str
+        :param offset: The index of the first record returned.
+        :type  offset: int or None
+        :param limit: How many records to return **(max 10)**
+        :type  limit: int or None
+        :param resources: A list of resources to include in the search results.
+        :type  resources: list or None
+        :type  use_cache: bool
+
+        :return: The response object containing the results of the search
+                 query.
+        :rtype:  dict
+        """
+
+        params = self._request_params(query, offset, limit, resources)
+        json_data = self._query_api(params)
+
+        return json_data
+
+    def _request_params(self, query, offset, limit, resources):
+        """
+        Construct a dict containing the required key-value pairs of parameters
+        required in order to make the API request.
+
+        The documentation for the ``search`` resource can be found at
+        https://comicvine.gamespot.com/api/documentation#toc-0-30.
+
+        Regarding 'limit', as per the documentation:
+
+            The number of results to display per page. This value defaults to
+            10 and can not exceed this number.
+
+        :param query: The search query with which to make the request.
+        :type  query: str
+        :param offset: The index of the first record returned.
+        :type  offset: int
+        :param limit: How many records to return **(max 10)**
+        :type  limit: int
+        :param resources: A list of resources to include in the search results.
+        :type  resources: list or None
+
+        :return: A dictionary of request parameters.
+        :rtype:  dict
+        """
+
+        return {
+            "api_key": self.api_key,
+            "format": "json",
+            "limit": min(10, limit),  # hard limit of 10
+            "offset": max(0, offset),  # cannot provide negative offset
+            "query": query,
+            "resources": self._validate_resources(resources),
+        }
+
+    def _validate_resources(self, resources):
+        """
+        Provided a list of resources, first convert it to a set and perform an
+        intersection with the set of valid resource types, ``RESOURCE_TYPES``.
+        Return a comma-separated string of the remaining valid resources, or
+        None if the set is empty.
+
+        :param resources: A list of resources to include in the search results.
+        :type  resources: list or None
+
+        :return: A comma-separated string of valid resources.
+        :rtype:  str or None
+        """
+
+        if not resources:
+            return None
+
+        valid_resources = self.RESOURCE_TYPES & set(resources)
+        return ",".join(valid_resources) if valid_resources else None
+
+    def _query_api(self, params):
+        """
+        Query the ComicVine API's ``search`` resource, providing the required
+        headers and parameters with the request. Optionally allow the caller
+        of the function to disable the request cache.
+
+        If an error occurs during the request, handle it accordingly. Upon
+        success, return the JSON from the response.
+
+        :param params: Parameters to include with the request.
+        :type  params: dict
+        :param use_cache: Toggle the use of requests_cache.
+        :type  use_cache: bool
+
+        :return: The JSON contained in the response.
+        :rtype:  dict
+        """
+
+        # Since we're performing the identical action regardless of whether
+        # or not the request cache is to be used, store the procedure in a
+        # local function to avoid repetition.
+        def __httpget():
+            response = requests.get(
+                self.API_URL, headers=self.HEADERS, params=params
+            )
+
+            if not response.ok:
+                self._handle_http_error(response)
+
+            return response.json()
+
+        return __httpget()
+
+    def _handle_http_error(self, response):
+        """
+        Provided a ``requests.Response`` object, if the status code is
+        anything other than **200**, we will treat it as an error.
+
+        Using the response's status code, determine which type of exception to
+        raise. Construct an exception message from the response's status code
+        and reason properties before raising the exception.
+
+        :param response: The requests.Response object returned by the HTTP
+                         request.
+        :type  response: requests.Response
+
+        :raises ComicVineUnauthorizedException: if no API key provided.
+        :raises ComicVineForbiddenException: if no User-Agent header provided.
+        :raises ComicVineApiException: if an unidentified error occurs.
+        """
+
+        exception = {
+            401: Exception,
+            403: Exception,
+        }.get(response.status_code, Exception)
+        message = f"{response.status_code} {response.reason}"
+
+        raise exception(message)
+
+
+def lookup_comic_from_comicvine(title: str) -> dict:
+    api_key = getattr(settings, "COMICVINE_API_KEY", "")
+    if not api_key:
+        logger.warn("No ComicVine API key configured, not looking anything up")
+        return {}
+
+    client = ComicVineClient(
+        api_key=getattr(settings, "COMICVINE_API_KEY", None)
+    )
+    return client.search(title)
+    results = [
+        r
+        for r in client.search(title).get("results")
+        if r.get("resource_type") == "volume"
+    ]
+
+    return results
+    """
+    {
+        "title": top.get("title"),
+        "isbn": isbn,
+        "comicvine_id": ol_id,
+        "first_publish_year": top.get("first_publish_year"),
+        "first_sentence": first_sentence,
+        "pages": top.get("number_of_pages_median", None),
+        "cover_url": COVER_URL.format(id=ol_id),
+        "cv_author_id": ol_author_id,
+        "subject_key_list": top.get("subject_key", []),
+    }
+
+    """
--- a/vrobbler/apps/books/koreader.py
+++ b/vrobbler/apps/books/koreader.py
@ -1,21 +1,14 @@
-import codecs
 import logging
-import os
 import re
 import sqlite3
-from datetime import datetime, timedelta
+from datetime import datetime
 from enum import Enum
-from typing import Iterable, List, Optional

 import pytz
-import requests
-from books.models import Author, Book, Page
+from books.models import Author, Book
 from books.openlibrary import get_author_openlibrary_id
 from django.apps import apps
 from django.contrib.auth import get_user_model
-from django.db.models import Sum
-from pylast import httpx, tempfile
-from scrobbles.utils import timestamp_user_tz_to_utc
 from stream_sqlite import stream_sqlite

 logger = logging.getLogger(__name__)
@ -95,11 +88,16 @@ def create_book_from_row(row: list):
    run_time = total_pages * Book.AVG_PAGE_READING_SECONDS

    book = Book.objects.create(
-        koreader_md5=row[KoReaderBookColumn.MD5.value],
        title=row[KoReaderBookColumn.TITLE.value],
-        koreader_id=row[KoReaderBookColumn.ID.value],
-        koreader_authors=author_str,
        pages=total_pages,
+        koreader_data_by_hash={
+            row[KoReaderBookColumn.MD5.value]: {
+                "title": row[KoReaderBookColumn.TITLE.value],
+                "author_str": author_str,
+                "book_id": row[KoReaderBookColumn.ID.value],
+                "pages": total_pages,
+            }
+        },
        run_time_seconds=run_time,
    )
    book.fix_metadata()
@ -122,8 +120,16 @@ def build_book_map(rows) -> dict:
    book_id_map = {}

    for book_row in rows:
+        if (
+            book_row[KoReaderBookColumn.TITLE.value]
+            == "KOReader Quickstart Guide"
+        ):
+            logger.info(
+                "Ignoring the KOReader quickstart guide. No on wants that."
+            )
+            continue
        book = Book.objects.filter(
-            koreader_md5=book_row[KoReaderBookColumn.MD5.value]
+            koreader_md5__icontains=book_row[KoReaderBookColumn.MD5.value]
        ).first()

        if not book:
@ -136,6 +142,7 @@ def build_book_map(rows) -> dict:

        book_id_map[book_row[KoReaderBookColumn.ID.value]] = {
            "book_id": book.id,
+            "hash": book_row[KoReaderBookColumn.MD5.value],
            "total_seconds": total_seconds,
        }
    return book_id_map
@ -289,6 +296,7 @@ def build_scrobbles_from_book_map(
                            timestamp=timestamp,
                            stop_timestamp=stop_timestamp,
                            playback_position_seconds=playback_position_seconds,
+                            book_koreader_hash=book_dict.get("hash"),
                            book_page_data=scrobble_page_data,
                            book_pages_read=page_number,
                            in_progress=False,
--- a/vrobbler/apps/books/management/commands/migrate_koreader_data_to_json.py
+++ b/vrobbler/apps/books/management/commands/migrate_koreader_data_to_json.py
@ -0,0 +1,17 @@
+from django.core.management.base import BaseCommand
+from books.models import Book
+
+
+class Command(BaseCommand):
+    def handle(self, *args, **options):
+        for book in Book.objects.all():
+            koreader_data = book.koreader_data_by_hash or {}
+            if book.koreader_md5:
+                koreader_data[book.koreader_md5] = {
+                    "title": book.title,
+                    "book_id": book.koreader_id,
+                    "author_str": book.koreader_authors,
+                    "pages": book.pages,
+                }
+            book.koreader_data_by_hash = koreader_data
+            book.save(update_fields=["koreader_data_by_hash"])
--- a/vrobbler/apps/books/management/commands/migrate_pages_to_json_data.py
+++ b/vrobbler/apps/books/management/commands/migrate_pages_to_json_data.py
@ -0,0 +1,62 @@
+from books.models import Book
+from django.core.management.base import BaseCommand
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--commit",
+            action="store_true",
+            help="Actually populate data",
+        )
+
+    def handle(self, *args, **options):
+        dry_run = True
+        if options["commit"]:
+            dry_run = False
+
+        pages_to_create = []
+        associated_scrobble = None
+        last_scrobble = None
+        for book in Book.objects.all():
+            for page in book.page_set.all().order_by("number"):
+                notes = ""
+                last_scrobble = associated_scrobble
+                if (
+                    not associated_scrobble
+                    or page.number > associated_scrobble.book_pages_read
+                ):
+                    associated_scrobble = page.user.scrobble_set.filter(
+                        book=page.book,
+                        timestamp__gte=page.start_time,
+                        timestamp__lte=page.end_time,
+                    ).first()
+
+                if (
+                    last_scrobble
+                    and not associated_scrobble
+                    and page.number > last_scrobble.book_pages_read
+                ):
+                    associated_scrobble = last_scrobble
+                    notes = f"Extrapolated reading from scrobble {associated_scrobble.id}"
+
+                pages_to_create.append(
+                    ScrobbledPage(
+                        scrobble=associated_scrobble,
+                        number=page.number,
+                        start_time=page.start_time,
+                        end_time=page.end_time,
+                        duration_seconds=page.duration_seconds,
+                        notes=notes,
+                    )
+                )
+
+        pages_to_move_len = len(pages_to_create)
+        if dry_run:
+            print(
+                f"Found {pages_to_move_len} to migrate. Use --commit to move them"
+            )
+            return
+
+        ScrobbledPage.objects.bulk_create(pages_to_create)
+        print(f"Migrated {pages_to_move_len} generic pages to scrobbled pages")
--- a/vrobbler/apps/books/migrations/0020_author_comicvine_data_book_comicvine_data_and_more.py
+++ b/vrobbler/apps/books/migrations/0020_author_comicvine_data_book_comicvine_data_and_more.py
@ -0,0 +1,28 @@
+# Generated by Django 4.2.9 on 2024-01-29 05:55
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("books", "0019_alter_book_authors"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="author",
+            name="comicvine_data",
+            field=models.JSONField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="book",
+            name="comicvine_data",
+            field=models.JSONField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="book",
+            name="koreader_data_by_hash",
+            field=models.JSONField(blank=True, null=True),
+        ),
+    ]
--- a/vrobbler/apps/books/models.py
+++ b/vrobbler/apps/books/models.py
@ -59,6 +59,7 @@ class Author(TimeStampedModel):
    wikidata_id = models.CharField(max_length=255, **BNULL)
    goodreads_id = models.CharField(max_length=255, **BNULL)
    librarything_id = models.CharField(max_length=255, **BNULL)
+    comicvine_data = models.JSONField(**BNULL)
    amazon_id = models.CharField(max_length=255, **BNULL)

    def __str__(self):
@ -92,9 +93,11 @@ class Book(LongPlayScrobblableMixin):
    title = models.CharField(max_length=255)
    authors = models.ManyToManyField(Author, blank=True)
    goodreads_id = models.CharField(max_length=255, **BNULL)
+    # All individual koreader fields are deprecated
    koreader_id = models.IntegerField(**BNULL)
    koreader_authors = models.CharField(max_length=255, **BNULL)
    koreader_md5 = models.CharField(max_length=255, **BNULL)
+    koreader_data_by_hash = models.JSONField(**BNULL)
    isbn = models.CharField(max_length=255, **BNULL)
    pages = models.IntegerField(**BNULL)
    language = models.CharField(max_length=4, **BNULL)
@ -102,6 +105,7 @@ class Book(LongPlayScrobblableMixin):
    first_sentence = models.TextField(**BNULL)
    openlibrary_id = models.CharField(max_length=255, **BNULL)
    locg_slug = models.CharField(max_length=255, **BNULL)
+    comicvine_data = models.JSONField(**BNULL)
    cover = models.ImageField(upload_to="books/covers/", **BNULL)
    cover_small = ImageSpecField(
        source="cover",
@ -146,15 +150,8 @@ class Book(LongPlayScrobblableMixin):
                author_name = self.author.name

            if not data:
-                if self.locg_slug:
-                    data = lookup_comic_by_locg_slug(str(self.locg_slug))
-                else:
-                    data = lookup_comic_from_locg(str(self.title))
-
                if not data:
-                    logger.warn(
-                        f"Book not found on LOCG, checking OL {self.title}"
-                    )
+                    logger.warn(f"rChecking openlibrary for {self.title}")
                    if self.openlibrary_id and force_update:
                        data = lookup_book_from_openlibrary(
                            str(self.openlibrary_id)
@ -163,9 +160,20 @@ class Book(LongPlayScrobblableMixin):
                        data = lookup_book_from_openlibrary(
                            str(self.title), author_name
                        )
-                if not data:
-                    logger.warn(f"Book not found in OL {self.title}")
-                    return
+
+            if not data:
+                if self.locg_slug:
+                    logger.warn(
+                        f"rChecking openlibrary for {self.title} with slug {self.locg_slug}"
+                    )
+                    data = lookup_comic_by_locg_slug(str(self.locg_slug))
+                else:
+                    logger.warn(f"rChecking openlibrary for {self.title}")
+                    data = lookup_comic_from_locg(str(self.title))
+
+            if not data:
+                logger.warn(f"Book not found in any sources: {self.title}")
+                return

            # We can discard the author name from OL for now, we'll lookup details below
            data.pop("ol_author_name", "")
--- a/vrobbler/apps/scrobbles/migrations/0047_scrobble_book_koreader_hash.py
+++ b/vrobbler/apps/scrobbles/migrations/0047_scrobble_book_koreader_hash.py
@ -0,0 +1,18 @@
+# Generated by Django 4.2.9 on 2024-01-29 06:13
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("scrobbles", "0046_scrobble_book_page_data_alter_chartrecord_year"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="scrobble",
+            name="book_koreader_hash",
+            field=models.CharField(blank=True, max_length=50, null=True),
+        ),
+    ]
--- a/vrobbler/apps/scrobbles/models.py
+++ b/vrobbler/apps/scrobbles/models.py
@ -520,9 +520,12 @@ class Scrobble(TimeStampedModel):
    scrobble_log = models.TextField(**BNULL)
    notes = models.TextField(**BNULL)

-    # Fields for keeping track long content like books and games
+    # Fields for keeping track of book data
+    book_koreader_hash = models.CharField(max_length=50, **BNULL)
    book_pages_read = models.IntegerField(**BNULL)
    book_page_data = models.JSONField(**BNULL)
+
+    # Fields for keeping track of video game data
    videogame_save_data = models.FileField(
        upload_to="scrobbles/videogame_save_data/", **BNULL
    )
--- a/vrobbler/settings.py
+++ b/vrobbler/settings.py
@ -65,6 +65,7 @@ LASTFM_API_KEY = os.getenv("VROBBLER_LASTFM_API_KEY")
 LASTFM_SECRET_KEY = os.getenv("VROBBLER_LASTFM_SECRET_KEY")
 IGDB_CLIENT_ID = os.getenv("VROBBLER_IGDB_CLIENT_ID")
 IGDB_CLIENT_SECRET = os.getenv("VROBBLER_IGDB_CLIENT_SECRET")
+COMICVINE_API_KEY = os.getenv("VROBBLER_COMICVINE_API_KEY")
 GEOLOC_ACCURACY = os.getenv("VROBBLER_GEOLOC_ACCURACY", 3)

 DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"