Start to add ComicVine lookups and consolidate KOReader data

This commit is contained in:
2024-01-29 01:48:56 -05:00
parent 70c7eda415
commit 9e2d7a6bc0
10 changed files with 403 additions and 25 deletions

View File

@ -20,6 +20,7 @@ VROBBLER_THESPORTSDB_API_KEY="<key>"
VROBBLER_THEAUDIODB_API_KEY="<key>"
VROBBLER_IGDB_CLIENT_ID="<id>"
VROBBLER_IGDB_CLIENT_SECRET="<key>"
VROBBLER_COMICVINE_API_KEY="<key>"
# Storages
# VROBBLER_DATABASE_URL="postgres://USER:PASSWORD@HOST:PORT/NAME"

View File

@ -0,0 +1,232 @@
"""
ComicVine API Information & Documentation:
https://comicvine.gamespot.com/api/
https://comicvine.gamespot.com/api/documentation
"""
import json
import logging
from django.conf import settings
import requests
logger = logging.getLogger(__name__)
class ComicVineClient:
    """
    Minimal client for the ``search`` resource of the ComicVine API.

    Requires an account on https://comicvine.gamespot.com/ in order to obtain
    an API key. Responses are returned as the decoded JSON payload; no
    caching is performed by this client.
    """

    # All API requests made by this client will be made to this URL.
    API_URL = "https://www.comicvine.com/api/search/"

    # A valid User-Agent header must be set in order for our API requests to
    # be accepted, otherwise our request will be rejected with a
    # **403 - Forbidden** error.
    HEADERS = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:7.0) "
        "Gecko/20130825 Firefox/36.0"
    }

    # Seconds to wait for the API before giving up. Without a timeout
    # ``requests`` waits indefinitely on a stalled connection.
    TIMEOUT = 10

    # The API documents a hard cap of 10 results per page.
    MAX_LIMIT = 10

    # A set of valid resource types to return in results.
    RESOURCE_TYPES = {
        "character",
        "issue",
        "location",
        "object",
        "person",
        "publisher",
        "story_arc",
        "team",
        "volume",
    }

    def __init__(self, api_key, expire_after=300):
        """
        Store the API key on the instance.

        :param api_key: Your personal ComicVine API key.
        :type api_key: str
        :param expire_after: Accepted for backward compatibility only; this
            client does not install a request cache, so the value is ignored.
        :type expire_after: int or None
        """
        self.api_key = api_key

    def search(self, query, offset=0, limit=10, resources=None):
        """
        Perform a search against the API, using the provided query term. If
        required, a list of resource types to filter search results to can
        be included.

        :param query: The search query with which to make the request.
        :type query: str
        :param offset: The index of the first record returned.
        :type offset: int or None
        :param limit: How many records to return **(max 10)**
        :type limit: int or None
        :param resources: A list of resources to include in the search results.
        :type resources: list or None
        :return: The decoded JSON body of the API response.
        :rtype: dict
        :raises Exception: if the API answers with a non-2xx status.
        """
        params = self._request_params(query, offset, limit, resources)
        return self._query_api(params)

    def _request_params(self, query, offset, limit, resources):
        """
        Construct a dict containing the key-value pairs of parameters
        required in order to make the API request.

        The documentation for the ``search`` resource can be found at
        https://comicvine.gamespot.com/api/documentation#toc-0-30.

        Regarding 'limit', as per the documentation:
            The number of results to display per page. This value defaults to
            10 and can not exceed this number.

        :param query: The search query with which to make the request.
        :type query: str
        :param offset: The index of the first record returned; ``None`` is
            treated as 0 and negative values are clamped to 0.
        :type offset: int or None
        :param limit: How many records to return **(max 10)**; ``None`` is
            treated as the maximum.
        :type limit: int or None
        :param resources: A list of resources to include in the search results.
        :type resources: list or None
        :return: A dictionary of request parameters.
        :rtype: dict
        """
        # ``search`` documents both values as "int or None"; fall back to the
        # API defaults instead of failing inside min()/max().
        if limit is None:
            limit = self.MAX_LIMIT
        if offset is None:
            offset = 0
        return {
            "api_key": self.api_key,
            "format": "json",
            "limit": min(self.MAX_LIMIT, limit),  # hard limit of 10
            "offset": max(0, offset),  # cannot provide negative offset
            "query": query,
            "resources": self._validate_resources(resources),
        }

    def _validate_resources(self, resources):
        """
        Keep only the entries of ``resources`` that appear in
        ``RESOURCE_TYPES`` and return them as a comma-separated string.

        The names are de-duplicated and sorted so the same input always
        produces the same query string.

        :param resources: A list of resources to include in the search results.
        :type resources: list or None
        :return: A comma-separated string of valid resources, or None if
            nothing valid was supplied.
        :rtype: str or None
        """
        if not resources:
            return None
        valid_resources = self.RESOURCE_TYPES & set(resources)
        # An empty intersection joins to "", which we report as None.
        return ",".join(sorted(valid_resources)) or None

    def _query_api(self, params):
        """
        Query the ComicVine API's ``search`` resource, providing the required
        headers and parameters with the request.

        :param params: Parameters to include with the request.
        :type params: dict
        :return: The JSON contained in the response.
        :rtype: dict
        :raises Exception: if the API answers with a non-2xx status.
        """
        response = requests.get(
            self.API_URL,
            headers=self.HEADERS,
            params=params,
            timeout=self.TIMEOUT,
        )
        if not response.ok:
            self._handle_http_error(response)
        return response.json()

    def _handle_http_error(self, response):
        """
        Raise an exception describing a failed HTTP response.

        The message is built from the response's status code and reason.
        All failures are currently reported as a plain ``Exception``.

        :param response: The requests.Response object returned by the HTTP
            request.
        :type response: requests.Response
        :raises Exception: always.
        """
        raise Exception(f"{response.status_code} {response.reason}")
def lookup_comic_from_comicvine(title: str) -> dict:
    """Search ComicVine for ``title`` and return the raw search response.

    :param title: The comic title to search for.
    :return: An empty dict when ``settings.COMICVINE_API_KEY`` is not
        configured, otherwise whatever ``ComicVineClient.search`` returns
        (the decoded JSON body of the search request).
    """
    api_key = getattr(settings, "COMICVINE_API_KEY", "")
    if not api_key:
        logger.warning(
            "No ComicVine API key configured, not looking anything up"
        )
        return {}

    # TODO: narrow the response to results whose ``resource_type`` is
    # "volume" and map the top hit onto the normalized book dict (title,
    # comicvine_id, cover_url, ...) once the field mapping is settled.
    return ComicVineClient(api_key=api_key).search(title)

View File

@ -1,21 +1,14 @@
import codecs
import logging
import os
import re
import sqlite3
from datetime import datetime, timedelta
from datetime import datetime
from enum import Enum
from typing import Iterable, List, Optional
import pytz
import requests
from books.models import Author, Book, Page
from books.models import Author, Book
from books.openlibrary import get_author_openlibrary_id
from django.apps import apps
from django.contrib.auth import get_user_model
from django.db.models import Sum
from pylast import httpx, tempfile
from scrobbles.utils import timestamp_user_tz_to_utc
from stream_sqlite import stream_sqlite
logger = logging.getLogger(__name__)
@ -95,11 +88,16 @@ def create_book_from_row(row: list):
run_time = total_pages * Book.AVG_PAGE_READING_SECONDS
book = Book.objects.create(
koreader_md5=row[KoReaderBookColumn.MD5.value],
title=row[KoReaderBookColumn.TITLE.value],
koreader_id=row[KoReaderBookColumn.ID.value],
koreader_authors=author_str,
pages=total_pages,
koreader_data_by_hash={
row[KoReaderBookColumn.MD5.value]: {
"title": row[KoReaderBookColumn.TITLE.value],
"author_str": author_str,
"book_id": row[KoReaderBookColumn.ID.value],
"pages": total_pages,
}
},
run_time_seconds=run_time,
)
book.fix_metadata()
@ -122,8 +120,16 @@ def build_book_map(rows) -> dict:
book_id_map = {}
for book_row in rows:
if (
book_row[KoReaderBookColumn.TITLE.value]
== "KOReader Quickstart Guide"
):
logger.info(
"Ignoring the KOReader quickstart guide. No on wants that."
)
continue
book = Book.objects.filter(
koreader_md5=book_row[KoReaderBookColumn.MD5.value]
koreader_md5__icontains=book_row[KoReaderBookColumn.MD5.value]
).first()
if not book:
@ -136,6 +142,7 @@ def build_book_map(rows) -> dict:
book_id_map[book_row[KoReaderBookColumn.ID.value]] = {
"book_id": book.id,
"hash": book_row[KoReaderBookColumn.MD5.value],
"total_seconds": total_seconds,
}
return book_id_map
@ -289,6 +296,7 @@ def build_scrobbles_from_book_map(
timestamp=timestamp,
stop_timestamp=stop_timestamp,
playback_position_seconds=playback_position_seconds,
book_koreader_hash=book_dict.get("hash"),
book_page_data=scrobble_page_data,
book_pages_read=page_number,
in_progress=False,

View File

@ -0,0 +1,17 @@
from django.core.management.base import BaseCommand
from books.models import Book
class Command(BaseCommand):
    """Backfill ``Book.koreader_data_by_hash`` from the older KOReader fields."""

    help = (
        "Copy each book's individual KOReader fields into its "
        "koreader_data_by_hash mapping, keyed by the KOReader MD5 hash"
    )

    def handle(self, *args, **options):
        """Write one hash-keyed entry per book that has a KOReader MD5."""
        # Only books that actually carry a hash have anything to copy, so
        # let the database skip the rest.
        books = Book.objects.filter(koreader_md5__isnull=False).exclude(
            koreader_md5=""
        )
        for book in books:
            # Keep entries already stored under other hashes.
            koreader_data = book.koreader_data_by_hash or {}
            koreader_data[book.koreader_md5] = {
                "title": book.title,
                "book_id": book.koreader_id,
                "author_str": book.koreader_authors,
                "pages": book.pages,
            }
            book.koreader_data_by_hash = koreader_data
            book.save(update_fields=["koreader_data_by_hash"])

View File

@ -0,0 +1,62 @@
from books.models import Book
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """Build ``ScrobbledPage`` rows from the generic pages stored per book.

    Runs as a dry run unless ``--commit`` is passed.
    """

    # NOTE(review): ``ScrobbledPage`` is not imported anywhere in the visible
    # part of this module, so ``handle`` raises NameError on the first page
    # until the correct import is added -- confirm where the model lives.

    help = "Migrate generic book pages to scrobbled pages (dry run by default)"

    def add_arguments(self, parser):
        """Register the ``--commit`` flag that turns off dry-run mode."""
        parser.add_argument(
            "--commit",
            action="store_true",
            help="Actually populate data",
        )

    def handle(self, *args, **options):
        """Pair every page with a scrobble and bulk-create the new rows.

        A page is matched to the first scrobble of the page's user for the
        same book whose timestamp falls inside the page's start/end window.
        When no such scrobble exists, the previously matched scrobble of the
        same book is reused and the page is annotated as extrapolated.
        """
        dry_run = not options["commit"]
        pages_to_create = []

        for book in Book.objects.all():
            # Matching state must not leak from one book into the next,
            # otherwise the first pages of a book can be attached to a
            # scrobble that belongs to the previous book.
            associated_scrobble = None

            for page in book.page_set.all().order_by("number"):
                notes = ""
                last_scrobble = associated_scrobble

                # ``book_pages_read`` is nullable; treat a missing value as 0
                # so the comparison cannot raise TypeError.
                if (
                    not associated_scrobble
                    or page.number > (associated_scrobble.book_pages_read or 0)
                ):
                    associated_scrobble = page.user.scrobble_set.filter(
                        book=page.book,
                        timestamp__gte=page.start_time,
                        timestamp__lte=page.end_time,
                    ).first()

                # Nothing matched this page's time window: fall back to the
                # previous scrobble and record that this is a guess.
                if (
                    last_scrobble
                    and not associated_scrobble
                    and page.number > (last_scrobble.book_pages_read or 0)
                ):
                    associated_scrobble = last_scrobble
                    notes = f"Extrapolated reading from scrobble {associated_scrobble.id}"

                pages_to_create.append(
                    ScrobbledPage(
                        scrobble=associated_scrobble,
                        number=page.number,
                        start_time=page.start_time,
                        end_time=page.end_time,
                        duration_seconds=page.duration_seconds,
                        notes=notes,
                    )
                )

        pages_to_move_len = len(pages_to_create)
        if dry_run:
            self.stdout.write(
                f"Found {pages_to_move_len} to migrate. Use --commit to move them"
            )
            return

        ScrobbledPage.objects.bulk_create(pages_to_create)
        self.stdout.write(
            f"Migrated {pages_to_move_len} generic pages to scrobbled pages"
        )

View File

@ -0,0 +1,28 @@
# Generated by Django 4.2.9 on 2024-01-29 05:55
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add JSON storage for ComicVine data and per-hash KOReader data.

    Adds ``comicvine_data`` to both ``author`` and ``book``, and
    ``koreader_data_by_hash`` to ``book``. All three columns are nullable
    and may be left blank.
    """

    # Must run after the previous migration of the ``books`` app.
    dependencies = [
        ("books", "0019_alter_book_authors"),
    ]
    operations = [
        # ComicVine data for an author.
        migrations.AddField(
            model_name="author",
            name="comicvine_data",
            field=models.JSONField(blank=True, null=True),
        ),
        # ComicVine data for a book.
        migrations.AddField(
            model_name="book",
            name="comicvine_data",
            field=models.JSONField(blank=True, null=True),
        ),
        # KOReader metadata for a book, stored as JSON.
        migrations.AddField(
            model_name="book",
            name="koreader_data_by_hash",
            field=models.JSONField(blank=True, null=True),
        ),
    ]

View File

@ -59,6 +59,7 @@ class Author(TimeStampedModel):
wikidata_id = models.CharField(max_length=255, **BNULL)
goodreads_id = models.CharField(max_length=255, **BNULL)
librarything_id = models.CharField(max_length=255, **BNULL)
comicvine_data = models.JSONField(**BNULL)
amazon_id = models.CharField(max_length=255, **BNULL)
def __str__(self):
@ -92,9 +93,11 @@ class Book(LongPlayScrobblableMixin):
title = models.CharField(max_length=255)
authors = models.ManyToManyField(Author, blank=True)
goodreads_id = models.CharField(max_length=255, **BNULL)
# All individual koreader fields are deprecated
koreader_id = models.IntegerField(**BNULL)
koreader_authors = models.CharField(max_length=255, **BNULL)
koreader_md5 = models.CharField(max_length=255, **BNULL)
koreader_data_by_hash = models.JSONField(**BNULL)
isbn = models.CharField(max_length=255, **BNULL)
pages = models.IntegerField(**BNULL)
language = models.CharField(max_length=4, **BNULL)
@ -102,6 +105,7 @@ class Book(LongPlayScrobblableMixin):
first_sentence = models.TextField(**BNULL)
openlibrary_id = models.CharField(max_length=255, **BNULL)
locg_slug = models.CharField(max_length=255, **BNULL)
comicvine_data = models.JSONField(**BNULL)
cover = models.ImageField(upload_to="books/covers/", **BNULL)
cover_small = ImageSpecField(
source="cover",
@ -146,15 +150,8 @@ class Book(LongPlayScrobblableMixin):
author_name = self.author.name
if not data:
if self.locg_slug:
data = lookup_comic_by_locg_slug(str(self.locg_slug))
else:
data = lookup_comic_from_locg(str(self.title))
if not data:
logger.warn(
f"Book not found on LOCG, checking OL {self.title}"
)
logger.warn(f"rChecking openlibrary for {self.title}")
if self.openlibrary_id and force_update:
data = lookup_book_from_openlibrary(
str(self.openlibrary_id)
@ -163,9 +160,20 @@ class Book(LongPlayScrobblableMixin):
data = lookup_book_from_openlibrary(
str(self.title), author_name
)
if not data:
logger.warn(f"Book not found in OL {self.title}")
return
if not data:
if self.locg_slug:
logger.warn(
f"rChecking openlibrary for {self.title} with slug {self.locg_slug}"
)
data = lookup_comic_by_locg_slug(str(self.locg_slug))
else:
logger.warn(f"rChecking openlibrary for {self.title}")
data = lookup_comic_from_locg(str(self.title))
if not data:
logger.warn(f"Book not found in any sources: {self.title}")
return
# We can discard the author name from OL for now, we'll lookup details below
data.pop("ol_author_name", "")

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.9 on 2024-01-29 06:13
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the optional ``book_koreader_hash`` column to ``scrobble``."""

    # Must run after the previous migration of the ``scrobbles`` app.
    dependencies = [
        ("scrobbles", "0046_scrobble_book_page_data_alter_chartrecord_year"),
    ]
    operations = [
        # Nullable, blank-allowed string of at most 50 characters.
        migrations.AddField(
            model_name="scrobble",
            name="book_koreader_hash",
            field=models.CharField(blank=True, max_length=50, null=True),
        ),
    ]

View File

@ -520,9 +520,12 @@ class Scrobble(TimeStampedModel):
scrobble_log = models.TextField(**BNULL)
notes = models.TextField(**BNULL)
# Fields for keeping track long content like books and games
# Fields for keeping track of book data
book_koreader_hash = models.CharField(max_length=50, **BNULL)
book_pages_read = models.IntegerField(**BNULL)
book_page_data = models.JSONField(**BNULL)
# Fields for keeping track of video game data
videogame_save_data = models.FileField(
upload_to="scrobbles/videogame_save_data/", **BNULL
)

View File

@ -65,6 +65,7 @@ LASTFM_API_KEY = os.getenv("VROBBLER_LASTFM_API_KEY")
LASTFM_SECRET_KEY = os.getenv("VROBBLER_LASTFM_SECRET_KEY")
IGDB_CLIENT_ID = os.getenv("VROBBLER_IGDB_CLIENT_ID")
IGDB_CLIENT_SECRET = os.getenv("VROBBLER_IGDB_CLIENT_SECRET")
COMICVINE_API_KEY = os.getenv("VROBBLER_COMICVINE_API_KEY")
GEOLOC_ACCURACY = os.getenv("VROBBLER_GEOLOC_ACCURACY", 3)
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"