Start to add ComicVine lookups and consolidate KOReader data
This commit is contained in:
@ -20,6 +20,7 @@ VROBBLER_THESPORTSDB_API_KEY="<key>"
|
||||
VROBBLER_THEAUDIODB_API_KEY="<key>"
|
||||
VROBBLER_IGDB_CLIENT_ID="<id>"
|
||||
VROBBLER_IGDB_CLIENT_SECRET="<key>"
|
||||
VROBBLER_COMICVINE_API_KEY="<key>"
|
||||
|
||||
# Storages
|
||||
# VROBBLER_DATABASE_URL="postgres://USER:PASSWORD@HOST:PORT/NAME"
|
||||
|
||||
232
vrobbler/apps/books/comicvine.py
Normal file
232
vrobbler/apps/books/comicvine.py
Normal file
@ -0,0 +1,232 @@
|
||||
"""
|
||||
ComicVine API Information & Documentation:
|
||||
https://comicvine.gamespot.com/api/
|
||||
https://comicvine.gamespot.com/api/documentation
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from django.conf import settings
|
||||
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ComicVineClient(object):
    """
    Interacts with the ``search`` resource of the ComicVine API. Requires an
    account on https://comicvine.gamespot.com/ in order to obtain an API key.
    """

    # All API requests made by this client will be made to this URL.
    API_URL = "https://www.comicvine.com/api/search/"

    # A valid User-Agent header must be set in order for our API requests to
    # be accepted, otherwise our request will be rejected with a
    # **403 - Forbidden** error.
    HEADERS = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:7.0) "
        "Gecko/20130825 Firefox/36.0"
    }

    # Largest page size sent to the ``search`` resource.
    MAX_LIMIT = 10

    # Seconds to wait for the API before giving up; without a timeout a
    # stalled connection would block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    # A set of valid resource types to return in results.
    RESOURCE_TYPES = {
        "character",
        "issue",
        "location",
        "object",
        "person",
        "publisher",
        "story_arc",
        "team",
        "volume",
    }

    def __init__(self, api_key, expire_after=300):
        """
        Store the API key on the instance.

        :param api_key: Your personal ComicVine API key.
        :type api_key: str
        :param expire_after: Accepted for backwards compatibility only; no
            request cache is installed, so the value is currently unused.
        :type expire_after: int or None
        """
        self.api_key = api_key

    def search(self, query, offset=0, limit=10, resources=None):
        """
        Perform a search against the API, using the provided query term. If
        required, a list of resource types to filter search results to can
        be included.

        :param query: The search query with which to make the request.
        :type query: str
        :param offset: The index of the first record returned.
        :type offset: int or None
        :param limit: How many records to return **(max 10)**
        :type limit: int or None
        :param resources: A list of resources to include in the search results.
        :type resources: list or None

        :return: The decoded JSON body of the API response.
        :rtype: dict
        """
        params = self._request_params(query, offset, limit, resources)
        return self._query_api(params)

    def _request_params(self, query, offset, limit, resources):
        """
        Construct a dict containing the key-value pairs of parameters
        required in order to make the API request.

        The documentation for the ``search`` resource can be found at
        https://comicvine.gamespot.com/api/documentation#toc-0-30.

        Regarding 'limit', as per the documentation:

            The number of results to display per page. This value defaults to
            10 and can not exceed this number.

        :param query: The search query with which to make the request.
        :type query: str
        :param offset: The index of the first record returned; ``None`` and
            negative values are sent as 0.
        :type offset: int or None
        :param limit: How many records to return **(max 10)**; ``None`` is
            sent as the maximum.
        :type limit: int or None
        :param resources: A list of resources to include in the search results.
        :type resources: list or None

        :return: A dictionary of request parameters.
        :rtype: dict
        """
        # ``None`` is an allowed value for both arguments, but cannot be
        # compared with an int, so resolve it before clamping.
        if limit is None:
            limit = self.MAX_LIMIT
        else:
            limit = min(self.MAX_LIMIT, limit)
        offset = 0 if offset is None else max(0, offset)

        return {
            "api_key": self.api_key,
            "format": "json",
            "limit": limit,
            "offset": offset,
            "query": query,
            "resources": self._validate_resources(resources),
        }

    def _validate_resources(self, resources):
        """
        Provided a list of resources, first convert it to a set and perform an
        intersection with the set of valid resource types, ``RESOURCE_TYPES``.
        Return a comma-separated string of the remaining valid resources, or
        None if the set is empty.

        :param resources: A list of resources to include in the search results.
        :type resources: list or None

        :return: A comma-separated string of valid resources, sorted so the
            same input always produces the same string.
        :rtype: str or None
        """
        if not resources:
            return None

        valid_resources = self.RESOURCE_TYPES & set(resources)
        if not valid_resources:
            return None
        # Set iteration order is arbitrary; sort for a stable parameter value.
        return ",".join(sorted(valid_resources))

    def _query_api(self, params):
        """
        Query the ComicVine API's ``search`` resource, providing the required
        headers and parameters with the request.

        If the response status indicates an error, hand it to
        ``_handle_http_error`` (which raises). Upon success, return the JSON
        from the response.

        :param params: Parameters to include with the request.
        :type params: dict

        :return: The JSON contained in the response.
        :rtype: dict
        """
        response = requests.get(
            self.API_URL,
            headers=self.HEADERS,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )

        if not response.ok:
            self._handle_http_error(response)

        return response.json()

    def _handle_http_error(self, response):
        """
        Raise an exception describing a failed HTTP response.

        The message is built from the response's status code and reason.
        Every status currently raises a plain ``Exception``; no dedicated
        exception classes exist for 401/403 yet.

        :param response: The requests.Response object returned by the HTTP
            request.
        :type response: requests.Response

        :raises Exception: always, with ``"<status_code> <reason>"`` as the
            message.
        """
        message = f"{response.status_code} {response.reason}"
        raise Exception(message)
|
||||
|
||||
|
||||
def lookup_comic_from_comicvine(title: str) -> dict:
    """
    Search ComicVine for ``title`` and return the raw search response.

    :param title: The comic title to search for.
    :type title: str

    :return: The decoded JSON returned by ``ComicVineClient.search``, or an
        empty dict when no ``COMICVINE_API_KEY`` setting is configured.
    :rtype: dict
    """
    api_key = getattr(settings, "COMICVINE_API_KEY", "")
    if not api_key:
        logger.warning(
            "No ComicVine API key configured, not looking anything up"
        )
        return {}

    # TODO: an earlier draft that followed this return (and never ran)
    # filtered the response's "results" down to entries whose
    # "resource_type" is "volume" and sketched a normalised dict (title,
    # isbn, comicvine_id, first_publish_year, first_sentence, pages,
    # cover_url, cv_author_id, subject_key_list). Until that mapping is
    # written, callers receive the unmodified search response.
    return ComicVineClient(api_key=api_key).search(title)
|
||||
@ -1,21 +1,14 @@
|
||||
import codecs
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Iterable, List, Optional
|
||||
|
||||
import pytz
|
||||
import requests
|
||||
from books.models import Author, Book, Page
|
||||
from books.models import Author, Book
|
||||
from books.openlibrary import get_author_openlibrary_id
|
||||
from django.apps import apps
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.db.models import Sum
|
||||
from pylast import httpx, tempfile
|
||||
from scrobbles.utils import timestamp_user_tz_to_utc
|
||||
from stream_sqlite import stream_sqlite
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -95,11 +88,16 @@ def create_book_from_row(row: list):
|
||||
run_time = total_pages * Book.AVG_PAGE_READING_SECONDS
|
||||
|
||||
book = Book.objects.create(
|
||||
koreader_md5=row[KoReaderBookColumn.MD5.value],
|
||||
title=row[KoReaderBookColumn.TITLE.value],
|
||||
koreader_id=row[KoReaderBookColumn.ID.value],
|
||||
koreader_authors=author_str,
|
||||
pages=total_pages,
|
||||
koreader_data_by_hash={
|
||||
row[KoReaderBookColumn.MD5.value]: {
|
||||
"title": row[KoReaderBookColumn.TITLE.value],
|
||||
"author_str": author_str,
|
||||
"book_id": row[KoReaderBookColumn.ID.value],
|
||||
"pages": total_pages,
|
||||
}
|
||||
},
|
||||
run_time_seconds=run_time,
|
||||
)
|
||||
book.fix_metadata()
|
||||
@ -122,8 +120,16 @@ def build_book_map(rows) -> dict:
|
||||
book_id_map = {}
|
||||
|
||||
for book_row in rows:
|
||||
if (
|
||||
book_row[KoReaderBookColumn.TITLE.value]
|
||||
== "KOReader Quickstart Guide"
|
||||
):
|
||||
logger.info(
|
||||
"Ignoring the KOReader quickstart guide. No on wants that."
|
||||
)
|
||||
continue
|
||||
book = Book.objects.filter(
|
||||
koreader_md5=book_row[KoReaderBookColumn.MD5.value]
|
||||
koreader_md5__icontains=book_row[KoReaderBookColumn.MD5.value]
|
||||
).first()
|
||||
|
||||
if not book:
|
||||
@ -136,6 +142,7 @@ def build_book_map(rows) -> dict:
|
||||
|
||||
book_id_map[book_row[KoReaderBookColumn.ID.value]] = {
|
||||
"book_id": book.id,
|
||||
"hash": book_row[KoReaderBookColumn.MD5.value],
|
||||
"total_seconds": total_seconds,
|
||||
}
|
||||
return book_id_map
|
||||
@ -289,6 +296,7 @@ def build_scrobbles_from_book_map(
|
||||
timestamp=timestamp,
|
||||
stop_timestamp=stop_timestamp,
|
||||
playback_position_seconds=playback_position_seconds,
|
||||
book_koreader_hash=book_dict.get("hash"),
|
||||
book_page_data=scrobble_page_data,
|
||||
book_pages_read=page_number,
|
||||
in_progress=False,
|
||||
|
||||
@ -0,0 +1,17 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from books.models import Book
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """
    Copy each book's individual KOReader fields into the
    ``koreader_data_by_hash`` JSON field, keyed by the book's KOReader MD5.
    """

    help = "Consolidate per-book KOReader fields into koreader_data_by_hash"

    def handle(self, *args, **options):
        """
        Walk every ``Book`` and store its KOReader title, id, author string
        and page count under its ``koreader_md5`` key.

        Books without a ``koreader_md5`` have nothing to copy and are
        skipped without touching the database. Existing entries under other
        hashes are preserved.
        """
        for book in Book.objects.all():
            if not book.koreader_md5:
                continue

            # Start from whatever is already stored so other hashes survive.
            koreader_data = book.koreader_data_by_hash or {}
            koreader_data[book.koreader_md5] = {
                "title": book.title,
                "book_id": book.koreader_id,
                "author_str": book.koreader_authors,
                "pages": book.pages,
            }
            book.koreader_data_by_hash = koreader_data
            book.save(update_fields=["koreader_data_by_hash"])
|
||||
@ -0,0 +1,62 @@
|
||||
from books.models import Book
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
||||
class Command(BaseCommand):
    # Builds one ScrobbledPage per existing book page and, with --commit,
    # bulk-creates them. Without --commit it only reports how many it found.
    #
    # NOTE(review): ScrobbledPage is not imported by the imports visible at
    # the top of this file — confirm where it is defined and import it,
    # otherwise this command fails with NameError on the first page.

    def add_arguments(self, parser):
        # --commit switches the command from its default dry run to writing.
        parser.add_argument(
            "--commit",
            action="store_true",
            help="Actually populate data",
        )

    def handle(self, *args, **options):
        # Dry run unless --commit was passed.
        dry_run = True
        if options["commit"]:
            dry_run = False

        pages_to_create = []
        # NOTE(review): these two are initialised once, before the book loop,
        # so the scrobble found for one book is still set when the next
        # book's pages start — confirm that carry-over is intended.
        associated_scrobble = None
        last_scrobble = None
        for book in Book.objects.all():
            for page in book.page_set.all().order_by("number"):
                notes = ""
                # Remember the scrobble used for the previous page before
                # possibly replacing it below.
                last_scrobble = associated_scrobble
                if (
                    not associated_scrobble
                    or page.number > associated_scrobble.book_pages_read
                ):
                    # Look for a scrobble by the page's user for this book
                    # whose timestamp falls inside the page's start/end times.
                    associated_scrobble = page.user.scrobble_set.filter(
                        book=page.book,
                        timestamp__gte=page.start_time,
                        timestamp__lte=page.end_time,
                    ).first()

                if (
                    last_scrobble
                    and not associated_scrobble
                    and page.number > last_scrobble.book_pages_read
                ):
                    # No scrobble matched this page: fall back to the previous
                    # one and record that in the notes.
                    associated_scrobble = last_scrobble
                    notes = f"Extrapolated reading from scrobble {associated_scrobble.id}"

                pages_to_create.append(
                    ScrobbledPage(
                        scrobble=associated_scrobble,
                        number=page.number,
                        start_time=page.start_time,
                        end_time=page.end_time,
                        duration_seconds=page.duration_seconds,
                        notes=notes,
                    )
                )

        pages_to_move_len = len(pages_to_create)
        if dry_run:
            print(
                f"Found {pages_to_move_len} to migrate. Use --commit to move them"
            )
            return

        ScrobbledPage.objects.bulk_create(pages_to_create)
        print(f"Migrated {pages_to_move_len} generic pages to scrobbled pages")
|
||||
@ -0,0 +1,28 @@
|
||||
# Generated by Django 4.2.9 on 2024-01-29 05:55
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Adds three nullable JSON columns in the books app:
    # Author.comicvine_data, Book.comicvine_data and
    # Book.koreader_data_by_hash.

    dependencies = [
        ("books", "0019_alter_book_authors"),
    ]

    operations = [
        migrations.AddField(
            model_name="author",
            name="comicvine_data",
            field=models.JSONField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="book",
            name="comicvine_data",
            field=models.JSONField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="book",
            name="koreader_data_by_hash",
            field=models.JSONField(blank=True, null=True),
        ),
    ]
|
||||
@ -59,6 +59,7 @@ class Author(TimeStampedModel):
|
||||
wikidata_id = models.CharField(max_length=255, **BNULL)
|
||||
goodreads_id = models.CharField(max_length=255, **BNULL)
|
||||
librarything_id = models.CharField(max_length=255, **BNULL)
|
||||
comicvine_data = models.JSONField(**BNULL)
|
||||
amazon_id = models.CharField(max_length=255, **BNULL)
|
||||
|
||||
def __str__(self):
|
||||
@ -92,9 +93,11 @@ class Book(LongPlayScrobblableMixin):
|
||||
title = models.CharField(max_length=255)
|
||||
authors = models.ManyToManyField(Author, blank=True)
|
||||
goodreads_id = models.CharField(max_length=255, **BNULL)
|
||||
# All individual koreader fields are deprecated
|
||||
koreader_id = models.IntegerField(**BNULL)
|
||||
koreader_authors = models.CharField(max_length=255, **BNULL)
|
||||
koreader_md5 = models.CharField(max_length=255, **BNULL)
|
||||
koreader_data_by_hash = models.JSONField(**BNULL)
|
||||
isbn = models.CharField(max_length=255, **BNULL)
|
||||
pages = models.IntegerField(**BNULL)
|
||||
language = models.CharField(max_length=4, **BNULL)
|
||||
@ -102,6 +105,7 @@ class Book(LongPlayScrobblableMixin):
|
||||
first_sentence = models.TextField(**BNULL)
|
||||
openlibrary_id = models.CharField(max_length=255, **BNULL)
|
||||
locg_slug = models.CharField(max_length=255, **BNULL)
|
||||
comicvine_data = models.JSONField(**BNULL)
|
||||
cover = models.ImageField(upload_to="books/covers/", **BNULL)
|
||||
cover_small = ImageSpecField(
|
||||
source="cover",
|
||||
@ -146,15 +150,8 @@ class Book(LongPlayScrobblableMixin):
|
||||
author_name = self.author.name
|
||||
|
||||
if not data:
|
||||
if self.locg_slug:
|
||||
data = lookup_comic_by_locg_slug(str(self.locg_slug))
|
||||
else:
|
||||
data = lookup_comic_from_locg(str(self.title))
|
||||
|
||||
if not data:
|
||||
logger.warn(
|
||||
f"Book not found on LOCG, checking OL {self.title}"
|
||||
)
|
||||
logger.warn(f"rChecking openlibrary for {self.title}")
|
||||
if self.openlibrary_id and force_update:
|
||||
data = lookup_book_from_openlibrary(
|
||||
str(self.openlibrary_id)
|
||||
@ -163,9 +160,20 @@ class Book(LongPlayScrobblableMixin):
|
||||
data = lookup_book_from_openlibrary(
|
||||
str(self.title), author_name
|
||||
)
|
||||
if not data:
|
||||
logger.warn(f"Book not found in OL {self.title}")
|
||||
return
|
||||
|
||||
if not data:
|
||||
if self.locg_slug:
|
||||
logger.warn(
|
||||
f"rChecking openlibrary for {self.title} with slug {self.locg_slug}"
|
||||
)
|
||||
data = lookup_comic_by_locg_slug(str(self.locg_slug))
|
||||
else:
|
||||
logger.warn(f"rChecking openlibrary for {self.title}")
|
||||
data = lookup_comic_from_locg(str(self.title))
|
||||
|
||||
if not data:
|
||||
logger.warn(f"Book not found in any sources: {self.title}")
|
||||
return
|
||||
|
||||
# We can discard the author name from OL for now, we'll lookup details below
|
||||
data.pop("ol_author_name", "")
|
||||
|
||||
@ -0,0 +1,18 @@
|
||||
# Generated by Django 4.2.9 on 2024-01-29 06:13
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Adds the nullable Scrobble.book_koreader_hash column (max 50 chars).

    dependencies = [
        ("scrobbles", "0046_scrobble_book_page_data_alter_chartrecord_year"),
    ]

    operations = [
        migrations.AddField(
            model_name="scrobble",
            name="book_koreader_hash",
            field=models.CharField(blank=True, max_length=50, null=True),
        ),
    ]
|
||||
@ -520,9 +520,12 @@ class Scrobble(TimeStampedModel):
|
||||
scrobble_log = models.TextField(**BNULL)
|
||||
notes = models.TextField(**BNULL)
|
||||
|
||||
# Fields for keeping track long content like books and games
|
||||
# Fields for keeping track of book data
|
||||
book_koreader_hash = models.CharField(max_length=50, **BNULL)
|
||||
book_pages_read = models.IntegerField(**BNULL)
|
||||
book_page_data = models.JSONField(**BNULL)
|
||||
|
||||
# Fields for keeping track of video game data
|
||||
videogame_save_data = models.FileField(
|
||||
upload_to="scrobbles/videogame_save_data/", **BNULL
|
||||
)
|
||||
|
||||
@ -65,6 +65,7 @@ LASTFM_API_KEY = os.getenv("VROBBLER_LASTFM_API_KEY")
|
||||
LASTFM_SECRET_KEY = os.getenv("VROBBLER_LASTFM_SECRET_KEY")
|
||||
IGDB_CLIENT_ID = os.getenv("VROBBLER_IGDB_CLIENT_ID")
|
||||
IGDB_CLIENT_SECRET = os.getenv("VROBBLER_IGDB_CLIENT_SECRET")
|
||||
COMICVINE_API_KEY = os.getenv("VROBBLER_COMICVINE_API_KEY")
|
||||
GEOLOC_ACCURACY = os.getenv("VROBBLER_GEOLOC_ACCURACY", 3)
|
||||
|
||||
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
||||
|
||||
Reference in New Issue
Block a user