Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d5830f5cd1 | |||
| c71b51fdb8 | |||
| 935d059a20 | |||
| 25776eb495 | |||
| 5ac4625af9 |
49
PROJECT.org
49
PROJECT.org
@ -442,15 +442,6 @@ displayed in the template.
|
||||
|
||||
** TODO [#B] Add CSV endpoint for book scrobbles that LibraryThing can ingest :personal:project:books:feature:export:
|
||||
https://app.todoist.com/app/task/add-a-csv-endpoint-for-users-book-reads-that-library-thing-can-ingest-6X7QPMRp265xMXqg#comment-6X7QrXq6gJjMP4hg
|
||||
** TODO [#B] Scrape ComicBookRoundUp ratings for comic book metadata :vrobbler:books:feature:comicbook:personal:project:
|
||||
|
||||
- Note taken on [2025-09-25 Thu 10:51]
|
||||
|
||||
As an example https://comicbookroundup.com/comic-books/reviews/humanoids-publishing/the-history-of-science-fiction
|
||||
** TODO [#B] Find page numbers for comic books from ComicVine :feature:books:
|
||||
:PROPERTIES:
|
||||
:ID: 79f867c3-1288-4143-b6bf-2a452983ee9f
|
||||
:END:
|
||||
** TODO [#B] Make IMAP and WebDAV configurable :webdav:feature:imap:importers:
|
||||
:PROPERTIES:
|
||||
:ID: b1426d92-2feb-4d15-9738-d5b7b0594f96
|
||||
@ -518,6 +509,46 @@ log a warning and move on.
|
||||
We should have a global view `/favorites/` that shows the logged-in user's
|
||||
favorited media objects.
|
||||
|
||||
** TODO [#B] Scrape ComicBookRoundUp ratings for comic book metadata :vrobbler:books:feature:comicbook:personal:project:
|
||||
|
||||
- Note taken on [2025-09-25 Thu 10:51]
|
||||
|
||||
As an example https://comicbookroundup.com/comic-books/reviews/humanoids-publishing/the-history-of-science-fiction
|
||||
** TODO [#B] Find page numbers for comic books from ComicVine :feature:books:
|
||||
:PROPERTIES:
|
||||
:ID: 79f867c3-1288-4143-b6bf-2a452983ee9f
|
||||
:END:
|
||||
* Version 51.2 [2/2]
|
||||
** DONE [#A] Fix bug where last page of book gets separate scrobble :bug:books:importers:koreader:
|
||||
:PROPERTIES:
|
||||
:ID: e13e0b4c-461e-e5a9-c685-b972f4e262e5
|
||||
:END:
|
||||
|
||||
*** Description
|
||||
|
||||
The new KoReader code is working great to import books with the correct timezone
|
||||
and what-not. But it has a weird artifact of creating one extra scrobble for the
|
||||
last page read. Need to button that up.
|
||||
|
||||
** DONE [#B] Fix metadata scraping for books :books:metadata:
|
||||
:PROPERTIES:
|
||||
:ID: ea416a69-a8a8-4d05-b7d4-0a3470820e34
|
||||
:END:
|
||||
|
||||
|
||||
* Version 51.1 [1/1]
|
||||
** DONE [#A] Fix scrobbling comic books :books:scrobbles:bug:
|
||||
:PROPERTIES:
|
||||
:ID: 8dfbff19-3fa4-f3b8-21c7-7a416498000c
|
||||
:END:
|
||||
|
||||
*** Description
|
||||
|
||||
At some point logdata and log got confused, and now when you try
|
||||
to scrobble a comic book, it just throws errors. We should look
|
||||
into where the confusion happened and fix it.
|
||||
|
||||
|
||||
* Version 51.0 [3/3]
|
||||
** DONE [#B] Fix koreader scrobble imports to use DST properly :bug:books:imports:
|
||||
:PROPERTIES:
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "vrobbler"
|
||||
version = "51.0"
|
||||
version = "51.2"
|
||||
description = ""
|
||||
authors = ["Colin Powell <colin@unbl.ink>"]
|
||||
|
||||
|
||||
@ -217,7 +217,6 @@ def build_scrobbles_from_book_map(book_map: dict, user: "User") -> list["Scrobbl
|
||||
pages_not_found.append(book_id)
|
||||
continue
|
||||
|
||||
should_create_scrobble = False
|
||||
scrobble_page_data = {}
|
||||
playback_position_seconds = 0
|
||||
prev_page_stats = {}
|
||||
@ -247,32 +246,39 @@ def build_scrobbles_from_book_map(book_map: dict, user: "User") -> list["Scrobbl
|
||||
end_of_reading = pages_processed == total_pages_read
|
||||
big_jump_to_this_page = (page_number - last_page_number) > 10
|
||||
is_session_gap = seconds_from_last_page > SESSION_GAP_SECONDS
|
||||
if (is_session_gap and not big_jump_to_this_page) or end_of_reading:
|
||||
should_create_scrobble = True
|
||||
should_create_scrobble = (
|
||||
is_session_gap and not big_jump_to_this_page
|
||||
) or end_of_reading
|
||||
|
||||
# Always accumulate the current page first
|
||||
scrobble_page_data[page_number] = stats
|
||||
|
||||
if should_create_scrobble:
|
||||
if not scrobble_page_data:
|
||||
scrobble_page_data[page_number] = stats
|
||||
# For end-of-reading, the current page is already accumulated
|
||||
# and belongs in this scrobble. For a session gap, remove the
|
||||
# current page from this scrobble — it starts a new session.
|
||||
if is_session_gap and not end_of_reading:
|
||||
del scrobble_page_data[page_number]
|
||||
|
||||
scrobble_page_data = dict(
|
||||
sorted(
|
||||
scrobble_page_data.items(),
|
||||
key=lambda x: x[1]["start_ts"],
|
||||
)
|
||||
)
|
||||
try:
|
||||
first_page = scrobble_page_data.get(
|
||||
list(scrobble_page_data.keys())[0]
|
||||
)
|
||||
last_page = scrobble_page_data.get(
|
||||
list(scrobble_page_data.keys())[-1]
|
||||
)
|
||||
except IndexError:
|
||||
|
||||
if not scrobble_page_data:
|
||||
logger.error(
|
||||
"Could not process book, no page data found",
|
||||
extra={"scrobble_page_data": scrobble_page_data},
|
||||
)
|
||||
continue
|
||||
|
||||
first_page = next(iter(scrobble_page_data.values()))
|
||||
last_page = scrobble_page_data[
|
||||
list(scrobble_page_data.keys())[-1]
|
||||
]
|
||||
|
||||
timestamp = user.profile.get_timestamp_with_tz(
|
||||
datetime.fromtimestamp(
|
||||
int(first_page.get("start_ts"))
|
||||
@ -319,90 +325,18 @@ def build_scrobbles_from_book_map(book_map: dict, user: "User") -> list["Scrobbl
|
||||
timezone=tz,
|
||||
)
|
||||
)
|
||||
# Then start over
|
||||
should_create_scrobble = False
|
||||
# Then start over for the next session
|
||||
playback_position_seconds = 0
|
||||
scrobble_page_data = {}
|
||||
|
||||
# We accumulate pages for the scrobble until we should create a new one
|
||||
scrobble_page_data[page_number] = stats
|
||||
# For session gaps, re-add the current page as the
|
||||
# beginning of the next session's accumulation
|
||||
if is_session_gap and not end_of_reading:
|
||||
scrobble_page_data[page_number] = stats
|
||||
|
||||
last_page_number = page_number
|
||||
prev_page_stats = stats
|
||||
|
||||
# Handle leftover pages that never triggered a session gap
|
||||
if scrobble_page_data:
|
||||
scrobble_page_data = dict(
|
||||
sorted(
|
||||
scrobble_page_data.items(),
|
||||
key=lambda x: x[1]["start_ts"],
|
||||
)
|
||||
)
|
||||
try:
|
||||
first_page = scrobble_page_data.get(
|
||||
list(scrobble_page_data.keys())[0]
|
||||
)
|
||||
last_page = scrobble_page_data.get(
|
||||
list(scrobble_page_data.keys())[-1]
|
||||
)
|
||||
except IndexError:
|
||||
logger.error(
|
||||
"Could not process book, no page data found",
|
||||
extra={"scrobble_page_data": scrobble_page_data},
|
||||
)
|
||||
continue
|
||||
|
||||
playback_position_seconds = sum(
|
||||
p["duration"] for p in scrobble_page_data.values()
|
||||
)
|
||||
|
||||
timestamp = user.profile.get_timestamp_with_tz(
|
||||
datetime.fromtimestamp(
|
||||
int(first_page.get("start_ts"))
|
||||
)
|
||||
)
|
||||
stop_timestamp = user.profile.get_timestamp_with_tz(
|
||||
datetime.fromtimestamp(
|
||||
int(last_page.get("end_ts"))
|
||||
)
|
||||
)
|
||||
|
||||
scrobble = Scrobble.objects.filter(
|
||||
timestamp=timestamp,
|
||||
book_id=book_id,
|
||||
user_id=user.id,
|
||||
).first()
|
||||
|
||||
if not scrobble:
|
||||
logger.info(
|
||||
f"Queueing scrobble for {book_id}, page {list(scrobble_page_data.keys())[0]}"
|
||||
)
|
||||
log_data = {
|
||||
"koreader_hash": book_dict.get("hash"),
|
||||
"page_data": scrobble_page_data,
|
||||
"pages_read": len(scrobble_page_data.keys()),
|
||||
}
|
||||
if hasattr(timestamp.tzinfo, "tzname"):
|
||||
tz = timestamp.tzinfo.tzname
|
||||
if hasattr(timestamp.tzinfo, "name"):
|
||||
tz = timestamp.tzinfo.name
|
||||
scrobbles_to_create.append(
|
||||
Scrobble(
|
||||
book_id=book_id,
|
||||
user_id=user.id,
|
||||
source="KOReader",
|
||||
media_type=Scrobble.MediaType.BOOK,
|
||||
timestamp=timestamp,
|
||||
log=log_data,
|
||||
stop_timestamp=stop_timestamp,
|
||||
playback_position_seconds=playback_position_seconds,
|
||||
in_progress=False,
|
||||
played_to_completion=True,
|
||||
long_play_complete=False,
|
||||
timezone=tz,
|
||||
)
|
||||
)
|
||||
|
||||
if pages_not_found:
|
||||
logger.info(f"Pages not found for books: {set(pages_not_found)}")
|
||||
return scrobbles_to_create
|
||||
|
||||
282
vrobbler/apps/books/management/commands/cleanup_book_metadata.py
Normal file
282
vrobbler/apps/books/management/commands/cleanup_book_metadata.py
Normal file
@ -0,0 +1,282 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import transaction
|
||||
|
||||
from books.constants import READCOMICSONLINE_URL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Maps each ``--missing-<flag>`` name to a predicate that reports whether a
# book lacks that piece of metadata. Insertion order is the order in which the
# command-line flags are registered.
MISSING_GROUPS = {
    "cover": lambda book: not book.cover,
    "summary": lambda book: not book.summary,
    # Either ISBN form counts as "has an ISBN".
    "isbn": lambda book: not (book.isbn_13 or book.isbn_10),
    "pages": lambda book: book.pages is None,
    "language": lambda book: not book.language,
    "publisher": lambda book: not book.publisher,
    "publish_year": lambda book: book.first_publish_year is None,
}

# Every known flag name, in the same order as the predicate table above.
MISSING_ALL = list(MISSING_GROUPS)
|
||||
|
||||
|
||||
def _book_matches(book, flags):
    """Return True if ``book`` is missing at least one of the flagged fields.

    ``flags`` is a collection of keys into ``MISSING_GROUPS``; unknown keys
    are ignored. An empty or missing ``flags`` never matches.
    """
    predicates = (MISSING_GROUPS.get(name) for name in flags or ())
    return any(predicate(book) for predicate in predicates if predicate)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that fills in blank metadata fields on books.

    Candidate books are chosen with ``--all``, the ``--missing-*`` flags
    and/or ``--comics-only``. Lookups and writes only happen when ``--commit``
    is given; otherwise the command reports how many books matched and exits.
    """

    help = "Backfill missing metadata on books from Google Books, OpenLibrary, and ComicVine"

    # Keys of the per-book result dict built by ``_apply`` and tallied by
    # ``handle``.
    _STAT_KEYS = (
        "cover_fixed",
        "summary_fixed",
        "isbn_fixed",
        "pages_fixed",
        "language_fixed",
        "publisher_fixed",
        "publish_year_fixed",
    )

    # (model attribute / lookup key, stats key or None, fill only when None?)
    # Fields flagged True are overwritten only when the stored value is None;
    # the others are overwritten whenever the stored value is falsy.
    # The order here is the order fields appear in the [ENRICHED] output line.
    _FILLABLE_FIELDS = (
        ("summary", "summary_fixed", False),
        ("isbn_13", "isbn_fixed", False),
        ("isbn_10", "isbn_fixed", False),
        ("pages", "pages_fixed", True),
        ("language", "language_fixed", False),
        ("publisher", "publisher_fixed", False),
        ("first_publish_year", "publish_year_fixed", True),
        ("openlibrary_id", None, False),
        ("comicvine_id", None, False),
        ("issue_number", None, True),
        ("volume_number", None, True),
    )

    def add_arguments(self, parser):
        """Register the command-line options on ``parser``."""
        parser.add_argument(
            "--commit",
            action="store_true",
            help="Commit changes to the database",
        )
        parser.add_argument(
            "--batch-size",
            type=int,
            default=100,
            help="Number of books to process per batch (default: 100)",
        )
        parser.add_argument(
            "--sleep",
            type=float,
            default=0.5,
            help="Seconds to sleep between API calls (default: 0.5)",
        )
        # One --missing-<flag> switch per known field; each appends its flag
        # name to the shared ``missing_flags`` list.
        for flag in MISSING_ALL:
            parser.add_argument(
                f"--missing-{flag}",
                dest="missing_flags",
                action="append_const",
                const=flag,
                help=f"Process books missing {flag}",
            )
        parser.add_argument(
            "--comics-only",
            action="store_true",
            help="Only process books with a readcomicsonline.ru URL",
        )
        parser.add_argument(
            "--all",
            action="store_true",
            dest="all_missing",
            help="Process books missing any metadata field",
        )

    def handle(self, *args, **options):
        """Select the matching books and, with ``--commit``, enrich them.

        Raises:
            CommandError: if ``--batch-size`` is not a positive integer.
        """
        from django.core.management.base import CommandError

        from books.models import Book

        commit = options["commit"]
        batch_size = options["batch_size"]
        sleep_secs = options["sleep"]
        flags = options.get("missing_flags") or []
        comics_only = options["comics_only"]
        all_missing = options["all_missing"]

        # A zero step makes range() raise and a negative one yields no
        # batches at all, so reject both up front with a clear message.
        if batch_size < 1:
            raise CommandError("--batch-size must be a positive integer")

        if all_missing:
            flags = MISSING_ALL

        if not flags and not comics_only:
            self.stdout.write(
                "No filters specified. Use --all, --missing-*, or --comics-only."
            )
            return

        queryset = Book.objects.all()
        if comics_only:
            queryset = queryset.filter(readcomics_url__isnull=False)

        # The "missing" predicates run in Python, so the selection is
        # materialised as a list either way.
        if flags:
            books = [b for b in queryset.iterator() if _book_matches(b, flags)]
        else:
            books = list(queryset)

        total = len(books)
        self.stdout.write(f"Found {total} books to process")

        if not commit:
            self.stdout.write(
                "Dry run — no API calls will be made. Use --commit to run lookups."
            )
            return

        enriched = 0
        skipped = 0
        stats = dict.fromkeys(self._STAT_KEYS, 0)

        for batch_num, offset in enumerate(range(0, total, batch_size), start=1):
            batch = books[offset : offset + batch_size]
            for book in batch:
                result = self._enrich_book(book, sleep_secs)
                if result:
                    enriched += 1
                    for key in stats:
                        if result.get(key):
                            stats[key] += 1
                else:
                    skipped += 1

            self.stdout.write(
                f" Batch {batch_num}: {offset + len(batch)}/{total} — "
                f"enriched: {enriched}, skipped: {skipped}"
            )

        self.stdout.write(
            f"\nResults (commit={commit}):\n"
            f" Books enriched: {enriched}\n"
            f" Books skipped: {skipped}\n"
            f" Covers fixed: {stats['cover_fixed']}\n"
            f" Summaries fixed:{stats['summary_fixed']}\n"
            f" ISBNs fixed: {stats['isbn_fixed']}\n"
            f" Pages fixed: {stats['pages_fixed']}\n"
            f" Languages fixed:{stats['language_fixed']}\n"
            f" Publishers fixed:{stats['publisher_fixed']}\n"
            f" Publish yrs fixed: {stats['publish_year_fixed']}"
        )

    @staticmethod
    def _merge_missing(target, source):
        """Copy non-empty values from ``source`` into ``target``.

        A key is only written when ``target`` has no truthy value for it, so
        earlier sources keep priority but an empty value from one source never
        blocks a populated value from a later one.
        """
        for key, value in (source or {}).items():
            if value and not target.get(key):
                target[key] = value

    def _enrich_book(self, book, sleep_secs):
        """Look ``book`` up in the external sources and apply what was found.

        Sources are consulted in priority order: ComicVine (only for books
        with a matching read-comics URL), then OpenLibrary, then Google Books.

        Returns:
            The dict produced by ``_apply`` when the book was modified,
            otherwise ``None``.
        """
        from books.sources.comicvine import lookup_comic_from_comicvine
        from books.sources.google import lookup_book_from_google
        from books.sources.openlibrary import lookup_book_from_openlibrary as lookup_book_from_ol

        title = book.original_title or book.title
        author_name = book.author.name if book.author else None
        book_dict = {}

        is_comic = bool(book.readcomics_url) or (
            book.issue_number is not None or book.volume_number is not None
        )
        if is_comic and READCOMICSONLINE_URL in (book.readcomics_url or ""):
            self._merge_missing(book_dict, lookup_comic_from_comicvine(title))

        ol_data = lookup_book_from_ol(title, author=author_name)
        time.sleep(sleep_secs)
        google_data = lookup_book_from_google(title)

        self._merge_missing(book_dict, ol_data)
        self._merge_missing(book_dict, google_data)

        if not book_dict:
            return None

        return self._apply(book, book_dict, title)

    def _apply(self, book, data, title):
        """Write values from ``data`` onto ``book`` where the book has none.

        Existing values on the book are never overwritten. ``title`` is
        accepted for interface compatibility and is not used.

        Returns:
            A dict keyed by ``_STAT_KEYS`` with True for each tracked field
            group that was filled, or ``None`` when nothing on the book was
            changed. Changes to untracked fields (source IDs, issue/volume
            numbers, genres) still yield a dict so the book is not reported
            as skipped.
        """
        changed = dict.fromkeys(self._STAT_KEYS, False)
        update_fields = []
        modified = False

        for field, stat_key, none_only in self._FILLABLE_FIELDS:
            value = data.get(field)
            if not value:
                continue
            current = getattr(book, field)
            is_missing = current is None if none_only else not current
            if not is_missing:
                continue
            setattr(book, field, value)
            update_fields.append(field)
            if stat_key:
                changed[stat_key] = True

        if update_fields:
            book.save(update_fields=update_fields)
            modified = True
            self.stdout.write(f" [ENRICHED] {book} — {', '.join(update_fields)}")

        cover_url = data.get("cover_url")
        if cover_url and not book.cover:
            book.save_image_from_url(cover_url)
            # Only count the cover once the download actually populated it.
            if book.cover:
                changed["cover_fixed"] = True
                modified = True
                self.stdout.write(f" [COVER] {book} — cover saved from source")

        # "generes" is kept as a fallback spelling, as in the earlier lookup.
        genres = data.get("genres") or data.get("generes") or []
        if genres:
            existing = set(book.genre.names())
            new_genres = [g for g in genres if g not in existing]
            if new_genres:
                book.genre.add(*new_genres)
                modified = True
                self.stdout.write(f" [GENRES] {book} — added {len(new_genres)} genres")

        return changed if modified else None
|
||||
@ -0,0 +1,130 @@
|
||||
import logging
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import transaction
|
||||
|
||||
from books.koreader import SESSION_GAP_SECONDS, fix_long_play_stats_for_scrobbles
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
SESSION_GAP = timedelta(seconds=SESSION_GAP_SECONDS)
|
||||
|
||||
|
||||
def _page_data_keys(pages):
|
||||
return sorted(int(k) for k in (pages or {}))
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Fold stray single-page KOReader scrobbles into the scrobble before them.

    A scrobble is treated as an orphan when it holds exactly one page, that
    page directly follows the last page of the preceding scrobble for the
    same book, and it starts no more than ``SESSION_GAP`` after that scrobble
    stopped. Nothing is written unless ``--commit`` is given.
    """

    help = "Merge orphaned 1-page KOReader scrobbles into the preceding scrobble"

    def add_arguments(self, parser):
        """Register the ``--commit`` switch on ``parser``."""
        parser.add_argument(
            "--commit",
            action="store_true",
            help="Commit changes to the database",
        )

    @staticmethod
    def _match_orphan(current, orphan):
        """Decide whether ``orphan`` is a stray page belonging to ``current``.

        Returns:
            ``(current_pages, orphan_page_num, orphan_page_stats)`` when the
            orphan should be merged, otherwise ``None``.
        """
        orphan_pages = orphan.logdata.page_data if orphan.logdata else {}
        orphan_keys = _page_data_keys(orphan_pages)
        if len(orphan_keys) != 1:
            return None

        current_pages = current.logdata.page_data if current.logdata else {}
        current_keys = _page_data_keys(current_pages)
        if not current_keys:
            return None

        orphan_page_num = orphan_keys[0]
        if orphan_page_num != current_keys[-1] + 1:
            return None

        # Without both timestamps the gap cannot be measured; leave the pair
        # alone rather than fail on the subtraction.
        if orphan.timestamp is None or current.stop_timestamp is None:
            return None
        if orphan.timestamp - current.stop_timestamp > SESSION_GAP:
            return None

        # There is exactly one entry; fetch it by its stored key so int and
        # str keys both work.
        orphan_key = next(iter(orphan_pages))
        return current_pages, orphan_page_num, orphan_pages[orphan_key]

    def handle(self, *args, **options):
        """Find orphan scrobbles per book and merge them into their neighbour."""
        from scrobbles.models import Scrobble

        commit = options["commit"]

        qs = Scrobble.objects.filter(
            media_type="Book", source="KOReader"
        ).order_by("book_id", "timestamp")

        if not qs.exists():
            self.stdout.write("No KOReader book scrobbles found.")
            return

        merged = 0
        affected_books = set()

        # Group by book; each group keeps the queryset's timestamp ordering.
        book_scrobbles = {}
        for scrobble in qs:
            book_scrobbles.setdefault(scrobble.book_id, []).append(scrobble)

        if not commit:
            self.stdout.write("Dry run — no changes will be saved. Use --commit to apply.")

        for book_id, scrobbles in book_scrobbles.items():
            batch_merged = 0
            i = 0
            while i < len(scrobbles) - 1:
                current = scrobbles[i]
                orphan = scrobbles[i + 1]

                match = self._match_orphan(current, orphan)
                if match is None:
                    i += 1
                    continue

                current_pages, orphan_page_num, orphan_stats = match

                # Merge orphan into current
                current_pages[str(orphan_page_num)] = orphan_stats
                current.log["page_data"] = current_pages
                current.log["pages_read"] = len(current_pages)
                # Keep the existing stop time if the orphan has none.
                if orphan.stop_timestamp is not None:
                    current.stop_timestamp = orphan.stop_timestamp
                current.playback_position_seconds = (
                    current.playback_position_seconds or 0
                ) + (orphan.playback_position_seconds or 0)

                affected_books.add(book_id)

                if commit:
                    # Save and delete together so a failure cannot leave the
                    # page counted in both scrobbles or in neither.
                    with transaction.atomic():
                        current.save(
                            update_fields=[
                                "log",
                                "stop_timestamp",
                                "playback_position_seconds",
                            ]
                        )
                        orphan.delete()

                merged += 1
                batch_merged += 1
                # ``i`` is not advanced: the merged scrobble may absorb the
                # next one as well.
                scrobbles.pop(i + 1)

            if batch_merged:
                self.stdout.write(
                    f" Book {book_id}: merged {batch_merged} orphan scrobble(s)"
                )

        self.stdout.write(f"\nTotal orphans merged: {merged}")

        if commit and affected_books:
            self.stdout.write("Recalculating long_play_stats for affected books...")
            for book_id in affected_books:
                scrobbles_to_fix = (
                    Scrobble.objects.filter(book_id=book_id, source="KOReader")
                    .order_by("timestamp")
                )
                fix_long_play_stats_for_scrobbles(list(scrobbles_to_fix))

            self.stdout.write(f"Fixed stats for {len(affected_books)} books.")

        if not commit:
            self.stdout.write(
                f"\nWould merge {merged} orphan scrobble(s) across "
                f"{len(affected_books)} book(s)."
            )
|
||||
@ -229,36 +229,20 @@ class Book(LongPlayScrobblableMixin):
|
||||
) -> "Book":
|
||||
book, created = cls.objects.get_or_create(title=title)
|
||||
|
||||
if not created:
|
||||
if not created and not overwrite:
|
||||
return book
|
||||
|
||||
book_dict = lookup_comic_from_comicvine(title)
|
||||
if not book_dict:
|
||||
return book
|
||||
|
||||
if created or overwrite:
|
||||
author_list = []
|
||||
author_dicts = book_dict.pop("author_dicts")
|
||||
if author_dicts:
|
||||
for author_dict in author_dicts:
|
||||
if author_dict.get("authorId"):
|
||||
author, a_created = Author.objects.get_or_create(
|
||||
semantic_id=author_dict.get("authorId")
|
||||
)
|
||||
author_list.append(author)
|
||||
if a_created:
|
||||
author.name = author_dict.get("name")
|
||||
author.save()
|
||||
# TODO enrich author?
|
||||
...
|
||||
for k, v in book_dict.items():
|
||||
setattr(book, k, v)
|
||||
book.save()
|
||||
|
||||
for k, v in book_dict.items():
|
||||
setattr(book, k, v)
|
||||
book.save()
|
||||
|
||||
if author_list:
|
||||
book.authors.add(*author_list)
|
||||
genres = book_dict.pop("genres", [])
|
||||
if genres:
|
||||
book.genre.add(*genres)
|
||||
genres = book_dict.get("genres", [])
|
||||
if genres:
|
||||
book.genre.add(*genres)
|
||||
return book
|
||||
|
||||
@classmethod
|
||||
@ -296,20 +280,27 @@ class Book(LongPlayScrobblableMixin):
|
||||
book_dict = lookup_comic_from_comicvine(title)
|
||||
if book_dict:
|
||||
source_tag = MediaSourceTag.COMICVINE
|
||||
book_dict["readcomics_url"] = get_comic_issue_url(url)
|
||||
book_dict["next_readcomics_url"] = next_url_if_exists(
|
||||
book_dict["readcomics_url"]
|
||||
)
|
||||
book_dict["readcomics_url"] = get_comic_issue_url(url)
|
||||
book_dict["next_readcomics_url"] = next_url_if_exists(
|
||||
book_dict["readcomics_url"]
|
||||
)
|
||||
|
||||
if not book_dict:
|
||||
book_dict = lookup_book_from_ol(title, author=author)
|
||||
if book_dict:
|
||||
book_dict = {}
|
||||
ol_data = lookup_book_from_ol(title, author=author)
|
||||
google_data = lookup_book_from_google(title)
|
||||
|
||||
if ol_data:
|
||||
book_dict.update(ol_data)
|
||||
source_tag = MediaSourceTag.OPENLIBRARY
|
||||
|
||||
if not book_dict:
|
||||
book_dict = lookup_book_from_google(title)
|
||||
if book_dict:
|
||||
if google_data:
|
||||
for k, v in google_data.items():
|
||||
if v:
|
||||
book_dict.setdefault(k, v)
|
||||
source_tag = MediaSourceTag.GOOGLE_BOOKS
|
||||
if ol_data and ol_data.get("cover_url"):
|
||||
book_dict["cover_url"] = ol_data["cover_url"]
|
||||
|
||||
if not book_dict:
|
||||
logger.warning(
|
||||
@ -378,7 +369,6 @@ class Book(LongPlayScrobblableMixin):
|
||||
|
||||
if not data and COMICVINE_API_KEY:
|
||||
logger.warn(f"Checking ComicVine for {self.title}")
|
||||
cv_client = ComicVineClient(api_key=COMICVINE_API_KEY)
|
||||
data = lookup_comic_from_comicvine(str(self.title))
|
||||
|
||||
if not data:
|
||||
|
||||
@ -18,7 +18,7 @@ class ComicVineClient(object):
|
||||
"""
|
||||
|
||||
# All API requests made by this client will be made to this URL.
|
||||
API_URL = "https://www.comicvine.com/api/search/"
|
||||
API_URL = "https://comicvine.gamespot.com/api/search/"
|
||||
|
||||
# A valid User-Agent header must be set in order for our API requests to
|
||||
# be accepted, otherwise our request will be rejected with a
|
||||
@ -41,15 +41,12 @@ class ComicVineClient(object):
|
||||
"volume",
|
||||
}
|
||||
|
||||
def __init__(self, api_key, expire_after=300):
|
||||
def __init__(self, api_key):
|
||||
"""
|
||||
Store the API key in a class variable, and install the requests cache,
|
||||
configuring it using the ``expire_after`` parameter.
|
||||
Store the API key in a class variable.
|
||||
|
||||
:param api_key: Your personal ComicVine API key.
|
||||
:type api_key: str
|
||||
:param expire_after: The number of seconds to retain an entry in cache.
|
||||
:type expire_after: int or None
|
||||
"""
|
||||
|
||||
self.api_key = api_key
|
||||
@ -109,14 +106,17 @@ class ComicVineClient(object):
|
||||
:rtype: dict
|
||||
"""
|
||||
|
||||
return {
|
||||
params = {
|
||||
"api_key": self.api_key,
|
||||
"format": "json",
|
||||
"limit": min(10, limit), # hard limit of 10
|
||||
"offset": max(0, offset), # cannot provide negative offset
|
||||
"query": query,
|
||||
"resources": self._validate_resources(resources),
|
||||
}
|
||||
validated = self._validate_resources(resources)
|
||||
if validated:
|
||||
params["resources"] = validated
|
||||
return params
|
||||
|
||||
def _validate_resources(self, resources):
|
||||
"""
|
||||
@ -141,33 +141,35 @@ class ComicVineClient(object):
|
||||
def _query_api(self, params):
|
||||
"""
|
||||
Query the ComicVine API's ``search`` resource, providing the required
|
||||
headers and parameters with the request. Optionally allow the caller
|
||||
of the function to disable the request cache.
|
||||
headers and parameters with the request.
|
||||
|
||||
If an error occurs during the request, handle it accordingly. Upon
|
||||
success, return the JSON from the response.
|
||||
|
||||
:param params: Parameters to include with the request.
|
||||
:type params: dict
|
||||
:param use_cache: Toggle the use of requests_cache.
|
||||
:type use_cache: bool
|
||||
|
||||
:return: The JSON contained in the response.
|
||||
:rtype: dict
|
||||
"""
|
||||
|
||||
# Since we're performing the identical action regardless of whether
|
||||
# or not the request cache is to be used, store the procedure in a
|
||||
# local function to avoid repetition.
|
||||
def __httpget():
|
||||
response = requests.get(self.API_URL, headers=self.HEADERS, params=params)
|
||||
response = requests.get(self.API_URL, headers=self.HEADERS, params=params)
|
||||
|
||||
if not response.ok:
|
||||
self._handle_http_error(response)
|
||||
if not response.ok:
|
||||
self._handle_http_error(response)
|
||||
|
||||
return response.json()
|
||||
json_data = response.json()
|
||||
|
||||
return __httpget()
|
||||
if json_data.get("status_code") != 1:
|
||||
error_msg = json_data.get("error", "Unknown ComicVine API error")
|
||||
logger.error(
|
||||
"ComicVine API returned status_code %s: %s",
|
||||
json_data.get("status_code"),
|
||||
error_msg,
|
||||
)
|
||||
return {}
|
||||
|
||||
return json_data
|
||||
|
||||
def _handle_http_error(self, response):
|
||||
"""
|
||||
@ -200,10 +202,8 @@ def lookup_comic_from_comicvine(title: str) -> dict:
|
||||
original_title = title
|
||||
|
||||
issue_number = None
|
||||
volume_nubmer = None
|
||||
resource_type = "issue"
|
||||
if "Issue " in title:
|
||||
resource_type = "issue"
|
||||
issue_number = title.split("Issue ")[1]
|
||||
volume_number = None
|
||||
if "Volume " in title:
|
||||
@ -215,48 +215,49 @@ def lookup_comic_from_comicvine(title: str) -> dict:
|
||||
logger.warning("No ComicVine API key configured, not looking anything up")
|
||||
return {}
|
||||
|
||||
client = ComicVineClient(api_key=getattr(settings, "COMICVINE_API_KEY", None))
|
||||
client = ComicVineClient(api_key=api_key)
|
||||
|
||||
raw_results = client.search(title).get("results")
|
||||
results = [r for r in raw_results if r.get("resource_type") == resource_type]
|
||||
raw_results = client.search(title)
|
||||
if not raw_results:
|
||||
return {}
|
||||
results = raw_results.get("results", [])
|
||||
results = [r for r in results if r.get("resource_type") == resource_type]
|
||||
if not results:
|
||||
logger.warning("No comic found on ComicVine")
|
||||
return {}
|
||||
|
||||
found_result = None
|
||||
for result in results:
|
||||
if result.get("issue_number") == str(issue_number):
|
||||
if issue_number is not None and result.get("issue_number") == str(issue_number):
|
||||
found_result = result
|
||||
break
|
||||
if result.get("volume_number") == str(volume_number):
|
||||
if volume_number is not None and result.get("volume_number") == str(volume_number):
|
||||
found_result = result
|
||||
break
|
||||
|
||||
if not found_result:
|
||||
found_result = results[0]
|
||||
|
||||
logger.info("ComicVine results", extra={"results": results})
|
||||
|
||||
if not found_result:
|
||||
logger.warning("No matches found on ComicVine")
|
||||
return {}
|
||||
|
||||
title = found_result.get("name")
|
||||
|
||||
if found_result.get("volume"):
|
||||
title = found_result.get("volume").get("name")
|
||||
|
||||
cover_url = None
|
||||
if found_result.get("image"):
|
||||
cover_url = found_result["image"].get("original_url")
|
||||
|
||||
data_dict = {
|
||||
"title": title,
|
||||
"original_title": original_title,
|
||||
"issue_number": found_result.get("issue_number"),
|
||||
"volume_number": found_result.get("volume_number"),
|
||||
"cover_url": found_result.get("image").get("original_url"),
|
||||
"cover_url": cover_url,
|
||||
"comicvine_id": found_result.get("id"),
|
||||
"comicvine_data": found_result,
|
||||
"summary": found_result.get("description"),
|
||||
"publish_date": found_result.get("cover_date"),
|
||||
"first_publish_year": found_result.get("cover_date", "")[:4],
|
||||
"first_publish_year": (found_result.get("cover_date") or "")[:4],
|
||||
}
|
||||
|
||||
return data_dict
|
||||
|
||||
@ -26,8 +26,6 @@ def lookup_book_from_google(title: str) -> dict:
|
||||
if not google_result:
|
||||
return {}
|
||||
|
||||
publish_date = pendulum.parse(google_result.get("publishedDate"))
|
||||
|
||||
isbn_13 = ""
|
||||
isbn_10 = ""
|
||||
for ident in google_result.get("industryIdentifiers", []):
|
||||
@ -35,25 +33,25 @@ def lookup_book_from_google(title: str) -> dict:
|
||||
isbn_13 = ident.get("identifier")
|
||||
if ident.get("type") == "ISBN_10":
|
||||
isbn_10 = ident.get("identifier")
|
||||
# TODO this may lead to issues with the first get if Google changes our title
|
||||
# book_metadata.title = google_result.get("title")
|
||||
# if google_result.get("subtitle"):
|
||||
# book_metadata["title"] = ": ".join(
|
||||
# [google_result.get("title"), google_result.get("subtitle")]
|
||||
# )
|
||||
# book_dict["subtitle"] = google_result.get("subtitle")
|
||||
book_dict["authors"] = google_result.get("authors")
|
||||
book_dict["publisher"] = google_result.get("publisher")
|
||||
book_dict["first_publish_year"] = publish_date.year
|
||||
book_dict["pages"] = google_result.get("pageCount")
|
||||
book_dict["isbn_13"] = isbn_13
|
||||
book_dict["isbn_10"] = isbn_10
|
||||
book_dict["publish_date"] = google_result.get("publishedDate")
|
||||
if len(book_dict["publish_date"]) == 4:
|
||||
book_dict["publish_date"] = f"{book_dict['publish_date']}-1-1"
|
||||
book_dict["language"] = google_result.get("language")
|
||||
book_dict["summary"] = google_result.get("description")
|
||||
book_dict["genres"] = google_result.get("categories")
|
||||
|
||||
raw_date = google_result.get("publishedDate")
|
||||
if raw_date:
|
||||
try:
|
||||
publish_date = pendulum.parse(raw_date)
|
||||
book_dict["first_publish_year"] = publish_date.year
|
||||
except Exception:
|
||||
pass
|
||||
book_dict["publish_date"] = raw_date
|
||||
if len(raw_date) == 4:
|
||||
book_dict["publish_date"] = f"{raw_date}-1-1"
|
||||
book_dict["cover_url"] = (
|
||||
google_result.get("imageLinks", {})
|
||||
.get("thumbnail", "")
|
||||
|
||||
@ -46,18 +46,16 @@ def test_build_scrobbles_from_pages(get_mock, koreader_rows, demo_user, valid_re
|
||||
scrobbles = build_scrobbles_from_book_map(book_map, demo_user)
|
||||
# The test data generator adds the session-gap 3600s AFTER the trigger page
|
||||
# (not before), so the first session includes 21 pages (1-21), and each
|
||||
# subsequent session has 20 until the last. The last trigger page (120) was
|
||||
# previously orphaned by the loop structure; the post-loop fix now creates a
|
||||
# scrobble for it.
|
||||
expected_scrobbles = 7 * len(book_map.keys())
|
||||
# subsequent session has 20 until the last. The last page is now included
|
||||
# in the final scrobble instead of being orphaned.
|
||||
expected_scrobbles = 6 * len(book_map.keys())
|
||||
assert len(scrobbles) == expected_scrobbles
|
||||
assert len(scrobbles[0].logdata.page_data.keys()) == 21
|
||||
assert len(scrobbles[1].logdata.page_data.keys()) == 20
|
||||
assert len(scrobbles[2].logdata.page_data.keys()) == 20
|
||||
assert len(scrobbles[3].logdata.page_data.keys()) == 20
|
||||
assert len(scrobbles[4].logdata.page_data.keys()) == 20
|
||||
assert len(scrobbles[5].logdata.page_data.keys()) == 18
|
||||
assert len(scrobbles[6].logdata.page_data.keys()) == 1
|
||||
assert len(scrobbles[5].logdata.page_data.keys()) == 19
|
||||
|
||||
|
||||
def test_get_author_str_from_row():
|
||||
|
||||
@ -225,7 +225,9 @@ class KoReaderImport(BaseFileImportMixin):
|
||||
|
||||
self.mark_started()
|
||||
try:
|
||||
scrobbles = process_koreader_sqlite_file(self.upload_file_path, self.user.id)
|
||||
scrobbles = process_koreader_sqlite_file(
|
||||
self.upload_file_path, self.user.id
|
||||
)
|
||||
self.record_log(scrobbles)
|
||||
except Exception as e:
|
||||
self.record_error(f"Import failed: {e}")
|
||||
@ -272,7 +274,9 @@ class AudioScrobblerTSVImport(BaseFileImportMixin):
|
||||
|
||||
self.mark_started()
|
||||
try:
|
||||
scrobbles = import_audioscrobbler_tsv_file(self.upload_file_path, self.user.id)
|
||||
scrobbles = import_audioscrobbler_tsv_file(
|
||||
self.upload_file_path, self.user.id
|
||||
)
|
||||
self.record_log(scrobbles)
|
||||
except Exception as e:
|
||||
self.record_error(f"Import failed: {e}")
|
||||
@ -936,6 +940,8 @@ class Scrobble(TimeStampedModel):
|
||||
logdata_cls = logdata.BaseLogData
|
||||
|
||||
log_dict = self.log
|
||||
if isinstance(log_dict, logdata.BaseLogData):
|
||||
log_dict = log_dict.asdict
|
||||
if isinstance(self.log, str):
|
||||
# There's nothing stopping django from saving a string in a JSONField :(
|
||||
logger.warning(
|
||||
@ -1440,7 +1446,7 @@ class Scrobble(TimeStampedModel):
|
||||
start_ts=int(timezone.now().timestamp()),
|
||||
)
|
||||
}
|
||||
)
|
||||
).asdict
|
||||
|
||||
logger.info(
|
||||
f"[scrobbling] creating new scrobble",
|
||||
@ -1455,7 +1461,7 @@ class Scrobble(TimeStampedModel):
|
||||
"calories", None
|
||||
):
|
||||
if media.calories:
|
||||
scrobble_data["log"] = FoodLogData(calories=media.calories)
|
||||
scrobble_data["log"] = FoodLogData(calories=media.calories).asdict
|
||||
|
||||
scrobble = cls.create(scrobble_data)
|
||||
return scrobble
|
||||
@ -1778,7 +1784,7 @@ class Scrobble(TimeStampedModel):
|
||||
return False
|
||||
|
||||
def calculate_reading_stats(self, commit=True):
|
||||
page_data = self.log.get("page_data")
|
||||
page_data = self.logdata.page_data
|
||||
|
||||
if page_data:
|
||||
if isinstance(page_data, dict):
|
||||
@ -1838,9 +1844,7 @@ class FavoriteMedia(TimeStampedModel):
|
||||
birding_location = models.ForeignKey(
|
||||
BirdingLocation, on_delete=models.CASCADE, **BNULL
|
||||
)
|
||||
media_type = models.CharField(
|
||||
max_length=20, choices=Scrobble.MediaType.choices
|
||||
)
|
||||
media_type = models.CharField(max_length=20, choices=Scrobble.MediaType.choices)
|
||||
sent_to_mopidy = models.BooleanField(default=False)
|
||||
|
||||
class Meta:
|
||||
|
||||
@ -357,10 +357,7 @@ def manual_scrobble_book(
|
||||
|
||||
if action == "stop":
|
||||
if url:
|
||||
if isinstance(scrobble.log, "BookLogData"):
|
||||
scrobble.log.resume_url = next_url_if_exists(url)
|
||||
else:
|
||||
scrobble.log["resume_url"] = next_url_if_exists(url)
|
||||
scrobble.log["resume_url"] = next_url_if_exists(url)
|
||||
scrobble.save(update_fields=["log"])
|
||||
scrobble.stop(force_finish=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user