Compare commits

...

11 Commits
30 ... 32

13 changed files with 269 additions and 36 deletions

View File

@ -479,6 +479,27 @@ https://life.lab.unbl.ink/scrobble/e39779c8-62a5-46a6-bdef-fb7662810dc6/start/
- Note taken on [2025-09-30 Tue 09:33]
This may have already been resolved ... need to just confirm it.
** DONE [#B] Save path to reading source on book scrobbles and show it on the detail page :vrobbler:feature:books:personal:project:
:PROPERTIES:
:ID: f1ef3945-e6e4-66c1-b72e-3cede7a0f84a
:END:
** DONE [#B] Move comic resume URL to next page and check if it exists :vrobbler:feature:books:personal:project:
:PROPERTIES:
:ID: 9fe09567-11a3-7083-53c7-07458a9591d0
:END:
* Version 31.0 [3/3]
** DONE [#A] Stop comic book webpage scrobbles from overwriting old scrobbles :vrobbler:personal:bug:books:scrobbling:
:PROPERTIES:
:ID: 4b2ec068-a281-a88b-c31d-6248d6eb0aa0
:END:
** DONE [#A] Add page calculation to manually scrobbled books :vrobbler:personal:feature:books:scrobbling:
:PROPERTIES:
:ID: b2e313b3-5c35-57e7-8933-627535baf34b
:END:
** DONE [#A] Fix bug in scrobbling comics where google fails :vrobbler:personal:bug:books:scrobbling:
:PROPERTIES:
:ID: 9a870c05-6d20-0803-d35d-c03fbe1d0ee1
:END:
* Version 30.0 [3/3]
** DONE [#A] Fix readcomicsonline browsing to update pages :vrobbler:books:feature:comicbook:personal:project:scrobbling:
:PROPERTIES:

View File

@ -21,7 +21,8 @@ class BookAdmin(admin.ModelAdmin):
date_hierarchy = "created"
list_display = (
"title",
"subtitle",
"author",
"issue_or_volume",
"isbn_13",
"first_publish_year",
"pages",
@ -32,6 +33,9 @@ class BookAdmin(admin.ModelAdmin):
ScrobbleInline,
]
def issue_or_volume(self, obj):
return obj.issue_number or obj.volume_number
@admin.register(Paper)
class BookAdmin(admin.ModelAdmin):

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.19 on 2025-10-22 16:29
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('books', '0029_book_comicvine_id_book_issue_number_and_more'),
]
operations = [
migrations.AddField(
model_name='book',
name='readcomics_url',
field=models.CharField(blank=True, max_length=255, null=True),
),
]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.19 on 2025-10-22 17:42
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('books', '0030_book_readcomics_url'),
]
operations = [
migrations.AddField(
model_name='book',
name='next_readcomics_url',
field=models.CharField(blank=True, max_length=255, null=True),
),
]

View File

@ -1,15 +1,19 @@
import logging
from collections import OrderedDict
from dataclasses import dataclass
import logging
from datetime import datetime
from typing import Optional
from uuid import uuid4
import requests
from books.constants import READCOMICSONLINE_URL
from books.openlibrary import (
lookup_author_from_openlibrary,
lookup_book_from_openlibrary,
)
from books.sources.google import lookup_book_from_google
from books.sources.semantic import lookup_paper_from_semantic
from books.utils import get_comic_issue_url
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.files.base import ContentFile
@ -18,27 +22,25 @@ from django.urls import reverse
from django_extensions.db.models import TimeStampedModel
from imagekit.models import ImageSpecField
from imagekit.processors import ResizeToFit
from scrobbles.dataclasses import BaseLogData, LongPlayLogData
from scrobbles.mixins import (
LongPlayScrobblableMixin,
ObjectWithGenres,
ScrobblableConstants,
)
from scrobbles.utils import get_scrobbles_for_media
from scrobbles.utils import get_scrobbles_for_media, next_url_if_exists
from taggit.managers import TaggableManager
from thefuzz import fuzz
from vrobbler.apps.books.sources.comicvine import (
ComicVineClient,
lookup_comic_from_comicvine,
)
from vrobbler.apps.books.locg import (
lookup_comic_by_locg_slug,
lookup_comic_from_locg,
lookup_comic_writer_by_locg_slug,
)
from books.sources.google import lookup_book_from_google
from books.sources.semantic import lookup_paper_from_semantic
from scrobbles.dataclasses import BaseLogData, LongPlayLogData
from vrobbler.apps.books.sources.comicvine import (
ComicVineClient,
lookup_comic_from_comicvine,
)
COMICVINE_API_KEY = getattr(settings, "COMICVINE_API_KEY", "")
@ -62,6 +64,7 @@ class BookLogData(BaseLogData, LongPlayLogData):
pages_read: Optional[int] = None
page_start: Optional[int] = None
page_end: Optional[int] = None
resume_url: Optional[str] = None
_excluded_fields = {"koreader_hash", "page_data"}
@ -148,6 +151,8 @@ class Book(LongPlayScrobblableMixin):
first_sentence = models.TextField(**BNULL)
# ComicVine
comicvine_id = models.CharField(max_length=255, **BNULL)
readcomics_url = models.CharField(max_length=255, **BNULL)
next_readcomics_url = models.CharField(max_length=255, **BNULL)
issue_number = models.IntegerField(max_length=5, **BNULL)
volume_number = models.IntegerField(max_length=5, **BNULL)
# OpenLibrary
@ -169,7 +174,11 @@ class Book(LongPlayScrobblableMixin):
genre = TaggableManager(through=ObjectWithGenres)
def __str__(self):
def __str__(self) -> str:
if self.issue_number and "Issue" not in str(self.title):
return f"{self.title} - Issue {self.issue_number}"
if self.volume_number and "Volume" not in str(self.title):
return f"{self.title} - Volume {self.volume_number}"
return f"{self.title}"
@property
@ -197,9 +206,8 @@ class Book(LongPlayScrobblableMixin):
@classmethod
def get_from_comicvine(cls, title: str, overwrite: bool = False, force_new: bool =False) -> "Book":
book, created = cls.objects.get_or_create(title=title)
if not created and not overwrite and not force_new:
book, created = cls.objects.get_or_create(original_title=title)
logger.info("Found comic by original title, use force_new=True to override")
if not created:
return book
book_dict = lookup_comic_from_comicvine(title)
@ -233,7 +241,7 @@ class Book(LongPlayScrobblableMixin):
@classmethod
def find_or_create(
cls, title: str, enrich: bool = False, commit: bool = True
cls, title: str, url: str = "", enrich: bool = False, commit: bool = True
):
"""Given a title, get a Book instance.
@ -244,7 +252,7 @@ class Book(LongPlayScrobblableMixin):
like to batch create, use commit=False and you'll get an unsaved but enriched
instance back which you can then save at your convenience."""
# TODO use either a Google Books id identifier or author name like for tracks
book, created = cls.objects.get_or_create(title=title)
book, created = cls.objects.get_or_create(original_title=title)
if not created:
logger.info(
"Found exact match for book by title", extra={"title": title}
@ -257,17 +265,22 @@ class Book(LongPlayScrobblableMixin):
)
return book
book_dict = lookup_book_from_google(title)
if not book_dict or book_dict.get("isbn_10"):
book_dict = None
if READCOMICSONLINE_URL in url:
book_dict = lookup_comic_from_comicvine(title)
book_dict["readcomics_url"] = get_comic_issue_url(url)
book_dict["next_readcomics_url"] = next_url_if_exists(book_dict["readcomics_url"])
if not book_dict:
book_dict = lookup_book_from_google(title)
if not book_dict:
logger.warning("No book found in any source, using data as is", extra={"title": title})
author_list = []
authors = book_dict.pop("authors")
cover_url = book_dict.pop("cover_url")
try:
genres = book_dict.pop("generes")
except:
genres = []
authors = book_dict.pop("authors", [])
cover_url = book_dict.pop("cover_url", "")
genres = book_dict.pop("generes", [])
if authors:
for author_str in authors:
@ -293,7 +306,7 @@ class Book(LongPlayScrobblableMixin):
return book
def save_image_from_url(self, url: str, force_update: bool = False):
if not self.cover or (force_update and url):
if url and (not self.cover or force_update):
r = requests.get(url)
if r.status_code == 200:
fname = f"{self.title}_{self.uuid}.jpg"

View File

@ -227,14 +227,12 @@ def lookup_comic_from_comicvine(title: str) -> dict:
for r in raw_results
if r.get("resource_type") == resource_type
]
print(results)
if not results:
logger.warning("No comic found on ComicVine")
return {}
found_result = None
for result in results:
print("checking ", result.get("issue_number"), " to ", str(issue_number))
if result.get("issue_number") == str(issue_number):
found_result = result
break
@ -264,6 +262,9 @@ def lookup_comic_from_comicvine(title: str) -> dict:
"cover_url": found_result.get("image").get("original_url"),
"comicvine_id": found_result.get("id"),
"comicvine_data": found_result,
"summary": found_result.get("description"),
"publish_date": found_result.get("cover_date"),
"first_publish_year": found_result.get("cover_date", "")[:4]
}
return data_dict

View File

@ -29,6 +29,9 @@ def lookup_book_from_google(title: str) -> dict:
google_result = (
json.loads(response.content).get("items", [{}])[0].get("volumeInfo")
)
if not google_result:
return {}
publish_date = pendulum.parse(google_result.get("publishedDate"))
isbn_13 = ""

View File

@ -1,7 +1,14 @@
import re
from urllib.parse import urlparse, urlunparse
from titlecase import titlecase
def parse_readcomicsonline_uri(uri: str) -> tuple:
path = uri.split("comic/")[1]
try:
path = uri.split("comic/")[1]
except IndexError:
return "", "", ""
parts = path.split('/')
title = ""
@ -16,3 +23,37 @@ def parse_readcomicsonline_uri(uri: str) -> tuple:
page = parts[2]
return title, volume, page
def get_comic_issue_url(url: str) -> str:
parsed = urlparse(url)
parts = [p for p in parsed.path.strip('/').split('/') if p]
# Find the index of "comic"
try:
comic_index = parts.index("comic")
except ValueError:
raise ValueError("URL does not contain '/comic/' segment")
# Extract title (next part after 'comic')
if len(parts) <= comic_index + 1:
raise ValueError("No comic title found after '/comic/'")
title = parts[comic_index + 1]
# Look for the first numeric segment after the title
number = None
for segment in parts[comic_index + 2:]:
if segment.isdigit():
number = segment
break
# Build normalized path
new_parts = ["comic", title]
if number:
new_parts.append(number)
normalized_path = "/" + "/".join(new_parts)
# Rebuild full URL (same scheme and host)
simplified_url = urlunparse(parsed._replace(path=normalized_path, query='', fragment=''))
return simplified_url

View File

@ -991,7 +991,7 @@ class Scrobble(TimeStampedModel):
@property
def can_be_updated(self) -> bool:
if self.media_obj.__class__.__name__ in LONG_PLAY_MEDIA.values():
if self.media_obj.__class__.__name__ in LONG_PLAY_MEDIA.values() and self.source != "readcomicsonline.ru":
logger.info(
"[scrobbling] cannot be updated, long play media",
extra={
@ -1172,7 +1172,7 @@ class Scrobble(TimeStampedModel):
# If it's marked as stopped, send it through our update mechanism, which will complete it
if scrobble and (
scrobble.can_be_updated
or read_log_page
or (read_log_page and scrobble.can_be_updated)
or scrobble_data["playback_status"] == "stopped"
):
if read_log_page:
@ -1181,6 +1181,7 @@ class Scrobble(TimeStampedModel):
for page in page_list:
if not page.get("end_ts", None):
page["end_ts"] = int(timezone.now().timestamp())
page["duration"] = page["end_ts"] - page.get("start_ts")
page_list.append(
BookPageLogData(
@ -1198,7 +1199,7 @@ class Scrobble(TimeStampedModel):
scrobble_data.pop("playback_status")
if read_log_page:
scrobble_data["log"] = BookLogData(page_data=BookPageLogData(page_number=read_log_page, start_ts=int(timezone.now().timestamp())))
scrobble_data["log"] = BookLogData(page_data=[BookPageLogData(page_number=read_log_page, start_ts=int(timezone.now().timestamp()))])
logger.info(
f"[scrobbling] creating new scrobble",
@ -1392,6 +1393,9 @@ class Scrobble(TimeStampedModel):
if class_name in LONG_PLAY_MEDIA.values():
self.finish_long_play()
if class_name == "Book":
self.calculate_reading_stats()
logger.info(
f"[scrobbling] stopped",
extra={
@ -1487,3 +1491,40 @@ class Scrobble(TimeStampedModel):
beyond_completion = False
return beyond_completion
def calculate_reading_stats(self, commit=True):
# --- Sort safely by numeric page_number ---
def safe_page_number(entry):
try:
return int(getattr("page_number", entry), 0)
except (ValueError, TypeError):
return float("inf") # push invalid entries to the end
page_data = self.log.get("page_data")
if not page_data:
logger.warning("No page data found to calculate")
return
if isinstance(page_data, dict):
logger.warning("Page data is dict, migrate koreader data")
return
page_data.sort(key=safe_page_number)
# --- Extract valid numeric page numbers ---
valid_pages = []
for page in page_data:
try:
valid_pages.append(int(page["page_number"]))
except (ValueError, TypeError):
continue
# --- Compute stats ---
if valid_pages:
self.log["page_start"] = min(valid_pages)
self.log["page_end"] = max(valid_pages)
self.log["pages_read"] = len(set(valid_pages))
if commit:
self.save(update_fields=["log"])

View File

@ -29,7 +29,12 @@ from scrobbles.constants import (
)
from scrobbles.models import Scrobble
from scrobbles.notifications import ScrobbleNtfyNotification
from scrobbles.utils import convert_to_seconds, extract_domain
from scrobbles.utils import (
convert_to_seconds,
extract_domain,
remove_last_part,
next_url_if_exists,
)
from sports.models import SportEvent
from sports.thesportsdb import lookup_event_from_thesportsdb
from tasks.models import Task
@ -260,22 +265,34 @@ def manual_scrobble_book(
log = {}
source = "Vrobbler"
page = None
url = ""
if READCOMICSONLINE_URL in title:
url = title
title, volume, page = parse_readcomicsonline_uri(title)
if not title:
logger.info(
"[scrobblers] manual book scrobble request failed",
extra={
"title": title,
"user_id": user_id,
"media_type": Scrobble.MediaType.BOOK,
},
)
return
title = f"{title} - Issue {volume}"
if not page:
page = 1
log = BookLogData(page_data=BookPageLogData(page_number=page, start_ts=int(timezone.now().timestamp())))
logger.info("[scrobblers] Book page included in scrobble, should update!")
source = READCOMICSONLINE_URL
source = READCOMICSONLINE_URL.replace("https://", "")
# TODO: Check for scrobble of this book already and if so, update the page count
book = Book.find_or_create(title, enrich=True)
book = Book.find_or_create(title, url=url, enrich=True)
scrobble_dict = {
"user_id": user_id,
@ -299,6 +316,9 @@ def manual_scrobble_book(
scrobble = Scrobble.create_or_update(book, user_id, scrobble_dict, read_log_page=page)
if action == "stop":
if url:
scrobble.log["resume_url"] = next_url_if_exists(url)
scrobble.save(update_fields=["log"])
scrobble.stop(force_finish=True)
return scrobble

View File

@ -1,5 +1,6 @@
import hashlib
import logging
import requests
import re
from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING, Optional
@ -408,3 +409,41 @@ def get_daily_calorie_dict_for_user(user_id: int) -> dict[date, int]:
)
return {entry["day"]: entry["total_calories"] for entry in qs}
def remove_last_part(url: str) -> str:
url = url.rstrip('/')
if '/' not in url:
return url
return url.rsplit('/', 1)[0]
def next_url_if_exists(url: str) -> str:
# Normalize (remove trailing slash)
url = url.rstrip('/')
# Find last number in the URL path
match = re.search(r'(\d+)(?:/?$)', url)
if not match:
logger.info("No numeric segment found in the URL", extra={"url": url})
return ""
number = int(match.group(1))
new_number = number + 1
# Replace only the last occurrence of that number
new_url = re.sub(rf'{number}(?:/?$)', f'{new_number}/', url + '/', 1)
# Check if the new URL exists
try:
resp = requests.head(new_url, allow_redirects=True, timeout=5)
if resp.status_code == 200:
return new_url
else:
# Fallback: some sites may not support HEAD well — try GET
resp = requests.get(new_url, timeout=5)
if resp.status_code == 200:
return new_url
except requests.RequestException:
pass
# If it doesnt exist
return ""

View File

@ -625,7 +625,7 @@ def scrobble_start(request, uuid):
if (
user.profile.redirect_to_webpage
and media_obj.__class__.__name__ == Scrobble.MediaType.WEBPAGE
and (media_obj.__class__.__name__ == Scrobble.MediaType.WEBPAGE or media_obj.__class__.__name__ == Scrobble.MediaType.BOOK)
):
logger.info(f"Redirecting to {media_obj} detail page")
return HttpResponseRedirect(media_obj.url)

View File

@ -26,7 +26,21 @@
</div>
</div>
<div class="row">
<p><a href="{{s.logdata.restart_url}}">Read again</a></p>
{% if object.readcomics_url %}
<p><a href="{{object.readcomics_url}}">Read next issue</a></p>
{% endif %}
{% if object.next_readcomics_url %}
<p><a href="{{object.next_readcomics_url}}">Read next issue</a></p>
{% endif %}
<p>{{scrobbles.count}} scrobbles</p>
{% for s in scrobbles %}
{% if forloop.first %}
<p><a href="{{s.logdata.resume_url}}">Resume reading</a></p>
{% endif %}
{% endfor %}
</div>
<div class="row">
<div class="col-md">