[books] Add utility urls to model and scrobbles

[project] Finish book resume link task
[books] Set restart and resume urls on comic book scrobbles
2025-10-22 14:18:01 -04:00 · 2025-10-22 12:18:40 -04:00 · 2025-10-22 12:18:08 -04:00 · 2025-10-22 01:00:25 -04:00 · 2025-10-20 22:47:32 -04:00 · 2025-10-20 17:17:18 -04:00
13 changed files with 269 additions and 36 deletions
--- a/PROJECT.org
+++ b/PROJECT.org
@ -479,6 +479,27 @@ https://life.lab.unbl.ink/scrobble/e39779c8-62a5-46a6-bdef-fb7662810dc6/start/
 - Note taken on [2025-09-30 Tue 09:33]

  This may have already been resolved ... need to just confirm it.
+** DONE [#B] Save path to reading source on book scrobbles and show it on the detail page :vrobbler:feature:books:personal:project:
+:PROPERTIES:
+:ID:       f1ef3945-e6e4-66c1-b72e-3cede7a0f84a
+:END:
+** DONE [#B] Move comic resume URL to next page and check if it exists :vrobbler:feature:books:personal:project:
+:PROPERTIES:
+:ID:       9fe09567-11a3-7083-53c7-07458a9591d0
+:END:
+* Version 31.0 [3/3]
+** DONE [#A] Stop comic book webpage scrobbles from overwriting old scrobbles :vrobbler:personal:bug:books:scrobbling:
+:PROPERTIES:
+:ID:       4b2ec068-a281-a88b-c31d-6248d6eb0aa0
+:END:
+** DONE [#A] Add page calculation to manually scrobbled books :vrobbler:personal:feature:books:scrobbling:
+:PROPERTIES:
+:ID:       b2e313b3-5c35-57e7-8933-627535baf34b
+:END:
+** DONE [#A] Fix bug in scrobbling comics where google fails :vrobbler:personal:bug:books:scrobbling:
+:PROPERTIES:
+:ID:       9a870c05-6d20-0803-d35d-c03fbe1d0ee1
+:END:
 * Version 30.0 [3/3]
 ** DONE [#A] Fix readcomicsonline browsing to update pages :vrobbler:books:feature:comicbook:personal:project:scrobbling:
 :PROPERTIES:
--- a/vrobbler/apps/books/admin.py
+++ b/vrobbler/apps/books/admin.py
@ -21,7 +21,8 @@ class BookAdmin(admin.ModelAdmin):
    date_hierarchy = "created"
    list_display = (
        "title",
-        "subtitle",
+        "author",
+        "issue_or_volume",
        "isbn_13",
        "first_publish_year",
        "pages",
@ -32,6 +33,9 @@ class BookAdmin(admin.ModelAdmin):
        ScrobbleInline,
    ]

+    def issue_or_volume(self, obj):
+        """Admin list column: the issue number, falling back to the volume number."""
+        issue = obj.issue_number
+        return issue if issue else obj.volume_number
+

@admin.register(Paper)
 class BookAdmin(admin.ModelAdmin):
--- a/vrobbler/apps/books/migrations/0030_book_readcomics_url.py
+++ b/vrobbler/apps/books/migrations/0030_book_readcomics_url.py
@ -0,0 +1,18 @@
+# Generated by Django 4.2.19 on 2025-10-22 16:29
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add the optional ``readcomics_url`` CharField (max 255 chars) to ``book``."""
+
+    dependencies = [
+        ('books', '0029_book_comicvine_id_book_issue_number_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='book',
+            name='readcomics_url',
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]
--- a/vrobbler/apps/books/migrations/0031_book_next_readcomics_url.py
+++ b/vrobbler/apps/books/migrations/0031_book_next_readcomics_url.py
@ -0,0 +1,18 @@
+# Generated by Django 4.2.19 on 2025-10-22 17:42
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add the optional ``next_readcomics_url`` CharField (max 255 chars) to ``book``."""
+
+    dependencies = [
+        ('books', '0030_book_readcomics_url'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='book',
+            name='next_readcomics_url',
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]
--- a/vrobbler/apps/books/models.py
+++ b/vrobbler/apps/books/models.py
@ -1,15 +1,19 @@
+import logging
 from collections import OrderedDict
 from dataclasses import dataclass
-import logging
 from datetime import datetime
 from typing import Optional
 from uuid import uuid4

 import requests
+from books.constants import READCOMICSONLINE_URL
 from books.openlibrary import (
    lookup_author_from_openlibrary,
    lookup_book_from_openlibrary,
 )
+from books.sources.google import lookup_book_from_google
+from books.sources.semantic import lookup_paper_from_semantic
+from books.utils import get_comic_issue_url
 from django.conf import settings
 from django.contrib.auth import get_user_model
 from django.core.files.base import ContentFile
@ -18,27 +22,25 @@ from django.urls import reverse
 from django_extensions.db.models import TimeStampedModel
 from imagekit.models import ImageSpecField
 from imagekit.processors import ResizeToFit
+from scrobbles.dataclasses import BaseLogData, LongPlayLogData
 from scrobbles.mixins import (
    LongPlayScrobblableMixin,
    ObjectWithGenres,
    ScrobblableConstants,
 )
-from scrobbles.utils import get_scrobbles_for_media
+from scrobbles.utils import get_scrobbles_for_media, next_url_if_exists
 from taggit.managers import TaggableManager
 from thefuzz import fuzz
-from vrobbler.apps.books.sources.comicvine import (
-    ComicVineClient,
-    lookup_comic_from_comicvine,
-)

 from vrobbler.apps.books.locg import (
    lookup_comic_by_locg_slug,
    lookup_comic_from_locg,
    lookup_comic_writer_by_locg_slug,
 )
-from books.sources.google import lookup_book_from_google
-from books.sources.semantic import lookup_paper_from_semantic
-from scrobbles.dataclasses import BaseLogData, LongPlayLogData
+from vrobbler.apps.books.sources.comicvine import (
+    ComicVineClient,
+    lookup_comic_from_comicvine,
+)

 COMICVINE_API_KEY = getattr(settings, "COMICVINE_API_KEY", "")

@ -62,6 +64,7 @@ class BookLogData(BaseLogData, LongPlayLogData):
    pages_read: Optional[int] = None
    page_start: Optional[int] = None
    page_end: Optional[int] = None
+    resume_url: Optional[str] = None

    _excluded_fields = {"koreader_hash", "page_data"}

@ -148,6 +151,8 @@ class Book(LongPlayScrobblableMixin):
    first_sentence = models.TextField(**BNULL)
    # ComicVine
    comicvine_id = models.CharField(max_length=255, **BNULL)
+    readcomics_url = models.CharField(max_length=255, **BNULL)
+    next_readcomics_url = models.CharField(max_length=255, **BNULL)
    issue_number = models.IntegerField(max_length=5, **BNULL)
    volume_number = models.IntegerField(max_length=5, **BNULL)
    # OpenLibrary
@ -169,7 +174,11 @@ class Book(LongPlayScrobblableMixin):

    genre = TaggableManager(through=ObjectWithGenres)

-    def __str__(self):
+    def __str__(self) -> str:
+        if self.issue_number and "Issue" not in str(self.title):
+            return f"{self.title} - Issue {self.issue_number}"
+        if self.volume_number and "Volume" not in str(self.title):
+            return f"{self.title} - Volume {self.volume_number}"
        return f"{self.title}"

    @property
@ -197,9 +206,8 @@ class Book(LongPlayScrobblableMixin):
    @classmethod
    def get_from_comicvine(cls, title: str, overwrite: bool = False, force_new: bool =False) -> "Book":
        book, created = cls.objects.get_or_create(title=title)
-        if not created and not overwrite and not force_new:
-            book, created = cls.objects.get_or_create(original_title=title)
-            logger.info("Found comic by original title, use force_new=True to override")
+
+        if not created:
            return book

        book_dict = lookup_comic_from_comicvine(title)
@ -233,7 +241,7 @@ class Book(LongPlayScrobblableMixin):

    @classmethod
    def find_or_create(
-        cls, title: str, enrich: bool = False, commit: bool = True
+            cls, title: str, url: str = "", enrich: bool = False, commit: bool = True
    ):
        """Given a title, get a Book instance.

@ -244,7 +252,7 @@ class Book(LongPlayScrobblableMixin):
        like to batch create, use commit=False and you'll get an unsaved but enriched
        instance back which you can then save at your convenience."""
        # TODO use either a Google Books id identifier or author name like for tracks
-        book, created = cls.objects.get_or_create(title=title)
+        book, created = cls.objects.get_or_create(original_title=title)
        if not created:
            logger.info(
                "Found exact match for book by title", extra={"title": title}
@ -257,17 +265,22 @@ class Book(LongPlayScrobblableMixin):
            )
            return book

-        book_dict = lookup_book_from_google(title)
-        if not book_dict or book_dict.get("isbn_10"):
+        book_dict = None
+        if READCOMICSONLINE_URL in url:
            book_dict = lookup_comic_from_comicvine(title)
+            book_dict["readcomics_url"] = get_comic_issue_url(url)
+            book_dict["next_readcomics_url"] = next_url_if_exists(book_dict["readcomics_url"])
+
+        if not book_dict:
+            book_dict = lookup_book_from_google(title)
+
+        if not book_dict:
+            logger.warning("No book found in any source, using data as is", extra={"title": title})

        author_list = []
-        authors = book_dict.pop("authors")
-        cover_url = book_dict.pop("cover_url")
-        try:
-            genres = book_dict.pop("generes")
-        except:
-            genres = []
+        authors = book_dict.pop("authors", [])
+        cover_url = book_dict.pop("cover_url", "")
+        genres = book_dict.pop("generes", [])

        if authors:
            for author_str in authors:
@ -293,7 +306,7 @@ class Book(LongPlayScrobblableMixin):
        return book

    def save_image_from_url(self, url: str, force_update: bool = False):
-        if not self.cover or (force_update and url):
+        if url and (not self.cover or force_update):
            r = requests.get(url)
            if r.status_code == 200:
                fname = f"{self.title}_{self.uuid}.jpg"
--- a/vrobbler/apps/books/sources/comicvine.py
+++ b/vrobbler/apps/books/sources/comicvine.py
@ -227,14 +227,12 @@ def lookup_comic_from_comicvine(title: str) -> dict:
        for r in raw_results
        if r.get("resource_type") == resource_type
    ]
-    print(results)
    if not results:
        logger.warning("No comic found on ComicVine")
        return {}

    found_result = None
    for result in results:
-        print("checking ", result.get("issue_number"), " to ", str(issue_number))
        if result.get("issue_number") == str(issue_number):
            found_result = result
            break
@ -264,6 +262,9 @@ def lookup_comic_from_comicvine(title: str) -> dict:
        "cover_url": found_result.get("image").get("original_url"),
        "comicvine_id": found_result.get("id"),
        "comicvine_data": found_result,
+        "summary": found_result.get("description"),
+        "publish_date": found_result.get("cover_date"),
+        "first_publish_year": found_result.get("cover_date", "")[:4]
    }

    return data_dict
--- a/vrobbler/apps/books/sources/google.py
+++ b/vrobbler/apps/books/sources/google.py
@ -29,6 +29,9 @@ def lookup_book_from_google(title: str) -> dict:
    google_result = (
        json.loads(response.content).get("items", [{}])[0].get("volumeInfo")
    )
+    if not google_result:
+        return {}
+
    publish_date = pendulum.parse(google_result.get("publishedDate"))

    isbn_13 = ""
--- a/vrobbler/apps/books/utils.py
+++ b/vrobbler/apps/books/utils.py
@ -1,7 +1,14 @@
+import re
+from urllib.parse import urlparse, urlunparse
+
 from titlecase import titlecase

+
 def parse_readcomicsonline_uri(uri: str) -> tuple:
-    path = uri.split("comic/")[1]
+    try:
+        path = uri.split("comic/")[1]
+    except IndexError:
+        return "", "", ""

    parts = path.split('/')
    title = ""
@ -16,3 +23,37 @@ def parse_readcomicsonline_uri(uri: str) -> tuple:
        page = parts[2]

    return title, volume, page
+
+
+def get_comic_issue_url(url: str) -> str:
+    """Reduce a reader URL to its ``/comic/<title>[/<number>]`` form.
+
+    The path is cut after the first all-digit segment that follows the
+    title, so anything deeper (such as a page number) is dropped. The query
+    string and fragment are cleared; scheme and host are kept as given.
+
+    Raises:
+        ValueError: if the path has no ``comic`` segment, or if nothing
+            follows that segment.
+    """
+    split_url = urlparse(url)
+    segments = [seg for seg in split_url.path.strip("/").split("/") if seg]
+
+    if "comic" not in segments:
+        raise ValueError("URL does not contain '/comic/' segment")
+    anchor = segments.index("comic")
+
+    trailing = segments[anchor + 1:]
+    if not trailing:
+        raise ValueError("No comic title found after '/comic/'")
+    slug, *rest = trailing
+
+    # First purely numeric segment after the title, if there is one
+    issue = next((seg for seg in rest if seg.isdigit()), None)
+
+    kept = ["comic", slug] if not issue else ["comic", slug, issue]
+    return urlunparse(
+        split_url._replace(path="/" + "/".join(kept), query="", fragment="")
+    )
--- a/vrobbler/apps/scrobbles/models.py
+++ b/vrobbler/apps/scrobbles/models.py
@ -991,7 +991,7 @@ class Scrobble(TimeStampedModel):

    @property
    def can_be_updated(self) -> bool:
-        if self.media_obj.__class__.__name__ in LONG_PLAY_MEDIA.values():
+        if self.media_obj.__class__.__name__ in LONG_PLAY_MEDIA.values() and self.source != "readcomicsonline.ru":
            logger.info(
                "[scrobbling] cannot be updated, long play media",
                extra={
@ -1172,7 +1172,7 @@ class Scrobble(TimeStampedModel):
            # If it's marked as stopped, send it through our update mechanism, which will complete it
            if scrobble and (
                scrobble.can_be_updated
-                or read_log_page
+                or (read_log_page and scrobble.can_be_updated)
                or scrobble_data["playback_status"] == "stopped"
            ):
                if read_log_page:
@ -1181,6 +1181,7 @@ class Scrobble(TimeStampedModel):
                        for page in page_list:
                            if not page.get("end_ts", None):
                                page["end_ts"] = int(timezone.now().timestamp())
+                                page["duration"] = page["end_ts"] - page.get("start_ts")

                    page_list.append(
                        BookPageLogData(
@ -1198,7 +1199,7 @@ class Scrobble(TimeStampedModel):
            scrobble_data.pop("playback_status")

        if read_log_page:
-            scrobble_data["log"] = BookLogData(page_data=BookPageLogData(page_number=read_log_page, start_ts=int(timezone.now().timestamp())))
+            scrobble_data["log"] = BookLogData(page_data=[BookPageLogData(page_number=read_log_page, start_ts=int(timezone.now().timestamp()))])

        logger.info(
            f"[scrobbling] creating new scrobble",
@ -1392,6 +1393,9 @@ class Scrobble(TimeStampedModel):
        if class_name in LONG_PLAY_MEDIA.values():
            self.finish_long_play()

+        if class_name == "Book":
+            self.calculate_reading_stats()
+
        logger.info(
            f"[scrobbling] stopped",
            extra={
@ -1487,3 +1491,40 @@ class Scrobble(TimeStampedModel):
            beyond_completion = False

        return beyond_completion
+
+    def calculate_reading_stats(self, commit=True):
+        """Derive page_start, page_end and pages_read from the logged pages.
+
+        Reads ``self.log["page_data"]``, expected to be a list of per-page
+        entries carrying a ``page_number``. The list is sorted in place by
+        numeric page number (entries without a usable number go last), then
+        the lowest page, the highest page and the count of distinct pages
+        are written back onto ``self.log``.
+
+        Nothing is changed when the page data is missing or empty, when it
+        is a dict (a warning is logged instead), or when no entry has a
+        numeric page number.
+
+        Args:
+            commit: when True, save the updated ``log`` field.
+        """
+
+        def page_number_of(entry):
+            # Entries are normally dicts; tolerate attribute-style objects too.
+            if isinstance(entry, dict):
+                raw = entry.get("page_number")
+            else:
+                raw = getattr(entry, "page_number", None)
+            try:
+                return int(raw)
+            except (ValueError, TypeError):
+                return None
+
+        def sort_key(entry):
+            number = page_number_of(entry)
+            # push invalid entries to the end
+            return float("inf") if number is None else number
+
+        page_data = (self.log or {}).get("page_data")
+
+        if not page_data:
+            logger.warning("No page data found to calculate")
+            return
+
+        if isinstance(page_data, dict):
+            logger.warning("Page data is dict, migrate koreader data")
+            return
+
+        page_data.sort(key=sort_key)
+
+        valid_pages = [
+            number
+            for number in map(page_number_of, page_data)
+            if number is not None
+        ]
+        if not valid_pages:
+            return
+
+        self.log["page_start"] = min(valid_pages)
+        self.log["page_end"] = max(valid_pages)
+        self.log["pages_read"] = len(set(valid_pages))
+
+        if commit:
+            self.save(update_fields=["log"])
--- a/vrobbler/apps/scrobbles/scrobblers.py
+++ b/vrobbler/apps/scrobbles/scrobblers.py
@ -29,7 +29,12 @@ from scrobbles.constants import (
 )
 from scrobbles.models import Scrobble
 from scrobbles.notifications import ScrobbleNtfyNotification
-from scrobbles.utils import convert_to_seconds, extract_domain
+from scrobbles.utils import (
+    convert_to_seconds,
+    extract_domain,
+    remove_last_part,
+    next_url_if_exists,
+)
 from sports.models import SportEvent
 from sports.thesportsdb import lookup_event_from_thesportsdb
 from tasks.models import Task
@ -260,22 +265,34 @@ def manual_scrobble_book(
    log = {}
    source = "Vrobbler"
    page = None
+    url = ""

    if READCOMICSONLINE_URL in title:
+        url = title
        title, volume, page = parse_readcomicsonline_uri(title)
+        if not title:
+            logger.info(
+                "[scrobblers] manual book scrobble request failed",
+                extra={
+                    "title": title,
+                    "user_id": user_id,
+                    "media_type": Scrobble.MediaType.BOOK,
+                },
+            )
+            return
+
        title = f"{title} - Issue {volume}"

        if not page:
            page = 1

-        log = BookLogData(page_data=BookPageLogData(page_number=page, start_ts=int(timezone.now().timestamp())))
        logger.info("[scrobblers] Book page included in scrobble, should update!")

-        source = READCOMICSONLINE_URL
+        source = READCOMICSONLINE_URL.replace("https://", "")

    # TODO: Check for scrobble of this book already and if so, update the page count

-    book = Book.find_or_create(title, enrich=True)
+    book = Book.find_or_create(title, url=url, enrich=True)

    scrobble_dict = {
        "user_id": user_id,
@ -299,6 +316,9 @@ def manual_scrobble_book(
    scrobble = Scrobble.create_or_update(book, user_id, scrobble_dict, read_log_page=page)

    if action == "stop":
+        if url:
+            scrobble.log["resume_url"] = next_url_if_exists(url)
+            scrobble.save(update_fields=["log"])
        scrobble.stop(force_finish=True)

    return scrobble
--- a/vrobbler/apps/scrobbles/utils.py
+++ b/vrobbler/apps/scrobbles/utils.py
@ -1,5 +1,6 @@
 import hashlib
 import logging
+import requests
 import re
 from datetime import date, datetime, timedelta
 from typing import TYPE_CHECKING, Optional
@ -408,3 +409,41 @@ def get_daily_calorie_dict_for_user(user_id: int) -> dict[date, int]:
    )

    return {entry["day"]: entry["total_calories"] for entry in qs}
+
+def remove_last_part(url: str) -> str:
+    """Drop the final ``/``-separated part of ``url``.
+
+    Trailing slashes are stripped first. If no slash remains, the stripped
+    string is returned unchanged.
+    """
+    trimmed = url.rstrip("/")
+    head, slash, _last = trimmed.rpartition("/")
+    return head if slash else trimmed
+
+def next_url_if_exists(url: str) -> str:
+    """Return ``url`` with its trailing number incremented, if that page exists.
+
+    The run of digits at the very end of ``url`` (ignoring trailing slashes)
+    is increased by one, keeping any zero padding, and a trailing slash is
+    appended. The candidate is probed with HEAD, then with GET when HEAD
+    does not answer 200.
+
+    Returns:
+        The candidate URL when a probe answers 200. Otherwise "": the URL
+        has no trailing number, neither probe answered 200, or the request
+        raised ``requests.RequestException``.
+    """
+    # Normalize (remove trailing slash)
+    url = url.rstrip('/')
+
+    match = re.search(r'(\d+)$', url)
+    if not match:
+        logger.info("No numeric segment found in the URL", extra={"url": url})
+        return ""
+
+    digits = match.group(1)
+    # zfill keeps the original width: ".../009" becomes ".../010", not ".../0010"
+    next_digits = str(int(digits) + 1).zfill(len(digits))
+    new_url = f"{url[:match.start(1)]}{next_digits}/"
+
+    # Check if the new URL exists
+    try:
+        resp = requests.head(new_url, allow_redirects=True, timeout=5)
+        if resp.status_code == 200:
+            return new_url
+        # Fallback: some sites may not support HEAD well — try GET.
+        # stream=True reads the status without downloading the body.
+        with requests.get(new_url, timeout=5, stream=True) as resp:
+            if resp.status_code == 200:
+                return new_url
+    except requests.RequestException:
+        # Best effort: an unreachable page is treated as "does not exist"
+        logger.info("Could not check next URL", extra={"url": new_url})
+
+    # If it doesn’t exist
+    return ""
--- a/vrobbler/apps/scrobbles/views.py
+++ b/vrobbler/apps/scrobbles/views.py
@ -625,7 +625,7 @@ def scrobble_start(request, uuid):

    if (
        user.profile.redirect_to_webpage
-        and media_obj.__class__.__name__ == Scrobble.MediaType.WEBPAGE
+        and (media_obj.__class__.__name__ == Scrobble.MediaType.WEBPAGE or media_obj.__class__.__name__ == Scrobble.MediaType.BOOK)
    ):
        logger.info(f"Redirecting to {media_obj} detail page")
        return HttpResponseRedirect(media_obj.url)
--- a/vrobbler/templates/books/book_detail.html
+++ b/vrobbler/templates/books/book_detail.html
@ -26,7 +26,21 @@
    </div>
 </div>
 <div class="row">
+    <p><a href="{{s.logdata.restart_url}}">Read again</a></p>
+    {# readcomics_url is this book's own issue page; the next issue is linked separately below #}
+    {% if object.readcomics_url %}
+    <p><a href="{{object.readcomics_url}}">Read this issue</a></p>
+    {% endif %}
+    {% if object.next_readcomics_url %}
+    <p><a href="{{object.next_readcomics_url}}">Read next issue</a></p>
+    {% endif %}
+
    <p>{{scrobbles.count}} scrobbles</p>
+
+    {% for s in scrobbles %}
+    {% if forloop.first %}
+    <p><a href="{{s.logdata.resume_url}}">Resume reading</a></p>
+    {% endif %}
+    {% endfor %}
 </div>
 <div class="row">
    <div class="col-md">
Author	SHA1	Message	Date
Colin Powell	050add8543	[books] Add utility urls to model and scrobbles	2025-10-22 14:18:01 -04:00
Colin Powell	8faf0296a6	[project] Finish book resume link task	2025-10-22 12:18:40 -04:00
Colin Powell	f209f3b107	[books] Set restart and resume urls on comic book scrobbles	2025-10-22 12:18:08 -04:00
Colin Powell	b233b60ae0	[books] Add bookmark_url to logdata	2025-10-22 01:00:25 -04:00
Colin Powell	e1d4a7c5a4	[books] Fix looking up comic by original title	2025-10-20 22:47:32 -04:00
Colin Powell	59e8339e94	[releases] Fix comic books scrobbling, mostly	2025-10-20 17:17:18 -04:00
Colin Powell	9277db97e5	[books] Fix comic scrobbles overrwriting one another	2025-10-20 17:15:54 -04:00
Colin Powell	e755dc6641	Fix bug where title not found	2025-10-20 17:02:52 -04:00
Colin Powell	782f5c15d6	[books] Calc stats and dont die when title not found	2025-10-20 17:02:34 -04:00
Colin Powell	2f4fae7d02	[books] Short circut google lookup if it fails	2025-10-20 16:12:01 -04:00
Colin Powell	4b7c5aa58d	[books] Fix bad lookups for creating books	2025-10-20 16:11:20 -04:00