[books] Add utility urls to model and scrobbles

[project] Finish book resume link task
[books] Set restart and resume urls on comic book scrobbles
2025-10-22 14:18:01 -04:00 · 2025-10-22 12:18:40 -04:00 · 2025-10-22 12:18:08 -04:00 · 2025-10-22 01:00:25 -04:00 · 2025-10-20 22:47:32 -04:00
10 changed files with 189 additions and 23 deletions
--- a/PROJECT.org
+++ b/PROJECT.org
@ -92,7 +92,7 @@ fetching and simple saving.
 :LOGBOOK:
 CLOCK: [2025-07-09 Wed 09:55]--[2025-07-09 Wed 10:15] =>  0:20
 :END:
-* Backlog [3/27]
+* Backlog [2/26]
 ** TODO [#C] Create small utility to clean up tracks scrobbled with wonky playback times :vrobbler:personal:bug:music:scrobbles:
 ** TODO [#C] Move to using more robust mopidy-webhooks package from PyPI :utility:improvement:
 :PROPERTIES:
@ -479,6 +479,14 @@ https://life.lab.unbl.ink/scrobble/e39779c8-62a5-46a6-bdef-fb7662810dc6/start/
 - Note taken on [2025-09-30 Tue 09:33]

  This may have already been resolved ... need to just confirm it.
+** DONE [#B] Save path to reading source on book scrobbles and show it on the detail page :vrobbler:feature:books:personal:project:
+:PROPERTIES:
+:ID:       f1ef3945-e6e4-66c1-b72e-3cede7a0f84a
+:END:
+** DONE [#B] Move comic resume URL to next page and check if it exists :vrobbler:feature:books:personal:project:
+:PROPERTIES:
+:ID:       9fe09567-11a3-7083-53c7-07458a9591d0
+:END:
 * Version 31.0 [3/3]
 ** DONE [#A] Stop comic book webpage scrobbles from overwriting old scrobbles :vrobbler:personal:bug:books:scrobbling:
 :PROPERTIES:
--- a/vrobbler/apps/books/admin.py
+++ b/vrobbler/apps/books/admin.py
@ -21,7 +21,8 @@ class BookAdmin(admin.ModelAdmin):
    date_hierarchy = "created"
    list_display = (
        "title",
-        "subtitle",
+        "author",
+        "issue_or_volume",
        "isbn_13",
        "first_publish_year",
        "pages",
@ -32,6 +33,9 @@ class BookAdmin(admin.ModelAdmin):
        ScrobbleInline,
    ]

+    def issue_or_volume(self, obj):
+        return obj.issue_number or obj.volume_number
+

@admin.register(Paper)
 class BookAdmin(admin.ModelAdmin):
--- a/vrobbler/apps/books/migrations/0030_book_readcomics_url.py
+++ b/vrobbler/apps/books/migrations/0030_book_readcomics_url.py
@ -0,0 +1,18 @@
+# Generated by Django 4.2.19 on 2025-10-22 16:29
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('books', '0029_book_comicvine_id_book_issue_number_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='book',
+            name='readcomics_url',
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]
--- a/vrobbler/apps/books/migrations/0031_book_next_readcomics_url.py
+++ b/vrobbler/apps/books/migrations/0031_book_next_readcomics_url.py
@ -0,0 +1,18 @@
+# Generated by Django 4.2.19 on 2025-10-22 17:42
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('books', '0030_book_readcomics_url'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='book',
+            name='next_readcomics_url',
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]
--- a/vrobbler/apps/books/models.py
+++ b/vrobbler/apps/books/models.py
@ -1,15 +1,19 @@
+import logging
 from collections import OrderedDict
 from dataclasses import dataclass
-import logging
 from datetime import datetime
 from typing import Optional
 from uuid import uuid4

 import requests
+from books.constants import READCOMICSONLINE_URL
 from books.openlibrary import (
    lookup_author_from_openlibrary,
    lookup_book_from_openlibrary,
 )
+from books.sources.google import lookup_book_from_google
+from books.sources.semantic import lookup_paper_from_semantic
+from books.utils import get_comic_issue_url
 from django.conf import settings
 from django.contrib.auth import get_user_model
 from django.core.files.base import ContentFile
@ -18,27 +22,25 @@ from django.urls import reverse
 from django_extensions.db.models import TimeStampedModel
 from imagekit.models import ImageSpecField
 from imagekit.processors import ResizeToFit
+from scrobbles.dataclasses import BaseLogData, LongPlayLogData
 from scrobbles.mixins import (
    LongPlayScrobblableMixin,
    ObjectWithGenres,
    ScrobblableConstants,
 )
-from scrobbles.utils import get_scrobbles_for_media
+from scrobbles.utils import get_scrobbles_for_media, next_url_if_exists
 from taggit.managers import TaggableManager
 from thefuzz import fuzz
-from vrobbler.apps.books.sources.comicvine import (
-    ComicVineClient,
-    lookup_comic_from_comicvine,
-)

 from vrobbler.apps.books.locg import (
    lookup_comic_by_locg_slug,
    lookup_comic_from_locg,
    lookup_comic_writer_by_locg_slug,
 )
-from books.sources.google import lookup_book_from_google
-from books.sources.semantic import lookup_paper_from_semantic
-from scrobbles.dataclasses import BaseLogData, LongPlayLogData
+from vrobbler.apps.books.sources.comicvine import (
+    ComicVineClient,
+    lookup_comic_from_comicvine,
+)

 COMICVINE_API_KEY = getattr(settings, "COMICVINE_API_KEY", "")

@ -62,6 +64,7 @@ class BookLogData(BaseLogData, LongPlayLogData):
    pages_read: Optional[int] = None
    page_start: Optional[int] = None
    page_end: Optional[int] = None
+    resume_url: Optional[str] = None

    _excluded_fields = {"koreader_hash", "page_data"}

@ -148,6 +151,8 @@ class Book(LongPlayScrobblableMixin):
    first_sentence = models.TextField(**BNULL)
    # ComicVine
    comicvine_id = models.CharField(max_length=255, **BNULL)
+    readcomics_url = models.CharField(max_length=255, **BNULL)
+    next_readcomics_url = models.CharField(max_length=255, **BNULL)
    issue_number = models.IntegerField(max_length=5, **BNULL)
    volume_number = models.IntegerField(max_length=5, **BNULL)
    # OpenLibrary
@ -169,7 +174,11 @@ class Book(LongPlayScrobblableMixin):

    genre = TaggableManager(through=ObjectWithGenres)

-    def __str__(self):
+    def __str__(self) -> str:
+        if self.issue_number and "Issue" not in str(self.title):
+            return f"{self.title} - Issue {self.issue_number}"
+        if self.volume_number and "Volume" not in str(self.title):
+            return f"{self.title} - Volume {self.volume_number}"
        return f"{self.title}"

    @property
@ -197,9 +206,8 @@ class Book(LongPlayScrobblableMixin):
    @classmethod
    def get_from_comicvine(cls, title: str, overwrite: bool = False, force_new: bool =False) -> "Book":
        book, created = cls.objects.get_or_create(title=title)
-        if not created and not overwrite and not force_new:
-            book, created = cls.objects.get_or_create(original_title=title)
-            logger.info("Found comic by original title, use force_new=True to override")
+
+        if not created:
            return book

        book_dict = lookup_comic_from_comicvine(title)
@ -233,7 +241,7 @@ class Book(LongPlayScrobblableMixin):

    @classmethod
    def find_or_create(
-        cls, title: str, enrich: bool = False, commit: bool = True
+            cls, title: str, url: str = "", enrich: bool = False, commit: bool = True
    ):
        """Given a title, get a Book instance.

@ -244,7 +252,7 @@ class Book(LongPlayScrobblableMixin):
        like to batch create, use commit=False and you'll get an unsaved but enriched
        instance back which you can then save at your convenience."""
        # TODO use either a Google Books id identifier or author name like for tracks
-        book, created = cls.objects.get_or_create(title=title)
+        book, created = cls.objects.get_or_create(original_title=title)
        if not created:
            logger.info(
                "Found exact match for book by title", extra={"title": title}
@ -257,9 +265,17 @@ class Book(LongPlayScrobblableMixin):
            )
            return book

-        book_dict = lookup_book_from_google(title)
-        if not book_dict or book_dict.get("isbn_10"):
+        book_dict = None
+        if READCOMICSONLINE_URL in url:
            book_dict = lookup_comic_from_comicvine(title)
+            book_dict["readcomics_url"] = get_comic_issue_url(url)
+            book_dict["next_readcomics_url"] = next_url_if_exists(book_dict["readcomics_url"])
+
+        if not book_dict:
+            book_dict = lookup_book_from_google(title)
+
+        if not book_dict:
+            logger.warning("No book found in any source, using data as is", extra={"title": title})

        author_list = []
        authors = book_dict.pop("authors", [])
--- a/vrobbler/apps/books/sources/comicvine.py
+++ b/vrobbler/apps/books/sources/comicvine.py
@ -227,14 +227,12 @@ def lookup_comic_from_comicvine(title: str) -> dict:
        for r in raw_results
        if r.get("resource_type") == resource_type
    ]
-    print(results)
    if not results:
        logger.warning("No comic found on ComicVine")
        return {}

    found_result = None
    for result in results:
-        print("checking ", result.get("issue_number"), " to ", str(issue_number))
        if result.get("issue_number") == str(issue_number):
            found_result = result
            break
@ -264,6 +262,9 @@ def lookup_comic_from_comicvine(title: str) -> dict:
        "cover_url": found_result.get("image").get("original_url"),
        "comicvine_id": found_result.get("id"),
        "comicvine_data": found_result,
+        "summary": found_result.get("description"),
+        "publish_date": found_result.get("cover_date"),
+        "first_publish_year": found_result.get("cover_date", "")[:4]
    }

    return data_dict
--- a/vrobbler/apps/books/utils.py
+++ b/vrobbler/apps/books/utils.py
@ -1,5 +1,9 @@
+import re
+from urllib.parse import urlparse, urlunparse
+
 from titlecase import titlecase

+
 def parse_readcomicsonline_uri(uri: str) -> tuple:
    try:
        path = uri.split("comic/")[1]
@ -19,3 +23,37 @@ def parse_readcomicsonline_uri(uri: str) -> tuple:
        page = parts[2]

    return title, volume, page
+
+
+def get_comic_issue_url(url: str) -> str:
+    parsed = urlparse(url)
+    parts = [p for p in parsed.path.strip('/').split('/') if p]
+
+    # Find the index of "comic"
+    try:
+        comic_index = parts.index("comic")
+    except ValueError:
+        raise ValueError("URL does not contain '/comic/' segment")
+
+    # Extract title (next part after 'comic')
+    if len(parts) <= comic_index + 1:
+        raise ValueError("No comic title found after '/comic/'")
+    title = parts[comic_index + 1]
+
+    # Look for the first numeric segment after the title
+    number = None
+    for segment in parts[comic_index + 2:]:
+        if segment.isdigit():
+            number = segment
+            break
+
+    # Build normalized path
+    new_parts = ["comic", title]
+    if number:
+        new_parts.append(number)
+
+    normalized_path = "/" + "/".join(new_parts)
+
+    # Rebuild full URL (same scheme and host)
+    simplified_url = urlunparse(parsed._replace(path=normalized_path, query='', fragment=''))
+    return simplified_url
--- a/vrobbler/apps/scrobbles/scrobblers.py
+++ b/vrobbler/apps/scrobbles/scrobblers.py
@ -29,7 +29,12 @@ from scrobbles.constants import (
 )
 from scrobbles.models import Scrobble
 from scrobbles.notifications import ScrobbleNtfyNotification
-from scrobbles.utils import convert_to_seconds, extract_domain
+from scrobbles.utils import (
+    convert_to_seconds,
+    extract_domain,
+    remove_last_part,
+    next_url_if_exists,
+)
 from sports.models import SportEvent
 from sports.thesportsdb import lookup_event_from_thesportsdb
 from tasks.models import Task
@ -260,8 +265,10 @@ def manual_scrobble_book(
    log = {}
    source = "Vrobbler"
    page = None
+    url = ""

    if READCOMICSONLINE_URL in title:
+        url = title
        title, volume, page = parse_readcomicsonline_uri(title)
        if not title:
            logger.info(
@ -285,7 +292,7 @@ def manual_scrobble_book(

    # TODO: Check for scrobble of this book already and if so, update the page count

-    book = Book.find_or_create(title, enrich=True)
+    book = Book.find_or_create(title, url=url, enrich=True)

    scrobble_dict = {
        "user_id": user_id,
@ -309,6 +316,9 @@ def manual_scrobble_book(
    scrobble = Scrobble.create_or_update(book, user_id, scrobble_dict, read_log_page=page)

    if action == "stop":
+        if url:
+            scrobble.log["resume_url"] = next_url_if_exists(url)
+            scrobble.save(update_fields=["log"])
        scrobble.stop(force_finish=True)

    return scrobble
--- a/vrobbler/apps/scrobbles/utils.py
+++ b/vrobbler/apps/scrobbles/utils.py
@ -1,5 +1,6 @@
 import hashlib
 import logging
+import requests
 import re
 from datetime import date, datetime, timedelta
 from typing import TYPE_CHECKING, Optional
@ -408,3 +409,41 @@ def get_daily_calorie_dict_for_user(user_id: int) -> dict[date, int]:
    )

    return {entry["day"]: entry["total_calories"] for entry in qs}
+
+def remove_last_part(url: str) -> str:
+    url = url.rstrip('/')
+    if '/' not in url:
+        return url
+    return url.rsplit('/', 1)[0]
+
+def next_url_if_exists(url: str) -> str:
+    # Normalize (remove trailing slash)
+    url = url.rstrip('/')
+
+    # Find last number in the URL path
+    match = re.search(r'(\d+)(?:/?$)', url)
+    if not match:
+        logger.info("No numeric segment found in the URL", extra={"url": url})
+        return ""
+
+    number = int(match.group(1))
+    new_number = number + 1
+
+    # Replace only the last occurrence of that number
+    new_url = re.sub(rf'{number}(?:/?$)', f'{new_number}/', url + '/', 1)
+
+    # Check if the new URL exists
+    try:
+        resp = requests.head(new_url, allow_redirects=True, timeout=5)
+        if resp.status_code == 200:
+            return new_url
+        else:
+            # Fallback: some sites may not support HEAD well — try GET
+            resp = requests.get(new_url, timeout=5)
+            if resp.status_code == 200:
+                return new_url
+    except requests.RequestException:
+        pass
+
+    # If it doesn’t exist
+    return ""
--- a/vrobbler/templates/books/book_detail.html
+++ b/vrobbler/templates/books/book_detail.html
@ -26,7 +26,21 @@
    </div>
 </div>
 <div class="row">
+    <p><a href="{{s.logdata.restart_url}}">Read again</a></p>
+    {% if object.readcomics_url %}
+    <p><a href="{{object.readcomics_url}}">Read next issue</a></p>
+    {% endif %}
+    {% if object.next_readcomics_url %}
+    <p><a href="{{object.next_readcomics_url}}">Read next issue</a></p>
+    {% endif %}
+
    <p>{{scrobbles.count}} scrobbles</p>
+
+    {% for s in scrobbles %}
+    {% if forloop.first %}
+    <p><a href="{{s.logdata.resume_url}}">Resume reading</a></p>
+    {% endif %}
+    {% endfor %}
 </div>
 <div class="row">
    <div class="col-md">
Author	SHA1	Message	Date
Colin Powell	050add8543	[books] Add utility urls to model and scrobbles	2025-10-22 14:18:01 -04:00
Colin Powell	8faf0296a6	[project] Finish book resume link task	2025-10-22 12:18:40 -04:00
Colin Powell	f209f3b107	[books] Set restart and resume urls on comic book scrobbles	2025-10-22 12:18:08 -04:00
Colin Powell	b233b60ae0	[books] Add bookmark_url to logdata	2025-10-22 01:00:25 -04:00
Colin Powell	e1d4a7c5a4	[books] Fix looking up comic by original title	2025-10-20 22:47:32 -04:00