Compare commits

...

11 Commits
31 ... 33

13 changed files with 220 additions and 30 deletions

View File

@ -92,7 +92,7 @@ fetching and simple saving.
:LOGBOOK:
CLOCK: [2025-07-09 Wed 09:55]--[2025-07-09 Wed 10:15] => 0:20
:END:
* Backlog [3/27]
* Backlog [1/27]
** TODO [#C] Create small utility to clean up tracks scrobbled with wonky playback times :vrobbler:personal:bug:music:scrobbles:
** TODO [#C] Move to using more robust mopidy-webhooks package from PyPI :utility:improvement:
:PROPERTIES:
@ -479,6 +479,30 @@ https://life.lab.unbl.ink/scrobble/e39779c8-62a5-46a6-bdef-fb7662810dc6/start/
- Note taken on [2025-09-30 Tue 09:33]
This may have already been resolved ... need to just confirm it.
** TODO [#A] Find page numbers for comic books from ComicVine :vrobbler:feature:books:personal:project:
* Version 33.0 [3/3]
** DONE [#A] Fix bug where scrobble is_stale only uses seconds not total_seconds :vrobbler:bug:scrobbles:personal:project:
:PROPERTIES:
:ID: 7f6070ac-4f67-011d-ebd5-f3dc47da46ed
:END:
** DONE [#B] Fix duplicated Read next issue for Comic books :vrobbler:bug:books:personal:project:
:PROPERTIES:
:ID: 97943040-1f03-b0b7-b0aa-123a783e4f7b
:END:
** DONE [#A] Add API authentication to BGG calls :vrobbler:bug:boardgames:personal:project:
:PROPERTIES:
:ID: 4955cc34-0882-50db-92f7-f36a95bf57a4
:END:
<2025-10-28 Tue>
* Version 32.0 [2/2]
** DONE [#B] Save path to reading source on book scrobbles and show it on the detail page :vrobbler:feature:books:personal:project:
:PROPERTIES:
:ID: f1ef3945-e6e4-66c1-b72e-3cede7a0f84a
:END:
** DONE [#B] Move comic resume URL to next page and check if it exists :vrobbler:feature:books:personal:project:
:PROPERTIES:
:ID: 9fe09567-11a3-7083-53c7-07458a9591d0
:END:
* Version 31.0 [3/3]
** DONE [#A] Stop comic book webpage scrobbles from overwriting old scrobbles :vrobbler:personal:bug:books:scrobbling:
:PROPERTIES:

View File

@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Optional
import requests
from bs4 import BeautifulSoup
from django.contrib.auth import get_user_model
from django.conf import settings
User = get_user_model()
if TYPE_CHECKING:
@ -17,6 +18,8 @@ SEARCH_ID_URL = (
"https://boardgamegeek.com/xmlapi/search?search={query}&exact=1"
)
GAME_ID_URL = "https://boardgamegeek.com/xmlapi/boardgame/{id}"
BGG_ACCESS_TOKEN = getattr(settings, "BGG_ACCESS_TOKEN", "")
BASE_HEADERS = {"User-Agent": "Vrobbler 31.0", "Authorization": f"Bearer {BGG_ACCESS_TOKEN}"}
def take_first(thing: Optional[list]) -> str:
@ -37,10 +40,9 @@ def take_first(thing: Optional[list]) -> str:
def lookup_boardgame_id_from_bgg(title: str) -> Optional[int]:
soup = None
headers = {"User-Agent": "Vrobbler 0.11.12"}
game_id = None
url = SEARCH_ID_URL.format(query=title)
r = requests.get(url, headers=headers)
r = requests.get(url, headers=BASE_HEADERS)
if r.status_code == 200:
soup = BeautifulSoup(r.text, "xml")
@ -57,7 +59,6 @@ def lookup_boardgame_id_from_bgg(title: str) -> Optional[int]:
def lookup_boardgame_from_bgg(lookup_id: str) -> dict:
soup = None
game_dict = {}
headers = {"User-Agent": "Vrobbler 0.11.12"}
title = ""
bgg_id = None
@ -73,7 +74,7 @@ def lookup_boardgame_from_bgg(lookup_id: str) -> dict:
bgg_id = lookup_boardgame_id_from_bgg(title)
url = GAME_ID_URL.format(id=bgg_id)
r = requests.get(url, headers=headers)
r = requests.get(url, headers=BASE_HEADERS)
if r.status_code == 200:
soup = BeautifulSoup(r.text, "xml")
@ -109,7 +110,8 @@ def push_scrobble_to_bgg(scrobble: "Scrobble", user: User) -> Optional[bool]:
login_payload = {
"credentials": {"username": bgg_username, "password": bgg_password}
}
headers = {"content-type": "application/json"}
# Build a fresh dict for this request. Assigning BASE_HEADERS directly would
# alias the module-level dict, so adding "content-type" here would leak into
# every later request that passes BASE_HEADERS.
headers = {**BASE_HEADERS, "content-type": "application/json"}
# TODO Look up past plays for scrobble.media_obj.bggeek_id, and make sure we haven't scrobbled this before

View File

@ -21,7 +21,8 @@ class BookAdmin(admin.ModelAdmin):
date_hierarchy = "created"
list_display = (
"title",
"subtitle",
"author",
"issue_or_volume",
"isbn_13",
"first_publish_year",
"pages",
@ -32,6 +33,9 @@ class BookAdmin(admin.ModelAdmin):
ScrobbleInline,
]
def issue_or_volume(self, obj):
    """Return the object's issue number, or its volume number when the issue number is falsy."""
    number = obj.issue_number
    if not number:
        number = obj.volume_number
    return number
@admin.register(Paper)
class BookAdmin(admin.ModelAdmin):

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.19 on 2025-10-22 16:29
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the optional ``readcomics_url`` character field to the ``book`` model."""

    dependencies = [
        ("books", "0029_book_comicvine_id_book_issue_number_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="book",
            name="readcomics_url",
            # blank/null allowed: the field is optional on the model
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
    ]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.19 on 2025-10-22 17:42
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the optional ``next_readcomics_url`` character field to the ``book`` model."""

    dependencies = [
        ("books", "0030_book_readcomics_url"),
    ]

    operations = [
        migrations.AddField(
            model_name="book",
            name="next_readcomics_url",
            # blank/null allowed: the field is optional on the model
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
    ]

View File

@ -1,15 +1,19 @@
import logging
from collections import OrderedDict
from dataclasses import dataclass
import logging
from datetime import datetime
from typing import Optional
from uuid import uuid4
import requests
from books.constants import READCOMICSONLINE_URL
from books.openlibrary import (
lookup_author_from_openlibrary,
lookup_book_from_openlibrary,
)
from books.sources.google import lookup_book_from_google
from books.sources.semantic import lookup_paper_from_semantic
from books.utils import get_comic_issue_url
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.files.base import ContentFile
@ -18,27 +22,25 @@ from django.urls import reverse
from django_extensions.db.models import TimeStampedModel
from imagekit.models import ImageSpecField
from imagekit.processors import ResizeToFit
from scrobbles.dataclasses import BaseLogData, LongPlayLogData
from scrobbles.mixins import (
LongPlayScrobblableMixin,
ObjectWithGenres,
ScrobblableConstants,
)
from scrobbles.utils import get_scrobbles_for_media
from scrobbles.utils import get_scrobbles_for_media, next_url_if_exists
from taggit.managers import TaggableManager
from thefuzz import fuzz
from vrobbler.apps.books.sources.comicvine import (
ComicVineClient,
lookup_comic_from_comicvine,
)
from vrobbler.apps.books.locg import (
lookup_comic_by_locg_slug,
lookup_comic_from_locg,
lookup_comic_writer_by_locg_slug,
)
from books.sources.google import lookup_book_from_google
from books.sources.semantic import lookup_paper_from_semantic
from scrobbles.dataclasses import BaseLogData, LongPlayLogData
from vrobbler.apps.books.sources.comicvine import (
ComicVineClient,
lookup_comic_from_comicvine,
)
COMICVINE_API_KEY = getattr(settings, "COMICVINE_API_KEY", "")
@ -62,6 +64,7 @@ class BookLogData(BaseLogData, LongPlayLogData):
pages_read: Optional[int] = None
page_start: Optional[int] = None
page_end: Optional[int] = None
resume_url: Optional[str] = None
_excluded_fields = {"koreader_hash", "page_data"}
@ -148,6 +151,8 @@ class Book(LongPlayScrobblableMixin):
first_sentence = models.TextField(**BNULL)
# ComicVine
comicvine_id = models.CharField(max_length=255, **BNULL)
readcomics_url = models.CharField(max_length=255, **BNULL)
next_readcomics_url = models.CharField(max_length=255, **BNULL)
issue_number = models.IntegerField(max_length=5, **BNULL)
volume_number = models.IntegerField(max_length=5, **BNULL)
# OpenLibrary
@ -169,7 +174,11 @@ class Book(LongPlayScrobblableMixin):
genre = TaggableManager(through=ObjectWithGenres)
def __str__(self):
def __str__(self) -> str:
    """Return the title, suffixed with the issue or volume number.

    The issue suffix is used when ``issue_number`` is set and the title does
    not already contain "Issue"; otherwise the volume suffix is used when
    ``volume_number`` is set and the title does not already contain "Volume".
    With neither, the bare title is returned.
    """
    title_text = str(self.title)
    suffix = ""
    if self.issue_number and "Issue" not in title_text:
        suffix = f" - Issue {self.issue_number}"
    elif self.volume_number and "Volume" not in title_text:
        suffix = f" - Volume {self.volume_number}"
    return f"{self.title}{suffix}"
@property
@ -197,9 +206,8 @@ class Book(LongPlayScrobblableMixin):
@classmethod
def get_from_comicvine(cls, title: str, overwrite: bool = False, force_new: bool =False) -> "Book":
book, created = cls.objects.get_or_create(title=title)
if not created and not overwrite and not force_new:
book, created = cls.objects.get_or_create(original_title=title)
logger.info("Found comic by original title, use force_new=True to override")
if not created:
return book
book_dict = lookup_comic_from_comicvine(title)
@ -233,7 +241,7 @@ class Book(LongPlayScrobblableMixin):
@classmethod
def find_or_create(
cls, title: str, enrich: bool = False, commit: bool = True
cls, title: str, url: str = "", enrich: bool = False, commit: bool = True
):
"""Given a title, get a Book instance.
@ -244,7 +252,7 @@ class Book(LongPlayScrobblableMixin):
like to batch create, use commit=False and you'll get an unsaved but enriched
instance back which you can then save at your convenience."""
# TODO use either a Google Books id identifier or author name like for tracks
book, created = cls.objects.get_or_create(title=title)
book, created = cls.objects.get_or_create(original_title=title)
if not created:
logger.info(
"Found exact match for book by title", extra={"title": title}
@ -257,9 +265,17 @@ class Book(LongPlayScrobblableMixin):
)
return book
book_dict = lookup_book_from_google(title)
if not book_dict or book_dict.get("isbn_10"):
book_dict = None
if READCOMICSONLINE_URL in url:
book_dict = lookup_comic_from_comicvine(title)
book_dict["readcomics_url"] = get_comic_issue_url(url)
book_dict["next_readcomics_url"] = next_url_if_exists(book_dict["readcomics_url"])
if not book_dict:
book_dict = lookup_book_from_google(title)
if not book_dict:
logger.warning("No book found in any source, using data as is", extra={"title": title})
author_list = []
authors = book_dict.pop("authors", [])

View File

@ -227,14 +227,12 @@ def lookup_comic_from_comicvine(title: str) -> dict:
for r in raw_results
if r.get("resource_type") == resource_type
]
print(results)
if not results:
logger.warning("No comic found on ComicVine")
return {}
found_result = None
for result in results:
print("checking ", result.get("issue_number"), " to ", str(issue_number))
if result.get("issue_number") == str(issue_number):
found_result = result
break
@ -264,6 +262,9 @@ def lookup_comic_from_comicvine(title: str) -> dict:
"cover_url": found_result.get("image").get("original_url"),
"comicvine_id": found_result.get("id"),
"comicvine_data": found_result,
"summary": found_result.get("description"),
"publish_date": found_result.get("cover_date"),
"first_publish_year": found_result.get("cover_date", "")[:4]
}
return data_dict

View File

@ -1,5 +1,9 @@
import re
from urllib.parse import urlparse, urlunparse
from titlecase import titlecase
def parse_readcomicsonline_uri(uri: str) -> tuple:
try:
path = uri.split("comic/")[1]
@ -19,3 +23,37 @@ def parse_readcomicsonline_uri(uri: str) -> tuple:
page = parts[2]
return title, volume, page
def get_comic_issue_url(url: str) -> str:
    """Normalize a comic reader URL down to its issue-level address.

    The path is reduced to ``/comic/<title>[/<number>]``, where ``<number>``
    is the first all-digit path segment found after the title. Anything
    before the ``comic`` segment, any later segments (such as a page number),
    the query string and the fragment are dropped. Scheme and host are kept.

    Args:
        url: Full URL whose path contains a ``comic`` segment followed by a
            title segment.

    Returns:
        The simplified URL.

    Raises:
        ValueError: If the path has no ``comic`` segment, or nothing follows it.
    """
    parsed = urlparse(url)
    parts = [p for p in parsed.path.strip("/").split("/") if p]

    try:
        comic_index = parts.index("comic")
    except ValueError:
        # The list.index() error carries no useful detail; suppress it so the
        # traceback shows only the domain-specific message.
        raise ValueError("URL does not contain '/comic/' segment") from None

    if len(parts) <= comic_index + 1:
        raise ValueError("No comic title found after '/comic/'")
    title = parts[comic_index + 1]

    # First purely numeric segment after the title, if any.
    number = next(
        (segment for segment in parts[comic_index + 2:] if segment.isdigit()),
        None,
    )

    new_parts = ["comic", title]
    if number:
        new_parts.append(number)
    normalized_path = "/" + "/".join(new_parts)

    # Rebuild with the same scheme and host, without query or fragment.
    return urlunparse(
        parsed._replace(path=normalized_path, query="", fragment="")
    )

View File

@ -822,7 +822,7 @@ class Scrobble(TimeStampedModel):
"""
is_stale = False
now = timezone.now()
seconds_since_last_update = (now - self.modified).seconds
seconds_since_last_update = (now - self.modified).total_seconds()
if seconds_since_last_update >= self.media_obj.SECONDS_TO_STALE:
is_stale = True
return is_stale

View File

@ -29,7 +29,12 @@ from scrobbles.constants import (
)
from scrobbles.models import Scrobble
from scrobbles.notifications import ScrobbleNtfyNotification
from scrobbles.utils import convert_to_seconds, extract_domain
from scrobbles.utils import (
convert_to_seconds,
extract_domain,
remove_last_part,
next_url_if_exists,
)
from sports.models import SportEvent
from sports.thesportsdb import lookup_event_from_thesportsdb
from tasks.models import Task
@ -260,8 +265,10 @@ def manual_scrobble_book(
log = {}
source = "Vrobbler"
page = None
url = ""
if READCOMICSONLINE_URL in title:
url = title
title, volume, page = parse_readcomicsonline_uri(title)
if not title:
logger.info(
@ -285,7 +292,7 @@ def manual_scrobble_book(
# TODO: Check for scrobble of this book already and if so, update the page count
book = Book.find_or_create(title, enrich=True)
book = Book.find_or_create(title, url=url, enrich=True)
scrobble_dict = {
"user_id": user_id,
@ -309,6 +316,12 @@ def manual_scrobble_book(
scrobble = Scrobble.create_or_update(book, user_id, scrobble_dict, read_log_page=page)
if action == "stop":
if url:
    resume_url = next_url_if_exists(url)
    # isinstance() needs a class (or tuple of classes) as its second
    # argument; the previous string "BookLogData" raised TypeError at
    # runtime. Checking for the dict form avoids depending on the dataclass
    # being imported in this module.
    if isinstance(scrobble.log, dict):
        scrobble.log["resume_url"] = resume_url
    else:
        scrobble.log.resume_url = resume_url
    scrobble.save(update_fields=["log"])
scrobble.stop(force_finish=True)
return scrobble

View File

@ -1,5 +1,6 @@
import hashlib
import logging
import requests
import re
from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING, Optional
@ -393,7 +394,10 @@ def get_daily_calories_for_user_by_day(user_id: int, date: date| str) -> int:
if isinstance(date, str):
date = pendulum.parse(date)
qs = base_scrobble_qs(user_id).filter(day=date)
try:
qs = base_scrobble_qs(user_id).filter(day=date)
except AttibuteError as e:
logger.warning(f"Can't generate calorie total: {e}")
agg = qs.aggregate(total_calories=models.Sum("calories_int"))
return agg["total_calories"] or 0
@ -408,3 +412,41 @@ def get_daily_calorie_dict_for_user(user_id: int) -> dict[date, int]:
)
return {entry["day"]: entry["total_calories"] for entry in qs}
def remove_last_part(url: str) -> str:
    """Return ``url`` without its final ``/``-separated segment.

    Trailing slashes are stripped first. If no slash remains, the stripped
    string is returned unchanged.
    """
    trimmed = url.rstrip("/")
    head, separator, _last = trimmed.rpartition("/")
    return head if separator else trimmed
def next_url_if_exists(url: str) -> str:
    """Return ``url`` with its trailing number incremented, if that page exists.

    Trailing slashes are stripped, the run of digits at the end of the URL is
    incremented by one, and a trailing slash is appended. The candidate is
    probed with a HEAD request and, if that does not answer 200, with a GET.

    Args:
        url: URL expected to end in a number, e.g. an issue or page number.

    Returns:
        The incremented URL (with trailing slash) when a probe returns HTTP
        200. Otherwise an empty string: no trailing number, no 200 response,
        or a request error.
    """
    # Normalize (remove trailing slash)
    url = url.rstrip("/")

    match = re.search(r"(\d+)$", url)
    if not match:
        logger.info("No numeric segment found in the URL", extra={"url": url})
        return ""

    digits = match.group(1)
    # Keep any zero padding ("009" -> "010"). Substituting the int value back
    # through a regex would turn "009" into "0010".
    next_digits = str(int(digits) + 1).zfill(len(digits))
    # Splice at the match position rather than re-searching for the number.
    new_url = f"{url[:match.start(1)]}{next_digits}/"

    try:
        resp = requests.head(new_url, allow_redirects=True, timeout=5)
        if resp.status_code != 200:
            # Fallback: some sites may not support HEAD well, so try GET.
            resp = requests.get(new_url, timeout=5)
    except requests.RequestException:
        # Best effort: an unreachable page is treated as non-existent.
        return ""

    return new_url if resp.status_code == 200 else ""

View File

@ -68,6 +68,7 @@ LASTFM_SECRET_KEY = os.getenv("VROBBLER_LASTFM_SECRET_KEY")
IGDB_CLIENT_ID = os.getenv("VROBBLER_IGDB_CLIENT_ID")
IGDB_CLIENT_SECRET = os.getenv("VROBBLER_IGDB_CLIENT_SECRET")
COMICVINE_API_KEY = os.getenv("VROBBLER_COMICVINE_API_KEY")
BGG_ACCESS_TOKEN = os.getenv("VROBBLER_BGG_ACCESS_TOKEN", "")
GEOLOC_ACCURACY = os.getenv("VROBBLER_GEOLOC_ACCURACY", 3)
GEOLOC_PROXIMITY = os.getenv("VROBBLER_GEOLOC_PROXIMITY", "0.0001")
POINTS_FOR_MOVEMENT_HISTORY = os.getenv(

View File

@ -26,7 +26,20 @@
</div>
</div>
<div class="row">
{% if object.readcomics_url %}
<p><a href="{{object.readcomics_url}}">Read again</a></p>
{% endif %}
{% if object.next_readcomics_url %}
<p><a href="{{object.next_readcomics_url}}">Read next issue</a></p>
{% endif %}
<p>{{scrobbles.count}} scrobbles</p>
{# Show the resume link only for the first scrobble, and only when it has a resume URL, so no empty href is rendered. #}
{% for s in scrobbles %}
{% if forloop.first and s.logdata.resume_url %}
<p><a href="{{s.logdata.resume_url}}">Resume reading</a></p>
{% endif %}
{% endfor %}
</div>
<div class="row">
<div class="col-md">