Files
vrobbler/vrobbler/apps/books/sources/google.py
Colin Powell 935d059a20
All checks were successful
build / test (push) Successful in 2m30s
[books] Fix metadata scraping
It's not perfect, but at least we get covers now
2026-06-12 09:36:21 -04:00

69 lines
2.2 KiB
Python

import json
import logging
from urllib.parse import quote

import pendulum
import requests
from django.conf import settings
# API key sent as the `key` query parameter; read from Django settings once,
# when this module is imported.
API_KEY = settings.GOOGLE_API_KEY
# Volume search URL template. `{title}` is placed inside double quotes in the
# `q` parameter and `{key}` receives the API key.
GOOGLE_BOOKS_URL = 'https://www.googleapis.com/books/v1/volumes?q="{title}"&key={key}'
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Upper bound, in seconds, on how long a single Google Books request may take.
_REQUEST_TIMEOUT_SECONDS = 10


def lookup_book_from_google(title: str) -> dict:
    """Look up a book by title on Google Books and return its metadata.

    The title is sent as a quoted phrase in the ``q`` parameter and only the
    first volume in the response is used.

    Args:
        title: Book title to search for.

    Returns:
        One of three shapes:

        * ``{"title": title}`` when the request fails, times out, answers
          with a non-200 status, or the body is not valid JSON.
        * ``{}`` when the response contains no usable first volume.
        * Otherwise a dict with ``title``, ``authors``, ``publisher``,
          ``pages``, ``isbn_13``, ``isbn_10``, ``language``, ``summary``,
          ``genres``, ``cover_url`` and ``base_run_time_seconds``, plus
          ``publish_date`` (and ``first_publish_year`` when the date could
          be parsed) if the volume has a ``publishedDate``.
    """
    book_dict = {"title": title}

    # Percent-encode the title so characters such as "&", "#" or "+" stay
    # inside the q= value instead of being interpreted as URL syntax.
    url = GOOGLE_BOOKS_URL.format(title=quote(str(title), safe=""), key=API_KEY)
    headers = {"User-Agent": "Vrobbler 0.11.12"}

    try:
        response = requests.get(
            url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        # Only the exception type is logged: requests' error messages can
        # embed the request URL, which carries the API key.
        logger.warning("Request to Google failed: %s", type(exc).__name__)
        return book_dict

    if response.status_code != 200:
        logger.warning("Bad response from Google", extra={"response": response})
        return book_dict

    try:
        payload = json.loads(response.content)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        logger.warning("Invalid JSON from Google", extra={"response": response})
        return book_dict

    # `or [{}]` also covers an "items" key that is present but empty/None,
    # which would otherwise raise IndexError/TypeError on the [0] lookup.
    items = payload.get("items") or [{}]
    google_result = items[0].get("volumeInfo")
    if not google_result:
        # Kept as an empty dict (not book_dict) so existing callers that test
        # the result for truthiness keep working.
        return {}

    # Map identifier type -> value; a later duplicate type overrides an
    # earlier one, matching a sequential scan of the list.
    identifiers = {
        ident.get("type"): ident.get("identifier")
        for ident in google_result.get("industryIdentifiers") or []
    }

    book_dict["authors"] = google_result.get("authors")
    book_dict["publisher"] = google_result.get("publisher")
    book_dict["pages"] = google_result.get("pageCount")
    book_dict["isbn_13"] = identifiers.get("ISBN_13", "")
    book_dict["isbn_10"] = identifiers.get("ISBN_10", "")
    book_dict["language"] = google_result.get("language")
    book_dict["summary"] = google_result.get("description")
    book_dict["genres"] = google_result.get("categories")

    raw_date = google_result.get("publishedDate")
    if raw_date:
        try:
            book_dict["first_publish_year"] = pendulum.parse(raw_date).year
        except Exception:
            # Best effort: the year is optional, so any parser failure (or an
            # unexpected parse result without `.year`) is recorded and skipped
            # rather than aborting the whole lookup.
            logger.debug(
                "Could not parse publishedDate %r", raw_date, exc_info=True
            )
        # A bare four-character year is expanded to January 1st of that year;
        # every other value is passed through unchanged.
        if len(raw_date) == 4:
            book_dict["publish_date"] = f"{raw_date}-1-1"
        else:
            book_dict["publish_date"] = raw_date

    # Rewrite the thumbnail link: change the zoom parameter and drop the
    # page-curl edge parameter.
    thumbnail = google_result.get("imageLinks", {}).get("thumbnail", "")
    book_dict["cover_url"] = thumbnail.replace("zoom=1", "zoom=15").replace(
        "&edge=curl", ""
    )

    # Estimated reading time: pages * seconds-per-page, or a flat 3600 seconds
    # when the page count is missing or zero.
    pages = book_dict["pages"]
    if pages:
        book_dict["base_run_time_seconds"] = pages * getattr(
            settings, "AVERAGE_PAGE_READING_SECONDS", 60
        )
    else:
        book_dict["base_run_time_seconds"] = 3600

    return book_dict