Add fuzzy matching for book titles

This commit is contained in:
2024-01-27 00:20:15 -05:00
parent 0b3bc53704
commit 919fa1b0b4
5 changed files with 248 additions and 590 deletions

811
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -44,6 +44,7 @@ ipython = "^8.14.0"
pendulum = "^2.1.2" pendulum = "^2.1.2"
trafilatura = "^1.6.3" trafilatura = "^1.6.3"
django-imagekit = "^5.0.0" django-imagekit = "^5.0.0"
thefuzz = "^0.22.1"
[tool.poetry.group.dev] [tool.poetry.group.dev]
optional = true optional = true

View File

@ -185,7 +185,10 @@ class Book(LongPlayScrobblableMixin):
if "pages" in data.keys() and data.get("pages") == None: if "pages" in data.keys() and data.get("pages") == None:
data.pop("pages") data.pop("pages")
if not isinstance(data.get("pages"), int): if (
not isinstance(data.get("pages"), int)
and "pages" in data.keys()
):
logger.info( logger.info(
f"Pages for {self} from OL expected to be int, but got {data.get('pages')}" f"Pages for {self} from OL expected to be int, but got {data.get('pages')}"
) )

View File

@ -5,6 +5,8 @@ import urllib
import requests import requests
from thefuzz import fuzz
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
ISBN_URL = "https://openlibrary.org/isbn/{isbn}.json" ISBN_URL = "https://openlibrary.org/isbn/{isbn}.json"
@ -102,8 +104,9 @@ def lookup_book_from_openlibrary(
top = None top = None
for result in results.get("docs"): for result in results.get("docs"):
if title.lower() == result.get("title", "").lower(): if fuzz.ratio(title.lower(), result.get("title", "").lower()) > 90:
top = result top = result
break
if not top: if not top:
for result in results.get("docs"): for result in results.get("docs"):

View File

@ -1,14 +1,14 @@
from unittest import skip
import pytest import pytest
from vrobbler.apps.books.openlibrary import ( from vrobbler.apps.books.openlibrary import lookup_book_from_openlibrary
lookup_book_from_openlibrary,
)
def test_lookup_modern_book(): def test_lookup_modern_book():
book = lookup_book_from_openlibrary("Matrix", "Lauren Groff") book = lookup_book_from_openlibrary("Matrix", "Lauren Groff")
assert book.get("title") == "Matrix" assert book.get("title") == "Matrix"
assert book.get("openlibrary_id") == "OL47572299M" assert book.get("openlibrary_id") == "OL32170218M"
assert book.get("ol_author_id") == "OL3675729A" assert book.get("ol_author_id") == "OL3675729A"
@ -26,3 +26,11 @@ def test_lookup_foreign_book():
assert book.get("title") == "Ravage" assert book.get("title") == "Ravage"
assert book.get("openlibrary_id") == "OL8837839M" assert book.get("openlibrary_id") == "OL8837839M"
assert book.get("ol_author_id") == "OL152472A" assert book.get("ol_author_id") == "OL152472A"
# Looks up "Hark! A Vagrant" by title only (no author argument is passed) and
# checks the title, edition ID and author ID of the returned record.
# Currently skipped: per the skip reason, the OpenLibrary record is stale and
# a correction has been submitted upstream but is not yet visible.
@skip("This is rotten in OL, updated but waiting for it to update")
def test_lookup_book():
book = lookup_book_from_openlibrary("Hark! A Vagrant")
assert book.get("title") == "Hark! A Vagrant"
# NOTE(review): the two IDs below are identical to the ones asserted for
# "Ravage" in test_lookup_foreign_book -- presumably copy-pasted placeholders;
# confirm the real values for this title before removing the skip.
assert book.get("openlibrary_id") == "OL8837839M"
assert book.get("ol_author_id") == "OL152472A"