Add fuzzy matching for book titles
This commit is contained in:
811
poetry.lock
generated
811
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -44,6 +44,7 @@ ipython = "^8.14.0"
|
|||||||
pendulum = "^2.1.2"
|
pendulum = "^2.1.2"
|
||||||
trafilatura = "^1.6.3"
|
trafilatura = "^1.6.3"
|
||||||
django-imagekit = "^5.0.0"
|
django-imagekit = "^5.0.0"
|
||||||
|
thefuzz = "^0.22.1"
|
||||||
|
|
||||||
[tool.poetry.group.dev]
|
[tool.poetry.group.dev]
|
||||||
optional = true
|
optional = true
|
||||||
|
|||||||
@ -185,7 +185,10 @@ class Book(LongPlayScrobblableMixin):
|
|||||||
if "pages" in data.keys() and data.get("pages") == None:
|
if "pages" in data.keys() and data.get("pages") == None:
|
||||||
data.pop("pages")
|
data.pop("pages")
|
||||||
|
|
||||||
if not isinstance(data.get("pages"), int):
|
if (
|
||||||
|
not isinstance(data.get("pages"), int)
|
||||||
|
and "pages" in data.keys()
|
||||||
|
):
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Pages for {self} from OL expected to be int, but got {data.get('pages')}"
|
f"Pages for {self} from OL expected to be int, but got {data.get('pages')}"
|
||||||
)
|
)
|
||||||
|
|||||||
@ -5,6 +5,8 @@ import urllib
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from thefuzz import fuzz
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
ISBN_URL = "https://openlibrary.org/isbn/{isbn}.json"
|
ISBN_URL = "https://openlibrary.org/isbn/{isbn}.json"
|
||||||
@ -102,8 +104,9 @@ def lookup_book_from_openlibrary(
|
|||||||
|
|
||||||
top = None
|
top = None
|
||||||
for result in results.get("docs"):
|
for result in results.get("docs"):
|
||||||
if title.lower() == result.get("title", "").lower():
|
if fuzz.ratio(title.lower(), result.get("title", "").lower()) > 90:
|
||||||
top = result
|
top = result
|
||||||
|
break
|
||||||
|
|
||||||
if not top:
|
if not top:
|
||||||
for result in results.get("docs"):
|
for result in results.get("docs"):
|
||||||
|
|||||||
@ -1,14 +1,14 @@
|
|||||||
|
from unittest import skip
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from vrobbler.apps.books.openlibrary import (
|
from vrobbler.apps.books.openlibrary import lookup_book_from_openlibrary
|
||||||
lookup_book_from_openlibrary,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_lookup_modern_book():
|
def test_lookup_modern_book():
|
||||||
book = lookup_book_from_openlibrary("Matrix", "Lauren Groff")
|
book = lookup_book_from_openlibrary("Matrix", "Lauren Groff")
|
||||||
assert book.get("title") == "Matrix"
|
assert book.get("title") == "Matrix"
|
||||||
assert book.get("openlibrary_id") == "OL47572299M"
|
assert book.get("openlibrary_id") == "OL32170218M"
|
||||||
assert book.get("ol_author_id") == "OL3675729A"
|
assert book.get("ol_author_id") == "OL3675729A"
|
||||||
|
|
||||||
|
|
||||||
@ -26,3 +26,11 @@ def test_lookup_foreign_book():
|
|||||||
assert book.get("title") == "Ravage"
|
assert book.get("title") == "Ravage"
|
||||||
assert book.get("openlibrary_id") == "OL8837839M"
|
assert book.get("openlibrary_id") == "OL8837839M"
|
||||||
assert book.get("ol_author_id") == "OL152472A"
|
assert book.get("ol_author_id") == "OL152472A"
|
||||||
|
|
||||||
|
|
||||||
|
@skip("This is rotten in OL, updated but waiting for it to update")
|
||||||
|
def test_lookup_book():
|
||||||
|
book = lookup_book_from_openlibrary("Hark! A Vagrant")
|
||||||
|
assert book.get("title") == "Hark! A Vagrant"
|
||||||
|
assert book.get("openlibrary_id") == "OL8837839M"
|
||||||
|
assert book.get("ol_author_id") == "OL152472A"
|
||||||
|
|||||||
Reference in New Issue
Block a user