Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 04f9e00c9c | |||
| c2dabd1dac |
@ -604,6 +604,12 @@ independent of the email flow it was originally created for
|
||||
|
||||
** TODO [#B] Is there a way to create unique slugs for media instances :media_types:
|
||||
|
||||
* Version 58.0 [1/1]
|
||||
** DONE [#B] Add scrobbling of Papers via webpages with doi.org links in them :feature:papers:
|
||||
:PROPERTIES:
|
||||
:ID: d30bb8aa-eefd-002c-38d5-3f2fcef345f2
|
||||
:END:
|
||||
|
||||
* Version 57.1 [1/1]
|
||||
** DONE [#A] Write poetry lock file :bug:deps:
|
||||
:PROPERTIES:
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "vrobbler"
|
||||
version = "57.1"
|
||||
version = "58.0"
|
||||
description = ""
|
||||
authors = ["Colin Powell <colin@unbl.ink>"]
|
||||
|
||||
|
||||
@ -1,8 +1,19 @@
|
||||
from books.models import Author, Book, Paper
|
||||
from books.models import Author, Book, Journal, Paper
|
||||
from django.contrib import admin
|
||||
from scrobbles.admin import ScrobbleInline
|
||||
|
||||
|
||||
@admin.register(Journal)
class JournalAdmin(admin.ModelAdmin):
    """Admin configuration for ``Journal`` records."""

    # Date drill-down and default ordering both key off the "created" timestamp.
    date_hierarchy = "created"
    list_display = (
        "title",
        "website_url",
    )
    search_fields = ("title",)
    # Newest journals first.
    ordering = ("-created",)
|
||||
|
||||
|
||||
@admin.register(Author)
|
||||
class AuthorAdmin(admin.ModelAdmin):
|
||||
date_hierarchy = "created"
|
||||
|
||||
18
vrobbler/apps/books/migrations/0038_paper_pdf_file.py
Normal file
18
vrobbler/apps/books/migrations/0038_paper_pdf_file.py
Normal file
@ -0,0 +1,18 @@
|
||||
# Generated by Django 4.2.29 on 2026-06-23 14:45
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the optional ``pdf_file`` upload field to the ``paper`` model."""

    dependencies = [
        ("books", "0037_book_volume_book_volume_comicvine_id"),
    ]

    operations = [
        migrations.AddField(
            model_name="paper",
            name="pdf_file",
            # blank/null are both allowed, so the field is optional.
            field=models.FileField(blank=True, null=True, upload_to="papers/pdf/"),
        ),
    ]
|
||||
@ -0,0 +1,92 @@
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
import django_extensions.db.fields
|
||||
import uuid
|
||||
|
||||
|
||||
def migrate_journal_data(apps, schema_editor):
    """Copy each paper's free-text journal name into a linked Journal row.

    For every paper with a truthy ``journal`` value, a Journal titled with
    that value is fetched or created and stored on the temporary
    ``_journal_tmp`` relation. Papers without a journal are left untouched.

    Args:
        apps: Historical app registry supplied by the migration runner.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    paper_model = apps.get_model("books", "Paper")
    journal_model = apps.get_model("books", "Journal")

    for record in paper_model.objects.all():
        legacy_name = getattr(record, "journal", None)
        if not legacy_name:
            continue
        linked_journal, _created = journal_model.objects.get_or_create(
            title=str(legacy_name)
        )
        record._journal_tmp = linked_journal
        record.save(update_fields=["_journal_tmp"])
|
||||
|
||||
|
||||
def reverse_migrate_journal_data(apps, schema_editor):
    """Undo ``migrate_journal_data``: write linked journal titles back as text.

    For every paper whose temporary ``_journal_tmp`` relation is set, the
    related journal's title is copied into the ``journal`` field.

    Args:
        apps: Historical app registry supplied by the migration runner.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    paper_model = apps.get_model("books", "Paper")

    for record in paper_model.objects.all():
        linked_journal = record._journal_tmp
        if not linked_journal:
            continue
        record.journal = linked_journal.title
        record.save(update_fields=["journal"])
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Replace the text ``paper.journal`` field with a relation to a new Journal model.

    The swap is done in five steps so existing journal names are preserved:
    create Journal, add a temporary nullable foreign key, copy the data,
    drop the old text field, then rename the temporary key to ``journal``.
    """

    dependencies = [
        ("books", "0038_paper_pdf_file"),
    ]

    operations = [
        # Step 1: create the Journal model.
        migrations.CreateModel(
            name="Journal",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "created",
                    django_extensions.db.fields.CreationDateTimeField(
                        auto_now_add=True, verbose_name="created"
                    ),
                ),
                (
                    "modified",
                    django_extensions.db.fields.ModificationDateTimeField(
                        auto_now=True, verbose_name="modified"
                    ),
                ),
                (
                    "uuid",
                    models.UUIDField(
                        blank=True, default=uuid.uuid4, editable=False, null=True
                    ),
                ),
                ("title", models.CharField(max_length=255)),
                ("description", models.TextField(blank=True, null=True)),
                ("website_url", models.URLField(blank=True, max_length=500, null=True)),
            ],
            options={
                "get_latest_by": "modified",
                "abstract": False,
            },
        ),
        # Step 2: add a temporary nullable FK alongside the existing text field.
        migrations.AddField(
            model_name="paper",
            name="_journal_tmp",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.DO_NOTHING,
                to="books.journal",
            ),
        ),
        # Step 3: copy journal names into Journal rows (reversible).
        migrations.RunPython(migrate_journal_data, reverse_migrate_journal_data),
        # Step 4: drop the old text field.
        migrations.RemoveField(
            model_name="paper",
            name="journal",
        ),
        # Step 5: give the FK the original field name.
        migrations.RenameField(
            model_name="paper",
            old_name="_journal_tmp",
            new_name="journal",
        ),
    ]
|
||||
@ -27,7 +27,11 @@ from books.sources.amazon import lookup_book_from_amazon
|
||||
from books.sources.openlibrary import (
|
||||
lookup_book_from_openlibrary as lookup_book_from_ol,
|
||||
)
|
||||
from books.sources.semantic import lookup_paper_from_semantic
|
||||
from books.sources.semantic import (
|
||||
lookup_paper_from_semantic,
|
||||
lookup_paper_from_semantic_by_doi,
|
||||
)
|
||||
from books.sources.scihub import SciHubService
|
||||
from books.utils import get_comic_issue_url
|
||||
from django.conf import settings
|
||||
from django.contrib.auth import get_user_model
|
||||
@ -82,6 +86,16 @@ class BookLogData(BaseLogData, LongPlayLogData):
|
||||
return int(total_duration / len(self.page_data))
|
||||
|
||||
|
||||
class Journal(TimeStampedModel):
    """A journal record with a required title and optional description and website."""

    uuid = models.UUIDField(default=uuid4, editable=False, **BNULL)
    title = models.CharField(max_length=255)
    description = models.TextField(**BNULL)
    website_url = models.URLField(max_length=500, **BNULL)

    def __str__(self):
        """Return the journal title as its display string."""
        return self.title
|
||||
|
||||
|
||||
class Author(TimeStampedModel):
|
||||
name = models.CharField(max_length=255)
|
||||
uuid = models.UUIDField(default=uuid4, editable=False, **BNULL)
|
||||
@ -540,6 +554,21 @@ class Book(LongPlayScrobblableMixin):
|
||||
return progress
|
||||
|
||||
|
||||
@dataclass
class PaperLogData(BaseLogData):
    """Log-data class for papers; overrides the ``notes`` form field."""

    @classmethod
    def override_fields(cls) -> dict:
        """Merge ``override_fields()`` from every base class, then set ``notes``.

        Returns:
            dict of field name to form field, with ``notes`` always mapped
            to a non-required ``NotesDictField``.
        """
        # Function-scope import -- presumably to avoid a circular import; confirm.
        from scrobbles.forms import NotesDictField

        fields = {}
        # Walk the MRO without this class; on key clashes, bases later in
        # the MRO overwrite earlier ones.
        for base in cls.mro()[1:]:
            if hasattr(base, "override_fields"):
                base_fields = base.override_fields()
                fields.update(base_fields)
        fields["notes"] = NotesDictField(required=False)
        return fields
|
||||
|
||||
|
||||
class Paper(LongPlayScrobblableMixin):
|
||||
"""Keeps track of Academic Papers"""
|
||||
|
||||
@ -559,14 +588,29 @@ class Paper(LongPlayScrobblableMixin):
|
||||
language = models.CharField(max_length=4, **BNULL)
|
||||
first_publish_year = models.IntegerField(**BNULL)
|
||||
publish_date = models.DateField(**BNULL)
|
||||
journal = models.CharField(max_length=255, **BNULL)
|
||||
journal = models.ForeignKey(Journal, on_delete=models.DO_NOTHING, **BNULL)
|
||||
journal_volume = models.CharField(max_length=50, **BNULL)
|
||||
abstract = models.TextField(**BNULL)
|
||||
tldr = models.CharField(max_length=255, **BNULL)
|
||||
openaccess_pdf_url = models.CharField(max_length=255, **BNULL)
|
||||
pdf_file = models.FileField(upload_to="papers/pdf/", **BNULL)
|
||||
|
||||
genre = TaggableManager(through=ObjectWithGenres, blank=True, verbose_name="Genre")
|
||||
|
||||
@property
def logdata_cls(self):
    """Return the log-data class for papers (``PaperLogData``)."""
    return PaperLogData
|
||||
|
||||
@property
def scihub_url(self):
    """Return the Sci-Hub URL for this paper's DOI, or None when it has no DOI.

    The host comes from ``settings.SCIHUB_DOMAIN`` and falls back to
    "sci-hub.st" when that setting is absent.
    """
    if self.doi_id:
        host = getattr(settings, "SCIHUB_DOMAIN", "sci-hub.st")
        return f"https://{host}/{self.doi_id}"
    return None
|
||||
|
||||
def get_absolute_url(self):
    """Return the URL of this paper's detail page, using its ``uuid`` as the slug."""
    return reverse("books:paper_detail", kwargs={"slug": self.uuid})
|
||||
|
||||
@classmethod
|
||||
def get_from_semantic(cls, title: str, overwrite: bool = False) -> "Paper":
|
||||
paper, created = cls.objects.get_or_create(title=title)
|
||||
@ -577,7 +621,7 @@ class Paper(LongPlayScrobblableMixin):
|
||||
|
||||
if created or overwrite:
|
||||
author_list = []
|
||||
author_dicts = paper_dict.pop("author_dicts")
|
||||
author_dicts = paper_dict.pop("author_dicts", None)
|
||||
if author_dicts:
|
||||
for author_dict in author_dicts:
|
||||
if author_dict.get("authorId"):
|
||||
@ -588,8 +632,11 @@ class Paper(LongPlayScrobblableMixin):
|
||||
if a_created:
|
||||
author.name = author_dict.get("name")
|
||||
author.save()
|
||||
# TODO enrich author?
|
||||
...
|
||||
|
||||
journal_name = paper_dict.pop("journal_name", None)
|
||||
if journal_name:
|
||||
journal, _ = Journal.objects.get_or_create(title=journal_name)
|
||||
paper.journal = journal
|
||||
|
||||
for k, v in paper_dict.items():
|
||||
setattr(paper, k, v)
|
||||
@ -601,3 +648,78 @@ class Paper(LongPlayScrobblableMixin):
|
||||
if genres:
|
||||
paper.genre.add(*genres)
|
||||
return paper
|
||||
|
||||
@classmethod
def find_or_create_by_doi(cls, doi_url: str) -> "Paper":
    """Return the Paper for a DOI link, creating and enriching it if new.

    An existing paper with the same DOI is returned untouched. Otherwise a
    placeholder paper is saved, metadata is looked up on Semantic Scholar
    (with Crossref filling in when no abstract came back), authors, journal
    and genres are attached, and a PDF download is attempted.

    Args:
        doi_url: A doi.org link (http or https, optionally with a ``dx.`` or
            ``www.`` host prefix) or a bare DOI.

    Returns:
        The existing or newly created Paper.
    """
    import re

    from books.sources.crossref import lookup_paper_from_crossref

    # Strip the resolver prefix in all its common spellings, then any query
    # string, fragment or trailing slash, leaving the bare DOI.
    doi = re.sub(
        r"^\s*(?:https?://)?(?:dx\.|www\.)?doi\.org/",
        "",
        doi_url,
        flags=re.IGNORECASE,
    )
    doi = doi.split("?")[0].split("#")[0].strip().rstrip("/")

    paper = cls.objects.filter(doi_id=doi).first()
    if paper:
        return paper

    # Save a placeholder first so the paper exists even if every lookup fails.
    paper = cls(doi_id=doi, title=f"Paper {doi}")
    paper.save()

    paper_dict = lookup_paper_from_semantic_by_doi(doi) or {}
    if not paper_dict.get("abstract"):
        crossref_dict = lookup_paper_from_crossref(doi) or {}
        # Crossref values win, but anything only Semantic Scholar knew
        # (title, authors, open-access URL, ...) is kept rather than
        # discarded when Crossref comes back sparse or fails.
        paper_dict = {
            **{k: v for k, v in paper_dict.items() if v is not None},
            **{k: v for k, v in crossref_dict.items() if v is not None},
        }

    if paper_dict:
        # Pull out the keys that are not plain model attributes before the
        # setattr loop below.
        author_dicts = paper_dict.pop("author_dicts", None) or []
        journal_name = paper_dict.pop("journal_name", None)
        genres = paper_dict.pop("genres", None) or []

        author_list = []
        for author_dict in author_dicts:
            author_id = author_dict.get("authorId")
            author_name = author_dict.get("name")
            if author_id:
                # NOTE(review): Crossref supplies an ORCID as "authorId", so
                # ORCIDs end up in semantic_id next to Semantic Scholar ids.
                author, a_created = Author.objects.get_or_create(
                    semantic_id=author_id
                )
                if a_created:
                    author.name = author_name
                    author.save()
            elif author_name:
                author, a_created = Author.objects.get_or_create(
                    name=author_name
                )
            else:
                continue
            author_list.append(author)

        if journal_name:
            journal, _ = Journal.objects.get_or_create(title=journal_name)
            paper.journal = journal

        for k, v in paper_dict.items():
            if v is not None:
                setattr(paper, k, v)
        paper.save()

        if author_list:
            paper.authors.add(*author_list)
        if genres:
            paper.genre.add(*genres)

    if not paper.pdf_file:
        service = SciHubService()
        filename = f"{doi.replace('/', '_')}.pdf"

        # Prefer the publisher's open-access copy when one is advertised.
        if paper.openaccess_pdf_url:
            pdf_content = service.fetch_from_url(paper.openaccess_pdf_url)
            if pdf_content:
                paper.pdf_file.save(filename, ContentFile(pdf_content))

        if not paper.pdf_file:
            # Best effort: a failed download must not fail the scrobble.
            try:
                pdf_content = service.fetch_pdf(doi)
                if pdf_content:
                    paper.pdf_file.save(filename, ContentFile(pdf_content))
            except Exception as e:
                logger.error(
                    "[paper] sci-hub PDF download failed",
                    extra={"doi": doi, "error": str(e)},
                )

    return paper
|
||||
|
||||
94
vrobbler/apps/books/sources/crossref.py
Normal file
94
vrobbler/apps/books/sources/crossref.py
Normal file
@ -0,0 +1,94 @@
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
||||
import requests
|
||||
|
||||
CROSSREF_WORK_URL = "https://api.crossref.org/works/{}"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _strip_jats(text: str) -> str:
|
||||
if not text:
|
||||
return ""
|
||||
text = re.sub(r"</?jats:[^>]*>", "", text)
|
||||
text = re.sub(r"^\s*Abstract\s*", "", text)
|
||||
return text.strip()
|
||||
|
||||
|
||||
def lookup_paper_from_crossref(doi: str) -> dict:
    """Fetch metadata for ``doi`` from the Crossref works API.

    Args:
        doi: Bare DOI, e.g. "10.1000/xyz123".

    Returns:
        A dict that always contains "doi_id" and, when Crossref supplies
        them, "title", "abstract", "author_dicts", "journal_name",
        "journal_volume", "pages", "first_publish_year" and "publish_date".
        On a network error, a non-200 status or an unusable body, only
        ``{"doi_id": doi}`` is returned; those failures do not raise.
    """
    url = CROSSREF_WORK_URL.format(doi)
    headers = {"User-Agent": "Vrobbler/1.0 (mailto:hello@example.com)"}
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as e:
        logger.warning(
            "Crossref request failed",
            extra={"doi": doi, "error": str(e)},
        )
        return {"doi_id": doi}

    if response.status_code != 200:
        logger.warning(
            "Bad response from Crossref",
            extra={"doi": doi, "status": response.status_code},
        )
        return {"doi_id": doi}

    try:
        data = response.json()
    except ValueError:
        # ValueError is the common base of the json and simplejson decode
        # errors that requests may raise.
        return {"doi_id": doi}

    msg = data.get("message") if isinstance(data, dict) else None
    if not msg:
        return {"doi_id": doi}

    paper_dict = {"doi_id": doi}

    titles = msg.get("title") or []
    if titles:
        paper_dict["title"] = titles[0]

    abstract = msg.get("abstract") or ""
    if abstract:
        paper_dict["abstract"] = _strip_jats(abstract)

    author_dicts = []
    for author in msg.get("author") or []:
        # "or ''" guards against explicit nulls, which would otherwise be
        # formatted as the literal string "None".
        given = author.get("given") or ""
        family = author.get("family") or ""
        name = f"{given} {family}".strip()
        if not name:
            continue
        entry = {"name": name}
        orcid = author.get("ORCID") or ""
        if orcid:
            # Crossref returns ORCIDs as http:// or https:// URLs.
            # NOTE(review): the ORCID is exposed under "authorId", the key
            # that also carries Semantic Scholar author ids -- confirm the
            # consumer is meant to treat them alike.
            entry["authorId"] = re.sub(r"^https?://orcid\.org/", "", orcid)
        author_dicts.append(entry)
    if author_dicts:
        paper_dict["author_dicts"] = author_dicts

    container = msg.get("container-title") or []
    if container:
        paper_dict["journal_name"] = container[0]

    volume = msg.get("volume")
    if volume:
        paper_dict["journal_volume"] = volume

    page = msg.get("page")
    if page:
        parts = page.split("-")
        if len(parts) == 2:
            try:
                first_page, last_page = int(parts[0]), int(parts[1])
            except ValueError:
                # Non-numeric ranges such as "e12-e19" carry no usable count.
                pass
            else:
                if last_page >= first_page:
                    # Inclusive range: pages 10-20 are 11 pages.
                    paper_dict["pages"] = last_page - first_page + 1

    # Use the first date field that carries a usable year.
    for date_field in ("published-print", "published-online", "created"):
        date_data = msg.get(date_field)
        if not date_data:
            continue
        date_parts = date_data.get("date-parts") or []
        parts = date_parts[0] if date_parts else []
        # Skip empty or null date-parts instead of crashing on int(None).
        if not parts or parts[0] is None:
            continue
        paper_dict["first_publish_year"] = int(parts[0])
        if len(parts) >= 3 and all(p is not None for p in parts[:3]):
            year, month, day = (int(p) for p in parts[:3])
            paper_dict["publish_date"] = f"{year:04d}-{month:02d}-{day:02d}"
        break

    return paper_dict
|
||||
142
vrobbler/apps/books/sources/scihub.py
Normal file
142
vrobbler/apps/books/sources/scihub.py
Normal file
@ -0,0 +1,142 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from django.conf import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SCIHUB_DOMAINS = [
|
||||
"sci-hub.ru",
|
||||
"sci-hub.ee",
|
||||
"sci-hub.st",
|
||||
"sci-hub.do",
|
||||
]
|
||||
|
||||
|
||||
class SciHubService:
    """Download paper PDFs from a direct URL or by scraping Sci-Hub mirrors."""

    def __init__(self):
        """Create an HTTP session that sends a desktop-browser User-Agent."""
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            }
        )

    def fetch_from_url(self, url: str) -> Optional[bytes]:
        """Download ``url`` and return its bytes if the response looks like a PDF.

        Returns None on a non-200 status, a non-PDF response or a request error.
        """
        try:
            resp = self.session.get(url, timeout=60)
            if resp.status_code != 200:
                logger.warning(
                    "[pdf] URL download failed",
                    extra={"status": resp.status_code, "url": url},
                )
                return None
            if not self._looks_like_pdf(resp):
                return None
            return resp.content
        except requests.RequestException as e:
            logger.error(
                "[pdf] URL download request failed",
                extra={"url": url, "error": str(e)},
            )
            return None

    def fetch_pdf(self, doi: str) -> Optional[bytes]:
        """Try each Sci-Hub mirror in turn and return the first PDF found for ``doi``.

        The mirror named by ``settings.SCIHUB_DOMAIN`` (if any) is tried
        first, followed by the remaining entries of ``SCIHUB_DOMAINS``.

        Returns None when no mirror yields a PDF.
        """
        configured_domain = getattr(settings, "SCIHUB_DOMAIN", None)
        # Put the configured mirror at the front even when it is also one of
        # the defaults, so the operator's choice is honoured before fallbacks.
        domains_to_try = [d for d in SCIHUB_DOMAINS if d != configured_domain]
        if configured_domain:
            domains_to_try.insert(0, configured_domain)

        for domain in domains_to_try:
            url = f"https://{domain}/{doi}"
            logger.info(
                "[scihub] trying domain",
                extra={"domain": domain, "doi": doi},
            )
            try:
                response = self.session.get(url, timeout=30)
                if response.status_code != 200:
                    continue

                pdf_url = self._extract_pdf_url(response.text, url)
                if not pdf_url:
                    continue

                pdf_response = self.session.get(pdf_url, timeout=60)
                if pdf_response.status_code != 200:
                    continue

                if not self._looks_like_pdf(pdf_response):
                    continue

                logger.info(
                    "[scihub] PDF downloaded successfully",
                    extra={
                        "domain": domain,
                        "doi": doi,
                        "size": len(pdf_response.content),
                    },
                )
                return pdf_response.content
            except requests.RequestException as e:
                logger.debug(
                    "[scihub] domain failed",
                    extra={"domain": domain, "doi": doi, "error": str(e)},
                )
                continue

        logger.warning(
            "[scihub] all domains failed",
            extra={"doi": doi, "tried": domains_to_try},
        )
        return None

    def _looks_like_pdf(self, response: requests.Response) -> bool:
        """Return True if the Content-Type or final URL suggests a PDF."""
        content_type = response.headers.get("Content-Type", "")
        if "application/pdf" in content_type:
            return True
        if content_type.startswith("application/octet"):
            return True
        if response.url.endswith(".pdf"):
            return True
        return False

    @staticmethod
    def _absolute_url(page_url: str, ref: str) -> str:
        """Return ``ref`` unchanged when already absolute, else resolve it against ``page_url``."""
        # Test for a real scheme: a bare "http" prefix would also match
        # relative paths such as "httpdocs/file.pdf".
        if ref.startswith(("http://", "https://")):
            return ref
        return urljoin(page_url, ref)

    def _extract_pdf_url(self, html: str, page_url: str) -> Optional[str]:
        """Find the PDF link in a Sci-Hub page.

        Checks, in order: the ``#pdf`` iframe, a PDF ``<embed>``, the first
        link inside ``#download``, then any anchor whose href contains
        ".pdf". Returns an absolute URL, or None when nothing matches.
        """
        soup = BeautifulSoup(html, "html.parser")

        download_div = soup.find("div", {"id": "download"})
        candidates = (
            (soup.find("iframe", {"id": "pdf"}), "src"),
            (soup.find("embed", {"type": "application/pdf"}), "src"),
            (download_div.find("a") if download_div else None, "href"),
        )
        for tag, attr in candidates:
            if tag and tag.get(attr):
                return self._absolute_url(page_url, tag[attr])

        # Last resort: any link that mentions a PDF.
        for link in soup.find_all("a", href=True):
            href = link["href"]
            if ".pdf" in href:
                return self._absolute_url(page_url, href)

        return None
|
||||
@ -9,6 +9,7 @@ PAPER_SEARCH_URL = (
|
||||
"https://api.semanticscholar.org/graph/v1/paper/search/match?query={}"
|
||||
)
|
||||
PAPER_DETAIL_URL = "https://api.semanticscholar.org/graph/v1/paper/{}?fields=title,authors,url,year,abstract,externalIds,citationCount,referenceCount,journal,fieldsOfStudy,publicationDate,openAccessPdf"
|
||||
PAPER_DOI_URL = "https://api.semanticscholar.org/graph/v1/paper/DOI:{}?fields=title,authors,url,year,abstract,externalIds,citationCount,referenceCount,journal,fieldsOfStudy,publicationDate,openAccessPdf"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -39,6 +40,18 @@ def lookup_paper_from_semantic(title: str) -> dict:
|
||||
if not result:
|
||||
return paper_dict
|
||||
|
||||
paper_dict.update(_parse_semantic_result(result))
|
||||
paper_dict.setdefault("title", title)
|
||||
if paper_dict.get("publish_date"):
|
||||
paper_dict["publish_date"] = datetime.strptime(
|
||||
paper_dict["publish_date"], "%Y-%m-%d"
|
||||
)
|
||||
|
||||
return paper_dict
|
||||
|
||||
|
||||
def _parse_semantic_result(result: dict) -> dict:
|
||||
paper_dict = {}
|
||||
page_str = result.get("journal", {}).get("pages")
|
||||
if page_str:
|
||||
try:
|
||||
@ -55,12 +68,10 @@ def lookup_paper_from_semantic(title: str) -> dict:
|
||||
paper_dict["corpus_id"] = result.get("externalIds", {}).get("CorpusId")
|
||||
paper_dict["semantic_title"] = result.get("title")
|
||||
paper_dict["first_publish_year"] = result.get("year")
|
||||
paper_dict["publish_date"] = datetime.strptime(
|
||||
result.get("publicationDate", "1950-01-01"), "%Y-%m-%d"
|
||||
)
|
||||
paper_dict["publish_date"] = result.get("publicationDate")
|
||||
paper_dict["abstract"] = result.get("abstract")
|
||||
paper_dict["tldr"] = result.get("bib", {}).get("abstract")
|
||||
paper_dict["journal"] = result.get("journal", {}).get("name")
|
||||
paper_dict["journal_name"] = result.get("journal", {}).get("name")
|
||||
paper_dict["journal_volume"] = result.get("journal", {}).get("volume")
|
||||
paper_dict["openaccess_pdf_url"] = result.get("openAccessPdf", {}).get("url")
|
||||
paper_dict["base_run_time_seconds"] = paper_dict.get("pages", 10) * getattr(
|
||||
@ -68,5 +79,19 @@ def lookup_paper_from_semantic(title: str) -> dict:
|
||||
)
|
||||
paper_dict["author_dicts"] = result.get("authors")
|
||||
paper_dict["genres"] = result.get("fieldsOfStudy")
|
||||
|
||||
return paper_dict
|
||||
|
||||
|
||||
def lookup_paper_from_semantic_by_doi(doi: str) -> dict:
    """Look up a paper on Semantic Scholar by DOI.

    Args:
        doi: Bare DOI, e.g. "10.1000/xyz123".

    Returns:
        The parsed paper dict with a "title" filled in, or
        ``{"doi_id": doi}`` when the API gives no usable response.
    """
    response = get_api_result(PAPER_DOI_URL.format(doi))
    if not response:
        return {"doi_id": doi}

    try:
        result = json.loads(response.content)
    except ValueError:
        # An undecodable body is treated like a missing result rather than
        # aborting the caller.
        logger.warning(
            "Undecodable response from Semantic Scholar",
            extra={"doi": doi},
        )
        return {"doi_id": doi}
    if not result or not isinstance(result, dict):
        return {"doi_id": doi}

    paper_dict = _parse_semantic_result(result)
    if not paper_dict.get("title"):
        # "or" also covers an explicit null title in the API response.
        paper_dict["title"] = result.get("title") or f"Paper {doi}"
    return paper_dict
|
||||
|
||||
@ -16,4 +16,15 @@ urlpatterns = [
|
||||
views.AuthorDetailView.as_view(),
|
||||
name="author_detail",
|
||||
),
|
||||
path("papers/", views.PaperListView.as_view(), name="paper_list"),
|
||||
path(
|
||||
"papers/<slug:slug>/",
|
||||
views.PaperDetailView.as_view(),
|
||||
name="paper_detail",
|
||||
),
|
||||
path(
|
||||
"papers/<slug:slug>/upload_pdf/",
|
||||
views.PaperUploadPdfView.as_view(),
|
||||
name="paper_upload_pdf",
|
||||
),
|
||||
]
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
from django.http import HttpResponseRedirect
|
||||
from django.urls import reverse
|
||||
from django.views import View
|
||||
from django.views import generic
|
||||
from books.models import Book, Author
|
||||
from books.models import Book, Author, Paper
|
||||
|
||||
from scrobbles.views import ScrobbleableListView, ScrobbleableDetailView
|
||||
|
||||
@ -15,3 +18,24 @@ class BookDetailView(ScrobbleableDetailView):
|
||||
class AuthorDetailView(generic.DetailView):
|
||||
model = Author
|
||||
slug_field = "uuid"
|
||||
|
||||
|
||||
class PaperListView(ScrobbleableListView):
    """List view over ``Paper`` objects."""

    model = Paper
|
||||
|
||||
|
||||
class PaperDetailView(ScrobbleableDetailView):
    """Detail view for a single ``Paper``."""

    model = Paper
|
||||
|
||||
|
||||
class PaperUploadPdfView(View):
    """Attach an uploaded PDF to a paper, then redirect to its detail page."""

    def post(self, request, slug):
        """Store ``request.FILES["pdf_file"]`` on the paper whose uuid is ``slug``.

        Anonymous users, unknown papers, missing files and non-PDF uploads
        are ignored; every outcome redirects to the paper detail page.
        """
        detail_url = reverse("books:paper_detail", args=[slug])

        paper = Paper.objects.filter(uuid=slug).first()
        if not paper or not request.user.is_authenticated:
            return HttpResponseRedirect(detail_url)

        pdf_file = request.FILES.get("pdf_file")
        # The stored file is later embedded in an iframe, so only accept
        # real PDFs rather than arbitrary (e.g. HTML) uploads.
        if pdf_file and self._is_pdf(pdf_file):
            paper.pdf_file.save(pdf_file.name, pdf_file)

        return HttpResponseRedirect(detail_url)

    @staticmethod
    def _is_pdf(upload) -> bool:
        """Return True if ``upload`` is named ``*.pdf`` and has a PDF header."""
        if not (upload.name or "").lower().endswith(".pdf"):
            return False
        # The "%PDF-" marker must appear near the start of the file.
        head = upload.read(1024)
        upload.seek(0)  # rewind so the later save() stores the whole file
        return b"%PDF-" in head
|
||||
|
||||
@ -14,6 +14,7 @@ LONG_PLAY_MEDIA = {
|
||||
"books": "Book",
|
||||
"bricksets": "BrickSet",
|
||||
"tasks": "Task",
|
||||
"papers": "Paper",
|
||||
}
|
||||
|
||||
# Media types that should just be finished if they go over time
|
||||
@ -61,6 +62,7 @@ SCROBBLE_CONTENT_URLS = {
|
||||
"-b": ["https://www.amazon.com/"],
|
||||
"-t": ["https://app.todoist.com/app/task/{id}"],
|
||||
"-p": ["https://www.ipdb.plus/IPDb/puzzle.php?id="],
|
||||
"-pp": ["https://doi.org/"],
|
||||
"-l": ["https://brickset.com/sets/"],
|
||||
"-c": ["https://readcomicsonline.ru"],
|
||||
"-h": ["https://www.twitch.tv/"],
|
||||
@ -83,6 +85,7 @@ MANUAL_SCROBBLE_FNS = {
|
||||
"-f": "manual_scrobble_food",
|
||||
"-h": "manual_scrobble_twitch_channel",
|
||||
"-dg": "manual_scrobble_discgolf",
|
||||
"-pp": "manual_scrobble_paper",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1377,6 +1377,8 @@ class Scrobble(TimeStampedModel):
|
||||
media_obj = self.channel
|
||||
if self.birding_location:
|
||||
media_obj = self.birding_location
|
||||
if self.paper:
|
||||
media_obj = self.paper
|
||||
if self.disc_golf_course:
|
||||
media_obj = self.disc_golf_course
|
||||
return media_obj
|
||||
|
||||
@ -9,7 +9,7 @@ import requests
|
||||
from beers.models import Beer
|
||||
from boardgames.models import BoardGame, BoardGameDesigner, BoardGameLocation
|
||||
from books.constants import READCOMICSONLINE_URL
|
||||
from books.models import Book, BookLogData, BookPageLogData
|
||||
from books.models import Book, BookLogData, BookPageLogData, Paper
|
||||
from books.utils import parse_readcomicsonline_uri
|
||||
from bricksets.models import BrickSet
|
||||
from dateutil.parser import parse
|
||||
@ -641,6 +641,8 @@ def manual_scrobble_from_url(
|
||||
item_id = "tt" + str(item_id)
|
||||
elif content_key == "-h" and "twitch.tv" in url:
|
||||
item_id = url
|
||||
elif content_key == "-pp" and "doi.org" in url:
|
||||
item_id = url
|
||||
|
||||
scrobble_fn = MANUAL_SCROBBLE_FNS[content_key]
|
||||
return eval(scrobble_fn)(item_id, user_id, source=source, action=action)
|
||||
@ -995,6 +997,38 @@ def manual_scrobble_task(
|
||||
return scrobble
|
||||
|
||||
|
||||
def manual_scrobble_paper(
    doi_url: str,
    user_id: int,
    source: str = "Bookmarklet",
    action: Optional[str] = None,
):
    """Scrobble the paper behind a DOI link for the given user.

    Args:
        doi_url: DOI link identifying the paper; the paper is created when
            it is not already known.
        user_id: Id of the user the scrobble belongs to.
        source: Label recorded as the scrobble's source.
        action: When "stop", the scrobble is force-finished straight away.

    Returns:
        The created or updated scrobble.
    """
    paper = Paper.find_or_create_by_doi(doi_url)

    scrobble_dict = dict(
        user_id=user_id,
        timestamp=timezone.now(),
        playback_position_seconds=0,
        source=source,
    )
    log_context = {
        "paper_id": paper.id,
        "user_id": user_id,
        "scrobble_dict": scrobble_dict,
        "media_type": Scrobble.MediaType.PAPER,
    }
    logger.info(
        "[vrobbler-scrobble] paper scrobble request received",
        extra=log_context,
    )

    scrobble = Scrobble.create_or_update(paper, user_id, scrobble_dict)
    if action != "stop":
        return scrobble

    scrobble.stop(force_finish=True)
    return scrobble
|
||||
|
||||
|
||||
def manual_scrobble_webpage(
|
||||
url: str,
|
||||
user_id: int,
|
||||
|
||||
@ -610,7 +610,12 @@ class ManualScrobbleView(FormView):
|
||||
item_str = form.cleaned_data.get("item_id")
|
||||
logger.debug(f"Looking for scrobblable media with input {item_str}")
|
||||
|
||||
key, item_id = item_str[:2], item_str[3:]
|
||||
if len(item_str) > 2 and item_str[:3] in MANUAL_SCROBBLE_FNS:
|
||||
key = item_str[:3]
|
||||
item_id = item_str[4:]
|
||||
else:
|
||||
key = item_str[:2]
|
||||
item_id = item_str[3:]
|
||||
scrobble_fn = MANUAL_SCROBBLE_FNS[key]
|
||||
scrobble = eval(scrobble_fn)(item_id, self.request.user.id)
|
||||
|
||||
|
||||
@ -397,6 +397,8 @@ else:
|
||||
MEDIA_URL = os.getenv("VROBBLER_MEDIA_URL", "/media/")
|
||||
|
||||
|
||||
SCIHUB_DOMAIN = os.getenv("VROBBLER_SCIHUB_DOMAIN", "sci-hub.st")
|
||||
|
||||
JSON_LOGGING = os.getenv("VROBBLER_JSON_LOGGING", "false").lower() in TRUTHY
|
||||
LOG_TYPE = "json" if JSON_LOGGING else "log"
|
||||
|
||||
|
||||
62
vrobbler/templates/books/paper_detail.html
Normal file
62
vrobbler/templates/books/paper_detail.html
Normal file
@ -0,0 +1,62 @@
|
||||
{% extends "base_list.html" %}
|
||||
|
||||
{% block title %}{{object.title}}{% endblock %}
|
||||
|
||||
{% block lists %}
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<h1>{{object.title}}</h1>
|
||||
{% if object.authors.all %}
|
||||
<p>{{object.authors.all|join:", "}}</p>
|
||||
{% endif %}
|
||||
{% if object.journal %}
|
||||
<p><em>{{object.journal.title}}{% if object.journal_volume %}, vol. {{object.journal_volume}}{% endif %}</em></p>
|
||||
{% endif %}
|
||||
{% if object.doi_id %}
|
||||
<p><a href="https://doi.org/{{object.doi_id}}">doi: {{object.doi_id}}</a></p>
|
||||
{% endif %}
|
||||
{% if object.abstract %}
|
||||
<p>{{object.abstract|linebreaks|truncatewords:200}}</p>
|
||||
{% endif %}
|
||||
{% if object.pdf_file %}
|
||||
<button class="btn btn-outline-secondary btn-sm" onclick="togglePdf()">Show/Hide PDF</button>
|
||||
<div id="pdf-embed" style="display:none; margin-top:0.5rem;">
|
||||
<iframe src="{{object.pdf_file.url}}" style="width:100%;height:600px;border:1px solid #ccc;"></iframe>
|
||||
</div>
|
||||
<script>
|
||||
function togglePdf() {
|
||||
var el = document.getElementById('pdf-embed');
|
||||
el.style.display = el.style.display === 'none' ? 'block' : 'none';
|
||||
}
|
||||
</script>
|
||||
{% endif %}
|
||||
{% if object.openaccess_pdf_url %}
|
||||
<p><a href="{{object.openaccess_pdf_url}}">Open Access PDF</a></p>
|
||||
{% endif %}
|
||||
{% if object.pdf_file %}
|
||||
<a href="{{object.pdf_file.url}}">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" class="text-danger" viewBox="0 0 16 16">
|
||||
<path d="M14 14V4.5L9.5 0H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2zM9.5 3A1.5 1.5 0 0 0 11 4.5h2V14a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1h5.5v2z"/>
|
||||
<path d="M4.603 14.087a.81.81 0 0 1-.438-.42c-.195-.388-.13-.776.08-1.102.197-.307.526-.568.897-.707.07-.024.15-.023.222 0l.044.014a.27.27 0 0 1 .152.295.7.7 0 0 1-.128.416c-.159.206-.344.388-.544.555-.357.299-.592.527-.406.623.08.04.272.045.578-.057a.93.93 0 0 0 .363-.226.8.8 0 0 0 .194-.277.28.28 0 0 1 .414-.112.3.3 0 0 1 .065.422 1.3 1.3 0 0 1-.67.522c-.38.147-.746.103-1.04.02zM7.12 11.5c.16-.186.34-.34.486-.514.294-.35.628-.617.947-.786.204-.108.546-.206.715-.153.087.027.135.068.16.126a.6.6 0 0 1-.003.27 1 1 0 0 1-.158.354c-.163.242-.349.47-.7.769-.332.283-.598.483-.793.607a1.1 1.1 0 0 1-.582.214c-.136 0-.234-.038-.298-.11-.05-.056-.076-.134-.07-.236a.99.99 0 0 1 .098-.45c.08-.17.21-.35.378-.57zm5.09 2.013c-.135.06-.277.104-.428.116-.205.015-.39-.048-.553-.177-.104-.082-.226-.196-.317-.325a1 1 0 0 1-.17-.572c0-.15.035-.27.095-.36.04-.063.089-.098.153-.112.138-.028.316.04.477.174.074.061.145.136.228.232.174.2.302.37.397.515.108.164.153.285.121.345a.25.25 0 0 1-.003.053c0 .079-.05.147-.198.262z"/>
|
||||
</svg>
|
||||
</a>
|
||||
{% endif %}
|
||||
{% if object.scihub_url %}
|
||||
<p><a href="{{object.scihub_url}}">View on Sci-Hub</a></p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
<hr>
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<form method="post" enctype="multipart/form-data" action="{% url 'books:paper_upload_pdf' slug=object.uuid %}">
|
||||
{% csrf_token %}
|
||||
<div class="form-group">
|
||||
<label for="pdf_file">Upload PDF</label>
|
||||
<input type="file" class="form-control-file" id="pdf_file" name="pdf_file" accept=".pdf,application/pdf">
|
||||
</div>
|
||||
<button type="submit" class="btn btn-primary btn-sm">Upload</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
13
vrobbler/templates/books/paper_list.html
Normal file
13
vrobbler/templates/books/paper_list.html
Normal file
@ -0,0 +1,13 @@
|
||||
{% extends "base_list.html" %}
|
||||
|
||||
{% block title %}Papers{% endblock %}
|
||||
|
||||
{% block lists %}
|
||||
<div class="row">
|
||||
<div class="col-md">
|
||||
<div class="table-responsive">
|
||||
{% include "_longplay_scrobblable_list.html" %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
Reference in New Issue
Block a user