[videos] Add skatevideo lookup properly

This commit is contained in:
2024-09-06 11:01:45 -04:00
parent b5d194e74f
commit 0a8acdf33f
3 changed files with 135 additions and 52 deletions

View File

@ -123,6 +123,7 @@ class Video(ScrobblableMixin):
UNKNOWN = "U", _("Unknown")
TV_EPISODE = "E", _("TV Episode")
MOVIE = "M", _("Movie")
SKATE_VIDEO = "S", _("Skate Video")
video_type = models.CharField(
max_length=1,

View File

@ -0,0 +1,128 @@
from enum import Enum
from typing import Optional
from bs4 import BeautifulSoup
import requests
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# User-Agent header value sent by scrape_data_from_amazon().
USER_AGENT = (
"Mozilla/5.0 (Android 4.4; Mobile; rv:41.0) Gecko/41.0 Firefox/41.0"
)
# Site root; relative links scraped from result pages are appended to it.
SKATEVIDEOSITE_URL = "https://www.skatevideosite.com"
# Search URL template; the ``{title}`` placeholder is filled with str.format().
SKATEVIDEOSITE_SEARCH_URL = SKATEVIDEOSITE_URL + "/search/?q={title}"
class AmazonAttribute(Enum):
    """Integer identifiers for book attributes, numbered from 0 to 7.

    NOTE(review): nothing in the visible part of this module references
    this enum -- confirm whether it is still needed here.
    """

    SERIES = 0
    PAGES = 1
    LANGUAGE = 2
    PUBLISHER = 3
    PUB_DATE = 4
    DIMENSIONS = 5
    ISBN_10 = 6
    ISBN_13 = 7
def strip_and_clean(text):
    """Return ``text`` without leading or trailing newlines and whitespace."""
    without_newlines = text.strip("\n")
    # A bare strip() trims both ends, covering the separate right/left passes.
    return without_newlines.strip()
def get_rating_from_soup(soup) -> Optional[int]:
    """Extract an integer rating from a parsed page.

    Looks up the first ``div`` with class ``allmusic-rating`` and converts
    its cleaned text to ``int``.

    Returns:
        The rating, or ``None`` when the element is missing or its text is
        not a valid integer.
    """
    try:
        rating_node = soup.find("div", class_="allmusic-rating")
        if not rating_node:
            return None
        return int(strip_and_clean(rating_node.get_text()))
    except ValueError:
        # Non-numeric rating text is treated the same as "no rating".
        return None
def get_review_from_soup(soup) -> str:
    """Extract review text from a parsed page.

    Looks up the first ``div`` with class ``text`` and returns its cleaned
    text content.

    Returns:
        The review text, or an empty string when the element is missing or
        a ``ValueError`` is raised while reading it.
    """
    try:
        text_node = soup.find("div", class_="text")
        return strip_and_clean(text_node.get_text()) if text_node else ""
    except ValueError:
        return ""
def scrape_data_from_amazon(url) -> dict:
    """Fetch ``url`` and scrape a rating and a review from the returned HTML.

    Args:
        url: Address of the page to download.

    Returns:
        A dict with ``"rating"`` (``int`` or ``None``) and ``"review"``
        (``str``) when the server answers with HTTP 200; an empty dict for
        any other status code.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            network failures, including a timeout.
    """
    # A timeout keeps a stalled server from blocking the caller forever;
    # requests waits indefinitely when none is given.
    response = requests.get(
        url, headers={"User-Agent": USER_AGENT}, timeout=10
    )
    if response.status_code != 200:
        return {}

    soup = BeautifulSoup(response.text, "html.parser")
    return {
        "rating": get_rating_from_soup(soup),
        "review": get_review_from_soup(soup),
    }
# Position of the detail-page link among all ``<a>`` tags of a search page.
# NOTE(review): this depends on the site's page layout -- confirm it still
# selects the first search result whenever the markup changes.
_SKATEVIDEOSITE_DETAIL_LINK_INDEX = 12

# Seconds to wait for skatevideosite.com before giving up on a request.
_SKATEVIDEOSITE_TIMEOUT_SECONDS = 10


def _fetch_skatevideosite_soup(url: str, headers: dict):
    """Return the parsed HTML of ``url``, or None on a network error or non-200."""
    try:
        response = requests.get(
            url, headers=headers, timeout=_SKATEVIDEOSITE_TIMEOUT_SECONDS
        )
    except requests.RequestException:
        logger.warning(
            "Request to SkateVideoSite failed",
            extra={"url": url},
            exc_info=True,
        )
        return None
    if response.status_code != 200:
        logger.info("Bad http response from SkateVideoSite %s", response)
        return None
    return BeautifulSoup(response.text, "html.parser")


def _parse_skatevideosite_year(detail_soup) -> Optional[str]:
    """Return the year text of a detail page without parentheses, or None."""
    span = detail_soup.find("span", class_="whitespace-normal")
    if span is None or not span.contents:
        return None
    first_child = span.contents[0]
    # Only plain text nodes carry the year; a nested tag is not parseable.
    if not isinstance(first_child, str):
        return None
    return first_child.replace("(", "").replace(")", "").strip() or None


def _parse_skatevideosite_run_time_seconds(detail_soup) -> Optional[int]:
    """Return the run time of a detail page in seconds, or None."""
    try:
        raw_minutes = (
            detail_soup.find("div", class_="p-1").contents[-1].contents[0]
        )
        minutes = int(raw_minutes.strip("(").strip("min )"))
    except (AttributeError, IndexError, TypeError, ValueError):
        # Missing node, unexpected nesting or non-numeric text.
        return None
    return minutes * 60


def lookup_video_from_skatevideosite(title: str) -> Optional[dict]:
    """Look up a skate video by title on skatevideosite.com.

    Searches the site for ``title``, takes the first search result and reads
    the year and run time from that result's detail page.

    Args:
        title: Free-text title to search for. It is URL-encoded before being
            placed in the query string.

    Returns:
        A dict with the keys ``title``, ``video_type`` (always ``"S"``),
        ``year``, ``run_time_seconds`` and ``cover_url``, or ``None`` when
        the title is empty, a request fails, nothing is found, or the detail
        page cannot be parsed.
    """
    from urllib.parse import quote_plus

    if not title:
        return None

    headers = {
        "user-agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/108.0.0.0 Safari/537.36"
        ),
        "accept-language": "en-GB,en;q=0.9",
    }

    # Encode the title so spaces, "&", "#" etc. cannot corrupt the query.
    search_url = SKATEVIDEOSITE_SEARCH_URL.format(title=quote_plus(title))
    search_soup = _fetch_skatevideosite_soup(search_url, headers)
    if search_soup is None:
        return None

    # Check for a usable result before spending a second request on it.
    card = search_soup.find("div", class_="card-body")
    result = card.find("a") if card else None
    cover = result.find("img") if result else None
    if not cover or not cover.get("alt"):
        logger.info(
            "No search results found on skatevideosite",
            extra={"title": title},
        )
        return None

    try:
        detail_path = search_soup.find_all("a")[
            _SKATEVIDEOSITE_DETAIL_LINK_INDEX
        ]["href"]
    except (IndexError, KeyError):
        logger.info(
            "No detail link found on skatevideosite",
            extra={"title": title},
        )
        return None

    detail_soup = _fetch_skatevideosite_soup(
        SKATEVIDEOSITE_URL + detail_path, headers
    )
    if detail_soup is None:
        return None

    year = _parse_skatevideosite_year(detail_soup)
    run_time_seconds = _parse_skatevideosite_run_time_seconds(detail_soup)
    if year is None or run_time_seconds is None:
        logger.info(
            "Could not parse skatevideosite detail page",
            extra={"title": title},
        )
        return None

    return {
        "title": str(cover.get("alt").replace(" cover", "")),
        "video_type": "S",
        "year": year,
        "run_time_seconds": run_time_seconds,
        "cover_url": str(cover.get("src") or ""),
    }

View File

@ -1,8 +1,9 @@
import logging
from scrobbles.utils import convert_to_seconds
from videos.imdb import lookup_video_from_imdb
from videos.models import Series, Video
from scrobbles.utils import convert_to_seconds
from videos.skatevideosite import lookup_video_from_skatevideosite
logger = logging.getLogger(__name__)
@ -12,10 +13,10 @@ def get_or_create_video(data_dict: dict, post_keys: dict, force_update=False):
post_keys.get("VIDEO_TITLE"), ""
)
imdb_metadata = lookup_video_from_imdb(name_or_id)
# skatevideosite_metadata = lookup_video_from_skatevideosite(name_or_id)
skatevideosite_metadata = lookup_video_from_skatevideosite(name_or_id)
# youtube_metadata = lookup_video_from_youtube(name_or_id)
video_dict = imdb_metadata
video_dict = skatevideosite_metadata or imdb_metadata
# video_metadata = imdb_metadata or skatevideosite_metadata or youtube_metadata
if not video_dict:
logger.info(
@ -42,8 +43,8 @@ def get_or_create_video(data_dict: dict, post_keys: dict, force_update=False):
post_keys.get("TMDB_ID"), None
)
series = None
if video_dict.get("series_name"):
series_name = video_dict.pop("series_name", None)
if series_name:
series_name = video_dict.pop("series_name")
series, series_created = Series.objects.get_or_create(
@ -66,50 +67,3 @@ def get_or_create_video(data_dict: dict, post_keys: dict, force_update=False):
def get_or_create_video_from_skatevideosite(title: str, force_update=True):
    """Placeholder with no implementation; ignores its arguments and returns ``None``."""
    return None
def get_or_create_video_from_jellyfin(jellyfin_data: dict, force_update=True):
"""Look up or create a ``Video`` from a Jellyfin webhook payload.

The video is fetched or created with ``Video.objects.get_or_create`` using
the payload's ``Provider_imdb`` value (with ``"tt"`` removed) and ``Name``.
For a newly created video, the type, year, overview, tagline, run time,
TVDB/TVRage ids and episode/season numbers are copied from the payload.
Payloads whose ``ItemType`` is ``"Episode"`` are also linked to a ``Series``
that is looked up or created by ``SeriesName``.

``force_update`` is accepted but never read in this function.

Returns the ``Video`` instance.
"""
# NOTE(review): .replace() raises AttributeError if "Provider_imdb" is
# present with a None value -- confirm payloads never send null here.
video, video_created = Video.objects.get_or_create(
imdb_id=jellyfin_data.get("Provider_imdb", "").replace("tt", ""),
title=jellyfin_data.get("Name"),
)
if video_created:
# Default video type; may be replaced by TV_EPISODE below.
video_type = Video.VideoType.MOVIE
series = None
if jellyfin_data.get("ItemType", "") == "Episode":
series_name = jellyfin_data.get("SeriesName", "")
series, series_created = Series.objects.get_or_create(
name=series_name
)
if series_created:
series.fix_metadata()
video_type = Video.VideoType.TV_EPISODE
video_dict = {
"video_type": video_type,
"year": jellyfin_data.get("Year", ""),
"overview": jellyfin_data.get("Overview", None),
"tagline": jellyfin_data.get("Tagline", None),
"run_time_seconds": convert_to_seconds(
jellyfin_data.get("RunTime", 0)
),
"tvdb_id": jellyfin_data.get("Provider_tvdb", None),
"tvrage_id": jellyfin_data.get("Provider_tvrage", None),
"episode_number": jellyfin_data.get("EpisodeNumber", None),
"season_number": jellyfin_data.get("SeasonNumber", None),
}
if series:
video_dict["tv_series_id"] = series.id
# Fields are written with a queryset update(), then the in-memory instance
# is reloaded so it reflects the stored values.
Video.objects.filter(pk=video.id).update(**video_dict)
video.refresh_from_db()
video.fix_metadata()
return video