[podcasts] Fixes enrichment of podcasts with podcastindex

This commit is contained in:
2025-04-07 13:30:29 -04:00
parent bcc3f46806
commit 4767cc7e52
7 changed files with 265 additions and 55 deletions

View File

@ -0,0 +1,40 @@
# Generated by Django 4.2.19 on 2025-04-07 17:16
from django.db import migrations, models
class Migration(migrations.Migration):
    """Drop ``google_podcasts_url`` from ``podcast`` and add four new columns."""

    dependencies = [("podcasts", "0014_alter_podcastepisode_run_time_seconds")]

    operations = [
        migrations.RemoveField(model_name="podcast", name="google_podcasts_url"),
        migrations.AddField(
            model_name="podcast",
            name="dead_date",
            field=models.DateField(null=True, blank=True),
        ),
        migrations.AddField(
            model_name="podcast",
            name="itunes_id",
            field=models.TextField(null=True, blank=True, max_length=15),
        ),
        # NOTE(review): a column literally named "null" with verbose_name
        # "blank" looks unintended -- presumably generated from a model field
        # declared with ``*BNULL`` instead of ``**BNULL``. Confirm against the
        # model and regenerate this migration if so.
        migrations.AddField(
            model_name="podcast",
            name="null",
            field=models.CharField(
                verbose_name="blank", max_length=150, default=""
            ),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name="podcast",
            name="site_link",
            field=models.URLField(null=True, blank=True),
        ),
    ]

View File

@ -0,0 +1,28 @@
# Generated by Django 4.2.19 on 2025-04-07 17:18
from django.db import migrations
import taggit.managers
class Migration(migrations.Migration):
    """Add the ``genre`` tag manager to ``podcast``."""

    dependencies = [
        ("scrobbles", "0068_scrobble_paper_alter_scrobble_media_type"),
        (
            "podcasts",
            "0015_remove_podcast_google_podcasts_url_podcast_dead_date_and_more",
        ),
    ]

    operations = [
        migrations.AddField(
            model_name="podcast",
            name="genre",
            field=taggit.managers.TaggableManager(
                to="scrobbles.Genre",
                through="scrobbles.ObjectWithGenres",
                verbose_name="Tags",
                help_text="A comma-separated list of tags.",
            ),
        ),
    ]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.19 on 2025-04-07 17:29
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the nullable ``podcastindex_id`` column to ``podcast``."""

    dependencies = [("podcasts", "0016_podcast_genre")]

    operations = [
        migrations.AddField(
            model_name="podcast",
            name="podcastindex_id",
            field=models.CharField(null=True, blank=True, max_length=100),
        ),
    ]

View File

@ -1,5 +1,4 @@
import logging
from typing import Dict, Optional
from uuid import uuid4
import requests
@ -10,8 +9,16 @@ from django.db import models
from django.urls import reverse
from django.utils.translation import gettext_lazy as _
from django_extensions.db.models import TimeStampedModel
from podcasts.scrapers import scrape_data_from_google_podcasts
from scrobbles.mixins import ScrobblableConstants, ScrobblableMixin
from scrobbles.mixins import (
ObjectWithGenres,
ScrobblableConstants,
ScrobblableMixin,
)
from taggit.managers import TaggableManager
from podcasts.sources.podcastindex import (
lookup_podcast_from_podcastindex,
)
logger = logging.getLogger(__name__)
BNULL = {"blank": True, "null": True}
@ -24,6 +31,13 @@ class Producer(TimeStampedModel):
def __str__(self):
    """Use the producer's name as its string representation."""
    return str(self.name)
@classmethod
def find_or_create(cls, name):
    """Return the producer whose name matches ``name`` case-insensitively.

    When no such producer exists, one is created with exactly ``name``.
    """
    match = cls.objects.filter(name__iexact=name).first()
    return match or cls.objects.create(name=name)
class Podcast(TimeStampedModel):
name = models.CharField(max_length=255)
@ -31,11 +45,17 @@ class Podcast(TimeStampedModel):
producer = models.ForeignKey(
Producer, on_delete=models.DO_NOTHING, **BNULL
)
podcastindex_id = models.CharField(max_length=100, **BNULL)
# BNULL must be unpacked as keyword arguments (``**``). With a single ``*``
# the dict *keys* are passed positionally, i.e. verbose_name="blank" and
# name="null", which registers the field under the name "null" instead of
# "owner" and leaves it non-nullable. Migration 0015 added the column under
# the name "null", so this change needs a follow-up (or regenerated) migration.
owner = models.CharField(max_length=150, **BNULL)
description = models.TextField(**BNULL)
active = models.BooleanField(default=True)
feed_url = models.URLField(**BNULL)
google_podcasts_url = models.URLField(**BNULL)
site_link = models.URLField(**BNULL)
description = models.TextField(**BNULL)
cover_image = models.ImageField(upload_to="podcasts/covers/", **BNULL)
itunes_id = models.TextField(max_length=15, **BNULL)
dead_date = models.DateField(**BNULL)
genre = TaggableManager(through=ObjectWithGenres)
def __str__(self):
    """Use the podcast's name as its string representation."""
    return str(self.name)
@ -49,32 +69,43 @@ class Podcast(TimeStampedModel):
user=user, podcast_episode__podcast=self
).order_by("-timestamp")
def scrape_google_podcasts(self, force=False):
podcast_dict = {}
if not self.cover_image or force:
podcast_dict = scrape_data_from_google_podcasts(self.name)
if podcast_dict:
if not self.producer:
self.producer, created = Producer.objects.get_or_create(
name=podcast_dict["producer"]
)
self.description = podcast_dict.get("description")
self.google_podcasts_url = podcast_dict.get("google_url")
self.save(
update_fields=[
"description",
"producer",
"google_podcasts_url",
]
)
@property
def itunes_link(self) -> str:
    """Apple Podcasts URL built from ``itunes_id``; empty string when unset."""
    return (
        f"https://podcasts.apple.com/us/podcast/id{self.itunes_id}"
        if self.itunes_id
        else ""
    )
def fix_metadata(self, force=False):
    """Enrich this podcast with metadata looked up on PodcastIndex by name.

    Does nothing when a PodcastIndex ID is already stored, unless ``force``
    is true. Genres from the lookup are added as tags, the cover image is
    downloaded when none is stored (or when ``force`` is true), and every
    remaining key of the lookup result is set as an attribute on the
    instance before it is saved.
    """
    if self.podcastindex_id and not force:
        logger.warning(
            "Podcast already has PodcastIndex ID, use force=True to overwrite"
        )
        return

    podcast_dict = lookup_podcast_from_podcastindex(self.name)
    if not podcast_dict:
        logger.info(
            "No podcast data found from PodcastIndex. Are credentials setup?"
        )
        return

    # "genres" and "image_url" are not plain attributes to copy over, so
    # they are removed from the dict first. Pop with a default so a lookup
    # result without them does not raise KeyError.
    genres = podcast_dict.pop("genres", None)
    if genres:
        self.genre.add(*genres)

    # Pop exactly once: re-reading the key after popping it always yields
    # None and would silently skip the cover download.
    cover_url = podcast_dict.pop("image_url", None)
    if cover_url and (force or not self.cover_image):
        try:
            r = requests.get(cover_url, timeout=30)
        except requests.RequestException:
            # The cover is best-effort; a network failure must not abort
            # the rest of the enrichment.
            logger.warning(
                "Could not download cover image from PodcastIndex",
                exc_info=True,
            )
        else:
            if r.status_code == 200:
                fname = f"{self.name}_{self.uuid}.jpg"
                self.cover_image.save(
                    fname, ContentFile(r.content), save=True
                )

    for attr, value in podcast_dict.items():
        setattr(self, attr, value)
    self.save()
class PodcastEpisode(ScrobblableMixin):
COMPLETION_PERCENT = getattr(settings, "PODCAST_COMPLETION_PERCENT", 90)
@ -108,42 +139,45 @@ class PodcastEpisode(ScrobblableMixin):
@classmethod
def find_or_create(
    cls,
    title: str,
    podcast_name: str,
    pub_date: str,
    number: int = 0,
    mopidy_uri: str = "",
    producer_name: str = "",
    run_time_seconds: int = 1800,
    enrich: bool = True,
) -> "PodcastEpisode":
    """Find or create a podcast episode, creating its podcast and producer
    as needed, so the episode can be scrobbled.

    The podcast is matched case-insensitively by ``podcast_name`` and the
    episode case-insensitively by ``title`` within that podcast. When
    ``enrich`` is true, a newly created podcast has ``fix_metadata()``
    called on it. ``producer_name`` is only used when a new podcast has to
    be created.
    """
    producer = None
    if producer_name:
        producer = Producer.find_or_create(producer_name)

    podcast = Podcast.objects.filter(name__iexact=podcast_name).first()
    if not podcast:
        podcast = Podcast.objects.create(
            name=podcast_name, producer=producer
        )
        if enrich:
            podcast.fix_metadata()

    episode = cls.objects.filter(
        title__iexact=title, podcast=podcast
    ).first()
    if not episode:
        # Callers fall back to "" when a value could not be parsed from the
        # source URI; an empty string is not a valid number or date, so map
        # it to the signature default / None before handing it to the ORM.
        # NOTE(review): assumes pub_date is nullable on the model -- confirm.
        episode = cls.objects.create(
            title=title,
            podcast=podcast,
            run_time_seconds=run_time_seconds,
            number=0 if number == "" else number,
            pub_date=None if pub_date == "" else pub_date,
            mopidy_uri=mopidy_uri,
        )
    return episode

View File

@ -0,0 +1,75 @@
import hashlib
import logging
import time

import pytz
import requests
from django.conf import settings
from django.utils import timezone
from scrobbles.utils import timestamp_user_tz_to_utc
# PodcastIndex credentials, read from Django settings. The empty-string
# default keeps this module importable when a settings module does not define
# them; two-argument getattr() would raise AttributeError at import time.
PODCASTINDEX_API_KEY = getattr(settings, "PODCASTINDEX_API_KEY", "")
PODCASTINDEX_API_SECRET = getattr(settings, "PODCASTINDEX_API_SECRET", "")
def get_auth_headers():
    """Build the HTTP headers sent with a PodcastIndex API request.

    ``Authorization`` is the SHA-1 hex digest of the API key, the API secret
    and the current Unix time concatenated; the same time value is sent as
    ``X-Auth-Date``.
    """
    auth_date = str(int(time.time()))
    token = PODCASTINDEX_API_KEY + PODCASTINDEX_API_SECRET + auth_date
    digest = hashlib.sha1(token.encode("utf-8")).hexdigest()

    headers = {
        "User-Agent": "MyPodcastApp/1.0",
        "X-Auth-Date": auth_date,
        "X-Auth-Key": PODCASTINDEX_API_KEY,
        "Authorization": digest,
        "Content-Type": "application/json",
    }
    return headers
def lookup_podcast_from_podcastindex(
    podcast_name: str, dump_raw_response: bool = False
) -> dict:
    """Search PodcastIndex by term and return metadata for the top match.

    Returns an empty dict when the request fails, the response status is not
    200, the body is not valid JSON, or no feeds are found. When
    ``dump_raw_response`` is true, the raw ``feeds`` value of the response is
    returned instead of the mapped dict.

    ``dead_date`` in the result is the newest episode's publication time when
    that is more than 180 days in the past, otherwise ``None``.
    """
    log = logging.getLogger(__name__)
    url = "https://api.podcastindex.org/api/1.0/search/byterm"

    try:
        response = requests.get(
            url,
            headers=get_auth_headers(),
            params={"q": podcast_name},
            timeout=30,
        )
        if response.status_code != 200:
            log.warning(
                "Failed to fetch data: %s %s",
                response.status_code,
                response.text,
            )
            return {}
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or an undecodable body: treat as "nothing found".
        log.warning("PodcastIndex lookup failed", exc_info=True)
        return {}

    feeds = data.get("feeds")
    if dump_raw_response:
        return feeds
    if not feeds:
        log.info("No podcasts found.")
        return {}

    top_feed_dict = feeds[0]

    # Only evaluate staleness when the feed reports a newest-item timestamp.
    dead_date = None
    newest_pubdate = top_feed_dict.get("newestItemPubdate")
    if newest_pubdate:
        newest_episode_date = timestamp_user_tz_to_utc(
            newest_pubdate, pytz.UTC
        )
        if (timezone.now() - newest_episode_date).days > 180:
            dead_date = newest_episode_date

    # "categories" can be missing or null; fall back to no genres.
    categories = top_feed_dict.get("categories") or {}

    return {
        "podcastindex_id": top_feed_dict.get("id"),
        "title": top_feed_dict.get("title"),
        "site_link": top_feed_dict.get("link"),
        "description": top_feed_dict.get("description"),
        "owner": top_feed_dict.get("ownerName"),
        "image_url": top_feed_dict.get("artwork"),
        "feed_url": top_feed_dict.get("url"),
        "itunes_id": top_feed_dict.get("itunesId"),
        "genres": list(categories.values()),
        "dead_date": dead_date,
    }

View File

@ -15,7 +15,8 @@ from locations.models import GeoLocation
from music.constants import JELLYFIN_POST_KEYS, MOPIDY_POST_KEYS
from music.models import Track
from music.utils import get_or_create_track
from podcasts.utils import get_or_create_podcast
from podcasts.models import PodcastEpisode
from podcasts.utils import parse_mopidy_uri
from scrobbles.constants import (
JELLYFIN_AUDIO_ITEM_TYPES,
MANUAL_SCROBBLE_FNS,
@ -54,7 +55,19 @@ def mopidy_scrobble_media(post_data: dict, user_id: int) -> Scrobble:
)
if media_type == Scrobble.MediaType.PODCAST_EPISODE:
media_obj = get_or_create_podcast(post_data)
parsed_data = parse_mopidy_uri(post_data.get("mopidy_uri", ""))
podcast_name = post_data.get(
"album", parsed_data.get("podcast_name", "")
)
media_obj = PodcastEpisode.find_or_create(
title=parsed_data.get("episode_filename", ""),
podcast_name=podcast_name,
producer_name=post_data.get("artist", ""),
number=parsed_data.get("episode_num", ""),
pub_date=parsed_data.get("pub_date", ""),
mopidy_uri=post_data.get("mopidy_uri", ""),
)
else:
media_obj = Track.find_or_create(
title=post_data.get("name", ""),

View File

@ -60,6 +60,8 @@ DUMP_REQUEST_DATA = (
# Third-party API credentials, each read from a VROBBLER_* environment
# variable with the fallback shown (None where no fallback is given).
THESPORTSDB_API_KEY = os.environ.get("VROBBLER_THESPORTSDB_API_KEY", "2")
THEAUDIODB_API_KEY = os.environ.get("VROBBLER_THEAUDIODB_API_KEY", "2")
PODCASTINDEX_API_KEY = os.environ.get("VROBBLER_PODCASTINDEX_API_KEY", "")
PODCASTINDEX_API_SECRET = os.environ.get(
    "VROBBLER_PODCASTINDEX_API_SECRET", ""
)
TMDB_API_KEY = os.environ.get("VROBBLER_TMDB_API_KEY", "")
LASTFM_API_KEY = os.environ.get("VROBBLER_LASTFM_API_KEY")
LASTFM_SECRET_KEY = os.environ.get("VROBBLER_LASTFM_SECRET_KEY")