[podcasts] Clean up lookup and creation

This commit is contained in:
2025-10-14 11:17:20 -04:00
parent 42ce6df9bd
commit 61bab1f734
3 changed files with 59 additions and 49 deletions

View File

@ -92,7 +92,7 @@ fetching and simple saving.
:LOGBOOK:
CLOCK: [2025-07-09 Wed 09:55]--[2025-07-09 Wed 10:15] => 0:20
:END:
* Backlog [1/28]
* Backlog [3/27]
** TODO [#C] Create small utility to clean up tracks scrobbled with wonky playback times :vrobbler:personal:bug:music:scrobbles:
** TODO [#C] Move to using more robust mopidy-webhooks package from PyPI :utility:improvement:
:PROPERTIES:
@ -471,27 +471,33 @@ TypeError: can only concatenate str (not "NoneType") to str
an hour for part of the year. Also, we'd need to adjust any old scrobbles that
took place with DST off to roll them back by an hour.
** TODO [#A] Fix bug where podcast scrobbling creates duplicate Podcast :project:vrobbler:scrobbling:podcasts:bug:personal:
Rather than pick up an existing Podcast using the podcast title in the mopidy
file name, Vrobbler creates a new podcast with no enriched data. Not a big deal
for my use as the volume of podcasts I listen to makes manual fixes easy. But
it's annoying.
** TODO [#A] Allow scrobbling from the Food list page's start links :vrobbler:bug:food:scrobbling:personal:project:
https://life.lab.unbl.ink/scrobble/e39779c8-62a5-46a6-bdef-fb7662810dc6/start/
** TODO [#A] Allow reading comic books from readcomicsonline.ru :vrobbler:books:feature:comicbook:personal:project:scrobbling:
- Note taken on [2025-09-25 Thu 10:52]
Things to consider are whether we scrobble the issue on one page, send it to
archivebox? (yes), and how best to enrich the data
** TODO [#A] Puzzles (and all longplays) should have a "Completed?" column on their detail page :vrobbler:bug:puzzles:personal:project:
** TODO [#A] Fix raw text webpage title not truncating to 254 chars :vrobbler:personal:bug:webpages:
- Note taken on [2025-09-30 Tue 09:33]
This may have already been resolved ... need to just confirm it.
** TODO
** DONE [#A] Fix bug where podcast scrobbling creates duplicate Podcast :project:vrobbler:scrobbling:podcasts:bug:personal:
:PROPERTIES:
:ID: 7377ef6c-5fa7-9e4e-9080-f9810a76118c
:END:
Rather than pick up an existing Podcast using the podcast title in the mopidy
file name, Vrobbler creates a new podcast with no enriched data. Not a big deal
for my use as the volume of podcasts I listen to makes manual fixes easy. But
it's annoying.
** DONE [#A] Allow reading comic books from readcomicsonline.ru :vrobbler:books:feature:comicbook:personal:project:scrobbling:
:PROPERTIES:
:ID: 7c7e9ecc-b675-68c3-764f-ef771ce5d88f
:END:
- Note taken on [2025-09-25 Thu 10:52]
Things to consider are whether we scrobble the issue on one page, send it to
archivebox? (yes), and how best to enrich the data
** DONE [#A] Add RSS feed lookups to podcasts :vrobbler:personal:feature:podcasts:
:PROPERTIES:

View File

@ -144,6 +144,7 @@ class PodcastEpisode(ScrobblableMixin):
cls,
title: str,
podcast_name: str,
podcast_description: str,
pub_date: str,
number: int = 0,
mopidy_uri: str = "",
@ -155,31 +156,33 @@ class PodcastEpisode(ScrobblableMixin):
producer before saving the episode so it can be scrobbled.
"""
log_context={"mopidy_uri": mopidy_uri, "media_type": "Podcast"}
producer = None
if producer_name:
producer = Producer.find_or_create(producer_name)
podcast = Podcast.objects.filter(
name__iexact=podcast_name,
).first()
if not podcast:
podcast = Podcast.objects.create(
name=podcast_name, producer=producer
)
if enrich:
podcast.fix_metadata()
podcast, created = Podcast.objects.get_or_create(name=podcast_name, defaults={"description": podcast_description})
log_context["podcast_id"] = podcast.id
log_context["podcast_name"] = podcast.name
if created:
logger.info("Created new podcast", extra=log_context)
if enrich and created:
logger.info("Enriching new podcast", extra=log_context)
podcast.fix_metadata()
episode = cls.objects.filter(
title__iexact=title, podcast=podcast
).first()
if not episode:
episode = cls.objects.create(
title=title,
podcast=podcast,
run_time_seconds=run_time_seconds,
number=number,
pub_date=pub_date,
mopidy_uri=mopidy_uri,
)
episode, created = cls.objects.get_or_create(
title=title,
podcast=podcast,
defaults={
"run_time_seconds": run_time_seconds,
"number": number,
"pub_date": pub_date,
"mopidy_uri": mopidy_uri,
}
)
if created:
log_context["episode_id"] = episode.id
log_context["episode_title"] = episode.title
logger.info("Created new podcast episode", extra=log_context)
return episode

View File

@ -34,10 +34,15 @@ def fetch_metadata_from_rss(uri: str) -> dict[str, Any]:
logger.warning("Tried to parse uri as RSS feed, but no target found", extra=log_context)
return podcast_data
podcast_publisher = feed.feed.get("itunes_publisher")
podcast_owner = feed.feed.itunes_owner.get("name") if isinstance(feed.feed.itunes_owner, dict) else feed.feed.itunes_owner
podcast_other = feed.feed.get("managingeditor") or feed.feed.get("copyright")
podcast_data = {
"podcast_name": feed.feed.get("title", "Unknown Podcast"),
"podcast_description": feed.feed.get("description", ""),
"podcast_link": feed.feed.get("link", ""),
"podcast_producer": podcast_publisher or podcast_owner or podcast_other
}
for entry in feed.entries:
@ -110,24 +115,20 @@ def get_or_create_podcast(post_data: dict) -> PodcastEpisode:
mopidy_uri = post_data.get("mopidy_uri", "")
parsed_data = parse_mopidy_uri(mopidy_uri)
producer_dict = {"name": post_data.get("artist")}
podcast_name = post_data.get("album")
if not podcast_name:
podcast_name = parsed_data.get("podcast_name")
podcast_dict = {"name": podcast_name}
episode_name = parsed_data.get("episode_filename")
producer_name = parsed_data.get("podcast_producer", post_data.get("artist", ""))
podcast_name = parsed_data.get("podcast_name", post_data.get("album", ""))
episode_name = parsed_data.get("episode_title", parsed_data.get("episode_filename", ""))
run_time_seconds = parsed_data.get("episode_runtime_seconds", post_data.get("run_time", 2700))
episode_dict = {
"title": episode_name,
"run_time_seconds": run_time_seconds,
"number": parsed_data.get("episode_num"),
"podcast_name": podcast_name,
"podcast_description": parsed_data.get("podcast_description"),
"pub_date": parsed_data.get("pub_date"),
"number": parsed_data.get("episode_num"),
"mopidy_uri": mopidy_uri,
"producer_name": producer_name,
"run_time_seconds": run_time_seconds,
}
return PodcastEpisode.find_or_create(
podcast_dict, producer_dict, episode_dict
)
return PodcastEpisode.find_or_create(**episode_dict)