[podcasts] Clean up lookup and creation

2025-10-14 11:17:20 -04:00
parent 42ce6df9bd
commit 61bab1f734
3 changed files with 59 additions and 49 deletions
--- a/PROJECT.org
+++ b/PROJECT.org
@ -92,7 +92,7 @@ fetching and simple saving.
 :LOGBOOK:
 CLOCK: [2025-07-09 Wed 09:55]--[2025-07-09 Wed 10:15] =>  0:20
 :END:
-* Backlog [1/28]
+* Backlog [3/27]
 ** TODO [#C] Create small utility to clean up tracks scrobbled with wonky playback times :vrobbler:personal:bug:music:scrobbles:
 ** TODO [#C] Move to using more robust mopidy-webhooks package from PyPI :utility:improvement:
 :PROPERTIES:
@ -471,27 +471,33 @@ TypeError: can only concatenate str (not "NoneType") to str
  an hour for part of the year. Also, we'd need to adjust any old scrobbles that
  took place with DST off to roll them back by an hour.

-** TODO [#A] Fix bug where podcast scrobbling creates duplicate Podcast :project:vrobbler:scrobbling:podcasts:bug:personal:
-Rather than pick up an existing Podcast using the podcast title in the mopidy
-file name, Vrobbler creates a new podcast with no enriched data. Not a big deal
-for my use as the volume of podcasts I listen to makes manual fixes easy. But
-it's annoying.
 ** TODO [#A] Allow scrobbling from the Food list page's start links :vrobbler:bug:food:scrobbling:personal:project:
 https://life.lab.unbl.ink/scrobble/e39779c8-62a5-46a6-bdef-fb7662810dc6/start/
-** TODO [#A] Allow reading comic books from readcomicsonline.ru :vrobbler:books:feature:comicbook:personal:project:scrobbling:
-
- Note taken on [2025-09-25 Thu 10:52]
-
-  Things to consider are whether we scrobble the issue on one page, send it to
-  archivebox? (yes), and how best to enrich the data
-
 ** TODO [#A] Puzzles (and all longplays) should have a "Completed?" column on their detail page :vrobbler:bug:puzzles:personal:project:
 ** TODO [#A] Fix raw text webpage title not truncating to 254 chars :vrobbler:personal:bug:webpages:

 - Note taken on [2025-09-30 Tue 09:33]

  This may have already been resolved ... need to just confirm it.
-** TODO
+** DONE [#A] Fix bug where podcast scrobbling creates duplicate Podcast :project:vrobbler:scrobbling:podcasts:bug:personal:
+:PROPERTIES:
+:ID:       7377ef6c-5fa7-9e4e-9080-f9810a76118c
+:END:
+
+Rather than pick up an existing Podcast using the podcast title in the mopidy
+file name, Vrobbler creates a new podcast with no enriched data. Not a big deal
+for my use as the volume of podcasts I listen to makes manual fixes easy. But
+it's annoying.
+
+** DONE [#A] Allow reading comic books from readcomicsonline.ru :vrobbler:books:feature:comicbook:personal:project:scrobbling:
+:PROPERTIES:
+:ID:       7c7e9ecc-b675-68c3-764f-ef771ce5d88f
+:END:
+
+- Note taken on [2025-09-25 Thu 10:52]
+
+  Things to consider are whether we scrobble the issue on one page, send it to
+  archivebox? (yes), and how best to enrich the data

 ** DONE [#A] Add RSS feed lookups to podcasts :vrobbler:personal:feature:podcasts:
 :PROPERTIES:
--- a/vrobbler/apps/podcasts/models.py
+++ b/vrobbler/apps/podcasts/models.py
@ -144,6 +144,7 @@ class PodcastEpisode(ScrobblableMixin):
        cls,
        title: str,
        podcast_name: str,
+        podcast_description: str,
        pub_date: str,
        number: int = 0,
        mopidy_uri: str = "",
@ -155,31 +156,33 @@ class PodcastEpisode(ScrobblableMixin):
        producer before saving the episode so it can be scrobbled.

        """
+        log_context={"mopidy_uri": mopidy_uri, "media_type": "Podcast"}
        producer = None
        if producer_name:
            producer = Producer.find_or_create(producer_name)

-        podcast = Podcast.objects.filter(
-            name__iexact=podcast_name,
-        ).first()
-        if not podcast:
-            podcast = Podcast.objects.create(
-                name=podcast_name, producer=producer
-            )
-            if enrich:
-                podcast.fix_metadata()
+        podcast, created = Podcast.objects.get_or_create(name=podcast_name, defaults={"description": podcast_description})
+        log_context["podcast_id"] = podcast.id
+        log_context["podcast_name"] = podcast.name
+        if created:
+            logger.info("Created new podcast", extra=log_context)
+        if enrich and created:
+            logger.info("Enriching new podcast", extra=log_context)
+            podcast.fix_metadata()

-        episode = cls.objects.filter(
-            title__iexact=title, podcast=podcast
-        ).first()
-        if not episode:
-            episode = cls.objects.create(
-                title=title,
-                podcast=podcast,
-                run_time_seconds=run_time_seconds,
-                number=number,
-                pub_date=pub_date,
-                mopidy_uri=mopidy_uri,
-            )
+        episode, created = cls.objects.get_or_create(
+            title=title,
+            podcast=podcast,
+            defaults={
+                "run_time_seconds": run_time_seconds,
+                "number": number,
+                "pub_date": pub_date,
+                "mopidy_uri": mopidy_uri,
+            }
+        )
+        if created:
+            log_context["episode_id"] = episode.id
+            log_context["episode_title"] = episode.title
+            logger.info("Created new podcast episode", extra=log_context)

        return episode
--- a/vrobbler/apps/podcasts/utils.py
+++ b/vrobbler/apps/podcasts/utils.py
@ -34,10 +34,15 @@ def fetch_metadata_from_rss(uri: str) -> dict[str, Any]:
        logger.warning("Tried to parse uri as RSS feed, but no target found", extra=log_context)
        return podcast_data

+    podcast_publisher = feed.feed.get("itunes_publisher")
+    podcast_owner = feed.feed.itunes_owner.get("name") if isinstance(feed.feed.itunes_owner, dict) else feed.feed.itunes_owner
+    podcast_other = feed.feed.get("managingeditor") or feed.feed.get("copyright")
+
    podcast_data = {
        "podcast_name": feed.feed.get("title", "Unknown Podcast"),
        "podcast_description": feed.feed.get("description", ""),
        "podcast_link": feed.feed.get("link", ""),
+        "podcast_producer": podcast_publisher or podcast_owner or podcast_other
    }

    for entry in feed.entries:
@ -110,24 +115,20 @@ def get_or_create_podcast(post_data: dict) -> PodcastEpisode:

    mopidy_uri = post_data.get("mopidy_uri", "")
    parsed_data = parse_mopidy_uri(mopidy_uri)
-
-    producer_dict = {"name": post_data.get("artist")}
-
-    podcast_name = post_data.get("album")
-    if not podcast_name:
-        podcast_name = parsed_data.get("podcast_name")
-    podcast_dict = {"name": podcast_name}
-
-    episode_name = parsed_data.get("episode_filename")
+    producer_name = parsed_data.get("podcast_producer", post_data.get("artist", ""))
+    podcast_name = parsed_data.get("podcast_name", post_data.get("album", ""))
+    episode_name = parsed_data.get("episode_title", parsed_data.get("episode_filename", ""))
    run_time_seconds = parsed_data.get("episode_runtime_seconds", post_data.get("run_time", 2700))
+
    episode_dict = {
        "title": episode_name,
-        "run_time_seconds": run_time_seconds,
-        "number": parsed_data.get("episode_num"),
+        "podcast_name": podcast_name,
+        "podcast_description": parsed_data.get("podcast_description"),
        "pub_date": parsed_data.get("pub_date"),
+        "number": parsed_data.get("episode_num"),
        "mopidy_uri": mopidy_uri,
+        "producer_name": producer_name,
+        "run_time_seconds": run_time_seconds,
    }

-    return PodcastEpisode.find_or_create(
-        podcast_dict, producer_dict, episode_dict
-    )
+    return PodcastEpisode.find_or_create(**episode_dict)