Files
vrobbler/vrobbler/apps/music/management/commands/report_mismatched_metadata.py
Colin Powell 91c3376256
Some checks failed
build / test (push) Has been cancelled
[music] Add mgmt command to see mismatched metadata
2026-06-08 11:17:36 -04:00

128 lines
3.6 KiB
Python

import csv
import logging
import unicodedata

from django.core.management.base import BaseCommand
logger = logging.getLogger(__name__)
def _get_source(raw_data):
if "Artist" in raw_data:
return "Jellyfin"
if "artist" in raw_data:
return "Mopidy"
return None
def _get_raw_values(raw_data, source):
if source == "Jellyfin":
return raw_data.get("Artist", ""), raw_data.get("Album", "")
return raw_data.get("artist", ""), raw_data.get("album", "")
def _normalize(name):
return name.strip().casefold()
def _artist_mismatch(raw_artist, track_artist_names):
    """Tell whether the raw artist disagrees with the track's artists.

    ``track_artist_names`` is a single string in which several artists are
    separated by ``" / "``. The raw artist counts as a match when, after
    normalisation, it equals any one of those artists or the whole joined
    string. When either argument is empty nothing can be compared, so no
    mismatch is reported.
    """
    if not (raw_artist and track_artist_names):
        return False

    wanted = _normalize(raw_artist)
    accepted = {_normalize(part) for part in track_artist_names.split(" / ")}
    # The full joined string is also acceptable, not just its parts.
    accepted.add(_normalize(track_artist_names))
    return wanted not in accepted
def _album_mismatch(raw_album, track_album_name):
    """Tell whether the raw album name disagrees with the track's album.

    Both names are normalised before comparison. When either one is empty
    there is nothing to compare, so no mismatch is reported.
    """
    if raw_album and track_album_name:
        return _normalize(raw_album) != _normalize(track_album_name)
    return False
class Command(BaseCommand):
    """Write a CSV report of tracks whose scrobble logs disagree with them.

    Every track scrobble that carries ``raw_data`` in its log is compared
    against the artists and album stored on the related track. Each distinct
    disagreement becomes one CSV row.
    """

    help = (
        "Outputs a CSV of track IDs where raw metadata from scrobble logs "
        "does not match the track's stored artists or album"
    )

    def add_arguments(self, parser):
        """Register the ``--file-path`` option selecting the output CSV."""
        parser.add_argument(
            "--file-path",
            type=str,
            default="/tmp/metadata-report.csv",
            help="Output CSV file path (default: /tmp/metadata-report.csv)",
        )

    def handle(self, *args, **options):
        """Scan track scrobbles and write the mismatch report.

        Reads ``options["file_path"]``, writes the CSV there (overwriting any
        existing file) and prints a summary line to stdout.
        """
        # Imported inside the method rather than at module import time.
        from scrobbles.models import Scrobble

        file_path = options["file_path"]
        fieldnames = [
            "track_id",
            "track_artist_name",
            "track_album_name",
            "raw_artist",
            "raw_album",
            "source",
        ]

        scrobbles = (
            Scrobble.objects.filter(media_type=Scrobble.MediaType.TRACK)
            .exclude(log__isnull=True)
            .exclude(log={})
            .select_related("track__album")
            .prefetch_related("track__artists")
            # An explicit chunk_size is required for iterator() to be usable
            # after prefetch_related(); without it Django 5.0+ raises
            # ValueError (and 4.1+ emits a deprecation warning).
            .iterator(chunk_size=2000)
        )

        rows = []
        seen = set()
        for scrobble in scrobbles:
            track = scrobble.track
            if not track:
                continue

            raw_data = scrobble.log.get("raw_data")
            # Skip empty payloads and payloads that are not JSON objects;
            # the helpers below rely on dict lookups.
            if not raw_data or not isinstance(raw_data, dict):
                continue

            source = _get_source(raw_data)
            if not source:
                continue

            raw_artist, raw_album = _get_raw_values(raw_data, source)
            if not raw_artist and not raw_album:
                continue

            # Iterate the prefetched relation directly: chaining values_list()
            # would build a fresh queryset and hit the database per scrobble.
            track_artist_names = " / ".join(
                artist.name for artist in track.artists.all()
            )
            track_album_name = track.album.name if track.album else ""

            mismatched = _artist_mismatch(
                raw_artist, track_artist_names
            ) or _album_mismatch(raw_album, track_album_name)
            if not mismatched:
                continue

            values = (
                track.id,
                track_artist_names,
                track_album_name,
                raw_artist,
                raw_album,
                source,
            )
            # A track played many times would otherwise repeat the very same
            # row once per scrobble. The key is built from string forms so
            # that unexpected unhashable JSON values cannot break the lookup.
            key = tuple(str(value) for value in values)
            if key in seen:
                continue
            seen.add(key)
            rows.append(dict(zip(fieldnames, values)))

        # Explicit UTF-8 so non-ASCII artist/album names are written the same
        # way regardless of the platform's default locale encoding.
        with open(file_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

        self.stdout.write(
            self.style.SUCCESS(
                f"Wrote {len(rows)} mismatched track(s) to {file_path}"
            )
        )