128 lines
3.6 KiB
Python
128 lines
3.6 KiB
Python
import csv
|
|
import logging
|
|
|
|
from django.core.management.base import BaseCommand
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _get_source(raw_data):
|
|
if "Artist" in raw_data:
|
|
return "Jellyfin"
|
|
if "artist" in raw_data:
|
|
return "Mopidy"
|
|
return None
|
|
|
|
|
|
def _get_raw_values(raw_data, source):
|
|
if source == "Jellyfin":
|
|
return raw_data.get("Artist", ""), raw_data.get("Album", "")
|
|
return raw_data.get("artist", ""), raw_data.get("album", "")
|
|
|
|
|
|
def _normalize(name):
|
|
return name.strip().casefold()
|
|
|
|
|
|
def _artist_mismatch(raw_artist, track_artist_names):
    """Tell whether the raw artist disagrees with the track's artists.

    ``track_artist_names`` is a single string in which several artists
    are separated by ``" / "``. The raw artist is considered a match when,
    after normalisation, it equals any one of the separated names or the
    whole unsplit string (covering names that themselves contain the
    separator).

    Returns ``False`` when either argument is empty, since there is
    nothing to compare; otherwise ``True`` only if no candidate matches.
    """
    if not (raw_artist and track_artist_names):
        return False

    wanted = _normalize(raw_artist)
    # Acceptable spellings: each individual artist plus the joined string.
    candidates = {_normalize(part) for part in track_artist_names.split(" / ")}
    candidates.add(_normalize(track_artist_names))
    return wanted not in candidates
|
|
|
|
|
|
def _album_mismatch(raw_album, track_album_name):
    """Tell whether the raw album name disagrees with the track's album.

    The names are compared after normalisation. When either name is
    empty there is nothing to compare and ``False`` is returned.
    """
    if raw_album and track_album_name:
        return _normalize(raw_album) != _normalize(track_album_name)
    return False
|
|
|
|
|
|
class Command(BaseCommand):
    """Report tracks whose stored metadata disagrees with scrobble logs.

    For each track scrobble with a non-empty log, the raw artist and
    album recorded in the log are compared with the track's stored
    artists and album. Every disagreement becomes one row of the output
    CSV.
    """

    help = (
        "Outputs a CSV of track IDs where raw metadata from scrobble logs "
        "does not match the track's stored artists or album"
    )

    def add_arguments(self, parser):
        """Register the ``--file-path`` option naming the output CSV."""
        parser.add_argument(
            "--file-path",
            type=str,
            default="/tmp/metadata-report.csv",
            help="Output CSV file path (default: /tmp/metadata-report.csv)",
        )

    def handle(self, *args, **options):
        """Scan track scrobbles and write the mismatch report.

        Scrobbles are skipped when they have no track, when their log or
        its ``raw_data`` entry is not a non-empty dict, when the source
        cannot be identified, or when the log holds neither artist nor
        album. The CSV is written to ``options["file_path"]`` and a
        summary line is printed to stdout.
        """
        from scrobbles.models import Scrobble

        file_path = options["file_path"]

        qs = (
            Scrobble.objects.filter(media_type=Scrobble.MediaType.TRACK)
            .exclude(log__isnull=True)
            .exclude(log={})
            .select_related("track__album")
            .prefetch_related("track__artists")
            # An explicit chunk_size is required when iterator() follows
            # prefetch_related(): newer Django versions raise ValueError
            # without it. 2000 is Django's own default chunk size.
            .iterator(chunk_size=2000)
        )

        rows = []
        for scrobble in qs:
            track = scrobble.track
            if not track:
                continue

            # The log is free-form JSON; only mapping payloads can be
            # inspected, anything else is skipped instead of crashing.
            log = scrobble.log
            if not isinstance(log, dict):
                continue
            raw_data = log.get("raw_data")
            if not raw_data or not isinstance(raw_data, dict):
                continue

            source = _get_source(raw_data)
            if not source:
                continue

            raw_artist, raw_album = _get_raw_values(raw_data, source)
            if not raw_artist and not raw_album:
                continue

            # Iterate the related manager directly so prefetched artists
            # are reused; chaining values_list() would issue a new query
            # for every scrobble.
            track_artist_names = " / ".join(
                artist.name for artist in track.artists.all()
            )
            track_album_name = track.album.name if track.album else ""

            if _artist_mismatch(raw_artist, track_artist_names) or _album_mismatch(
                raw_album, track_album_name
            ):
                rows.append(
                    {
                        "track_id": track.id,
                        "track_artist_name": track_artist_names,
                        "track_album_name": track_album_name,
                        "raw_artist": raw_artist,
                        "raw_album": raw_album,
                        "source": source,
                    }
                )

        fieldnames = [
            "track_id",
            "track_artist_name",
            "track_album_name",
            "raw_artist",
            "raw_album",
            "source",
        ]
        # Explicit UTF-8 so non-ASCII names do not depend on the
        # process locale.
        with open(file_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

        self.stdout.write(
            self.style.SUCCESS(
                f"Wrote {len(rows)} mismatched track(s) to {file_path}"
            )
        )
|