Completely reworked album parsing

This commit is contained in:
krateng 2023-03-31 21:04:30 +02:00
parent e4bf26b86d
commit 517bc6f5c0
4 changed files with 123 additions and 35 deletions

View File

@ -148,7 +148,7 @@ def print_info():
print("Could not determine dependency versions.")
print()
@mainfunction({"l":"level","v":"version","V":"version"},flags=['version','include_images'],shield=True)
@mainfunction({"l":"level","v":"version","V":"version"},flags=['version','include_images','prefer_existing'],shield=True)
def main(*args,**kwargs):
actions = {
@ -166,7 +166,7 @@ def main(*args,**kwargs):
"generate":generate.generate_scrobbles, # maloja generate 400
"export":tasks.export, # maloja export
"apidebug":apidebug.run, # maloja apidebug
"parsealbums":tasks.parse_albums, # maloja parsealbums
"parsealbums":tasks.parse_albums, # maloja parsealbums --strategy majority
# aux
"info":print_info
}

View File

@ -483,14 +483,11 @@ def get_artist_id(artistname,create_new=True,dbconn=None):
@cached_wrapper
@connection_provider
def get_album_id(albumdict,create_new=True,dbconn=None):
def get_album_id(albumdict,create_new=True,ignore_albumartists=False,dbconn=None):
ntitle = normalize_name(albumdict['albumtitle'])
artist_ids = [get_artist_id(a,dbconn=dbconn) for a in albumdict.get('artists') or []]
artist_ids = list(set(artist_ids))
op = DB['albums'].select(
# DB['albums'].c.id
).where(
@ -498,20 +495,23 @@ def get_album_id(albumdict,create_new=True,dbconn=None):
)
result = dbconn.execute(op).all()
for row in result:
# check if the artists are the same
foundtrackartists = []
op = DB['albumartists'].select(
# DB['albumartists'].c.artist_id
).where(
DB['albumartists'].c.album_id==row.id
)
result = dbconn.execute(op).all()
match_artist_ids = [r.artist_id for r in result]
#print("required artists",artist_ids,"this match",match_artist_ids)
if set(artist_ids) == set(match_artist_ids):
#print("ID for",albumdict['title'],"was",row[0])
if ignore_albumartists:
return row.id
else:
# check if the artists are the same
foundtrackartists = []
op = DB['albumartists'].select(
# DB['albumartists'].c.artist_id
).where(
DB['albumartists'].c.album_id==row.id
)
result = dbconn.execute(op).all()
match_artist_ids = [r.artist_id for r in result]
#print("required artists",artist_ids,"this match",match_artist_ids)
if set(artist_ids) == set(match_artist_ids):
#print("ID for",albumdict['title'],"was",row[0])
return row.id
if not create_new: return None
@ -1601,7 +1601,7 @@ def guess_albums(track_ids=None,replace=False,dbconn=None):
}}
if len(artists) == 0:
# for albums without artist, assume track artist
res[track_id]["guess_artists"] = True
res[track_id]["guess_artists"] = []
else:
res[track_id] = {"assigned":False,"reason":"Not enough data"}
@ -1610,7 +1610,7 @@ def guess_albums(track_ids=None,replace=False,dbconn=None):
missing_artists = [track_id for track_id in res if res[track_id].get("guess_artists")]
missing_artists = [track_id for track_id in res if "guess_artists" in res[track_id]]
#we're pointlessly getting the albumartist names here even though the IDs would be enough
#but it's better for function separation I guess
@ -1627,10 +1627,7 @@ def guess_albums(track_ids=None,replace=False,dbconn=None):
result = dbconn.execute(op).all()
for row in result:
res[row.track_id]["assigned"]["artists"].append(row.name)
for track_id in res:
if res[track_id].get("guess_artists"):
del res[track_id]["guess_artists"]
res[row.track_id]["guess_artists"].append(row.name)
return res

View File

@ -1,19 +1,106 @@
from doreah.io import col
def parse_albums(strategy=None,prefer_existing=False):
    """Assign tracks to albums using the album information guessed from scrobbles.

    Tracks whose guessed album already carries album artists are assigned
    first. Tracks without album artists are then handled according to
    ``strategy``.

    Args:
        strategy: One of "track", "none", "all", "majority" or "most".
            Any other value (including the default ``None``) prints the
            usage text and returns without touching the database.
        prefer_existing: If true, a track without album artists is added to
            an already existing album of the same title (album artists are
            ignored for the lookup) before the strategy is consulted.

    Returns:
        None.
    """
    if strategy not in ("track","none","all","majority","most"):
        # The usage text is reproduced verbatim; it is runtime output.
        print("""
Please specify your album parsing strategy:
--strategy Specify what strategy to use when the scrobble contains
no information about album artists.
track Take the track artists. This can lead to
separate albums being created for compilation
albums or albums that have collaboration tracks.
none Merge all albums with the same name and assign
'Various Artists' as the album artist.
all Merge all albums with the same name and assign
every artist that appears on the album as an album
artist.
majority Merge all albums with the same name and assign
artists that appear in at least half the tracks
of the album as album artists. [RECOMMENDED]
most Merge all albums with the same name and assign
the artist that appears most on the album as album
artist.
--prefer_existing If an album with the same name already exists, use it
without further examination of track artists.
""")
        return

    # Imported lazily so that the usage text above works without the database.
    from ...database.sqldb import guess_albums, get_album_id, add_track_to_album

    print("Parsing album information...")
    result = guess_albums()

    # Only keep tracks for which an album could actually be guessed.
    result = {track_id:result[track_id] for track_id in result if result[track_id]["assigned"]}
    total = len(result)
    print("Found",col['yellow'](total),"Tracks to assign albums to")

    with_artists = [track_id for track_id in result if result[track_id]["assigned"]["artists"]]
    without_artists = [track_id for track_id in result if not result[track_id]["assigned"]["artists"]]

    done = 0

    def assign(track_id,album_id):
        """Attach one track to an album and report progress every 100 tracks."""
        nonlocal done
        add_track_to_album(track_id,album_id)
        done += 1
        if (done % 100) == 0:
            print(f"Added album information for {done} of {total} tracks...")

    # First pass: album artists are known, nothing to guess.
    # This runs before the second pass so that prefer_existing can find
    # the albums created here.
    for track_id in with_artists:
        assign(track_id,get_album_id(result[track_id]['assigned']))

    # Second pass: album artists are unknown.
    # album title -> {'track_ids': [...], 'artists': {artist name: occurrences}}
    pending = {}
    for track_id in without_artists:
        albuminfo = result[track_id]['assigned']

        if prefer_existing:
            existing_id = get_album_id(albuminfo,ignore_albumartists=True,create_new=False)
            # get_album_id returns None when nothing matched and create_new is off
            if existing_id is not None:
                assign(track_id,existing_id)
                continue

        if strategy == 'track':
            albuminfo['artists'] = result[track_id]['guess_artists']
            assign(track_id,get_album_id(albuminfo))
        elif strategy == 'none':
            albuminfo['artists'] = []
            assign(track_id,get_album_id(albuminfo))
        else:
            # 'all', 'majority', 'most': the decision needs every track of the
            # album, so only collect here and resolve after the loop.
            entry = pending.setdefault(albuminfo['albumtitle'],{'track_ids':[],'artists':{}})
            entry['track_ids'].append(track_id)
            for artist in result[track_id]['guess_artists']:
                entry['artists'][artist] = entry['artists'].get(artist,0) + 1

    for title in pending:
        track_ids = pending[title]['track_ids']
        artists = _select_album_artists(strategy,pending[title]['artists'],len(track_ids))
        # The album is the same for every track of this title: resolve it once.
        album_id = get_album_id({'albumtitle':title,'artists':artists})
        for track_id in track_ids:
            assign(track_id,album_id)

    print(col['lawngreen']("Done!"))


def _select_album_artists(strategy,artist_counts,track_count):
    """Pick album artists from per-artist track counts for a merging strategy.

    Args:
        strategy: "all", "majority" or "most".
        artist_counts: Mapping of artist name to the number of the album's
            tracks the artist appears on.
        track_count: Number of tracks collected for the album.

    Returns:
        List of artist names; empty if no artist qualifies.

    Raises:
        ValueError: If ``strategy`` is not one of the merging strategies.
    """
    if strategy == 'all':
        return list(artist_counts)
    if strategy == 'majority':
        return [a for a in artist_counts if artist_counts[a] >= (track_count / 2)]
    if strategy == 'most':
        # max() raises ValueError on an empty mapping (no track of the album
        # had any guessed artist), so fall back to "no album artist".
        if not artist_counts:
            return []
        return [max(artist_counts,key=artist_counts.get)]
    raise ValueError(f"Not a merging strategy: {strategy!r}")

View File

@ -90,7 +90,11 @@
</table>
{% if info["isalbumartist"] %}
{% set albums_info = dbc.get_albums_artist_appears_on(filterkeys,limitkeys) %}
{% set ownalbums = albums_info.own_albums %}
{% set otheralbums = albums_info.appears_on %}
{% if ownalbums or otheralbums %}
{% if settings['ALBUM_SHOWCASE'] %}
<h2><a href='{{ mlj_uri.create_uri("/charts_albums",filterkeys) }}'>Albums</a></h2>