mirror of
https://github.com/krateng/maloja.git
synced 2025-04-16 00:40:32 +03:00
Completely reworked album parsing
This commit is contained in:
parent
e4bf26b86d
commit
517bc6f5c0
@ -148,7 +148,7 @@ def print_info():
|
||||
print("Could not determine dependency versions.")
|
||||
print()
|
||||
|
||||
@mainfunction({"l":"level","v":"version","V":"version"},flags=['version','include_images'],shield=True)
|
||||
@mainfunction({"l":"level","v":"version","V":"version"},flags=['version','include_images','prefer_existing'],shield=True)
|
||||
def main(*args,**kwargs):
|
||||
|
||||
actions = {
|
||||
@ -166,7 +166,7 @@ def main(*args,**kwargs):
|
||||
"generate":generate.generate_scrobbles, # maloja generate 400
|
||||
"export":tasks.export, # maloja export
|
||||
"apidebug":apidebug.run, # maloja apidebug
|
||||
"parsealbums":tasks.parse_albums, # maloja parsealbums
|
||||
"parsealbums":tasks.parse_albums, # maloja parsealbums --strategy majority
|
||||
# aux
|
||||
"info":print_info
|
||||
}
|
||||
|
@ -483,14 +483,11 @@ def get_artist_id(artistname,create_new=True,dbconn=None):
|
||||
|
||||
@cached_wrapper
|
||||
@connection_provider
|
||||
def get_album_id(albumdict,create_new=True,dbconn=None):
|
||||
def get_album_id(albumdict,create_new=True,ignore_albumartists=False,dbconn=None):
|
||||
ntitle = normalize_name(albumdict['albumtitle'])
|
||||
artist_ids = [get_artist_id(a,dbconn=dbconn) for a in albumdict.get('artists') or []]
|
||||
artist_ids = list(set(artist_ids))
|
||||
|
||||
|
||||
|
||||
|
||||
op = DB['albums'].select(
|
||||
# DB['albums'].c.id
|
||||
).where(
|
||||
@ -498,20 +495,23 @@ def get_album_id(albumdict,create_new=True,dbconn=None):
|
||||
)
|
||||
result = dbconn.execute(op).all()
|
||||
for row in result:
|
||||
# check if the artists are the same
|
||||
foundtrackartists = []
|
||||
|
||||
op = DB['albumartists'].select(
|
||||
# DB['albumartists'].c.artist_id
|
||||
).where(
|
||||
DB['albumartists'].c.album_id==row.id
|
||||
)
|
||||
result = dbconn.execute(op).all()
|
||||
match_artist_ids = [r.artist_id for r in result]
|
||||
#print("required artists",artist_ids,"this match",match_artist_ids)
|
||||
if set(artist_ids) == set(match_artist_ids):
|
||||
#print("ID for",albumdict['title'],"was",row[0])
|
||||
if ignore_albumartists:
|
||||
return row.id
|
||||
else:
|
||||
# check if the artists are the same
|
||||
foundtrackartists = []
|
||||
|
||||
op = DB['albumartists'].select(
|
||||
# DB['albumartists'].c.artist_id
|
||||
).where(
|
||||
DB['albumartists'].c.album_id==row.id
|
||||
)
|
||||
result = dbconn.execute(op).all()
|
||||
match_artist_ids = [r.artist_id for r in result]
|
||||
#print("required artists",artist_ids,"this match",match_artist_ids)
|
||||
if set(artist_ids) == set(match_artist_ids):
|
||||
#print("ID for",albumdict['title'],"was",row[0])
|
||||
return row.id
|
||||
|
||||
if not create_new: return None
|
||||
|
||||
@ -1601,7 +1601,7 @@ def guess_albums(track_ids=None,replace=False,dbconn=None):
|
||||
}}
|
||||
if len(artists) == 0:
|
||||
# for albums without artist, assume track artist
|
||||
res[track_id]["guess_artists"] = True
|
||||
res[track_id]["guess_artists"] = []
|
||||
else:
|
||||
res[track_id] = {"assigned":False,"reason":"Not enough data"}
|
||||
|
||||
@ -1610,7 +1610,7 @@ def guess_albums(track_ids=None,replace=False,dbconn=None):
|
||||
|
||||
|
||||
|
||||
missing_artists = [track_id for track_id in res if res[track_id].get("guess_artists")]
|
||||
missing_artists = [track_id for track_id in res if "guess_artists" in res[track_id]]
|
||||
|
||||
#we're pointlessly getting the albumartist names here even though the IDs would be enough
|
||||
#but it's better for function separation I guess
|
||||
@ -1627,10 +1627,7 @@ def guess_albums(track_ids=None,replace=False,dbconn=None):
|
||||
result = dbconn.execute(op).all()
|
||||
|
||||
for row in result:
|
||||
res[row.track_id]["assigned"]["artists"].append(row.name)
|
||||
for track_id in res:
|
||||
if res[track_id].get("guess_artists"):
|
||||
del res[track_id]["guess_artists"]
|
||||
res[row.track_id]["guess_artists"].append(row.name)
|
||||
|
||||
return res
|
||||
|
||||
|
@ -1,19 +1,106 @@
|
||||
from doreah.io import col
|
||||
|
||||
def parse_albums(replace=False):
|
||||
def parse_albums(strategy=None,prefer_existing=False):
|
||||
|
||||
if strategy not in ("track","none","all","majority","most"):
|
||||
print("""
|
||||
Please specify your album parsing strategy:
|
||||
|
||||
--strategy Specify what strategy to use when the scrobble contains
|
||||
no information about album artists.
|
||||
track Take the track artists. This can lead to
|
||||
separate albums being created for compilation
|
||||
albums or albums that have collaboration tracks.
|
||||
none Merge all albums with the same name and assign
|
||||
'Various Artists' as the album artist.
|
||||
all Merge all albums with the same name and assign
|
||||
every artist that appears on the album as an album
|
||||
artist.
|
||||
majority Merge all albums with the same name and assign
|
||||
artists that appear in at least half the tracks
|
||||
of the album as album artists. [RECOMMENDED]
|
||||
most Merge all albums with the same name and assign
|
||||
the artist that appears most on the album as album
|
||||
artist.
|
||||
--prefer_existing If an album with the same name already exists, use it
|
||||
without further examination of track artists.
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
|
||||
from ...database.sqldb import guess_albums, get_album_id, add_track_to_album
|
||||
|
||||
print("Parsing album information...")
|
||||
result = guess_albums(replace=replace)
|
||||
result = guess_albums()
|
||||
|
||||
result = {track_id:result[track_id] for track_id in result if result[track_id]["assigned"]}
|
||||
print("Adding",len(result),"tracks to albums...")
|
||||
print("Found",col['yellow'](len(result)),"Tracks to assign albums to")
|
||||
|
||||
result_authorative = {track_id:result[track_id] for track_id in result if result[track_id]["assigned"]["artists"]}
|
||||
result_guesswork = {track_id:result[track_id] for track_id in result if not result[track_id]["assigned"]["artists"]}
|
||||
|
||||
i = 0
|
||||
for track_id in result:
|
||||
album_id = get_album_id(result[track_id]["assigned"])
|
||||
add_track_to_album(track_id,album_id)
|
||||
i += 1
|
||||
|
||||
def countup(i):
|
||||
i+=1
|
||||
if (i % 100) == 0:
|
||||
print(i,"of",len(result))
|
||||
print("Done!")
|
||||
print(f"Added album information for {i} of {len(result)} tracks...")
|
||||
return i
|
||||
|
||||
for track_id in result_authorative:
|
||||
albuminfo = result[track_id]['assigned']
|
||||
album_id = get_album_id(albuminfo)
|
||||
add_track_to_album(track_id,album_id)
|
||||
i=countup(i)
|
||||
|
||||
albums = {}
|
||||
for track_id in result_guesswork:
|
||||
albuminfo = result[track_id]['assigned']
|
||||
|
||||
# check if already exists
|
||||
if prefer_existing:
|
||||
album_id = get_album_id(albuminfo,ignore_albumartists=True,create_new=False)
|
||||
if album_id:
|
||||
add_track_to_album(track_id,album_id)
|
||||
i=countup(i)
|
||||
continue
|
||||
|
||||
if strategy == 'track':
|
||||
albuminfo['artists'] = result[track_id]['guess_artists']
|
||||
album_id = get_album_id(albuminfo)
|
||||
add_track_to_album(track_id,album_id)
|
||||
i=countup(i)
|
||||
continue
|
||||
|
||||
if strategy == 'none':
|
||||
albuminfo['artists'] = []
|
||||
album_id = get_album_id(albuminfo)
|
||||
add_track_to_album(track_id,album_id)
|
||||
i=countup(i)
|
||||
continue
|
||||
|
||||
if strategy in ['all','majority','most']:
|
||||
albums.setdefault(albuminfo['albumtitle'],{'track_ids':[],'artists':{}})
|
||||
albums[albuminfo['albumtitle']]['track_ids'].append(track_id)
|
||||
for a in result[track_id]['guess_artists']:
|
||||
albums[albuminfo['albumtitle']]['artists'].setdefault(a,0)
|
||||
albums[albuminfo['albumtitle']]['artists'][a] += 1
|
||||
|
||||
|
||||
for title in albums:
|
||||
artistoptions = albums[title]['artists']
|
||||
track_ids = albums[title]['track_ids']
|
||||
if strategy == 'all':
|
||||
artists = [a for a in artistoptions]
|
||||
elif strategy == 'majority':
|
||||
artists = [a for a in artistoptions if artistoptions[a] >= (len(track_ids) / 2)]
|
||||
elif strategy == 'most':
|
||||
artists = [max(artistoptions,key=artistoptions.get)]
|
||||
|
||||
for track_id in track_ids:
|
||||
album_id = get_album_id({'albumtitle':title,'artists':artists})
|
||||
add_track_to_album(track_id,album_id)
|
||||
i=countup(i)
|
||||
|
||||
print(col['lawngreen']("Done!"))
|
||||
|
@ -90,7 +90,11 @@
|
||||
</table>
|
||||
|
||||
|
||||
{% if info["isalbumartist"] %}
|
||||
{% set albums_info = dbc.get_albums_artist_appears_on(filterkeys,limitkeys) %}
|
||||
{% set ownalbums = albums_info.own_albums %}
|
||||
{% set otheralbums = albums_info.appears_on %}
|
||||
|
||||
{% if ownalbums or otheralbums %}
|
||||
|
||||
{% if settings['ALBUM_SHOWCASE'] %}
|
||||
<h2><a href='{{ mlj_uri.create_uri("/charts_albums",filterkeys) }}'>Albums</a></h2>
|
||||
|
Loading…
x
Reference in New Issue
Block a user