Added functionality to parse old album information

2025-07-11 06:01:26 +03:00 · 2023-03-30 16:08:03 +02:00 · 2023-03-30 16:08:03 +02:00 · d0d76166fc
commit d0d76166fc
parent fd5d01b728
5 changed files with 118 additions and 2 deletions
--- a/maloja/main.py
+++ b/maloja/main.py
@ -166,6 +166,7 @@ def main(*args,**kwargs):
 		"generate":generate.generate_scrobbles,	# maloja generate 400
 		"export":tasks.export,					# maloja export
 		"apidebug":apidebug.run,				# maloja apidebug
+		"parsealbums":tasks.parse_albums,		# maloja parsealbums
 		# aux
 		"info":print_info
 	}
--- a/maloja/database/init.py
+++ b/maloja/database/init.py
@ -163,7 +163,8 @@ def rawscrobble_to_scrobbledict(rawscrobble, fix=True, client=None):
 		"origin":f"client:{client}" if client else "generic",
 		"extra":{
 			k:scrobbleinfo[k] for k in scrobbleinfo if k not in
-			['scrobble_time','track_artists','track_title','track_length','scrobble_duration','album_title','album_artists']
+			['scrobble_time','track_artists','track_title','track_length','scrobble_duration']#,'album_title','album_artists']
+			# we still save album info in extra because the user might select majority album authority
 		},
 		"rawscrobble":rawscrobble
 	}
--- a/maloja/database/sqldb.py
+++ b/maloja/database/sqldb.py
@ -354,6 +354,11 @@ def add_track_to_album(track_id,album_id,replace=False,dbconn=None):
 	result = dbconn.execute(op)
 	return True

+@connection_provider
+def add_tracks_to_albums(track_to_album_id_dict,replace=False,dbconn=None):
+	"""Assign several tracks to albums over one shared connection.
+
+	track_to_album_id_dict: dict mapping each track ID to the album ID it should be assigned to
+	replace: handed on unchanged to add_track_to_album for every track
+	dbconn: connection that is passed on to every add_track_to_album call
+	"""
+
+	for track_id,album_id in track_to_album_id_dict.items():
+		# forward replace as well - it used to be accepted here but silently ignored
+		add_track_to_album(track_id,album_id,replace=replace,dbconn=dbconn)


 ### these will 'get' the ID of an entity, creating it if necessary
@ -1356,7 +1361,6 @@ def clean_db(dbconn=None):
 			]

 			for d in to_delete:
-				print(d)
 				selection = dbconn.execute(sql.text(f"select * {d}"))
 				for row in selection.all():
 					log(f"Deleting {row}")
@ -1427,9 +1431,98 @@ def merge_duplicate_tracks(artist_id,dbconn=None):



+@connection_provider
+def guess_albums(track_ids=None,replace=False,dbconn=None):
+	"""Guess an album for tracks from the album info stored with their scrobbles.
+
+	For every considered track, the album info (album artists + album title) found
+	in the 'extra' JSON of its scrobbles is counted. The combination that appears
+	most often wins; on a tie, the one that was seen first wins.
+
+	track_ids: track IDs to restrict the guess to, or None for no restriction
+	replace: if falsy, only tracks that have no album yet are considered
+	dbconn: connection the queries are executed on
+
+	Returns a dict mapping track ID to either
+		{"assigned":{"artists":[...],"albumtitle":...}}
+	or
+		{"assigned":False,"reason":...}
+	Tracks without any scrobble that carries extra info do not appear at all.
+	"""
+
+	MIN_NUM_TO_ASSIGN = 1
+
+	jointable = sql.join(
+		DB['scrobbles'],
+		DB['tracks']
+	)
+
+	# get all scrobbles of the respective tracks that have some info
+	conditions = [
+		DB['scrobbles'].c.extra.isnot(None)
+	]
+	if track_ids is not None:
+		# only do these tracks
+		conditions.append(
+			DB['scrobbles'].c.track_id.in_(track_ids)
+		)
+	if not replace:
+		# only tracks that have no album yet
+		conditions.append(
+			DB['tracks'].c.album_id.is_(None)
+		)
+
+	op = sql.select(
+		DB['scrobbles']
+	).select_from(jointable).where(
+		*conditions
+	)
+
+	result = dbconn.execute(op).all()
+
+	# for each track, count what album info appears how often
+	possible_albums = {}
+	for row in result:
+		# register the track even if this scrobble turns out to have no album info,
+		# so that it can be reported as unassigned below
+		options = possible_albums.setdefault(row.track_id,{})
+
+		extrainfo = json.loads(row.extra)
+		if not isinstance(extrainfo,dict):
+			# e.g. a stored JSON null - nothing to read from
+			continue
+		albumtitle = extrainfo.get("album_name") or extrainfo.get("album_title")
+		if not albumtitle:
+			continue
+
+		# the key can be present with a null value, and a plain string must not
+		# be unpacked into single characters
+		albumartists = extrainfo.get("album_artists") or []
+		if isinstance(albumartists,str):
+			albumartists = [albumartists]
+
+		hashable_albuminfo = (*albumartists,albumtitle)
+		options[hashable_albuminfo] = options.get(hashable_albuminfo,0) + 1
+
+	res = {}
+	missing_artists = []
+	for track_id,options in possible_albums.items():
+		if not options:
+			res[track_id] = {"assigned":False,"reason":"No scrobbles with album information found"}
+			continue
+
+		# pick the one with most occurrences (max keeps the first one on a tie)
+		bestpick = max(options,key=options.get)
+		if options[bestpick] < MIN_NUM_TO_ASSIGN:
+			res[track_id] = {"assigned":False,"reason":"Not enough data"}
+			continue
+
+		*artists,title = bestpick
+		res[track_id] = {"assigned":{
+			"artists":artists,
+			"albumtitle": title
+		}}
+		if not artists:
+			# for albums without artist, assume track artist
+			missing_artists.append(track_id)
+
+	# only hit the database if there is actually something to look up
+	if missing_artists:
+		#we're pointlessly getting the albumartist names here even though the IDs would be enough
+		#but it's better for function separation I guess
+		jointable = sql.join(
+			DB['trackartists'],
+			DB['artists']
+		)
+		op = sql.select(
+			DB['trackartists'].c.track_id,
+			DB['artists']
+		).select_from(jointable).where(
+			DB['trackartists'].c.track_id.in_(missing_artists)
+		)
+		for row in dbconn.execute(op).all():
+			res[row.track_id]["assigned"]["artists"].append(row.name)
+
+	return res



--- a/maloja/proccontrol/tasks/init.py
+++ b/maloja/proccontrol/tasks/init.py
@ -1,3 +1,4 @@
 from .import_scrobbles import import_scrobbles
 from .backup import backup
 from .export import export # read that line out loud
+from .parse_albums import parse_albums
--- a/maloja/proccontrol/tasks/parse_albums.py
+++ b/maloja/proccontrol/tasks/parse_albums.py
@ -0,0 +1,20 @@
+
+
+
+def parse_albums(replace=False):
+	"""Guess albums from stored scrobble information and assign the tracks to them.
+
+	replace: passed to guess_albums and to add_track_to_album.
+		NOTE(review): add_track_to_album is defined elsewhere; presumably it only
+		overwrites an existing album when replace is set - confirm.
+
+	Prints progress to stdout and returns nothing.
+	"""
+
+	from ...database.sqldb import guess_albums, get_album_id, add_track_to_album
+
+	print("Parsing album information...")
+	guesses = guess_albums(replace=replace)
+
+	# keep only the tracks for which an album could actually be determined
+	assignments = {
+		track_id:guess["assigned"]
+		for track_id,guess in guesses.items()
+		if guess["assigned"]
+	}
+	total = len(assignments)
+	print("Adding",total,"tracks to albums...")
+	for count,(track_id,albuminfo) in enumerate(assignments.items(),start=1):
+		album_id = get_album_id(albuminfo)
+		# forward replace - otherwise guesses for tracks that already have an album are never applied
+		add_track_to_album(track_id,album_id,replace=replace)
+		if (count % 100) == 0:
+			print(count,"of",total)
+	print("Done!")