From 9eb8dc0b47695096bdad8bd42830f11cbc372f7b Mon Sep 17 00:00:00 2001 From: krateng Date: Mon, 3 Jan 2022 02:08:02 +0100 Subject: [PATCH 001/182] Initial work on SQLite --- maloja/database.py | 280 ++++++++++++++++++++++--------------------- maloja/globalconf.py | 22 ++-- maloja/upgrade.py | 60 ++++++++++ 3 files changed, 212 insertions(+), 150 deletions(-) create mode 100644 maloja/upgrade.py diff --git a/maloja/database.py b/maloja/database.py index 484d187..b7a07e7 100644 --- a/maloja/database.py +++ b/maloja/database.py @@ -23,6 +23,9 @@ except: pass import doreah +#db +import sqlalchemy as sql + # technical import os @@ -31,7 +34,7 @@ import sys import unicodedata from collections import namedtuple from threading import Lock -import yaml +import yaml, json import lru import math @@ -688,151 +691,154 @@ def get_predefined_rulesets(): ## Server operation #### +DB = {} -# Starts the server +engine = sql.create_engine(f"sqlite:///{data_dir['scrobbles']('malojadb.sqlite')}", echo = False) +meta = sql.MetaData() + +DB['scrobbles'] = sql.Table( + 'scrobbles', meta, + sql.Column('timestamp',sql.Integer,primary_key=True), + sql.Column('rawscrobble',sql.String), + sql.Column('origin',sql.String), + sql.Column('duration',sql.Integer), + sql.Column('track_id',sql.Integer) +) +DB['tracks'] = sql.Table( + 'tracks', meta, + sql.Column('id',sql.Integer,primary_key=True), + sql.Column('title',sql.String), + sql.Column('title_normalized',sql.String) +) +DB['artists'] = sql.Table( + 'artists', meta, + sql.Column('id',sql.Integer,primary_key=True), + sql.Column('name',sql.String), + sql.Column('name_normalized',sql.String) +) +DB['trackartists'] = sql.Table( + 'trackartists', meta, + sql.Column('id',sql.Integer,primary_key=True), + sql.Column('artist_id',sql.Integer), + sql.Column('track_id',sql.Integer) +) + +meta.create_all(engine) + + + + + + + +#### ATTENTION ALL ADVENTURERS +#### THIS IS WHAT A SCROBBLE DICT WILL LOOK LIKE FROM NOW ON +#### THIS IS THE SINGLE CANONICAL SOURCE OF TRUTH +#### STOP MAKING DIFFERENT LITTLE DICTS IN EVERY SINGLE FUNCTION +#### THIS IS THE SCHEMA THAT WILL DEFINITELY 100% STAY LIKE THIS AND NOT +#### RANDOMLY GET CHANGED TWO VERSIONS LATER +#### HERE WE GO +# +# { +# "time":int, +# "track":{ +# "artists":list, +# "title":string, +# "album":{ +# "name":string, +# "artists":list +# }, +# "length":None +# }, +# "duration":int, +# "origin":string +# } + +def add_scrobble(scrobbledict): + add_scrobbles([scrobbledict]) + +def add_scrobbles(scrobbleslist): + + ops = [ + DB['scrobbles'].insert().values( + rawscrobble=json.dumps(s), + timestamp=s['time'], + origin=s['origin'], + duration=s['duration'] or -1, + track_id=get_track_id(s['track']) + ) for s in scrobbleslist + ] + + with engine.begin() as conn: + for op in ops: + conn.execute(op) + + + +### DB interface functions - these will 'get' the ID of an entity, +### creating it if necessary + + +def get_track_id(trackdict): + ntitle = normalize_name(trackdict['title']) + artist_ids = [get_artist_id(a) for a in trackdict['artists']] + + + + with engine.begin() as conn: + op = DB['tracks'].select( + DB['tracks'].c.id + ).where( + DB['tracks'].c.title_normalized==ntitle + ) + result = conn.execute(op) + for row in result: + print("ID for",trackdict['title'],"was",row[0]) + return row[0] + + with engine.begin() as conn: + op = DB['tracks'].insert().values( + title=trackdict['title'], + title_normalized=ntitle + ) + result = conn.execute(op) + print("Created",trackdict['title'],result.inserted_primary_key) + return result.inserted_primary_key[0] + +def get_artist_id(artistname): + nname = normalize_name(artistname) + print("looking for",nname) + + with engine.begin() as conn: + op = DB['artists'].select( + DB['artists'].c.id + ).where( + DB['artists'].c.name_normalized==nname + ) + result = conn.execute(op) + for row in result: + print("ID for",artistname,"was",row[0]) + return row[0] + + with engine.begin() as conn: + op = DB['artists'].insert().values( + name=artistname, + name_normalized=nname + ) + result = conn.execute(op) + print("Created",artistname,result.inserted_primary_key) + return result.inserted_primary_key[0] + def start_db(): - log("Starting database...") - global lastsync - lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp()) - build_db() - #run(dbserver, host='::', port=PORT, server='waitress') - log("Database reachable!") - -def build_db(): - - global dbstatus - dbstatus['healthy'] = False - dbstatus['complete'] = False - dbstatus['rebuildinprogress'] = True - - log("Building database...") - - global SCROBBLES, ARTISTS, TRACKS - global TRACKS_NORMALIZED_SET, TRACKS_NORMALIZED, ARTISTS_NORMALIZED_SET, ARTISTS_NORMALIZED - global SCROBBLESDICT, STAMPS - - SCROBBLES = [] - ARTISTS = [] - TRACKS = [] - STAMPS = [] - SCROBBLESDICT = {} - - TRACKS_NORMALIZED = [] - ARTISTS_NORMALIZED = [] - ARTISTS_NORMALIZED_SET = set() - TRACKS_NORMALIZED_SET = set() - - - # parse files - db = tsv.parse_all(data_dir['scrobbles'](),"int","string","string",comments=False) - scrobblenum = len(db) - log(f"Found {scrobblenum} scrobbles...") - - usebar = not malojaconfig["CLEAN_OUTPUT"] - if usebar: pbar = ProgressBar(max=scrobblenum,prefix="Loading scrobbles") - else: - n = 0 - m = max(int(scrobblenum / 25),20) - #db = parseAllTSV("scrobbles","int","string","string",escape=False) - for sc in db: - artists = sc[1].split("␟") - title = sc[2] - time = sc[0] - - readScrobble(artists,title,time) - if usebar: pbar.progress() - else: - n += 1 - if n % m == 0: log(f"Loaded {n}/{scrobblenum}...") - - if usebar: pbar.done() - - - log("Database loaded, optimizing...") - - # optimize database - SCROBBLES.sort(key = lambda tup: tup[1]) - #SCROBBLESDICT = {obj[1]:obj for obj in SCROBBLES} - STAMPS = [t for t in SCROBBLESDICT] - STAMPS.sort() - - # inform malojatime module about earliest scrobble - if STAMPS: register_scrobbletime(STAMPS[0]) - - # NOT NEEDED BECAUSE WE DO THAT ON ADDING EVERY ARTIST ANYWAY - # get extra artists with no real scrobbles from countas rules - #for artist in coa.getAllArtists(): - #for artist in coa.getCreditedList(ARTISTS): - # if artist not in ARTISTS: - # log(artist + " is added to database because of countas rules",module="debug") - # ARTISTS.append(artist) - # coa.updateIDs(ARTISTS) - - dbstatus['healthy'] = True - - - #start regular tasks - utilities.update_medals() - utilities.update_weekly() - utilities.send_stats() - - - global ISSUES - ISSUES = check_issues() - - - dbstatus['complete'] = True - dbstatus['rebuildinprogress'] = False - - log("Database fully built!") + from . import upgrade + upgrade.upgrade_db(add_scrobbles) -# Saves all cached entries to disk -def sync(): - - # all entries by file collected - # so we don't open the same file for every entry - #log("Syncing",module="debug") - entries = {} - - for idx in range(len(SCROBBLES)): - if not SCROBBLES[idx].saved: - - t = get_scrobble_dict(SCROBBLES[idx]) - - artistlist = list(t["artists"]) - artistlist.sort() #we want the order of artists to be deterministic so when we update files with new rules a diff can see what has actually been changed - artistss = "␟".join(artistlist) - timestamp = datetime.date.fromtimestamp(t["time"]) - - album = t["album"] or "-" - duration = t["duration"] or "-" - - entry = [str(t["time"]),artistss,t["title"],album,duration] - - monthcode = str(timestamp.year) + "_" + str(timestamp.month) - entries.setdefault(monthcode,[]).append(entry) #i feckin love the setdefault function - - SCROBBLES[idx] = Scrobble(*SCROBBLES[idx][:-1],True) - # save copy with last tuple entry set to true - - #log("Sorted into months",module="debug") - - for e in entries: - tsv.add_entries(data_dir['scrobbles'](e + ".tsv"),entries[e],comments=False) - #addEntries("scrobbles/" + e + ".tsv",entries[e],escape=False) - - #log("Written files",module="debug") - global lastsync - lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp()) - #log("Database saved to disk.") - # save cached images - #saveCache() diff --git a/maloja/globalconf.py b/maloja/globalconf.py index 0859ed9..fba96dc 100644 --- a/maloja/globalconf.py +++ b/maloja/globalconf.py @@ -7,6 +7,7 @@ from .__pkginfo__ import VERSION + # if DATA_DIRECTORY is specified, this is the directory to use for EVERYTHING, no matter what # but with asynnetrical structure, cache and logs in subfolders # otherwise, each directory is treated seperately @@ -311,24 +312,19 @@ config( - ### API KEYS +### symmetric keys are fine since we hopefully use HTTPS - -### symmetric keys are fine for now since we hopefully use HTTPS apikeystore = KeyStore(file=data_dir['clients']("apikeys.yml"),save_endpoint="/apis/mlj_1/apikeys") +from . import upgrade +upgrade.upgrade_apikeys() + + + + + -oldfile = pthj(dir_settings['config'],"clients","authenticated_machines.tsv") -if os.path.exists(oldfile): - try: - from doreah import tsv - clients = tsv.parse(oldfile,"string","string") - for key,identifier in clients: - apikeystore[identifier] = key - os.remove(oldfile) - except: - pass # what the fuck did i just write diff --git a/maloja/upgrade.py b/maloja/upgrade.py new file mode 100644 index 0000000..e892624 --- /dev/null +++ b/maloja/upgrade.py @@ -0,0 +1,60 @@ +# This module should take care of recognizing old install data and upgrading it before the actual server deals with it + +import os +import re + +from doreah.logging import log + +from .globalconf import data_dir, dir_settings, apikeystore + + +def upgrade_apikeys(): + + oldfile = os.path.join(dir_settings['config'],"clients","authenticated_machines.tsv") + if os.path.exists(oldfile): + try: + from doreah import tsv + clients = tsv.parse(oldfile,"string","string") + for key,identifier in clients: + apikeystore[identifier] = key + os.remove(oldfile) + except: + pass + + +def upgrade_db(callback_add_scrobbles): + oldfolder = os.path.join(dir_settings['state'],"scrobbles") + if os.path.exists(oldfolder): + scrobblefiles = os.listdir(oldfolder) + for sf in scrobblefiles: + if sf.endswith(".tsv"): + log(f"Found old tsv scrobble file: {sf}") + if re.match(r"[0-9]+_[0-9]+\.tsv",sf): + origin = 'native' + elif sf == "lastfmimport.tsv": + origin = 'lastfm-import' + else: + origin = 'unknown' + + from doreah import tsv + scrobbles = tsv.parse(os.path.join(oldfolder,sf),"int","string","string","string","string",comments=False) + scrobblelist = [] + for scrobble in scrobbles: + timestamp, artists, title, album, duration = scrobble + if album in ('-',''): album = None + if duration in ('-',''): duration = None + scrobblelist.append({ + "time":int(timestamp), + "track":{ + "artists":artists.split('␟'), + "title":title, + "album":{ + "name":album, + "artists":None + }, + "length":None + }, + "duration":duration, + "origin":origin + }) + callback_add_scrobbles(scrobblelist) From 0233adedec7ef595dc671f600e79b6f18013ee01 Mon Sep 17 00:00:00 2001 From: krateng Date: Mon, 3 Jan 2022 02:46:19 +0100 Subject: [PATCH 002/182] Implemented base scrobble functions --- maloja/database.py | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/maloja/database.py b/maloja/database.py index b7a07e7..c0a1298 100644 --- a/maloja/database.py +++ b/maloja/database.py @@ -703,7 +703,7 @@ DB['scrobbles'] = sql.Table( sql.Column('rawscrobble',sql.String), sql.Column('origin',sql.String), sql.Column('duration',sql.Integer), - sql.Column('track_id',sql.Integer) + sql.Column('track_id',sql.Integer,sql.ForeignKey('tracks.id')) ) DB['tracks'] = sql.Table( 'tracks', meta, @@ -720,8 +720,8 @@ DB['artists'] = sql.Table( DB['trackartists'] = sql.Table( 'trackartists', meta, sql.Column('id',sql.Integer,primary_key=True), - sql.Column('artist_id',sql.Integer), - sql.Column('track_id',sql.Integer) + sql.Column('artist_id',sql.Integer,sql.ForeignKey('artists.id')), + sql.Column('track_id',sql.Integer,sql.ForeignKey('tracks.id')) ) meta.create_all(engine) @@ -792,10 +792,22 @@ def get_track_id(trackdict): ).where( DB['tracks'].c.title_normalized==ntitle ) - result = conn.execute(op) - for row in result: + result = conn.execute(op).all() + for row in result: + # check if the artists are the same + foundtrackartists = [] + with engine.begin() as conn: + op = DB['trackartists'].select( + DB['trackartists'].c.artist_id + ).where( + DB['trackartists'].c.track_id==row[0] + ) + result = conn.execute(op).all() + match_artist_ids = [r.artist_id for r in result] + print("required artists",artist_ids,"this match",match_artist_ids) + if set(artist_ids) == set(match_artist_ids): print("ID for",trackdict['title'],"was",row[0]) - return row[0] + return row.id with engine.begin() as conn: op = DB['tracks'].insert().values( @@ -803,8 +815,16 @@ def get_track_id(trackdict): title_normalized=ntitle ) result = conn.execute(op) - print("Created",trackdict['title'],result.inserted_primary_key) - return result.inserted_primary_key[0] + track_id = result.inserted_primary_key[0] + with engine.begin() as conn: + for artist_id in artist_ids: + op = DB['trackartists'].insert().values( + track_id=track_id, + artist_id=artist_id + ) + result = conn.execute(op) + print("Created",trackdict['title'],track_id) + return track_id def get_artist_id(artistname): nname = normalize_name(artistname) @@ -816,10 +836,10 @@ def get_artist_id(artistname): ).where( DB['artists'].c.name_normalized==nname ) - result = conn.execute(op) - for row in result: - print("ID for",artistname,"was",row[0]) - return row[0] + result = conn.execute(op).all() + for row in result: + print("ID for",artistname,"was",row[0]) + return row.id with engine.begin() as conn: op = DB['artists'].insert().values( From c826b069e4020a5c9b736e444c7381eb7e47d316 Mon Sep 17 00:00:00 2001 From: krateng Date: Mon, 3 Jan 2022 03:04:36 +0100 Subject: [PATCH 003/182] More work --- maloja/database.py | 135 ++++++++----------------------------------- maloja/globalconf.py | 4 +- maloja/upgrade.py | 4 +- 3 files changed, 29 insertions(+), 114 deletions(-) diff --git a/maloja/database.py b/maloja/database.py index c0a1298..1213763 100644 --- a/maloja/database.py +++ b/maloja/database.py @@ -58,25 +58,8 @@ class DatabaseNotBuilt(HTTPError): headers={"Retry-After":10} ) -SCROBBLES = [] # Format: tuple(track_ref,timestamp,saved) -ARTISTS = [] # Format: artist -TRACKS = [] # Format: namedtuple(artists=frozenset(artist_ref,...),title=title) -Track = namedtuple("Track",["artists","title"]) -Scrobble = namedtuple("Scrobble",["track","timestamp","album","duration","saved"]) -# album is saved in the scrobble because it's not actually authorative information about the track, just info -# what was sent with this scrobble - -### OPTIMIZATION -SCROBBLESDICT = {} # timestamps to scrobble mapping -STAMPS = [] # sorted -#STAMPS_SET = set() # as set for easier check if exists # we use the scrobbles dict for that now -TRACKS_NORMALIZED = [] -ARTISTS_NORMALIZED = [] -ARTISTS_NORMALIZED_SET = set() -TRACKS_NORMALIZED_SET = set() - MEDALS_ARTISTS = {} #literally only changes once per year, no need to calculate that on the fly MEDALS_TRACKS = {} WEEKLY_TOPTRACKS = {} @@ -86,27 +69,7 @@ ISSUES = {} cla = CleanerAgent() coa = CollectorAgent() -clients = [] -lastsync = 0 - - -try: - with open(data_dir['state']("known_servers.yml"),"r") as f: - KNOWN_SERVERS = set(yaml.safe_load(f)) -except: - KNOWN_SERVERS = set() - - -def add_known_server(url): - KNOWN_SERVERS.add(url) - with open(data_dir['state']("known_servers.yml"),"w") as f: - f.write(yaml.dump(list(KNOWN_SERVERS))) - - - - -log("Authenticated Machines: " + ", ".join([k for k in apikeystore])) def checkAPIkey(key): return apikeystore.check_key(key) @@ -143,80 +106,27 @@ def createScrobble(artists,title,time,album=None,duration=None,volatile=False): if len(artists) == 0 or title == "": return {} - dblock.acquire() - - i = getTrackID(artists,title) - - # idempotence - if time in SCROBBLESDICT and i == SCROBBLESDICT[time].track: - dblock.release() - return get_track_dict(TRACKS[i]) - # timestamp as unique identifier - while (time in SCROBBLESDICT): - time += 1 - - obj = Scrobble(i,time,album,duration,volatile) # if volatile generated, we simply pretend we have already saved it to disk - #SCROBBLES.append(obj) - # immediately insert scrobble correctly so we can guarantee sorted list - index = insert(SCROBBLES,obj,key=lambda x:x[1]) - SCROBBLESDICT[time] = obj - STAMPS.insert(index,time) #should be same index as scrobblelist - register_scrobbletime(time) - invalidate_caches() - dblock.release() + scrobbledict = { + "time":time, + "track":{ + "artists":artists, + "title":title, + "album":{ + "name":album, + "artists":None + }, + "length":None + }, + "duration":duration, + "origin":"generic" + } + add_scrobble(scrobbledict) proxy_scrobble_all(artists,title,time) - - return get_track_dict(TRACKS[obj.track]) - - -# this will never be called from different threads, so no lock -def readScrobble(artists,title,time): - while (time in SCROBBLESDICT): - time += 1 - i = getTrackID(artists,title) - obj = Scrobble(i,time,None,None,True) - SCROBBLES.append(obj) - SCROBBLESDICT[time] = obj - #STAMPS.append(time) + return scrobbledict -def getArtistID(name): - - obj = name - obj_normalized = normalize_name(name) - - if obj_normalized in ARTISTS_NORMALIZED_SET: - return ARTISTS_NORMALIZED.index(obj_normalized) - - i = len(ARTISTS) - ARTISTS.append(obj) - ARTISTS_NORMALIZED_SET.add(obj_normalized) - ARTISTS_NORMALIZED.append(obj_normalized) - - # with a new artist added, we might also get new artists that they are credited as - cr = coa.getCredited(name) - getArtistID(cr) - - coa.updateIDs(ARTISTS) - - return i - -def getTrackID(artists,title): - artistset = {getArtistID(name=a) for a in artists} - obj = Track(artists=frozenset(artistset),title=title) - obj_normalized = Track(artists=frozenset(artistset),title=normalize_name(title)) - - if obj_normalized in TRACKS_NORMALIZED_SET: - return TRACKS_NORMALIZED.index(obj_normalized) - i = len(TRACKS) - TRACKS.append(obj) - TRACKS_NORMALIZED_SET.add(obj_normalized) - TRACKS_NORMALIZED.append(obj_normalized) - return i - -import unicodedata # function to turn the name into a representation that can be easily compared, ignoring minor differences remove_symbols = ["'","`","’"] @@ -804,9 +714,9 @@ def get_track_id(trackdict): ) result = conn.execute(op).all() match_artist_ids = [r.artist_id for r in result] - print("required artists",artist_ids,"this match",match_artist_ids) + #print("required artists",artist_ids,"this match",match_artist_ids) if set(artist_ids) == set(match_artist_ids): - print("ID for",trackdict['title'],"was",row[0]) + #print("ID for",trackdict['title'],"was",row[0]) return row.id with engine.begin() as conn: @@ -823,12 +733,12 @@ def get_track_id(trackdict): artist_id=artist_id ) result = conn.execute(op) - print("Created",trackdict['title'],track_id) + #print("Created",trackdict['title'],track_id) return track_id def get_artist_id(artistname): nname = normalize_name(artistname) - print("looking for",nname) + #print("looking for",nname) with engine.begin() as conn: op = DB['artists'].select( @@ -838,7 +748,7 @@ def get_artist_id(artistname): ) result = conn.execute(op).all() for row in result: - print("ID for",artistname,"was",row[0]) + #print("ID for",artistname,"was",row[0]) return row.id with engine.begin() as conn: @@ -847,9 +757,10 @@ def get_artist_id(artistname): name_normalized=nname ) result = conn.execute(op) - print("Created",artistname,result.inserted_primary_key) + #print("Created",artistname,result.inserted_primary_key) return result.inserted_primary_key[0] + def start_db(): from . import upgrade upgrade.upgrade_db(add_scrobbles) diff --git a/maloja/globalconf.py b/maloja/globalconf.py index fba96dc..60df460 100644 --- a/maloja/globalconf.py +++ b/maloja/globalconf.py @@ -262,7 +262,7 @@ data_directories = { "auth":pthj(dir_settings['state'],"auth"), "backups":pthj(dir_settings['state'],"backups"), "images":pthj(dir_settings['state'],"images"), - "scrobbles":pthj(dir_settings['state'],"scrobbles"), + "scrobbles":pthj(dir_settings['state']), "rules":pthj(dir_settings['config'],"rules"), "clients":pthj(dir_settings['config']), "settings":pthj(dir_settings['config']), @@ -320,6 +320,8 @@ apikeystore = KeyStore(file=data_dir['clients']("apikeys.yml"),save_endpoint="/a from . import upgrade upgrade.upgrade_apikeys() +print("Authenticated Machines: " + ", ".join([k for k in apikeystore])) + diff --git a/maloja/upgrade.py b/maloja/upgrade.py index e892624..5caec5b 100644 --- a/maloja/upgrade.py +++ b/maloja/upgrade.py @@ -4,6 +4,7 @@ import os import re from doreah.logging import log +from doreah.io import col from .globalconf import data_dir, dir_settings, apikeystore @@ -23,12 +24,13 @@ def upgrade_apikeys(): def upgrade_db(callback_add_scrobbles): + print(col['yellow']("Upgrading v2 Database to v3 Database. This could take a while...")) oldfolder = os.path.join(dir_settings['state'],"scrobbles") if os.path.exists(oldfolder): scrobblefiles = os.listdir(oldfolder) for sf in scrobblefiles: if sf.endswith(".tsv"): - log(f"Found old tsv scrobble file: {sf}") + print(f"\tImporting from old tsv scrobble file: {sf}") if re.match(r"[0-9]+_[0-9]+\.tsv",sf): origin = 'native' elif sf == "lastfmimport.tsv": From 03dd902e1bf93499e019e0e8ffd0fa1478ec6c02 Mon Sep 17 00:00:00 2001 From: krateng Date: Mon, 3 Jan 2022 08:01:49 +0100 Subject: [PATCH 004/182] Split up DB module a bit --- maloja/database.py | 274 +++---------------------------------------- maloja/db/convert.py | 40 +++++++ maloja/db/sqldb.py | 210 +++++++++++++++++++++++++++++++++ maloja/upgrade.py | 15 ++- 4 files changed, 275 insertions(+), 264 deletions(-) create mode 100644 maloja/db/convert.py create mode 100644 maloja/db/sqldb.py diff --git a/maloja/database.py b/maloja/database.py index 1213763..7a5ea3f 100644 --- a/maloja/database.py +++ b/maloja/database.py @@ -6,10 +6,10 @@ from .cleanup import CleanerAgent, CollectorAgent from . import utilities from .malojatime import register_scrobbletime, time_stamps, ranges from .malojauri import uri_to_internal, internal_to_uri, compose_querystring - from .thirdparty import proxy_scrobble_all - from .globalconf import data_dir, malojaconfig, apikeystore +#db +from .db.sqldb import * # doreah toolkit from doreah.logging import log @@ -23,8 +23,6 @@ except: pass import doreah -#db -import sqlalchemy as sql # technical @@ -128,15 +126,7 @@ def createScrobble(artists,title,time,album=None,duration=None,volatile=False): -# function to turn the name into a representation that can be easily compared, ignoring minor differences -remove_symbols = ["'","`","’"] -replace_with_space = [" - ",": "] -def normalize_name(name): - for r in replace_with_space: - name = name.replace(r," ") - name = "".join(char for char in unicodedata.normalize('NFD',name.lower()) - if char not in remove_symbols and unicodedata.category(char) != 'Mn') - return name + @@ -173,9 +163,6 @@ def api_key_correct(request): def get_scrobbles(**keys): r = db_query(**{k:keys[k] for k in keys if k in ["artist","artists","title","since","to","within","timerange","associated","track"]}) - #offset = (keys.get('page') * keys.get('perpage')) if keys.get('perpage') is not math.inf else 0 - #r = r[offset:] - #if keys.get('perpage') is not math.inf: r = r[:keys.get('perpage')] return r @@ -198,49 +185,11 @@ def get_scrobbles_num(**keys): r = db_query(**{k:keys[k] for k in keys if k in ["artist","track","artists","title","since","to","within","timerange","associated"]}) return len(r) - -#for multiple since values (must be ordered) -# DOESN'T SEEM TO ACTUALLY BE FASTER -# REEVALUATE - -#def get_scrobbles_num_multiple(sinces=[],to=None,**keys): -# -# sinces_stamps = [time_stamps(since,to,None)[0] for since in sinces] -# #print(sinces) -# #print(sinces_stamps) -# minsince = sinces[-1] -# r = db_query(**{k:keys[k] for k in keys if k in ["artist","track","artists","title","associated","to"]},since=minsince) -# -# #print(r) -# -# validtracks = [0 for s in sinces] -# -# i = 0 -# si = 0 -# while True: -# if si == len(sinces): break -# if i == len(r): break -# if r[i]["time"] >= sinces_stamps[si]: -# validtracks[si] += 1 -# else: -# si += 1 -# continue -# i += 1 -# -# -# return validtracks - - - def get_tracks(artist=None): artistid = ARTISTS.index(artist) if artist is not None else None - # Option 1 return [get_track_dict(t) for t in TRACKS if (artistid in t.artists) or (artistid==None)] - # Option 2 is a bit more elegant but much slower - #tracklist = [get_track_dict(t) for t in TRACKS] - #ls = [t for t in tracklist if (artist in t["artists"]) or (artist==None)] def get_artists(): @@ -329,15 +278,6 @@ def get_top_tracks(**keys): return results - - - - - - - - - def artistInfo(artist): charts = db_aggregate(by="ARTIST") @@ -601,170 +541,13 @@ def get_predefined_rulesets(): ## Server operation #### -DB = {} - - -engine = sql.create_engine(f"sqlite:///{data_dir['scrobbles']('malojadb.sqlite')}", echo = False) -meta = sql.MetaData() - -DB['scrobbles'] = sql.Table( - 'scrobbles', meta, - sql.Column('timestamp',sql.Integer,primary_key=True), - sql.Column('rawscrobble',sql.String), - sql.Column('origin',sql.String), - sql.Column('duration',sql.Integer), - sql.Column('track_id',sql.Integer,sql.ForeignKey('tracks.id')) -) -DB['tracks'] = sql.Table( - 'tracks', meta, - sql.Column('id',sql.Integer,primary_key=True), - sql.Column('title',sql.String), - sql.Column('title_normalized',sql.String) -) -DB['artists'] = sql.Table( - 'artists', meta, - sql.Column('id',sql.Integer,primary_key=True), - sql.Column('name',sql.String), - sql.Column('name_normalized',sql.String) -) -DB['trackartists'] = sql.Table( - 'trackartists', meta, - sql.Column('id',sql.Integer,primary_key=True), - sql.Column('artist_id',sql.Integer,sql.ForeignKey('artists.id')), - sql.Column('track_id',sql.Integer,sql.ForeignKey('tracks.id')) -) - -meta.create_all(engine) - - - - - - - -#### ATTENTION ALL ADVENTURERS -#### THIS IS WHAT A SCROBBLE DICT WILL LOOK LIKE FROM NOW ON -#### THIS IS THE SINGLE CANONICAL SOURCE OF TRUTH -#### STOP MAKING DIFFERENT LITTLE DICTS IN EVERY SINGLE FUNCTION -#### THIS IS THE SCHEMA THAT WILL DEFINITELY 100% STAY LIKE THIS AND NOT -#### RANDOMLY GET CHANGED TWO VERSIONS LATER -#### HERE WE GO -# -# { -# "time":int, -# "track":{ -# "artists":list, -# "title":string, -# "album":{ -# "name":string, -# "artists":list -# }, -# "length":None -# }, -# "duration":int, -# "origin":string -# } - -def add_scrobble(scrobbledict): - add_scrobbles([scrobbledict]) - -def add_scrobbles(scrobbleslist): - - ops = [ - DB['scrobbles'].insert().values( - rawscrobble=json.dumps(s), - timestamp=s['time'], - origin=s['origin'], - duration=s['duration'] or -1, - track_id=get_track_id(s['track']) - ) for s in scrobbleslist - ] - - with engine.begin() as conn: - for op in ops: - conn.execute(op) - - - -### DB interface functions - these will 'get' the ID of an entity, -### creating it if necessary - - -def get_track_id(trackdict): - ntitle = normalize_name(trackdict['title']) - artist_ids = [get_artist_id(a) for a in trackdict['artists']] - - - - with engine.begin() as conn: - op = DB['tracks'].select( - DB['tracks'].c.id - ).where( - DB['tracks'].c.title_normalized==ntitle - ) - result = conn.execute(op).all() - for row in result: - # check if the artists are the same - foundtrackartists = [] - with engine.begin() as conn: - op = DB['trackartists'].select( - DB['trackartists'].c.artist_id - ).where( - DB['trackartists'].c.track_id==row[0] - ) - result = conn.execute(op).all() - match_artist_ids = [r.artist_id for r in result] - #print("required artists",artist_ids,"this match",match_artist_ids) - if set(artist_ids) == set(match_artist_ids): - #print("ID for",trackdict['title'],"was",row[0]) - return row.id - - with engine.begin() as conn: - op = DB['tracks'].insert().values( - title=trackdict['title'], - title_normalized=ntitle - ) - result = conn.execute(op) - track_id = result.inserted_primary_key[0] - with engine.begin() as conn: - for artist_id in artist_ids: - op = DB['trackartists'].insert().values( - track_id=track_id, - artist_id=artist_id - ) - result = conn.execute(op) - #print("Created",trackdict['title'],track_id) - return track_id - -def get_artist_id(artistname): - nname = normalize_name(artistname) - #print("looking for",nname) - - with engine.begin() as conn: - op = DB['artists'].select( - DB['artists'].c.id - ).where( - DB['artists'].c.name_normalized==nname - ) - result = conn.execute(op).all() - for row in result: - #print("ID for",artistname,"was",row[0]) - return row.id - - with engine.begin() as conn: - op = DB['artists'].insert().values( - name=artistname, - name_normalized=nname - ) - result = conn.execute(op) - #print("Created",artistname,result.inserted_primary_key) - return result.inserted_primary_key[0] def start_db(): from . import upgrade upgrade.upgrade_db(add_scrobbles) - + dbstatus['healthy'] = True + dbstatus['complete'] = True @@ -940,10 +723,10 @@ def reduce_caches_if_low_ram(): # Queries the database -def db_query_full(artist=None,artists=None,title=None,track=None,since=None,to=None,within=None,timerange=None,associated=False,max_=None): - +def db_query_full(artist=None,artists=None,title=None,track=None,timerange=None,associated=False,max_=None): + print((artist,artists,title,track,timerange)) if not dbstatus['healthy']: raise DatabaseNotBuilt() - (since, to) = time_stamps(since=since,to=to,within=within,range=timerange) + (since, to) = time_stamps(range=timerange) # this is not meant as a search function. we *can* query the db with a string, but it only works if it matches exactly # if a title is specified, we assume that a specific track (with the exact artist combination) is requested @@ -951,42 +734,14 @@ def db_query_full(artist=None,artists=None,title=None,track=None,since=None,to=N #artist = None - if artist is not None and isinstance(artist,str): - artist = ARTISTS.index(artist) + if artists is not None and title is not None: + return get_scrobbles_of_track(track={"artists":artists,"title":title},since=since,to=to) - # artists to numbers - if artists is not None: - artists = set([(ARTISTS.index(a) if isinstance(a,str) else a) for a in artists]) + if artist is not None: + return get_scrobbles_of_artist(artist=artist,since=since,to=to) - # track to number - if track is not None and isinstance(track,dict): - trackartists = set([(ARTISTS.index(a) if isinstance(a,str) else a) for a in track["artists"]]) - track = TRACKS.index((frozenset(trackartists),track["title"])) - artists = None + return get_scrobbles(since=since,to=to) - #check if track is requested via title - if title!=None and track==None: - track = TRACKS.index((frozenset(artists),title)) - artists = None - - # if we're not looking for a track (either directly or per title artist arguments, which is converted to track above) - # we only need one artist - elif artist is None and track is None and artists is not None and len(artists) != 0: - artist = artists.pop() - - - # db query always reverse by default - - result = [] - - i = 0 - for s in scrobbles_in_range(since,to,reverse=True): - if i == max_: break - if (track is None or s[0] == track) and (artist is None or artist in TRACKS[s[0]][0] or associated and artist in coa.getCreditedList(TRACKS[s[0]][0])): - result.append(get_scrobble_dict(s)) - i += 1 - - return result # pointless to check for artist when track is checked because every track has a fixed set of artists, but it's more elegant this way @@ -1064,6 +819,9 @@ def db_search(query,type=None): ## Useful functions #### + + + # makes a string usable for searching (special characters are blanks, accents and stuff replaced with their real part) def simplestr(input,ignorecapitalization=True): norm = unicodedata.normalize("NFKD",input) diff --git a/maloja/db/convert.py b/maloja/db/convert.py new file mode 100644 index 0000000..be9c42b --- /dev/null +++ b/maloja/db/convert.py @@ -0,0 +1,40 @@ +#### ATTENTION ALL ADVENTURERS +#### THIS IS WHAT A SCROBBLE DICT WILL LOOK LIKE FROM NOW ON +#### THIS IS THE SINGLE CANONICAL SOURCE OF TRUTH +#### STOP MAKING DIFFERENT LITTLE DICTS IN EVERY SINGLE FUNCTION +#### THIS IS THE SCHEMA THAT WILL DEFINITELY 100% STAY LIKE THIS AND NOT +#### RANDOMLY GET CHANGED TWO VERSIONS LATER +#### HERE WE GO +# +# { +# "time":int, +# "track":{ +# "artists":list, +# "title":string, +# "album":{ +# "name":string, +# "artists":list +# }, +# "length":None +# }, +# "duration":int, +# "origin":string +# } + + + +def scrobble_db_to_dict(resultrow): + return { + "time":resultrow.timestamp, + "track":track_db_to_dict(resultrow.track), + "duration":resultrow.duration, + "origin":resultrow.origin + } + +def track_db_to_dict(resultrow): + return { + "artists":[], + "title":resultrow.title, + "album":{}, + "length":resultrow.length + } diff --git a/maloja/db/sqldb.py b/maloja/db/sqldb.py new file mode 100644 index 0000000..2aa67af --- /dev/null +++ b/maloja/db/sqldb.py @@ -0,0 +1,210 @@ +import sqlalchemy as sql +import json +import unicodedata + +from ..globalconf import data_dir + + + +DB = {} + + +engine = sql.create_engine(f"sqlite:///{data_dir['scrobbles']('malojadb.sqlite')}", echo = False) +meta = sql.MetaData() + +DB['scrobbles'] = sql.Table( + 'scrobbles', meta, + sql.Column('timestamp',sql.Integer,primary_key=True), + sql.Column('rawscrobble',sql.String), + sql.Column('origin',sql.String), + sql.Column('duration',sql.Integer), + sql.Column('track_id',sql.Integer,sql.ForeignKey('tracks.id')) +) +DB['tracks'] = sql.Table( + 'tracks', meta, + sql.Column('id',sql.Integer,primary_key=True), + sql.Column('title',sql.String), + sql.Column('title_normalized',sql.String), + sql.Column('length',sql.Integer) +) +DB['artists'] = sql.Table( + 'artists', meta, + sql.Column('id',sql.Integer,primary_key=True), + sql.Column('name',sql.String), + sql.Column('name_normalized',sql.String) +) +DB['trackartists'] = sql.Table( + 'trackartists', meta, + sql.Column('id',sql.Integer,primary_key=True), + sql.Column('artist_id',sql.Integer,sql.ForeignKey('artists.id')), + sql.Column('track_id',sql.Integer,sql.ForeignKey('tracks.id')) +) + +meta.create_all(engine) + + + + + + + + + +def add_scrobble(scrobbledict): + add_scrobbles([scrobbledict]) + +def add_scrobbles(scrobbleslist): + + ops = [ + DB['scrobbles'].insert().values( + rawscrobble=json.dumps(s), + timestamp=s['time'], + origin=s['origin'], + duration=s['duration'] or -1, + track_id=get_track_id(s['track']) + ) for s in scrobbleslist + ] + + with engine.begin() as conn: + for op in ops: + try: + conn.execute(op) + except: + pass + + +### DB interface functions - these will 'get' the ID of an entity, +### creating it if necessary + + +def get_track_id(trackdict): + ntitle = normalize_name(trackdict['title']) + artist_ids = [get_artist_id(a) for a in trackdict['artists']] + + + + with engine.begin() as conn: + op = DB['tracks'].select( + DB['tracks'].c.id + ).where( + DB['tracks'].c.title_normalized==ntitle + ) + result = conn.execute(op).all() + for row in result: + # check if the artists are the same + foundtrackartists = [] + with engine.begin() as conn: + op = DB['trackartists'].select( + DB['trackartists'].c.artist_id + ).where( + DB['trackartists'].c.track_id==row[0] + ) + result = conn.execute(op).all() + match_artist_ids = [r.artist_id for r in result] + #print("required artists",artist_ids,"this match",match_artist_ids) + if set(artist_ids) == set(match_artist_ids): + #print("ID for",trackdict['title'],"was",row[0]) + return row.id + + with engine.begin() as conn: + op = DB['tracks'].insert().values( + title=trackdict['title'], + title_normalized=ntitle, + length=trackdict['length'] + ) + result = conn.execute(op) + track_id = result.inserted_primary_key[0] + with engine.begin() as conn: + for artist_id in artist_ids: + op = DB['trackartists'].insert().values( + track_id=track_id, + artist_id=artist_id + ) + result = conn.execute(op) + #print("Created",trackdict['title'],track_id) + return track_id + +def get_artist_id(artistname): + nname = normalize_name(artistname) + #print("looking for",nname) + + with engine.begin() as conn: + op = DB['artists'].select( + DB['artists'].c.id + ).where( + DB['artists'].c.name_normalized==nname + ) + result = conn.execute(op).all() + for row in result: + #print("ID for",artistname,"was",row[0]) + return row.id + + with engine.begin() as conn: + op = DB['artists'].insert().values( + name=artistname, + name_normalized=nname + ) + result = conn.execute(op) + #print("Created",artistname,result.inserted_primary_key) + return result.inserted_primary_key[0] + + +def get_scrobbles_of_artist(artist,since,to): + + artist_id = get_artist_id(artist) + + with engine.begin() as conn: + op = DB['scrobbles'].select().where( + DB['scrobbles'].c.timestamp<=to, + DB['scrobbles'].c.timestamp>=since, + ) + result = conn.execute(op).all() + + print(result) + return result + + +def get_scrobbles_of_track(track,since,to): + + track_id = get_track_id(track) + + with engine.begin() as conn: + op = DB['scrobbles'].select().where( + DB['scrobbles'].c.timestamp<=to, + DB['scrobbles'].c.timestamp>=since, + ) + result = conn.execute(op).all() + + print(result) + return result + + +def get_scrobbles(since,to): + + artist_id = get_artist_id(artist) + + with engine.begin() as conn: + op = DB['scrobbles'].select().where( + DB['scrobbles'].c.timestamp<=to, + DB['scrobbles'].c.timestamp>=since, + ) + result = conn.execute(op).all() + + print(result) + return result + + + + + + + +# function to turn the name into a representation that can be easily compared, ignoring minor differences +remove_symbols = ["'","`","’"] +replace_with_space = [" - ",": "] +def normalize_name(name): + for r in replace_with_space: + name = name.replace(r," ") + name = "".join(char for char in unicodedata.normalize('NFD',name.lower()) + if char not in remove_symbols and unicodedata.category(char) != 'Mn') + return name diff --git a/maloja/upgrade.py b/maloja/upgrade.py index 5caec5b..b43b4f2 100644 --- a/maloja/upgrade.py +++ b/maloja/upgrade.py @@ -26,13 +26,14 @@ def upgrade_apikeys(): def upgrade_db(callback_add_scrobbles): print(col['yellow']("Upgrading v2 Database to v3 Database. This could take a while...")) oldfolder = os.path.join(dir_settings['state'],"scrobbles") + newfolder = os.path.join(dir_settings['state'],".oldscrobbles") if os.path.exists(oldfolder): scrobblefiles = os.listdir(oldfolder) for sf in scrobblefiles: if sf.endswith(".tsv"): print(f"\tImporting from old tsv scrobble file: {sf}") if re.match(r"[0-9]+_[0-9]+\.tsv",sf): - origin = 'native' + origin = 'legacy' elif sf == "lastfmimport.tsv": origin = 'lastfm-import' else: @@ -50,13 +51,15 @@ def upgrade_db(callback_add_scrobbles): "track":{ "artists":artists.split('␟'), "title":title, - "album":{ - "name":album, - "artists":None - }, "length":None }, "duration":duration, - "origin":origin + "origin":origin, + "extra":{ + "album":album + # saving this in the scrobble instead of the track because for now it's not meant + # to be authorative information, just payload of the scrobble + } }) callback_add_scrobbles(scrobblelist) + os.rename(os.path.join(oldfolder,sf),os.path.join(newfolder,sf)) From 8ab42b844b32f333167efbe991b0a4bc86e5270f Mon Sep 17 00:00:00 2001 From: krateng Date: Mon, 3 Jan 2022 20:45:55 +0100 Subject: [PATCH 005/182] Removed shutdown handling --- maloja/server.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/maloja/server.py b/maloja/server.py index f64e83f..5140cda 100644 --- a/maloja/server.py +++ b/maloja/server.py @@ -1,6 +1,5 @@ # technical import sys -import signal import os from threading import Thread import setproctitle @@ -265,27 +264,6 @@ def redirect_track(artists,title): redirect("/track?title=" + title + "&" + "&".join("artist=" + artist for artist in artists.split("/"))) -###### -### SHUTDOWN -##### - - -def graceful_exit(sig=None,frame=None): - #urllib.request.urlopen("http://[::1]:" + str(DATABASE_PORT) + "/sync") - log("Received signal to shutdown") - try: - database.sync() - except Exception as e: - log("Error while shutting down!",e) - log("Server shutting down...") - sys.exit(0) - -#set graceful shutdown -signal.signal(signal.SIGINT, graceful_exit) -signal.signal(signal.SIGTERM, graceful_exit) - - - ###### From 0dd6cd9dd55b28277ef0b58c54b9bf6001912fd1 Mon Sep 17 00:00:00 2001 From: krateng Date: Tue, 4 Jan 2022 07:55:07 +0100 Subject: [PATCH 006/182] I promise this is temporary code! --- maloja/database.py | 14 +++--- maloja/db/convert.py | 40 ---------------- maloja/db/sqldb.py | 108 +++++++++++++++++++++++++++++++++++++++++-- maloja/upgrade.py | 2 + 4 files changed, 112 insertions(+), 52 deletions(-) diff --git a/maloja/database.py b/maloja/database.py index 7a5ea3f..5bcc712 100644 --- a/maloja/database.py +++ b/maloja/database.py @@ -9,7 +9,7 @@ from .malojauri import uri_to_internal, internal_to_uri, compose_querystring from .thirdparty import proxy_scrobble_all from .globalconf import data_dir, malojaconfig, apikeystore #db -from .db.sqldb import * +from .db import sqldb # doreah toolkit from doreah.logging import log @@ -545,7 +545,7 @@ def get_predefined_rulesets(): def start_db(): from . import upgrade - upgrade.upgrade_db(add_scrobbles) + upgrade.upgrade_db(sqldb.add_scrobbles) dbstatus['healthy'] = True dbstatus['complete'] = True @@ -735,22 +735,20 @@ def db_query_full(artist=None,artists=None,title=None,track=None,timerange=None, #artist = None if artists is not None and title is not None: - return get_scrobbles_of_track(track={"artists":artists,"title":title},since=since,to=to) + return sqldb.get_scrobbles_of_track(track={"artists":artists,"title":title},since=since,to=to) if artist is not None: - return get_scrobbles_of_artist(artist=artist,since=since,to=to) + return sqldb.get_scrobbles_of_artist(artist=artist,since=since,to=to) - return get_scrobbles(since=since,to=to) + return sqldb.get_scrobbles(since=since,to=to) - # pointless to check for artist when track is checked because every track has a fixed set of artists, but it's more elegant this way - # Queries that... well... aggregate def db_aggregate_full(by=None,since=None,to=None,within=None,timerange=None,artist=None): if not dbstatus['healthy']: raise DatabaseNotBuilt() - (since, to) = time_stamps(since=since,to=to,within=within,range=timerange) + (since, to) = time_stamps(range=timerange) if isinstance(artist, str): artist = ARTISTS.index(artist) diff --git a/maloja/db/convert.py b/maloja/db/convert.py index be9c42b..e69de29 100644 --- a/maloja/db/convert.py +++ b/maloja/db/convert.py @@ -1,40 +0,0 @@ -#### ATTENTION ALL ADVENTURERS -#### THIS IS WHAT A SCROBBLE DICT WILL LOOK LIKE FROM NOW ON -#### THIS IS THE SINGLE CANONICAL SOURCE OF TRUTH -#### STOP MAKING DIFFERENT LITTLE DICTS IN EVERY SINGLE FUNCTION -#### THIS IS THE SCHEMA THAT WILL DEFINITELY 100% STAY LIKE THIS AND NOT -#### RANDOMLY GET CHANGED TWO VERSIONS LATER -#### HERE WE GO -# -# { -# "time":int, -# "track":{ -# "artists":list, -# "title":string, -# "album":{ -# "name":string, -# "artists":list -# }, -# "length":None -# }, -# "duration":int, -# "origin":string -# } - - - -def scrobble_db_to_dict(resultrow): - return { - "time":resultrow.timestamp, - "track":track_db_to_dict(resultrow.track), - "duration":resultrow.duration, - "origin":resultrow.origin - } - -def track_db_to_dict(resultrow): - return { - "artists":[], - "title":resultrow.title, - "album":{}, - "length":resultrow.length - } diff --git a/maloja/db/sqldb.py b/maloja/db/sqldb.py index 2aa67af..26517ec 100644 --- a/maloja/db/sqldb.py +++ b/maloja/db/sqldb.py @@ -6,6 +6,8 @@ from ..globalconf import data_dir +##### DB Technical + DB = {} @@ -42,9 +44,76 @@ DB['trackartists'] = sql.Table( meta.create_all(engine) +##### DB <-> Dict translations + +## ATTENTION ALL ADVENTURERS +## this is what a scrobble dict will look like from now on +## this is the single canonical source of truth +## stop making different little dicts in every single function +## this is the schema that will definitely 100% stay like this and not +## randomly get changed two versions later +## here we go +# +# { +# "time":int, +# "track":{ +# "artists":list, +# "title":string, +# "album":{ +# "name":string, +# "artists":list +# }, +# "length":None +# }, +# "duration":int, +# "origin":string, +# "extra":{string-keyed mapping for all flags with the scrobble} +# } +def scrobble_db_to_dict(row): + + + return { + "time":row.timestamp, + "track":get_track(row.track_id), + "duration":row.duration, + "origin":row.origin + } + +def track_db_to_dict(row): + return { + "title":row.title, + "length":row.length + } + +def artist_db_to_dict(row): + return row.name + +def scrobble_dict_to_db(info): + return { + "rawscrobble":json.dumps(info), + "timestamp":info['time'], + "origin":info['origin'], + "duration":info['duration'], + "extra":info['extra'], + "track_id":get_track_id(info['track']) + } + +def track_dict_to_db(info): + return { + "title":info['title'], + "title_normalized":normalize_name(info['title']), + "length":info['length'] + } + +def artist_dict_to_db(info): + return { + "name": info, + "name_normalized":normalize_name(info) + } + @@ -60,7 +129,7 @@ def add_scrobbles(scrobbleslist): rawscrobble=json.dumps(s), timestamp=s['time'], origin=s['origin'], - duration=s['duration'] or -1, + duration=s['duration'], track_id=get_track_id(s['track']) ) for s in scrobbleslist ] @@ -180,20 +249,51 @@ def get_scrobbles_of_track(track,since,to): def get_scrobbles(since,to): - - artist_id = get_artist_id(artist) + print(since,to) with engine.begin() as conn: op = DB['scrobbles'].select().where( DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since, ) + print(str(op)) result = conn.execute(op).all() - print(result) + result = [scrobble_db_to_dict(row) for row in result] + return result + +def get_track(id): + with engine.begin() as conn: + op = DB['tracks'].select().where( + DB['tracks'].c.id==id + ) + result = conn.execute(op).all() + + trackinfo = result[0] + + + with engine.begin() as conn: + op = DB['trackartists'].select().where( + DB['trackartists'].c.track_id==id + ) + result = conn.execute(op).all() + + artists = [get_artist(row.artist_id) for row in result] + + result = track_db_to_dict(trackinfo) + result['artists'] = artists return result +def get_artist(id): + with engine.begin() as conn: + op = DB['artists'].select().where( + DB['artists'].c.id==id + ) + result = conn.execute(op).all() + + artistinfo = result[0] + return artist_db_to_dict(artistinfo) diff --git a/maloja/upgrade.py b/maloja/upgrade.py index b43b4f2..7029c88 100644 --- a/maloja/upgrade.py +++ b/maloja/upgrade.py @@ -27,6 +27,7 @@ def upgrade_db(callback_add_scrobbles): print(col['yellow']("Upgrading v2 Database to v3 Database. This could take a while...")) oldfolder = os.path.join(dir_settings['state'],"scrobbles") newfolder = os.path.join(dir_settings['state'],".oldscrobbles") + os.makedirs(newfolder,exist_ok=True) if os.path.exists(oldfolder): scrobblefiles = os.listdir(oldfolder) for sf in scrobblefiles: @@ -63,3 +64,4 @@ def upgrade_db(callback_add_scrobbles): }) callback_add_scrobbles(scrobblelist) os.rename(os.path.join(oldfolder,sf),os.path.join(newfolder,sf)) + print(col['yellow']("Done!")) From f88852ee6ab8d59d08d71b20061a30f6ff7e4ee5 Mon Sep 17 00:00:00 2001 From: krateng Date: Tue, 4 Jan 2022 08:08:38 +0100 Subject: [PATCH 007/182] We got the first working webpage! --- maloja/db/sqldb.py | 30 +++++++++++------------ maloja/web/jinja/partials/scrobbles.jinja | 2 +- maloja/web/jinja/scrobbles.jinja | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/maloja/db/sqldb.py b/maloja/db/sqldb.py index 26517ec..1a88ab2 100644 --- a/maloja/db/sqldb.py +++ b/maloja/db/sqldb.py @@ -72,9 +72,12 @@ meta.create_all(engine) + +##### Conversions between DB and dicts +## These should only take the row info from their respective table and fill in +## other information by calling the respective id lookup functions + def scrobble_db_to_dict(row): - - return { "time":row.timestamp, "track":get_track(row.track_id), @@ -84,6 +87,7 @@ def scrobble_db_to_dict(row): def track_db_to_dict(row): return { + "artists":get_artists_of_track(row.id), "title":row.title, "length":row.length } @@ -229,7 +233,6 @@ def get_scrobbles_of_artist(artist,since,to): ) result = conn.execute(op).all() - print(result) return result @@ -244,19 +247,16 @@ def get_scrobbles_of_track(track,since,to): ) result = conn.execute(op).all() - print(result) return result def get_scrobbles(since,to): - print(since,to) with engine.begin() as conn: op = DB['scrobbles'].select().where( DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since, ) - print(str(op)) result = conn.execute(op).all() result = [scrobble_db_to_dict(row) for row in result] @@ -272,16 +272,7 @@ def get_track(id): trackinfo = result[0] - with engine.begin() as conn: - op = DB['trackartists'].select().where( - DB['trackartists'].c.track_id==id - ) - result = conn.execute(op).all() - - artists = [get_artist(row.artist_id) for row in result] - result = track_db_to_dict(trackinfo) - result['artists'] = artists return result @@ -295,6 +286,15 @@ def get_artist(id): artistinfo = result[0] return artist_db_to_dict(artistinfo) +def get_artists_of_track(track_id): + with engine.begin() as conn: + op = DB['trackartists'].select().where( + DB['trackartists'].c.track_id==track_id + ) + result = conn.execute(op).all() + + artists = [get_artist(row.artist_id) for row in result] + return artists diff --git a/maloja/web/jinja/partials/scrobbles.jinja b/maloja/web/jinja/partials/scrobbles.jinja index c3086dc..3c00b3c 100644 --- a/maloja/web/jinja/partials/scrobbles.jinja +++ b/maloja/web/jinja/partials/scrobbles.jinja @@ -11,7 +11,7 @@ {%- if loop.index0 >= firstindex and loop.index0 < lastindex -%} {{ malojatime.timestamp_desc(s["time"],short=shortTimeDesc) }} - {{ entityrow.row(s) }} + {{ entityrow.row(s.track) }} {%- endif -%} {% endfor %} diff --git a/maloja/web/jinja/scrobbles.jinja b/maloja/web/jinja/scrobbles.jinja index 062cb90..814cde6 100644 --- a/maloja/web/jinja/scrobbles.jinja +++ b/maloja/web/jinja/scrobbles.jinja @@ -12,7 +12,7 @@ {% elif filterkeys.get('artist') is not none %} {% set img = utilities.getArtistImage(filterkeys.artist,fast=True) %} {% elif scrobbles.__len__() > 0 %} - {% set img = utilities.getTrackImage(artists=scrobbles[0].artists,title=scrobbles[0].title,fast=True) %} + {% set img = utilities.getTrackImage(artists=scrobbles[0].track.artists,title=scrobbles[0].track.title,fast=True) %} {% else %} {% set img = "/favicon.png" %} {% endif %} From 03186bc49fd931cd337264219c377c0747564b1b Mon Sep 17 00:00:00 2001 From: krateng Date: Tue, 4 Jan 2022 20:45:15 +0100 Subject: [PATCH 008/182] More refactoring --- maloja/{database.py => database/__init__.py} | 16 ++++++++-------- maloja/{db => database}/sqldb.py | 0 maloja/db/convert.py | 0 maloja/proccontrol/control.py | 5 ++++- 4 files changed, 12 insertions(+), 9 deletions(-) rename maloja/{database.py => database/__init__.py} (98%) rename maloja/{db => database}/sqldb.py (100%) delete mode 100644 maloja/db/convert.py diff --git a/maloja/database.py b/maloja/database/__init__.py similarity index 98% rename from maloja/database.py rename to maloja/database/__init__.py index 5bcc712..0c4b786 100644 --- a/maloja/database.py +++ b/maloja/database/__init__.py @@ -2,14 +2,14 @@ from bottle import request, response, FormsDict, HTTPError # rest of the project -from .cleanup import CleanerAgent, CollectorAgent -from . import utilities -from .malojatime import register_scrobbletime, time_stamps, ranges -from .malojauri import uri_to_internal, internal_to_uri, compose_querystring -from .thirdparty import proxy_scrobble_all -from .globalconf import data_dir, malojaconfig, apikeystore +from ..cleanup import CleanerAgent, CollectorAgent +from .. import utilities +from ..malojatime import register_scrobbletime, time_stamps, ranges +from ..malojauri import uri_to_internal, internal_to_uri, compose_querystring +from ..thirdparty import proxy_scrobble_all +from ..globalconf import data_dir, malojaconfig, apikeystore #db -from .db import sqldb +from . import sqldb # doreah toolkit from doreah.logging import log @@ -544,7 +544,7 @@ def get_predefined_rulesets(): def start_db(): - from . import upgrade + from .. import upgrade upgrade.upgrade_db(sqldb.add_scrobbles) dbstatus['healthy'] = True dbstatus['complete'] = True diff --git a/maloja/db/sqldb.py b/maloja/database/sqldb.py similarity index 100% rename from maloja/db/sqldb.py rename to maloja/database/sqldb.py diff --git a/maloja/db/convert.py b/maloja/db/convert.py deleted file mode 100644 index e69de29..0000000 diff --git a/maloja/proccontrol/control.py b/maloja/proccontrol/control.py index dd0eff6..d68f4b3 100644 --- a/maloja/proccontrol/control.py +++ b/maloja/proccontrol/control.py @@ -124,8 +124,11 @@ def main(*args,**kwargs): else: try: action, *args = args - actions[action](*args,**kwargs) + action = actions[action] except (ValueError, KeyError): print("Valid commands: " + " ".join(a for a in actions)) + return + + return action(*args,**kwargs) return True From 2f7f4c856752af0c0cf167d8ec037425c07caa77 Mon Sep 17 00:00:00 2001 From: krateng Date: Tue, 4 Jan 2022 22:14:27 +0100 Subject: [PATCH 009/182] Implemented getting scrobbles by artist and track, more refactoring --- maloja/database/__init__.py | 166 ++---------------------------------- maloja/database/cache.py | 158 ++++++++++++++++++++++++++++++++++ maloja/database/sqldb.py | 7 +- 3 files changed, 169 insertions(+), 162 deletions(-) create mode 100644 maloja/database/cache.py diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 0c4b786..dc7dd43 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -10,6 +10,7 @@ from ..thirdparty import proxy_scrobble_all from ..globalconf import data_dir, malojaconfig, apikeystore #db from . import sqldb +from .cache import db_query, db_aggregate # doreah toolkit from doreah.logging import log @@ -33,7 +34,6 @@ import unicodedata from collections import namedtuple from threading import Lock import yaml, json -import lru import math # url handling @@ -556,165 +556,6 @@ def start_db(): -### -## Caches in front of DB -## the volatile caches are intended mainly for excessive site navigation during one session -## the permanent caches are there to save data that is hard to calculate and never changes (old charts) -### - - - - -import copy - -if malojaconfig["USE_DB_CACHE"]: - def db_query(**kwargs): - return db_query_cached(**kwargs) - def db_aggregate(**kwargs): - return db_aggregate_cached(**kwargs) -else: - def db_query(**kwargs): - return db_query_full(**kwargs) - def db_aggregate(**kwargs): - return db_aggregate_full(**kwargs) - - -csz = malojaconfig["DB_CACHE_ENTRIES"] -cmp = malojaconfig["DB_MAX_MEMORY"] -try: - import psutil - use_psutil = True -except: - use_psutil = False - -cache_query = lru.LRU(csz) -cache_query_perm = lru.LRU(csz) -cache_aggregate = lru.LRU(csz) -cache_aggregate_perm = lru.LRU(csz) - -perm_caching = malojaconfig["CACHE_DATABASE_PERM"] -temp_caching = malojaconfig["CACHE_DATABASE_SHORT"] - -cachestats = { - "cache_query":{ - "hits_perm":0, - "hits_tmp":0, - "misses":0, - "objperm":cache_query_perm, - "objtmp":cache_query, - "name":"Query Cache" - }, - "cache_aggregate":{ - "hits_perm":0, - "hits_tmp":0, - "misses":0, - "objperm":cache_aggregate_perm, - "objtmp":cache_aggregate, - "name":"Aggregate Cache" - } -} - -from doreah.regular import runhourly - -@runhourly -def log_stats(): - logstr = "{name}: {hitsperm} Perm Hits, {hitstmp} Tmp Hits, {misses} Misses; Current Size: {sizeperm}/{sizetmp}" - for s in (cachestats["cache_query"],cachestats["cache_aggregate"]): - log(logstr.format(name=s["name"],hitsperm=s["hits_perm"],hitstmp=s["hits_tmp"],misses=s["misses"], - sizeperm=len(s["objperm"]),sizetmp=len(s["objtmp"])),module="debug") - -def db_query_cached(**kwargs): - global cache_query, cache_query_perm - key = utilities.serialize(kwargs) - - eligible_permanent_caching = ( - "timerange" in kwargs and - not kwargs["timerange"].active() and - perm_caching - ) - eligible_temporary_caching = ( - not eligible_permanent_caching and - temp_caching - ) - - # hit permanent cache for past timeranges - if eligible_permanent_caching and key in cache_query_perm: - cachestats["cache_query"]["hits_perm"] += 1 - return copy.copy(cache_query_perm.get(key)) - - # hit short term cache - elif eligible_temporary_caching and key in cache_query: - cachestats["cache_query"]["hits_tmp"] += 1 - return copy.copy(cache_query.get(key)) - - else: - cachestats["cache_query"]["misses"] += 1 - result = db_query_full(**kwargs) - if eligible_permanent_caching: cache_query_perm[key] = result - elif eligible_temporary_caching: cache_query[key] = result - - if use_psutil: - reduce_caches_if_low_ram() - - return result - - -def db_aggregate_cached(**kwargs): - global cache_aggregate, cache_aggregate_perm - key = utilities.serialize(kwargs) - - eligible_permanent_caching = ( - "timerange" in kwargs and - not kwargs["timerange"].active() and - perm_caching - ) - eligible_temporary_caching = ( - not eligible_permanent_caching and - temp_caching - ) - - # hit permanent cache for past timeranges - if eligible_permanent_caching and key in cache_aggregate_perm: - cachestats["cache_aggregate"]["hits_perm"] += 1 - return copy.copy(cache_aggregate_perm.get(key)) - - # hit short term cache - elif eligible_temporary_caching and key in cache_aggregate: - cachestats["cache_aggregate"]["hits_tmp"] += 1 - return copy.copy(cache_aggregate.get(key)) - - else: - cachestats["cache_aggregate"]["misses"] += 1 - result = db_aggregate_full(**kwargs) - if eligible_permanent_caching: cache_aggregate_perm[key] = result - elif eligible_temporary_caching: cache_aggregate[key] = result - - if use_psutil: - reduce_caches_if_low_ram() - - return result - -def invalidate_caches(): - global cache_query, cache_aggregate - cache_query.clear() - cache_aggregate.clear() - log("Database caches invalidated.") - -def reduce_caches(to=0.75): - global cache_query, cache_aggregate, cache_query_perm, cache_aggregate_perm - for c in cache_query, cache_aggregate, cache_query_perm, cache_aggregate_perm: - currentsize = len(c) - if currentsize > 100: - targetsize = max(int(currentsize * to),10) - c.set_size(targetsize) - c.set_size(csz) - -def reduce_caches_if_low_ram(): - ramprct = psutil.virtual_memory().percent - if ramprct > cmp: - log("{prct}% RAM usage, reducing caches!".format(prct=ramprct),module="debug") - ratio = (cmp / ramprct) ** 3 - reduce_caches(to=ratio) #### ## Database queries @@ -735,7 +576,10 @@ def db_query_full(artist=None,artists=None,title=None,track=None,timerange=None, #artist = None if artists is not None and title is not None: - return sqldb.get_scrobbles_of_track(track={"artists":artists,"title":title},since=since,to=to) + track = {'artists':artists,'title':title} + + if track is not None: + return sqldb.get_scrobbles_of_track(track=track,since=since,to=to) if artist is not None: return sqldb.get_scrobbles_of_artist(artist=artist,since=since,to=to) diff --git a/maloja/database/cache.py b/maloja/database/cache.py new file mode 100644 index 0000000..90a672b --- /dev/null +++ b/maloja/database/cache.py @@ -0,0 +1,158 @@ + +### +## Caches in front of DB +## the volatile caches are intended mainly for excessive site navigation during one session +## the permanent caches are there to save data that is hard to calculate and never changes (old charts) +### + +import psutil +import copy +import lru + +from doreah.logging import log + +from ..globalconf import malojaconfig +from .. import utilities +from .. import database as dbmain + +if malojaconfig["USE_DB_CACHE"]: + def db_query(**kwargs): + return db_query_cached(**kwargs) + def db_aggregate(**kwargs): + return db_aggregate_cached(**kwargs) +else: + def db_query(**kwargs): + return dbmain.db_query_full(**kwargs) + def db_aggregate(**kwargs): + return dbmain.db_aggregate_full(**kwargs) + + +csz = malojaconfig["DB_CACHE_ENTRIES"] +cmp = malojaconfig["DB_MAX_MEMORY"] + +cache_query = lru.LRU(csz) +cache_query_perm = lru.LRU(csz) +cache_aggregate = lru.LRU(csz) +cache_aggregate_perm = lru.LRU(csz) + +perm_caching = malojaconfig["CACHE_DATABASE_PERM"] +temp_caching = malojaconfig["CACHE_DATABASE_SHORT"] + +cachestats = { + "cache_query":{ + "hits_perm":0, + "hits_tmp":0, + "misses":0, + "objperm":cache_query_perm, + "objtmp":cache_query, + "name":"Query Cache" + }, + "cache_aggregate":{ + "hits_perm":0, + "hits_tmp":0, + "misses":0, + "objperm":cache_aggregate_perm, + "objtmp":cache_aggregate, + "name":"Aggregate Cache" + } +} + +from doreah.regular import runhourly + +@runhourly +def log_stats(): + logstr = "{name}: {hitsperm} Perm Hits, {hitstmp} Tmp Hits, {misses} Misses; Current Size: {sizeperm}/{sizetmp}" + for s in (cachestats["cache_query"],cachestats["cache_aggregate"]): + log(logstr.format(name=s["name"],hitsperm=s["hits_perm"],hitstmp=s["hits_tmp"],misses=s["misses"], + sizeperm=len(s["objperm"]),sizetmp=len(s["objtmp"])),module="debug") + +def db_query_cached(**kwargs): + global cache_query, cache_query_perm + key = utilities.serialize(kwargs) + + eligible_permanent_caching = ( + "timerange" in kwargs and + not kwargs["timerange"].active() and + perm_caching + ) + eligible_temporary_caching = ( + not eligible_permanent_caching and + temp_caching + ) + + # hit permanent cache for past timeranges + if eligible_permanent_caching and key in cache_query_perm: + cachestats["cache_query"]["hits_perm"] += 1 + return copy.copy(cache_query_perm.get(key)) + + # hit short term cache + elif eligible_temporary_caching and key in cache_query: + cachestats["cache_query"]["hits_tmp"] += 1 + return copy.copy(cache_query.get(key)) + + else: + cachestats["cache_query"]["misses"] += 1 + result = dbmain.db_query_full(**kwargs) + if eligible_permanent_caching: cache_query_perm[key] = result + elif eligible_temporary_caching: cache_query[key] = result + + reduce_caches_if_low_ram() + + return result + + +def db_aggregate_cached(**kwargs): + global cache_aggregate, cache_aggregate_perm + key = utilities.serialize(kwargs) + + eligible_permanent_caching = ( + "timerange" in kwargs and + not kwargs["timerange"].active() and + perm_caching + ) + eligible_temporary_caching = ( + not eligible_permanent_caching and + temp_caching + ) + + # hit permanent cache for past timeranges + if eligible_permanent_caching and key in cache_aggregate_perm: + cachestats["cache_aggregate"]["hits_perm"] += 1 + return copy.copy(cache_aggregate_perm.get(key)) + + # hit short term cache + elif eligible_temporary_caching and key in cache_aggregate: + cachestats["cache_aggregate"]["hits_tmp"] += 1 + return copy.copy(cache_aggregate.get(key)) + + else: + cachestats["cache_aggregate"]["misses"] += 1 + result = dbmain.db_aggregate_full(**kwargs) + if eligible_permanent_caching: cache_aggregate_perm[key] = result + elif eligible_temporary_caching: cache_aggregate[key] = result + + reduce_caches_if_low_ram() + + return result + +def invalidate_caches(): + global cache_query, cache_aggregate + cache_query.clear() + cache_aggregate.clear() + log("Database caches invalidated.") + +def reduce_caches(to=0.75): + global cache_query, cache_aggregate, cache_query_perm, cache_aggregate_perm + for c in cache_query, cache_aggregate, cache_query_perm, cache_aggregate_perm: + currentsize = len(c) + if currentsize > 100: + targetsize = max(int(currentsize * to),10) + c.set_size(targetsize) + c.set_size(csz) + +def reduce_caches_if_low_ram(): + ramprct = psutil.virtual_memory().percent + if ramprct > cmp: + log("{prct}% RAM usage, reducing caches!".format(prct=ramprct),module="debug") + ratio = (cmp / ramprct) ** 3 + reduce_caches(to=ratio) diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index 1a88ab2..708a3d0 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -226,13 +226,16 @@ def get_scrobbles_of_artist(artist,since,to): artist_id = get_artist_id(artist) + jointable = sql.join(DB['scrobbles'],DB['trackartists'],DB['scrobbles'].c.track_id == DB['trackartists'].c.track_id) with engine.begin() as conn: - op = DB['scrobbles'].select().where( + op = jointable.select().where( DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since, + DB['trackartists'].c.artist_id==artist_id ) result = conn.execute(op).all() + result = [scrobble_db_to_dict(row) for row in result] return result @@ -244,9 +247,11 @@ def get_scrobbles_of_track(track,since,to): op = DB['scrobbles'].select().where( DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since, + DB['scrobbles'].c.track_id==track_id ) result = conn.execute(op).all() + result = [scrobble_db_to_dict(row) for row in result] return result From 9fc838e4c80a7c90de88527d604ba4f63d92652c Mon Sep 17 00:00:00 2001 From: krateng Date: Wed, 5 Jan 2022 04:58:58 +0100 Subject: [PATCH 010/182] Implemented aggregating by track --- maloja/__pkginfo__.py | 2 +- maloja/database/__init__.py | 33 ++++++++++++--------------------- maloja/database/sqldb.py | 12 ++++++------ 3 files changed, 19 insertions(+), 28 deletions(-) diff --git a/maloja/__pkginfo__.py b/maloja/__pkginfo__.py index 9bfb1d5..df8e0a9 100644 --- a/maloja/__pkginfo__.py +++ b/maloja/__pkginfo__.py @@ -4,7 +4,7 @@ # you know what f*ck it # this is hardcoded for now because of that damn project / package name discrepancy # i'll fix it one day -VERSION = "2.14.6" +VERSION = "3.0.0-dev" HOMEPAGE = "https://github.com/krateng/maloja" diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index dc7dd43..541a005 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -569,13 +569,8 @@ def db_query_full(artist=None,artists=None,title=None,track=None,timerange=None, if not dbstatus['healthy']: raise DatabaseNotBuilt() (since, to) = time_stamps(range=timerange) - # this is not meant as a search function. we *can* query the db with a string, but it only works if it matches exactly - # if a title is specified, we assume that a specific track (with the exact artist combination) is requested - # if not, duplicate artist arguments are ignored - - #artist = None - if artists is not None and title is not None: + print(col['red']("THIS SHOULD NO LONGER HAPPEN")) track = {'artists':artists,'title':title} if track is not None: @@ -589,23 +584,16 @@ def db_query_full(artist=None,artists=None,title=None,track=None,timerange=None, # Queries that... well... aggregate -def db_aggregate_full(by=None,since=None,to=None,within=None,timerange=None,artist=None): +def db_aggregate_full(by=None,within=None,timerange=None,artist=None): if not dbstatus['healthy']: raise DatabaseNotBuilt() (since, to) = time_stamps(range=timerange) - if isinstance(artist, str): - artist = ARTISTS.index(artist) if (by=="ARTIST"): - #this is probably a really bad idea - #for a in ARTISTS: - # num = len(db_query(artist=a,since=since,to=to)) - # - # alright let's try for real charts = {} - #for s in [scr for scr in SCROBBLES if since < scr[1] < to]: + scrobbles = sqldb.get_scrobbles(since=since,to=to) for s in scrobbles_in_range(since,to): artists = TRACKS[s[0]][0] for a in coa.getCreditedList(artists): @@ -624,13 +612,16 @@ def db_aggregate_full(by=None,since=None,to=None,within=None,timerange=None,arti elif (by=="TRACK"): charts = {} - #for s in [scr for scr in SCROBBLES if since < scr[1] < to and (artist==None or (artist in TRACKS[scr[0]][0]))]: - for s in [scr for scr in scrobbles_in_range(since,to) if (artist is None or (artist in TRACKS[scr[0]][0]))]: - track = s[0] - # this either creates the new entry or increments the existing one - charts[track] = charts.setdefault(track,0) + 1 + if artist is None: + scrobbles = sqldb.get_scrobbles(since=since,to=to,resolve_references=False) + else: + scrobbles = sqldb.get_scrobbles_of_artist(since=since,to=to,artist=artist,resolve_references=False) - ls = [{"track":get_track_dict(TRACKS[t]),"scrobbles":charts[t]} for t in charts] + for s in scrobbles: + charts[s['track']] = charts.setdefault(s['track'],0) + 1 + + + ls = [{"track":sqldb.get_track(t),"scrobbles":charts[t]} for t in charts] ls.sort(key=lambda k:k["scrobbles"],reverse=True) # add ranks for rnk in range(len(ls)): diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index 708a3d0..59d6ef4 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -77,10 +77,10 @@ meta.create_all(engine) ## These should only take the row info from their respective table and fill in ## other information by calling the respective id lookup functions -def scrobble_db_to_dict(row): +def scrobble_db_to_dict(row,resolve_references=True): return { "time":row.timestamp, - "track":get_track(row.track_id), + "track":get_track(row.track_id) if resolve_references else row.track_id, "duration":row.duration, "origin":row.origin } @@ -222,7 +222,7 @@ def get_artist_id(artistname): return result.inserted_primary_key[0] -def get_scrobbles_of_artist(artist,since,to): +def get_scrobbles_of_artist(artist,since,to,resolve_references=True): artist_id = get_artist_id(artist) @@ -235,7 +235,7 @@ def get_scrobbles_of_artist(artist,since,to): ) result = conn.execute(op).all() - result = [scrobble_db_to_dict(row) for row in result] + result = [scrobble_db_to_dict(row,resolve_references=resolve_references) for row in result] return result @@ -255,7 +255,7 @@ def get_scrobbles_of_track(track,since,to): return result -def get_scrobbles(since,to): +def get_scrobbles(since,to,resolve_references=True): with engine.begin() as conn: op = DB['scrobbles'].select().where( @@ -264,7 +264,7 @@ def get_scrobbles(since,to): ) result = conn.execute(op).all() - result = [scrobble_db_to_dict(row) for row in result] + result = [scrobble_db_to_dict(row,resolve_references=resolve_references) for row in result] return result def get_track(id): From 40e733a0542011654f535ef8d4b06acf155b99cf Mon Sep 17 00:00:00 2001 From: krateng Date: Wed, 5 Jan 2022 08:16:30 +0100 Subject: [PATCH 011/182] Implemented aggregating by artist --- maloja/database/__init__.py | 21 +++++++++++++-------- maloja/database/cache.py | 2 +- maloja/database/sqldb.py | 4 ++-- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 541a005..533af03 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -584,7 +584,7 @@ def db_query_full(artist=None,artists=None,title=None,track=None,timerange=None, # Queries that... well... aggregate -def db_aggregate_full(by=None,within=None,timerange=None,artist=None): +def db_aggregate_full(by=None,timerange=None,artist=None): if not dbstatus['healthy']: raise DatabaseNotBuilt() (since, to) = time_stamps(range=timerange) @@ -592,15 +592,20 @@ def db_aggregate_full(by=None,within=None,timerange=None,artist=None): if (by=="ARTIST"): + trackcharts = {} charts = {} - scrobbles = sqldb.get_scrobbles(since=since,to=to) - for s in scrobbles_in_range(since,to): - artists = TRACKS[s[0]][0] - for a in coa.getCreditedList(artists): - # this either creates the new entry or increments the existing one - charts[a] = charts.setdefault(a,0) + 1 + scrobbles = sqldb.get_scrobbles(since=since,to=to,resolve_references=False) - ls = [{"artist":get_artist_dict(ARTISTS[a]),"scrobbles":charts[a],"counting":[arti for arti in coa.getAllAssociated(ARTISTS[a]) if arti in ARTISTS]} for a in charts] + for s in scrobbles: + trackcharts[s['track']] = trackcharts.setdefault(s['track'],0) + 1 + + for t in trackcharts: + artists = sqldb.get_artists_of_track(t,resolve_references=False) + for a in coa.getCreditedList(artists): + charts[a] = charts.setdefault(a,0) + trackcharts[t] + + + ls = [{"artist":sqldb.get_artist(a),"scrobbles":charts[a],"counting":[]} for a in charts] ls.sort(key=lambda k:k["scrobbles"],reverse=True) # add ranks for rnk in range(len(ls)): diff --git a/maloja/database/cache.py b/maloja/database/cache.py index 90a672b..a4c2ee3 100644 --- a/maloja/database/cache.py +++ b/maloja/database/cache.py @@ -15,7 +15,7 @@ from ..globalconf import malojaconfig from .. import utilities from .. import database as dbmain -if malojaconfig["USE_DB_CACHE"]: +if False: def db_query(**kwargs): return db_query_cached(**kwargs) def db_aggregate(**kwargs): diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index 59d6ef4..104ef2b 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -291,14 +291,14 @@ def get_artist(id): artistinfo = result[0] return artist_db_to_dict(artistinfo) -def get_artists_of_track(track_id): +def get_artists_of_track(track_id,resolve_references=True): with engine.begin() as conn: op = DB['trackartists'].select().where( DB['trackartists'].c.track_id==track_id ) result = conn.execute(op).all() - artists = [get_artist(row.artist_id) for row in result] + artists = [get_artist(row.artist_id) if resolve_references else row.artist_id for row in result] return artists From 80acf6275f478d1c6d2c59d565ee668dd85ff009 Mon Sep 17 00:00:00 2001 From: krateng Date: Thu, 6 Jan 2022 05:19:56 +0100 Subject: [PATCH 012/182] Moved API key checking to proper module --- maloja/apis/_apikeys.py | 20 +++++++++++++ maloja/apis/audioscrobbler.py | 7 +++-- maloja/apis/audioscrobbler_legacy.py | 3 +- maloja/apis/listenbrainz.py | 5 ++-- maloja/apis/native_v1.py | 4 +++ maloja/database/__init__.py | 45 ---------------------------- 6 files changed, 33 insertions(+), 51 deletions(-) create mode 100644 maloja/apis/_apikeys.py diff --git a/maloja/apis/_apikeys.py b/maloja/apis/_apikeys.py new file mode 100644 index 0000000..cdd153e --- /dev/null +++ b/maloja/apis/_apikeys.py @@ -0,0 +1,20 @@ +from ..globalconf import apikeystore + +# skip regular authentication if api key is present in request +# an api key now ONLY permits scrobbling tracks, no other admin tasks +def api_key_correct(request): + args = request.params + try: + args.update(request.json) + except: + pass + if "key" in args: + apikey = args.pop("key") + elif "apikey" in args: + apikey = args.pop("apikey") + else: return False + return checkAPIkey(apikey) +def checkAPIkey(key): + return apikeystore.check_key(key) +def allAPIkeys(): + return [apikeystore[k] for k in apikeystore] diff --git a/maloja/apis/audioscrobbler.py b/maloja/apis/audioscrobbler.py index 560c966..49d702a 100644 --- a/maloja/apis/audioscrobbler.py +++ b/maloja/apis/audioscrobbler.py @@ -1,6 +1,7 @@ from ._base import APIHandler from ._exceptions import * from .. import database +from ._apikeys import checkAPIkey, allAPIkeys class Audioscrobbler(APIHandler): __apiname__ = "Audioscrobbler" @@ -36,14 +37,14 @@ class Audioscrobbler(APIHandler): password = keys.get("password") # either username and password if user is not None and password is not None: - if password in database.allAPIkeys(): + if checkAPIkey(password): sessionkey = generate_key(self.mobile_sessions) return 200,{"session":{"key":sessionkey}} else: raise InvalidAuthException() # or username and token (deprecated by lastfm) elif user is not None and token is not None: - for key in database.allAPIkeys(): + for key in allAPIkeys(): if md5(user + md5(key)) == token: sessionkey = generate_key(self.mobile_sessions) return 200,{"session":{"key":sessionkey}} @@ -89,6 +90,6 @@ def generate_key(ls): random.choice( list(range(10)) + list("abcdefghijklmnopqrstuvwxyz") + list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"))) for _ in range(64)) - + ls.append(key) return key diff --git a/maloja/apis/audioscrobbler_legacy.py b/maloja/apis/audioscrobbler_legacy.py index 09cd7bb..54ccc36 100644 --- a/maloja/apis/audioscrobbler_legacy.py +++ b/maloja/apis/audioscrobbler_legacy.py @@ -1,6 +1,7 @@ from ._base import APIHandler from ._exceptions import * from .. import database +from ._apikeys import checkAPIkey, allAPIkeys from bottle import request @@ -41,7 +42,7 @@ class AudioscrobblerLegacy(APIHandler): protocol = 'http' if (keys.get("u") == 'nossl') else request.urlparts.scheme if auth is not None: - for key in database.allAPIkeys(): + for key in allAPIkeys(): if check_token(auth, key, timestamp): sessionkey = generate_key(self.mobile_sessions) return 200, ( diff --git a/maloja/apis/listenbrainz.py b/maloja/apis/listenbrainz.py index a254ffa..7447bb9 100644 --- a/maloja/apis/listenbrainz.py +++ b/maloja/apis/listenbrainz.py @@ -2,6 +2,7 @@ from ._base import APIHandler from ._exceptions import * from .. import database import datetime +from ._apikeys import checkAPIkey from ..globalconf import malojaconfig @@ -36,7 +37,7 @@ class Listenbrainz(APIHandler): except: raise BadAuthException() - if token not in database.allAPIkeys(): + if not checkAPIkey(token): raise InvalidAuthException() try: @@ -69,7 +70,7 @@ class Listenbrainz(APIHandler): token = self.get_token_from_request_keys(keys) except: raise BadAuthException() - if token not in database.allAPIkeys(): + if not checkAPIkey(token): raise InvalidAuthException() else: return 200,{"code":200,"message":"Token valid.","valid":True,"user_name":malojaconfig["NAME"]} diff --git a/maloja/apis/native_v1.py b/maloja/apis/native_v1.py index 672d4f3..1515993 100644 --- a/maloja/apis/native_v1.py +++ b/maloja/apis/native_v1.py @@ -3,6 +3,7 @@ from ..globalconf import malojaconfig, apikeystore from ..__pkginfo__ import VERSION from ..malojauri import uri_to_internal from .. import utilities +from ._apikeys import api_key_correct from bottle import response, static_file @@ -15,6 +16,9 @@ api = API(delay=True) api.__apipath__ = "mlj_1" + + + @api.get("test") def test_server(key=None): """Pings the server. If an API key is supplied, the server will respond with 200 diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 533af03..d0975db 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -42,7 +42,6 @@ import urllib -dblock = Lock() #global database lock dbstatus = { "healthy":False, "rebuildinprogress":False, @@ -69,34 +68,6 @@ cla = CleanerAgent() coa = CollectorAgent() -def checkAPIkey(key): - return apikeystore.check_key(key) - -def allAPIkeys(): - return [apikeystore[k] for k in apikeystore] - - -#### -## Getting dict representations of database objects -#### - -def get_scrobble_dict(o): - track = get_track_dict(TRACKS[o.track]) - return {"artists":track["artists"],"title":track["title"],"time":o.timestamp,"album":o.album,"duration":o.duration} - -def get_artist_dict(o): - return o - #technically not a dict, but... you know - -def get_track_dict(o): - artists = [get_artist_dict(ARTISTS[a]) for a in o.artists] - return {"artists":artists,"title":o.title} - - -#### -## Creating or finding existing database entries -#### - def createScrobble(artists,title,time,album=None,duration=None,volatile=False): @@ -138,23 +109,7 @@ def createScrobble(artists,title,time,album=None,duration=None,volatile=False): ######## ######## -# skip regular authentication if api key is present in request -# an api key now ONLY permits scrobbling tracks, no other admin tasks -def api_key_correct(request): - args = request.params - try: - args.update(request.json) - except: - pass - if "key" in args: - apikey = args["key"] - del args["key"] - elif "apikey" in args: - apikey = args["apikey"] - del args["apikey"] - else: return False - return checkAPIkey(apikey) From 44a124e6ec7c522cbbb192134c4ca6184c4985df Mon Sep 17 00:00:00 2001 From: krateng Date: Thu, 6 Jan 2022 09:28:34 +0100 Subject: [PATCH 013/182] More experimenting with database architecture --- maloja/database/__init__.py | 81 +++++++-------- maloja/database/sqldb.py | 196 +++++++++++++++++++++++++++++------- maloja/malojatime.py | 3 + 3 files changed, 196 insertions(+), 84 deletions(-) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index d0975db..270b314 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -56,6 +56,13 @@ class DatabaseNotBuilt(HTTPError): ) +def waitfordb(func): + def newfunc(*args,**kwargs): + if not dbstatus['healthy']: raise DatabaseNotBuilt() + return func(*args,**kwargs) + return newfunc + + MEDALS_ARTISTS = {} #literally only changes once per year, no need to calculate that on the fly MEDALS_TRACKS = {} @@ -103,53 +110,33 @@ def createScrobble(artists,title,time,album=None,duration=None,volatile=False): -######## -######## -## HTTP requests and their associated functions -######## -######## - - - - - - - - +@waitfordb def get_scrobbles(**keys): - r = db_query(**{k:keys[k] for k in keys if k in ["artist","artists","title","since","to","within","timerange","associated","track"]}) - return r - - -def info(): - totalscrobbles = get_scrobbles_num() - artists = {} - - return { - "name":malojaconfig["NAME"], - "artists":{ - chartentry["artist"]:round(chartentry["scrobbles"] * 100 / totalscrobbles,3) - for chartentry in get_charts_artists() if chartentry["scrobbles"]/totalscrobbles >= 0 - }, - "known_servers":list(KNOWN_SERVERS) - } - - + (since,to) = keys.get('timerange').timestamps() + if 'artist' in keys: + result = sqldb.get_scrobbles_of_artist(artist=keys['artist'],since=since,to=to) + elif 'track' in keys: + result = sqldb.get_scrobbles_of_track(track=keys['track'],since=since,to=to) + else: + result = sqldb.get_scrobbles(since=since,to=to) + #return result[keys['page']*keys['perpage']:(keys['page']+1)*keys['perpage']] + return result +@waitfordb def get_scrobbles_num(**keys): - r = db_query(**{k:keys[k] for k in keys if k in ["artist","track","artists","title","since","to","within","timerange","associated"]}) - return len(r) + return len(get_scrobbles(**keys)) +@waitfordb def get_tracks(artist=None): + if artist is None: + result = sqldb.get_tracks() + else: + result = sqldb.get_tracks_of_artist(artist) + return result - artistid = ARTISTS.index(artist) if artist is not None else None - return [get_track_dict(t) for t in TRACKS if (artistid in t.artists) or (artistid==None)] - - - +@waitfordb def get_artists(): - if not dbstatus['healthy']: raise DatabaseNotBuilt() - return ARTISTS #well + return sqldb.get_artists() @@ -504,6 +491,9 @@ def start_db(): dbstatus['healthy'] = True dbstatus['complete'] = True + firstscrobble = sqldb.get_scrobbles(max=1)[0] + register_scrobbletime(firstscrobble['time']) + @@ -520,7 +510,7 @@ def start_db(): # Queries the database def db_query_full(artist=None,artists=None,title=None,track=None,timerange=None,associated=False,max_=None): - print((artist,artists,title,track,timerange)) + if not dbstatus['healthy']: raise DatabaseNotBuilt() (since, to) = time_stamps(range=timerange) @@ -529,12 +519,12 @@ def db_query_full(artist=None,artists=None,title=None,track=None,timerange=None, track = {'artists':artists,'title':title} if track is not None: - return sqldb.get_scrobbles_of_track(track=track,since=since,to=to) + return list(reversed(sqldb.get_scrobbles_of_track(track=track,since=since,to=to))) if artist is not None: - return sqldb.get_scrobbles_of_artist(artist=artist,since=since,to=to) + return list(reversed(sqldb.get_scrobbles_of_artist(artist=artist,since=since,to=to))) - return sqldb.get_scrobbles(since=since,to=to) + return list(reversed(sqldb.get_scrobbles(since=since,to=to))) @@ -592,8 +582,7 @@ def db_aggregate_full(by=None,timerange=None,artist=None): return ls else: - #return len([scr for scr in SCROBBLES if since < scr[1] < to]) - return len(list(scrobbles_in_range(since,to))) + return len(sqldb.get_scrobbles(since=since,to=to,resolve_references=False)) # Search for strings diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index 104ef2b..3293e1f 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -1,6 +1,8 @@ import sqlalchemy as sql import json import unicodedata +import math +from datetime import datetime from ..globalconf import data_dir @@ -74,27 +76,69 @@ meta.create_all(engine) ##### Conversions between DB and dicts -## These should only take the row info from their respective table and fill in -## other information by calling the respective id lookup functions -def scrobble_db_to_dict(row,resolve_references=True): - return { - "time":row.timestamp, - "track":get_track(row.track_id) if resolve_references else row.track_id, - "duration":row.duration, - "origin":row.origin - } +# These should work on whole lists and collect all the references, +# then look them up once and fill them in -def track_db_to_dict(row): - return { - "artists":get_artists_of_track(row.id), - "title":row.title, - "length":row.length - } + +### DB -> DICT + +#def scrobble_db_to_dict(row,resolve_references=True): +# return { +# "time":row.timestamp, +# "track":get_track(row.track_id) if resolve_references else row.track_id, +# "duration":row.duration, +# "origin":row.origin +# } + + +def scrobbles_db_to_dict(rows): + #track_ids = set(row.track_id for row in rows) + #tracks = { + # track_id:get_track(track_id) for track_id in track_ids + #} + tracks = get_tracks_map(set(row.track_id for row in rows)) + return [ + { + "time":row.timestamp, + "track":tracks[row.track_id], + "duration":row.duration, + "origin":row.origin + } + for row in rows + ] + +#def track_db_to_dict(row): +# return { +# "artists":get_artists_of_track(row.id), +# "title":row.title, +# "length":row.length +# } + +def tracks_db_to_dict(rows): + artists = get_artists_of_tracks(set(row.id for row in rows)) + return [ + { + "artists":artists[row.id], + "title":row.title, + "length":row.length + } + for row in rows + ] def artist_db_to_dict(row): return row.name +def artists_db_to_dict(rows): + return [ + row.name + for row in rows + ] + + +### DICT -> DB + + def scrobble_dict_to_db(info): return { "rawscrobble":json.dumps(info), @@ -122,6 +166,8 @@ def artist_dict_to_db(info): +##### Actual Database interactions + def add_scrobble(scrobbledict): add_scrobbles([scrobbledict]) @@ -130,11 +176,7 @@ def add_scrobbles(scrobbleslist): ops = [ DB['scrobbles'].insert().values( - rawscrobble=json.dumps(s), - timestamp=s['time'], - origin=s['origin'], - duration=s['duration'], - track_id=get_track_id(s['track']) + **scrobble_dict_to_db(s) ) for s in scrobbleslist ] @@ -146,8 +188,7 @@ def add_scrobbles(scrobbleslist): pass -### DB interface functions - these will 'get' the ID of an entity, -### creating it if necessary +### these will 'get' the ID of an entity, creating it if necessary def get_track_id(trackdict): @@ -222,7 +263,16 @@ def get_artist_id(artistname): return result.inserted_primary_key[0] -def get_scrobbles_of_artist(artist,since,to,resolve_references=True): + + + +### Functions that get rows according to parameters + + +def get_scrobbles_of_artist(artist,since=None,to=None,resolve_references=True): + + if since is None: since=0 + if to is None: to=now() artist_id = get_artist_id(artist) @@ -235,11 +285,15 @@ def get_scrobbles_of_artist(artist,since,to,resolve_references=True): ) result = conn.execute(op).all() - result = [scrobble_db_to_dict(row,resolve_references=resolve_references) for row in result] + result = scrobbles_db_to_dict(result) + #result = [scrobble_db_to_dict(row,resolve_references=resolve_references) for row in result] return result -def get_scrobbles_of_track(track,since,to): +def get_scrobbles_of_track(track,since=None,to=None): + + if since is None: since=0 + if to is None: to=now() track_id = get_track_id(track) @@ -251,11 +305,15 @@ def get_scrobbles_of_track(track,since,to): ) result = conn.execute(op).all() - result = [scrobble_db_to_dict(row) for row in result] + result = scrobbles_db_to_dict(result) + #result = [scrobble_db_to_dict(row) for row in result] return result -def get_scrobbles(since,to,resolve_references=True): +def get_scrobbles(since=None,to=None,resolve_references=True,max=math.inf): + + if since is None: since=0 + if to is None: to=now() with engine.begin() as conn: op = DB['scrobbles'].select().where( @@ -264,9 +322,77 @@ def get_scrobbles(since,to,resolve_references=True): ) result = conn.execute(op).all() - result = [scrobble_db_to_dict(row,resolve_references=resolve_references) for row in result] + result = scrobbles_db_to_dict(result) + #result = [scrobble_db_to_dict(row,resolve_references=resolve_references) for i,row in enumerate(result) if i datetime.utcnow().timestamp()) From 8a96a2c144a52442a81bf061148742e792a0583e Mon Sep 17 00:00:00 2001 From: krateng Date: Thu, 6 Jan 2022 20:07:55 +0100 Subject: [PATCH 014/182] Reorganized sql module, implemented artist charts --- maloja/database/__init__.py | 6 +- maloja/database/sqldb.py | 162 +++++++++++++++++++++++------------- 2 files changed, 107 insertions(+), 61 deletions(-) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 270b314..49ff0e8 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -139,9 +139,11 @@ def get_artists(): return sqldb.get_artists() - +@waitfordb def get_charts_artists(**keys): - return db_aggregate(by="ARTIST",**{k:keys[k] for k in keys if k in ["since","to","within","timerange"]}) + (since,to) = keys.get('timerange').timestamps() + result = sqldb.count_scrobbles_by_artist(since=since,to=to) + return result def get_charts_tracks(**keys): diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index 3293e1f..e22c678 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -83,20 +83,7 @@ meta.create_all(engine) ### DB -> DICT -#def scrobble_db_to_dict(row,resolve_references=True): -# return { -# "time":row.timestamp, -# "track":get_track(row.track_id) if resolve_references else row.track_id, -# "duration":row.duration, -# "origin":row.origin -# } - - def scrobbles_db_to_dict(rows): - #track_ids = set(row.track_id for row in rows) - #tracks = { - # track_id:get_track(track_id) for track_id in track_ids - #} tracks = get_tracks_map(set(row.track_id for row in rows)) return [ { @@ -107,13 +94,9 @@ def scrobbles_db_to_dict(rows): } for row in rows ] +def scrobble_db_to_dict(row): + return scrobbles_db_to_dict([row])[0] -#def track_db_to_dict(row): -# return { -# "artists":get_artists_of_track(row.id), -# "title":row.title, -# "length":row.length -# } def tracks_db_to_dict(rows): artists = get_artists_of_tracks(set(row.id for row in rows)) @@ -125,20 +108,23 @@ def tracks_db_to_dict(rows): } for row in rows ] +def track_db_to_dict(row): + return tracks_db_to_dict([row])[0] -def artist_db_to_dict(row): - return row.name def artists_db_to_dict(rows): return [ row.name for row in rows ] +def artist_db_to_dict(row): + return artists_db_to_dict([row])[0] + + ### DICT -> DB - - +# TODO def scrobble_dict_to_db(info): return { "rawscrobble":json.dumps(info), @@ -337,38 +323,6 @@ def get_artists_of_track(track_id,resolve_references=True): artists = [get_artist(row.artist_id) if resolve_references else row.artist_id for row in result] return artists -def get_artists_of_tracks(track_ids): - with engine.begin() as conn: - op = sql.join(DB['trackartists'],DB['artists']).select().where( - DB['trackartists'].c.track_id.in_(track_ids) - ) - result = conn.execute(op).all() - - artists = {} - for row in result: - artists.setdefault(row.track_id,[]).append(artist_db_to_dict(row)) - return artists - -def get_tracks_map(track_ids): - with engine.begin() as conn: - op = DB['tracks'].select().where( - DB['tracks'].c.id.in_(track_ids) - ) - result = conn.execute(op).all() - - tracks = {} - trackids = [row.id for row in result] - trackdicts = tracks_db_to_dict(result) - for i in range(len(trackids)): - tracks[trackids[i]] = trackdicts[i] - return tracks - -def get_tracks(): - with engine.begin() as conn: - op = DB['tracks'].select() - result = conn.execute(op).all() - - return tracks_db_to_dict(result) def get_tracks_of_artist(artist): @@ -389,6 +343,84 @@ def get_artists(): return artists_db_to_dict(result) +def get_tracks(): + with engine.begin() as conn: + op = DB['tracks'].select() + result = conn.execute(op).all() + + return tracks_db_to_dict(result) + +### functions that count rows for parameters + +def count_scrobbles_by_artist(since,to): + jointable = sql.join(DB['scrobbles'],DB['trackartists'],DB['scrobbles'].c.track_id == DB['trackartists'].c.track_id) + with engine.begin() as conn: + op = sql.select( + sql.func.count(DB['scrobbles'].c.timestamp).label('count'), + DB['trackartists'].c.artist_id + ).select_from(jointable).where( + DB['scrobbles'].c.timestamp<=to, + DB['scrobbles'].c.timestamp>=since + ).group_by(DB['trackartists'].c.artist_id).order_by(sql.desc('count')) + result = conn.execute(op).all() + + + counts = [row.count for row in result] + artists = get_artists_map(row.artist_id for row in result) + result = [{'scrobbles':row.count,'artist':artists[row.artist_id]} for row in result] + print(result) + return rank(result,key='scrobbles') + + + + +### functions that get mappings for several entities -> rows + +def get_artists_of_tracks(track_ids): + with engine.begin() as conn: + op = sql.join(DB['trackartists'],DB['artists']).select().where( + DB['trackartists'].c.track_id.in_(track_ids) + ) + result = conn.execute(op).all() + + artists = {} + for row in result: + artists.setdefault(row.track_id,[]).append(artist_db_to_dict(row)) + return artists + + +def get_tracks_map(track_ids): + with engine.begin() as conn: + op = DB['tracks'].select().where( + DB['tracks'].c.id.in_(track_ids) + ) + result = conn.execute(op).all() + + tracks = {} + trackids = [row.id for row in result] + trackdicts = tracks_db_to_dict(result) + for i in range(len(trackids)): + tracks[trackids[i]] = trackdicts[i] + return tracks + +def get_artists_map(artist_ids): + with engine.begin() as conn: + op = DB['artists'].select().where( + DB['artists'].c.id.in_(artist_ids) + ) + result = conn.execute(op).all() + + artists = {} + artistids = [row.id for row in result] + artistdicts = artists_db_to_dict(result) + for i in range(len(artistids)): + artists[artistids[i]] = artistdicts[i] + return artists + + + + + ### get a specific entity by id @@ -401,10 +433,7 @@ def get_track(id): result = conn.execute(op).all() trackinfo = result[0] - - - result = track_db_to_dict(trackinfo) - return result + return track_db_to_dict(trackinfo) def get_artist(id): @@ -420,6 +449,13 @@ def get_artist(id): + + + +##### AUX FUNCS + + + # function to turn the name into a representation that can be easily compared, ignoring minor differences remove_symbols = ["'","`","’"] replace_with_space = [" - ",": "] @@ -433,3 +469,11 @@ def normalize_name(name): def now(): return int(datetime.now().timestamp()) + +def rank(ls,key): + for rnk in range(len(ls)): + if rnk == 0 or ls[rnk][key] < ls[rnk-1][key]: + ls[rnk]["rank"] = rnk + 1 + else: + ls[rnk]["rank"] = ls[rnk-1]["rank"] + return ls From 1824a8e5dc4ec369243abfe7d7a4660730b2457a Mon Sep 17 00:00:00 2001 From: krateng Date: Thu, 6 Jan 2022 20:29:34 +0100 Subject: [PATCH 015/182] Fixed orderings --- maloja/database/sqldb.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index e22c678..4f791f7 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -255,7 +255,7 @@ def get_artist_id(artistname): ### Functions that get rows according to parameters -def get_scrobbles_of_artist(artist,since=None,to=None,resolve_references=True): +def get_scrobbles_of_artist(artist,since=None,to=None): if since is None: since=0 if to is None: to=now() @@ -268,7 +268,7 @@ def get_scrobbles_of_artist(artist,since=None,to=None,resolve_references=True): DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since, DB['trackartists'].c.artist_id==artist_id - ) + ).order_by(sql.desc('timestamp')) result = conn.execute(op).all() result = scrobbles_db_to_dict(result) @@ -288,7 +288,7 @@ def get_scrobbles_of_track(track,since=None,to=None): DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since, DB['scrobbles'].c.track_id==track_id - ) + ).order_by(sql.desc('timestamp')) result = conn.execute(op).all() result = scrobbles_db_to_dict(result) @@ -305,7 +305,7 @@ def get_scrobbles(since=None,to=None,resolve_references=True,max=math.inf): op = DB['scrobbles'].select().where( DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since, - ) + ).order_by(sql.desc('timestamp')) result = conn.execute(op).all() result = scrobbles_db_to_dict(result) @@ -368,8 +368,8 @@ def count_scrobbles_by_artist(since,to): counts = [row.count for row in result] artists = get_artists_map(row.artist_id for row in result) result = [{'scrobbles':row.count,'artist':artists[row.artist_id]} for row in result] - print(result) - return rank(result,key='scrobbles') + result = rank(result,key='scrobbles') + return result From 11bebce807583261bbc2f936e8900bff6204f404 Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 7 Jan 2022 03:50:21 +0100 Subject: [PATCH 016/182] Implemented associated artists for artist charts --- maloja/database/__init__.py | 4 ++-- maloja/database/sqldb.py | 31 ++++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 49ff0e8..9257b43 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -51,8 +51,8 @@ class DatabaseNotBuilt(HTTPError): def __init__(self): super().__init__( status=503, - body="The Maloja Database is still being built. Try again in a few seconds.", - headers={"Retry-After":10} + body="The Maloja Database is being upgraded to Version 3. This could take several minutes.", + headers={"Retry-After":120} ) diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index 4f791f7..a61a0ce 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -44,6 +44,12 @@ DB['trackartists'] = sql.Table( sql.Column('track_id',sql.Integer,sql.ForeignKey('tracks.id')) ) +DB['associated_artists'] = sql.Table( + 'associated_artists', meta, + sql.Column('source_artist',sql.Integer,sql.ForeignKey('artists.id')), + sql.Column('target_artist',sql.Integer,sql.ForeignKey('artists.id')) +) + meta.create_all(engine) ##### DB <-> Dict translations @@ -353,15 +359,30 @@ def get_tracks(): ### functions that count rows for parameters def count_scrobbles_by_artist(since,to): - jointable = sql.join(DB['scrobbles'],DB['trackartists'],DB['scrobbles'].c.track_id == DB['trackartists'].c.track_id) + jointable = sql.join( + DB['scrobbles'], + DB['trackartists'], + DB['scrobbles'].c.track_id == DB['trackartists'].c.track_id + ) + + jointable2 = sql.join( + jointable, + DB['associated_artists'], + DB['trackartists'].c.artist_id == DB['associated_artists'].c.source_artist, + isouter=True + ) with engine.begin() as conn: op = sql.select( - sql.func.count(DB['scrobbles'].c.timestamp).label('count'), - DB['trackartists'].c.artist_id - ).select_from(jointable).where( + sql.func.count(sql.func.distinct(DB['scrobbles'].c.timestamp)).label('count'), + # only count distinct scrobbles - because of artist replacement, we could end up + # with two artists of the same scrobble counting it twice for the same artist + # e.g. Irene and Seulgi adding two scrobbles to Red Velvet for one real scrobble + sql.func.coalesce(DB['associated_artists'].c.target_artist,DB['trackartists'].c.artist_id).label('artist_id') + # use the replaced artist as artist to count if it exists, otherwise original one + ).select_from(jointable2).where( DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since - ).group_by(DB['trackartists'].c.artist_id).order_by(sql.desc('count')) + ).group_by(sql.func.coalesce(DB['associated_artists'].c.target_artist,DB['trackartists'].c.artist_id)).order_by(sql.desc('count')) result = conn.execute(op).all() From f68fe04760fb4cafe37f2855e1aab95dba1df522 Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 7 Jan 2022 04:07:10 +0100 Subject: [PATCH 017/182] Implemented track charts --- maloja/database/__init__.py | 8 +++++--- maloja/database/sqldb.py | 27 ++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 9257b43..7a9b029 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -120,7 +120,7 @@ def get_scrobbles(**keys): else: result = sqldb.get_scrobbles(since=since,to=to) #return result[keys['page']*keys['perpage']:(keys['page']+1)*keys['perpage']] - return result + return list(reversed(result)) @waitfordb def get_scrobbles_num(**keys): @@ -145,9 +145,11 @@ def get_charts_artists(**keys): result = sqldb.count_scrobbles_by_artist(since=since,to=to) return result - +@waitfordb def get_charts_tracks(**keys): - return db_aggregate(by="TRACK",**{k:keys[k] for k in keys if k in ["since","to","within","timerange","artist"]}) + (since,to) = keys.get('timerange').timestamps() + result = sqldb.count_scrobbles_by_track(since=since,to=to) + return result def get_pulse(**keys): diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index a61a0ce..f308e1d 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -274,7 +274,7 @@ def get_scrobbles_of_artist(artist,since=None,to=None): DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since, DB['trackartists'].c.artist_id==artist_id - ).order_by(sql.desc('timestamp')) + ).order_by(sql.asc('timestamp')) result = conn.execute(op).all() result = scrobbles_db_to_dict(result) @@ -294,7 +294,7 @@ def get_scrobbles_of_track(track,since=None,to=None): DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since, DB['scrobbles'].c.track_id==track_id - ).order_by(sql.desc('timestamp')) + ).order_by(sql.asc('timestamp')) result = conn.execute(op).all() result = scrobbles_db_to_dict(result) @@ -311,7 +311,7 @@ def get_scrobbles(since=None,to=None,resolve_references=True,max=math.inf): op = DB['scrobbles'].select().where( DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since, - ).order_by(sql.desc('timestamp')) + ).order_by(sql.asc('timestamp')) result = conn.execute(op).all() result = scrobbles_db_to_dict(result) @@ -393,6 +393,27 @@ def count_scrobbles_by_artist(since,to): return result +def count_scrobbles_by_track(since,to): + print(since,to) + + with engine.begin() as conn: + op = sql.select( + sql.func.count(sql.func.distinct(DB['scrobbles'].c.timestamp)).label('count'), + DB['scrobbles'].c.track_id + ).select_from(DB['scrobbles']).where( + DB['scrobbles'].c.timestamp<=to, + DB['scrobbles'].c.timestamp>=since + ).group_by(DB['scrobbles'].c.track_id).order_by(sql.desc('count')) + result = conn.execute(op).all() + + + counts = [row.count for row in result] + tracks = get_tracks_map(row.track_id for row in result) + result = [{'scrobbles':row.count,'track':tracks[row.track_id]} for row in result] + result = rank(result,key='scrobbles') + return result + + ### functions that get mappings for several entities -> rows From 02ddeb4dc0a2f4487febbe50f44dd0fb7c426b1b Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 7 Jan 2022 04:30:23 +0100 Subject: [PATCH 018/182] Implemented artist track charts --- maloja/database/__init__.py | 7 +++++-- maloja/database/sqldb.py | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 7a9b029..791592b 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -148,7 +148,10 @@ def get_charts_artists(**keys): @waitfordb def get_charts_tracks(**keys): (since,to) = keys.get('timerange').timestamps() - result = sqldb.count_scrobbles_by_track(since=since,to=to) + if 'artist' in keys: + result = sqldb.count_scrobbles_by_track_of_artist(since=since,to=to,artist=keys['artist']) + else: + result = sqldb.count_scrobbles_by_track(since=since,to=to) return result def get_pulse(**keys): @@ -495,7 +498,7 @@ def start_db(): dbstatus['healthy'] = True dbstatus['complete'] = True - firstscrobble = sqldb.get_scrobbles(max=1)[0] + firstscrobble = sqldb.get_scrobbles()[0] register_scrobbletime(firstscrobble['time']) diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index f308e1d..42ec730 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -302,7 +302,7 @@ def get_scrobbles_of_track(track,since=None,to=None): return result -def get_scrobbles(since=None,to=None,resolve_references=True,max=math.inf): +def get_scrobbles(since=None,to=None,resolve_references=True): if since is None: since=0 if to is None: to=now() @@ -394,7 +394,6 @@ def count_scrobbles_by_artist(since,to): def count_scrobbles_by_track(since,to): - print(since,to) with engine.begin() as conn: op = sql.select( @@ -414,6 +413,35 @@ def count_scrobbles_by_track(since,to): return result +def count_scrobbles_by_track_of_artist(since,to,artist): + + artist_id = get_artist_id(artist) + + jointable = sql.join( + DB['scrobbles'], + DB['trackartists'], + DB['scrobbles'].c.track_id == DB['trackartists'].c.track_id + ) + + with engine.begin() as conn: + op = sql.select( + sql.func.count(sql.func.distinct(DB['scrobbles'].c.timestamp)).label('count'), + DB['scrobbles'].c.track_id + ).select_from(jointable).filter( + DB['scrobbles'].c.timestamp<=to, + DB['scrobbles'].c.timestamp>=since, + DB['trackartists'].c.artist_id==artist_id + ).group_by(DB['scrobbles'].c.track_id).order_by(sql.desc('count')) + result = conn.execute(op).all() + + + counts = [row.count for row in result] + tracks = get_tracks_map(row.track_id for row in result) + result = [{'scrobbles':row.count,'track':tracks[row.track_id]} for row in result] + result = rank(result,key='scrobbles') + return result + + ### functions that get mappings for several entities -> rows From c120850d428694aafa027f8c771e8c5420cbe552 Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 7 Jan 2022 04:38:41 +0100 Subject: [PATCH 019/182] Implemented pulse --- maloja/database/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 791592b..375adb1 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -154,12 +154,13 @@ def get_charts_tracks(**keys): result = sqldb.count_scrobbles_by_track(since=since,to=to) return result +@waitfordb def get_pulse(**keys): rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]}) results = [] for rng in rngs: - res = len(db_query(timerange=rng,**{k:keys[k] for k in keys if k in ["artists","artist","track","title","associated"]})) + res = get_scrobbles_num(timerange=rng,**{k:keys[k] for k in keys if k != 'timerange'}) results.append({"range":rng,"scrobbles":res}) return results From 6611ca8705f5d91b384efffbfce16b22b147e097 Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 7 Jan 2022 04:53:35 +0100 Subject: [PATCH 020/182] Implemented top artists and tracks --- maloja/database/__init__.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 375adb1..5028086 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -165,7 +165,7 @@ def get_pulse(**keys): return results - +@waitfordb def get_performance(**keys): rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]}) @@ -173,24 +173,28 @@ def get_performance(**keys): for rng in rngs: if "track" in keys: + track = sqldb.get_track(sqldb.get_track_id(keys['track'])) charts = get_charts_tracks(timerange=rng) rank = None for c in charts: - if c["track"] == keys["track"]: + if c["track"] == track: rank = c["rank"] break elif "artist" in keys: + artist = sqldb.get_artist(sqldb.get_artist_id(keys['artist'])) + # ^this is the most useless line in programming history + # but I like consistency charts = get_charts_artists(timerange=rng) rank = None for c in charts: - if c["artist"] == keys["artist"]: + if c["artist"] == artist: rank = c["rank"] break results.append({"range":rng,"rank":rank}) return results - +@waitfordb def get_top_artists(**keys): rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]}) @@ -198,21 +202,15 @@ def get_top_artists(**keys): for rng in rngs: try: - res = db_aggregate(timerange=rng,by="ARTIST")[0] - results.append({"range":rng,"artist":res["artist"],"counting":res["counting"],"scrobbles":res["scrobbles"]}) + res = get_charts_artists(timerange=rng)[0] + results.append({"range":rng,"artist":res["artist"],"scrobbles":res["scrobbles"]}) except: results.append({"range":rng,"artist":None,"scrobbles":0}) return results - - - - - - - +@waitfordb def get_top_tracks(**keys): rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]}) @@ -220,7 +218,7 @@ def get_top_tracks(**keys): for rng in rngs: try: - res = db_aggregate(timerange=rng,by="TRACK")[0] + res = get_charts_tracks(timerange=rng)[0] results.append({"range":rng,"track":res["track"],"scrobbles":res["scrobbles"]}) except: results.append({"range":rng,"track":None,"scrobbles":0}) From 65a076c24931d7fc866e9a977e984d1c311af87f Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 7 Jan 2022 04:57:13 +0100 Subject: [PATCH 021/182] Replaced old camelCase functions --- maloja/apis/native_v1.py | 8 ++++---- maloja/database/__init__.py | 4 ++-- maloja/web/jinja/artist.jinja | 2 +- maloja/web/jinja/partials/awards_artist.jinja | 2 +- maloja/web/jinja/partials/awards_track.jinja | 2 +- maloja/web/jinja/track.jinja | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/maloja/apis/native_v1.py b/maloja/apis/native_v1.py index 1515993..f0b1d4d 100644 --- a/maloja/apis/native_v1.py +++ b/maloja/apis/native_v1.py @@ -177,16 +177,16 @@ def get_top_tracks_external(**keys): @api.get("artistinfo") -def artistInfo_external(**keys): +def artist_info_external(**keys): k_filter, _, _, _, _ = uri_to_internal(keys,forceArtist=True) ckeys = {**k_filter} - return artistInfo(**ckeys) + return artist_info(**ckeys) @api.get("trackinfo") -def trackInfo_external(artist:Multi[str],**keys): +def track_info_external(artist:Multi[str],**keys): # transform into a multidict so we can use our nomral uri_to_internal function keys = FormsDict(keys) for a in artist: @@ -194,7 +194,7 @@ def trackInfo_external(artist:Multi[str],**keys): k_filter, _, _, _, _ = uri_to_internal(keys,forceTrack=True) ckeys = {**k_filter} - return trackInfo(**ckeys) + return track_info(**ckeys) @api.get("compare") diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 5028086..d6da832 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -226,7 +226,7 @@ def get_top_tracks(**keys): return results -def artistInfo(artist): +def artist_info(artist): charts = db_aggregate(by="ARTIST") scrobbles = len(db_query(artists=[artist])) @@ -257,7 +257,7 @@ def artistInfo(artist): -def trackInfo(track): +def track_info(track): charts = db_aggregate(by="TRACK") #scrobbles = len(db_query(artists=artists,title=title)) #chart entry of track always has right scrobble number, no countas rules here #c = [e for e in charts if set(e["track"]["artists"]) == set(artists) and e["track"]["title"] == title][0] diff --git a/maloja/web/jinja/artist.jinja b/maloja/web/jinja/artist.jinja index b59c59a..a1c92ad 100644 --- a/maloja/web/jinja/artist.jinja +++ b/maloja/web/jinja/artist.jinja @@ -9,7 +9,7 @@ {% endblock %} {% set artist = filterkeys.artist %} -{% set info = db.artistInfo(artist) %} +{% set info = db.artist_info(artist) %} {% set credited = info.get('replace') %} {% set included = info.get('associated') %} diff --git a/maloja/web/jinja/partials/awards_artist.jinja b/maloja/web/jinja/partials/awards_artist.jinja index b47eb5e..749bc27 100644 --- a/maloja/web/jinja/partials/awards_artist.jinja +++ b/maloja/web/jinja/partials/awards_artist.jinja @@ -56,7 +56,7 @@ {% for track in db.get_tracks(artist=artist) -%} - {% set info = db.trackInfo(track) %} + {% set info = db.track_info(track) %} {% if info.certification is not none -%} -{% set info = db.trackInfo(track) %} +{% set info = db.track_info(track) %} {% if info.certification is not none %} Date: Fri, 7 Jan 2022 21:47:55 +0100 Subject: [PATCH 022/182] Implemented associated artists --- maloja/database/__init__.py | 24 ++++++++++++----------- maloja/database/associated.py | 36 +++++++++++++++++++++++++++++++++++ maloja/database/sqldb.py | 35 ++++++++++++++++++++++++++++++++++ maloja/upgrade.py | 11 ++++++----- 4 files changed, 90 insertions(+), 16 deletions(-) create mode 100644 maloja/database/associated.py diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index d6da832..ba37228 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -4,7 +4,7 @@ from bottle import request, response, FormsDict, HTTPError # rest of the project from ..cleanup import CleanerAgent, CollectorAgent from .. import utilities -from ..malojatime import register_scrobbletime, time_stamps, ranges +from ..malojatime import register_scrobbletime, time_stamps, ranges, alltime from ..malojauri import uri_to_internal, internal_to_uri, compose_querystring from ..thirdparty import proxy_scrobble_all from ..globalconf import data_dir, malojaconfig, apikeystore @@ -225,30 +225,28 @@ def get_top_tracks(**keys): return results - +@waitfordb def artist_info(artist): - charts = db_aggregate(by="ARTIST") - scrobbles = len(db_query(artists=[artist])) + alltimecharts = get_charts_artists(timerange=alltime()) + scrobbles = get_scrobbles_num(artist=artist,timerange=alltime()) #we cant take the scrobble number from the charts because that includes all countas scrobbles try: - c = [e for e in charts if e["artist"] == artist][0] - others = [a for a in coa.getAllAssociated(artist) if a in ARTISTS] + c = [e for e in alltimecharts if e["artist"] == artist][0] + others = sqldb.get_associated_artists(artist) position = c["rank"] performance = get_performance(artist=artist,step="week") return { "artist":artist, "scrobbles":scrobbles, "position":position, - "associated":others, - "medals":{"gold":[],"silver":[],"bronze":[],**MEDALS_ARTISTS.get(artist,{})}, - "topweeks":WEEKLY_TOPARTISTS.get(artist,0) + "associated":others } except: # if the artist isnt in the charts, they are not being credited and we # need to show information about the credited one - artist = coa.getCredited(artist) - c = [e for e in charts if e["artist"] == artist][0] + artist = sqldb.get_credited_artists(artist)[0] + c = [e for e in alltimecharts if e["artist"] == artist][0] position = c["rank"] return {"replace":artist,"scrobbles":scrobbles,"position":position} @@ -494,6 +492,10 @@ def get_predefined_rulesets(): def start_db(): from .. import upgrade upgrade.upgrade_db(sqldb.add_scrobbles) + + from . import associated + associated.load_associated_rules() + dbstatus['healthy'] = True dbstatus['complete'] = True diff --git a/maloja/database/associated.py b/maloja/database/associated.py new file mode 100644 index 0000000..f486cef --- /dev/null +++ b/maloja/database/associated.py @@ -0,0 +1,36 @@ +## dealing with loading the associated artists rules into a database +## right now this is kind of absurd because we're storing it in a db while not +## actually using its permanence, but this makes it possible to use the information +## directly in sql + + +from doreah import tsv + + +from . import sqldb +from ..globalconf import data_dir + + +def load_associated_rules(): + # delete old + with sqldb.engine.begin() as conn: + op = sqldb.DB['associated_artists'].delete().where() + conn.execute(op) + + # load from file + raw = tsv.parse_all(data_dir["rules"](),"string","string","string") + rules = [{'source_artist':b,'target_artist':c} for [a,b,c] in raw if a=="countas"] + #allartists = set([*[r['source_artist'] for r in rules],*[r['target_artist'] for r in rules]]) + + # find ids + rules = [{k:sqldb.get_artist_id(rule[k]) for k in rule} for rule in rules] + + # write to db + ops = [ + sqldb.DB['associated_artists'].insert().values(**r) + for r in rules + ] + + with sqldb.engine.begin() as conn: + for op in ops: + conn.execute(op) diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index 42ec730..f92dc9f 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -488,8 +488,43 @@ def get_artists_map(artist_ids): return artists +### associations +def get_associated_artists(*artists): + artist_ids = [get_artist_id(a) for a in artists] + jointable = sql.join( + DB['associated_artists'], + DB['artists'], + DB['associated_artists'].c.source_artist == DB['artists'].c.id + ) + + with engine.begin() as conn: + op = jointable.select().where( + DB['associated_artists'].c.target_artist.in_(artist_ids) + ) + result = conn.execute(op).all() + + artists = artists_db_to_dict(result) + return artists + +def get_credited_artists(*artists): + artist_ids = [get_artist_id(a) for a in artists] + + jointable = sql.join( + DB['associated_artists'], + DB['artists'], + DB['associated_artists'].c.target_artist == DB['artists'].c.id + ) + + with engine.begin() as conn: + op = jointable.select().where( + DB['associated_artists'].c.source_artist.in_(artist_ids) + ) + result = conn.execute(op).all() + + artists = artists_db_to_dict(result) + return artists ### get a specific entity by id diff --git a/maloja/upgrade.py b/maloja/upgrade.py index 7029c88..4257db8 100644 --- a/maloja/upgrade.py +++ b/maloja/upgrade.py @@ -24,14 +24,15 @@ def upgrade_apikeys(): def upgrade_db(callback_add_scrobbles): - print(col['yellow']("Upgrading v2 Database to v3 Database. This could take a while...")) + oldfolder = os.path.join(dir_settings['state'],"scrobbles") newfolder = os.path.join(dir_settings['state'],".oldscrobbles") os.makedirs(newfolder,exist_ok=True) if os.path.exists(oldfolder): - scrobblefiles = os.listdir(oldfolder) - for sf in scrobblefiles: - if sf.endswith(".tsv"): + scrobblefiles = [f for f in os.listdir(oldfolder) if f.endswith(".tsv")] + if len(scrobblefiles) > 0: + print(col['yellow']("Upgrading v2 Database to v3 Database. This could take a while...")) + for sf in scrobblefiles: print(f"\tImporting from old tsv scrobble file: {sf}") if re.match(r"[0-9]+_[0-9]+\.tsv",sf): origin = 'legacy' @@ -64,4 +65,4 @@ def upgrade_db(callback_add_scrobbles): }) callback_add_scrobbles(scrobblelist) os.rename(os.path.join(oldfolder,sf),os.path.join(newfolder,sf)) - print(col['yellow']("Done!")) + print(col['yellow']("Done!")) From 1df51748b65c3c640c304b4b64f257013c44811a Mon Sep 17 00:00:00 2001 From: krateng Date: Sat, 8 Jan 2022 06:11:42 +0100 Subject: [PATCH 023/182] Implemented artist and track info, improved performance of artist page --- maloja/database/__init__.py | 66 ++++++++++++++++--- maloja/database/sqldb.py | 17 ++--- maloja/web/jinja/partials/awards_artist.jinja | 18 +++-- 3 files changed, 73 insertions(+), 28 deletions(-) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index ba37228..2bf9c4f 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -228,6 +228,7 @@ def get_top_tracks(**keys): @waitfordb def artist_info(artist): + artist = sqldb.get_artist(sqldb.get_artist_id(artist)) alltimecharts = get_charts_artists(timerange=alltime()) scrobbles = get_scrobbles_num(artist=artist,timerange=alltime()) #we cant take the scrobble number from the charts because that includes all countas scrobbles @@ -235,12 +236,19 @@ def artist_info(artist): c = [e for e in alltimecharts if e["artist"] == artist][0] others = sqldb.get_associated_artists(artist) position = c["rank"] - performance = get_performance(artist=artist,step="week") + performance_weekly = get_performance(artist=artist,step="week")[:-1] #current week doesn't count + performance_yearly = get_performance(artist=artist,step="year")[:-1] #current year doesn't count return { "artist":artist, "scrobbles":scrobbles, "position":position, - "associated":others + "associated":others, + "medals":{ + "gold":[e['range'] for e in performance_yearly if e['rank'] == 1], + "silver":[e['range'] for e in performance_yearly if e['rank'] == 2], + "bronze":[e['range'] for e in performance_yearly if e['rank'] == 3] + }, + "topweeks":len([e for e in performance_weekly if e['rank'] == 1]) } except: # if the artist isnt in the charts, they are not being credited and we @@ -254,12 +262,13 @@ def artist_info(artist): - def track_info(track): - charts = db_aggregate(by="TRACK") - #scrobbles = len(db_query(artists=artists,title=title)) #chart entry of track always has right scrobble number, no countas rules here - #c = [e for e in charts if set(e["track"]["artists"]) == set(artists) and e["track"]["title"] == title][0] - c = [e for e in charts if e["track"] == track][0] + + track = sqldb.get_track(sqldb.get_track_id(track)) + alltimecharts = get_charts_tracks(timerange=alltime()) + #scrobbles = get_scrobbles_num(track=track,timerange=alltime()) + + c = [e for e in alltimecharts if e["track"] == track][0] scrobbles = c["scrobbles"] position = c["rank"] cert = None @@ -268,17 +277,56 @@ def track_info(track): elif scrobbles >= threshold_platinum: cert = "platinum" elif scrobbles >= threshold_gold: cert = "gold" + performance_weekly = get_performance(track=track,step="week")[:-1] #current week doesn't count + performance_yearly = get_performance(track=track,step="year")[:-1] #current year doesn't count + return { "track":track, "scrobbles":scrobbles, "position":position, - "medals":{"gold":[],"silver":[],"bronze":[],**MEDALS_TRACKS.get((frozenset(track["artists"]),track["title"]),{})}, + "medals":{ + "gold":[e['range'] for e in performance_yearly if e['rank'] == 1], + "silver":[e['range'] for e in performance_yearly if e['rank'] == 2], + "bronze":[e['range'] for e in performance_yearly if e['rank'] == 3] + }, "certification":cert, - "topweeks":WEEKLY_TOPTRACKS.get(((frozenset(track["artists"]),track["title"])),0) + "topweeks":len([e for e in performance_weekly if e['rank'] == 1]) } +def tracks_info(tracks): + tracks = [sqldb.get_track(sqldb.get_track_id(track)) for track in tracks] + alltimecharts = get_charts_tracks(timerange=alltime()) + + result = [] + for track in tracks: + c = [e for e in alltimecharts if e["track"] == track][0] + scrobbles = c["scrobbles"] + position = c["rank"] + cert = None + threshold_gold, threshold_platinum, threshold_diamond = malojaconfig["SCROBBLES_GOLD","SCROBBLES_PLATINUM","SCROBBLES_DIAMOND"] + if scrobbles >= threshold_diamond: cert = "diamond" + elif scrobbles >= threshold_platinum: cert = "platinum" + elif scrobbles >= threshold_gold: cert = "gold" + + performance_weekly = get_performance(track=track,step="week")[:-1] #current week doesn't count + performance_yearly = get_performance(track=track,step="year")[:-1] #current year doesn't count + + result.append({ + "track":track, + "scrobbles":scrobbles, + "position":position, + "medals":{ + "gold":[e['range'] for e in performance_yearly if e['rank'] == 1], + "silver":[e['range'] for e in performance_yearly if e['rank'] == 2], + "bronze":[e['range'] for e in performance_yearly if e['rank'] == 3] + }, + "certification":cert, + "topweeks":len([e for e in performance_weekly if e['rank'] == 1]) + }) + + return result def compare(remoteurl): diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index f92dc9f..5a18c4e 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -88,7 +88,6 @@ meta.create_all(engine) ### DB -> DICT - def scrobbles_db_to_dict(rows): tracks = get_tracks_map(set(row.track_id for row in rows)) return [ @@ -100,10 +99,10 @@ def scrobbles_db_to_dict(rows): } for row in rows ] + def scrobble_db_to_dict(row): return scrobbles_db_to_dict([row])[0] - def tracks_db_to_dict(rows): artists = get_artists_of_tracks(set(row.id for row in rows)) return [ @@ -114,15 +113,16 @@ def tracks_db_to_dict(rows): } for row in rows ] + def track_db_to_dict(row): return tracks_db_to_dict([row])[0] - def artists_db_to_dict(rows): return [ row.name for row in rows ] + def artist_db_to_dict(row): return artists_db_to_dict([row])[0] @@ -131,6 +131,7 @@ def artist_db_to_dict(row): ### DICT -> DB # TODO + def scrobble_dict_to_db(info): return { "rawscrobble":json.dumps(info), @@ -182,7 +183,6 @@ def add_scrobbles(scrobbleslist): ### these will 'get' the ID of an entity, creating it if necessary - def get_track_id(trackdict): ntitle = normalize_name(trackdict['title']) artist_ids = [get_artist_id(a) for a in trackdict['artists']] @@ -260,7 +260,6 @@ def get_artist_id(artistname): ### Functions that get rows according to parameters - def get_scrobbles_of_artist(artist,since=None,to=None): if since is None: since=0 @@ -281,7 +280,6 @@ def get_scrobbles_of_artist(artist,since=None,to=None): #result = [scrobble_db_to_dict(row,resolve_references=resolve_references) for row in result] return result - def get_scrobbles_of_track(track,since=None,to=None): if since is None: since=0 @@ -301,7 +299,6 @@ def get_scrobbles_of_track(track,since=None,to=None): #result = [scrobble_db_to_dict(row) for row in result] return result - def get_scrobbles(since=None,to=None,resolve_references=True): if since is None: since=0 @@ -318,7 +315,6 @@ def get_scrobbles(since=None,to=None,resolve_references=True): #result = [scrobble_db_to_dict(row,resolve_references=resolve_references) for i,row in enumerate(result) if i -{% for track in db.get_tracks(artist=artist) -%} - {% set info = db.track_info(track) %} - {% if info.certification is not none -%} - + +{% set charts = db.get_charts_tracks(artist=artist,timerange=malojatime.alltime()) %} +{% for e in charts -%} + {%- if e.scrobbles >= settings.scrobbles_gold -%}{% set cert = 'gold' %}{%- endif -%} + {%- if e.scrobbles >= settings.scrobbles_platinum -%}{% set cert = 'platinum' %}{%- endif -%} + {%- if e.scrobbles >= settings.scrobbles_diamond -%}{% set cert = 'diamond' %}{%- endif -%} + + {%- if cert -%} + {%- endif %} + {%- endfor %} {%- endmacro %} From 7021099e7b9447730749e6e633a803deb95c5cdd Mon Sep 17 00:00:00 2001 From: krateng Date: Sun, 9 Jan 2022 01:14:06 +0100 Subject: [PATCH 024/182] Removed compare functionality --- maloja/database/__init__.py | 87 +------------------------ maloja/web/jinja/compare.jinja | 112 --------------------------------- 2 files changed, 1 insertion(+), 198 deletions(-) delete mode 100644 maloja/web/jinja/compare.jinja diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 2bf9c4f..ba9bfa7 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -261,7 +261,7 @@ def artist_info(artist): - +@waitfordb def track_info(track): track = sqldb.get_track(sqldb.get_track_id(track)) @@ -294,91 +294,6 @@ def track_info(track): "topweeks":len([e for e in performance_weekly if e['rank'] == 1]) } -def tracks_info(tracks): - - tracks = [sqldb.get_track(sqldb.get_track_id(track)) for track in tracks] - alltimecharts = get_charts_tracks(timerange=alltime()) - - result = [] - for track in tracks: - c = [e for e in alltimecharts if e["track"] == track][0] - scrobbles = c["scrobbles"] - position = c["rank"] - cert = None - threshold_gold, threshold_platinum, threshold_diamond = malojaconfig["SCROBBLES_GOLD","SCROBBLES_PLATINUM","SCROBBLES_DIAMOND"] - if scrobbles >= threshold_diamond: cert = "diamond" - elif scrobbles >= threshold_platinum: cert = "platinum" - elif scrobbles >= threshold_gold: cert = "gold" - - performance_weekly = get_performance(track=track,step="week")[:-1] #current week doesn't count - performance_yearly = get_performance(track=track,step="year")[:-1] #current year doesn't count - - result.append({ - "track":track, - "scrobbles":scrobbles, - "position":position, - "medals":{ - "gold":[e['range'] for e in performance_yearly if e['rank'] == 1], - "silver":[e['range'] for e in performance_yearly if e['rank'] == 2], - "bronze":[e['range'] for e in performance_yearly if e['rank'] == 3] - }, - "certification":cert, - "topweeks":len([e for e in performance_weekly if e['rank'] == 1]) - }) - - return result - - -def compare(remoteurl): - import json - compareurl = remoteurl + "/api/info" - - response = urllib.request.urlopen(compareurl) - strangerinfo = json.loads(response.read()) - owninfo = info() - - #add_known_server(compareto) - - artists = {} - - for a in owninfo["artists"]: - artists[a.lower()] = {"name":a,"self":int(owninfo["artists"][a]*1000),"other":0} - - for a in strangerinfo["artists"]: - artists[a.lower()] = artists.setdefault(a.lower(),{"name":a,"self":0}) - artists[a.lower()]["other"] = int(strangerinfo["artists"][a]*1000) - - for a in artists: - common = min(artists[a]["self"],artists[a]["other"]) - artists[a]["self"] -= common - artists[a]["other"] -= common - artists[a]["common"] = common - - best = sorted((artists[a]["name"] for a in artists),key=lambda x: artists[x.lower()]["common"],reverse=True) - - result = { - "unique_self":sum(artists[a]["self"] for a in artists if artists[a]["common"] == 0), - "more_self":sum(artists[a]["self"] for a in artists if artists[a]["common"] != 0), - "common":sum(artists[a]["common"] for a in artists), - "more_other":sum(artists[a]["other"] for a in artists if artists[a]["common"] != 0), - "unique_other":sum(artists[a]["other"] for a in artists if artists[a]["common"] == 0) - } - - total = sum(result[c] for c in result) - - for r in result: - result[r] = (result[r],result[r]/total) - - - - return { - "result":result, - "info":{ - "ownname":owninfo["name"], - "remotename":strangerinfo["name"] - }, - "commonartist":best[0] - } def incoming_scrobble(artists,title,album=None,duration=None,time=None,fix=True): diff --git a/maloja/web/jinja/compare.jinja b/maloja/web/jinja/compare.jinja deleted file mode 100644 index 9eaea42..0000000 --- a/maloja/web/jinja/compare.jinja +++ /dev/null @@ -1,112 +0,0 @@ -{% extends "abstracts/base.jinja" %} -{% block title %}Maloja - Compare{% endblock %} - -{% import 'snippets/links.jinja' as links %} - -{% block scripts %} - -{% endblock %} - -{% set data = db.compare(specialkeys.remote) %} -{% set comparedata = data.result %} -{% set info = data.info %} -{% set bestartist = data.commonartist %} - - - -{% set fullmatch = comparedata.common[1]*100 %} -{% set partialmatch = comparedata.more_self[1]*100 + comparedata.more_other[1]*100 %} - -{% set match = fullmatch + (partialmatch)/2 %} -{% set pixel_fullmatch = fullmatch * 2.5 %} -{% set pixel_partialmatch = (fullmatch+partialmatch) * 2.5 %} - -{% set match = [match,100] | min %} - -{% set r = [255*match/50,255] | min %} -{% set g = [255*match/50,255] | min %} -{% set b = [255*(match/50-1),0] | max %} - - -{% block content %} - - - - - - - - - - - - - - - - - -

{{ info.ownname }}

- -
- {{ match | round(1) }}% - -
-

{{ info.remotename }}

- The size of the circle shows matching music taste. - The fuzziness of its border indicates differences in quantity. -
- Common Favorite -

{{ links.link(bestartist) }}

- -
- - -{% endblock %} From df07dd7b0088777ff701f198473d4de477c1bbb2 Mon Sep 17 00:00:00 2001 From: krateng Date: Sun, 9 Jan 2022 01:19:13 +0100 Subject: [PATCH 025/182] Feels good man --- maloja/database/__init__.py | 87 -------------------- maloja/database/cache.py | 158 ------------------------------------ maloja/globalconf.py | 7 +- 3 files changed, 1 insertion(+), 251 deletions(-) delete mode 100644 maloja/database/cache.py diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index ba9bfa7..4585868 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -10,7 +10,6 @@ from ..thirdparty import proxy_scrobble_all from ..globalconf import data_dir, malojaconfig, apikeystore #db from . import sqldb -from .cache import db_query, db_aggregate # doreah toolkit from doreah.logging import log @@ -470,92 +469,6 @@ def start_db(): - - - -#### -## Database queries -#### - - - -# Queries the database -def db_query_full(artist=None,artists=None,title=None,track=None,timerange=None,associated=False,max_=None): - - if not dbstatus['healthy']: raise DatabaseNotBuilt() - (since, to) = time_stamps(range=timerange) - - if artists is not None and title is not None: - print(col['red']("THIS SHOULD NO LONGER HAPPEN")) - track = {'artists':artists,'title':title} - - if track is not None: - return list(reversed(sqldb.get_scrobbles_of_track(track=track,since=since,to=to))) - - if artist is not None: - return list(reversed(sqldb.get_scrobbles_of_artist(artist=artist,since=since,to=to))) - - return list(reversed(sqldb.get_scrobbles(since=since,to=to))) - - - -# Queries that... well... aggregate -def db_aggregate_full(by=None,timerange=None,artist=None): - - if not dbstatus['healthy']: raise DatabaseNotBuilt() - (since, to) = time_stamps(range=timerange) - - - if (by=="ARTIST"): - - trackcharts = {} - charts = {} - scrobbles = sqldb.get_scrobbles(since=since,to=to,resolve_references=False) - - for s in scrobbles: - trackcharts[s['track']] = trackcharts.setdefault(s['track'],0) + 1 - - for t in trackcharts: - artists = sqldb.get_artists_of_track(t,resolve_references=False) - for a in coa.getCreditedList(artists): - charts[a] = charts.setdefault(a,0) + trackcharts[t] - - - ls = [{"artist":sqldb.get_artist(a),"scrobbles":charts[a],"counting":[]} for a in charts] - ls.sort(key=lambda k:k["scrobbles"],reverse=True) - # add ranks - for rnk in range(len(ls)): - if rnk == 0 or ls[rnk]["scrobbles"] < ls[rnk-1]["scrobbles"]: - ls[rnk]["rank"] = rnk + 1 - else: - ls[rnk]["rank"] = ls[rnk-1]["rank"] - return ls - - elif (by=="TRACK"): - charts = {} - if artist is None: - scrobbles = sqldb.get_scrobbles(since=since,to=to,resolve_references=False) - else: - scrobbles = sqldb.get_scrobbles_of_artist(since=since,to=to,artist=artist,resolve_references=False) - - for s in scrobbles: - charts[s['track']] = charts.setdefault(s['track'],0) + 1 - - - ls = [{"track":sqldb.get_track(t),"scrobbles":charts[t]} for t in charts] - ls.sort(key=lambda k:k["scrobbles"],reverse=True) - # add ranks - for rnk in range(len(ls)): - if rnk == 0 or ls[rnk]["scrobbles"] < ls[rnk-1]["scrobbles"]: - ls[rnk]["rank"] = rnk + 1 - else: - ls[rnk]["rank"] = ls[rnk-1]["rank"] - return ls - - else: - return len(sqldb.get_scrobbles(since=since,to=to,resolve_references=False)) - - # Search for strings def db_search(query,type=None): results = [] diff --git a/maloja/database/cache.py b/maloja/database/cache.py deleted file mode 100644 index a4c2ee3..0000000 --- a/maloja/database/cache.py +++ /dev/null @@ -1,158 +0,0 @@ - -### -## Caches in front of DB -## the volatile caches are intended mainly for excessive site navigation during one session -## the permanent caches are there to save data that is hard to calculate and never changes (old charts) -### - -import psutil -import copy -import lru - -from doreah.logging import log - -from ..globalconf import malojaconfig -from .. import utilities -from .. import database as dbmain - -if False: - def db_query(**kwargs): - return db_query_cached(**kwargs) - def db_aggregate(**kwargs): - return db_aggregate_cached(**kwargs) -else: - def db_query(**kwargs): - return dbmain.db_query_full(**kwargs) - def db_aggregate(**kwargs): - return dbmain.db_aggregate_full(**kwargs) - - -csz = malojaconfig["DB_CACHE_ENTRIES"] -cmp = malojaconfig["DB_MAX_MEMORY"] - -cache_query = lru.LRU(csz) -cache_query_perm = lru.LRU(csz) -cache_aggregate = lru.LRU(csz) -cache_aggregate_perm = lru.LRU(csz) - -perm_caching = malojaconfig["CACHE_DATABASE_PERM"] -temp_caching = malojaconfig["CACHE_DATABASE_SHORT"] - -cachestats = { - "cache_query":{ - "hits_perm":0, - "hits_tmp":0, - "misses":0, - "objperm":cache_query_perm, - "objtmp":cache_query, - "name":"Query Cache" - }, - "cache_aggregate":{ - "hits_perm":0, - "hits_tmp":0, - "misses":0, - "objperm":cache_aggregate_perm, - "objtmp":cache_aggregate, - "name":"Aggregate Cache" - } -} - -from doreah.regular import runhourly - -@runhourly -def log_stats(): - logstr = "{name}: {hitsperm} Perm Hits, {hitstmp} Tmp Hits, {misses} Misses; Current Size: {sizeperm}/{sizetmp}" - for s in (cachestats["cache_query"],cachestats["cache_aggregate"]): - log(logstr.format(name=s["name"],hitsperm=s["hits_perm"],hitstmp=s["hits_tmp"],misses=s["misses"], - sizeperm=len(s["objperm"]),sizetmp=len(s["objtmp"])),module="debug") - -def db_query_cached(**kwargs): - global cache_query, cache_query_perm - key = utilities.serialize(kwargs) - - eligible_permanent_caching = ( - "timerange" in kwargs and - not kwargs["timerange"].active() and - perm_caching - ) - eligible_temporary_caching = ( - not eligible_permanent_caching and - temp_caching - ) - - # hit permanent cache for past timeranges - if eligible_permanent_caching and key in cache_query_perm: - cachestats["cache_query"]["hits_perm"] += 1 - return copy.copy(cache_query_perm.get(key)) - - # hit short term cache - elif eligible_temporary_caching and key in cache_query: - cachestats["cache_query"]["hits_tmp"] += 1 - return copy.copy(cache_query.get(key)) - - else: - cachestats["cache_query"]["misses"] += 1 - result = dbmain.db_query_full(**kwargs) - if eligible_permanent_caching: cache_query_perm[key] = result - elif eligible_temporary_caching: cache_query[key] = result - - reduce_caches_if_low_ram() - - return result - - -def db_aggregate_cached(**kwargs): - global cache_aggregate, cache_aggregate_perm - key = utilities.serialize(kwargs) - - eligible_permanent_caching = ( - "timerange" in kwargs and - not kwargs["timerange"].active() and - perm_caching - ) - eligible_temporary_caching = ( - not eligible_permanent_caching and - temp_caching - ) - - # hit permanent cache for past timeranges - if eligible_permanent_caching and key in cache_aggregate_perm: - cachestats["cache_aggregate"]["hits_perm"] += 1 - return copy.copy(cache_aggregate_perm.get(key)) - - # hit short term cache - elif eligible_temporary_caching and key in cache_aggregate: - cachestats["cache_aggregate"]["hits_tmp"] += 1 - return copy.copy(cache_aggregate.get(key)) - - else: - cachestats["cache_aggregate"]["misses"] += 1 - result = dbmain.db_aggregate_full(**kwargs) - if eligible_permanent_caching: cache_aggregate_perm[key] = result - elif eligible_temporary_caching: cache_aggregate[key] = result - - reduce_caches_if_low_ram() - - return result - -def invalidate_caches(): - global cache_query, cache_aggregate - cache_query.clear() - cache_aggregate.clear() - log("Database caches invalidated.") - -def reduce_caches(to=0.75): - global cache_query, cache_aggregate, cache_query_perm, cache_aggregate_perm - for c in cache_query, cache_aggregate, cache_query_perm, cache_aggregate_perm: - currentsize = len(c) - if currentsize > 100: - targetsize = max(int(currentsize * to),10) - c.set_size(targetsize) - c.set_size(csz) - -def reduce_caches_if_low_ram(): - ramprct = psutil.virtual_memory().percent - if ramprct > cmp: - log("{prct}% RAM usage, reducing caches!".format(prct=ramprct),module="debug") - ratio = (cmp / ramprct) ** 3 - reduce_caches(to=ratio) diff --git a/maloja/globalconf.py b/maloja/globalconf.py index 60df460..87f610b 100644 --- a/maloja/globalconf.py +++ b/maloja/globalconf.py @@ -148,12 +148,7 @@ malojaconfig = Configuration( }, "Technical":{ "cache_expire_positive":(tp.Integer(), "Image Cache Expiration", 300, "Days until images are refetched"), - "cache_expire_negative":(tp.Integer(), "Image Cache Negative Expiration", 30, "Days until failed image fetches are reattempted"), - "use_db_cache":(tp.Boolean(), "Use DB Cache", True), - "cache_database_short":(tp.Boolean(), "Use volatile Database Cache", True), - "cache_database_perm":(tp.Boolean(), "Use permanent Database Cache", True), - "db_cache_entries":(tp.Integer(), "Maximal Cache entries", 10000), - "db_max_memory":(tp.Integer(max=100,min=20), "RAM Percentage Theshold", 75, "Maximal percentage of RAM that should be used by whole system before Maloja discards cache entries. Use a higher number if your Maloja runs on a dedicated instance (e.g. a container)") + "cache_expire_negative":(tp.Integer(), "Image Cache Negative Expiration", 30, "Days until failed image fetches are reattempted") }, "Fluff":{ "scrobbles_gold":(tp.Integer(), "Scrobbles for Gold", 250, "How many scrobbles a track needs to be considered 'Gold' status"), From eb9cd4aba4daa9bc45ae4ff1ac1e9dd634e345c6 Mon Sep 17 00:00:00 2001 From: krateng Date: Sun, 9 Jan 2022 06:58:06 +0100 Subject: [PATCH 026/182] Reimplemented caching of yearly and weekly stats --- maloja/database/__init__.py | 38 ++++++----- maloja/database/cached.py | 69 +++++++++++++++++++ maloja/utilities/__init__.py | 1 - maloja/utilities/maintenance.py | 114 -------------------------------- 4 files changed, 91 insertions(+), 131 deletions(-) create mode 100644 maloja/database/cached.py delete mode 100644 maloja/utilities/maintenance.py diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 4585868..4e4ab2d 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -10,6 +10,7 @@ from ..thirdparty import proxy_scrobble_all from ..globalconf import data_dir, malojaconfig, apikeystore #db from . import sqldb +from . import cached # doreah toolkit from doreah.logging import log @@ -42,9 +43,9 @@ import urllib dbstatus = { - "healthy":False, + "healthy":False, # we can access the db "rebuildinprogress":False, - "complete":False + "complete":False # information is complete } class DatabaseNotBuilt(HTTPError): def __init__(self): @@ -235,19 +236,17 @@ def artist_info(artist): c = [e for e in alltimecharts if e["artist"] == artist][0] others = sqldb.get_associated_artists(artist) position = c["rank"] - performance_weekly = get_performance(artist=artist,step="week")[:-1] #current week doesn't count - performance_yearly = get_performance(artist=artist,step="year")[:-1] #current year doesn't count return { "artist":artist, "scrobbles":scrobbles, "position":position, "associated":others, "medals":{ - "gold":[e['range'] for e in performance_yearly if e['rank'] == 1], - "silver":[e['range'] for e in performance_yearly if e['rank'] == 2], - "bronze":[e['range'] for e in performance_yearly if e['rank'] == 3] + "gold": [year for year in cached.medals_artists if artist in cached.medals_artists[year]['gold']], + "silver": [year for year in cached.medals_artists if artist in cached.medals_artists[year]['silver']], + "bronze": [year for year in cached.medals_artists if artist in cached.medals_artists[year]['bronze']], }, - "topweeks":len([e for e in performance_weekly if e['rank'] == 1]) + "topweeks":len([e for e in cached.weekly_topartists if e == artist]) } except: # if the artist isnt in the charts, they are not being credited and we @@ -276,21 +275,18 @@ def track_info(track): elif scrobbles >= threshold_platinum: cert = "platinum" elif scrobbles >= threshold_gold: cert = "gold" - performance_weekly = get_performance(track=track,step="week")[:-1] #current week doesn't count - performance_yearly = get_performance(track=track,step="year")[:-1] #current year doesn't count - return { "track":track, "scrobbles":scrobbles, "position":position, "medals":{ - "gold":[e['range'] for e in performance_yearly if e['rank'] == 1], - "silver":[e['range'] for e in performance_yearly if e['rank'] == 2], - "bronze":[e['range'] for e in performance_yearly if e['rank'] == 3] + "gold": [year for year in cached.medals_tracks if track in cached.medals_tracks[year]['gold']], + "silver": [year for year in cached.medals_tracks if track in cached.medals_tracks[year]['silver']], + "bronze": [year for year in cached.medals_tracks if track in cached.medals_tracks[year]['bronze']], }, "certification":cert, - "topweeks":len([e for e in performance_weekly if e['rank'] == 1]) + "topweeks":len([e for e in cached.weekly_toptracks if e == track]) } @@ -452,18 +448,28 @@ def get_predefined_rulesets(): def start_db(): + # Upgrade database from .. import upgrade upgrade.upgrade_db(sqldb.add_scrobbles) + # Load temporary tables from . import associated associated.load_associated_rules() dbstatus['healthy'] = True - dbstatus['complete'] = True + # inform time module about begin of scrobbling firstscrobble = sqldb.get_scrobbles()[0] register_scrobbletime(firstscrobble['time']) + # create cached information + cached.update_medals() + cached.update_weekly() + + dbstatus['complete'] = True + + + diff --git a/maloja/database/cached.py b/maloja/database/cached.py new file mode 100644 index 0000000..4aea767 --- /dev/null +++ b/maloja/database/cached.py @@ -0,0 +1,69 @@ +# for information that is not authorative, but should be saved anyway because it +# changes infrequently and DB access is expensive + +from doreah.regular import yearly, daily +from .. import database +from .. import malojatime as mjt + + + +medals_artists = { + # year: {'gold':[],'silver':[],'bronze':[]} +} +medals_tracks = { + # year: {'gold':[],'silver':[],'bronze':[]} +} + +weekly_topartists = [] +weekly_toptracks = [] + +@yearly +def update_medals(): + + global medals_artists, medals_tracks + medals_artists.clear() + medals_tracks.clear() + + for year in mjt.ranges(step="year"): + if year == mjt.thisyear(): break + + charts_artists = database.get_charts_artists(timerange=year) + charts_tracks = database.get_charts_tracks(timerange=year) + + entry_artists = {'gold':[],'silver':[],'bronze':[]} + entry_tracks = {'gold':[],'silver':[],'bronze':[]} + medals_artists[year.desc()] = entry_artists + medals_tracks[year.desc()] = entry_tracks + + for entry in charts_artists: + if entry['rank'] == 1: entry_artists['gold'].append(entry['artist']) + elif entry['rank'] == 2: entry_artists['silver'].append(entry['artist']) + elif entry['rank'] == 3: entry_artists['bronze'].append(entry['artist']) + else: break + for entry in charts_tracks: + if entry['rank'] == 1: entry_tracks['gold'].append(entry['track']) + elif entry['rank'] == 2: entry_tracks['silver'].append(entry['track']) + elif entry['rank'] == 3: entry_tracks['bronze'].append(entry['track']) + else: break + + + +@daily +def update_weekly(): + + global weekly_topartists, weekly_toptracks + weekly_topartists.clear() + weekly_toptracks.clear() + + for week in mjt.ranges(step="week"): + if week == mjt.thisweek(): break + + charts_artists = database.get_charts_artists(timerange=week) + charts_tracks = database.get_charts_tracks(timerange=week) + + for entry in charts_artists: + if entry['rank'] == 1: weekly_topartists.append(entry['artist']) + else: break + for entry in charts_tracks: + if entry['rank'] == 1: weekly_toptracks.append(entry['track']) + else: break diff --git a/maloja/utilities/__init__.py b/maloja/utilities/__init__.py index 5fc1c9a..20b2204 100644 --- a/maloja/utilities/__init__.py +++ b/maloja/utilities/__init__.py @@ -1,3 +1,2 @@ from .images import * -from .maintenance import * from .utils import * diff --git a/maloja/utilities/maintenance.py b/maloja/utilities/maintenance.py deleted file mode 100644 index b8df8f4..0000000 --- a/maloja/utilities/maintenance.py +++ /dev/null @@ -1,114 +0,0 @@ -from ..__pkginfo__ import VERSION -from ..malojatime import ranges, thisweek, thisyear -from ..globalconf import malojaconfig - -from doreah.regular import yearly, daily -from doreah.logging import log - -import datetime -import json -import urllib -import itertools - - - -get_track = lambda x:(frozenset(x["track"]["artists"]),x["track"]["title"]) -get_artist = lambda x:x["artist"] - -def group_by_attribute(sequence,attribute): - grouped = itertools.groupby(sequence,key=lambda x:x[attribute]) - for attrvalue,members in grouped: - yield attrvalue,list(members) - -def collect_rankings(chart,identify,collection,iteration=None,count=True): - grouped = group_by_attribute(chart,"rank") - for rank, members in grouped: - if not count and rank not in rankmedals: break - if count and rank != 1: break - - for m in members: - # get the actual object that we're counting - entity = identify(m) - - # count no1 spots - if count: - collection[entity] = collection.setdefault(entity,0) + 1 - - # collect instances of top3 spots - else: - medal = rankmedals[rank] - collection.setdefault(entity,{}).setdefault(medal,[]).append(iteration) - - -rankmedals = { - 1:'gold', - 2:'silver', - 3:'bronze' -} - -@yearly -def update_medals(): - - - from ..database import MEDALS_ARTISTS, MEDALS_TRACKS, STAMPS, get_charts_artists, get_charts_tracks - - - MEDALS_ARTISTS.clear() - MEDALS_TRACKS.clear() - - for year in ranges(step="year"): - if year == thisyear(): break - - charts_artists = get_charts_artists(timerange=year) - charts_tracks = get_charts_tracks(timerange=year) - - collect_rankings(charts_artists,get_artist,MEDALS_ARTISTS,iteration=year,count=False) - collect_rankings(charts_tracks,get_track,MEDALS_TRACKS,iteration=year,count=False) - - -@daily -def update_weekly(): - - from ..database import WEEKLY_TOPTRACKS, WEEKLY_TOPARTISTS, get_charts_artists, get_charts_tracks - - - WEEKLY_TOPARTISTS.clear() - WEEKLY_TOPTRACKS.clear() - - for week in ranges(step="week"): - if week == thisweek(): break - - charts_artists = get_charts_artists(timerange=week) - charts_tracks = get_charts_tracks(timerange=week) - - collect_rankings(charts_artists,get_artist,WEEKLY_TOPARTISTS) - collect_rankings(charts_tracks,get_track,WEEKLY_TOPTRACKS) - - -@daily -def send_stats(): - if malojaconfig["SEND_STATS"]: - - log("Sending daily stats report...") - - from ..database import ARTISTS, TRACKS, SCROBBLES - - keys = { - "url":"https://myrcella.krateng.ch/malojastats", - "method":"POST", - "headers":{"Content-Type": "application/json"}, - "data":json.dumps({ - "name":malojaconfig["NAME"], - "url":malojaconfig["PUBLIC_URL"], - "version":VERSION, - "artists":len(ARTISTS), - "tracks":len(TRACKS), - "scrobbles":len(SCROBBLES) - }).encode("utf-8") - } - try: - req = urllib.request.Request(**keys) - response = urllib.request.urlopen(req) - log("Sent daily report!") - except: - log("Could not send daily report!") From b50afe70eaee3ea89279df0d8c2da9e6b881641a Mon Sep 17 00:00:00 2001 From: krateng Date: Sun, 9 Jan 2022 07:50:34 +0100 Subject: [PATCH 027/182] Minor stuff --- maloja/database/__init__.py | 6 +++--- maloja/database/sqldb.py | 4 +++- maloja/jinjaenv/context.py | 1 + maloja/web/jinja/artist.jinja | 4 ++-- maloja/web/jinja/track.jinja | 4 ++-- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index 4e4ab2d..c6934f1 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -251,10 +251,10 @@ def artist_info(artist): except: # if the artist isnt in the charts, they are not being credited and we # need to show information about the credited one - artist = sqldb.get_credited_artists(artist)[0] - c = [e for e in alltimecharts if e["artist"] == artist][0] + replaceartist = sqldb.get_credited_artists(artist)[0] + c = [e for e in alltimecharts if e["artist"] == replaceartist][0] position = c["rank"] - return {"replace":artist,"scrobbles":scrobbles,"position":position} + return {"artist":artist,"replace":replaceartist,"scrobbles":scrobbles,"position":position} diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index 5a18c4e..0b55fab 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -377,7 +377,9 @@ def count_scrobbles_by_artist(since,to): ).select_from(jointable2).where( DB['scrobbles'].c.timestamp<=to, DB['scrobbles'].c.timestamp>=since - ).group_by(sql.func.coalesce(DB['associated_artists'].c.target_artist,DB['trackartists'].c.artist_id)).order_by(sql.desc('count')) + ).group_by( + sql.func.coalesce(DB['associated_artists'].c.target_artist,DB['trackartists'].c.artist_id) + ).order_by(sql.desc('count')) result = conn.execute(op).all() diff --git a/maloja/jinjaenv/context.py b/maloja/jinjaenv/context.py index f39755b..10f1b9e 100644 --- a/maloja/jinjaenv/context.py +++ b/maloja/jinjaenv/context.py @@ -35,6 +35,7 @@ def update_jinja_environment(): # external "urllib": urllib, "math":math, + "print":print, # TODO: remove this # config "ranges": [ ('day','7 days',malojatime.today().next(-6),'day',7), diff --git a/maloja/web/jinja/artist.jinja b/maloja/web/jinja/artist.jinja index a1c92ad..9aa0c4a 100644 --- a/maloja/web/jinja/artist.jinja +++ b/maloja/web/jinja/artist.jinja @@ -1,5 +1,5 @@ {% extends "abstracts/base.jinja" %} -{% block title %}Maloja - {{ artist }}{% endblock %} +{% block title %}Maloja - {{ info.artist }}{% endblock %} {% import 'snippets/links.jinja' as links %} {% import 'partials/awards_artist.jinja' as awards %} @@ -47,7 +47,7 @@ {% endif %} -

{{ artist }}

+

{{ info.artist }}

{% if competes %}#{{ info.position }}{% endif %}
{% if competes and included %} diff --git a/maloja/web/jinja/track.jinja b/maloja/web/jinja/track.jinja index a628464..50abd59 100644 --- a/maloja/web/jinja/track.jinja +++ b/maloja/web/jinja/track.jinja @@ -1,5 +1,5 @@ {% extends "abstracts/base.jinja" %} -{% block title %}Maloja - {{ track.title }}{% endblock %} +{% block title %}Maloja - {{ info.track.title }}{% endblock %} {% import 'snippets/links.jinja' as links %} @@ -42,7 +42,7 @@ {{ links.links(track.artists) }}
-

{{ track.title }}

+

{{ info.track.title }}

{{ awards.certs(track) }} #{{ info.position }}
From eb9d29686b489d769ae2a50355e9d8017f786317 Mon Sep 17 00:00:00 2001 From: krateng Date: Mon, 10 Jan 2022 04:51:58 +0100 Subject: [PATCH 028/182] Ported reasonable changes from the eldritch branch --- maloja/database/sqldb.py | 12 +++--------- maloja/upgrade.py | 4 +++- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index 0b55fab..677635d 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -138,7 +138,6 @@ def scrobble_dict_to_db(info): "timestamp":info['time'], "origin":info['origin'], "duration":info['duration'], - "extra":info['extra'], "track_id":get_track_id(info['track']) } @@ -146,7 +145,7 @@ def track_dict_to_db(info): return { "title":info['title'], "title_normalized":normalize_name(info['title']), - "length":info['length'] + "length":info.get('length') } def artist_dict_to_db(info): @@ -175,10 +174,7 @@ def add_scrobbles(scrobbleslist): with engine.begin() as conn: for op in ops: - try: - conn.execute(op) - except: - pass + conn.execute(op) ### these will 'get' the ID of an entity, creating it if necessary @@ -214,9 +210,7 @@ def get_track_id(trackdict): with engine.begin() as conn: op = DB['tracks'].insert().values( - title=trackdict['title'], - title_normalized=ntitle, - length=trackdict['length'] + **track_dict_to_db(trackdict) ) result = conn.execute(op) track_id = result.inserted_primary_key[0] diff --git a/maloja/upgrade.py b/maloja/upgrade.py index 4257db8..7126131 100644 --- a/maloja/upgrade.py +++ b/maloja/upgrade.py @@ -32,8 +32,9 @@ def upgrade_db(callback_add_scrobbles): scrobblefiles = [f for f in os.listdir(oldfolder) if f.endswith(".tsv")] if len(scrobblefiles) > 0: print(col['yellow']("Upgrading v2 Database to v3 Database. This could take a while...")) + idx = 0 for sf in scrobblefiles: - print(f"\tImporting from old tsv scrobble file: {sf}") + idx += 1 if re.match(r"[0-9]+_[0-9]+\.tsv",sf): origin = 'legacy' elif sf == "lastfmimport.tsv": @@ -44,6 +45,7 @@ def upgrade_db(callback_add_scrobbles): from doreah import tsv scrobbles = tsv.parse(os.path.join(oldfolder,sf),"int","string","string","string","string",comments=False) scrobblelist = [] + print(f"\tImporting from {sf} ({idx}/{len(scrobblefiles)}) - {len(scrobbles)} Scrobbles") for scrobble in scrobbles: timestamp, artists, title, album, duration = scrobble if album in ('-',''): album = None From b325fab698f8ced66cefd5d61f8749575b76595d Mon Sep 17 00:00:00 2001 From: krateng Date: Mon, 10 Jan 2022 05:05:54 +0100 Subject: [PATCH 029/182] Fixing and renaming --- maloja/database/sqldb.py | 5 ++++- maloja/server.py | 12 ++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/maloja/database/sqldb.py b/maloja/database/sqldb.py index 677635d..b3817d2 100644 --- a/maloja/database/sqldb.py +++ b/maloja/database/sqldb.py @@ -174,7 +174,10 @@ def add_scrobbles(scrobbleslist): with engine.begin() as conn: for op in ops: - conn.execute(op) + try: + conn.execute(op) + except sql.exc.IntegrityError: + pass ### these will 'get' the ID of an entity, creating it if necessary diff --git a/maloja/server.py b/maloja/server.py index 5140cda..c08b29f 100644 --- a/maloja/server.py +++ b/maloja/server.py @@ -211,7 +211,7 @@ def static(name,ext): ### DYNAMIC -def static_html(name): +def jinja_page(name): if name in aliases: redirect(aliases[name]) keys = remove_identical(FormsDict.decode(request.query)) @@ -241,17 +241,17 @@ def static_html(name): @webserver.route("/") @auth.authenticated -def static_html_private(name): - return static_html(name) +def jinja_page_private(name): + return jinja_page(name) @webserver.route("/") -def static_html_public(name): - return static_html(name) +def jinja_page_public(name): + return jinja_page(name) @webserver.route("") @webserver.route("/") def mainpage(): - return static_html("start") + return jinja_page("start") # Shortlinks From a64d3610d36a03912994e214863ae10072a94adf Mon Sep 17 00:00:00 2001 From: krateng Date: Sun, 13 Feb 2022 06:15:29 +0100 Subject: [PATCH 030/182] Fixed convoluted old scrobbling functionality --- maloja/apis/_base.py | 3 +-- maloja/database/__init__.py | 37 +++++++++++--------------- maloja/thirdparty/__init__.py | 2 +- maloja/web/static/js/manualscrobble.js | 4 +-- 4 files changed, 20 insertions(+), 26 deletions(-) diff --git a/maloja/apis/_base.py b/maloja/apis/_base.py index 9a365d9..e1d7d36 100644 --- a/maloja/apis/_base.py +++ b/maloja/apis/_base.py @@ -95,7 +95,6 @@ class APIHandler: if time is None: time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp()) try: (artists,title) = cla.fullclean(artiststr,titlestr) - database.createScrobble(artists,title,time) - database.sync() + database.incoming_scrobble(artists,title,time) except: raise ScrobblingException() diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index c6934f1..b6a5db5 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -64,11 +64,6 @@ def waitfordb(func): -MEDALS_ARTISTS = {} #literally only changes once per year, no need to calculate that on the fly -MEDALS_TRACKS = {} -WEEKLY_TOPTRACKS = {} -WEEKLY_TOPARTISTS = {} - ISSUES = {} cla = CleanerAgent() @@ -77,10 +72,19 @@ coa = CollectorAgent() -def createScrobble(artists,title,time,album=None,duration=None,volatile=False): +def incoming_scrobble(artists,title,album=None,albumartists=None,duration=None,time=None,fix=True): + if time is None: + time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp()) + + log("Incoming scrobble (): ARTISTS: " + str(artists) + ", TRACK: " + title,module="debug") + if fix: + (artists,title) = cla.fullclean(artists,title) if len(artists) == 0 or title == "": - return {} + return {"status":"failure"} + + if albumartists is None: + albumartists = artists scrobbledict = { "time":time, @@ -89,7 +93,7 @@ def createScrobble(artists,title,time,album=None,duration=None,volatile=False): "title":title, "album":{ "name":album, - "artists":None + "artists":albumartists }, "length":None }, @@ -97,9 +101,11 @@ def createScrobble(artists,title,time,album=None,duration=None,volatile=False): "origin":"generic" } - add_scrobble(scrobbledict) + sqldb.add_scrobble(scrobbledict) proxy_scrobble_all(artists,title,time) - return scrobbledict + + return {"status":"success","scrobble":scrobbledict} + @@ -291,18 +297,7 @@ def track_info(track): -def incoming_scrobble(artists,title,album=None,duration=None,time=None,fix=True): - if time is None: - time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp()) - log("Incoming scrobble (): ARTISTS: " + str(artists) + ", TRACK: " + title,module="debug") - if fix: - (artists,title) = cla.fullclean(artists,title) - trackdict = createScrobble(artists,title,time,album,duration) - - sync() - - return {"status":"success","track":trackdict} diff --git a/maloja/thirdparty/__init__.py b/maloja/thirdparty/__init__.py index 68e221c..c065c0b 100644 --- a/maloja/thirdparty/__init__.py +++ b/maloja/thirdparty/__init__.py @@ -154,7 +154,7 @@ class ImportInterface(GenericInterface,abstract=True): def import_scrobbles(self): for scrobble in self.get_remote_scrobbles(): - database.createScrobble( + database.incoming_scrobble( artists=scrobble['artists'], title=scrobble['title'], time=scrobble['time'] diff --git a/maloja/web/static/js/manualscrobble.js b/maloja/web/static/js/manualscrobble.js index 4816632..30c3e9f 100644 --- a/maloja/web/static/js/manualscrobble.js +++ b/maloja/web/static/js/manualscrobble.js @@ -83,8 +83,8 @@ function scrobble(artists,title) { function scrobbledone(req) { result = req.response; - txt = result["track"]["title"] + " by " + result["track"]["artists"][0]; - if (result["track"]["artists"].length > 1) { + txt = result["scrobble"]["track"]["title"] + " by " + result["scrobble"]["track"]["artists"][0]; + if (result["scrobble"]["track"]["artists"].length > 1) { txt += " et al."; } document.getElementById("notification").innerHTML = "Scrobbled " + txt + "!"; From 8db87bdbc54a073e0cf38158fdf8eb0babd61147 Mon Sep 17 00:00:00 2001 From: krateng Date: Sun, 13 Feb 2022 07:45:22 +0100 Subject: [PATCH 031/182] Various fixes --- maloja/apis/native_v1.py | 13 ++++++------- maloja/database/__init__.py | 2 +- maloja/malojauri.py | 2 +- maloja/web/jinja/abstracts/base.jinja | 1 + maloja/web/jinja/admin_setup.jinja | 22 ++++++++++++++-------- pyproject.toml | 5 +++-- 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/maloja/apis/native_v1.py b/maloja/apis/native_v1.py index f0b1d4d..0a4b10f 100644 --- a/maloja/apis/native_v1.py +++ b/maloja/apis/native_v1.py @@ -225,19 +225,18 @@ def post_scrobble(artist:Multi=None,**keys): :param string artists: List of artists. Overwritten by artist parameter. :param string title: Title of the track. :param string album: Name of the album. Optional. + :param string albumartists: Album artists. Optional. :param int duration: Actual listened duration of the scrobble in seconds. Optional. + :param int length: Total length of the track in seconds. Optional. :param int time: UNIX timestamp of the scrobble. Optional, not needed if scrobble is at time of request. + :param boolean nofix: Skip server-side metadata parsing. Optional. """ #artists = "/".join(artist) - artists = artist if artist is not None else keys.get("artists") - title = keys.get("title") - album = keys.get("album") - duration = keys.get("seconds") - time = keys.get("time") - nofix = keys.get("nofix") is not None + keys['artists'] = [artist] if artist is not None else keys.get("artists") + keys['fix'] = keys.get("nofix") is None if time is not None: time = int(time) - return incoming_scrobble(artists,title,album=album,duration=duration,time=time,fix=not nofix) + return incoming_scrobble(**keys) diff --git a/maloja/database/__init__.py b/maloja/database/__init__.py index b6a5db5..0317b2c 100644 --- a/maloja/database/__init__.py +++ b/maloja/database/__init__.py @@ -72,7 +72,7 @@ coa = CollectorAgent() -def incoming_scrobble(artists,title,album=None,albumartists=None,duration=None,time=None,fix=True): +def incoming_scrobble(artists,title,album=None,albumartists=None,duration=None,length=None,time=None,fix=True): if time is None: time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp()) diff --git a/maloja/malojauri.py b/maloja/malojauri.py index 8b3f34b..d93caeb 100644 --- a/maloja/malojauri.py +++ b/maloja/malojauri.py @@ -36,7 +36,7 @@ def uri_to_internal(keys,forceTrack=False,forceArtist=False,api=False): limitkeys["timerange"] = get_range_object(since=since,to=to,within=within) #3 - delimitkeys = {"step":"month","stepn":1,"trail":1} + delimitkeys = {"step":"year","stepn":1,"trail":1} if "step" in keys: [delimitkeys["step"],delimitkeys["stepn"]] = (keys["step"].split("-") + [1])[:2] if "stepn" in keys: delimitkeys["stepn"] = keys["stepn"] #overwrite if explicitly given if "stepn" in delimitkeys: delimitkeys["stepn"] = int(delimitkeys["stepn"]) #in both cases, convert it here diff --git a/maloja/web/jinja/abstracts/base.jinja b/maloja/web/jinja/abstracts/base.jinja index 1cab479..8411e93 100644 --- a/maloja/web/jinja/abstracts/base.jinja +++ b/maloja/web/jinja/abstracts/base.jinja @@ -49,6 +49,7 @@