Initial commit

2025-07-09 05:01:22 +03:00 · 2018-11-24 16:29:24 +01:00 · 2018-11-24 16:29:24 +01:00 · 7b151d60ef
commit 7b151d60ef
7 changed files with 423 additions and 0 deletions
--- a/cleanup.py
+++ b/cleanup.py
@ -0,0 +1,152 @@
+import re
+
+def cleanup(artiststr):
+	"""Split a raw artist credit string into the individual artist names.
+
+	A "(ft. X)" group is unwrapped first, then the string is split on every
+	known separator (" vs. ", " ft. ", " feat. ", " featuring ", ";", ...).
+
+	Parameters:
+		artiststr: raw artist field, e.g. "A feat. B; C".
+
+	Returns:
+		List of unique artist names in order of first appearance. Empty
+		pieces (e.g. from a trailing ";") are dropped; an empty input
+		yields an empty list.
+	"""
+	if artiststr == "":
+		return []
+
+	# "A (ft. B)" contributes both "A" and "B". When the pattern does not
+	# match, both substitutions return the input unchanged and the duplicate
+	# is removed at the end.
+	parenthesised = r"(.*) \(ft\. (.*)\)"
+	artists = [
+		re.sub(parenthesised, r"\1", artiststr),
+		re.sub(parenthesised, r"\2", artiststr),
+	]
+
+	# Applied in this order: longer variants come before the shorter ones
+	# they contain (" vs. " before " vs ", " ; " before "; " before ";").
+	separators = (
+		" vs. ", " vs ",
+		" ft. ", " Ft. ",
+		" Feat. ", " feat. ",
+		" featuring ", " Featuring ",
+		" ; ", "; ", ";",
+	)
+	for sep in separators:
+		artists = [piece for a in artists for piece in a.split(sep)]
+
+	# dict.fromkeys de-duplicates while keeping first-seen order, so the
+	# result is stable between runs (set iteration order is not).
+	return [a for a in dict.fromkeys(artists) if a]
+	
+	
+def cleantitle(title):
+	"""Normalise brackets in a track title and strip cover-attribution notes.
+
+	Square brackets are turned into round ones, then any
+	" (as made famous by ...)" and " (originally by ...)" group is removed.
+	Returns the cleaned title.
+	"""
+	cleaned = title.translate(str.maketrans("[]", "()"))
+
+	for note in (r" \(as made famous by .*?\)", r" \(originally by .*?\)"):
+		cleaned = re.sub(note, "", cleaned)
+
+	return cleaned
+
+def findartistsintitle(title):
+	"""Extract featured artists that are credited inside a track title.
+
+	Recognises a parenthesised group introduced by "ft.", "feat.", "Feat.",
+	"Ft.", "Featuring" or "featuring", e.g. "Song (feat. B)".
+
+	Parameters:
+		title: track title, possibly containing such a group.
+
+	Returns:
+		Tuple (truetitle, artistlist): the title with the group removed and
+		the featured artists as split by cleanup(). Without a match the
+		title is returned unchanged together with cleanup("").
+	"""
+	truetitle = title
+	artists = ""
+
+	# Every spelling is tried against the original title; if more than one
+	# matches, the last one in this tuple wins.
+	for marker in ("ft.", "feat.", "Feat.", "Ft.", "Featuring", "featuring"):
+		# The same pattern is used for detection and extraction, so the two
+		# can never disagree about what counts as a match.
+		group = r"(.*) \(" + re.escape(marker) + r" (.*?)\)"
+		newtitle = re.sub(group, r"\1", title)
+		if title != newtitle:
+			# The trailing ".*" swallows whatever follows the group so that
+			# only the artist part remains.
+			artists = re.sub(group + r".*", r"\2", title)
+			truetitle = newtitle
+
+	artistlist = cleanup(artists)
+
+	return (truetitle,artistlist)
+	
+def flatten(lis):
+	"""Flatten one level of nesting and remove duplicates.
+
+	Parameters:
+		lis: iterable whose items are either strings or lists of strings.
+
+	Returns:
+		List of the unique strings in order of first appearance.
+	"""
+	newlist = []
+
+	for l in lis:
+		if isinstance(l, str):
+			newlist.append(l)
+		else:
+			# extend() grows the list in place instead of copying it on
+			# every iteration.
+			newlist.extend(l)
+
+	# dict.fromkeys de-duplicates like a set but keeps insertion order, so
+	# the result does not change from run to run.
+	return list(dict.fromkeys(newlist))
--- a/database.py
+++ b/database.py
@ -0,0 +1,186 @@
+from bottle import route, run, template, static_file, request, response
+from importlib.machinery import SourceFileLoader
+import waitress
+import os
+import datetime
+
+# Every scrobble as a dict with the keys "artists" (list of names), "title",
+# "time" (integer timestamp) and "saved" (True once the entry is on disk).
+DATABASE = []
+
+# Indexes derived from DATABASE by build() / buildh(): ARTISTS holds the
+# unique artist names, TRACKS the unique {"artists": [...], "title": ...}
+# combinations.
+ARTISTS = []
+TRACKS = []
+
+
+@route("/scrobbles")
+def get_scrobbles():
+	"""Return the scrobbles matching the "artist" query parameter as JSON.
+
+	Without the parameter db_query() receives None and applies no artist
+	filter.
+	"""
+	wanted = request.query.get("artist")
+	scrobbles = db_query(artist=wanted)
+	response.content_type = "application/json"
+	# Bottle serialises a returned dict to JSON but not a bare list, hence
+	# the wrapper object.
+	return {"object":scrobbles}
+
+	#r = db_query(artist=keys.get("artist"))
+	#text = ""
+	#for e in r:
+	#	entry = ""
+	#	for a in e["artists"]:
+	#		entry += a + "/"
+	#	entry += "	" + e["title"] + "\n"
+	#	text += entry
+	#return text
+
+@route("/tracks")
+def get_tracks():
+	"""Return the known tracks, optionally filtered by artist.
+
+	Reads the "artist" query parameter. When it is given, only tracks that
+	list that artist are returned; when it is absent, all tracks are.
+	The list is wrapped in a dict under the key "object".
+	"""
+	artist = request.query.get("artist")
+
+	# A missing parameter means "no filter", matching how db_query() treats
+	# artist=None; previously it matched nothing.
+	ls = [t for t in TRACKS if artist is None or artist in t["artists"]]
+	return {"object":ls}
+
+# Starts the server
+def runserver(DATABASE_PORT):
+	"""Load the scrobble logs, build the artist/track indexes and serve the API.
+
+	DATABASE_PORT is the port handed to bottle's run(). The server listens
+	on host 0.0.0.0 using the waitress backend.
+	"""
+	
+	# Fill DATABASE from the log files, then derive ARTISTS and TRACKS.
+	reload()
+	buildh()
+
+	run(host='0.0.0.0', port=DATABASE_PORT, server='waitress')
+	
+
+# builds database of artists and tracks
+# order-preserving alternative to buildh(); runserver() calls buildh()
+def build():
+	"""Rebuild ARTISTS and TRACKS from DATABASE, keeping first-seen order.
+
+	ARTISTS becomes the list of unique artist names. TRACKS becomes the list
+	of unique tracks, where two scrobbles are the same track when their title
+	and their set of artists are equal; the artist list of the first
+	occurrence is kept.
+	"""
+	global ARTISTS
+	global TRACKS
+
+	artistlist = []
+	tracklist = []
+	# Sets mirror the two lists so that membership tests are constant-time
+	# instead of scanning the lists for every scrobble.
+	seenartists = set()
+	seentracks = set()
+
+	for t in DATABASE:
+		for a in t["artists"]:
+			if a not in seenartists:
+				seenartists.add(a)
+				artistlist.append(a)
+
+		# frozenset makes the artist combination hashable and independent of
+		# the order in which the artists are listed.
+		key = (frozenset(t["artists"]),t["title"])
+		if key not in seentracks:
+			seentracks.add(key)
+			tracklist.append({"artists":t["artists"],"title":t["title"]})
+
+	ARTISTS = artistlist
+	TRACKS = tracklist
+
+
+# builds database of artists and tracks
+# relies on hashable types so that duplicates are dropped by the sets themselves
+def buildh():
+	"""Rebuild ARTISTS and TRACKS from DATABASE using sets.
+
+	A track is identified by the pair (frozenset of artists, title). The
+	pairs are turned back into {"artists": [...], "title": ...} dicts at the
+	end. Neither resulting list has a defined order.
+	"""
+	global ARTISTS
+	global TRACKS
+
+	names = set()
+	pairs = set()
+	for entry in DATABASE:
+		names.update(entry["artists"])
+		# frozenset is hashable, which lets the pair live inside a set
+		pairs.add((frozenset(entry["artists"]),entry["title"]))
+
+	print("Done, now converting back!")
+
+	ARTISTS = list(names)
+	TRACKS = [{"artists":list(who),"title":what} for (who,what) in pairs]
+
+# Rebuilds the database from disk, keeps cached entries
+def reload():
+	"""Replace the contents of DATABASE with the log files plus unsaved entries.
+
+	Every file in logs/ whose name contains ".csv" is read line by line. A
+	line has the form "time,artist1/artist2,title[,...]"; fields after the
+	title are ignored. Entries that are in memory but not yet flushed
+	(saved is False) are kept, so calling this repeatedly does not duplicate
+	or lose anything.
+
+	Raises:
+		ValueError / IndexError if a non-empty line does not have that form.
+	"""
+	# Unsaved entries exist only in memory and must survive the rebuild.
+	newdb = [t for t in DATABASE if not t["saved"]]
+
+	for f in os.listdir("logs/"):
+
+		if not (".csv" in f):
+			continue
+
+		# "with" closes the file even if a line fails to parse.
+		with open("logs/" + f) as logfile:
+			for l in logfile:
+
+				l = l.replace("\n","")
+				if l == "":
+					# tolerate blank lines, e.g. at the end of a file
+					continue
+				data = l.split(",")
+
+				artists = data[1].split("/")
+				title = data[2]
+				time = int(data[0])
+
+				newdb.append({"artists":artists,"title":title,"time":time,"saved":True})
+
+	# Slice assignment swaps the contents in place: no "global" rebinding is
+	# needed and existing references to the list stay valid.
+	DATABASE[:] = newdb
+
+# Saves all cached entries to disk
+def flush():
+	"""Append every unsaved DATABASE entry to its monthly log file.
+
+	An entry is written as "time,artist1/artist2,title" to
+	logs/<year>_<month>.csv, where year and month come from the entry's
+	timestamp as interpreted by datetime.date.fromtimestamp(). The entry is
+	marked as saved only after it has been written.
+	"""
+	for t in DATABASE:
+		if t["saved"]:
+			continue
+
+		artistss = "/".join(t["artists"])
+		timestamp = datetime.date.fromtimestamp(t["time"])
+
+		entry = ",".join([str(t["time"]),artistss,t["title"]])
+
+		# "with" guarantees the file is closed even if the write fails, in
+		# which case the entry stays unsaved.
+		with open("logs/" + str(timestamp.year) + "_" + str(timestamp.month) + ".csv","a") as monthfile:
+			monthfile.write(entry)
+			monthfile.write("\n")
+
+		t["saved"] = True
+			
+
+# Queries the database
+def db_query(artist=None,title=None,since=0,to=9999999999):
+	"""Return the scrobbles that match all given filters.
+
+	Parameters:
+		artist: only scrobbles listing this artist; None disables the filter.
+		title: only scrobbles with exactly this title; None disables the filter.
+		since, to: exclusive time bounds, either as a timestamp or as a
+			"year/month/day/hour/minute" string (UTC) of which trailing
+			parts may be left out.
+
+	Returns:
+		List of dicts with the keys "artists", "title" and "time".
+	"""
+	def as_timestamp(bound):
+		# Anything that is not a string is taken to be a timestamp already.
+		if not isinstance(bound, str):
+			return bound
+		given = [int(x) for x in bound.split("/")]
+		# Parts that were left out fall back to 1970-01-01 00:00.
+		parts = [1970,1,1,0,0]
+		parts[:len(given)] = given
+		year, month, day, hour, minute = parts[:5]
+		moment = datetime.datetime(year,month,day,hour,minute,tzinfo=datetime.timezone.utc)
+		return int(moment.timestamp())
+
+	since = as_timestamp(since)
+	to = as_timestamp(to)
+
+	found = []
+	for t in DATABASE:
+		if artist is not None and artist not in t["artists"]:
+			continue
+		if title is not None and t["title"] != title:
+			continue
+		if not (since < t["time"] < to):
+			continue
+		# the internal "saved" flag is deliberately not part of the result
+		found.append({"artists":t["artists"],"title":t["title"],"time":t["time"]})
+	return found
+	
+# Search for strings
+def db_search(query,type=None):
+	"""Case-insensitive substring search over the artist or track index.
+
+	Parameters:
+		query: text to look for.
+		type: "ARTIST" searches the names in ARTISTS, "TRACK" searches the
+			titles in TRACKS; any other value matches nothing.
+
+	Returns:
+		List of matching artist names, or of matching track dicts.
+	"""
+	needle = query.lower()
+
+	if type=="ARTIST":
+		return [a for a in ARTISTS if needle in a.lower()]
+
+	if type=="TRACK":
+		# TRACKS entries are dicts ({"artists": [...], "title": ...}), so the
+		# title has to be looked up by key, not by position.
+		return [t for t in TRACKS if needle in t["title"].lower()]
+
+	# Missing or unknown type: there is nothing to search in.
+	return []
+			
--- a/lastfmconverter.py
+++ b/lastfmconverter.py
@ -0,0 +1,39 @@
+import sys, os, datetime, re, cleanup
+
+# Converts an exported scrobble list (argv[1]) into the log format read by
+# the database and appends the result to argv[2].
+# Input lines are "artist,album,title,time", with time as "day Mon year h:m"
+# (for instance "24 Nov 2018 16:29"); output lines are
+# "timestamp,artist1/artist2,title,album".
+
+# Month abbreviations as they appear in the time column.
+months = {"Jan":1,"Feb":2,"Mar":3,"Apr":4,"May":5,"Jun":6,"Jul":7,"Aug":8,"Sep":9,"Oct":10,"Nov":11,"Dec":12}
+
+# "with" closes (and thereby flushes) both files even if a line fails to parse.
+with open(sys.argv[1]) as log, open(sys.argv[2],"a") as outputlog:
+	for l in log:
+		l = l.replace("\n","")
+		if l == "":
+			# tolerate blank lines, e.g. at the end of the file
+			continue
+		data = l.split(",")
+
+		artist = data[0]
+		album = data[1]
+		title = data[2]
+		time = data[3]
+
+		# Artists can be credited in the artist field as well as in the
+		# title, so both are parsed and the results merged.
+		title = cleanup.cleantitle(title)
+		artists = cleanup.cleanup(artist)
+		(title,extraartists) = cleanup.findartistsintitle(title)
+		artists = list(set(artists + extraartists))
+
+		artistsstr = "/".join(artists)
+
+		timeparts = time.split(" ")
+		(h,m) = timeparts[3].split(":")
+
+		# NOTE(review): the datetime is naive, so timestamp() interprets it in
+		# the local timezone of the machine running the script - confirm that
+		# this matches the timezone of the export.
+		timestamp = int(datetime.datetime(int(timeparts[2]),months[timeparts[1]],int(timeparts[0]),int(h),int(m)).timestamp())
+
+		entry = ",".join([str(timestamp),artistsstr,title,album])
+
+		outputlog.write(entry)
+		outputlog.write("\n")
+	
+
+
--- a/logs/.gitignore
+++ b/logs/.gitignore
@ -0,0 +1 @@
+*.csv
--- a/logs/dummy
+++ b/logs/dummy
--- a/rules/dummy
+++ b/rules/dummy
--- a/server.py
+++ b/server.py
@ -0,0 +1,45 @@
+from bottle import route, run, template, static_file, request
+#import os
+from importlib.machinery import SourceFileLoader
+#from serverutil import log, db_remove, createVideoFile
+import _thread
+import waitress
+
+
+# Port of the main web server started at the bottom of this file.
+MAIN_PORT = 12345
+# Port handed to runserver() of database.py when it is started below.
+DATABASE_PORT = 12349
+
+#@route("/<pth:path>/<file:re:.*\\.html>")
+#@route("/<pth:path>/<file:re:.*\\.css>")
+#@route("/<pth:path>/<file:re:.*\\.js>")
+#@route("/<pth:path>/<file:re:.*\\.jpg>")
+#@route("/<pth:path>/<file:re:.*\\.png>")
+#@route("/<pth:path>/<file:re:.*\\.mp4>")
+#@route("/<pth:path>/<file:re:.*\\.mkv>")
+@route("/<pth:path>")
+def static(pth):
+	"""Serve the file at the requested path through bottle's static_file().
+
+	NOTE(review): root="" presumably resolves to the working directory, which
+	would make every file in it (source code, logs) downloadable - confirm,
+	and consider serving from a dedicated directory instead.
+	"""
+	
+	return static_file(pth,root="")
+
+
+@route("")
+@route("/")
+def mainpage():
+	"""Answer requests for the start page by delegating to GET() in mainpage.py.
+
+	The module is loaded from its source file on every request and receives
+	the request's query parameters.
+	"""
+	keys = request.query
+
+	loader = SourceFileLoader("mainpage","mainpage.py")
+	page = loader.load_module()
+	return page.GET(keys)
+	
+@route("/xhttp")
+def xhttp():
+	"""Answer /xhttp requests by delegating to GET() in download.py.
+
+	The module is loaded from its source file on every request and receives
+	the request's query parameters.
+	"""
+	keys = request.query
+
+	loader = SourceFileLoader("download","download.py")
+	handler = loader.load_module()
+	return handler.GET(keys)
+
+
+
+## other programs to always run with the server
+#_thread.start_new_thread(SourceFileLoader("downloader","downloader.py").load_module().loop,())
+# Load database.py from source and run its runserver(DATABASE_PORT) in a
+# separate thread, so that it runs alongside the main server started below.
+_thread.start_new_thread(SourceFileLoader("database","database.py").load_module().runserver,(DATABASE_PORT,))
+
+print("wat")
+# Start the main server on MAIN_PORT.
+run(host='0.0.0.0', port=MAIN_PORT, server='waitress')