Merge branch 'master' into next_minor_version

# Conflicts: # maloja/proccontrol/tasks/import_scrobbles.py
2025-07-06 03:39:05 +03:00 · 2023-10-23 12:14:44 +02:00 · 2023-10-23 12:14:44 +02:00 · 9cbdbfc2ad
commit 9cbdbfc2ad
parent 8bb6d9d7ad b7e3d9bb48
2 changed files with 103 additions and 13 deletions
--- a/README.md
+++ b/README.md
@ -81,9 +81,10 @@ Pull the [latest image](https://hub.docker.com/r/krateng/maloja) or check out th

 Of note are these settings which should be passed as environmental variables to the container:

+* `MALOJA_SKIP_SETUP` -- Make the server setup process non-interactive. Maloja will not work properly in a container without this variable set. This is done by default in the provided Containerfile.
+* `MALOJA_FORCE_PASSWORD` -- Set an admin password for Maloja. You only need this on the first run.
 * `MALOJA_DATA_DIRECTORY` -- Set the directory in the container where configuration folders/files should be located
  * Mount a [volume](https://docs.docker.com/engine/reference/builder/#volume) to the specified directory to access these files outside the container (and to make them persistent)
-* `MALOJA_FORCE_PASSWORD` -- Set an admin password for maloja

 You must publish a port on your host machine to bind to the container's web port (default 42010). The container uses IPv4 per default.

--- a/maloja/proccontrol/tasks/import_scrobbles.py
+++ b/maloja/proccontrol/tasks/import_scrobbles.py
@ -33,27 +33,35 @@ def import_scrobbles(inputf):

 	filename = os.path.basename(inputf)

-	if re.match(".*\.csv",filename):
+	if re.match(r".*\.csv",filename):
 		typeid,typedesc = "lastfm","Last.fm"
 		importfunc = parse_lastfm

-	elif re.match("endsong_[0-9]+\.json",filename):
-		typeid,typedesc = "spotify","Spotify"
-		importfunc = parse_spotify_full
-
-	elif re.match("StreamingHistory[0-9]+\.json",filename):
+	elif re.match(r"Streaming_History_Audio.+\.json",filename):
 		typeid,typedesc = "spotify","Spotify"
 		importfunc = parse_spotify_lite

-	elif re.match("maloja_export_[0-9]+\.json",filename):
+	elif re.match(r"endsong_[0-9]+\.json",filename):
+		typeid,typedesc = "spotify","Spotify"
+		importfunc = parse_spotify
+
+	elif re.match(r"StreamingHistory[0-9]+\.json",filename):
+		typeid,typedesc = "spotify","Spotify"
+		importfunc = parse_spotify_lite_legacy
+
+	elif re.match(r"maloja_export_[0-9]+\.json",filename):
 		typeid,typedesc = "maloja","Maloja"
 		importfunc = parse_maloja

 	# username_lb-YYYY-MM-DD.json
-	elif re.match(".*_lb-[0-9-]+\.json",filename):
+	elif re.match(r".*_lb-[0-9-]+\.json",filename):
 		typeid,typedesc = "listenbrainz","ListenBrainz"
 		importfunc = parse_listenbrainz

+	elif re.match(r"\.scrobbler\.log",filename):
+		typeid,typedesc = "rockbox","Rockbox"
+		importfunc = parse_rockbox
+
 	else:
 		print("File",inputf,"could not be identified as a valid import source.")
 		return result
@ -123,7 +131,7 @@ def import_scrobbles(inputf):

 	return result

-def parse_spotify_lite(inputf):
+def parse_spotify_lite_legacy(inputf):
 	pth = os.path
 	inputfolder = pth.relpath(pth.dirname(pth.abspath(inputf)))
 	filenames = re.compile(r'StreamingHistory[0-9]+\.json')
@ -173,7 +181,63 @@ def parse_spotify_lite(inputf):
 		print()


-def parse_spotify_full(inputf):
+def parse_spotify_lite(inputf):
+	pth = os.path
+	inputfolder = pth.relpath(pth.dirname(pth.abspath(inputf)))
+	filenames = re.compile(r'Streaming_History_Audio.+\.json')
+	inputfiles = [os.path.join(inputfolder,f) for f in os.listdir(inputfolder) if filenames.match(f)]
+
+	if len(inputfiles) == 0:
+		print("No files found!")
+		return
+
+	if inputfiles != [inputf]:
+		print("Spotify files should all be imported together to identify duplicates across the whole dataset.")
+		if not ask("Import " + ", ".join(col['yellow'](i) for i in inputfiles) + "?",default=True):
+			inputfiles = [inputf]
+
+	for inputf in inputfiles:
+
+		print("Importing",col['yellow'](inputf),"...")
+		with open(inputf,'r') as inputfd:
+			data = json.load(inputfd)
+
+		for entry in data:
+
+			try:
+				played = int(entry['ms_played'] / 1000)
+				timestamp = int(
+					datetime.datetime.strptime(entry['ts'],"%Y-%m-%dT%H:%M:%SZ").timestamp()
+				)
+				artist = entry['master_metadata_album_artist_name'] # hmmm
+				title = entry['master_metadata_track_name']
+				album = entry['master_metadata_album_album_name']
+				albumartist = entry['master_metadata_album_artist_name']
+
+				if None in [title,artist]:
+					yield ('CONFIDENT_SKIP',None,f"{entry} has relevant fields set to null, skipping...")
+					continue
+
+				if played < 30:
+					yield ('CONFIDENT_SKIP',None,f"{entry} is shorter than 30 seconds, skipping...")
+					continue
+
+				yield ("CONFIDENT_IMPORT",{
+					'track_title':title,
+					'track_artists': artist,
+					'track_length': None,
+					'scrobble_time': timestamp,
+					'scrobble_duration':played,
+					'album_name': album,
+					'album_artist': albumartist
+				},'')
+			except Exception as e:
+				yield ('FAIL',None,f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})")
+				continue
+
+		print()
+
+def parse_spotify(inputf):
 	pth = os.path
 	inputfolder = pth.relpath(pth.dirname(pth.abspath(inputf)))
 	filenames = re.compile(r'endsong_[0-9]+\.json')
@ -308,8 +372,6 @@ def parse_lastfm(inputf):
 					'scrobble_time': int(datetime.datetime.strptime(
 						time + '+0000',
 						"%d %b %Y %H:%M%z"
-						# lastfm exports have time in UTC
-						# some old imports might have the wrong time here!
 					).timestamp()),
 					'scrobble_duration':None
 				},'')
@ -340,6 +402,33 @@ def parse_listenbrainz(inputf):
 			yield ('FAIL',None,f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})")
 			continue

+def parse_rockbox(inputf):
+	with open(inputf,'r') as inputfd:
+		for line in inputfd.readlines():
+			if line == "#TZ/UNKNOWN":
+				use_local_time = True
+			elif line == "#TZ/UTC":
+				use_local_time = False
+			line = line.split("#")[0].split("\n")[0]
+			if line:
+				try:
+					artist,album,track,pos,duration,rate,timestamp,track_id, *_ = line.split("\t") + [None]
+					if rate == 'L':
+						yield ("CONFIDENT_IMPORT",{
+							'track_title':track,
+							'track_artists':artist,
+							'track_length':duration,
+							'album_name':album,
+							'scrobble_time':timestamp,
+							'scrobble_duration': None
+						},'')
+					else:
+						yield ('CONFIDENT_SKIP',None,f"{track} at {timestamp} is marked as skipped.")
+				except Exception as e:
+					yield ('FAIL',None,f"{line} could not be parsed. Scrobble not imported. ({repr(e)})")
+					continue
+
+
 def parse_maloja(inputf):

 	with open(inputf,'r') as inputfd: