Include import in normal server startup, GH-393

This commit is contained in:
krateng 2025-02-05 18:24:41 +01:00
parent cc64c894f0
commit 7f774f03c4
5 changed files with 42 additions and 43 deletions

View File

@ -90,24 +90,13 @@ When not running in a container, you can run the application with `maloja run`.
### Data
If you would like to import your previous scrobbles, use the command `maloja import *filename*`. This works on:
If you would like to import your previous scrobbles, copy them into the import folder in your data directory. This works on:
* a Last.fm export generated by [ghan64's website](https://lastfm.ghan.nl/export/)
* an official [Spotify data export file](https://www.spotify.com/us/account/privacy/)
* an official [ListenBrainz export file](https://listenbrainz.org/profile/export/)
* the export of another Maloja instance
⚠️ Never import your data while maloja is running. If you need to run an import inside a Docker container, start the container in shell mode instead and perform the import before starting the container normally, as mentioned above.
```console
docker run -it --entrypoint sh -v $PWD/malojadata:/mljdata -e MALOJA_DATA_DIRECTORY=/mljdata krateng/maloja
cd /mljdata
maloja import my_last_fm_export.csv
```
To back up your data, run `maloja backup`, optionally with `--include_images`.
### Customization
* Have a look at the [available settings](settings.md) and specify your choices in `/etc/maloja/settings.ini`. You can also set each of these settings as an environment variable with the prefix `MALOJA_` (e.g. `MALOJA_SKIP_SETUP`).

View File

View File

@ -941,6 +941,13 @@ def start_db():
from . import associated
associated.load_associated_rules()
# import scrobbles
from ..proccontrol.tasks.import_scrobbles import import_scrobbles #lmao this codebase is so fucked
print(data_dir['import']())
for f in os.listdir(data_dir['import']()):
if f != 'dummy':
import_scrobbles(data_dir['import'](f))
dbstatus['healthy'] = True
# inform time module about begin of scrobbling

View File

@ -298,6 +298,7 @@ data_directories = {
"auth":pthj(dir_settings['state'],"auth"),
"backups":pthj(dir_settings['state'],"backups"),
"images":pthj(dir_settings['state'],"images"),
"import":pthj(dir_settings['state'],"import"),
"scrobbles":pthj(dir_settings['state']),
"rules":pthj(dir_settings['config'],"rules"),
"clients":pthj(dir_settings['config']),

View File

@ -84,7 +84,6 @@ def import_scrobbles(inputf):
print(f"Parsing {col['yellow'](inputf)} as {col['cyan'](typedesc)} export")
print("This could take a while...")
timestamps = set()
scrobblebuffer = []
@ -154,21 +153,22 @@ def parse_spotify_lite_legacy(inputf):
inputf = pth.abspath(inputf)
inputfolder = pth.dirname(inputf)
filenames = re.compile(r'StreamingHistory[0-9]+\.json')
inputfiles = [os.path.join(inputfolder,f) for f in os.listdir(inputfolder) if filenames.match(f)]
#inputfiles = [os.path.join(inputfolder,f) for f in os.listdir(inputfolder) if filenames.match(f)]
inputfiles = [inputf]
if len(inputfiles) == 0:
print("No files found!")
return
#if len(inputfiles) == 0:
# print("No files found!")
# return
if inputfiles != [inputf]:
print("Spotify files should all be imported together to identify duplicates across the whole dataset.")
if not ask("Import " + ", ".join(col['yellow'](pth.basename(i)) for i in inputfiles) + "?",default=True):
inputfiles = [inputf]
print("Only importing", col['yellow'](pth.basename(inputf)))
#if inputfiles != [inputf]:
# print("Spotify files should all be imported together to identify duplicates across the whole dataset.")
# if not ask("Import " + ", ".join(col['yellow'](pth.basename(i)) for i in inputfiles) + "?",default=True):
# inputfiles = [inputf]
# print("Only importing", col['yellow'](pth.basename(inputf)))
for inputf in inputfiles:
print("Importing",col['yellow'](inputf),"...")
#print("Importing",col['yellow'](inputf),"...")
with open(inputf,'r') as inputfd:
data = json.load(inputfd)
@ -207,21 +207,22 @@ def parse_spotify_lite(inputf):
inputf = pth.abspath(inputf)
inputfolder = pth.dirname(inputf)
filenames = re.compile(r'Streaming_History_Audio.+\.json')
inputfiles = [os.path.join(inputfolder,f) for f in os.listdir(inputfolder) if filenames.match(f)]
#inputfiles = [os.path.join(inputfolder,f) for f in os.listdir(inputfolder) if filenames.match(f)]
inputfiles = [inputf]
if len(inputfiles) == 0:
print("No files found!")
return
#if len(inputfiles) == 0:
# print("No files found!")
# return
if inputfiles != [inputf]:
print("Spotify files should all be imported together to identify duplicates across the whole dataset.")
if not ask("Import " + ", ".join(col['yellow'](pth.basename(i)) for i in inputfiles) + "?",default=True):
inputfiles = [inputf]
print("Only importing", col['yellow'](pth.basename(inputf)))
#if inputfiles != [inputf]:
# print("Spotify files should all be imported together to identify duplicates across the whole dataset.")
# if not ask("Import " + ", ".join(col['yellow'](pth.basename(i)) for i in inputfiles) + "?",default=True):
# inputfiles = [inputf]
# print("Only importing", col['yellow'](pth.basename(inputf)))
for inputf in inputfiles:
print("Importing",col['yellow'](inputf),"...")
#print("Importing",col['yellow'](inputf),"...")
with open(inputf,'r') as inputfd:
data = json.load(inputfd)
@ -267,17 +268,18 @@ def parse_spotify(inputf):
inputf = pth.abspath(inputf)
inputfolder = pth.dirname(inputf)
filenames = re.compile(r'endsong_[0-9]+\.json')
inputfiles = [os.path.join(inputfolder,f) for f in os.listdir(inputfolder) if filenames.match(f)]
#inputfiles = [os.path.join(inputfolder,f) for f in os.listdir(inputfolder) if filenames.match(f)]
inputfiles = [inputf]
if len(inputfiles) == 0:
print("No files found!")
return
#if len(inputfiles) == 0:
# print("No files found!")
# return
if inputfiles != [inputf]:
print("Spotify files should all be imported together to identify duplicates across the whole dataset.")
if not ask("Import " + ", ".join(col['yellow'](pth.basename(i)) for i in inputfiles) + "?",default=True):
inputfiles = [inputf]
print("Only importing", col['yellow'](pth.basename(inputf)))
#if inputfiles != [inputf]:
# print("Spotify files should all be imported together to identify duplicates across the whole dataset.")
# if not ask("Import " + ", ".join(col['yellow'](pth.basename(i)) for i in inputfiles) + "?",default=True):
# inputfiles = [inputf]
# print("Only importing", col['yellow'](pth.basename(inputf)))
# we keep timestamps here as well to remove duplicates because spotify's export
# is messy - this is specific to this import type and should not be mixed with
@ -288,7 +290,7 @@ def parse_spotify(inputf):
for inputf in inputfiles:
print("Importing",col['yellow'](inputf),"...")
#print("Importing",col['yellow'](inputf),"...")
with open(inputf,'r') as inputfd:
data = json.load(inputfd)