From 61d3015443747f96854e17a1279a28b5ce9a001f Mon Sep 17 00:00:00 2001 From: krateng Date: Sat, 1 Apr 2023 14:21:12 +0200 Subject: [PATCH 1/4] Made image fetching asynchronous to incoming image requests --- maloja/images.py | 222 ++++++++++++++++++++++++++--------------------- maloja/server.py | 16 ++-- 2 files changed, 130 insertions(+), 108 deletions(-) diff --git a/maloja/images.py b/maloja/images.py index e6ffec7..639e464 100644 --- a/maloja/images.py +++ b/maloja/images.py @@ -12,7 +12,8 @@ import base64 import requests import datauri import io -from threading import Thread, Timer, BoundedSemaphore +from threading import Lock +from concurrent.futures import ThreadPoolExecutor import re import datetime @@ -25,6 +26,8 @@ DB = {} engine = sql.create_engine(f"sqlite:///{data_dir['cache']('images.sqlite')}", echo = False) meta = sql.MetaData() +dblock = Lock() + DB['artists'] = sql.Table( 'artists', meta, sql.Column('id',sql.Integer,primary_key=True), @@ -49,8 +52,18 @@ DB['albums'] = sql.Table( meta.create_all(engine) -def get_image_from_cache(id,table): +def get_image_from_cache(track_id=None,artist_id=None,album_id=None): now = int(datetime.datetime.now().timestamp()) + if track_id: + table = 'tracks' + id = track_id + elif album_id: + table = 'albums' + id = album_id + elif artist_id: + table = 'artists' + id = artist_id + with engine.begin() as conn: op = DB[table].select().where( DB[table].c.id==id, @@ -66,29 +79,31 @@ def get_image_from_cache(id,table): def set_image_in_cache(id,table,url): remove_image_from_cache(id,table) - now = int(datetime.datetime.now().timestamp()) - if url is None: - expire = now + (malojaconfig["CACHE_EXPIRE_NEGATIVE"] * 24 * 3600) - else: - expire = now + (malojaconfig["CACHE_EXPIRE_POSITIVE"] * 24 * 3600) + with dblock: + now = int(datetime.datetime.now().timestamp()) + if url is None: + expire = now + (malojaconfig["CACHE_EXPIRE_NEGATIVE"] * 24 * 3600) + else: + expire = now + (malojaconfig["CACHE_EXPIRE_POSITIVE"] * 24 * 3600) - raw = dl_image(url) + raw = dl_image(url) - with engine.begin() as conn: - op = DB[table].insert().values( - id=id, - url=url, - expire=expire, - raw=raw - ) - result = conn.execute(op) + with engine.begin() as conn: + op = DB[table].insert().values( + id=id, + url=url, + expire=expire, + raw=raw + ) + result = conn.execute(op) def remove_image_from_cache(id,table): - with engine.begin() as conn: - op = DB[table].delete().where( - DB[table].c.id==id, - ) - result = conn.execute(op) + with dblock: + with engine.begin() as conn: + op = DB[table].delete().where( + DB[table].c.id==id, + ) + result = conn.execute(op) def dl_image(url): if not malojaconfig["PROXY_IMAGES"]: return None @@ -107,122 +122,131 @@ def dl_image(url): + +resolver = ThreadPoolExecutor(max_workers=5) + ### getting images for any website embedding now ALWAYS returns just the generic link ### even if we have already cached it, we will handle that on request def get_track_image(track=None,track_id=None): if track_id is None: track_id = database.sqldb.get_track_id(track,create_new=False) - return f"/image?type=track&id={track_id}" + if malojaconfig["USE_ALBUM_ARTWORK_FOR_TRACKS"]: + if track is None: + track = database.sqldb.get_track(track_id) + if track.get("album"): + album_id = database.sqldb.get_album_id(track["album"]) + return get_album_image(album_id=album_id) + resolver.submit(resolve_image,track_id=track_id) + + return f"/image?track_id={track_id}" def get_artist_image(artist=None,artist_id=None): if artist_id is None: artist_id = database.sqldb.get_artist_id(artist,create_new=False) - return f"/image?type=artist&id={artist_id}" + resolver.submit(resolve_image,artist_id=artist_id) + + return f"/image?artist_id={artist_id}" def get_album_image(album=None,album_id=None): if album_id is None: album_id = database.sqldb.get_album_id(album,create_new=False) - return f"/image?type=album&id={album_id}" + resolver.submit(resolve_image,album_id=album_id) + + return f"/image?album_id={album_id}" -resolve_semaphore = BoundedSemaphore(8) +# this is to keep track of what is currently being resolved +# so new requests know that they don't need to queue another resolve +image_resolve_controller_lock = Lock() +image_resolve_controller = { + 'artists':set(), + 'albums':set(), + 'tracks':set() +} +# this function doesn't need to return any info +# it runs async to do all the work that takes time and only needs to write the result +# to the cache so the synchronous functions (http requests) can access it +def resolve_image(artist_id=None,track_id=None,album_id=None): + result = get_image_from_cache(artist_id=artist_id,track_id=track_id,album_id=album_id) + if result is not None: + # No need to do anything + return -def resolve_track_image(track_id): + if artist_id: + entitytype = 'artist' + table = 'artists' + getfunc, entity_id = database.sqldb.get_artist, artist_id + elif track_id: + entitytype = 'track' + table = 'tracks' + getfunc, entity_id = database.sqldb.get_track, track_id + elif album_id: + entitytype = 'album' + table = 'albums' + getfunc, entity_id = database.sqldb.get_album, album_id - if malojaconfig["USE_ALBUM_ARTWORK_FOR_TRACKS"]: - track = database.sqldb.get_track(track_id) - if "album" in track: - album_id = database.sqldb.get_album_id(track["album"]) - albumart = resolve_album_image(album_id) - if albumart: - return albumart + # is another thread already working on this? + with image_resolve_controller_lock: + if entity_id in image_resolve_controller[table]: + return + else: + image_resolve_controller[table].add(entity_id) - with resolve_semaphore: - # check cache - result = get_image_from_cache(track_id,'tracks') - if result is not None: - return result - - track = database.sqldb.get_track(track_id) + try: + entity = getfunc(entity_id) # local image if malojaconfig["USE_LOCAL_IMAGES"]: - images = local_files(track=track) + images = local_files(**{entitytype: entity}) if len(images) != 0: result = random.choice(images) result = urllib.parse.quote(result) result = {'type':'url','value':result} - set_image_in_cache(track_id,'tracks',result['value']) + set_image_in_cache(artist_id or track_id or album_id,table,result['value']) return result # third party - result = thirdparty.get_image_track_all((track['artists'],track['title'])) + if artist_id: + result = thirdparty.get_image_artist_all(entity) + elif track_id: + result = thirdparty.get_image_track_all((entity['artists'],entity['title'])) + elif album_id: + result = thirdparty.get_image_album_all((entity['artists'],entity['albumtitle'])) + result = {'type':'url','value':result} - set_image_in_cache(track_id,'tracks',result['value']) + set_image_in_cache(artist_id or track_id or album_id,table,result['value']) + finally: + with image_resolve_controller_lock: + image_resolve_controller[table].remove(entity_id) + + +# the actual http request for the full image +def image_request(artist_id=None,track_id=None,album_id=None): + # check cache + result = get_image_from_cache(artist_id=artist_id,track_id=track_id,album_id=album_id) + if result is not None: + # we got an entry, even if it's that there is no image (value None) + if result['value'] is None: + # use placeholder + placeholder_url = "https://generative-placeholders.glitch.me/image?width=300&height=300&style=" + if artist_id: + result['value'] = placeholder_url + f"123&colors={artist_id % 100}" + if track_id: + result['value'] = placeholder_url + f"triangles&colors={track_id % 100}" + if album_id: + result['value'] = placeholder_url + f"joy-division&colors={album_id % 100}" return result + else: + # no entry, which means we're still working on it + return {'type':'noimage','value':'wait'} -def resolve_artist_image(artist_id): - - with resolve_semaphore: - # check cache - result = get_image_from_cache(artist_id,'artists') - if result is not None: - return result - - artist = database.sqldb.get_artist(artist_id) - - # local image - if malojaconfig["USE_LOCAL_IMAGES"]: - images = local_files(artist=artist) - if len(images) != 0: - result = random.choice(images) - result = urllib.parse.quote(result) - result = {'type':'url','value':result} - set_image_in_cache(artist_id,'artists',result['value']) - return result - - # third party - result = thirdparty.get_image_artist_all(artist) - result = {'type':'url','value':result} - set_image_in_cache(artist_id,'artists',result['value']) - - return result - - -def resolve_album_image(album_id): - - with resolve_semaphore: - # check cache - result = get_image_from_cache(album_id,'albums') - if result is not None: - return result - - album = database.sqldb.get_album(album_id) - - # local image - if malojaconfig["USE_LOCAL_IMAGES"]: - images = local_files(album=album) - if len(images) != 0: - result = random.choice(images) - result = urllib.parse.quote(result) - result = {'type':'url','value':result} - set_image_in_cache(album_id,'tracks',result['value']) - return result - - # third party - result = thirdparty.get_image_album_all((album['artists'],album['albumtitle'])) - result = {'type':'url','value':result} - set_image_in_cache(album_id,'albums',result['value']) - - return result - # removes emojis and weird shit from names def clean(name): diff --git a/maloja/server.py b/maloja/server.py index a071c98..a92c1e9 100644 --- a/maloja/server.py +++ b/maloja/server.py @@ -19,7 +19,7 @@ from doreah import auth # rest of the project from . import database from .database.jinjaview import JinjaDBConnection -from .images import resolve_track_image, resolve_artist_image, resolve_album_image +from .images import image_request from .malojauri import uri_to_internal, remove_identical from .pkg_global.conf import malojaconfig, data_dir from .pkg_global import conf @@ -121,15 +121,13 @@ def deprecated_api(pth): @webserver.route("/image") def dynamic_image(): keys = FormsDict.decode(request.query) - if keys['type'] == 'track': - result = resolve_track_image(keys['id']) - elif keys['type'] == 'artist': - result = resolve_artist_image(keys['id']) - elif keys['type'] == 'album': - result = resolve_album_image(keys['id']) + result = image_request(**{k:int(keys[k]) for k in keys}) - if result is None or result['value'] in [None,'']: - return "" + if result['type'] == 'noimage' and result['value'] == 'wait': + # still being worked on + response.status = 503 + response.set_header('Retry-After',5) + return if result['type'] == 'raw': # data uris are directly served as image because a redirect to a data uri # doesnt work From 31661c41411e13a071c32b261b0cb3744455b8ef Mon Sep 17 00:00:00 2001 From: krateng Date: Sat, 1 Apr 2023 15:28:33 +0200 Subject: [PATCH 2/4] Improved image proxying --- maloja/data_files/cache/images/dummy | 0 maloja/images.py | 67 ++++++++++++++++++---------- maloja/server.py | 11 ++--- 3 files changed, 48 insertions(+), 30 deletions(-) create mode 100644 maloja/data_files/cache/images/dummy diff --git a/maloja/data_files/cache/images/dummy b/maloja/data_files/cache/images/dummy new file mode 100644 index 0000000..e69de29 diff --git a/maloja/images.py b/maloja/images.py index 639e464..19eb6b3 100644 --- a/maloja/images.py +++ b/maloja/images.py @@ -21,9 +21,14 @@ import sqlalchemy as sql +# remove old db file (columns missing) +try: + os.remove(data_dir['cache']('images.sqlite')) +except: + pass DB = {} -engine = sql.create_engine(f"sqlite:///{data_dir['cache']('images.sqlite')}", echo = False) +engine = sql.create_engine(f"sqlite:///{data_dir['cache']('imagecache.sqlite')}", echo = False) meta = sql.MetaData() dblock = Lock() @@ -33,51 +38,61 @@ DB['artists'] = sql.Table( sql.Column('id',sql.Integer,primary_key=True), sql.Column('url',sql.String), sql.Column('expire',sql.Integer), - sql.Column('raw',sql.String) +# sql.Column('raw',sql.String) + sql.Column('local',sql.Boolean), + sql.Column('localproxyurl',sql.String) ) DB['tracks'] = sql.Table( 'tracks', meta, sql.Column('id',sql.Integer,primary_key=True), sql.Column('url',sql.String), sql.Column('expire',sql.Integer), - sql.Column('raw',sql.String) +# sql.Column('raw',sql.String) + sql.Column('local',sql.Boolean), + sql.Column('localproxyurl',sql.String) ) DB['albums'] = sql.Table( 'albums', meta, sql.Column('id',sql.Integer,primary_key=True), sql.Column('url',sql.String), sql.Column('expire',sql.Integer), - sql.Column('raw',sql.String) +# sql.Column('raw',sql.String) + sql.Column('local',sql.Boolean), + sql.Column('localproxyurl',sql.String) ) meta.create_all(engine) + + def get_image_from_cache(track_id=None,artist_id=None,album_id=None): now = int(datetime.datetime.now().timestamp()) if track_id: table = 'tracks' - id = track_id + entity_id = track_id elif album_id: table = 'albums' - id = album_id + entity_id = album_id elif artist_id: table = 'artists' - id = artist_id + entity_id = artist_id with engine.begin() as conn: op = DB[table].select().where( - DB[table].c.id==id, + DB[table].c.id==entity_id, DB[table].c.expire>now ) result = conn.execute(op).all() for row in result: - if row.raw is not None: - return {'type':'raw','value':row.raw} + if row.local: + return {'type':'localurl','value':row.url} + elif row.localproxyurl: + return {'type':'localurl','value':row.localproxyurl} else: return {'type':'url','value':row.url} # returns None as value if nonexistence cached return None # no cache entry -def set_image_in_cache(id,table,url): +def set_image_in_cache(id,table,url,local=False): remove_image_from_cache(id,table) with dblock: now = int(datetime.datetime.now().timestamp()) @@ -86,14 +101,18 @@ def set_image_in_cache(id,table,url): else: expire = now + (malojaconfig["CACHE_EXPIRE_POSITIVE"] * 24 * 3600) - raw = dl_image(url) + if not local and malojaconfig["PROXY_IMAGES"] and url is not None: + localproxyurl = dl_image(url) + else: + localproxyurl = None with engine.begin() as conn: op = DB[table].insert().values( id=id, url=url, expire=expire, - raw=raw + local=local, + localproxyurl=localproxyurl ) result = conn.execute(op) @@ -105,17 +124,19 @@ def remove_image_from_cache(id,table): ) result = conn.execute(op) + # TODO delete proxy + def dl_image(url): - if not malojaconfig["PROXY_IMAGES"]: return None - if url is None: return None - if url.startswith("/"): return None #local image try: r = requests.get(url) mime = r.headers.get('content-type') or 'image/jpg' data = io.BytesIO(r.content).read() - uri = datauri.DataURI.make(mime,charset='ascii',base64=True,data=data) - log(f"Downloaded {url} for local caching") - return uri + #uri = datauri.DataURI.make(mime,charset='ascii',base64=True,data=data) + targetname = '%030x' % random.getrandbits(128) + targetpath = data_dir['cache']('images',targetname) + with open(targetpath,'wb') as fd: + fd.write(data) + return os.path.join("cacheimages",targetname) except Exception: log(f"Image {url} could not be downloaded for local caching") return None @@ -206,8 +227,8 @@ def resolve_image(artist_id=None,track_id=None,album_id=None): if len(images) != 0: result = random.choice(images) result = urllib.parse.quote(result) - result = {'type':'url','value':result} - set_image_in_cache(artist_id or track_id or album_id,table,result['value']) + result = {'type':'localurl','value':result} + set_image_in_cache(artist_id or track_id or album_id,table,result['value'],local=True) return result # third party @@ -236,7 +257,7 @@ def image_request(artist_id=None,track_id=None,album_id=None): # use placeholder placeholder_url = "https://generative-placeholders.glitch.me/image?width=300&height=300&style=" if artist_id: - result['value'] = placeholder_url + f"123&colors={artist_id % 100}" + result['value'] = placeholder_url + f"tiles&colors={artist_id % 100}" if track_id: result['value'] = placeholder_url + f"triangles&colors={track_id % 100}" if album_id: @@ -377,6 +398,6 @@ def set_image(b64,**keys): log("Saved image as " + data_dir['images'](folder,filename),module="debug") # set as current picture in rotation - set_image_in_cache(id,dbtable,os.path.join("/images",folder,filename)) + set_image_in_cache(id,dbtable,os.path.join("/images",folder,filename),local=True) return os.path.join("/images",folder,filename) diff --git a/maloja/server.py b/maloja/server.py index a92c1e9..540f65c 100644 --- a/maloja/server.py +++ b/maloja/server.py @@ -128,13 +128,7 @@ def dynamic_image(): response.status = 503 response.set_header('Retry-After',5) return - if result['type'] == 'raw': - # data uris are directly served as image because a redirect to a data uri - # doesnt work - duri = datauri.DataURI(result['value']) - response.content_type = duri.mimetype - return duri.data - if result['type'] == 'url': + if result['type'] in ('url','localurl'): redirect(result['value'],307) @webserver.route("/images/") @@ -161,6 +155,9 @@ def static_image(pth): resp.set_header("Content-Type", "image/" + ext) return resp +@webserver.route("/cacheimages/") +def static_proxied_image(uuid): + return static_file(uuid,root=data_dir['cache']('images')) @webserver.route("/login") def login(): From 3b286bd7f288a6e3ef4be391d33d1bdb36b8ae51 Mon Sep 17 00:00:00 2001 From: krateng Date: Sat, 1 Apr 2023 15:31:18 +0200 Subject: [PATCH 3/4] Small fix to entity main image display --- maloja/web/jinja/album.jinja | 4 ++-- maloja/web/jinja/artist.jinja | 4 ++-- maloja/web/jinja/track.jinja | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/maloja/web/jinja/album.jinja b/maloja/web/jinja/album.jinja index 6a16da3..8dafdaf 100644 --- a/maloja/web/jinja/album.jinja +++ b/maloja/web/jinja/album.jinja @@ -45,11 +45,11 @@ {% if adminmode %}
{% else %} -
+
{% endif %} diff --git a/maloja/web/jinja/artist.jinja b/maloja/web/jinja/artist.jinja index 29d8879..8249b99 100644 --- a/maloja/web/jinja/artist.jinja +++ b/maloja/web/jinja/artist.jinja @@ -53,11 +53,11 @@ {% if adminmode %}
{% else %} -
+
{% endif %} diff --git a/maloja/web/jinja/track.jinja b/maloja/web/jinja/track.jinja index 3fbc909..982097e 100644 --- a/maloja/web/jinja/track.jinja +++ b/maloja/web/jinja/track.jinja @@ -50,11 +50,11 @@ {% if adminmode %}
{% else %} -
+
{% endif %} From 31aaf23d808bc5adc845f4fb804eff9fa26e6476 Mon Sep 17 00:00:00 2001 From: krateng Date: Sat, 1 Apr 2023 15:46:15 +0200 Subject: [PATCH 4/4] Refactored images a bit --- maloja/images.py | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/maloja/images.py b/maloja/images.py index 19eb6b3..151c0cd 100644 --- a/maloja/images.py +++ b/maloja/images.py @@ -21,6 +21,9 @@ import sqlalchemy as sql +MAX_RESOLVE_THREADS = 10 + + # remove old db file (columns missing) try: os.remove(data_dir['cache']('images.sqlite')) @@ -63,19 +66,17 @@ DB['albums'] = sql.Table( meta.create_all(engine) - +def get_id_and_table(track_id=None,artist_id=None,album_id=None): + if track_id: + return track_id,'tracks' + elif album_id: + return album_id,'albums' + elif artist_id: + return artist_id,'artists' def get_image_from_cache(track_id=None,artist_id=None,album_id=None): now = int(datetime.datetime.now().timestamp()) - if track_id: - table = 'tracks' - entity_id = track_id - elif album_id: - table = 'albums' - entity_id = album_id - elif artist_id: - table = 'artists' - entity_id = artist_id + entity_id, table = get_id_and_table(track_id=track_id,artist_id=artist_id,album_id=album_id) with engine.begin() as conn: op = DB[table].select().where( @@ -92,8 +93,10 @@ def get_image_from_cache(track_id=None,artist_id=None,album_id=None): return {'type':'url','value':row.url} # returns None as value if nonexistence cached return None # no cache entry -def set_image_in_cache(id,table,url,local=False): - remove_image_from_cache(id,table) +def set_image_in_cache(url,track_id=None,artist_id=None,album_id=None,local=False): + remove_image_from_cache(track_id=track_id,artist_id=artist_id,album_id=album_id) + entity_id, table = get_id_and_table(track_id=track_id,artist_id=artist_id,album_id=album_id) + with dblock: now = int(datetime.datetime.now().timestamp()) if url is None: @@ -108,7 +111,7 @@ def set_image_in_cache(id,table,url,local=False): with engine.begin() as conn: op = DB[table].insert().values( - id=id, + id=entity_id, url=url, expire=expire, local=local, @@ -116,11 +119,13 @@ def set_image_in_cache(id,table,url,local=False): ) result = conn.execute(op) -def remove_image_from_cache(id,table): +def remove_image_from_cache(track_id=None,artist_id=None,album_id=None): + entity_id, table = get_id_and_table(track_id=track_id,artist_id=artist_id,album_id=album_id) + with dblock: with engine.begin() as conn: op = DB[table].delete().where( - DB[table].c.id==id, + DB[table].c.id==entity_id, ) result = conn.execute(op) @@ -136,7 +141,7 @@ def dl_image(url): targetpath = data_dir['cache']('images',targetname) with open(targetpath,'wb') as fd: fd.write(data) - return os.path.join("cacheimages",targetname) + return os.path.join("/cacheimages",targetname) except Exception: log(f"Image {url} could not be downloaded for local caching") return None @@ -144,7 +149,7 @@ def dl_image(url): -resolver = ThreadPoolExecutor(max_workers=5) +resolver = ThreadPoolExecutor(max_workers=MAX_RESOLVE_THREADS) ### getting images for any website embedding now ALWAYS returns just the generic link ### even if we have already cached it, we will handle that on request @@ -228,7 +233,7 @@ def resolve_image(artist_id=None,track_id=None,album_id=None): result = random.choice(images) result = urllib.parse.quote(result) result = {'type':'localurl','value':result} - set_image_in_cache(artist_id or track_id or album_id,table,result['value'],local=True) + set_image_in_cache(artist_id=artist_id,track_id=track_id,album_id=album_id,url=result['value'],local=True) return result # third party @@ -240,7 +245,7 @@ def resolve_image(artist_id=None,track_id=None,album_id=None): result = thirdparty.get_image_album_all((entity['artists'],entity['albumtitle'])) result = {'type':'url','value':result} - set_image_in_cache(artist_id or track_id or album_id,table,result['value']) + set_image_in_cache(artist_id=artist_id,track_id=track_id,album_id=album_id,url=result['value']) finally: with image_resolve_controller_lock: image_resolve_controller[table].remove(entity_id) @@ -364,14 +369,17 @@ def set_image(b64,**keys): if "title" in keys: entity = {"track":keys} id = database.sqldb.get_track_id(entity['track']) + idkeys = {'track_id':id} dbtable = "tracks" elif "albumtitle" in keys: entity = {"album":keys} id = database.sqldb.get_album_id(entity['album']) + idkeys = {'album_id':id} dbtable = "albums" elif "artist" in keys: entity = keys id = database.sqldb.get_artist_id(entity['artist']) + idkeys = {'artist_id':id} dbtable = "artists" log("Trying to set image, b64 string: " + str(b64[:30] + "..."),module="debug") @@ -398,6 +406,6 @@ def set_image(b64,**keys): log("Saved image as " + data_dir['images'](folder,filename),module="debug") # set as current picture in rotation - set_image_in_cache(id,dbtable,os.path.join("/images",folder,filename),local=True) + set_image_in_cache(**idkeys,url=os.path.join("/images",folder,filename),local=True) return os.path.join("/images",folder,filename)