diff options
author | nil0j <nil.jimeno@estudiant.fjaverianas.com> | 2024-09-27 17:27:02 +0200 |
---|---|---|
committer | nil0j <nil.jimeno@estudiant.fjaverianas.com> | 2024-09-27 17:27:02 +0200 |
commit | 4328dd17bfcd4c33a77c4405d52c3660a6c62bf5 (patch) | |
tree | 62b48c5088fac13b93a3bcdaa85f4594601cf3fe /rust/scripts/populate/parser.py | |
parent | db58cb58b5d5612ec2aa347d0f8531b26ab2e7f3 (diff) |
update
Diffstat (limited to 'rust/scripts/populate/parser.py')
-rw-r--r-- | rust/scripts/populate/parser.py | 83 |
1 files changed, 83 insertions, 0 deletions
"""Parse saved album JSON files into ``structures`` objects.

Repository path: rust/scripts/populate/parser.py

The JSON files are expected to be named ``Lyrics_<name>.json`` (see
``getLink``) and to contain the keys read in ``process_json`` and
``analyze_song``.
"""
import json
import os
import re
import subprocess
import sys

import structures


def process_json_file(name, album_id, artist_id):
    """Read the album JSON file for *name* and parse it.

    Args:
        name: Album name; spaces are removed to build the file name.
        album_id: Identifier used for the local cover file name.
        artist_id: Identifier used for the local cover directory.

    Returns:
        Whatever ``process_json`` returns for the file's contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    link = getLink(name)

    # ``with`` guarantees the handle is closed even if ``read`` fails.
    with open(link, "r", encoding="utf-8") as file_json:
        album_json = file_json.read()

    return process_json(album_json, album_id, artist_id)


def getLink(name):
    """Return the JSON file name for album *name* (spaces removed)."""
    return f"Lyrics_{name.replace(' ', '')}.json"


def process_json(album_json, album_id, artist_id):
    """Build a ``structures.album`` from an album JSON document.

    Also makes sure the album cover is present locally (see
    ``download_cover``).

    Args:
        album_json: JSON text with the keys ``name``,
            ``cover_art_thumbnail_url``, ``release_date_components``
            (``year``/``month``/``day``) and ``tracks``.
        album_id: Identifier used for the local cover file name.
        artist_id: Identifier used for the local cover directory.

    Returns:
        The result of ``structures.album(name, cover, songs, artist_id,
        release)`` where ``release`` is ``[year, month, day]``.

    Raises:
        json.JSONDecodeError: If *album_json* is not valid JSON.
        KeyError: If an expected key is missing.
    """
    data = json.loads(album_json)
    album_name = data["name"].lower()

    off_cover = data["cover_art_thumbnail_url"]
    new_cover = get_cover_link(artist_id, album_id)
    download_cover(off_cover, new_cover, artist_id)

    # Each component is read from its own key; reading "year" three times
    # produced [year, year, year].
    components = data["release_date_components"]
    release = [components["year"], components["month"], components["day"]]

    songs = [analyze_song(song) for song in data["tracks"]]
    return structures.album(album_name, new_cover, songs, artist_id, release)


def get_cover_link(artist_id, album_id):
    """Return the relative path where an album cover is stored locally."""
    return f"covers/{artist_id}/{album_id}.png"


def download_cover(off_cover, new_cover, artist_id):
    """Download a cover image with ``wget`` unless it already exists.

    Args:
        off_cover: Remote URL of the cover image.
        new_cover: Local destination path of the image.
        artist_id: Identifier naming the directory under ``covers/``.
    """
    # Creates both ``covers`` and ``covers/<artist_id>`` without a shell.
    os.makedirs(f"covers/{artist_id}", exist_ok=True)

    if os.path.isfile(new_cover):
        return

    # Argument list instead of a shell string: URLs containing ``&``, ``;``
    # or quotes can neither break the command nor inject another one.
    # ``--`` stops option parsing so the URL is never read as a flag.
    try:
        subprocess.run(["wget", "-O", new_cover, "--", off_cover], check=False)
    except FileNotFoundError:
        # Stay best-effort when wget is missing, as the shell call did.
        print("wget not found; cover was not downloaded", file=sys.stderr)


def analyze_song(song):
    """Build a ``structures.song`` from one entry of the ``tracks`` list.

    Args:
        song: Mapping whose ``song`` entry holds ``title`` and ``lyrics``.

    Returns:
        The result of ``structures.song(lowercased_title, cleaned_lyrics)``.
    """
    details = song["song"]
    name = details["title"].lower()
    lyrics = format_lyrics(details["lyrics"])

    return structures.song(name, lyrics)


def format_lyrics(lyrics):
    """Strip page boilerplate from raw lyrics text and lowercase it.

    Steps: drop everything up to the first ``"Lyrics"`` marker, cut at the
    first ``"Embed"``, remove ``"You might also like"``, cut at a
    ``"See ... LiveGet"`` banner, delete ``(...)`` / ``[...]`` spans,
    collapse runs of three or more newlines to two, replace U+2005 with a
    plain space, and strip leading newlines.

    Args:
        lyrics: Raw lyrics text; may be empty or ``None``.

    Returns:
        The cleaned, lowercased lyrics, or ``""`` when there is no text.
    """
    if not lyrics:
        return ""

    # Keep everything after the first marker. Text without the marker is
    # kept as-is instead of raising IndexError.
    _, marker, remainder = lyrics.partition("Lyrics")
    if marker:
        lyrics = remainder
    lyrics = lyrics.lstrip()

    lyrics = lyrics.split("Embed")[0].rstrip()
    lyrics = lyrics.replace("You might also like", "")

    # NOTE(review): the original pattern embedded a literal line-separator
    # character between "See (.*?) " and "LiveGet" that did not survive in
    # the reviewed copy. ``\s+`` accepts any whitespace there; confirm
    # against real data.
    lyrics = re.sub(r'See (.*?)\s+LiveGet', 'liveget', lyrics)
    # Everything from the banner onwards is discarded.
    lyrics = lyrics.split("liveget")[0].rstrip()

    lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)
    lyrics = re.sub("\n{3,}", "\n\n", lyrics)

    lyrics = lyrics.replace("\u2005", " ")

    # ``lstrip`` is safe on an empty string, unlike indexing ``lyrics[0]``.
    return lyrics.lstrip("\n").lower()