diff options
author | nil <nil@tfwhyno.gf> | 2024-10-02 17:48:56 +0200 |
---|---|---|
committer | nil <nil@tfwhyno.gf> | 2024-10-02 17:48:56 +0200 |
commit | 0972fa6eab8c9111311f082ba8abfdc6b4a40945 (patch) | |
tree | 4eedbdc1a7cfdeee9d651c9871a9f26ab6da56d4 /scripts/populate | |
parent | 012c2c03b29a987ca4eead023ded22e01aa7477b (diff) |
commit changes
Diffstat (limited to 'scripts/populate')
-rw-r--r-- | scripts/populate/.gitignore | 3 | ||||
-rw-r--r-- | scripts/populate/albums.py | 38 | ||||
-rw-r--r-- | scripts/populate/api.py | 21 | ||||
-rw-r--r-- | scripts/populate/create_db.sql | 39 | ||||
-rw-r--r-- | scripts/populate/database.py | 83 | ||||
-rw-r--r-- | scripts/populate/main.py | 46 | ||||
-rw-r--r-- | scripts/populate/parser.py | 83 | ||||
-rw-r--r-- | scripts/populate/structures.py | 13 |
8 files changed, 326 insertions, 0 deletions
diff --git a/scripts/populate/.gitignore b/scripts/populate/.gitignore
new file mode 100644
index 0000000..6922c3e
--- /dev/null
+++ b/scripts/populate/.gitignore
@@ -0,0 +1,3 @@
+__pycache__/
+*.json
+covers/
diff --git a/scripts/populate/albums.py b/scripts/populate/albums.py
new file mode 100644
index 0000000..63114da
--- /dev/null
+++ b/scripts/populate/albums.py
@@ -0,0 +1,38 @@
+album_data = [
+    ["astrophysics", [
+        "apathy",
+        "The Unending Need For Perpetual Motion",
+        "HOPE LEFT ME Complete Version",
+        "Selected Tragic",
+        "Cute Tragedies",
+        "selected apathetic",
+        "ENTITY",
+    ]],
+    ["sadsvit", [
+        "Неонова Мрія Neon Dream",
+        "20 21",
+        "Cassette",
+        "Суматоха Bustle",
+    ]],
+    ["Молчат Дома", [
+        "Белая Полоса White Stripe",
+        "Монумент Monument",
+        "Этажи Floors",
+        "С крыш наших домов From our houses rooftops",
+    ]],
+    ["amesoeurs", [
+        "Amesoeurs",
+    ]],
+    ["severoth", [
+        "Vsesvit",
+        "Winterfall",
+    ]],
+    ["sadness", [
+        "Motionless Watching You",
+        "Circle of Veins",
+    ]],
+    ["Буерак", [
+        "Голд Gold",
+    ]],
+
+]
diff --git a/scripts/populate/api.py b/scripts/populate/api.py
new file mode 100644
index 0000000..5cc24d3
--- /dev/null
+++ b/scripts/populate/api.py
@@ -0,0 +1,21 @@
+import lyricsgenius
+import parser
+import os
+
+
+genius = lyricsgenius.Genius(
+    os.environ["GENIUS_ACCESS_TOKEN"]  # never commit the token itself
+)
+
+
+def download_albums(albums):
+    [get_album_json(artist[0], album)
+     for artist in albums for album in artist[1]]
+
+
+def get_album_json(artist_name, album_name):
+    if os.path.isfile(parser.getLink(album_name)):
+        return
+
+    album = genius.search_album(album_name, artist_name)
+    album.save_lyrics()
diff --git a/scripts/populate/create_db.sql b/scripts/populate/create_db.sql
new file mode 100644
index 0000000..cfde7b0
--- /dev/null
+++ b/scripts/populate/create_db.sql
@@ -0,0 +1,39 @@
+CREATE DATABASE IF NOT EXISTS balalaika;
+USE balalaika;
+
+DROP TABLE IF EXISTS song;
+DROP TABLE IF EXISTS album;
+DROP TABLE IF EXISTS artist;
+
+CREATE TABLE artist (
+    id int NOT NULL AUTO_INCREMENT,
+    name varchar(255),
+
+    PRIMARY KEY (id)
+);
+
+CREATE TABLE album (
+    id int NOT NULL AUTO_INCREMENT,
+    name varchar(255),
+    cover varchar(255),
+    artist_id int,
+    `release` DATE,
+
+    PRIMARY KEY (id),
+    FOREIGN KEY (artist_id) REFERENCES artist(id)
+);
+
+CREATE TABLE song (
+    id int NOT NULL AUTO_INCREMENT,
+    name varchar(255),
+    lyrics TEXT,
+
+    album_id int,
+
+    PRIMARY KEY (id),
+    FOREIGN KEY (album_id) REFERENCES album(id)
+);
+
+ALTER TABLE song CONVERT TO CHARACTER SET utf8mb4;
+ALTER TABLE album CONVERT TO CHARACTER SET utf8mb4;
+ALTER TABLE artist CONVERT TO CHARACTER SET utf8mb4;
diff --git a/scripts/populate/database.py b/scripts/populate/database.py
new file mode 100644
index 0000000..2319812
--- /dev/null
+++ b/scripts/populate/database.py
@@ -0,0 +1,83 @@
+import mysql.connector
+import getpass
+
+
+def get_database_password():
+    return getpass.getpass("Insert database password: ")
+
+
+connector = mysql.connector.connect(
+    host="localhost",
+    user="balalaika_user",
+    password=get_database_password(),
+    database="balalaika",
+)
+
+cursor = connector.cursor()
+
+
+
+
+def process_albums(album_list):
+    [process_album(album, album_id)
+     for album_id, album in enumerate(album_list, 1)]
+
+
+def process_album(album, album_id):
+    upload_album(album)
+    [upload_song(song, album_id) for song in album.songs]
+
+
+def upload_album(album):
+    album.name = album.name.lower()
+    cursor.execute("""
+        INSERT INTO album (
+            name, cover, artist_id
+        )
+        VALUES (
+            %(name)s, %(cover)s, %(artist_id)s
+        );
+    """, {
+        'name': album.name,
+        'cover': album.cover,
+        'artist_id': album.artist
+    })
+
+
+def upload_song(song, album_id):
+    cursor.execute("""
+        INSERT INTO song (
+            name, lyrics, album_id
+        )
+        VALUES (
+            %(name)s, %(lyrics)s, %(album_id)s
+        )
+    """, {
+        'name': song.name,
+        'lyrics': song.lyrics,
+        'album_id': album_id
+    })
+
+
+def process_artists(artist_names):
+    [process_artist(artist) for artist in artist_names]
+
+
+def process_artist(artist):
+    artist = artist.lower()
+    cursor.execute("""
+        INSERT INTO artist (
+            name
+        )
+        VALUES (
+            %(name)s
+        )
+    """, {
+        'name': artist,
+    })
+
+
+def close():
+    cursor.close()
+    connector.commit()
+    connector.close()
diff --git a/scripts/populate/main.py b/scripts/populate/main.py
new file mode 100644
index 0000000..ec0157c
--- /dev/null
+++ b/scripts/populate/main.py
@@ -0,0 +1,46 @@
+from albums import album_data
+import api
+import parser
+import database
+
+
+def start():
+    print("downloading data...")
+    api.download_albums(album_data)
+
+    print("uploading data...")
+    upload_albums(
+        get_album_data(),
+        get_artist_names()
+    )
+    database.close()
+    print("upload finished!")
+    print("remember to move the covers directory once you're done")
+
+
+def upload_albums(album_data, artist_names):
+    database.process_artists(artist_names)
+    database.process_albums(album_data)
+
+
+def get_album_data():
+    result = []
+
+    for artist_id, artist in enumerate(album_data, 1):
+        album_list = artist[1]
+
+        for album_id, album in enumerate(album_list, 1):
+            result.append(parser.process_json_file(album, album_id, artist_id))
+
+    return result
+
+
+def get_artist_names():
+    artist_data = []
+    [artist_data.append(
+        artist[0]
+    ) for artist in album_data]
+    return artist_data
+
+
+start()
diff --git a/scripts/populate/parser.py b/scripts/populate/parser.py
new file mode 100644
index 0000000..cbb51b5
--- /dev/null
+++ b/scripts/populate/parser.py
@@ -0,0 +1,83 @@
+import json
+import re
+import structures
+import os
+
+
+def process_json_file(name, album_id, artist_id):
+    link = getLink(name)
+
+    # explicit utf-8 so the read does not depend on the platform default
+    with open(link, "r", encoding="utf-8") as file_json:
+        album_json = file_json.read()
+
+    return process_json(album_json, album_id, artist_id)
+
+
+def getLink(name):
+    return "Lyrics_"+name.replace(" ", "")+".json"
+
+
+def process_json(album_json, album_id, artist_id):
+    data = json.loads(album_json)
+    album_name = data["name"].lower()
+
+    off_cover = data["cover_art_thumbnail_url"]
+
+    new_cover = get_cover_link(artist_id, album_id)
+    download_cover(off_cover, new_cover, artist_id)
+
+    year = data["release_date_components"]["year"]
+    month = data["release_date_components"]["month"]
+    day = data["release_date_components"]["day"]
+    release = [year, month, day]
+
+    songs = [analyze_song(song) for song in data["tracks"]]
+    return structures.album(album_name, new_cover, songs, artist_id, release)
+
+
+def get_cover_link(artist_id, album_id):
+    cover_link = f"covers/{artist_id}/{album_id}.png"
+    return cover_link
+
+
+def download_cover(off_cover, new_cover, artist_id):
+    import subprocess
+
+    # makedirs creates covers/ and covers/<artist_id>/ in a single call
+    os.makedirs(f"covers/{artist_id}", exist_ok=True)
+
+    if not os.path.isfile(new_cover):
+        # argument list, no shell: the URL comes from downloaded JSON
+        subprocess.run(["wget", off_cover, "-O", new_cover])
+
+
+def analyze_song(song):
+    name = song["song"]["title"]
+    name = name.lower()
+
+    lyrics = song["song"]["lyrics"]
+    lyrics = format_lyrics(lyrics)
+
+    return structures.song(name, lyrics)
+
+
+def format_lyrics(lyrics):
+    if lyrics != "":
+        lyrics = lyrics.split("Lyrics", 1)[-1].lstrip()
+        lyrics = lyrics.split("Embed")[0].rstrip()
+        lyrics = lyrics.replace("You might also like", "")
+        lyrics = re.sub(r'See (.*?) LiveGet', 'liveget', lyrics)
+        lyrics = lyrics.split("liveget")[0].rstrip()
+
+        lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)
+        lyrics = re.sub("\n{3,}", "\n\n", lyrics)
+
+        lyrics = lyrics.replace("\u2005", " ")
+
+        # lstrip is safe when nothing is left, unlike indexing lyrics[0]
+        lyrics = lyrics.lstrip("\n")
+
+    lyrics = lyrics.lower()
+
+    return lyrics
diff --git a/scripts/populate/structures.py b/scripts/populate/structures.py
new file mode 100644
index 0000000..31feb1b
--- /dev/null
+++ b/scripts/populate/structures.py
@@ -0,0 +1,13 @@
+class song:
+    def __init__(self, name, lyrics):
+        self.name = name
+        self.lyrics = lyrics
+
+
+class album:
+    def __init__(self, name, cover, songs, artist_id, release=None):
+        self.name = name
+        self.cover = cover
+        self.songs = songs
+        self.artist = artist_id
+        self.release = release