diff options
Diffstat (limited to 'scrap')
-rw-r--r-- | scrap/.gitignore | 3 | ||||
-rw-r--r-- | scrap/albums.py | 38 | ||||
-rw-r--r-- | scrap/api.py | 21 | ||||
-rw-r--r-- | scrap/create_db.sql | 38 | ||||
-rw-r--r-- | scrap/database.py | 83 | ||||
-rw-r--r-- | scrap/main.py | 46 | ||||
-rw-r--r-- | scrap/parser.py | 78 | ||||
-rw-r--r-- | scrap/structures.py | 12 |
8 files changed, 0 insertions, 319 deletions
diff --git a/scrap/.gitignore b/scrap/.gitignore deleted file mode 100644 index 6922c3e..0000000 --- a/scrap/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -__pycache__/ -*.json -covers/ diff --git a/scrap/albums.py b/scrap/albums.py deleted file mode 100644 index 63114da..0000000 --- a/scrap/albums.py +++ /dev/null @@ -1,38 +0,0 @@ -album_data = [ - ["astrophysics", [ - "apathy", - "The Unending Need For Perpetual Motion", - "HOPE LEFT ME Complete Version", - "Selected Tragic", - "Cute Tragedies", - "selected apathetic", - "ENTITY", - ]], - ["sadsvit", [ - "Неонова Мрія Neon Dream", - "20 21", - "Cassette", - "Суматоха Bustle", - ]], - ["Молчат Дома", [ - "Белая Полоса White Stripe", - "Монумент Monument", - "Этажи Floors", - "С крыш наших домов From our houses rooftops", - ]], - ["amesoeurs", [ - "Amesoeurs", - ]], - ["severoth", [ - "Vsesvit", - "Winterfall", - ]], - ["sadness", [ - "Motionless Watching You", - "Circle of Veins", - ]], - ["Буерак", [ - "Голд Gold", - ]], - -] diff --git a/scrap/api.py b/scrap/api.py deleted file mode 100644 index 5cc24d3..0000000 --- a/scrap/api.py +++ /dev/null @@ -1,21 +0,0 @@ -import lyricsgenius -import parser -import os - - -genius = lyricsgenius.Genius( - "0uSA9UFGsiO2WozVmbWPhyhOoVmUNuM3PXRt9rvWhptHBMgSO5CZBxGUMkwet5mv" -) - - -def download_albums(albums): - [get_album_json(artist[0], album) - for artist in albums for album in artist[1]] - - -def get_album_json(artist_name, album_name): - if os.path.isfile(parser.getLink(album_name)): - return - - album = genius.search_album(album_name, artist_name) - album.save_lyrics() diff --git a/scrap/create_db.sql b/scrap/create_db.sql deleted file mode 100644 index bb5dbeb..0000000 --- a/scrap/create_db.sql +++ /dev/null @@ -1,38 +0,0 @@ -CREATE DATABASE IF NOT EXISTS balalaika; -USE balalaika; - -DROP TABLE IF EXISTS song; -DROP TABLE IF EXISTS album; -DROP TABLE IF EXISTS artist; - -CREATE TABLE artist ( - id int NOT NULL AUTO_INCREMENT, - name varchar(255), - - PRIMARY KEY (id) -); - -CREATE TABLE album ( - id int NOT NULL AUTO_INCREMENT, - name varchar(255), - cover varchar(510), - artist_id int, - - PRIMARY KEY (id), - FOREIGN KEY (artist_id) REFERENCES artist(id) -); - -CREATE TABLE song ( - id int NOT NULL AUTO_INCREMENT, - name varchar(255), - lyrics TEXT, - - album_id int, - - PRIMARY KEY (id), - FOREIGN KEY (album_id) REFERENCES album(id) -); - -ALTER TABLE song CONVERT TO CHARACTER SET utf8; -ALTER TABLE album CONVERT TO CHARACTER SET utf8; -ALTER TABLE artist CONVERT TO CHARACTER SET utf8; diff --git a/scrap/database.py b/scrap/database.py deleted file mode 100644 index 2319812..0000000 --- a/scrap/database.py +++ /dev/null @@ -1,83 +0,0 @@ -import mysql.connector -import getpass - - -def get_database_password(): - return getpass.getpass("Insert database password: ") - - -connector = mysql.connector.connect( - host="localhost", - user="balalaika_user", - password=get_database_password(), - database="balalaika", -) - -cursor = connector.cursor() - - - - -def process_albums(album_list): - [process_album(album, album_id) - for album_id, album in enumerate(album_list, 1)] - - -def process_album(album, album_id): - upload_album(album) - [upload_song(song, album_id) for song in album.songs] - - -def upload_album(album): - album.name = album.name.lower() - cursor.execute(""" - INSERT INTO album ( - name, cover, artist_id - ) - VALUES ( - %(name)s, %(cover)s, %(artist_id)s - ); - """, { - 'name': album.name, - 'cover': album.cover, - 'artist_id': album.artist - }) - - -def upload_song(song, album_id): - cursor.execute(""" - INSERT INTO song ( - name, lyrics, album_id - ) - VALUES ( - %(name)s, %(lyrics)s, %(album_id)s - ) - """, { - 'name': song.name, - 'lyrics': song.lyrics, - 'album_id': album_id - }) - - -def process_artists(artist_names): - [process_artist(artist) for artist in artist_names] - - -def process_artist(artist): - artist = artist.lower() - cursor.execute(""" - INSERT INTO artist ( - name - ) - VALUES ( - %(name)s - ) - """, { - 'name': artist, - }) - - -def close(): - cursor.close() - connector.commit() - connector.close() diff --git a/scrap/main.py b/scrap/main.py deleted file mode 100644 index ec0157c..0000000 --- a/scrap/main.py +++ /dev/null @@ -1,46 +0,0 @@ -from albums import album_data -import api -import parser -import database - - -def start(): - print("downloading data...") - api.download_albums(album_data) - - print("uploading data...") - upload_albums( - get_album_data(), - get_artist_names() - ) - database.close() - print("upload finished!") - print("remember to move the covers directory once you're done") - - -def upload_albums(album_data, artist_names): - database.process_artists(artist_names) - database.process_albums(album_data) - - -def get_album_data(): - result = [] - - for artist_id, artist in enumerate(album_data, 1): - album_list = artist[1] - - for album_id, album in enumerate(album_list, 1): - result.append(parser.process_json_file(album, album_id, artist_id)) - - return result - - -def get_artist_names(): - artist_data = [] - [artist_data.append( - artist[0] - ) for artist in album_data] - return artist_data - - -start() diff --git a/scrap/parser.py b/scrap/parser.py deleted file mode 100644 index c447a0d..0000000 --- a/scrap/parser.py +++ /dev/null @@ -1,78 +0,0 @@ -import json -import re -import structures -import os - - -def process_json_file(name, album_id, artist_id): - link = getLink(name) - - file_json = open(link, "r") - album_json = file_json.read() - file_json.close() - - return process_json(album_json, album_id, artist_id) - - -def getLink(name): - return "Lyrics_"+name.replace(" ", "")+".json" - - -def process_json(album_json, album_id, artist_id): - data = json.loads(album_json) - album_name = data["name"].lower() - - off_cover = data["cover_art_thumbnail_url"] - - new_cover = get_cover_link(artist_id, album_id) - download_cover(off_cover, new_cover, artist_id) - - songs = [analyze_song(song) for song in data["tracks"]] - return structures.album(album_name, new_cover, songs, artist_id) - - -def get_cover_link(artist_id, album_id): - cover_link = f"covers/{artist_id}/{album_id}.png" - return cover_link - - -def download_cover(off_cover, new_cover, artist_id): - if not os.path.isdir("covers"): - os.system("mkdir covers") - - if not os.path.isdir(f"covers/{artist_id}"): - os.system(f"mkdir 'covers/{artist_id}'") - - if not os.path.isfile(new_cover): - os.system(f"wget {off_cover} -O '{new_cover}'") - - -def analyze_song(song): - name = song["song"]["title"] - name = name.lower() - - lyrics = song["song"]["lyrics"] - lyrics = format_lyrics(lyrics) - - return structures.song(name, lyrics) - - -def format_lyrics(lyrics): - if lyrics != "": - lyrics = lyrics.split("Lyrics")[1].lstrip() - lyrics = lyrics.split("Embed")[0].rstrip() - lyrics = lyrics.replace("You might also like", "") - lyrics = re.sub(r'See (.*?) LiveGet', 'liveget', lyrics) - lyrics = lyrics.split("liveget")[0].rstrip() - - lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics) - lyrics = re.sub("\n{3,}", "\n\n", lyrics) - - lyrics = lyrics.replace("\u2005", " ") - - while lyrics[0] == '\n': - lyrics = lyrics[1:] - - lyrics = lyrics.lower() - - return lyrics diff --git a/scrap/structures.py b/scrap/structures.py deleted file mode 100644 index b139489..0000000 --- a/scrap/structures.py +++ /dev/null @@ -1,12 +0,0 @@ -class song: - def __init__(self, name, lyrics): - self.name = name - self.lyrics = lyrics - - -class album: - def __init__(self, name, cover, songs, artist_id): - self.name = name - self.cover = cover - self.songs = songs - self.artist = artist_id |