diff options
-rw-r--r-- | scrap/albums.py | 2 | ||||
-rw-r--r-- | scrap/database.py | 6 | ||||
-rw-r--r-- | scrap/main.py | 19 | ||||
-rw-r--r-- | scrap/parser.py | 37 |
4 files changed, 27 insertions, 37 deletions
diff --git a/scrap/albums.py b/scrap/albums.py index 5ff5a36..63114da 100644 --- a/scrap/albums.py +++ b/scrap/albums.py @@ -1,4 +1,4 @@ -album_list = [ +album_data = [ ["astrophysics", [ "apathy", "The Unending Need For Perpetual Motion", diff --git a/scrap/database.py b/scrap/database.py index 3934361..dab2592 100644 --- a/scrap/database.py +++ b/scrap/database.py @@ -56,15 +56,16 @@ def setup(): def process_albums(album_list): [process_album(album, album_id) - for album_id, album in enumerate(album_list)] + for album_id, album in enumerate(album_list, 1)] def process_album(album, album_id): upload_album(album) - [upload_song(song, album_id+1) for song in album.songs] + [upload_song(song, album_id) for song in album.songs] def upload_album(album): + album.name = album.name.lower() cursor.execute(""" INSERT INTO album ( name, cover, artist_id @@ -99,6 +100,7 @@ def process_artists(artist_names): def process_artist(artist): + artist = artist.lower() cursor.execute(""" INSERT INTO artist ( name diff --git a/scrap/main.py b/scrap/main.py index 46d5853..fbed3cd 100644 --- a/scrap/main.py +++ b/scrap/main.py @@ -1,4 +1,4 @@ -from albums import album_list +from albums import album_data import api import parser import database @@ -6,7 +6,7 @@ import database def start(): print("downloading data...") - api.download_albums(album_list) + api.download_albums(album_data) print("uploading data...") database.setup() @@ -25,23 +25,22 @@ def upload_albums(album_data, artist_names): def get_album_data(): - album_data = [] - artist_id = 0 + result = [] - for artist in album_list: - artist_id += 1 + for artist_id, artist in enumerate(album_data, 1): + album_list = artist[1] - for album in artist[1]: - album_data.append(parser.process_json_file(album, artist_id)) + for album_id, album in enumerate(album_list, 1): + result.append(parser.process_json_file(album, album_id, artist_id)) - return album_data + return result def get_artist_names(): artist_data = [] [artist_data.append( artist[0] - ) for artist in album_list] + ) for artist in album_data] return artist_data diff --git a/scrap/parser.py b/scrap/parser.py index a191fa2..c447a0d 100644 --- a/scrap/parser.py +++ b/scrap/parser.py @@ -4,54 +4,44 @@ import structures import os -def process_json_file(name, artist_id): +def process_json_file(name, album_id, artist_id): link = getLink(name) file_json = open(link, "r") album_json = file_json.read() file_json.close() - return process_json(album_json, artist_id) + return process_json(album_json, album_id, artist_id) def getLink(name): return "Lyrics_"+name.replace(" ", "")+".json" -def process_json(album_json, artist_id): +def process_json(album_json, album_id, artist_id): data = json.loads(album_json) - name = data["name"].lower() + album_name = data["name"].lower() - artist_name_api = data["artist"]["name"].lower() - artist_name = format_artist_name(artist_name_api) off_cover = data["cover_art_thumbnail_url"] - new_cover = get_cover_link(artist_name, name) - download_cover(off_cover, new_cover, artist_name) + new_cover = get_cover_link(artist_id, album_id) + download_cover(off_cover, new_cover, artist_id) songs = [analyze_song(song) for song in data["tracks"]] - return structures.album(name, new_cover, songs, artist_id) + return structures.album(album_name, new_cover, songs, artist_id) -def format_artist_name(name): - name = name.replace(" ", "") - name = re.sub(r'[\(\[].*?[\)\]]', '', name) - return name +def get_cover_link(artist_id, album_id): + cover_link = f"covers/{artist_id}/{album_id}.png" + return cover_link -def get_cover_link(artist_name, album_name): - album_name = album_name.replace(" ", "") - location = f"covers/{artist_name}/{album_name}.png" - - return location - - -def download_cover(off_cover, new_cover, artist_name): +def download_cover(off_cover, new_cover, artist_id): if not os.path.isdir("covers"): os.system("mkdir covers") - if not os.path.isdir(f"covers/{artist_name}"): - os.system(f"mkdir 'covers/{artist_name}'") + if not os.path.isdir(f"covers/{artist_id}"): + os.system(f"mkdir 'covers/{artist_id}'") if not os.path.isfile(new_cover): os.system(f"wget {off_cover} -O '{new_cover}'") @@ -78,7 +68,6 @@ def format_lyrics(lyrics): lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics) lyrics = re.sub("\n{3,}", "\n\n", lyrics) - lyrics = lyrics.replace("\u0435", "e") lyrics = lyrics.replace("\u2005", " ") while lyrics[0] == '\n': |