summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author nil0j <nil.jimeno@estudiant.fjaverianas.com> 2024-09-26 17:13:42 +0200
committer nil0j <nil.jimeno@estudiant.fjaverianas.com> 2024-09-26 17:13:42 +0200
commit 950bb3533cf59e074f485dfbb48fcf931e245ea6 (patch)
tree 23be01e7584d7227451c2c710f1fe1add4dfa499
parent abad2ebad9c78e3978343aa0fc572a87dbf9fa78 (diff)
update scrap
-rw-r--r-- scrap/albums.py 2
-rw-r--r-- scrap/database.py 6
-rw-r--r-- scrap/main.py 19
-rw-r--r-- scrap/parser.py 37
4 files changed, 27 insertions, 37 deletions
diff --git a/scrap/albums.py b/scrap/albums.py
index 5ff5a36..63114da 100644
--- a/scrap/albums.py
+++ b/scrap/albums.py
@@ -1,4 +1,4 @@
-album_list = [
+album_data = [
["astrophysics", [
"apathy",
"The Unending Need For Perpetual Motion",
diff --git a/scrap/database.py b/scrap/database.py
index 3934361..dab2592 100644
--- a/scrap/database.py
+++ b/scrap/database.py
@@ -56,15 +56,16 @@ def setup():
def process_albums(album_list):
[process_album(album, album_id)
- for album_id, album in enumerate(album_list)]
+ for album_id, album in enumerate(album_list, 1)]
def process_album(album, album_id):
upload_album(album)
- [upload_song(song, album_id+1) for song in album.songs]
+ [upload_song(song, album_id) for song in album.songs]
def upload_album(album):
+ album.name = album.name.lower()
cursor.execute("""
INSERT INTO album (
name, cover, artist_id
@@ -99,6 +100,7 @@ def process_artists(artist_names):
def process_artist(artist):
+ artist = artist.lower()
cursor.execute("""
INSERT INTO artist (
name
diff --git a/scrap/main.py b/scrap/main.py
index 46d5853..fbed3cd 100644
--- a/scrap/main.py
+++ b/scrap/main.py
@@ -1,4 +1,4 @@
-from albums import album_list
+from albums import album_data
import api
import parser
import database
@@ -6,7 +6,7 @@ import database
def start():
print("downloading data...")
- api.download_albums(album_list)
+ api.download_albums(album_data)
print("uploading data...")
database.setup()
@@ -25,23 +25,22 @@ def upload_albums(album_data, artist_names):
def get_album_data():
- album_data = []
- artist_id = 0
+ result = []
- for artist in album_list:
- artist_id += 1
+ for artist_id, artist in enumerate(album_data, 1):
+ album_list = artist[1]
- for album in artist[1]:
- album_data.append(parser.process_json_file(album, artist_id))
+ for album_id, album in enumerate(album_list, 1):
+ result.append(parser.process_json_file(album, album_id, artist_id))
- return album_data
+ return result
def get_artist_names():
artist_data = []
[artist_data.append(
artist[0]
- ) for artist in album_list]
+ ) for artist in album_data]
return artist_data
diff --git a/scrap/parser.py b/scrap/parser.py
index a191fa2..c447a0d 100644
--- a/scrap/parser.py
+++ b/scrap/parser.py
@@ -4,54 +4,44 @@ import structures
import os
-def process_json_file(name, artist_id):
+def process_json_file(name, album_id, artist_id):
link = getLink(name)
file_json = open(link, "r")
album_json = file_json.read()
file_json.close()
- return process_json(album_json, artist_id)
+ return process_json(album_json, album_id, artist_id)
def getLink(name):
return "Lyrics_"+name.replace(" ", "")+".json"
-def process_json(album_json, artist_id):
+def process_json(album_json, album_id, artist_id):
data = json.loads(album_json)
- name = data["name"].lower()
+ album_name = data["name"].lower()
- artist_name_api = data["artist"]["name"].lower()
- artist_name = format_artist_name(artist_name_api)
off_cover = data["cover_art_thumbnail_url"]
- new_cover = get_cover_link(artist_name, name)
- download_cover(off_cover, new_cover, artist_name)
+ new_cover = get_cover_link(artist_id, album_id)
+ download_cover(off_cover, new_cover, artist_id)
songs = [analyze_song(song) for song in data["tracks"]]
- return structures.album(name, new_cover, songs, artist_id)
+ return structures.album(album_name, new_cover, songs, artist_id)
-def format_artist_name(name):
- name = name.replace(" ", "")
- name = re.sub(r'[\(\[].*?[\)\]]', '', name)
- return name
+def get_cover_link(artist_id, album_id):
+ cover_link = f"covers/{artist_id}/{album_id}.png"
+ return cover_link
-def get_cover_link(artist_name, album_name):
- album_name = album_name.replace(" ", "")
- location = f"covers/{artist_name}/{album_name}.png"
-
- return location
-
-
-def download_cover(off_cover, new_cover, artist_name):
+def download_cover(off_cover, new_cover, artist_id):
if not os.path.isdir("covers"):
os.system("mkdir covers")
- if not os.path.isdir(f"covers/{artist_name}"):
- os.system(f"mkdir 'covers/{artist_name}'")
+ if not os.path.isdir(f"covers/{artist_id}"):
+ os.system(f"mkdir 'covers/{artist_id}'")
if not os.path.isfile(new_cover):
os.system(f"wget {off_cover} -O '{new_cover}'")
@@ -78,7 +68,6 @@ def format_lyrics(lyrics):
lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)
lyrics = re.sub("\n{3,}", "\n\n", lyrics)
- lyrics = lyrics.replace("\u0435", "e")
lyrics = lyrics.replace("\u2005", " ")
while lyrics[0] == '\n':