summaryrefslogtreecommitdiff
path: root/scrap
diff options
context:
space:
mode:
Diffstat (limited to 'scrap')
-rw-r--r--scrap/.gitignore3
-rw-r--r--scrap/albums.py38
-rw-r--r--scrap/api.py21
-rw-r--r--scrap/create_db.sql38
-rw-r--r--scrap/database.py83
-rw-r--r--scrap/main.py46
-rw-r--r--scrap/parser.py78
-rw-r--r--scrap/structures.py12
8 files changed, 0 insertions, 319 deletions
diff --git a/scrap/.gitignore b/scrap/.gitignore
deleted file mode 100644
index 6922c3e..0000000
--- a/scrap/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-__pycache__/
-*.json
-covers/
diff --git a/scrap/albums.py b/scrap/albums.py
deleted file mode 100644
index 63114da..0000000
--- a/scrap/albums.py
+++ /dev/null
@@ -1,38 +0,0 @@
-album_data = [
- ["astrophysics", [
- "apathy",
- "The Unending Need For Perpetual Motion",
- "HOPE LEFT ME Complete Version",
- "Selected Tragic",
- "Cute Tragedies",
- "selected apathetic",
- "ENTITY",
- ]],
- ["sadsvit", [
- "Неонова Мрія Neon Dream",
- "20 21",
- "Cassette",
- "Суматоха Bustle",
- ]],
- ["Молчат Дома", [
- "Белая Полоса White Stripe",
- "Монумент Monument",
- "Этажи Floors",
- "С крыш наших домов From our houses rooftops",
- ]],
- ["amesoeurs", [
- "Amesoeurs",
- ]],
- ["severoth", [
- "Vsesvit",
- "Winterfall",
- ]],
- ["sadness", [
- "Motionless Watching You",
- "Circle of Veins",
- ]],
- ["Буерак", [
- "Голд Gold",
- ]],
-
-]
diff --git a/scrap/api.py b/scrap/api.py
deleted file mode 100644
index 5cc24d3..0000000
--- a/scrap/api.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import lyricsgenius
-import parser
-import os
-
-
-genius = lyricsgenius.Genius(
- "0uSA9UFGsiO2WozVmbWPhyhOoVmUNuM3PXRt9rvWhptHBMgSO5CZBxGUMkwet5mv"
-)
-
-
-def download_albums(albums):
- [get_album_json(artist[0], album)
- for artist in albums for album in artist[1]]
-
-
-def get_album_json(artist_name, album_name):
- if os.path.isfile(parser.getLink(album_name)):
- return
-
- album = genius.search_album(album_name, artist_name)
- album.save_lyrics()
diff --git a/scrap/create_db.sql b/scrap/create_db.sql
deleted file mode 100644
index bb5dbeb..0000000
--- a/scrap/create_db.sql
+++ /dev/null
@@ -1,38 +0,0 @@
-CREATE DATABASE IF NOT EXISTS balalaika;
-USE balalaika;
-
-DROP TABLE IF EXISTS song;
-DROP TABLE IF EXISTS album;
-DROP TABLE IF EXISTS artist;
-
-CREATE TABLE artist (
- id int NOT NULL AUTO_INCREMENT,
- name varchar(255),
-
- PRIMARY KEY (id)
-);
-
-CREATE TABLE album (
- id int NOT NULL AUTO_INCREMENT,
- name varchar(255),
- cover varchar(510),
- artist_id int,
-
- PRIMARY KEY (id),
- FOREIGN KEY (artist_id) REFERENCES artist(id)
-);
-
-CREATE TABLE song (
- id int NOT NULL AUTO_INCREMENT,
- name varchar(255),
- lyrics TEXT,
-
- album_id int,
-
- PRIMARY KEY (id),
- FOREIGN KEY (album_id) REFERENCES album(id)
-);
-
-ALTER TABLE song CONVERT TO CHARACTER SET utf8;
-ALTER TABLE album CONVERT TO CHARACTER SET utf8;
-ALTER TABLE artist CONVERT TO CHARACTER SET utf8;
diff --git a/scrap/database.py b/scrap/database.py
deleted file mode 100644
index 2319812..0000000
--- a/scrap/database.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import mysql.connector
-import getpass
-
-
-def get_database_password():
- return getpass.getpass("Insert database password: ")
-
-
-connector = mysql.connector.connect(
- host="localhost",
- user="balalaika_user",
- password=get_database_password(),
- database="balalaika",
-)
-
-cursor = connector.cursor()
-
-
-
-
-def process_albums(album_list):
- [process_album(album, album_id)
- for album_id, album in enumerate(album_list, 1)]
-
-
-def process_album(album, album_id):
- upload_album(album)
- [upload_song(song, album_id) for song in album.songs]
-
-
-def upload_album(album):
- album.name = album.name.lower()
- cursor.execute("""
- INSERT INTO album (
- name, cover, artist_id
- )
- VALUES (
- %(name)s, %(cover)s, %(artist_id)s
- );
- """, {
- 'name': album.name,
- 'cover': album.cover,
- 'artist_id': album.artist
- })
-
-
-def upload_song(song, album_id):
- cursor.execute("""
- INSERT INTO song (
- name, lyrics, album_id
- )
- VALUES (
- %(name)s, %(lyrics)s, %(album_id)s
- )
- """, {
- 'name': song.name,
- 'lyrics': song.lyrics,
- 'album_id': album_id
- })
-
-
-def process_artists(artist_names):
- [process_artist(artist) for artist in artist_names]
-
-
-def process_artist(artist):
- artist = artist.lower()
- cursor.execute("""
- INSERT INTO artist (
- name
- )
- VALUES (
- %(name)s
- )
- """, {
- 'name': artist,
- })
-
-
-def close():
- cursor.close()
- connector.commit()
- connector.close()
diff --git a/scrap/main.py b/scrap/main.py
deleted file mode 100644
index ec0157c..0000000
--- a/scrap/main.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from albums import album_data
-import api
-import parser
-import database
-
-
-def start():
- print("downloading data...")
- api.download_albums(album_data)
-
- print("uploading data...")
- upload_albums(
- get_album_data(),
- get_artist_names()
- )
- database.close()
- print("upload finished!")
- print("remember to move the covers directory once you're done")
-
-
-def upload_albums(album_data, artist_names):
- database.process_artists(artist_names)
- database.process_albums(album_data)
-
-
-def get_album_data():
- result = []
-
- for artist_id, artist in enumerate(album_data, 1):
- album_list = artist[1]
-
- for album_id, album in enumerate(album_list, 1):
- result.append(parser.process_json_file(album, album_id, artist_id))
-
- return result
-
-
-def get_artist_names():
- artist_data = []
- [artist_data.append(
- artist[0]
- ) for artist in album_data]
- return artist_data
-
-
-start()
diff --git a/scrap/parser.py b/scrap/parser.py
deleted file mode 100644
index c447a0d..0000000
--- a/scrap/parser.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import json
-import re
-import structures
-import os
-
-
-def process_json_file(name, album_id, artist_id):
- link = getLink(name)
-
- file_json = open(link, "r")
- album_json = file_json.read()
- file_json.close()
-
- return process_json(album_json, album_id, artist_id)
-
-
-def getLink(name):
- return "Lyrics_"+name.replace(" ", "")+".json"
-
-
-def process_json(album_json, album_id, artist_id):
- data = json.loads(album_json)
- album_name = data["name"].lower()
-
- off_cover = data["cover_art_thumbnail_url"]
-
- new_cover = get_cover_link(artist_id, album_id)
- download_cover(off_cover, new_cover, artist_id)
-
- songs = [analyze_song(song) for song in data["tracks"]]
- return structures.album(album_name, new_cover, songs, artist_id)
-
-
-def get_cover_link(artist_id, album_id):
- cover_link = f"covers/{artist_id}/{album_id}.png"
- return cover_link
-
-
-def download_cover(off_cover, new_cover, artist_id):
- if not os.path.isdir("covers"):
- os.system("mkdir covers")
-
- if not os.path.isdir(f"covers/{artist_id}"):
- os.system(f"mkdir 'covers/{artist_id}'")
-
- if not os.path.isfile(new_cover):
- os.system(f"wget {off_cover} -O '{new_cover}'")
-
-
-def analyze_song(song):
- name = song["song"]["title"]
- name = name.lower()
-
- lyrics = song["song"]["lyrics"]
- lyrics = format_lyrics(lyrics)
-
- return structures.song(name, lyrics)
-
-
-def format_lyrics(lyrics):
- if lyrics != "":
- lyrics = lyrics.split("Lyrics")[1].lstrip()
- lyrics = lyrics.split("Embed")[0].rstrip()
- lyrics = lyrics.replace("You might also like", "")
- lyrics = re.sub(r'See (.*?) LiveGet', 'liveget', lyrics)
- lyrics = lyrics.split("liveget")[0].rstrip()
-
- lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)
- lyrics = re.sub("\n{3,}", "\n\n", lyrics)
-
- lyrics = lyrics.replace("\u2005", " ")
-
- while lyrics[0] == '\n':
- lyrics = lyrics[1:]
-
- lyrics = lyrics.lower()
-
- return lyrics
diff --git a/scrap/structures.py b/scrap/structures.py
deleted file mode 100644
index b139489..0000000
--- a/scrap/structures.py
+++ /dev/null
@@ -1,12 +0,0 @@
-class song:
- def __init__(self, name, lyrics):
- self.name = name
- self.lyrics = lyrics
-
-
-class album:
- def __init__(self, name, cover, songs, artist_id):
- self.name = name
- self.cover = cover
- self.songs = songs
- self.artist = artist_id