summaryrefslogtreecommitdiff
path: root/scrap/scrap.py
diff options
context:
space:
mode:
authorniliara-edu <nil.jimeno@estudiant.fjaverianas.com>2024-09-24 12:21:40 +0200
committerniliara-edu <nil.jimeno@estudiant.fjaverianas.com>2024-09-24 12:21:40 +0200
commit2e8ddbf422a3a6d75b1e1f73cad089111f801c05 (patch)
treea3b2c7ea8560fcbc89fb9b3ee15933d434b1ff51 /scrap/scrap.py
parentfaa13839f898c60ff5618be6e916ad2e60958468 (diff)
finished web scrapper
Diffstat (limited to 'scrap/scrap.py')
-rw-r--r--scrap/scrap.py25
1 files changed, 0 insertions, 25 deletions
diff --git a/scrap/scrap.py b/scrap/scrap.py
deleted file mode 100644
index d979f49..0000000
--- a/scrap/scrap.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from bs4 import BeautifulSoup
-import re
-# import os
-import sys
-import requests
-
-
-def scrape_song_lyrics(url):
- page = requests.get(url)
- html = BeautifulSoup(page.text, 'html.parser')
- lyrics = str(html.find(
- 'div',
- # class_='lyrics-root-pin-spacer',
- class_='Lyrics__Container-sc-1ynbvzw-1 kUgSbL'
- ))
- # remove identifiers like chorus, verse, etc
- lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)
- lyrics = re.sub(r'<.*?>', '\n', lyrics)
- # remove empty lines
- # lyrics = os.linesep.join([s for s in lyrics.splitlines() if s])
- lyrics = "\n".join([s for s in lyrics.split("\n") if s])
- return lyrics
-
-
-print(scrape_song_lyrics(sys.argv[1]))