diff options
author | niliara-edu <nil.jimeno@estudiant.fjaverianas.com> | 2024-09-24 12:21:40 +0200 |
---|---|---|
committer | niliara-edu <nil.jimeno@estudiant.fjaverianas.com> | 2024-09-24 12:21:40 +0200 |
commit | 2e8ddbf422a3a6d75b1e1f73cad089111f801c05 (patch) | |
tree | a3b2c7ea8560fcbc89fb9b3ee15933d434b1ff51 /scrap/scrap.py | |
parent | faa13839f898c60ff5618be6e916ad2e60958468 (diff) |
finished web scraper
Diffstat (limited to 'scrap/scrap.py')
-rw-r--r-- | scrap/scrap.py | 25 |
1 files changed, 0 insertions, 25 deletions
diff --git a/scrap/scrap.py b/scrap/scrap.py deleted file mode 100644 index d979f49..0000000 --- a/scrap/scrap.py +++ /dev/null @@ -1,25 +0,0 @@ -from bs4 import BeautifulSoup -import re -# import os -import sys -import requests - - -def scrape_song_lyrics(url): - page = requests.get(url) - html = BeautifulSoup(page.text, 'html.parser') - lyrics = str(html.find( - 'div', - # class_='lyrics-root-pin-spacer', - class_='Lyrics__Container-sc-1ynbvzw-1 kUgSbL' - )) - # remove identifiers like chorus, verse, etc - lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics) - lyrics = re.sub(r'<.*?>', '\n', lyrics) - # remove empty lines - # lyrics = os.linesep.join([s for s in lyrics.splitlines() if s]) - lyrics = "\n".join([s for s in lyrics.split("\n") if s]) - return lyrics - - -print(scrape_song_lyrics(sys.argv[1])) |