From 2e8ddbf422a3a6d75b1e1f73cad089111f801c05 Mon Sep 17 00:00:00 2001 From: niliara-edu Date: Tue, 24 Sep 2024 12:21:40 +0200 Subject: finished web scrapper --- scrap/scrap.py | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 scrap/scrap.py (limited to 'scrap/scrap.py') diff --git a/scrap/scrap.py b/scrap/scrap.py deleted file mode 100644 index d979f49..0000000 --- a/scrap/scrap.py +++ /dev/null @@ -1,25 +0,0 @@ -from bs4 import BeautifulSoup -import re -# import os -import sys -import requests - - -def scrape_song_lyrics(url): - page = requests.get(url) - html = BeautifulSoup(page.text, 'html.parser') - lyrics = str(html.find( - 'div', - # class_='lyrics-root-pin-spacer', - class_='Lyrics__Container-sc-1ynbvzw-1 kUgSbL' - )) - # remove identifiers like chorus, verse, etc - lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics) - lyrics = re.sub(r'<.*?>', '\n', lyrics) - # remove empty lines - # lyrics = os.linesep.join([s for s in lyrics.splitlines() if s]) - lyrics = "\n".join([s for s in lyrics.split("\n") if s]) - return lyrics - - -print(scrape_song_lyrics(sys.argv[1])) -- cgit v1.2.3