diff options
Diffstat (limited to 'scrap/scrap.py')
-rw-r--r-- | scrap/scrap.py | 25 |
1 files changed, 25 insertions, 0 deletions
"""Download a song page and print the lyrics it contains as plain text.

Usage: python scrap.py URL
"""

import html
import re
import sys

import requests
from bs4 import BeautifulSoup

# Value of the ``class`` attribute on the <div> that holds the lyrics.
# It is matched as one exact string, so it has to be updated (or overridden
# through ``container_class``) whenever the page markup changes.
LYRICS_CONTAINER_CLASS = 'Lyrics__Container-sc-1ynbvzw-1 kUgSbL'

# Seconds to wait for the server; requests waits forever without a timeout.
REQUEST_TIMEOUT_SECONDS = 30

# Parenthesised / bracketed annotations such as "[Chorus]" or "(Verse 1)".
_ANNOTATION_RE = re.compile(r'[\(\[].*?[\)\]]')
# Any HTML tag.
_TAG_RE = re.compile(r'<.*?>')


def _markup_to_text(markup):
    """Turn the serialized lyrics container into newline-separated text."""
    # Annotations are removed while the tags are still in place: once tags
    # become newlines, an annotation that wraps a tag would span several
    # lines and '.' would no longer match across it.
    text = _ANNOTATION_RE.sub('', markup)
    text = _TAG_RE.sub('\n', text)
    # Serializing a tag re-escapes '&', '<' and '>'; decode them only after
    # the tags are gone so a literal '<' in the lyrics is not taken for one.
    text = html.unescape(text)
    # Drop empty and whitespace-only lines.
    return "\n".join(line for line in text.split("\n") if line.strip())


def scrape_song_lyrics(url, *, container_class=LYRICS_CONTAINER_CLASS,
                       timeout=REQUEST_TIMEOUT_SECONDS):
    """Return the lyrics found on the page at *url* as plain text.

    Args:
        url: Address of the song page to download.
        container_class: Exact ``class`` attribute value of the <div> that
            wraps the lyrics.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The lyrics, one line per row, with bracketed/parenthesised
        annotations and blank lines removed. An empty string is returned
        when the page has no matching container.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'html.parser')
    container = soup.find('div', class_=container_class)
    if container is None:
        # str(None) would otherwise leak the literal text "None" as lyrics.
        return ''
    return _markup_to_text(str(container))


def main(argv=None):
    """Print the lyrics for the URL given as the first command-line argument."""
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: {} URL".format(sys.argv[0]))
    lyrics = scrape_song_lyrics(args[0])
    if not lyrics:
        sys.exit("no lyrics found at {}".format(args[0]))
    print(lyrics)


if __name__ == "__main__":
    main()