From 2e8ddbf422a3a6d75b1e1f73cad089111f801c05 Mon Sep 17 00:00:00 2001
From: niliara-edu <nil.jimeno@estudiant.fjaverianas.com>
Date: Tue, 24 Sep 2024 12:21:40 +0200
Subject: finished web scraper

---
 scrap/scrap.py | 25 -------------------------
 1 file changed, 25 deletions(-)
 delete mode 100644 scrap/scrap.py

(limited to 'scrap/scrap.py')

diff --git a/scrap/scrap.py b/scrap/scrap.py
deleted file mode 100644
index d979f49..0000000
--- a/scrap/scrap.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from bs4 import BeautifulSoup
-import re
-# import os
-import sys
-import requests
-
-
-def scrape_song_lyrics(url):
-    page = requests.get(url)
-    html = BeautifulSoup(page.text, 'html.parser')
-    lyrics = str(html.find(
-                'div',
-                # class_='lyrics-root-pin-spacer',
-                class_='Lyrics__Container-sc-1ynbvzw-1 kUgSbL'
-    ))
-    # remove identifiers like chorus, verse, etc
-    lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)
-    lyrics = re.sub(r'<.*?>', '\n', lyrics)
-    # remove empty lines
-    # lyrics = os.linesep.join([s for s in lyrics.splitlines() if s])
-    lyrics = "\n".join([s for s in lyrics.split("\n") if s])
-    return lyrics
-
-
-print(scrape_song_lyrics(sys.argv[1]))
-- 
cgit v1.2.3