27
27
from functools import cached_property , partial , total_ordering
28
28
from html import unescape
29
29
from http import HTTPStatus
30
- from typing import TYPE_CHECKING , ClassVar , Iterable , Iterator , NamedTuple
31
- from urllib .parse import quote , urlencode , urlparse
30
+ from typing import TYPE_CHECKING , Iterable , Iterator , NamedTuple
31
+ from urllib .parse import quote , quote_plus , urlencode , urlparse
32
32
33
33
import langdetect
34
34
import requests
@@ -396,22 +396,7 @@ def fetch(
396
396
return None
397
397
398
398
399
- class DirectBackend (Backend ):
400
- """A backend for fetching lyrics directly."""
401
-
402
- URL_TEMPLATE : ClassVar [str ] #: May include formatting placeholders
403
-
404
- @classmethod
405
- def encode (cls , text : str ) -> str :
406
- """Encode the string for inclusion in a URL."""
407
- raise NotImplementedError
408
-
409
- @classmethod
410
- def build_url (cls , * args : str ) -> str :
411
- return cls .URL_TEMPLATE .format (* map (cls .encode , args ))
412
-
413
-
414
- class MusiXmatch (DirectBackend ):
399
+ class MusiXmatch (Backend ):
415
400
URL_TEMPLATE = "https://www.musixmatch.com/lyrics/{}/{}"
416
401
417
402
REPLACEMENTS = {
@@ -430,6 +415,10 @@ def encode(cls, text: str) -> str:
430
415
431
416
return quote (unidecode (text ))
432
417
418
+ @classmethod
419
+ def build_url (cls , * args : str ) -> str :
420
+ return cls .URL_TEMPLATE .format (* map (cls .encode , args ))
421
+
433
422
def fetch (self , artist : str , title : str , * _ ) -> tuple [str , str ] | None :
434
423
url = self .build_url (artist , title )
435
424
@@ -608,26 +597,25 @@ def scrape(cls, html: str) -> str | None:
608
597
return None
609
598
610
599
611
- class Tekstowo (SoupMixin , DirectBackend ):
600
+ class Tekstowo (SearchBackend ):
612
601
"""Fetch lyrics from Tekstowo.pl."""
613
602
614
- URL_TEMPLATE = "https://www.tekstowo.pl/piosenka,{},{}.html"
603
+ BASE_URL = "https://www.tekstowo.pl"
604
+ SEARCH_URL = BASE_URL + "/szukaj,{}.html"
615
605
616
- non_alpha_to_underscore = partial (re .compile (r"\W" ).sub , "_" )
617
-
618
- @classmethod
619
- def encode (cls , text : str ) -> str :
620
- return cls .non_alpha_to_underscore (unidecode (text .lower ()))
606
+ def build_url (self , artist , title ):
607
+ artistitle = f"{ artist .title ()} { title .title ()} "
621
608
622
- def fetch (self , artist : str , title : str , * _ ) -> tuple [str , str ] | None :
623
- url = self .build_url (artist , title )
624
- # We are expecting to receive a 404 since we are guessing the URL.
625
- # Thus suppress the error so that it does not end up in the logs.
626
- with suppress (NotFoundError ):
627
- if lyrics := self .scrape (self .fetch_text (url )):
628
- return lyrics , url
609
+ return self .SEARCH_URL .format (quote_plus (unidecode (artistitle )))
629
610
630
- return None
611
+ def search (self , artist : str , title : str ) -> Iterable [SearchResult ]:
612
+ if html := self .fetch_text (self .build_url (title , artist )):
613
+ soup = self .get_soup (html )
614
+ for tag in soup .select ("div[class=flex-group] > a[title*=' - ']" ):
615
+ artist , title = str (tag ["title" ]).split (" - " , 1 )
616
+ yield SearchResult (
617
+ artist , title , f"{ self .BASE_URL } { tag ['href' ]} "
618
+ )
631
619
632
620
return None
633
621
0 commit comments