27
27
from functools import cached_property , partial , total_ordering
28
28
from html import unescape
29
29
from http import HTTPStatus
30
- from typing import TYPE_CHECKING , ClassVar , Iterable , Iterator , NamedTuple
31
- from urllib .parse import quote , urlencode , urlparse
30
+ from typing import TYPE_CHECKING , Iterable , Iterator , NamedTuple
31
+ from urllib .parse import quote , quote_plus , urlencode , urlparse
32
32
33
33
import langdetect
34
34
import requests
@@ -399,22 +399,7 @@ def fetch(
399
399
return None
400
400
401
401
402
- class DirectBackend (Backend ):
403
- """A backend for fetching lyrics directly."""
404
-
405
- URL_TEMPLATE : ClassVar [str ] #: May include formatting placeholders
406
-
407
- @classmethod
408
- def encode (cls , text : str ) -> str :
409
- """Encode the string for inclusion in a URL."""
410
- raise NotImplementedError
411
-
412
- @classmethod
413
- def build_url (cls , * args : str ) -> str :
414
- return cls .URL_TEMPLATE .format (* map (cls .encode , args ))
415
-
416
-
417
- class MusiXmatch (DirectBackend ):
402
+ class MusiXmatch (Backend ):
418
403
URL_TEMPLATE = "https://www.musixmatch.com/lyrics/{}/{}"
419
404
420
405
REPLACEMENTS = {
@@ -433,6 +418,10 @@ def encode(cls, text: str) -> str:
433
418
434
419
return quote (unidecode (text ))
435
420
421
+ @classmethod
422
+ def build_url (cls , * args : str ) -> str :
423
+ return cls .URL_TEMPLATE .format (* map (cls .encode , args ))
424
+
436
425
def fetch (self , artist : str , title : str , * _ ) -> tuple [str , str ] | None :
437
426
url = self .build_url (artist , title )
438
427
@@ -611,26 +600,25 @@ def scrape(cls, html: str) -> str | None:
611
600
return None
612
601
613
602
614
- class Tekstowo (SoupMixin , DirectBackend ):
603
+ class Tekstowo (SearchBackend ):
615
604
"""Fetch lyrics from Tekstowo.pl."""
616
605
617
- URL_TEMPLATE = "https://www.tekstowo.pl/piosenka,{},{}.html"
606
+ BASE_URL = "https://www.tekstowo.pl"
607
+ SEARCH_URL = BASE_URL + "/szukaj,{}.html"
618
608
619
- non_alpha_to_underscore = partial (re .compile (r"\W" ).sub , "_" )
620
-
621
- @classmethod
622
- def encode (cls , text : str ) -> str :
623
- return cls .non_alpha_to_underscore (unidecode (text .lower ()))
609
+ def build_url (self , artist , title ):
610
+ artistitle = f"{ artist .title ()} { title .title ()} "
624
611
625
- def fetch (self , artist : str , title : str , * _ ) -> tuple [str , str ] | None :
626
- url = self .build_url (artist , title )
627
- # We are expecting to receive a 404 since we are guessing the URL.
628
- # Thus suppress the error so that it does not end up in the logs.
629
- with suppress (NotFoundError ):
630
- if lyrics := self .scrape (self .fetch_text (url )):
631
- return lyrics , url
612
+ return self .SEARCH_URL .format (quote_plus (unidecode (artistitle )))
632
613
633
- return None
614
+ def search (self , artist : str , title : str ) -> Iterable [SearchResult ]:
615
+ if html := self .fetch_text (self .build_url (title , artist )):
616
+ soup = self .get_soup (html )
617
+ for tag in soup .select ("div[class=flex-group] > a[title*=' - ']" ):
618
+ artist , title = str (tag ["title" ]).split (" - " , 1 )
619
+ yield SearchResult (
620
+ artist , title , f"{ self .BASE_URL } { tag ['href' ]} "
621
+ )
634
622
635
623
return None
636
624
0 commit comments