Skip to content

Commit dc3773e

Browse files
committed
Bring back Tekstowo search
It was my mistake to remove search earlier - I found that in many cases it works fine.
1 parent 8436f5b commit dc3773e

File tree

1 file changed

+21
-33
lines changed

1 file changed

+21
-33
lines changed

beetsplug/lyrics.py

+21-33
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727
from functools import cached_property, partial, total_ordering
2828
from html import unescape
2929
from http import HTTPStatus
30-
from typing import TYPE_CHECKING, ClassVar, Iterable, Iterator, NamedTuple
31-
from urllib.parse import quote, urlencode, urlparse
30+
from typing import TYPE_CHECKING, Iterable, Iterator, NamedTuple
31+
from urllib.parse import quote, quote_plus, urlencode, urlparse
3232

3333
import langdetect
3434
import requests
@@ -396,22 +396,7 @@ def fetch(
396396
return None
397397

398398

399-
class DirectBackend(Backend):
400-
"""A backend for fetching lyrics directly."""
401-
402-
URL_TEMPLATE: ClassVar[str] #: May include formatting placeholders
403-
404-
@classmethod
405-
def encode(cls, text: str) -> str:
406-
"""Encode the string for inclusion in a URL."""
407-
raise NotImplementedError
408-
409-
@classmethod
410-
def build_url(cls, *args: str) -> str:
411-
return cls.URL_TEMPLATE.format(*map(cls.encode, args))
412-
413-
414-
class MusiXmatch(DirectBackend):
399+
class MusiXmatch(Backend):
415400
URL_TEMPLATE = "https://www.musixmatch.com/lyrics/{}/{}"
416401

417402
REPLACEMENTS = {
@@ -430,6 +415,10 @@ def encode(cls, text: str) -> str:
430415

431416
return quote(unidecode(text))
432417

418+
@classmethod
def build_url(cls, *args: str) -> str:
    """Return ``URL_TEMPLATE`` filled in with the encoded arguments.

    Every positional argument is passed through ``cls.encode`` and the
    results are substituted, in order, into the template placeholders.
    """
    encoded_parts = [cls.encode(part) for part in args]
    return cls.URL_TEMPLATE.format(*encoded_parts)
421+
433422
def fetch(self, artist: str, title: str, *_) -> tuple[str, str] | None:
434423
url = self.build_url(artist, title)
435424

@@ -608,26 +597,25 @@ def scrape(cls, html: str) -> str | None:
608597
return None
609598

610599

611-
class Tekstowo(SoupMixin, DirectBackend):
600+
class Tekstowo(SearchBackend):
612601
"""Fetch lyrics from Tekstowo.pl."""
613602

614-
URL_TEMPLATE = "https://www.tekstowo.pl/piosenka,{},{}.html"
603+
BASE_URL = "https://www.tekstowo.pl"
604+
SEARCH_URL = BASE_URL + "/szukaj,{}.html"
615605

616-
non_alpha_to_underscore = partial(re.compile(r"\W").sub, "_")
617-
618-
@classmethod
619-
def encode(cls, text: str) -> str:
620-
return cls.non_alpha_to_underscore(unidecode(text.lower()))
606+
def build_url(self, artist, title):
607+
artistitle = f"{artist.title()} {title.title()}"
621608

622-
def fetch(self, artist: str, title: str, *_) -> tuple[str, str] | None:
623-
url = self.build_url(artist, title)
624-
# We are expecting to receive a 404 since we are guessing the URL.
625-
# Thus suppress the error so that it does not end up in the logs.
626-
with suppress(NotFoundError):
627-
if lyrics := self.scrape(self.fetch_text(url)):
628-
return lyrics, url
609+
return self.SEARCH_URL.format(quote_plus(unidecode(artistitle)))
629610

630-
return None
611+
def search(self, artist: str, title: str) -> Iterable[SearchResult]:
    """Yield Tekstowo search results for the given artist and title.

    Fetches the search page whose URL is produced by ``build_url`` and
    yields one ``SearchResult`` for every result link whose ``title``
    attribute has the ``"<artist> - <title>"`` form. Nothing is yielded
    when the fetched page text is empty.
    """
    # ``build_url`` is declared as ``(artist, title)``; pass the arguments
    # in that order so the query is built as "Artist Title".
    html = self.fetch_text(self.build_url(artist, title))
    if not html:
        return

    soup = self.get_soup(html)
    for tag in soup.select("div[class=flex-group] > a[title*=' - ']"):
        # Distinct names keep the search parameters from being shadowed.
        found_artist, found_title = str(tag["title"]).split(" - ", 1)
        yield SearchResult(
            found_artist, found_title, f"{self.BASE_URL}{tag['href']}"
        )
633621

0 commit comments

Comments
 (0)