Skip to content

Commit ef105a9

Browse files
committed
Resurrect translation functionality
1 parent 4531297 commit ef105a9

File tree

5 files changed

+263
-73
lines changed

5 files changed

+263
-73
lines changed

beetsplug/_typing.py

+20
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,23 @@ class Pagemap(TypedDict):
113113
"""Pagemap data with a single meta tags dict in a list."""
114114

115115
metatags: list[JSONDict]
116+
117+
118+
class TranslatorAPI:
    """Namespace grouping the typed shapes of translator API payloads."""

    class Language(TypedDict):
        """Language data returned by the translator API."""

        # NOTE(review): presumably a language code and a detection
        # confidence -- confirm against the translator API reference.
        language: str
        score: float

    class Translation(TypedDict):
        """Translation data returned by the translator API."""

        # The translated text itself.
        text: str
        # NOTE(review): presumably the target language code -- confirm.
        to: str

    class Response(TypedDict):
        """Response from the translator API."""

        detectedLanguage: TranslatorAPI.Language
        # One entry per requested target language (assumption -- the lyrics
        # plugin only ever reads the first entry).
        translations: list[TranslatorAPI.Translation]

beetsplug/lyrics.py

+121-58
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,12 @@
4343
from beets.importer import ImportTask
4444
from beets.library import Item
4545

46-
from ._typing import GeniusAPI, GoogleCustomSearchAPI, LRCLibAPI
46+
from ._typing import (
47+
GeniusAPI,
48+
GoogleCustomSearchAPI,
49+
LRCLibAPI,
50+
TranslatorAPI,
51+
)
4752

4853
USER_AGENT = f"beets/{beets.__version__}"
4954
INSTRUMENTAL_LYRICS = "[Instrumental]"
@@ -231,6 +236,11 @@ def fetch_json(self, url: str, **kwargs):
231236
self.debug("Fetching JSON from {}", url)
232237
return r_session.get(url, **kwargs).json()
233238

239+
def post_json(self, url: str, **kwargs):
    """POST to ``url`` and return the decoded JSON body of the response.

    Any extra keyword arguments are forwarded unchanged to the session's
    ``post`` call.
    """
    self.debug("Posting data to {}", url)
    response = r_session.post(url, **kwargs)
    return response.json()
243+
234244
@contextmanager
235245
def handle_request(self) -> Iterator[None]:
236246
try:
@@ -753,6 +763,97 @@ def scrape(cls, html: str) -> str | None:
753763
return None
754764

755765

766+
@dataclass
class Translator(RequestHandler):
    """Append translations to lyrics using the Azure AI Translator API.

    Each lyrics line is kept and its translation is appended after a
    `` / `` separator. Timestamps at the start of a line are preserved.
    """

    TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
    # All texts are sent as a single string joined by this separator and the
    # translated string is split on it again.
    SEPARATOR = "|"
    # Optional leading timestamp, optional spaces, then the text. Both
    # ``[mm:ss.xx]`` and ``[mm:ss:xx]`` are accepted; the previous pattern
    # used a bare ``.`` which matched any character in that position.
    LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d[.:]\d\d\]|) *(.*)$")

    _log: beets.logging.Logger
    api_key: str
    to_language: str
    from_languages: list[str]

    @classmethod
    def from_config(
        cls,
        log: beets.logging.BeetsLogger,
        api_key: str,
        to_language: str,
        from_languages: list[str] | None = None,
    ) -> Translator:
        """Create a translator from configuration values.

        Language codes are upper-cased so that they can be compared with the
        upper-cased detected language in :meth:`translate`. A missing or
        empty ``from_languages`` means no restriction on source languages.
        """
        return cls(
            log,
            api_key,
            to_language.upper(),
            [x.upper() for x in from_languages or []],
        )

    def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]:
        """Return ``(text, translation)`` pairs for the given texts.

        To reduce the translation 'cost', we translate unique non-empty
        texts in a single request, and then map the translations back to the
        original texts. Empty texts are paired with an empty translation.

        If the number of translations returned does not match the number of
        texts sent, every text is paired with an empty translation rather
        than risking misaligned results.
        """
        # The input is iterated more than once; materialise it so that
        # generators and other one-shot iterables work too.
        texts = list(texts)
        unique_texts = list(dict.fromkeys(t for t in texts if t))
        if not unique_texts:
            # Nothing to translate: do not spend a request on it.
            return [(t, "") for t in texts]

        data: list[TranslatorAPI.Response] = self.post_json(
            self.TRANSLATE_URL,
            headers={"Ocp-Apim-Subscription-Key": self.api_key},
            json=[{"text": self.SEPARATOR.join(unique_texts)}],
            params={"api-version": "3.0", "to": self.to_language},
        )

        translated_text = data[0]["translations"][0]["text"]
        translations = [t.strip() for t in translated_text.split(self.SEPARATOR)]
        if len(translations) != len(unique_texts):
            # A separator inside the lyrics, or one altered by the service,
            # shifts every following translation onto the wrong line.
            self.debug(
                "Expected {} translations but got {}, skipping translation",
                len(unique_texts),
                len(translations),
            )
            return [(t, "") for t in texts]

        trans_by_text = dict(zip(unique_texts, translations))
        return [(t, trans_by_text.get(t, "")) for t in texts]

    @classmethod
    def split_line(cls, line: str) -> tuple[str, str]:
        """Split line to (timestamp, text).

        The timestamp is an empty string when the line does not start with
        one.
        """
        if m := cls.LINE_PARTS_RE.match(line):
            return m[1], m[2]

        return "", ""

    def append_translations(self, lines: Iterable[str]) -> list[str]:
        """Append translations to the given lyrics texts.

        Lines may contain timestamps from LRCLib which need to be temporarily
        removed for the translation. They can take any of these forms:
            - empty
            Text - text only
            [00:00.00] - timestamp only
            [00:00.00] Text - timestamp with text
        """
        # split into [(timestamp, text), ...]
        ts_and_text = list(map(self.split_line, lines))
        timestamps = [ts for ts, _ in ts_and_text]
        text_pairs = self.get_translations([ln for _, ln in ts_and_text])

        # only add the separator for non-empty translations
        texts = [" / ".join(filter(None, p)) for p in text_pairs]
        # only add the space between non-empty timestamps and texts
        return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]

    def translate(self, lyrics: str) -> str:
        """Translate the given lyrics to the target language.

        If the lyrics are already in the target language or not in any of
        the source languages (if configured), they are returned as is. They
        are also returned as is when the translation request did not
        produce a result.

        The footer with the source URL is preserved, if present.
        """
        lyrics_language = langdetect.detect(lyrics).upper()
        if lyrics_language == self.to_language or (
            self.from_languages and lyrics_language not in self.from_languages
        ):
            return lyrics

        text, *url = lyrics.split("\n\nSource: ")
        # NOTE(review): handle_request() wraps the request in a ``try`` and
        # may suppress the error (its except clauses are outside this view).
        # Start from ``None`` so a suppressed failure falls back to the
        # untouched lyrics instead of raising UnboundLocalError below.
        translated_lines: list[str] | None = None
        with self.handle_request():
            translated_lines = self.append_translations(text.splitlines())

        if translated_lines is None:
            return lyrics

        return "\n\nSource: ".join(["\n".join(translated_lines), *url])
855+
856+
756857
class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
757858
BACKEND_BY_NAME = {
758859
b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch]
@@ -769,15 +870,24 @@ def backends(self) -> list[Backend]:
769870

770871
return [self.BACKEND_BY_NAME[c](self.config, self._log) for c in chosen]
771872

873+
@cached_property
def translator(self) -> Translator | None:
    """Return the configured translator, or None when it is not set up.

    Translation is enabled only when both ``translate.api_key`` and
    ``translate.to_language`` have a truthy value.
    """
    settings = self.config["translate"]
    if not (settings["api_key"].get() and settings["to_language"].get()):
        return None

    return Translator.from_config(self._log, **settings.flatten())
879+
772880
def __init__(self):
773881
super().__init__()
774882
self.import_stages = [self.imported]
775883
self.config.add(
776884
{
777885
"auto": True,
778-
"bing_client_secret": None,
779-
"bing_lang_from": [],
780-
"bing_lang_to": None,
886+
"translate": {
887+
"api_key": None,
888+
"from_languages": [],
889+
"to_language": None,
890+
},
781891
"dist_thresh": 0.11,
782892
"google_API_key": None,
783893
"google_engine_ID": "009217259823014548361:lndtuqkycfu",
@@ -796,7 +906,7 @@ def __init__(self):
796906
],
797907
}
798908
)
799-
self.config["bing_client_secret"].redact = True
909+
self.config["translate"]["api_key"].redact = True
800910
self.config["google_API_key"].redact = True
801911
self.config["google_engine_ID"].redact = True
802912
self.config["genius_api_key"].redact = True
@@ -810,24 +920,6 @@ def __init__(self):
810920
# open yet.
811921
self.rest = None
812922

813-
self.config["bing_lang_from"] = [
814-
x.lower() for x in self.config["bing_lang_from"].as_str_seq()
815-
]
816-
817-
@cached_property
818-
def bing_access_token(self) -> str | None:
819-
params = {
820-
"client_id": "beets",
821-
"client_secret": self.config["bing_client_secret"],
822-
"scope": "https://api.microsofttranslator.com",
823-
"grant_type": "client_credentials",
824-
}
825-
826-
oauth_url = "https://datamarket.accesscontrol.windows.net/v2/OAuth2-13"
827-
with self.handle_request():
828-
r = r_session.post(oauth_url, params=params)
829-
return r.json()["access_token"]
830-
831923
def commands(self):
832924
cmd = ui.Subcommand("lyrics", help="fetch song lyrics")
833925
cmd.parser.add_option(
@@ -989,14 +1081,12 @@ def fetch_item_lyrics(self, item: Item, write: bool, force: bool) -> None:
9891081

9901082
if lyrics:
9911083
self.info("🟢 Found lyrics: {0}", item)
992-
if self.config["bing_client_secret"].get():
993-
lang_from = langdetect.detect(lyrics)
994-
if self.config["bing_lang_to"].get() != lang_from and (
995-
not self.config["bing_lang_from"]
996-
or (lang_from in self.config["bing_lang_from"].as_str_seq())
997-
):
998-
lyrics = self.append_translation(
999-
lyrics, self.config["bing_lang_to"]
1084+
if translator := self.translator:
    initial_lyrics = lyrics
    # translate() returns the lyrics unchanged when no translation was
    # added, so only report success when the text actually changed.
    if (lyrics := translator.translate(lyrics)) != initial_lyrics:
        self.info(
            "🟢 Added translation to {}",
            # The option lives at translate.to_language; there is no
            # "translate_to" key. The translator already holds the
            # upper-cased target language.
            translator.to_language,
        )
10011091
else:
10021092
self.info("🔴 Lyrics not found: {}", item)
@@ -1020,30 +1110,3 @@ def get_lyrics(self, artist: str, title: str, *args) -> str | None:
10201110
return f"{lyrics}\n\nSource: {url}"
10211111

10221112
return None
1023-
1024-
def append_translation(self, text, to_lang):
1025-
from xml.etree import ElementTree
1026-
1027-
if not (token := self.bing_access_token):
1028-
self.warn(
1029-
"Could not get Bing Translate API access token. "
1030-
"Check your 'bing_client_secret' password."
1031-
)
1032-
return text
1033-
1034-
# Extract unique lines to limit API request size per song
1035-
lines = text.split("\n")
1036-
unique_lines = set(lines)
1037-
url = "https://api.microsofttranslator.com/v2/Http.svc/Translate"
1038-
with self.handle_request():
1039-
text = self.fetch_text(
1040-
url,
1041-
headers={"Authorization": f"Bearer {token}"},
1042-
params={"text": "|".join(unique_lines), "to": to_lang},
1043-
)
1044-
if translated := ElementTree.fromstring(text.encode("utf-8")).text:
1045-
# Use a translation mapping dict to build resulting lyrics
1046-
translations = dict(zip(unique_lines, translated.split("|")))
1047-
return "".join(f"{ln} / {translations[ln]}\n" for ln in lines)
1048-
1049-
return text

docs/changelog.rst

+2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ New features:
1515
control the maximum allowed distance between the lyrics search result and the
1616
tagged item's artist and title. This is useful for preventing false positives
1717
when fetching lyrics.
18+
* :doc:`plugins/lyrics`: Rewrite lyrics translation functionality to use Azure
19+
AI Translator API and add relevant instructions to the documentation.
1820

1921
Bug fixes:
2022

docs/plugins/lyrics.rst

+35-15
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ Default configuration:
3838
3939
lyrics:
4040
auto: yes
41-
bing_client_secret: null
42-
bing_lang_from: []
43-
bing_lang_to: null
41+
translate:
42+
api_key:
43+
from_languages: []
44+
to_language:
4445
dist_thresh: 0.11
4546
fallback: null
4647
force: no
@@ -52,12 +53,13 @@ Default configuration:
5253
The available options are:
5354

5455
- **auto**: Fetch lyrics automatically during import.
55-
- **bing_client_secret**: Your Bing Translation application password
56-
(see :ref:`lyrics-translation`)
57-
- **bing_lang_from**: By default all lyrics with a language other than
58-
``bing_lang_to`` are translated. Use a list of lang codes to restrict the set
59-
of source languages to translate.
60-
- **bing_lang_to**: Language to translate lyrics into.
56+
- **translate**
57+
- **api_key**: API key to access your Azure Translator resource. (see
58+
:ref:`lyrics-translation`)
59+
- **from_languages**: By default all lyrics with a language other than
60+
``to_language`` are translated. Use a list of language codes to restrict
61+
them.
62+
- **to_language**: Language code to translate lyrics to.
6163
- **dist_thresh**: The maximum distance between the artist and title
6264
combination of the music file and lyrics candidate to consider them a match.
6365
Lower values will make the plugin more strict, higher values will make it
@@ -165,10 +167,28 @@ After that, the lyrics plugin will fall back on other declared data sources.
165167
Activate On-the-Fly Translation
166168
-------------------------------
167169

168-
You need to register for a Microsoft Azure Marketplace free account and
169-
to the `Microsoft Translator API`_. Follow the four steps process, specifically
170-
at step 3 enter ``beets`` as *Client ID* and copy/paste the generated
171-
*Client secret* into your ``bing_client_secret`` configuration, alongside
172-
``bing_lang_to`` target ``language code``.
170+
We use Azure to optionally translate your lyrics. To set up the integration,
171+
follow these steps:
173172

174-
.. _Microsoft Translator API: https://docs.microsoft.com/en-us/azure/cognitive-services/translator/translator-how-to-signup
173+
1. `Create a Translator resource`_ on Azure.
174+
2. `Obtain its API key`_.
175+
3. Add the API key to your configuration as ``translate.api_key``.
176+
4. Configure your target language using the ``translate.to_language`` option.
177+
178+
179+
For example, with the following configuration
180+
181+
.. code-block:: yaml
182+
183+
lyrics:
184+
translate:
185+
api_key: YOUR_TRANSLATOR_API_KEY
186+
to_language: de
187+
188+
You should expect lyrics like this::
189+
190+
Original verse / Ursprünglicher Vers
191+
Some other verse / Ein anderer Vers
192+
193+
.. _create a Translator resource: https://learn.microsoft.com/en-us/azure/ai-services/translator/create-translator-resource
194+
.. _obtain its API key: https://learn.microsoft.com/en-us/python/api/overview/azure/ai-translation-text-readme?view=azure-python&preserve-view=true#get-an-api-key

0 commit comments

Comments
 (0)