Skip to content

Commit 3763499

Browse files
committed
translations: use a more distinctive separator
I found that the translator would sometimes replace the pipe character with another symbol (perhaps it mistook the character for part of the text). I added spaces around the pipe to make it clearer that it is the separator.
1 parent 5a7e503 commit 3763499

File tree

2 files changed

+14
-11
lines changed

2 files changed

+14
-11
lines changed

beetsplug/lyrics.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,7 @@ def scrape(cls, html: str) -> str | None:
747747
class Translator(RequestHandler):
748748
TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
749749
LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
750+
SEPARATOR = " | "
750751
remove_translations = partial(re.compile(r" / [^\n]+").sub, "")
751752

752753
_log: Logger
@@ -776,14 +777,16 @@ def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]:
776777
map the translations back to the original texts.
777778
"""
778779
unique_texts = list(dict.fromkeys(texts))
780+
text = self.SEPARATOR.join(unique_texts)
779781
data: list[TranslatorAPI.Response] = self.post_json(
780782
self.TRANSLATE_URL,
781783
headers={"Ocp-Apim-Subscription-Key": self.api_key},
782-
json=[{"text": "|".join(unique_texts)}],
784+
json=[{"text": text}],
783785
params={"api-version": "3.0", "to": self.to_language},
784786
)
785787

786-
translations = data[0]["translations"][0]["text"].split("|")
788+
translated_text = data[0]["translations"][0]["text"]
789+
translations = translated_text.split(self.SEPARATOR)
787790
trans_by_text = dict(zip(unique_texts, translations))
788791
return list(zip(texts, (trans_by_text.get(t, "") for t in texts)))
789792

test/plugins/test_lyrics.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -547,23 +547,23 @@ def callback(request, _):
547547
if b"Refrain" in request.body:
548548
translations = (
549549
""
550-
"|[Refrain : Doja Cat]"
551-
"|Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501
552-
"|Mon corps ne me laissait pas le cacher (Cachez-le)"
553-
"|Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501
554-
"|Chevauchant à travers le tonnerre, la foudre"
550+
" | [Refrain : Doja Cat]"
551+
" | Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501
552+
" | Mon corps ne me laissait pas le cacher (Cachez-le)"
553+
" | Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501
554+
" | Chevauchant à travers le tonnerre, la foudre"
555555
)
556556
elif b"00:00.00" in request.body:
557557
translations = (
558558
""
559-
"|[00:00.00] Quelques paroles synchronisées"
560-
"|[00:01.00] Quelques paroles plus synchronisées"
559+
" | [00:00.00] Quelques paroles synchronisées"
560+
" | [00:01.00] Quelques paroles plus synchronisées"
561561
)
562562
else:
563563
translations = (
564564
""
565-
"|Quelques paroles synchronisées"
566-
"|Quelques paroles plus synchronisées"
565+
" | Quelques paroles synchronisées"
566+
" | Quelques paroles plus synchronisées"
567567
)
568568

569569
return [

0 commit comments

Comments
 (0)