Skip to content

Commit 43032f7

Browse files
committed
translations: make sure we do not re-translate
1 parent 7893766 commit 43032f7

File tree

2 files changed

+42
-15
lines changed

2 files changed

+42
-15
lines changed

beetsplug/lyrics.py

+30-12
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,7 @@ def scrape(cls, html: str) -> str | None:
744744
class Translator(RequestHandler):
745745
TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
746746
LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
747+
remove_translations = partial(re.compile(r" / [^\n]+").sub, "")
747748

748749
_log: Logger
749750
api_key: str
@@ -811,23 +812,45 @@ def append_translations(self, lines: Iterable[str]) -> list[str]:
811812
# only add the space between non-empty timestamps and texts
812813
return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]
813814

814-
def translate(self, lyrics: str) -> str:
815+
def translate(self, new_lyrics: str, old_lyrics: str) -> str:
815816
"""Translate the given lyrics to the target language.
816817
818+
Check old lyrics for existing translations and return them if their
819+
original text matches the new lyrics. This is to avoid translating
820+
the same lyrics multiple times.
821+
817822
If the lyrics are already in the target language or not in any
818823
of the source languages (if configured), they are returned as is.
819824
820825
The footer with the source URL is preserved, if present.
821826
"""
822-
lyrics_language = langdetect.detect(lyrics).upper()
823-
if lyrics_language == self.to_language or (
824-
self.from_languages and lyrics_language not in self.from_languages
827+
if (
828+
" / " in old_lyrics
829+
and self.remove_translations(old_lyrics) == new_lyrics
825830
):
826-
return lyrics
831+
self.info("🔵 Translations already exist")
832+
return old_lyrics
833+
834+
lyrics_language = langdetect.detect(new_lyrics).upper()
835+
if lyrics_language == self.to_language:
836+
self.info(
837+
"🔵 Lyrics are already in the target language {}",
838+
self.to_language,
839+
)
840+
return new_lyrics
841+
842+
if self.from_languages and lyrics_language not in self.from_languages:
843+
self.info(
844+
"🔵 Configuration {} does not permit translating from {}",
845+
self.from_languages,
846+
lyrics_language,
847+
)
848+
return new_lyrics
827849

828-
lyrics, *url = lyrics.split("\n\nSource: ")
850+
lyrics, *url = new_lyrics.split("\n\nSource: ")
829851
with self.handle_request():
830852
translated_lines = self.append_translations(lyrics.splitlines())
853+
self.info("🟢 Translated lyrics to {}", self.to_language)
831854
return "\n\nSource: ".join(["\n".join(translated_lines), *url])
832855

833856

@@ -1065,12 +1088,7 @@ def add_item_lyrics(self, item: Item, write: bool) -> None:
10651088
if lyrics := self.find_lyrics(item):
10661089
self.info("🟢 Found lyrics: {0}", item)
10671090
if translator := self.translator:
1068-
initial_lyrics = lyrics
1069-
if (lyrics := translator.translate(lyrics)) != initial_lyrics:
1070-
self.info(
1071-
"🟢 Added translation to {}",
1072-
self.config["translate_to"].get().upper(),
1073-
)
1091+
lyrics = translator.translate(lyrics, item.lyrics)
10741092
else:
10751093
self.info("🔴 Lyrics not found: {}", item)
10761094
lyrics = self.config["fallback"].get()

test/plugins/test_lyrics.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,7 @@ def callback(request, _):
583583
requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=callback)
584584

585585
@pytest.mark.parametrize(
586-
"initial_lyrics, expected",
586+
"new_lyrics, old_lyrics, expected",
587587
[
588588
pytest.param(
589589
"""
@@ -592,6 +592,7 @@ def callback(request, _):
592592
My body wouldn't let me hide it (Hide it)
593593
No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold)
594594
Ridin' through the thunder, lightnin'""",
595+
"",
595596
"""
596597
[Refrain: Doja Cat] / [Refrain : Doja Cat]
597598
Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)
@@ -607,6 +608,7 @@ def callback(request, _):
607608
[00:01.00] Some more synced lyrics
608609
609610
Source: https://lrclib.net/api/123""",
611+
"",
610612
"""
611613
[00:00.00] Some synced lyrics / Quelques paroles synchronisées
612614
[00:00:50]
@@ -617,17 +619,24 @@ def callback(request, _):
617619
),
618620
pytest.param(
619621
"Quelques paroles",
622+
"",
620623
"Quelques paroles",
621624
id="already in the target language",
622625
),
626+
pytest.param(
627+
"Some lyrics",
628+
"Some lyrics / Some translation",
629+
"Some lyrics / Some translation",
630+
id="already translated",
631+
),
623632
],
624633
)
625-
def test_translate(self, initial_lyrics, expected):
634+
def test_translate(self, new_lyrics, old_lyrics, expected):
626635
plugin = lyrics.LyricsPlugin()
627636
bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"])
628637

629638
assert bing.translate(
630-
textwrap.dedent(initial_lyrics)
639+
textwrap.dedent(new_lyrics), old_lyrics
631640
) == textwrap.dedent(expected)
632641

633642

0 commit comments

Comments
 (0)