40
40
from beets .autotag .hooks import string_dist
41
41
42
42
if TYPE_CHECKING :
43
+ from logging import Logger
44
+
43
45
from beets .importer import ImportTask
44
46
from beets .library import Item
45
47
46
- from ._typing import GeniusAPI , GoogleCustomSearchAPI , JSONDict , LRCLibAPI
48
+ from ._typing import (
49
+ GeniusAPI ,
50
+ GoogleCustomSearchAPI ,
51
+ JSONDict ,
52
+ LRCLibAPI ,
53
+ TranslatorAPI ,
54
+ )
47
55
48
56
USER_AGENT = f"beets/{ beets .__version__ } "
49
57
INSTRUMENTAL_LYRICS = "[Instrumental]"
@@ -252,6 +260,12 @@ def fetch_json(self, url: str, params: JSONDict | None = None, **kwargs):
252
260
self .debug ("Fetching JSON from {}" , url )
253
261
return r_session .get (url , ** kwargs ).json ()
254
262
263
def post_json(self, url: str, params: JSONDict | None = None, **kwargs):
    """POST to the given URL and return the decoded JSON response body.

    ``url`` and ``params`` are combined by ``format_url``; all remaining
    keyword arguments are forwarded unchanged to ``r_session.post``.
    """
    target = self.format_url(url, params)
    self.debug("Posting JSON to {}", target)
    response = r_session.post(target, **kwargs)
    return response.json()
268
+
255
269
@contextmanager
256
270
def handle_request (self ) -> Iterator [None ]:
257
271
try :
@@ -760,6 +774,97 @@ def scrape(cls, html: str) -> str | None:
760
774
return None
761
775
762
776
777
@dataclass
class Translator(RequestHandler):
    """Append translations to lyrics using the service at ``TRANSLATE_URL``.

    Attributes are plain dataclass fields; use :meth:`from_config` to build
    an instance with normalised (upper-cased) language codes.
    """

    TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
    # Optional "[dd:dd.dd]" timestamp prefix, optional spaces, then the text.
    # The "." before the last two digits is unescaped, so any single
    # character is accepted as that separator.
    LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")

    _log: Logger
    api_key: str
    to_language: str
    from_languages: list[str]

    @classmethod
    def from_config(
        cls,
        log: Logger,
        api_key: str,
        to_language: str,
        from_languages: list[str] | None = None,
    ) -> Translator:
        """Build a translator, upper-casing all language codes.

        ``from_languages`` may be ``None`` or empty, in which case
        :meth:`translate` does not restrict the source language.
        """
        return cls(
            log,
            api_key,
            to_language.upper(),
            [x.upper() for x in from_languages or []],
        )

    def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]:
        """Return ``(text, translation)`` pairs for the given texts.

        To reduce the translation 'cost', we translate unique texts, and then
        map the translations back to the original texts. Empty texts are not
        sent and always map to an empty translation; when there is nothing
        to translate at all, no request is made.
        """
        # Materialise once: ``texts`` is iterated several times below and a
        # one-shot iterator would otherwise be exhausted after the first pass.
        texts = list(texts)
        # dict.fromkeys de-duplicates while preserving first-seen order.
        unique_texts = list(dict.fromkeys(filter(None, texts)))
        if not unique_texts:
            return [(t, "") for t in texts]

        data: list[TranslatorAPI.Response] = self.post_json(
            self.TRANSLATE_URL,
            headers={"Ocp-Apim-Subscription-Key": self.api_key},
            json=[{"text": "|".join(unique_texts)}],
            params={"api-version": "3.0", "to": self.to_language},
        )

        translations = data[0]["translations"][0]["text"].split("|")
        # zip() stops at the shorter sequence; any text left without a
        # counterpart falls back to an empty translation below.
        trans_by_text = dict(zip(unique_texts, translations))
        return [(t, trans_by_text.get(t, "")) for t in texts]

    @classmethod
    def split_line(cls, line: str) -> tuple[str, str]:
        """Split line to (timestamp, text).

        The timestamp is an empty string when the line has none. If the line
        does not match ``LINE_PARTS_RE``, ``("", "")`` is returned.
        """
        if m := cls.LINE_PARTS_RE.match(line):
            return m[1], m[2]

        return "", ""

    def append_translations(self, lines: Iterable[str]) -> list[str]:
        """Append translations to the given lyrics texts.

        Lines may contain timestamps from LRCLib which need to be temporarily
        removed for the translation. They can take any of these forms:
            - empty
            Text - text only
            [00:00.00] - timestamp only
            [00:00.00] Text - timestamp with text

        Returns one output line per input line, formatted as
        ``[timestamp] text / translation`` with missing parts omitted.
        """
        # split into [(timestamp, text), ...]
        ts_and_text = list(map(self.split_line, lines))
        timestamps = [ts for ts, _ in ts_and_text]
        text_pairs = self.get_translations([ln for _, ln in ts_and_text])

        # only add the separator for non-empty translations
        texts = [" / ".join(filter(None, p)) for p in text_pairs]
        # only add the space between non-empty timestamps and texts
        return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]

    def translate(self, lyrics: str) -> str:
        """Translate the given lyrics to the target language.

        If the lyrics are already in the target language or not in any of
        the source languages (if configured), they are returned as is.

        The footer with the source URL is preserved, if present.
        """
        lyrics_language = langdetect.detect(lyrics).upper()
        if lyrics_language == self.to_language or (
            self.from_languages and lyrics_language not in self.from_languages
        ):
            return lyrics

        # Keep ``lyrics`` intact so it can be returned unchanged on failure.
        text, *url = lyrics.split("\n\nSource: ")
        translated_lines: list[str] | None = None
        with self.handle_request():
            translated_lines = self.append_translations(text.splitlines())

        # NOTE(review): handle_request() is defined outside this class; if it
        # suppresses request errors (confirm), the assignment above never
        # runs. Fall back to the untouched lyrics instead of failing on an
        # unbound variable.
        if translated_lines is None:
            return lyrics

        return "\n\nSource: ".join(["\n".join(translated_lines), *url])
866
+
867
+
763
868
class LyricsPlugin (RequestHandler , plugins .BeetsPlugin ):
764
869
BACKEND_BY_NAME = {
765
870
b .name : b for b in [LRCLib , Google , Genius , Tekstowo , MusiXmatch ]
@@ -776,15 +881,24 @@ def backends(self) -> list[Backend]:
776
881
777
882
return [self .BACKEND_BY_NAME [c ](self .config , self ._log ) for c in chosen ]
778
883
884
@cached_property
def translator(self) -> Translator | None:
    """Translator built from the ``translate`` config section, if usable.

    Returns ``None`` unless both ``api_key`` and ``to_language`` are set to
    truthy values; otherwise the flattened section is passed to
    ``Translator.from_config`` together with this plugin's logger.
    """
    section = self.config["translate"]
    if not (section["api_key"].get() and section["to_language"].get()):
        return None
    return Translator.from_config(self._log, **section.flatten())
890
+
779
891
def __init__ (self ):
780
892
super ().__init__ ()
781
893
self .import_stages = [self .imported ]
782
894
self .config .add (
783
895
{
784
896
"auto" : True ,
785
- "bing_client_secret" : None ,
786
- "bing_lang_from" : [],
787
- "bing_lang_to" : None ,
897
+ "translate" : {
898
+ "api_key" : None ,
899
+ "from_languages" : [],
900
+ "to_language" : None ,
901
+ },
788
902
"dist_thresh" : 0.11 ,
789
903
"google_API_key" : None ,
790
904
"google_engine_ID" : "009217259823014548361:lndtuqkycfu" ,
@@ -803,7 +917,7 @@ def __init__(self):
803
917
],
804
918
}
805
919
)
806
- self .config ["bing_client_secret " ].redact = True
920
+ self .config ["translate" ][ "api_key " ].redact = True
807
921
self .config ["google_API_key" ].redact = True
808
922
self .config ["google_engine_ID" ].redact = True
809
923
self .config ["genius_api_key" ].redact = True
@@ -817,24 +931,6 @@ def __init__(self):
817
931
# open yet.
818
932
self .rest = None
819
933
820
- self .config ["bing_lang_from" ] = [
821
- x .lower () for x in self .config ["bing_lang_from" ].as_str_seq ()
822
- ]
823
-
824
- @cached_property
825
- def bing_access_token (self ) -> str | None :
826
- params = {
827
- "client_id" : "beets" ,
828
- "client_secret" : self .config ["bing_client_secret" ],
829
- "scope" : "https://api.microsofttranslator.com" ,
830
- "grant_type" : "client_credentials" ,
831
- }
832
-
833
- oauth_url = "https://datamarket.accesscontrol.windows.net/v2/OAuth2-13"
834
- with self .handle_request ():
835
- r = r_session .post (oauth_url , params = params )
836
- return r .json ()["access_token" ]
837
-
838
934
def commands (self ):
839
935
cmd = ui .Subcommand ("lyrics" , help = "fetch song lyrics" )
840
936
cmd .parser .add_option (
@@ -996,14 +1092,12 @@ def fetch_item_lyrics(self, item: Item, write: bool, force: bool) -> None:
996
1092
997
1093
if lyrics :
998
1094
self .info ("🟢 Found lyrics: {0}" , item )
999
- if self .config ["bing_client_secret" ].get ():
1000
- lang_from = langdetect .detect (lyrics )
1001
- if self .config ["bing_lang_to" ].get () != lang_from and (
1002
- not self .config ["bing_lang_from" ]
1003
- or (lang_from in self .config ["bing_lang_from" ].as_str_seq ())
1004
- ):
1005
- lyrics = self .append_translation (
1006
- lyrics , self .config ["bing_lang_to" ]
1095
if translator := self.translator:
    initial_lyrics = lyrics
    # translate() returns the lyrics unchanged when no translation was
    # added, so only report success when the text actually differs.
    if (lyrics := translator.translate(lyrics)) != initial_lyrics:
        # Report the translator's own target language: the option is
        # configured as translate.to_language, and no "translate_to"
        # config key is declared by this plugin.
        self.info("🟢 Added translation to {}", translator.to_language)
1008
1102
else :
1009
1103
self .info ("🔴 Lyrics not found: {}" , item )
@@ -1027,30 +1121,3 @@ def get_lyrics(self, artist: str, title: str, *args) -> str | None:
1027
1121
return f"{ lyrics } \n \n Source: { url } "
1028
1122
1029
1123
return None
1030
-
1031
- def append_translation (self , text , to_lang ):
1032
- from xml .etree import ElementTree
1033
-
1034
- if not (token := self .bing_access_token ):
1035
- self .warn (
1036
- "Could not get Bing Translate API access token. "
1037
- "Check your 'bing_client_secret' password."
1038
- )
1039
- return text
1040
-
1041
- # Extract unique lines to limit API request size per song
1042
- lines = text .split ("\n " )
1043
- unique_lines = set (lines )
1044
- url = "https://api.microsofttranslator.com/v2/Http.svc/Translate"
1045
- with self .handle_request ():
1046
- text = self .fetch_text (
1047
- url ,
1048
- headers = {"Authorization" : f"Bearer { token } " },
1049
- params = {"text" : "|" .join (unique_lines ), "to" : to_lang },
1050
- )
1051
- if translated := ElementTree .fromstring (text .encode ("utf-8" )).text :
1052
- # Use a translation mapping dict to build resulting lyrics
1053
- translations = dict (zip (unique_lines , translated .split ("|" )))
1054
- return "" .join (f"{ ln } / { translations [ln ]} \n " for ln in lines )
1055
-
1056
- return text
0 commit comments