43
43
from beets .importer import ImportTask
44
44
from beets .library import Item
45
45
46
- from ._typing import GeniusAPI , GoogleCustomSearchAPI , LRCLibAPI
46
+ from ._typing import (
47
+ GeniusAPI ,
48
+ GoogleCustomSearchAPI ,
49
+ LRCLibAPI ,
50
+ TranslatorAPI ,
51
+ )
47
52
48
53
USER_AGENT = f"beets/{ beets .__version__ } "
49
54
INSTRUMENTAL_LYRICS = "[Instrumental]"
@@ -231,6 +236,11 @@ def fetch_json(self, url: str, **kwargs):
231
236
self .debug ("Fetching JSON from {}" , url )
232
237
return r_session .get (url , ** kwargs ).json ()
233
238
239
def post_json(self, url: str, **kwargs):
    """POST to ``url`` and return the response body decoded as JSON.

    All keyword arguments are forwarded unchanged to ``r_session.post``.
    """
    self.debug("Posting data to {}", url)
    response = r_session.post(url, **kwargs)
    return response.json()
243
+
234
244
@contextmanager
235
245
def handle_request (self ) -> Iterator [None ]:
236
246
try :
@@ -753,6 +763,97 @@ def scrape(cls, html: str) -> str | None:
753
763
return None
754
764
755
765
766
@dataclass
class Translator(RequestHandler):
    """Append translations to lyrics using the translation web API.

    Each lyrics line is kept and its translation is appended after a
    `` / `` separator. A leading timestamp on a line is preserved.
    """

    TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
    # Group 1 is an optional leading "[dd:dd?dd]" timestamp, group 2 the text.
    # NOTE(review): the '.' before the last two digits is unescaped, so any
    # single character is accepted there (both "[00:00.00]" and "[00:00:00]").
    LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")

    # Logger handed over by the plugin.
    _log: beets.logging.Logger
    # Subscription key sent in the "Ocp-Apim-Subscription-Key" header.
    api_key: str
    # Target language code, upper-cased by ``from_config``.
    to_language: str
    # Upper-cased source language codes; empty means "any language".
    from_languages: list[str]

    @classmethod
    def from_config(
        cls,
        log: beets.logging.BeetsLogger,
        api_key: str,
        to_language: str,
        from_languages: list[str] | None = None,
    ) -> Translator:
        """Build a translator from configuration values.

        Language codes are normalised to upper case so that they can be
        compared with the (upper-cased) detected lyrics language. A missing
        ``from_languages`` is treated as an empty list.
        """
        return cls(
            log,
            api_key,
            to_language.upper(),
            [x.upper() for x in from_languages or []],
        )

    def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]:
        """Return ``(text, translation)`` pairs for the given texts.

        To reduce the translation 'cost', we translate unique texts, and then
        map the translations back to the original texts. A text without a
        matching translation is paired with an empty string.
        """
        # ``texts`` is walked several times below; materialise it so that
        # one-shot iterables (e.g. generators) are not silently exhausted.
        texts = list(texts)
        unique_texts = list(dict.fromkeys(texts))
        if not any(unique_texts):
            # Nothing but empty lines (or no lines at all): skip the request.
            return [(t, "") for t in texts]

        data: list[TranslatorAPI.Response] = self.post_json(
            self.TRANSLATE_URL,
            headers={"Ocp-Apim-Subscription-Key": self.api_key},
            # All unique texts are sent as one "|"-separated string and the
            # response is split on the same separator.
            json=[{"text": "|".join(unique_texts)}],
            params={"api-version": "3.0", "to": self.to_language},
        )

        translations = data[0]["translations"][0]["text"].split("|")
        trans_by_text = dict(zip(unique_texts, translations))
        return [(t, trans_by_text.get(t, "")) for t in texts]

    @classmethod
    def split_line(cls, line: str) -> tuple[str, str]:
        """Split line to (timestamp, text).

        Either part may be empty. ``("", "")`` is returned when the line
        does not match ``LINE_PARTS_RE``.
        """
        if m := cls.LINE_PARTS_RE.match(line):
            return m[1], m[2]

        return "", ""

    def append_translations(self, lines: Iterable[str]) -> list[str]:
        """Append translations to the given lyrics texts.

        Lines may contain timestamps from LRCLib which need to be temporarily
        removed for the translation. They can take any of these forms:
            - empty
            Text - text only
            [00:00:00] - timestamp only
            [00:00:00] Text - timestamp with text
        """
        # split into [(timestamp, text), ...]
        ts_and_text = list(map(self.split_line, lines))
        timestamps = [ts for ts, _ in ts_and_text]
        text_pairs = self.get_translations([ln for _, ln in ts_and_text])

        # only add the separator for non-empty translations
        texts = [" / ".join(filter(None, p)) for p in text_pairs]
        # only add the space between non-empty timestamps and texts
        return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]

    def translate(self, lyrics: str) -> str:
        """Translate the given lyrics to the target language.

        If the lyrics are already in the target language or not in any of
        the source languages (if configured), they are returned as is.

        The footer with the source URL is preserved, if present.
        """
        lyrics_language = langdetect.detect(lyrics).upper()
        if lyrics_language == self.to_language or (
            self.from_languages and lyrics_language not in self.from_languages
        ):
            return lyrics

        # Detach the footer so that only the lyrics body is translated;
        # ``url`` is empty when the lyrics carry no footer.
        lyrics, *url = lyrics.split("\n \n Source: ")
        with self.handle_request():
            translated_lines = self.append_translations(lyrics.splitlines())
            return "\n \n Source: ".join(["\n ".join(translated_lines), *url])
856
+
756
857
class LyricsPlugin (RequestHandler , plugins .BeetsPlugin ):
757
858
BACKEND_BY_NAME = {
758
859
b .name : b for b in [LRCLib , Google , Genius , Tekstowo , MusiXmatch ]
@@ -769,15 +870,24 @@ def backends(self) -> list[Backend]:
769
870
770
871
return [self .BACKEND_BY_NAME [c ](self .config , self ._log ) for c in chosen ]
771
872
873
@cached_property
def translator(self) -> Translator | None:
    """Return a configured ``Translator``, or ``None`` if not set up.

    A translator is only built when both ``translate.api_key`` and
    ``translate.to_language`` hold a truthy value; the flattened
    ``translate`` section is passed on as keyword arguments.
    """
    settings = self.config["translate"]
    if not (settings["api_key"].get() and settings["to_language"].get()):
        return None

    return Translator.from_config(self._log, **settings.flatten())
879
+
772
880
def __init__ (self ):
773
881
super ().__init__ ()
774
882
self .import_stages = [self .imported ]
775
883
self .config .add (
776
884
{
777
885
"auto" : True ,
778
- "bing_client_secret" : None ,
779
- "bing_lang_from" : [],
780
- "bing_lang_to" : None ,
886
+ "translate" : {
887
+ "api_key" : None ,
888
+ "from_languages" : [],
889
+ "to_language" : None ,
890
+ },
781
891
"dist_thresh" : 0.11 ,
782
892
"google_API_key" : None ,
783
893
"google_engine_ID" : "009217259823014548361:lndtuqkycfu" ,
@@ -796,7 +906,7 @@ def __init__(self):
796
906
],
797
907
}
798
908
)
799
- self .config ["bing_client_secret " ].redact = True
909
+ self .config ["translate" ][ "api_key " ].redact = True
800
910
self .config ["google_API_key" ].redact = True
801
911
self .config ["google_engine_ID" ].redact = True
802
912
self .config ["genius_api_key" ].redact = True
@@ -810,24 +920,6 @@ def __init__(self):
810
920
# open yet.
811
921
self .rest = None
812
922
813
- self .config ["bing_lang_from" ] = [
814
- x .lower () for x in self .config ["bing_lang_from" ].as_str_seq ()
815
- ]
816
-
817
- @cached_property
818
- def bing_access_token (self ) -> str | None :
819
- params = {
820
- "client_id" : "beets" ,
821
- "client_secret" : self .config ["bing_client_secret" ],
822
- "scope" : "https://api.microsofttranslator.com" ,
823
- "grant_type" : "client_credentials" ,
824
- }
825
-
826
- oauth_url = "https://datamarket.accesscontrol.windows.net/v2/OAuth2-13"
827
- with self .handle_request ():
828
- r = r_session .post (oauth_url , params = params )
829
- return r .json ()["access_token" ]
830
-
831
923
def commands (self ):
832
924
cmd = ui .Subcommand ("lyrics" , help = "fetch song lyrics" )
833
925
cmd .parser .add_option (
@@ -989,14 +1081,12 @@ def fetch_item_lyrics(self, item: Item, write: bool, force: bool) -> None:
989
1081
990
1082
if lyrics :
991
1083
self .info ("🟢 Found lyrics: {0}" , item )
992
- if self .config ["bing_client_secret" ].get ():
993
- lang_from = langdetect .detect (lyrics )
994
- if self .config ["bing_lang_to" ].get () != lang_from and (
995
- not self .config ["bing_lang_from" ]
996
- or (lang_from in self .config ["bing_lang_from" ].as_str_seq ())
997
- ):
998
- lyrics = self .append_translation (
999
- lyrics , self .config ["bing_lang_to" ]
1084
if translator := self.translator:
    initial_lyrics = lyrics
    lyrics = translator.translate(lyrics)
    # translate() hands back its input unchanged when it decides not to
    # translate, so a difference means a translation was appended.
    if lyrics != initial_lyrics:
        # Report the target language from the translator itself: the
        # plugin defaults declare "translate" -> "to_language", and no
        # top-level "translate_to" option.
        self.info("🟢 Added translation to {}", translator.to_language)
1001
1091
else :
1002
1092
self .info ("🔴 Lyrics not found: {}" , item )
@@ -1020,30 +1110,3 @@ def get_lyrics(self, artist: str, title: str, *args) -> str | None:
1020
1110
return f"{ lyrics } \n \n Source: { url } "
1021
1111
1022
1112
return None
1023
-
1024
- def append_translation (self , text , to_lang ):
1025
- from xml .etree import ElementTree
1026
-
1027
- if not (token := self .bing_access_token ):
1028
- self .warn (
1029
- "Could not get Bing Translate API access token. "
1030
- "Check your 'bing_client_secret' password."
1031
- )
1032
- return text
1033
-
1034
- # Extract unique lines to limit API request size per song
1035
- lines = text .split ("\n " )
1036
- unique_lines = set (lines )
1037
- url = "https://api.microsofttranslator.com/v2/Http.svc/Translate"
1038
- with self .handle_request ():
1039
- text = self .fetch_text (
1040
- url ,
1041
- headers = {"Authorization" : f"Bearer { token } " },
1042
- params = {"text" : "|" .join (unique_lines ), "to" : to_lang },
1043
- )
1044
- if translated := ElementTree .fromstring (text .encode ("utf-8" )).text :
1045
- # Use a translation mapping dict to build resulting lyrics
1046
- translations = dict (zip (unique_lines , translated .split ("|" )))
1047
- return "" .join (f"{ ln } / { translations [ln ]} \n " for ln in lines )
1048
-
1049
- return text
0 commit comments