From c7fdf2235f5e65ca6980a8c851d7eaa475a52547 Mon Sep 17 00:00:00 2001 From: BingLingGroup <42505588+BingLingGroup@users.noreply.github.com> Date: Tue, 14 Jan 2020 19:09:08 +0800 Subject: [PATCH 01/10] Refactor update_po_files.py --- scripts/update_po_files.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/scripts/update_po_files.py b/scripts/update_po_files.py index 0919ab7f..e083d60f 100644 --- a/scripts/update_po_files.py +++ b/scripts/update_po_files.py @@ -6,16 +6,6 @@ import sys import subprocess -locale_codes = [ - "__init__", - "cmdline_utils", - "core", - "metadata", - "options", - "ffmpeg_utils", - "lang_code_utils" -] - name = "autosub" @@ -46,7 +36,13 @@ def run_cmd(cmd): if not os.path.exists(locale_dir): os.makedirs(locale_dir) - locale_codes = sorted(locale_codes) + locale_codes = [] + + for item in os.listdir(name): + filename = os.path.splitext(item) + if filename[-1] == ".py": + locale_codes.append(filename[0]) + for python_code in locale_codes: code = "{name}/{py}.py".format( name=name, From a8acc03fecdb260cfadec37a07a4b5e43ea8aa0e Mon Sep 17 00:00:00 2001 From: BingLingGroup <42505588+BingLingGroup@users.noreply.github.com> Date: Tue, 14 Jan 2020 20:39:08 +0800 Subject: [PATCH 02/10] Add .aac requests headers support --- autosub/cmdline_utils.py | 10 +++- autosub/core.py | 17 +++++-- .../LC_MESSAGES/autosub.cmdline_utils.mo | Bin 8988 -> 8988 bytes .../LC_MESSAGES/autosub.cmdline_utils.po | 26 +++++----- .../locale/zh_CN/LC_MESSAGES/autosub.core.mo | Bin 1374 -> 1458 bytes .../locale/zh_CN/LC_MESSAGES/autosub.core.po | 37 ++++++++------ .../zh_CN/LC_MESSAGES/autosub.options.mo | Bin 21873 -> 21669 bytes .../zh_CN/LC_MESSAGES/autosub.options.po | 45 +++++++++--------- autosub/options.py | 4 +- autosub/speech_trans_api.py | 7 ++- scripts/nuitka_build.bat | 2 +- 11 files changed, 85 insertions(+), 63 deletions(-) diff --git a/autosub/cmdline_utils.py b/autosub/cmdline_utils.py index 28539f1a..8dd4aef7 100644 
--- a/autosub/cmdline_utils.py +++ b/autosub/cmdline_utils.py @@ -910,6 +910,14 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen else: audio_for_api = args.input + if args.api_suffix == ".flac": + headers = \ + {"Content-Type": "audio/x-flac; rate={rate}".format(rate=args.api_sample_rate)} + else: + headers = \ + {"Content-Type": "audio/aac; rate={rate}".format(rate=args.api_sample_rate)} + args.audio_split_cmd = args.audio_split_cmd.replace("-c copy ", "") + audio_fragments = core.bulk_audio_conversion( source_file=audio_for_api, output=args.output, @@ -940,12 +948,12 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen text_list = core.audio_to_text( audio_fragments=audio_fragments, api_url=gsv2_api_url, + headers=headers, regions=regions, api_key=args.gspeechv2, concurrency=args.speech_concurrency, src_language=args.speech_language, min_confidence=args.min_confidence, - audio_rate=args.api_sample_rate, is_keep=args.keep ) diff --git a/autosub/core.py b/autosub/core.py index d2ef63c1..901da2ee 100644 --- a/autosub/core.py +++ b/autosub/core.py @@ -22,6 +22,7 @@ from autosub import sub_utils from autosub import constants from autosub import ffmpeg_utils +from autosub import exceptions CORE_TEXT = gettext.translation(domain=__name__, localedir=constants.LOCALE_PATH, @@ -123,11 +124,11 @@ def audio_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man audio_fragments, api_url, regions, + headers, api_key=None, concurrency=constants.DEFAULT_CONCURRENCY, src_language=constants.DEFAULT_SRC_LANGUAGE, min_confidence=0.0, - audio_rate=44100, is_keep=False ): """ @@ -139,20 +140,21 @@ def audio_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man text_list = [] pool = multiprocessing.Pool(concurrency) + if api_key: recognizer = speech_trans_api.GoogleSpeechToTextV2( api_url=api_url, api_key=api_key, + headers=headers, min_confidence=min_confidence, - lang_code=src_language, - 
rate=audio_rate) + lang_code=src_language) else: recognizer = speech_trans_api.GoogleSpeechToTextV2( api_url=api_url, api_key=constants.GOOGLE_SPEECH_V2_API_KEY, + headers=headers, min_confidence=min_confidence, lang_code=src_language, - rate=audio_rate, is_keep=is_keep) print(_("\nSending short-term fragments to API and getting result.")) @@ -170,10 +172,15 @@ def audio_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man text_list.append("") pbar.finish() - except KeyboardInterrupt: + except (KeyboardInterrupt, AttributeError) as error: pbar.finish() pool.terminate() pool.join() + + if error == AttributeError: + raise exceptions.SpeechToTextException( + _("Error: Connection error happened too many times.\nAll works done.")) + return None return text_list diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo index 6b6b532b209f4ea13a47d9755b3f48d778e77901..5c63145185d705157bf77bc489f625e81eb5b1d9 100644 GIT binary patch delta 27 icmbQ^HpgwlBVj%x16>0{T|*NEBLgdA!_6Orb@%~$?Fb71 delta 27 icmbQ^HpgwlBVj&6OI<@FU1I|ULo+Kwlg%H5b@%~%wFnsi diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po index e1523755..2c14f369 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2019-12-30 16:14+0800\n" +"POT-Creation-Date: 2020-01-14 20:31+0800\n" "PO-Revision-Date: 2019-08-12 09:16+0800\n" "Last-Translator: \n" "Language-Team: \n" @@ -267,16 +267,16 @@ msgstr "" "\n" "啥都没做。检查你的\"-of\"/\"--output-files\"选项。" -#: autosub/cmdline_utils.py:637 autosub/cmdline_utils.py:1025 +#: autosub/cmdline_utils.py:637 autosub/cmdline_utils.py:1033 msgid "Error: Translation failed." 
msgstr "错误:翻译失败。" -#: autosub/cmdline_utils.py:682 autosub/cmdline_utils.py:1061 +#: autosub/cmdline_utils.py:682 autosub/cmdline_utils.py:1069 msgid "Bilingual subtitles file created at \"{}\"." msgstr "双语字幕创建在了\"{}\"。" #: autosub/cmdline_utils.py:686 autosub/cmdline_utils.py:881 -#: autosub/cmdline_utils.py:996 autosub/cmdline_utils.py:1065 +#: autosub/cmdline_utils.py:1004 autosub/cmdline_utils.py:1073 msgid "" "\n" "All works done." @@ -284,7 +284,7 @@ msgstr "" "\n" "做完了。" -#: autosub/cmdline_utils.py:725 autosub/cmdline_utils.py:1104 +#: autosub/cmdline_utils.py:725 autosub/cmdline_utils.py:1112 msgid "Destination language subtitles file created at \"{}\"." msgstr "目的语言字幕文件创建在了\"{}\"。" @@ -305,7 +305,7 @@ msgstr "" msgid "Error: Convert source audio to \"{name}\" failed." msgstr "错误:转换源音频至\"{name}\"失败。" -#: autosub/cmdline_utils.py:841 autosub/cmdline_utils.py:933 +#: autosub/cmdline_utils.py:841 autosub/cmdline_utils.py:941 msgid "" "\n" "\"{name}\" has been deleted." @@ -317,7 +317,7 @@ msgstr "" msgid "Error: Can't get speech regions." msgstr "错误:无法得到语音区域。" -#: autosub/cmdline_utils.py:878 autosub/cmdline_utils.py:1180 +#: autosub/cmdline_utils.py:878 autosub/cmdline_utils.py:1188 msgid "Times file created at \"{}\"." msgstr "时间轴文件创建在了\"{}\"." @@ -329,11 +329,11 @@ msgstr "" "\n" "为API转换为\"{name}\"。" -#: autosub/cmdline_utils.py:929 +#: autosub/cmdline_utils.py:937 msgid "Error: Conversion failed." msgstr "错误:转换失败。" -#: autosub/cmdline_utils.py:937 +#: autosub/cmdline_utils.py:945 msgid "" "Audio processing complete.\n" "All works done." @@ -341,7 +341,7 @@ msgstr "" "音频处理完毕。\n" "做完了。" -#: autosub/cmdline_utils.py:954 +#: autosub/cmdline_utils.py:962 msgid "" "Error: Speech-to-text failed.\n" "All works done." @@ -349,11 +349,11 @@ msgstr "" "错误:语音转文字失败。\n" "做完了。" -#: autosub/cmdline_utils.py:992 autosub/cmdline_utils.py:1146 +#: autosub/cmdline_utils.py:1000 autosub/cmdline_utils.py:1154 msgid "Speech language subtitles file created at \"{}\"." 
msgstr "语音字幕文件创建在了\"{}\"。" -#: autosub/cmdline_utils.py:1113 +#: autosub/cmdline_utils.py:1121 msgid "" "Override \"-of\"/\"--output-files\" due to your args too few.\n" "Output source subtitles file only." @@ -361,7 +361,7 @@ msgstr "" "因为你其他参数输入得太少了,忽略\"-of\"/\"--output-files\"参数。\n" "只输出源语言字幕文件。" -#: autosub/cmdline_utils.py:1151 +#: autosub/cmdline_utils.py:1159 msgid "" "Override \"-of\"/\"--output-files\" due to your args too few.\n" "Output regions subtitles file only." diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.mo index 48841cc78fc66515e8971be8e51b4c035b76e6a2..0ad1be9f87e9f09104909b07b53614de5b07a43c 100644 GIT binary patch delta 424 zcmXZW&r1S96bJCPtzoX(4+){Dm@ZvJ*>#0N3XCv_&RseLk_$b&&M0fL08Fs0Ahv>W3gYSH19`l%~OBH9n?y(&b%7qLdYe)}LL#$!~ zkqU?57@UJ6umJnu4YXz-VFcFTI2>#vnuhbR8=k^hSb=W%1HXy#F+UuD@uO3MVK$z+nPaAP^c;#}7P z*AK#wV0uJPXHfRbieCw_pd40JGbmmNA@fYU6vy$CHf>GnpVmQ83{(UcxpH$uX$?C02a1D1baBg&p^J(JN9z-~1Rudg za8O5w`T!k#f(kx~-y57fzN;cVJ_N4F3ZCHtUZGvM#}FTIfS>mB1OCs=ezh&4-s4 zYCp}hxMOsguUzqtW AR{#J2 diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po index b67f3fbe..40541fe1 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po @@ -7,17 +7,17 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2019-09-03 21:22+0800\n" -"PO-Revision-Date: 2019-07-25 20:25+0800\n" +"POT-Creation-Date: 2020-01-14 20:33+0800\n" +"PO-Revision-Date: 2020-01-14 20:33+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Language: zh_CN\n" -"X-Generator: Poedit 2.2.3\n" +"X-Generator: Poedit 2.2.4\n" -#: autosub/core.py:100 +#: autosub/core.py:101 msgid "" "\n" "Converting speech regions to short-term fragments." 
@@ -25,11 +25,11 @@ msgstr "" "\n" "按照语音区域将音频转换为多个短语音片段。" -#: autosub/core.py:101 +#: autosub/core.py:102 msgid "Converting: " msgstr "转换中: " -#: autosub/core.py:158 +#: autosub/core.py:160 msgid "" "\n" "Sending short-term fragments to API and getting result." @@ -37,11 +37,19 @@ msgstr "" "\n" "将短片段语音发送给API并得到识别结果。" -#: autosub/core.py:159 +#: autosub/core.py:161 msgid "Speech-to-Text: " msgstr "语音转文字中: " -#: autosub/core.py:204 autosub/core.py:255 +#: autosub/core.py:182 +msgid "" +"Error: Connection error happened too many times.\n" +"All works done." +msgstr "" +"错误:过多连接错误。\n" +"做完了。" + +#: autosub/core.py:211 autosub/core.py:262 msgid "" "\n" "Translating text from \"{0}\" to \"{1}\"." @@ -49,24 +57,23 @@ msgstr "" "\n" "从\"{0}\"翻译为\"{1}\"。" -#: autosub/core.py:214 autosub/core.py:305 +#: autosub/core.py:221 autosub/core.py:312 msgid "Translation: " msgstr "翻译中: " -#: autosub/core.py:233 autosub/core.py:366 +#: autosub/core.py:240 autosub/core.py:373 msgid "Cancelling transcription." msgstr "取消翻译。" -#: autosub/core.py:447 autosub/core.py:514 +#: autosub/core.py:454 autosub/core.py:521 msgid "Format \"{fmt}\" not supported. Using \"{default_fmt}\" instead." msgstr "不支持格式\"{fmt}\"。改用\"{default_fmt}\"。" -#: autosub/core.py:543 -msgid "" -"There is already a file with the same name in this location: \"{dest_name}\"." +#: autosub/core.py:550 +msgid "There is already a file with the same name in this location: \"{dest_name}\"." 
msgstr "在此处已有同名文件:\"{dest_name}\"。" -#: autosub/core.py:546 +#: autosub/core.py:553 msgid "Input a new path (including directory and file name) for output file.\n" msgstr "为输出文件输入一个新的路径(包括路径和文件名)。\n" diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.mo index 2a6907bd7d9cc9fbadd22e4ec49a7ecad830070f..76a16be70b296f195fce3d42e40d4217f986a022 100644 GIT binary patch delta 1981 zcmY+_S!`5Q9LMqBEzl{YEnP-RTaf9#F{M*Rkb+&tw~Sv@8|ySa^{|M{^$1b zN$1NGPU3F%~upfWLKJ1(-UBpW` zh=Zw893SH@e0`b}a-=^n$>FEc)1{eQ$jp$YVbhv&Q_mhuhOaEe94!?n|K`9M4LjJI`Uuv#sQeVBX?p2d<q=_c2AR7xzf8mlDA zMy;5P9cbq_RWbi|qL&lYi+12-96;(xaXKkq%>U;cJYX7Z>IQ10ukm-Z8_&%1Mm)DxIzoO8m(%g~2*rjc znMdkNzs>iG>ua4=UToo4(p%HykB$pptQ4}aq0#}#|?Sx9V^N}r&nxQ$frRwaXQxc=Nr&o zV1=J=@O{Cz2ZLPiL+dY&lkgME`F-umqCks6F?SoPySP-wACN zSrL&+R1&P*gtnf{K6s2*340T3)Qi~0ZexkWD*WFlYCS_xTWTO|t65`aMY*`m1~A^b zIcOs05haApS}tL&nXqNF#@Ert?-dZs;(p&Px9MbuxQ$iN3W5!wG9rtx79=Qs_kV;< zt2I_qIYiVFS~^iqEVP6+%|k7|#s2TRDYNc=PA#b>Y+Hp1+o#sDi8_MK;O=GW%Wd|l zw#JTCh^Qj0KE_*sgF+&cu$?pBsAEmBQBa7g@XzvnQR9K(z>?8|lr_Z}b+zHzNH`J= U)z#L|kBq)sz0nzM3P&pb1HQz^hyVZp delta 2066 zcmY+_drX&A9LMp`4=)IEkxO!k^v6x^$d5~bLV%cxl9YzBW(FnVXc3Z2GxOJSl`YrU ze!8Mqn$c!88e z`xUftC*vd@q0g`ezeXR|lj5aAv{MqKzwu3Ehz1j-DErh=D#LVqjJM$Ko1|$xf}it% zy$hubT+Lhd(%+RLt;HG7l2rExKY?55e*quFAMieGx>;($37Emkx5cN?!+OqB}h$|HsVF(pYk(Y zn=n{PM>mxsJc{;!FR%@#um@{1rL*`x+Jk$tq*w8Kd=K|$^Fx@S;w7vxR_D0wv0UjK z*T?fDmRtS#66vEsoQo&&nSZ-5;&pt4_YxAG3!1JfGVT9}8nzhA=b_!qY0wv{}92Y!O~MZK$;W*oz0yySV!GjfgF zpNZ*Quf{gqioJH!oTXwfE^TxlREyT$ieKSFcmW&My5~XiI*H}f9%Pkt3~TYcXF`)Z zej|3$--b`&1b&K*ce2GXaXn{??f*kmKA^+Td{;3=lQ@AE+3z&? 
zQ%;b#&M|9K@BLRHW>%z$Q^GxsR-&F@(}<(O*}|49l%U8vEp_UAxu>0mtcr6eSywc{ z25}0r>?EFTpBy6I>2;Taq}LsUods6zB5bNFtU-w(atSt*Q=IePYUGwvviNIDVTD)p0G4cJvn2cXo8|^6%(q-`3F^dM(8t+8r4?d@6i%W=x=}sHCzeQ0@= 1) (default: %(default)s)" msgstr "" "输出更多的文件。可选种类:regions, src, dst, bilingual, all.(时间轴,源语言" -"字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数" -"为%(default)s)" +"字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数为%" +"(default)s)" #: autosub/options.py:253 msgid "" @@ -354,8 +354,8 @@ msgid "" "Number of concurrent Google Speech V2 requests to make. (arg_num = 1) " "(default: %(default)s)" msgstr "" -"用于Google Speech V2 requests请求的并行数量。(参数个数为1)(默认参数" -"为%(default)s)" +"用于Google Speech V2 requests请求的并行数量。(参数个数为1)(默认参数为%" +"(default)s)" #: autosub/options.py:300 autosub/options.py:526 autosub/options.py:536 #: autosub/options.py:546 @@ -368,8 +368,8 @@ msgid "" "(Experimental)Seconds to sleep between two translation requests. (arg_num = " "1) (default: %(default)s)" msgstr "" -"(实验性)在两次翻译请求之间睡眠(暂停)的时间。(参数个数为1)(默认参数" -"为%(default)s)" +"(实验性)在两次翻译请求之间睡眠(暂停)的时间。(参数个数为1)(默认参数为%" +"(default)s)" #: autosub/options.py:312 msgid "" @@ -380,8 +380,9 @@ msgstr "" "latest/(参数个数大于等于1)" #: autosub/options.py:320 +#, fuzzy msgid "" -"(Experimental)Customize User-Agent header. Same docs above. (arg_num = 1)" +"(Experimental)Customize User-Agent headers. Same docs above. (arg_num = 1)" msgstr "" "(实验性)自定义用户代理(User-Agent)头部。同样的参考文档如上。(参数个数为" "1)" @@ -397,8 +398,8 @@ msgstr "" #: autosub/options.py:338 #, python-format msgid "" -"Number of lines per Google Translate V2 request. (arg_num = 1) (default: " -"%(default)s)" +"Number of lines per Google Translate V2 request. (arg_num = 1) (default: %" +"(default)s)" msgstr "" "Google Translate V2请求的每行行数。(参数个数为1)(默认参数为%(default)s)" @@ -408,8 +409,8 @@ msgid "" "Number of concurrent Google translate V2 API requests to make. 
(arg_num = 1) " "(default: %(default)s)" msgstr "" -"Google translate V2 API请求的并行数量。(参数个数为1)(默认参数" -"为%(default)s)" +"Google translate V2 API请求的并行数量。(参数个数为1)(默认参数为%(default)" +"s)" #: autosub/options.py:355 msgid "Change the Google Speech V2 API URL into the http one. (arg_num = 0)" @@ -532,8 +533,8 @@ msgid "" "(Experimental)This arg will override the default API audio suffix. (arg_num " "= 1) (default: %(default)s)" msgstr "" -"(实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数" -"为%(default)s)" +"(实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数为%(default)" +"s)" #: autosub/options.py:495 msgid "sample_rate" @@ -581,16 +582,16 @@ msgstr "" msgid "" "Minimum region size. Same docs above. (arg_num = 1) (default: %(default)s)" msgstr "" -"最小语音区域大小。同样的参考文档如上。(参数个数为1)(默认参数" -"为%(default)s)" +"最小语音区域大小。同样的参考文档如上。(参数个数为1)(默认参数为%(default)" +"s)" #: autosub/options.py:539 #, python-format msgid "" "Maximum region size. Same docs above. (arg_num = 1) (default: %(default)s)" msgstr "" -"最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数" -"为%(default)s)" +"最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数为%(default)" +"s)" #: autosub/options.py:549 #, python-format diff --git a/autosub/options.py b/autosub/options.py index 6bf44c2c..0d8992b9 100644 --- a/autosub/options.py +++ b/autosub/options.py @@ -316,8 +316,8 @@ def get_cmd_args(): # pylint: disable=too-many-statements pygt_group.add_argument( '-ua', '--user-agent', - metavar='User-Agent header', - help=_("(Experimental)Customize User-Agent header. " + metavar='User-Agent headers', + help=_("(Experimental)Customize User-Agent headers. " "Same docs above. 
" "(arg_num = 1)") ) diff --git a/autosub/speech_trans_api.py b/autosub/speech_trans_api.py index 58a9faa3..50deae93 100644 --- a/autosub/speech_trans_api.py +++ b/autosub/speech_trans_api.py @@ -26,19 +26,19 @@ class GoogleSpeechToTextV2(object): # pylint: disable=too-few-public-methods def __init__(self, api_url, api_key, + headers, min_confidence=0.0, lang_code="en", - rate=44100, retries=3, is_keep=False): # pylint: disable=too-many-arguments self.min_confidence = min_confidence - self.rate = rate self.retries = retries self.api_key = api_key self.lang_code = lang_code self.api_url = api_url.format(lang=lang_code, key=api_key) self.is_keep = is_keep + self.headers = headers def __call__(self, filename): try: # pylint: disable=too-many-nested-blocks @@ -48,9 +48,8 @@ def __call__(self, filename): if not self.is_keep: os.remove(filename) for _ in range(self.retries): - headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate} try: - result = requests.post(self.api_url, data=audio_data, headers=headers) + result = requests.post(self.api_url, data=audio_data, headers=self.headers) except requests.exceptions.ConnectionError: continue diff --git a/scripts/nuitka_build.bat b/scripts/nuitka_build.bat index 83241906..76ff50b8 100644 --- a/scripts/nuitka_build.bat +++ b/scripts/nuitka_build.bat @@ -8,4 +8,4 @@ cd %~dp0 call update_requirements.bat cd ..\ pip install -r requirements.txt -nuitka "%package_name%" --standalone --output-dir %output_dir% --show-progress --show-scons --show-modules --windows-icon=%icon_dir% --recurse-to=multiprocessing --plugin-enable=multiprocessing --lto --assume-yes-for-downloads \ No newline at end of file +nuitka "%package_name%" --standalone --output-dir %output_dir% --show-progress --show-scons --show-modules --windows-icon=%icon_dir% --recurse-to=multiprocessing --plugin-enable=multiprocessing --assume-yes-for-downloads \ No newline at end of file From 57546d70c406cb144f239484e41de76b51f21639 Mon Sep 17 00:00:00 2001 From: 
BingLingGroup <42505588+BingLingGroup@users.noreply.github.com> Date: Wed, 22 Jan 2020 21:35:29 +0800 Subject: [PATCH 03/10] Add (close #10) Google Cloud Speech-to-Text support --- .gitignore | 2 + autosub/cmdline_utils.py | 124 +++++++++++++++++++------ autosub/constants.py | 2 +- autosub/core.py | 174 +++++++++++++++++++++++++++++++----- autosub/options.py | 53 ++++++++--- autosub/speech_trans_api.py | 157 ++++++++++++++++++++++++++++++-- requirements.txt | 10 ++- setup.py | 5 +- 8 files changed, 447 insertions(+), 80 deletions(-) diff --git a/.gitignore b/.gitignore index f6c7a024..3453adc3 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,5 @@ MANIFEST /.build_and_dist_win32 /.release *.po~ +Pipfile +*.lock \ No newline at end of file diff --git a/autosub/cmdline_utils.py b/autosub/cmdline_utils.py index 8dd4aef7..a7bd8589 100644 --- a/autosub/cmdline_utils.py +++ b/autosub/cmdline_utils.py @@ -263,7 +263,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret ) if args.speech_language: # pylint: disable=too-many-nested-blocks - if not args.gspeechv2: + if args.speech_api == "gsv2" or args.speech_api == "gcsv1": args.speech_language = args.speech_language.lower() if args.speech_language \ not in constants.SPEECH_TO_TEXT_LANGUAGE_CODES: @@ -292,6 +292,11 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret _("Error: The arg of \"-mnc\"/\"--min-confidence\" isn't legal.") ) + else: + raise exceptions.AutosubException( + _("Error: Wrong API code.") + ) + if args.dst_language is None: print( _("Destination language not provided. " @@ -764,11 +769,6 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen Give args and process an input audio or video file. 
""" - if args.http_speech_api: - gsv2_api_url = "http://" + constants.GOOGLE_SPEECH_V2_API_URL - else: - gsv2_api_url = "https://" + constants.GOOGLE_SPEECH_V2_API_URL - if not args.output_files: raise exceptions.AutosubException( _("\nNo works done." @@ -817,7 +817,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen out_=audio_wav ) print(_("\nConvert source audio to \"{name}\" " - "and get audio length for regions detection.").format( + "to get audio length and detect audio regions.").format( name=audio_wav)) print(command) subprocess.check_output( @@ -906,18 +906,9 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen raise exceptions.AutosubException( _("Error: Convert source audio to \"{name}\" failed.").format( name=audio_for_api)) - else: audio_for_api = args.input - if args.api_suffix == ".flac": - headers = \ - {"Content-Type": "audio/x-flac; rate={rate}".format(rate=args.api_sample_rate)} - else: - headers = \ - {"Content-Type": "audio/aac; rate={rate}".format(rate=args.api_sample_rate)} - args.audio_split_cmd = args.audio_split_cmd.replace("-c copy ", "") - audio_fragments = core.bulk_audio_conversion( source_file=audio_for_api, output=args.output, @@ -944,18 +935,95 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen raise exceptions.AutosubException( _("Audio processing complete.\nAll works done.")) - # speech to text - text_list = core.audio_to_text( - audio_fragments=audio_fragments, - api_url=gsv2_api_url, - headers=headers, - regions=regions, - api_key=args.gspeechv2, - concurrency=args.speech_concurrency, - src_language=args.speech_language, - min_confidence=args.min_confidence, - is_keep=args.keep - ) + if args.speech_api == "gsv2": + # Google speech-to-text v2 + if args.http_speech_api: + gsv2_api_url = "http://" + \ + constants.GOOGLE_SPEECH_V2_API_URL + else: + gsv2_api_url = "https://" + \ + constants.GOOGLE_SPEECH_V2_API_URL + + if args.speech_key: + 
gsv2_api_url = gsv2_api_url.format( + lang=args.speech_language, + key=args.speech_key) + else: + gsv2_api_url = gsv2_api_url.format( + lang=args.speech_language, + key=constants.GOOGLE_SPEECH_V2_API_KEY) + + if args.api_suffix == ".flac": + headers = \ + {"Content-Type": "audio/x-flac; rate={rate}".format(rate=args.api_sample_rate)} + else: + headers = \ + {"Content-Type": "audio/aac; rate={rate}".format(rate=args.api_sample_rate)} + + text_list = core.gsv2_to_text( + audio_fragments=audio_fragments, + api_url=gsv2_api_url, + headers=headers, + regions=regions, + concurrency=args.speech_concurrency, + min_confidence=args.min_confidence, + is_keep=args.keep + ) + + elif args.speech_api == "gcsv1": + # Google Cloud speech-to-text V1P1Beta1 + if args.speech_key: + headers = \ + {"Content-Type": "application/json"} + gcsv1_api_url = \ + "https://speech.googleapis.com/" \ + "v1p1beta1/speech:recognize?key={api_key}".format( + api_key=args.speech_key) + print(_("Use the API key " + "given in the option \"-skey\"/\"--speech-key\".")) + text_list = core.gcsv1_to_text( + audio_fragments=audio_fragments, + sample_rate=args.api_sample_rate, + regions=regions, + api_url=gcsv1_api_url, + headers=headers, + concurrency=args.speech_concurrency, + src_language=args.speech_language, + min_confidence=args.min_confidence, + is_keep=args.keep + ) + elif args.service_account and os.path.isfile(args.service_account): + print(_("Set the GOOGLE_APPLICATION_CREDENTIALS " + "given in the option \"-sa\"/\"--service-account\".")) + os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = args.service_account + text_list = core.gcsv1_to_text( + audio_fragments=audio_fragments, + sample_rate=args.api_sample_rate, + regions=regions, + concurrency=args.speech_concurrency, + src_language=args.speech_language, + min_confidence=args.min_confidence, + is_keep=args.keep + ) + else: + if 'GOOGLE_APPLICATION_CREDENTIALS' in os.environ: + print(_("Use the GOOGLE_APPLICATION_CREDENTIALS " + "in the environment 
variables.")) + text_list = core.gcsv1_to_text( + audio_fragments=audio_fragments, + sample_rate=args.api_sample_rate, + regions=regions, + concurrency=args.speech_concurrency, + src_language=args.speech_language, + min_confidence=args.min_confidence, + is_keep=args.keep + ) + else: + print(_("No available GOOGLE_APPLICATION_CREDENTIALS. " + "Use \"-sa\"/\"--service-account\" to set one.")) + text_list = None + else: + text_list = None if not text_list or len(text_list) != len(regions): raise exceptions.SpeechToTextException( diff --git a/autosub/constants.py b/autosub/constants.py index c330b8a2..49a2c0fc 100644 --- a/autosub/constants.py +++ b/autosub/constants.py @@ -88,7 +88,7 @@ "ffmpeg -hide_banner -y -i \"{in_}\" -vn -ac {channel} -ar {sample_rate} \"{out_}\"" DEFAULT_AUDIO_SPLT = \ - "ffmpeg -ss {start} -t {dura} -y -i \"{in_}\" -c copy -loglevel error \"{out_}\"" + "ffmpeg -y -ss {start} -i \"{in_}\" -t {dura} -loglevel error \"{out_}\"" DEFAULT_VIDEO_FPS_CMD = "ffprobe -v 0 -of csv=p=0 -select_streams" \ " v:0 -show_entries stream=r_frame_rate \"{in_}\"" diff --git a/autosub/core.py b/autosub/core.py index 901da2ee..c055a5cf 100644 --- a/autosub/core.py +++ b/autosub/core.py @@ -16,6 +16,7 @@ import auditok import googletrans import wcwidth +from google.cloud.speech_v1p1beta1 import enums # Any changes to the path and your own modules from autosub import speech_trans_api @@ -120,20 +121,18 @@ def bulk_audio_conversion( # pylint: disable=too-many-arguments return audio_fragments -def audio_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-many-branches,too-many-statements +def gsv2_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-many-branches,too-many-statements audio_fragments, api_url, regions, headers, - api_key=None, concurrency=constants.DEFAULT_CONCURRENCY, - src_language=constants.DEFAULT_SRC_LANGUAGE, min_confidence=0.0, is_keep=False ): """ Give a list of short-term audio fragment files - and generate text_list 
from speech-to-text api. + and generate text_list from Google speech-to-text V2 api. """ if not regions: return None @@ -141,23 +140,13 @@ def audio_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man text_list = [] pool = multiprocessing.Pool(concurrency) - if api_key: - recognizer = speech_trans_api.GoogleSpeechToTextV2( - api_url=api_url, - api_key=api_key, - headers=headers, - min_confidence=min_confidence, - lang_code=src_language) - else: - recognizer = speech_trans_api.GoogleSpeechToTextV2( - api_url=api_url, - api_key=constants.GOOGLE_SPEECH_V2_API_KEY, - headers=headers, - min_confidence=min_confidence, - lang_code=src_language, - is_keep=is_keep) - - print(_("\nSending short-term fragments to API and getting result.")) + recognizer = speech_trans_api.GoogleSpeechV2( + api_url=api_url, + headers=headers, + min_confidence=min_confidence, + is_keep=is_keep) + + print(_("\nSending short-term fragments to Google Speech V2 API and getting result.")) widgets = [_("Speech-to-Text: "), progressbar.Percentage(), ' ', progressbar.Bar(), ' ', @@ -167,9 +156,9 @@ def audio_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man for i, transcript in enumerate(pool.imap(recognizer, audio_fragments)): if transcript: text_list.append(transcript) - pbar.update(i) else: text_list.append("") + pbar.update(i) pbar.finish() except (KeyboardInterrupt, AttributeError) as error: @@ -178,7 +167,7 @@ def audio_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man pool.join() if error == AttributeError: - raise exceptions.SpeechToTextException( + print( _("Error: Connection error happened too many times.\nAll works done.")) return None @@ -186,6 +175,143 @@ def audio_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man return text_list +def gcsv1_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-many-branches,too-many-statements + audio_fragments, + sample_rate, + regions, + api_url=None, + 
headers=None, + concurrency=constants.DEFAULT_CONCURRENCY, + src_language=constants.DEFAULT_SRC_LANGUAGE, + min_confidence=0.0, + is_keep=False +): + """ + Give a list of short-term audio fragment files + and generate text_list from Google cloud speech-to-text V1P1Beta1 api. + """ + if not regions: + return None + + text_list = [] + pool = multiprocessing.Pool(concurrency) + + print(_("\nSending short-term fragments to Google Cloud Speech V1P1Beta1 API" + " and getting result.")) + widgets = [_("Speech-to-Text: "), + progressbar.Percentage(), ' ', + progressbar.Bar(), ' ', + progressbar.ETA()] + pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(regions)).start() + + try: + if api_url: + # https://cloud.google.com/speech-to-text/docs/quickstart-protocol + if audio_fragments[0].lower().endswith(".flac"): + encoding = "FLAC" + elif audio_fragments[0].lower().endswith(".mp3"): + encoding = "MP3" + elif audio_fragments[0].lower().endswith(".wav"): + # regard WAV as PCM + encoding = "PCM" + elif audio_fragments[0].lower().endswith(".ogg"): + encoding = "OGG" + else: + encoding = "" + + config = { + "encoding": encoding, + "sampleRateHertz": sample_rate, + "languageCode": src_language, + } + + recognizer = speech_trans_api.GCSV1P1Beta1URL( + config=config, + api_url=api_url, + headers=headers, + min_confidence=min_confidence, + is_keep=is_keep) + + for i, transcript in enumerate(pool.imap(recognizer, audio_fragments)): + if transcript: + text_list.append(transcript) + else: + text_list.append("") + pbar.update(i) + + else: + # https://cloud.google.com/speech-to-text/docs/reference/rest/v1p1beta1/RecognitionConfig?hl=zh-cn#AudioEncoding + if audio_fragments[0].lower().endswith(".flac"): + encoding = \ + enums.RecognitionConfig.AudioEncoding.FLAC + # encoding = 2 + elif audio_fragments[0].lower().endswith(".mp3"): + encoding = \ + enums.RecognitionConfig.AudioEncoding.MP3 + # encoding = 8 + elif audio_fragments[0].lower().endswith(".wav"): + # regard WAV as PCM + 
encoding = \ + enums.RecognitionConfig.AudioEncoding.LINEAR16 + # encoding = 1 + elif audio_fragments[0].lower().endswith(".ogg"): + encoding = \ + enums.RecognitionConfig.AudioEncoding.OGG_OPUS + # encoding = 6 + else: + encoding = \ + enums.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED + # encoding = 0 + + # https://pypi.org/project/google-cloud-speech/ + config = { + "language_code": src_language, + "sample_rate_hertz": sample_rate, + "encoding": encoding, + } + + i = 0 + tasks = [] + for filename in audio_fragments: + # google cloud speech-to-text client can't use multiprocessing.pool + # based on class call, otherwise will receive pickling error + tasks.append(pool.apply_async( + speech_trans_api.gcsv1p1beta1_service_client, + args=(filename, is_keep, config, min_confidence))) + + for task in tasks: + i = i + 1 + transcript = task.get() + if transcript: + text_list.append(transcript) + else: + text_list.append("") + pbar.update(i) + + pbar.finish() + + except (KeyboardInterrupt, AttributeError) as error: + pbar.finish() + pool.terminate() + pool.join() + + if error == AttributeError: + print( + _("Error: Connection error happened too many times.\nAll works done.")) + + return None + + except exceptions.SpeechToTextException as err_msg: + pbar.finish() + pool.terminate() + pool.join() + print(_("Receive something unexpected:")) + print(err_msg) + return None + + return text_list + + def list_to_gtv2( # pylint: disable=too-many-locals,too-many-arguments text_list, api_key=None, @@ -279,7 +405,7 @@ def list_to_googletrans( # pylint: disable=too-many-locals, too-many-arguments, # If text contains full-wide char, # count its length about 4 times than the ordinary text. # Avoid weird problem when text has full-wide char. - # In this case google will count a full-wide char + # In this case Google will count a full-wide char # at least 2 times larger than a half-wide char. # It will certainly exceed the limit of the size_per_trans. 
# Causing a googletrans internal jsondecode error. diff --git a/autosub/options.py b/autosub/options.py index 0d8992b9..ca559e58 100644 --- a/autosub/options.py +++ b/autosub/options.py @@ -75,8 +75,8 @@ def get_cmd_args(): # pylint: disable=too-many-statements _('Options to control translation. ' 'Default method to translate. ' 'Could be blocked at any time.')) - gsv2_group = parser.add_argument_group( - _('Google Speech V2 Options'), + gtv2_group = parser.add_argument_group( + _('Google Translate V2 Options'), _('Options to control translation.(Not been tested) ' 'If the API key is given, ' 'it will replace the py-googletrans method.')) @@ -231,7 +231,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements output_group.add_argument( '-y', '--yes', action='store_true', - help=_("Avoid any pause and overwriting files. " + help=_("Prevent pauses and allow files to be overwritten. " "Stop the program when your args are wrong. (arg_num = 0)") ) @@ -266,11 +266,24 @@ def get_cmd_args(): # pylint: disable=too-many-statements ) speech_group.add_argument( - '-gsv2', '--gspeechv2', + '-sapi', '--speech-api', + metavar=_('api_code'), + default='gsv2', + help=_("Choose which Speech-to-Text API to use. " + "Currently supported: " + "gsv2: Google Speech V2 (https://github.com/gillesdemey/google-speech-v2). " + "gcsv1: Google Cloud Speech-to-Text V1P1Beta1 " + "(https://cloud.google.com/speech-to-text/docs). " + "(arg_num = 1) (default: %(default)s)")) + + speech_group.add_argument( + '-skey', '--speech-key', metavar='key', - help=_("The Google Speech V2 API key to be used. " - "If not provided, use free API key instead." - "(arg_num = 1)") + help=_("The API key for Speech-to-Text API. (arg_num = 1) " + "Currently supported: " + "gsv2: The API key for gsv2. (default: Free API key) " + "gcsv1: The API key for gcsv1. 
" + "(Can be overridden by \"-sa\"/\"--service-account\")") ) speech_group.add_argument( @@ -278,7 +291,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements metavar='float', type=float, default=0.0, - help=_("Google Speech V2 API response for text confidence. " + help=_("API response for text confidence. " "A float value between 0 and 1. " "Confidence bigger means the result is better. " "Input this argument will drop any result below it. " @@ -291,7 +304,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements metavar='integer', type=int, default=constants.DEFAULT_CONCURRENCY, - help=_("Number of concurrent Google Speech V2 requests to make. " + help=_("Number of concurrent Speech-to-Text requests to make. " "(arg_num = 1) (default: %(default)s)") ) @@ -322,15 +335,15 @@ def get_cmd_args(): # pylint: disable=too-many-statements "(arg_num = 1)") ) - gsv2_group.add_argument( + gtv2_group.add_argument( '-gtv2', '--gtransv2', metavar='key', help=_("The Google Translate V2 API key to be used. " - "If not provided, use free API(py-googletrans) instead. " + "If not provided, use free API (py-googletrans) instead. " "(arg_num = 1)") ) - gsv2_group.add_argument( + gtv2_group.add_argument( '-lpt', '--lines-per-trans', metavar='integer', type=int, @@ -339,7 +352,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "(arg_num = 1) (default: %(default)s)") ) - gsv2_group.add_argument( + gtv2_group.add_argument( '-tc', '--trans-concurrency', metavar='integer', type=int, @@ -404,6 +417,18 @@ def get_cmd_args(): # pylint: disable=too-many-statements help=_("Show %(prog)s version and exit. (arg_num = 0)") ) + options_group.add_argument( + '-sa', '--service-account', + metavar=_('path'), + help=_("Set service account key environment variable. " + "It should be the file path of the JSON file " + "that contains your service account credentials. " + "If used, override the API key options. 
" + "Ref: https://cloud.google.com/docs/authentication/getting-started" + "Currently supported: gcsv1 (GOOGLE_APPLICATION_CREDENTIALS) " + "(arg_num = 1)") + ) + audio_prcs_group.add_argument( '-ap', '--audio-process', nargs='*', metavar=_('mode'), @@ -587,7 +612,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "If no arg is given, list all. " "Or else will list get a group of \"good match\" " "of the arg. Default \"good match\" standard is whose " - "match score above 90(score between 0 and 100). " + "match score above 90 (score between 0 and 100). " "Ref: https://tools.ietf.org/html/bcp47 " "https://github.com/LuminosoInsight/langcodes/blob/master/langcodes/__init__.py " "lang code example: language-script-region-variant-extension-privateuse " diff --git a/autosub/speech_trans_api.py b/autosub/speech_trans_api.py index 50deae93..88a9a71e 100644 --- a/autosub/speech_trans_api.py +++ b/autosub/speech_trans_api.py @@ -6,6 +6,7 @@ # Import built-in modules from __future__ import absolute_import, unicode_literals import os +import base64 import json try: from json.decoder import JSONDecodeError @@ -15,28 +16,27 @@ # Import third-party modules import requests from googleapiclient.discovery import build +from google.cloud import speech_v1p1beta1 +from google.protobuf.json_format import MessageToDict # Any changes to the path and your own modules +from autosub import exceptions -class GoogleSpeechToTextV2(object): # pylint: disable=too-few-public-methods +class GoogleSpeechV2(object): # pylint: disable=too-few-public-methods """ - Class for performing speech-to-text for an input FLAC file. + Class for performing speech-to-text using Google Speech V2 API for an input FLAC file. 
""" def __init__(self, api_url, - api_key, headers, min_confidence=0.0, - lang_code="en", retries=3, is_keep=False): # pylint: disable=too-many-arguments self.min_confidence = min_confidence self.retries = retries - self.api_key = api_key - self.lang_code = lang_code - self.api_url = api_url.format(lang=lang_code, key=api_key) + self.api_url = api_url self.is_keep = is_keep self.headers = headers @@ -89,6 +89,149 @@ def __call__(self, filename): return None +def gcsv1p1beta1_service_client( + filename, + is_keep, + config, + min_confidence +): + """ + Function for performing speech-to-text + using Google Cloud Speech V1P1Beta1 API for an input FLAC file. + """ + try: # pylint: disable=too-many-nested-blocks + audio_file = open(filename, mode='rb') + audio_data = audio_file.read() + audio_file.close() + if not is_keep: + os.remove(filename) + + # https://cloud.google.com/speech-to-text/docs/quickstart-client-libraries + # https://cloud.google.com/speech-to-text/docs/basics + # https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1p1beta1 + client = speech_v1p1beta1.SpeechClient() + audio_dict = {"content": audio_data} + recognize_reponse = client.recognize(config, audio_dict) + result_dict = MessageToDict( + recognize_reponse, + preserving_proto_field_name=True) + + if 'results' in result_dict and result_dict['results'] \ + and 'alternatives' in result_dict['results'][0] \ + and result_dict['results'][0]['alternatives'] \ + and 'transcript' in result_dict['results'][0]['alternatives'][0]: + result_dict = result_dict['results'][0]['alternatives'][0] + + if 'transcript' not in result_dict: + return None + + else: + raise exceptions.SpeechToTextException( + result_dict) + + if 'confidence' in result_dict: + confidence = \ + float(result_dict['confidence']) + if confidence > min_confidence: + result_dict = result_dict['transcript'] + result = result_dict[:1].upper() + result_dict[1:] + result = result.replace('’', '\'') + return result + 
return None + else: + # can't find confidence in json + # means it's 100% confident + result_dict = result_dict['transcript'] + result = result_dict[:1].upper() + result_dict[1:] + result = result.replace('’', '\'') + return result + + except KeyboardInterrupt: + return None + + +class GCSV1P1Beta1URL(object): # pylint: disable=too-few-public-methods + """ + Class for performing speech-to-text + using Google Cloud Speech V1P1Beta1 API for an input FLAC file. + """ + def __init__(self, + config, + api_url=None, + headers=None, + min_confidence=0.0, + retries=3, + is_keep=False): + # pylint: disable=too-many-arguments + self.config = config + self.api_url = api_url + self.headers = headers + self.min_confidence = min_confidence + self.retries = retries + self.is_keep = is_keep + + def __call__(self, filename): + try: # pylint: disable=too-many-nested-blocks + audio_file = open(filename, mode='rb') + audio_data = audio_file.read() + audio_file.close() + if not self.is_keep: + os.remove(filename) + + for _ in range(self.retries): + # https://cloud.google.com/speech-to-text/docs/quickstart-protocol + # https://cloud.google.com/speech-to-text/docs/base64-encoding + # https://gist.github.com/bretmcg/07e0efe27611d7039c2e4051b4354908 + audio_dict = \ + {"content": base64.b64encode(audio_data).decode("utf-8")} + request_data = {"config": self.config, "audio": audio_dict} + config_json = json.dumps(request_data, ensure_ascii=False) + requests_result = \ + requests.post(self.api_url, data=config_json, headers=self.headers) + requests_result_json = requests_result.content.decode('utf-8') + + try: + result_dict = json.loads(requests_result_json) + except JSONDecodeError: + # no result + continue + + if 'results' in result_dict and result_dict['results'] \ + and 'alternatives' in result_dict['results'][0] \ + and result_dict['results'][0]['alternatives'] \ + and 'transcript' in result_dict['results'][0]['alternatives'][0]: + result_dict = 
result_dict['results'][0]['alternatives'][0] + + if 'transcript' not in result_dict: + return None + + else: + raise exceptions.SpeechToTextException( + result_dict) + + if 'confidence' in result_dict: + confidence = \ + float(result_dict['confidence']) + if confidence > self.min_confidence: + result_dict = result_dict['transcript'] + result = result_dict[:1].upper() + result_dict[1:] + result = result.replace('’', '\'') + return result + return None + else: + # can't find confidence in json + # means it's 100% confident + result_dict = result_dict['transcript'] + result = result_dict[:1].upper() + result_dict[1:] + result = result.replace('’', '\'') + return result + + except KeyboardInterrupt: + return None + + return None + + class GoogleTranslatorV2(object): # pylint: disable=too-few-public-methods """ Class for GoogleTranslatorV2 translating text from one language to another. diff --git a/requirements.txt b/requirements.txt index f4301fc3..175a9b14 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,11 @@ -googletrans==2.4.0 +setuptools==40.8.0 wcwidth==0.1.7 -langcodes_py2==1.2.0 -auditok==0.1.5 -pysubs2==0.2.4 requests==2.21.0 +pysubs2==0.2.4 +auditok==0.1.5 +langcodes_py2==1.2.0 +googletrans==2.4.0 +protobuf==3.11.2 google_api_python_client==1.7.11 langcodes==1.4.1 progressbar2==3.47.0 diff --git a/setup.py b/setup.py index 3af1a745..a4602208 100644 --- a/setup.py +++ b/setup.py @@ -33,14 +33,15 @@ }, package_data={str('autosub'): [str('data/locale/zh_CN/LC_MESSAGES/*mo')]}, install_requires=[ - 'google-api-python-client>=1.4.2', + 'google-api-python-client>=1.7.11', 'requests>=2.3.0', 'pysubs2>=0.2.4', 'progressbar2>=3.34.3', 'auditok>=0.1.5', 'googletrans>=2.4.0', 'langcodes-py2>=1.2.0', - 'wcwidth>=0.1.7' + 'wcwidth>=0.1.7', + 'google-cloud-speech>=1.3.1' ], license=open(os.path.join(here, "LICENSE")).read() ) From 9c956ed2f520832b7c577c2131a54b7d11054dcb Mon Sep 17 00:00:00 2001 From: BingLingGroup 
<42505588+BingLingGroup@users.noreply.github.com> Date: Thu, 23 Jan 2020 17:06:35 +0800 Subject: [PATCH 04/10] Docs update translations, changelog and readme --- CHANGELOG.md | 4 + README.md | 135 ++++++---- .../LC_MESSAGES/autosub.cmdline_utils.mo | Bin 8988 -> 9679 bytes .../LC_MESSAGES/autosub.cmdline_utils.po | 107 +++++--- .../locale/zh_CN/LC_MESSAGES/autosub.core.mo | Bin 1458 -> 1740 bytes .../locale/zh_CN/LC_MESSAGES/autosub.core.po | 46 ++-- .../zh_CN/LC_MESSAGES/autosub.options.mo | Bin 21669 -> 23247 bytes .../zh_CN/LC_MESSAGES/autosub.options.po | 252 ++++++++++-------- autosub/options.py | 8 +- autosub/speech_trans_api.py | 4 +- docs/CHANGELOG.zh-Hans.md | 4 + docs/README.zh-Hans.md | 67 +++-- 12 files changed, 372 insertions(+), 255 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c32de3eb..84279570 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,10 @@ Click up arrow to go back to TOC. ### [Unreleased] +#### Added(Unreleased) + +- Add Google Cloud Speech-to-Text support. [issue #10](https://github.com/BingLingGroup/autosub/issues/10) + ### [0.5.3-alpha] - 2019-12-30 #### Changed(0.5.3-alpha) diff --git a/README.md b/README.md index 9272e390..be126b27 100644 --- a/README.md +++ b/README.md @@ -433,10 +433,10 @@ Language Options: language". (3 >= arg_num >= 1) -mns integer, --min-score integer An integer between 0 and 100 to control the good match - group of "-lsc"/"--list-speech-codes" or "-ltc"/"-- - list-translation-codes" or the match result in "-bm"/" - --best-match". Result will be a group of "good match" - whose score is above this arg. (arg_num = 1) + group of "-lsc"/"--list-speech-codes" or "-ltc + "/"--list-translation-codes" or the match result in + "-bm"/"--best-match". Result will be a group of "good + match" whose score is above this arg. (arg_num = 1) Output Options: Options to control output. @@ -452,11 +452,12 @@ Output Options: "srt" instead. 
In this case, if "-i"/"--input" arg is a subtitles file, use the same extension from the subtitles file. (arg_num = 1) (default: srt) - -y, --yes Avoid any pause and overwriting files. Stop the - program when your args are wrong. (arg_num = 0) + -y, --yes Prevent pauses and allow files to be overwritten. Stop + the program when your args are wrong. (arg_num = 0) -of [type [type ...]], --output-files [type [type ...]] Output more files. Available types: regions, src, dst, - bilingual, all. (4 >= arg_num >= 1) (default: ['dst']) + bilingual, all. (4 >= arg_num >= 1) (default: + [u'dst']) -fps float, --sub-fps float Valid when your output format is "sub". If input, it will override the fps check on the input file. Ref: @@ -469,19 +470,27 @@ Output Options: Speech Options: Options to control speech-to-text. If Speech Options not given, it will only generate the times. - -gsv2 key, --gspeechv2 key - The Google Speech V2 API key to be used. If not - provided, use free API key instead.(arg_num = 1) + -sapi API_code, --speech-api API_code + Choose which Speech-to-Text API to use. Currently + supported: gsv2: Google Speech V2 + (https://github.com/gillesdemey/google-speech-v2). + gcsv1: Google Cloud Speech-to-Text V1P1Beta1 + (https://cloud.google.com/speech-to-text/docs). + (arg_num = 1) (default: gsv2) + -skey key, --speech-key key + The API key for Speech-to-Text API. (arg_num = 1) + Currently supported: gsv2: The API key for gsv2. + (default: Free API key) gcsv1: The API key for gcsv1. + (Can be overridden by "-sa"/"--service-account") -mnc float, --min-confidence float - Google Speech V2 API response for text confidence. A - float value between 0 and 1. Confidence bigger means - the result is better. Input this argument will drop - any result below it. Ref: - https://github.com/BingLingGroup/google- + API response for text confidence. A float value + between 0 and 1. Confidence bigger means the result is + better. Input this argument will drop any result below + it. 
Ref: https://github.com/BingLingGroup/google- speech-v2#response (arg_num = 1) (default: 0.0) -sc integer, --speech-concurrency integer - Number of concurrent Google Speech V2 requests to - make. (arg_num = 1) (default: 10) + Number of concurrent Speech-to-Text requests to make. + (arg_num = 1) (default: 10) py-googletrans Options: Options to control translation. Default method to translate. Could be blocked at any time. @@ -491,17 +500,17 @@ py-googletrans Options: requests. (arg_num = 1) (default: 5) -surl [URL [URL ...]], --service-urls [URL [URL ...]] (Experimental)Customize request urls. Ref: https://py- - googletrans.readthedocs.io/en/latest/ (arg_num = 1) - -ua User-Agent header, --user-agent User-Agent header - (Experimental)Customize User-Agent header. Same docs + googletrans.readthedocs.io/en/latest/ (arg_num >= 1) + -ua User-Agent headers, --user-agent User-Agent headers + (Experimental)Customize User-Agent headers. Same docs above. (arg_num = 1) -Google Speech V2 Options: +Google Translate V2 Options: Options to control translation.(Not been tested) If the API key is given, it will replace the py-googletrans method. -gtv2 key, --gtransv2 key The Google Translate V2 API key to be used. If not - provided, use free API(py-googletrans) instead. + provided, use free API (py-googletrans) instead. (arg_num = 1) -lpt integer, --lines-per-trans integer Number of lines per Google Translate V2 request. @@ -534,30 +543,38 @@ Other Options: -h, --help Show autosub help message and exit. (arg_num = 0) -V, --version Show autosub version and exit. (arg_num = 0) + -sa path, --service-account path + Set service account key environment variable. It + should be the file path of the JSON file that contains + your service account credentials. If used, override + the API key options. 
Ref: + https://cloud.google.com/docs/authentication/getting- + started Currently supported: gcsv1 + (GOOGLE_APPLICATION_CREDENTIALS) (arg_num = 1) Audio Processing Options: Options to control audio processing. -ap [mode [mode ...]], --audio-process [mode [mode ...]] Option to control audio process. If not given the - option, do normal conversion work. - "y": pre-process the input first then start normal - workflow. If succeed, no more conversion before the - speech-to-text procedure. "o": only pre-process the - input audio. ("-k"/"--keep" is true) "s": only split - the input audio. ("-k"/"--keep" is true) "n": FORCED - NO EXTRA CHECK/CONVERSION before the speech-to-text - procedure. Default command to pre-process the audio: - ffmpeg -hide_banner -i "{in_}" -af "asplit[a],aphaseme - ter=video=0,ametadata=select:key=lavfi.aphasemeter.pha - se:value=-0.005:function=less,pan=1c|c0=c0,aresample=a - sync=1:first_pts=0,[a]amix" -ac 1 -f flac "{out_}" | - ffmpeg -hide_banner -i "{in_}" -af - lowpass=3000,highpass=200 "{out_}" | ffmpeg-normalize - -v "{in_}" -ar 44100 -ofmt flac -c:a flac -pr -p -o - "{out_}" (Ref: https://github.com/stevenj/autosub/blob - /master/scripts/subgen.sh https://ffmpeg.org/ffmpeg- - filters.html) (2 >= arg_num >= 1) + option, do normal conversion work. "y": pre-process + the input first then start normal workflow. If + succeed, no more conversion before the speech-to-text + procedure. "o": only pre-process the input audio. + ("-k"/"--keep" is true) "s": only split the input + audio. ("-k"/"--keep" is true) "n": FORCED NO EXTRA + CHECK/CONVERSION before the speech-to-text procedure. 
+ Default command to pre-process the audio: ffmpeg + -hide_banner -i "{in_}" -af "asplit[a],aphasemeter=vid + eo=0,ametadata=select:key=lavfi.aphasemeter.phase:valu + e=-0.005:function=less,pan=1c|c0=c0,aresample=async=1: + first_pts=0,[a]amix" -ac 1 -f flac "{out_}" | ffmpeg + -hide_banner -i "{in_}" -af lowpass=3000,highpass=200 + "{out_}" | ffmpeg-normalize -v "{in_}" -ar 44100 -ofmt + flac -c:a flac -pr -p -o "{out_}" (Ref: https://github + .com/stevenj/autosub/blob/master/scripts/subgen.sh + https://ffmpeg.org/ffmpeg-filters.html) (2 >= arg_num + >= 1) -k, --keep Keep audio processing files to the output path. (arg_num = 0) -apc [command [command ...]], --audio-process-cmd [command [command ...]] @@ -573,12 +590,13 @@ Audio Processing Options: conversion command. Need to follow the python references keyword argument. Default command to process the audio: ffmpeg -hide_banner -y -i "{in_}" - -ac {channel} -ar {sample_rate} "{out_}" (arg_num = 1) + -vn -ac {channel} -ar {sample_rate} "{out_}" (arg_num + = 1) -asc command, --audio-split-cmd command (Experimental)This arg will override the default audio split command. Same attention above. Default: ffmpeg - -ss {start} -t {dura} -y -i "{in_}" -c copy -loglevel - error "{out_}" (arg_num = 1) + -y -ss {start} -i "{in_}" -t {dura} -loglevel error + "{out_}" (arg_num = 1) -asf file_suffix, --api-suffix file_suffix (Experimental)This arg will override the default API audio suffix. (arg_num = 1) (default: .flac) @@ -608,11 +626,13 @@ Auditok Options: audio activity. Same docs above. 
(arg_num = 1) (default: 0.3) -sml, --strict-min-length - Ref: https://auditok.readthedocs.io/en/latest/core.htm - l#class-summary (arg_num = 0) + Ref: + https://auditok.readthedocs.io/en/latest/core.html + #class-summary (arg_num = 0) -dts, --drop-trailing-silence - Ref: https://auditok.readthedocs.io/en/latest/core.htm - l#class-summary (arg_num = 0) + Ref: + https://auditok.readthedocs.io/en/latest/core.html + #class-summary (arg_num = 0) List Options: List all available arguments. @@ -624,13 +644,13 @@ List Options: format. (arg_num = 0) -lsc [lang_code], --list-speech-codes [lang_code] List all recommended "-S"/"--speech-language" Google - Speech V2 language codes. If no arg is given, list - all. Or else will list get a group of "good match" of - the arg. Default "good match" standard is whose match - score above 90(score between 0 and 100). Ref: - https://tools.ietf.org/html/bcp47 https://github.com/L - uminosoInsight/langcodes/blob/master/langcodes/__init_ - _.py lang code example: language-script-region- + Speech-to-Text language codes. If no arg is given, + list all. Or else will list get a group of "good + match" of the arg. Default "good match" standard is + whose match score above 90 (score between 0 and 100). + Ref: https://tools.ietf.org/html/bcp47 https://github. + com/LuminosoInsight/langcodes/blob/master/langcodes/__ + init__.py lang code example: language-script-region- variant-extension-privateuse (arg_num = 0 or 1) -ltc [lang_code], --list-translation-codes [lang_code] List all available "-SRC"/"--src-language" py- @@ -640,9 +660,10 @@ List Options: -dsl path, --detect-sub-language path Use py-googletrans to detect a sub file's first line language. And list a group of matched language in - recommended "-S"/"--speech-language" Google Speech V2 - language codes. Ref: https://cloud.google.com/speech- - to-text/docs/languages (arg_num = 1) (default: None) + recommended "-S"/"--speech-language" Google Speech-to- + Text language codes. 
Ref: https://cloud.google.com + /speech-to-text/docs/languages (arg_num = 1) (default: + None) Make sure the argument with space is in quotes. The default value is used diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo index 5c63145185d705157bf77bc489f625e81eb5b1d9..6e7fd10e746f579b5168dcb5c3ca84b0f44b4b02 100644 GIT binary patch delta 2324 zcmajfZBSHI7{Kwv27;g<1ZkLb+cYCx-b7MB1$i5Ygh0h+0%n8jvWdIME|})bqBJ^2 ziUy|H3#EgZ6EM0{VTCpMp_!a=@I%uy6O+>}yU>TGPt7>f%m3M3K4c+h=I}e`-n;iX z&vVY@tnx5z#M!QpT-(|AGhHhBpFq` zF!+2O5=*t?Dm;LX<3*H;jH3~!L)RC^1}l_<8rR!!JMq;?1_kKHdQ6R@{CETB-&N3>uVWvWCg?!u384;r=L7WJbn)Pwn$MEWMI zK?&G}+)@{i>Z`9&5^_y2kOlmK8&P9pp2S96kEc=2#yB3u%11dL_&rL1diLQN>_It{ zKZRUDR5SD47>3d)rYfRKJXtJ_`iB~4EwJa|q4{Ra3XP=_vP~S|f3;L*QpRXH1qn3C zYEft`2~fcQH2G{U98Lyfu9Y;83^->Pe6RaIq`C5?sE)s^K%g{G-|BI!D{R_y>DLZ z-?a>#Epxq7vsgQIn`X5Ip0&3*S?cT|+=jSYVJ=yPyjil?rSF^V72FE0vrX5VT1+xc zQ!~H4ccwGz(QTbp3btFfIkir6yH$$hn4hNV%DmX(o}VamufWv$UNb8^>a_AN=g6qen0J+UHT@_g6iM?LSUnVurXXkzI9(4PYUpF*(dq;fz7rB1#W&hH3$TRi@Pfm3m zoqF%;tUERN_Mm_Gns02_Guq`Dz2tT-OAo6IUYYBcy(c`^-kftG?tv6nnCq^!{Lg+& z&b*F%y(5RGUK=C@75ts=_=xvhx3~Aq)RA7#=+K-o<(K}U3*M8j{Py`-&*-s9_bTmo F!#{P;#zz1E delta 1651 zcmYk+U2GIp6u|MbYm4ntN?Xb*P}`vkRSTmtE!K8XmVi)7A+=EANAO`o$F|Y5c3G4_ z(#3=j3CKs*nn2J-z63&*FaeSUA`l6I2OA>~#s}1-+Ye~;L4AQ2|7Uhfxar;By))Cf z_nvd_3^gBFnYdSy(=U_=wTxQo7kLK@a`>Tqlq)g^d$9;V$F=x1K8F+dEC!~CG++p; z@jZMIKf?f?$EkP~SK@74B$AM#r$t`o!U_!I7Bui2&cdIw_aESQ^ylPdUVH~_`u}FF z$`?6J|Lv@PM%6`~#d$b^CKfY`dW@jO__CG8Y%ZKeUHOl=4$BKfHefF@S-FM!+)v0D zGL8%IAAA8Trez|r3M=VHQLpbrjm#h#IEqU#uaGD)zBJL;h~J_{AZL1}AHhcYM^Ga) zjGBs4@}Lv3P*c{73$YWIU_a`6@8ULWD;D_#hp-wOOGH{QhKVK`H)yQJf*DK(-@=ej zq#R=4jG|tTIflauW{->xbzls(32Wq4wn2BHj>*yAvMs8J*d0I5$ zTv&!DP;)hgI`Fh-GApY>&HZ|ui+*nEO6K89*n)b$53ixg#?kwKVL8qsT>4x+hOq;6 z+@rIJ6aOQ>aG?_?Fp3*kUOQew&3zeh))$44bjvdI2FCgH`;b35&X4B&I_e7VqYfD4 zs8nGquEDMN0p3i|=%r!vcAe2Nd=2lSiN(ZG2Z*474iX&Mg*x%CPzSt-OYnExhUJFH 
zZajp$Fhtn&eP^&0e?{F}iE0w2jTlBTg+81tcD4~f((V-_tD>I?wuyJv{TQKY=yj61 z;;B?Ex<7ce7e)4^aGHhvnDBNoZLic#Hc-Q?rI4z}N7JAyV6W^*WUWc`4A7 z@?_DIF#*K@+DlBcNbEqR5i;ndJ-I-aL$9{I@0t5nUU=3NVs_EMGS^)f%2 z0xf!4$gKCF;SflOsvEnO%AU-WK;`!CWIDIGI@;ZCbU1M%x~09V$N13c?ud69@At%v znA6eK)4kbfcjC^bI8Arsls|Lbwt}Vp(y5bwU)ekQ#i`-czOnBIxtrdbN)HT-9NRH+ iVUIf)JfG|KmF@PsJr(nP?y#}lms}IdcYmw=&;LKvJlMtn diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po index 2c14f369..61548151 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po @@ -7,15 +7,15 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-01-14 20:31+0800\n" -"PO-Revision-Date: 2019-08-12 09:16+0800\n" +"POT-Creation-Date: 2020-01-22 21:01+0800\n" +"PO-Revision-Date: 2020-01-23 16:51+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Language: zh_CN\n" -"X-Generator: Poedit 2.2.3\n" +"X-Generator: Poedit 2.2.4\n" #: autosub/cmdline_utils.py:45 msgid "List of output formats:\n" @@ -142,9 +142,9 @@ msgstr "" msgid "Use langcodes-py2 to standardize the result." msgstr "使用langcodes-py2来标准化结果。" -#: autosub/cmdline_utils.py:283 autosub/cmdline_utils.py:337 -#: autosub/cmdline_utils.py:368 autosub/cmdline_utils.py:455 -#: autosub/cmdline_utils.py:485 +#: autosub/cmdline_utils.py:283 autosub/cmdline_utils.py:342 +#: autosub/cmdline_utils.py:373 autosub/cmdline_utils.py:460 +#: autosub/cmdline_utils.py:490 msgid "Use \"{lang_code}\" instead." msgstr "改用\"{lang_code}\"。" @@ -157,14 +157,18 @@ msgid "Error: The arg of \"-mnc\"/\"--min-confidence\" isn't legal." msgstr "错误:\"-mnc\"/\"--min-confidence\"参数的值不合法。" #: autosub/cmdline_utils.py:297 +msgid "Error: Wrong API code." 
+msgstr "错误:错误的API代码。" + +#: autosub/cmdline_utils.py:302 msgid "Destination language not provided. Only performing speech recognition." msgstr "翻译目的语言未提供。只进行语音识别。" -#: autosub/cmdline_utils.py:304 +#: autosub/cmdline_utils.py:309 msgid "Source language not provided. Use Speech language instead." msgstr "翻译源语言未提供。改用语音语言。" -#: autosub/cmdline_utils.py:328 autosub/cmdline_utils.py:446 +#: autosub/cmdline_utils.py:333 autosub/cmdline_utils.py:451 msgid "" "Warning: Source language \"{src}\" not supported. Run with \"-lsc\"/\"--list-" "translation-codes\" to see all supported languages." @@ -172,12 +176,12 @@ msgstr "" "警告:源语言\"{src}\"不在推荐的清单里面。用 \"-lsc\"/\"--list-translation-" "codes\"选项运行来查看所有支持的语言。" -#: autosub/cmdline_utils.py:342 autosub/cmdline_utils.py:373 -#: autosub/cmdline_utils.py:459 autosub/cmdline_utils.py:489 +#: autosub/cmdline_utils.py:347 autosub/cmdline_utils.py:378 +#: autosub/cmdline_utils.py:464 autosub/cmdline_utils.py:494 msgid "Match failed. Still using \"{lang_code}\". Program stopped." msgstr "匹配失败。仍在使用\"{lang_code}\"。程序终止。" -#: autosub/cmdline_utils.py:348 autosub/cmdline_utils.py:465 +#: autosub/cmdline_utils.py:353 autosub/cmdline_utils.py:470 msgid "" "Error: Source language \"{src}\" not supported. Run with \"-lsc\"/\"--list-" "translation-codes\" to see all supported languages. Or use \"-bm\"/\"--best-" @@ -187,7 +191,7 @@ msgstr "" "codes\"选项运行来查看所有支持的语言。或者使用\"-bm\"/\"--best-match\"来获得最" "佳匹配。" -#: autosub/cmdline_utils.py:359 autosub/cmdline_utils.py:476 +#: autosub/cmdline_utils.py:364 autosub/cmdline_utils.py:481 msgid "" "Warning: Destination language \"{dst}\" not supported. Run with \"-lsc\"/\"--" "list-translation-codes\" to see all supported languages." 
@@ -195,7 +199,7 @@ msgstr "" "警告:不支持目的语言\"{dst}\"。用 \"-lsc\"/\"--list-translation-codes\"选项运" "行来查看所有支持的语言。" -#: autosub/cmdline_utils.py:379 autosub/cmdline_utils.py:495 +#: autosub/cmdline_utils.py:384 autosub/cmdline_utils.py:500 msgid "" "Error: Destination language \"{dst}\" not supported. Run with \"-lsc\"/\"--" "list-translation-codes\" to see all supported languages. Or use \"-bm\"/\"--" @@ -205,37 +209,37 @@ msgstr "" "看所有支持的格式。\n" "或者使用ffmpeg或者SubtitleEdit来转换格式。" -#: autosub/cmdline_utils.py:389 +#: autosub/cmdline_utils.py:394 msgid "" "Speech language is the same as the Destination language. Only performing " "speech recognition." msgstr "语音语言和目的语言一致。只进行语音识别。" -#: autosub/cmdline_utils.py:399 +#: autosub/cmdline_utils.py:404 msgid "You've already input times. No works done." msgstr "你已经输入了时间轴。啥都没做。" -#: autosub/cmdline_utils.py:405 +#: autosub/cmdline_utils.py:410 msgid "Speech language not provided. Only performing speech regions detection." msgstr "语音语言未提供。只生成时间轴。" -#: autosub/cmdline_utils.py:414 autosub/cmdline_utils.py:517 +#: autosub/cmdline_utils.py:419 autosub/cmdline_utils.py:522 msgid "Error: External speech regions file not provided." msgstr "错误:外部时间轴文件未提供。" -#: autosub/cmdline_utils.py:428 +#: autosub/cmdline_utils.py:433 msgid "Error: Destination language not provided." msgstr "错误:目的语言未提供。" -#: autosub/cmdline_utils.py:504 +#: autosub/cmdline_utils.py:509 msgid "Error: Source language is the same as the Destination language." msgstr "错误:源语言和目的语言一致。" -#: autosub/cmdline_utils.py:509 +#: autosub/cmdline_utils.py:514 msgid "Error: Source language not provided." msgstr "错误:源语言未提供。" -#: autosub/cmdline_utils.py:531 +#: autosub/cmdline_utils.py:536 msgid "" "Your minimum region size {mrs0} is smaller than {mrs}.\n" "Now reset to {mrs}." 
@@ -243,7 +247,7 @@ msgstr "" "你输入的语音区域{mrs0}比{mrs}还小。\n" "现在重置为{mrs}。" -#: autosub/cmdline_utils.py:539 +#: autosub/cmdline_utils.py:544 msgid "" "Your maximum region size {mrs0} is larger than {mrs}.\n" "Now reset to {mrs}." @@ -251,7 +255,7 @@ msgstr "" "你输入的语音区域{mrs0}比{mrs}还大。\n" "现在重置为{mrs}。" -#: autosub/cmdline_utils.py:547 +#: autosub/cmdline_utils.py:552 msgid "" "Your maximum continuous silence {mxcs} is smaller than 0.\n" "Now reset to {dmxcs}." @@ -259,7 +263,7 @@ msgstr "" "你输入的最大连续安静区域{mxcs}比0还小。\n" "现在重置为{dmxcs}。" -#: autosub/cmdline_utils.py:592 autosub/cmdline_utils.py:774 +#: autosub/cmdline_utils.py:597 autosub/cmdline_utils.py:774 msgid "" "\n" "No works done. Check your \"-of\"/\"--output-files\" option." @@ -267,16 +271,16 @@ msgstr "" "\n" "啥都没做。检查你的\"-of\"/\"--output-files\"选项。" -#: autosub/cmdline_utils.py:637 autosub/cmdline_utils.py:1033 +#: autosub/cmdline_utils.py:642 autosub/cmdline_utils.py:1101 msgid "Error: Translation failed." msgstr "错误:翻译失败。" -#: autosub/cmdline_utils.py:682 autosub/cmdline_utils.py:1069 +#: autosub/cmdline_utils.py:687 autosub/cmdline_utils.py:1137 msgid "Bilingual subtitles file created at \"{}\"." msgstr "双语字幕创建在了\"{}\"。" -#: autosub/cmdline_utils.py:686 autosub/cmdline_utils.py:881 -#: autosub/cmdline_utils.py:1004 autosub/cmdline_utils.py:1073 +#: autosub/cmdline_utils.py:691 autosub/cmdline_utils.py:881 +#: autosub/cmdline_utils.py:1072 autosub/cmdline_utils.py:1141 msgid "" "\n" "All works done." @@ -284,7 +288,7 @@ msgstr "" "\n" "做完了。" -#: autosub/cmdline_utils.py:725 autosub/cmdline_utils.py:1112 +#: autosub/cmdline_utils.py:730 autosub/cmdline_utils.py:1180 msgid "Destination language subtitles file created at \"{}\"." msgstr "目的语言字幕文件创建在了\"{}\"。" @@ -295,17 +299,17 @@ msgstr "使用外部时间轴。" #: autosub/cmdline_utils.py:819 msgid "" "\n" -"Convert source audio to \"{name}\" and get audio length for regions " -"detection." +"Convert source audio to \"{name}\" to get audio length and detect audio " +"regions." 
msgstr "" "\n" -"将源音频转换为\"{name}\"来获取源音频的总长度,用于语音区域检测。" +"将源音频转换为\"{name}\"来检测语音区域。" #: autosub/cmdline_utils.py:829 autosub/cmdline_utils.py:907 msgid "Error: Convert source audio to \"{name}\" failed." msgstr "错误:转换源音频至\"{name}\"失败。" -#: autosub/cmdline_utils.py:841 autosub/cmdline_utils.py:941 +#: autosub/cmdline_utils.py:841 autosub/cmdline_utils.py:932 msgid "" "\n" "\"{name}\" has been deleted." @@ -317,7 +321,7 @@ msgstr "" msgid "Error: Can't get speech regions." msgstr "错误:无法得到语音区域。" -#: autosub/cmdline_utils.py:878 autosub/cmdline_utils.py:1188 +#: autosub/cmdline_utils.py:878 autosub/cmdline_utils.py:1256 msgid "Times file created at \"{}\"." msgstr "时间轴文件创建在了\"{}\"." @@ -329,11 +333,11 @@ msgstr "" "\n" "为API转换为\"{name}\"。" -#: autosub/cmdline_utils.py:937 +#: autosub/cmdline_utils.py:928 msgid "Error: Conversion failed." msgstr "错误:转换失败。" -#: autosub/cmdline_utils.py:945 +#: autosub/cmdline_utils.py:936 msgid "" "Audio processing complete.\n" "All works done." @@ -341,7 +345,30 @@ msgstr "" "音频处理完毕。\n" "做完了。" -#: autosub/cmdline_utils.py:962 +#: autosub/cmdline_utils.py:982 +msgid "Use the API key given in the option \"-skey\"/\"--speech-key\"." +msgstr "使用选项\"-skey\"/\"--speech-key\"提供的API密钥。" + +#: autosub/cmdline_utils.py:996 +msgid "" +"Set the GOOGLE_APPLICATION_CREDENTIALS given in the option \"-sa\"/\"--" +"service-account\"." +msgstr "" +"设置选项\"-sa\"/\"--service-account\"提供的GOOGLE_APPLICATION_CREDENTIALS。" + +#: autosub/cmdline_utils.py:1010 +msgid "Use the GOOGLE_APPLICATION_CREDENTIALS in the environment variables." +msgstr "使用环境变量中的GOOGLE_APPLICATION_CREDENTIALS。" + +#: autosub/cmdline_utils.py:1022 +msgid "" +"No available GOOGLE_APPLICATION_CREDENTIALS. Use \"-sa\"/\"--service-account" +"\" to set one." +msgstr "" +"没有可用的GOOGLE_APPLICATION_CREDENTIALS。使用选项\"-sa\"/\"--service-account" +"\"来设置一个。" + +#: autosub/cmdline_utils.py:1030 msgid "" "Error: Speech-to-text failed.\n" "All works done." 
@@ -349,11 +376,11 @@ msgstr "" "错误:语音转文字失败。\n" "做完了。" -#: autosub/cmdline_utils.py:1000 autosub/cmdline_utils.py:1154 +#: autosub/cmdline_utils.py:1068 autosub/cmdline_utils.py:1222 msgid "Speech language subtitles file created at \"{}\"." msgstr "语音字幕文件创建在了\"{}\"。" -#: autosub/cmdline_utils.py:1121 +#: autosub/cmdline_utils.py:1189 msgid "" "Override \"-of\"/\"--output-files\" due to your args too few.\n" "Output source subtitles file only." @@ -361,7 +388,7 @@ msgstr "" "因为你其他参数输入得太少了,忽略\"-of\"/\"--output-files\"参数。\n" "只输出源语言字幕文件。" -#: autosub/cmdline_utils.py:1159 +#: autosub/cmdline_utils.py:1227 msgid "" "Override \"-of\"/\"--output-files\" due to your args too few.\n" "Output regions subtitles file only." diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.mo index 0ad1be9f87e9f09104909b07b53614de5b07a43c..7c8c0aed8ba086221e3430c330c1784d1bf2680b 100644 GIT binary patch delta 525 zcmZ9|&nts*90%~vvpuu08Q$)UzKBix?Tpfh;>H@w9#0R9pc9L?dP8aIyM z8@z&j@LVMF!B;p6Jtm@NxB_e8Hq>(muoGTF2|hr5L0cJ-9nQi=xCFc5Ce(8`P@w+r ztuYf8{5nAXx9|*EN)z_@`b*`d8GDjhwF$C&B$bLLI2%f&vJnWnkuNGeiHDC)*d5b6X;B(y1z7H(?OBIqFm7sk9nn~TskXwg>1 zRz5<|3y4UM;UfBvdEn!BIoxya`O437@w((pLAkU6<%UBCEefy7Vr&kafW&P#jO-8NJ~8$o+F*(H6G(L9^fk`10Q&TpV-D9>|@!L z4)GCBafr$NcVw4caa5M@=dQfHfsX!9?4ZoP$-DAx&pg?s3Foy`$=E@9>bKA8oz_JZ S&3(r+Hgu~tb~kpEx%&f2>mk_y diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po index 40541fe1..f1dfa416 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po @@ -7,8 +7,8 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-01-14 20:33+0800\n" -"PO-Revision-Date: 2020-01-14 20:33+0800\n" +"POT-Creation-Date: 2020-01-23 15:38+0800\n" +"PO-Revision-Date: 2020-01-23 16:45+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" @@ 
-17,7 +17,7 @@ msgstr "" "Language: zh_CN\n" "X-Generator: Poedit 2.2.4\n" -#: autosub/core.py:101 +#: autosub/core.py:102 msgid "" "\n" "Converting speech regions to short-term fragments." @@ -25,23 +25,23 @@ msgstr "" "\n" "按照语音区域将音频转换为多个短语音片段。" -#: autosub/core.py:102 +#: autosub/core.py:103 msgid "Converting: " msgstr "转换中: " -#: autosub/core.py:160 +#: autosub/core.py:149 msgid "" "\n" -"Sending short-term fragments to API and getting result." +"Sending short-term fragments to Google Speech V2 API and getting result." msgstr "" "\n" -"将短片段语音发送给API并得到识别结果。" +"将短片段语音发送给Google Speech V2 API并得到识别结果。" -#: autosub/core.py:161 +#: autosub/core.py:150 autosub/core.py:201 msgid "Speech-to-Text: " msgstr "语音转文字中: " -#: autosub/core.py:182 +#: autosub/core.py:171 autosub/core.py:300 msgid "" "Error: Connection error happened too many times.\n" "All works done." @@ -49,7 +49,20 @@ msgstr "" "错误:过多连接错误。\n" "做完了。" -#: autosub/core.py:211 autosub/core.py:262 +#: autosub/core.py:199 +msgid "" +"\n" +"Sending short-term fragments to Google Cloud Speech V1P1Beta1 API and " +"getting result." +msgstr "" +"\n" +"将短片段语音发送给V1P1Beta1 API并得到识别结果。" + +#: autosub/core.py:308 +msgid "Receive something unexpected:" +msgstr "收到未预期数据:" + +#: autosub/core.py:337 autosub/core.py:388 msgid "" "\n" "Translating text from \"{0}\" to \"{1}\"." @@ -57,23 +70,24 @@ msgstr "" "\n" "从\"{0}\"翻译为\"{1}\"。" -#: autosub/core.py:221 autosub/core.py:312 +#: autosub/core.py:347 autosub/core.py:438 msgid "Translation: " msgstr "翻译中: " -#: autosub/core.py:240 autosub/core.py:373 +#: autosub/core.py:366 autosub/core.py:499 msgid "Cancelling transcription." msgstr "取消翻译。" -#: autosub/core.py:454 autosub/core.py:521 +#: autosub/core.py:580 autosub/core.py:647 msgid "Format \"{fmt}\" not supported. Using \"{default_fmt}\" instead." msgstr "不支持格式\"{fmt}\"。改用\"{default_fmt}\"。" -#: autosub/core.py:550 -msgid "There is already a file with the same name in this location: \"{dest_name}\"." 
+#: autosub/core.py:676 +msgid "" +"There is already a file with the same name in this location: \"{dest_name}\"." msgstr "在此处已有同名文件:\"{dest_name}\"。" -#: autosub/core.py:553 +#: autosub/core.py:679 msgid "Input a new path (including directory and file name) for output file.\n" msgstr "为输出文件输入一个新的路径(包括路径和文件名)。\n" diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.mo index 76a16be70b296f195fce3d42e40d4217f986a022..9961788807a7731693962ad6f1a907a96d6dbbb3 100644 GIT binary patch delta 3724 zcmc(gdr(x@8NkoFB47+EY6zI7m-R_ufkmnA_mbTUhSaz! ziXpy8uV#$eL=$zi&WP#S(uj|=jjgt6Y%+E-oypi~!}6FuY^NRbM`xV=&b_NqGfDeL zXL@GO{hf2}`M&R*?{W9Yaod$2+9KCdNA3}{mGp<`{~RGi1iqKRgLWxVh_AxSunG>t z)vz#02o=5#n_)t-5G^nWeen0N4_1s8;yGx)Pl!=AA=W`eM8_D$($sbaFR+k&zYq!= zw!#tcSc(u2vVLN$5d6fuJQl*MFp2l+<2VF#j29w@^~X|$m<%^VREUFc9y|$c?E5p^ z&itS=!mq%Z2ZZQ{tKo+zs7u3`G?*X1iN8X9^ zKCH`wE{7i21fPJvgipa~xOW-sfNs3}J6I)MU5tOH!cK^8aRTyAT!hGqKSFGXoA4Q! zu2_nnhdT4$LV4q=OiO+R#CakFpM@J?AN&WDq3dx7u@q)Z!T+p@AOp#6IM*h`4`2cF z>zv979n&m@_3$$DMkp0$O&8(<+z;h^R4D0Vkz9F+vh>zeRI0Q4;nD|b4c~z$(C2v6y3qSoLOjg+ zq-v|23*%gDF(R7c3lW&f#=Y`S?J_~2O&-n@7GI4$n0-f25R9&UaW*0VCE8P7rp`4!Zp;40b}qj*xq2p zJe#7vgyPl9sed?Yg%E$?ec9tyF1YvzjAC94C&5xU7Os%%*cZ(V#z0Ljz;$tU#Q74O z$ok7rCf8ADho|E6(@@HtkIyec&S8Enf!W6MWZC?LOIw+AsY}`y9Q>{(-8_|kKV7ItbG|<>0S!SY>)9=NvA-}uigZ6)p|T4?@ywd9|^f6 z6GeVVC{we^(wrF|SHhI|m|Qd4G@j&<$f6T=v$SNWoOJmgl1G1#P70Z^FN73_`Nu>a z3L#zikQ~s8ESUdhsqwLla}s?Dolk;L=xKD>NN_Xm4#=0)Yz6XZY)PD(x-6`QoFzUr zs4LB?+pC7c#%t+{v3B&>Xm4V%J-TGvC|goVePyFZ^QzImjjpsCucnkQD{I!Yu&S(W z_IsL@r7f!JX?E(Gvq61MSJ?WJmEIiZQ-(j*6O|2{&H8n zt5nt9E;M#d&#`40J?S%|*HZkp$(~=o=zz? 
z)@8mLQFR3$xA{G);`Vs7R&;z@ZCBJ_n?Ixl*GQdh?vUTTDuBN#+1#wP2E0<6-mIG1 zv8gvJT9dg@wY09*TGgA~y5iA-y4xQNEA3iq=yQfVAvM>F1;0BGMtPGAiZ{nRX2|cg zEXZ)l18FU~Uke)Rr)M^LHBUIt-HLrgJ#M+4=TmjvAM`oHx?4u^3nD9{qs*+RtE;Ff zZ!D>=uc<66X{fBLZ7f?-zOcNup|YfAX_k@T=!o1h+oXl=4Ir;_m6`64uQAxVMwzF$ zvcABYnMVzH=}u)byAb!PP43o!UZgAvskb*|-O8Va@_VJ^$q_4a2X6%u58*pyRlDMF zhTV=lhtnAkq%$5!hw+J{n#9SSGT(kXnW8Pzw~aKWW~ZhUEVuJ#j!5$oD9 zu)Akq`^M{M_emFl&s892ku})xWi`kVQE2(`Gadh-5T#;D(T=e3a%qwA=;ATaj|+F) zS&$dzyqA!Ct00x=&Usfx8ow$H+*6Cjy3)F+t88-OnBk2VVq13f_x1Ik+ha^EY%<=e zaT)8Yx*|g-&kvqEIdEV{?4?6Pr;o;V^~QQShrfGdaNp*^ojtKPI%2!_4sYEhy>e#v zP|uNpy*;t*TVmZOERoocyYCcH%z7JmWov)mkI-NZhI)Hq=Qm3YM>h?;cEECc{~2Pr zy+7^jzM-R?gZp2>^Oj+vGkoZ6<8s}!C;ywmx;IubG4bD**OYYZ2eZ(kH;>CLb74Ze}?TR2EqL$N?^nkY%kPE&PhBeK1 z=Qy}h+nUXsX3p8wO+|+-+bk`uIqNTNg>Ox_vaLU=wZ6Z5t}g!Q*?xSU=bZCA_dL(@ zdk#0wIzBq?XuT5`v0o^Qh#|zS9wM!{yC;7r+g&2Va0kAM$I-<2D3Mk8IyT_XSc_G? zL^|*yZom!EA}#nEZo<0WB8EfcG4^oqQ+@l2ByyvFKam(5iOX;T&cFkhM~6E&n)ZPH zA_=soqV{_+R)isBEl$Q(ba4L??xx)tC-M)L4&XPiBwnNsF2SHq)lX$87lsZL>B5P4 zodJwNBB}T!fmw3Bf0D=?+!gjV>IP3fzlJ@hL9Aie!;$R`n=q|EL#4(r`W| z=~8c?qD$X~JR_%&zH$wh<9%F=KK7vrKSZ5zg24cojx3H8;#w?2-Ga+FA6rs*NxT{M zDdy0QPi3{-RD4u)fM#5dTTu7(9(Llw;r76Tc9x8o^HOXo!#DZxmbk9vLaDAr#u)Np}(ku?~B>ydntmT-G3vKz7!wc}ybekU;+ z&!G0}MD2G4Yj~lrF_U&4iSsrt!8h?PZo`+|tp7eL_gMyAy7k#2pW`Fc2TzW%XFS>? 
z@;>dWID>&rFUf`o3%7?~^Ao3wPi`p1X+!v@^$vl+%6%bwZ!Ej<>V% zODyNYZ5+%48Tlf|XctWonMV5;)U0(+v@O8zX!|f32T!v5jl=0ZMCNf6vgx0zgY zU4$vzZ@}f)hK=|;>JQh<4vo_)0V;Z-4Zp_^@D{Eww!Z_Vb40Ree}e3a{D4eKTqU+X z)c)JB7Ta+%{*7PZrc%bj*>goKyo{%@s*G=pgZ+r+r)U}m^o8TO^H%Q2q&wW#B@pcCKGHoq@BsYGIX_<=*ndvq)P zXe}gk3&_3DCpikW@V?#35L&53G%=E3=em`7wDg0=e5F<2L<{*M9OF=7i-cWxUPx*g zOORVKh0vp>g_RZ3B~)|*&&nbyQ;9J|8lkI}KxioZ8f z3cDaP2@Zf{5V3@oc!J~){g2RP)xu8703uHtl)gkJFOKh;_|v0@OT2+faY#YH8B=uha$c~;0{>8Z0I5oJX9 zD&H3=w7zgn3YJEBoxweQe9rWub@l$0Ms?jrqoKMvU{YIY)UPue*Ejl`{B^ZPjeoTn zuTavqw}n)-J1S zUSmu)yjEsLNidL6=+sez7e?-iD6Vo(Z#1i${PlJ28P!c@fsy0M@wh!+w>OV!L9RE; zlkf3FSCqRe&2|33f1mSCaC`FgId4vZcbsKrpLgV1!^W%&x;*Ec){(JU!Q8QDor#a$ z>b$pgPgh4r*QNarJNG?2yu0h%$p;6wSewggtVj8?f^!NEIHI4=%m{Yo7CNlUK38OT z86Ho(-ZS*VpB3GR81|t3Sl5NU-4=?;7>(xIW;4(fu%dnC+Sz({mKj{;8|bp`m33I( zRO}i0KOKuIiuGlRDw79wT|C%zsgvh;--l<;bH`vMt()GYpi#NQUbNE-Ug-HR-=}-p diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po index 06b08d51..300f0638 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po @@ -7,15 +7,15 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-01-14 20:31+0800\n" -"PO-Revision-Date: 2019-08-14 12:16+0800\n" +"POT-Creation-Date: 2020-01-23 16:43+0800\n" +"PO-Revision-Date: 2020-01-23 16:44+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Generated-By: pygettext.py 1.5\n" -"X-Generator: Poedit 2.2.3\n" +"X-Generator: Poedit 2.2.4\n" "Plural-Forms: nplurals=1; plural=0;\n" "Language: zh_CN\n" @@ -93,8 +93,8 @@ msgid "" msgstr "控制翻译的选项。同时也是默认的翻译方法。可能随时会被谷歌爸爸封。" #: autosub/options.py:79 -msgid "Google Speech V2 Options" -msgstr "Google Speech V2选项" +msgid "Google Translate V2 Options" +msgstr "Google Translate V2选项" #: autosub/options.py:80 
msgid "" @@ -146,7 +146,7 @@ msgid "List all available arguments." msgstr "列出所有可选参数。" #: autosub/options.py:102 autosub/options.py:112 autosub/options.py:122 -#: autosub/options.py:211 autosub/options.py:612 +#: autosub/options.py:211 autosub/options.py:422 autosub/options.py:637 msgid "path" msgstr "路径" @@ -199,7 +199,7 @@ msgstr "" "言行使用第二个。(参数个数为1或2)" #: autosub/options.py:151 autosub/options.py:163 autosub/options.py:174 -#: autosub/options.py:582 autosub/options.py:599 +#: autosub/options.py:607 autosub/options.py:624 msgid "lang_code" msgstr "语言代码" @@ -208,8 +208,8 @@ msgstr "语言代码" msgid "" "Lang code/Lang tag for speech-to-text. Recommend using the Google Cloud " "Speech reference lang codes. WRONG INPUT WON'T STOP RUNNING. But use it at " -"your own risk. Ref: https://cloud.google.com/speech-to-text/docs/languages" -"(arg_num = 1) (default: %(default)s)" +"your own risk. Ref: https://cloud.google.com/speech-to-text/docs/" +"languages(arg_num = 1) (default: %(default)s)" msgstr "" "用于语音识别的语言代码/语言标识符。推荐使用Google Cloud Speech的参考语言代" "码。错误的输入不会终止程序。但是后果自负。参考:https://cloud.google.com/" @@ -225,8 +225,8 @@ msgid "" msgstr "" "用于翻译的源语言的语言代码/语言标识符。如果没有提供,会使用langcodes-py2从列" "表里获取一个最佳匹配选项\"-S\"/\"--speech-language\"的语言代码。如果使用py-" -"googletrans作为翻译的方法,错误的输入会终止运行。(参数个数为1)(默认参数为%" -"(default)s)" +"googletrans作为翻译的方法,错误的输入会终止运行。(参数个数为1)(默认参数" +"为%(default)s)" #: autosub/options.py:175 #, python-format @@ -237,7 +237,7 @@ msgstr "" "用于翻译的目标语言的语言代码/语言标识符。同样的注意参考选项\"-SRC\"/\"--src-" "language\"。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:182 autosub/options.py:409 +#: autosub/options.py:182 autosub/options.py:434 msgid "mode" msgstr "模式" @@ -293,11 +293,10 @@ msgstr "" #: autosub/options.py:234 msgid "" -"Avoid any pause and overwriting files. Stop the program when your args are " -"wrong. (arg_num = 0)" +"Prevent pauses and allow files to be overwritten. Stop the program when your " +"args are wrong. 
(arg_num = 0)" msgstr "" -"避免任何运行中的暂停和覆写文件的行为。如果参数有误,会直接停止程序。(参数个" -"数为0)" +"避免任何暂停和覆写文件的行为。如果参数有误,会直接停止程序。(参数个数为0)" #: autosub/options.py:240 msgid "type" @@ -310,8 +309,8 @@ msgid "" "arg_num >= 1) (default: %(default)s)" msgstr "" "输出更多的文件。可选种类:regions, src, dst, bilingual, all.(时间轴,源语言" -"字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数为%" -"(default)s)" +"字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数" +"为%(default)s)" #: autosub/options.py:253 msgid "" @@ -327,51 +326,69 @@ msgstr "" msgid "Drop any regions without text. (arg_num = 0)" msgstr "删除所有没有文本的空轴。(参数个数为0)" -#: autosub/options.py:271 +#: autosub/options.py:270 +msgid "API_code" +msgstr "API代码" + +#: autosub/options.py:272 +#, python-format msgid "" -"The Google Speech V2 API key to be used. If not provided, use free API key " -"instead.(arg_num = 1)" +"Choose which Speech-to-Text API to use. Currently supported: gsv2: Google " +"Speech V2 (https://github.com/gillesdemey/google-speech-v2). gcsv1: Google " +"Cloud Speech-to-Text V1P1Beta1 (https://cloud.google.com/speech-to-text/" +"docs). (arg_num = 1) (default: %(default)s)" msgstr "" -"用于Google Speech V2 API的key。如果没有提供,会使用免费API key。(参数个数为" -"1)" +"选择使用Speech-to-Text API。当前支持:gsv2:Google Speech V2 (https://" +"github.com/gillesdemey/google-speech-v2)。 gcsv1:Google Cloud Speech-to-" +"Text V1P1Beta1 (https://cloud.google.com/speech-to-text/docs)。(参数个数为" +"1)(默认参数为%(default)s)" -#: autosub/options.py:281 -#, python-format +#: autosub/options.py:282 msgid "" -"Google Speech V2 API response for text confidence. A float value between 0 " -"and 1. Confidence bigger means the result is better. Input this argument " -"will drop any result below it. Ref: https://github.com/BingLingGroup/google-" -"speech-v2#response (arg_num = 1) (default: %(default)s)" +"The API key for Speech-to-Text API. (arg_num = 1) Currently supported: gsv2: " +"The API key for gsv2. (default: Free API key) gcsv1: The API key for gcsv1. 
" +"(Can be overridden by \"-sa\"/\"--service-account\")" msgstr "" -"Google Speech V2 API用于识别可信度的回应参数。一个介于0和1之间的浮点数。可信" -"度越高意味着结果越好。输入这个参数会导致所有低于这个结果的识别结果被删除。参" -"考:https://github.com/BingLingGroup/google-speech-v2#response(参数个数为1)" -"(默认参数为%(default)s)" +"API key for Speech-to-Text API. (arg_num = 1) Currently supported: gsv2: The " +"API key for gsv2. (default: Free API key) gcsv1: The API key for gcsv1. (Can " +"be overridden by \"-sa\"/\"--service-account\")" #: autosub/options.py:294 #, python-format msgid "" -"Number of concurrent Google Speech V2 requests to make. (arg_num = 1) " +"API response for text confidence. A float value between 0 and 1. Confidence " +"bigger means the result is better. Input this argument will drop any result " +"below it. Ref: https://github.com/BingLingGroup/google-speech-v2#response " +"(arg_num = 1) (default: %(default)s)" +msgstr "" +"API用于识别可信度的回应参数。一个介于0和1之间的浮点数。可信度越高意味着结果越" +"好。输入这个参数会导致所有低于这个结果的识别结果被删除。参考:https://github." +"com/BingLingGroup/google-speech-v2#response(参数个数为1)(默认参数" +"为%(default)s)" + +#: autosub/options.py:307 +#, python-format +msgid "" +"Number of concurrent Speech-to-Text requests to make. (arg_num = 1) " "(default: %(default)s)" msgstr "" -"用于Google Speech V2 requests请求的并行数量。(参数个数为1)(默认参数为%" -"(default)s)" +"用于Speech-to-Text请求的并行数量。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:300 autosub/options.py:526 autosub/options.py:536 -#: autosub/options.py:546 +#: autosub/options.py:313 autosub/options.py:551 autosub/options.py:561 +#: autosub/options.py:571 msgid "second" msgstr "秒" -#: autosub/options.py:303 +#: autosub/options.py:316 #, python-format msgid "" "(Experimental)Seconds to sleep between two translation requests. (arg_num = " "1) (default: %(default)s)" msgstr "" -"(实验性)在两次翻译请求之间睡眠(暂停)的时间。(参数个数为1)(默认参数为%" -"(default)s)" +"(实验性)在两次翻译请求之间睡眠(暂停)的时间。(参数个数为1)(默认参数" +"为%(default)s)" -#: autosub/options.py:312 +#: autosub/options.py:325 msgid "" "(Experimental)Customize request urls. 
Ref: https://py-googletrans." "readthedocs.io/en/latest/ (arg_num >= 1)" @@ -379,44 +396,43 @@ msgstr "" "(实验性)自定义多个请求URL。参考:https://py-googletrans.readthedocs.io/en/" "latest/(参数个数大于等于1)" -#: autosub/options.py:320 -#, fuzzy +#: autosub/options.py:333 msgid "" "(Experimental)Customize User-Agent headers. Same docs above. (arg_num = 1)" msgstr "" "(实验性)自定义用户代理(User-Agent)头部。同样的参考文档如上。(参数个数为" "1)" -#: autosub/options.py:328 +#: autosub/options.py:341 msgid "" -"The Google Translate V2 API key to be used. If not provided, use free API(py-" -"googletrans) instead. (arg_num = 1)" +"The Google Translate V2 API key to be used. If not provided, use free API " +"(py-googletrans) instead. (arg_num = 1)" msgstr "" "用于Google Translate V2的API key。如果没有提供,则使用免费的API也就是py-" "googletrans。(参数个数为1)" -#: autosub/options.py:338 +#: autosub/options.py:351 #, python-format msgid "" -"Number of lines per Google Translate V2 request. (arg_num = 1) (default: %" -"(default)s)" +"Number of lines per Google Translate V2 request. (arg_num = 1) (default: " +"%(default)s)" msgstr "" "Google Translate V2请求的每行行数。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:347 +#: autosub/options.py:360 #, python-format msgid "" "Number of concurrent Google translate V2 API requests to make. (arg_num = 1) " "(default: %(default)s)" msgstr "" -"Google translate V2 API请求的并行数量。(参数个数为1)(默认参数为%(default)" -"s)" +"Google translate V2 API请求的并行数量。(参数个数为1)(默认参数" +"为%(default)s)" -#: autosub/options.py:355 +#: autosub/options.py:368 msgid "Change the Google Speech V2 API URL into the http one. (arg_num = 0)" msgstr "将Google Speech V2 API的URL改为http类型。(参数个数为0)" -#: autosub/options.py:364 +#: autosub/options.py:377 #, python-format msgid "" "Add https proxy by setting environment variables. 
If arg_num is 0, use const " @@ -425,7 +441,7 @@ msgstr "" "通过设置环境变量的方式添加https代理。如果参数个数是0,使用const里的代理URL。" "(参数个数为0或1)(const为%(const)s)" -#: autosub/options.py:373 +#: autosub/options.py:386 #, python-format msgid "" "Add http proxy by setting environment variables. If arg_num is 0, use const " @@ -434,33 +450,46 @@ msgstr "" "通过设置环境变量的方式添加http代理。如果参数个数是0,使用const里的代理URL。" "(参数个数为0或1)(const为%(const)s)" -#: autosub/options.py:380 +#: autosub/options.py:393 msgid "username" msgstr "用户名" -#: autosub/options.py:381 +#: autosub/options.py:394 msgid "Set proxy username. (arg_num = 1)" msgstr "设置代理用户名。(参数个数为1)" -#: autosub/options.py:387 +#: autosub/options.py:400 msgid "password" msgstr "密码" -#: autosub/options.py:388 +#: autosub/options.py:401 msgid "Set proxy password. (arg_num = 1)" msgstr "设置代理密码。(参数个数为1)" -#: autosub/options.py:395 +#: autosub/options.py:408 #, python-format msgid "Show %(prog)s help message and exit. (arg_num = 0)" msgstr "显示%(prog)s的帮助信息并退出。(参数个数为0)" -#: autosub/options.py:404 +#: autosub/options.py:417 #, python-format msgid "Show %(prog)s version and exit. (arg_num = 0)" msgstr "显示%(prog)s的版本信息并退出。(参数个数为0)" -#: autosub/options.py:410 +#: autosub/options.py:423 +msgid "" +"Set service account key environment variable. It should be the file path of " +"the JSON file that contains your service account credentials. If used, " +"override the API key options. Ref: https://cloud.google.com/docs/" +"authentication/getting-started Currently supported: gcsv1 " +"(GOOGLE_APPLICATION_CREDENTIALS) (arg_num = 1)" +msgstr "" +"设置服务账号密钥的环境变量。应该是包含服务帐号密钥的 JSON 文件的文件路径。如" +"果使用了,会覆盖API密钥选项。参考:https://cloud.google.com/docs/" +"authentication/getting-started 当前支持:" +"gcsv1(GOOGLE_APPLICATION_CREDENTIALS)(参数个数为1)" + +#: autosub/options.py:435 msgid "" "Option to control audio process. If not given the option, do normal " "conversion work. 
\"y\": pre-process the input first then start normal " @@ -481,15 +510,15 @@ msgstr "" "scripts/subgen.sh https://ffmpeg.org/ffmpeg-filters.html)(参数个数介于1和2" "之间)" -#: autosub/options.py:437 +#: autosub/options.py:462 msgid "Keep audio processing files to the output path. (arg_num = 0)" msgstr "将音频处理中产生的文件放在输出路径中。(参数个数为0)" -#: autosub/options.py:443 autosub/options.py:463 autosub/options.py:475 +#: autosub/options.py:468 autosub/options.py:488 autosub/options.py:500 msgid "command" msgstr "命令" -#: autosub/options.py:444 +#: autosub/options.py:469 msgid "" "This arg will override the default audio process command. Every line of the " "commands need to be in quotes. Input file name is {in_}. Output file name is " @@ -498,7 +527,7 @@ msgstr "" "这个参数会取代默认的音频处理命令。每行命令需要放在一个引号内。输入文件名写为" "{in_}。输出文件名写为{out_}。(参数个数大于1)" -#: autosub/options.py:457 +#: autosub/options.py:482 #, python-format msgid "" "Number of concurrent ffmpeg audio split process to make. (arg_num = 1) " @@ -506,7 +535,7 @@ msgid "" msgstr "" "用于ffmpeg音频切割的进程并行数量。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:464 +#: autosub/options.py:489 msgid "" "(Experimental)This arg will override the default audio conversion command. " "Need to follow the python references keyword argument. Default command to " @@ -515,7 +544,7 @@ msgstr "" "(实验性)这个参数会取代默认的音频转换命令。需要遵循原有的python参考关键词参" "数写法。以下是用于处理音频的默认命令:{dft}(默认参数为1)" -#: autosub/options.py:476 +#: autosub/options.py:501 msgid "" "(Experimental)This arg will override the default audio split command. Same " "attention above. Default: {dft} (arg_num = 1)" @@ -523,24 +552,24 @@ msgstr "" "(实验性)这个参数会取代默认的音频转换命令。相同的注意如上。默认:{dft}(参数" "个数为1)" -#: autosub/options.py:486 +#: autosub/options.py:511 msgid "file_suffix" msgstr "文件名后缀" -#: autosub/options.py:488 +#: autosub/options.py:513 #, python-format msgid "" "(Experimental)This arg will override the default API audio suffix. 
(arg_num " "= 1) (default: %(default)s)" msgstr "" -"(实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数为%(default)" -"s)" +"(实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数" +"为%(default)s)" -#: autosub/options.py:495 +#: autosub/options.py:520 msgid "sample_rate" msgstr "采样率" -#: autosub/options.py:498 +#: autosub/options.py:523 #, python-format msgid "" "(Experimental)This arg will override the default API audio sample rate(Hz). " @@ -549,11 +578,11 @@ msgstr "" "(实验性)这个参数会取代默认的给API使用的音频采样率(赫兹)。(参数个数为1)" "(默认参数为%(default)s)" -#: autosub/options.py:505 +#: autosub/options.py:530 msgid "channel_num" msgstr "声道数" -#: autosub/options.py:508 +#: autosub/options.py:533 #, python-format msgid "" "(Experimental)This arg will override the default API audio channel. (arg_num " @@ -562,11 +591,11 @@ msgstr "" "(实验性)这个参数会取代默认的给API使用的音频声道数量。(参数个数为1)(默认" "参数为%(default)s)" -#: autosub/options.py:515 +#: autosub/options.py:540 msgid "energy" msgstr "能量(相对值)" -#: autosub/options.py:518 +#: autosub/options.py:543 #, python-format msgid "" "The energy level which determines the region to be detected. Ref: https://" @@ -577,23 +606,23 @@ msgstr "" "latest/apitutorial.html#examples-using-real-audio-data(参数个数为1)(默认参" "数为%(default)s)" -#: autosub/options.py:529 +#: autosub/options.py:554 #, python-format msgid "" "Minimum region size. Same docs above. (arg_num = 1) (default: %(default)s)" msgstr "" -"最小语音区域大小。同样的参考文档如上。(参数个数为1)(默认参数为%(default)" -"s)" +"最小语音区域大小。同样的参考文档如上。(参数个数为1)(默认参数" +"为%(default)s)" -#: autosub/options.py:539 +#: autosub/options.py:564 #, python-format msgid "" "Maximum region size. Same docs above. (arg_num = 1) (default: %(default)s)" msgstr "" -"最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数为%(default)" -"s)" +"最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数" +"为%(default)s)" -#: autosub/options.py:549 +#: autosub/options.py:574 #, python-format msgid "" "Maximum length of a tolerated silence within a valid audio activity. 
Same " @@ -602,7 +631,7 @@ msgstr "" "在一段有效的音频活动区域中可以容忍的最大(连续)安静区域。同样的参考文档如" "上。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:557 autosub/options.py:564 +#: autosub/options.py:582 autosub/options.py:589 msgid "" "Ref: https://auditok.readthedocs.io/en/latest/core.html#class-summary " "(arg_num = 0)" @@ -610,7 +639,7 @@ msgstr "" "参考:https://auditok.readthedocs.io/en/latest/core.html#class-summary(参数" "个数为0)" -#: autosub/options.py:571 +#: autosub/options.py:596 msgid "" "List all available subtitles formats. If your format is not supported, you " "can use ffmpeg or SubtitleEdit to convert the formats. You need to offer fps " @@ -621,25 +650,25 @@ msgstr "" "SubtitleEdit来对其进行转换。如果输出格式是\"sub\"且输入文件是音频无法获取到视" "频帧率时,你需要提供fps选项指定帧率。(参数个数为0)" -#: autosub/options.py:585 +#: autosub/options.py:610 msgid "" -"List all recommended \"-S\"/\"--speech-language\" Google Speech V2 language " -"codes. If no arg is given, list all. Or else will list get a group of \"good " -"match\" of the arg. Default \"good match\" standard is whose match score " -"above 90(score between 0 and 100). Ref: https://tools.ietf.org/html/bcp47 " -"https://github.com/LuminosoInsight/langcodes/blob/master/langcodes/__init__." -"py lang code example: language-script-region-variant-extension-privateuse " -"(arg_num = 0 or 1)" +"List all recommended \"-S\"/\"--speech-language\" Google Speech-to-Text " +"language codes. If no arg is given, list all. Or else will list get a group " +"of \"good match\" of the arg. Default \"good match\" standard is whose match " +"score above 90 (score between 0 and 100). 
Ref: https://tools.ietf.org/html/" +"bcp47 https://github.com/LuminosoInsight/langcodes/blob/master/langcodes/" +"__init__.py lang code example: language-script-region-variant-extension-" +"privateuse (arg_num = 0 or 1)" msgstr "" -"列出所有推荐的\"-S\"/\"--speech-language\"Google Speech V2语言代码。如果参数" -"没有给出,列出全部语言代码。默认的“好的匹配”标准是匹配分数超过90分(匹配分数" -"介于0和100之间)。参考:https://tools.ietf.org/html/bcp47 https://github.com/" -"LuminosoInsight/langcodes/blob/master/langcodes/__init__.py 语言代码范例:语" -"言文字种类-(扩展语言文字种类)-变体(或方言)-使用区域-变体(或方言)-扩展-" -"私有(https://www.zhihu.com/question/21980689/answer/93615123)(参数个数为0" -"或1)" +"列出所有推荐的\"-S\"/\"--speech-language\"Google Speech-to-Text 语言代码。如" +"果参数没有给出,列出全部语言代码。默认的“好的匹配”标准是匹配分数超过90分(匹" +"配分数介于0和100之间)。参考:https://tools.ietf.org/html/bcp47 https://" +"github.com/LuminosoInsight/langcodes/blob/master/langcodes/__init__.py 语言代" +"码范例:语言文字种类-(扩展语言文字种类)-变体(或方言)-使用区域-变体(或方" +"言)-扩展-私有(https://www.zhihu.com/question/21980689/answer/93615123)(参" +"数个数为0或1)" -#: autosub/options.py:602 +#: autosub/options.py:627 msgid "" "List all available \"-SRC\"/\"--src-language\" py-googletrans translation " "language codes. Or else will list get a group of \"good match\" of the arg. " @@ -649,19 +678,26 @@ msgstr "" "言代码。否则会给出一个“好的匹配”的清单。同样的参考文档如上。(参数个数为0或" "1)" -#: autosub/options.py:613 +#: autosub/options.py:638 #, python-format msgid "" "Use py-googletrans to detect a sub file's first line language. And list a " "group of matched language in recommended \"-S\"/\"--speech-language\" Google " -"Speech V2 language codes. Ref: https://cloud.google.com/speech-to-text/docs/" -"languages (arg_num = 1) (default: %(default)s)" +"Speech-to-Text language codes. 
Ref: https://cloud.google.com/speech-to-text/" +"docs/languages (arg_num = 1) (default: %(default)s)" msgstr "" "使用py-googletrans去检测一个字幕文件的第一行的语言。并列出一个和该语言匹配的" -"推荐Google Speech V2语言代码清单(\"-S\"/\"--speech-language\"选项所用到" +"推荐Google Speech-to-Text语言代码清单(\"-S\"/\"--speech-language\"选项所用到" "的)。参考:https://cloud.google.com/speech-to-text/docs/languages(参数个数" "为1)(默认参数 %(default)s)" +#~ msgid "" +#~ "The Google Speech V2 API key to be used. If not provided, use free API " +#~ "key instead.(arg_num = 1)" +#~ msgstr "" +#~ "用于Google Speech V2 API的key。如果没有提供,会使用免费API key。(参数个数" +#~ "为1)" + #~ msgid "" #~ "(Experimental)This arg will override the default API audio sample rate, " #~ "which control the audio sample rate before sending it to the API.(arg_num " diff --git a/autosub/options.py b/autosub/options.py index ca559e58..3219f8ee 100644 --- a/autosub/options.py +++ b/autosub/options.py @@ -267,7 +267,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements speech_group.add_argument( '-sapi', '--speech-api', - metavar=_('api_code'), + metavar=_('API_code'), default='gsv2', help=_("Choose which Speech-to-Text API to use. " "Currently supported: " @@ -424,7 +424,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "It should be the file path of the JSON file " "that contains your service account credentials. " "If used, override the API key options. " - "Ref: https://cloud.google.com/docs/authentication/getting-started" + "Ref: https://cloud.google.com/docs/authentication/getting-started " "Currently supported: gcsv1 (GOOGLE_APPLICATION_CREDENTIALS) " "(arg_num = 1)") ) @@ -608,7 +608,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements const=' ', nargs='?', help=_("List all recommended \"-S\"/\"--speech-language\" " - "Google Speech V2 language codes. " + "Google Speech-to-Text language codes. " "If no arg is given, list all. " "Or else will list get a group of \"good match\" " "of the arg. 
Default \"good match\" standard is whose " @@ -637,7 +637,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements metavar=_('path'), help=_("Use py-googletrans to detect a sub file's first line language. " "And list a group of matched language in recommended " - "\"-S\"/\"--speech-language\" Google Speech V2 language codes. " + "\"-S\"/\"--speech-language\" Google Speech-to-Text language codes. " "Ref: https://cloud.google.com/speech-to-text/docs/languages " "(arg_num = 1) (default: %(default)s)") ) diff --git a/autosub/speech_trans_api.py b/autosub/speech_trans_api.py index 88a9a71e..2d50e6a7 100644 --- a/autosub/speech_trans_api.py +++ b/autosub/speech_trans_api.py @@ -127,7 +127,7 @@ def gcsv1p1beta1_service_client( else: raise exceptions.SpeechToTextException( - result_dict) + json.dumps(result_dict, indent=4, ensure_ascii=False)) if 'confidence' in result_dict: confidence = \ @@ -207,7 +207,7 @@ def __call__(self, filename): else: raise exceptions.SpeechToTextException( - result_dict) + requests_result_json) if 'confidence' in result_dict: confidence = \ diff --git a/docs/CHANGELOG.zh-Hans.md b/docs/CHANGELOG.zh-Hans.md index c4231d45..e8a375bd 100644 --- a/docs/CHANGELOG.zh-Hans.md +++ b/docs/CHANGELOG.zh-Hans.md @@ -30,6 +30,10 @@ ### [未发布] +#### 添加(未发布) + +- 添加Google Cloud Speech-to-Text支持。 [issue #10](https://github.com/BingLingGroup/autosub/issues/10) + ### [0.5.3-alpha] - 2019-12-30 #### 改动(0.5.3-alpha) diff --git a/docs/README.zh-Hans.md b/docs/README.zh-Hans.md index 720bc4a1..1f9911a2 100644 --- a/docs/README.zh-Hans.md +++ b/docs/README.zh-Hans.md @@ -382,7 +382,7 @@ usage: -i 路径, --input 路径 用于生成字幕文件的视频/音频/字幕文件。如果输入文件是字幕文件,程序仅会对其进行翻译。(参数个数为1) -er 路径, --ext-regions 路径 - 提供外部语音区域(时间轴)的字幕文件。该字幕文件格式需要是pysubs2所支持的。使用后会替换掉默认的 自动寻 + 提供外部语音区域(时间轴)的字幕文件。该字幕文件格式需要是pysubs2所支持的。使用后会替换掉默认的自动寻 找语音区域(时间轴)的功能。(参数个数为1) -sty [路径], --styles [路径] 当输出格式为"ass"/"ssa"时有效。为你的字幕文件提供"ass"/"ssa"样式的字幕文件。如果不提供 @@ -417,7 +417,7 @@ usage: 
一个介于0和100之间的整数用于控制以下两个选项的匹配结果组,"-lsc"/"--list-speech- codes"以及"-ltc"/"--list-translation-codes"或者在"-bm"/"-- best- - match"选项中的最佳匹配结果。结果会是一组“好的匹配”,其分数需要超过这个参数的值。(参数个数为1 ) + match"选项中的最佳匹配结果。结果会是一组“好的匹配”,其分数需要超过这个参数的值。(参数个数为1) 输出选项: 控制输出的选项。 @@ -426,7 +426,7 @@ usage: -F 格式, --format 格式 输出字幕的格式。如果没有提供该选项,使用"-o"/"--output"参数中的后缀。如果"-o"/"-- output"参数也没有提供扩展名,那么使用"srt"。在这种情况下,如果"-i"/"-- input"的参数是一个字幕文件,那么使用和字幕文件相同的扩展名。(参数个数为1)(默认参数为srt) - -y, --yes 避免任何运行中的暂停和覆写文件的行为。如果参数有误,会直接停止程序。(参数个数为0) + -y, --yes 避免任何暂停和覆写文件的行为。如果参数有误,会直接停止程序。(参数个数为0) -of [种类 [种类 ...]], --output-files [种类 [种类 ...]] 输出更多的文件。可选种类:regions, src, dst, bilingual, all.(时间轴,源语 言字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数为['dst']) @@ -440,16 +440,23 @@ usage: 语音选项: 控制语音转文字的选项。 - -gsv2 key, --gspeechv2 key - 用于Google Speech V2 API的key。如果没有提供,会使用免费API - key。(参数个数为1) + -sapi API代码, --speech-api API代码 + 选择使用Speech-to-Text API。当前支持:gsv2:Google Speech V2 + (https://github.com/gillesdemey/google-speech-v2)。 + gcsv1:Google Cloud Speech-to-Text V1P1Beta1 + (https://cloud.google.com/speech-to- + text/docs)。(参数个数为1)(默认参数为gsv2) + -skey key, --speech-key key + API key for Speech-to-Text API. (arg_num = 1) + Currently supported: gsv2: The API key for gsv2. + (default: Free API key) gcsv1: The API key for gcsv1. 
+ (Can be overridden by "-sa"/"--service-account") -mnc float, --min-confidence float - Google Speech V2 API用于识别可信度的回应参数。一个介于0和1之间的浮点数。可信度越高意味 - 着结果越好。输入这个参数会导致所有低于这个结果的识别结果被删除。参考:https://github.com/ - BingLingGroup/google- - speech-v2#response(参数个数为1)(默认参数为0.0) + API用于识别可信度的回应参数。一个介于0和1之间的浮点数。可信度越高意味着结果越好。输入这个参数会导致所有 + 低于这个结果的识别结果被删除。参考:https://github.com/BingLingGroup/goo + gle-speech-v2#response(参数个数为1)(默认参数为0.0) -sc integer, --speech-concurrency integer - 用于Google Speech V2 requests请求的并行数量。(参数个数为1)(默认参数为10) + 用于Speech-to-Text请求的并行数量。(参数个数为1)(默认参数为10) py-googletrans选项: 控制翻译的选项。同时也是默认的翻译方法。可能随时会被谷歌爸爸封。 @@ -458,11 +465,11 @@ py-googletrans选项: (实验性)在两次翻译请求之间睡眠(暂停)的时间。(参数个数为1)(默认参数为5) -surl [URL [URL ...]], --service-urls [URL [URL ...]] (实验性)自定义多个请求URL。参考:https://py- - googletrans.readthedocs.io/en/latest/(参数个数为1) - -ua User-Agent header, --user-agent User-Agent header + googletrans.readthedocs.io/en/latest/(参数个数大于等于1) + -ua User-Agent headers, --user-agent User-Agent headers (实验性)自定义用户代理(User-Agent)头部。同样的参考文档如上。(参数个数为1) -Google Speech V2选项: +Google Translate V2选项: 控制翻译的选项。(未被测试过)如果提供了API key,它会取代py-googletrans的翻译方法。 -gtv2 key, --gtransv2 key @@ -494,12 +501,17 @@ Google Speech V2选项: -h, --help 显示autosub的帮助信息并退出。(参数个数为0) -V, --version 显示autosub的版本信息并退出。(参数个数为0) + -sa 路径, --service-account 路径 + 设置服务账号密钥的环境变量。应该是包含服务帐号密钥的 JSON 文件的文件路径。如果使用了,会覆盖API密钥 + 选项。参考:https://cloud.google.com/docs/authentication/get + ting-started + 当前支持:gcsv1(GOOGLE_APPLICATION_CREDENTIALS)(参数个数为1) 音频处理选项: 控制音频处理的选项。 -ap [模式 [模式 ...]], --audio-process [模式 [模式 ...]] - 控制音频处理的选项。如果没有提供选项,进行正常的格式转换工作。"y":它会先预处理输入文件,如果成 功了,在语 + 控制音频处理的选项。如果没有提供选项,进行正常的格式转换工作。"y":它会先预处理输入文件,如果成功了,在语 音转文字之前不会对音频进行额外的处理。"o":只会预处理输入音频。("-k"/"-- keep"选项自动置为真)"s":只会分割输入音频。("-k"/"--keep"选项自动置为真)"n":在语 音转文字的步骤之前,强制去除多余的格式检查或者转换工作。以下是用于处理音频的默认命令:ffmpeg @@ -514,18 +526,17 @@ Google Speech V2选项: https://ffmpeg.org/ffmpeg-filters.html)(参数个数介于1和2之间) -k, --keep 将音频处理中产生的文件放在输出路径中。(参数个数为0) -apc [命令 [命令 ...]], --audio-process-cmd 
[命令 [命令 ...]] - 这个参数会取代默认的音频处理命令。每行命令需要放在一个引号内。输入文件名写为{in_}。输出文件名写 为{ou + 这个参数会取代默认的音频处理命令。每行命令需要放在一个引号内。输入文件名写为{in_}。输出文件名写为{ou t_}。(参数个数大于1) -ac integer, --audio-concurrency integer 用于ffmpeg音频切割的进程并行数量。(参数个数为1)(默认参数为10) -acc 命令, --audio-conversion-cmd 命令 (实验性)这个参数会取代默认的音频转换命令。需要遵循原有的python参考关键词参数写法。以下是用于处理音频 - 的默认命令:ffmpeg -hide_banner -y -i "{in_}" -ac {channel} - -ar {sample_rate} "{out_}"(默认参数为1) + 的默认命令:ffmpeg -hide_banner -y -i "{in_}" -vn -ac + {channel} -ar {sample_rate} "{out_}"(默认参数为1) -asc 命令, --audio-split-cmd 命令 - (实验性)这个参数会取代默认的音频转换命令。相同的注意如上。默认:ffmpeg -ss {start} -t - {dura} -y -i "{in_}" -c copy -loglevel error - "{out_}"(参数个数为1) + (实验性)这个参数会取代默认的音频转换命令。相同的注意如上。默认:ffmpeg -y -ss {start} + -i "{in_}" -t {dura} -loglevel error "{out_}"(参数个数为1) -asf 文件名后缀, --api-suffix 文件名后缀 (实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数为.flac) -asr 采样率, --api-sample-rate 采样率 @@ -545,7 +556,7 @@ Auditok的选项: -mxrs 秒, --max-region-size 秒 最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数为6.0) -mxcs 秒, --max-continuous-silence 秒 - 在一段有效的音频活动区域中可以容忍的最大(连续)安静区域。同样的参考文档如上。(参数个数为1)( 默认参数为0 + 在一段有效的音频活动区域中可以容忍的最大(连续)安静区域。同样的参考文档如上。(参数个数为1)(默认参数为0 .3) -sml, --strict-min-length 参考:https://auditok.readthedocs.io/en/latest/core.html# @@ -561,11 +572,11 @@ Auditok的选项: 行转换。如果输出格式是"sub"且输入文件是音频无法获取到视频帧率时,你需要提供fps选项指定帧率。(参数个 数为0) -lsc [语言代码], --list-speech-codes [语言代码] - 列出所有推荐的"-S"/"--speech-language"Google Speech V2语言代码。如果 - 参数没有给出,列出全部语言代码。默认的“好的匹配”标准是匹配分数超过90分(匹配分数介于0和100之间)。参 - 考:https://tools.ietf.org/html/bcp47 https://github.com - /LuminosoInsight/langcodes/blob/master/langcodes/__ini - t__.py 语言代码范例:语言文字种类-(扩展语言文字种类)-变体(或方言)-使用区域- + 列出所有推荐的"-S"/"--speech-language"Google Speech-to-Text 语 + 言代码。如果参数没有给出,列出全部语言代码。默认的“好的匹配”标准是匹配分数超过90分(匹配分数介于0和10 + 0之间)。参考:https://tools.ietf.org/html/bcp47 https://gith + ub.com/LuminosoInsight/langcodes/blob/master/langcodes + /__init__.py 语言代码范例:语言文字种类-(扩展语言文字种类)-变体(或方言)-使用区域- 变体(或方言)-扩展-私有(https://www.zhihu.com/question/21980689/ answer/93615123)(参数个数为0或1) -ltc [语言代码], 
--list-translation-codes [语言代码] @@ -573,7 +584,7 @@ Auditok的选项: 语言代码。否则会给出一个“好的匹配”的清单。同样的参考文档如上。(参数个数为0或1) -dsl 路径, --detect-sub-language 路径 使用py-googletrans去检测一个字幕文件的第一行的语言。并列出一个和该语言匹配的推荐Google - Speech V2语言代码清单("-S"/"--speech- + Speech-to-Text语言代码清单("-S"/"--speech- language"选项所用到的)。参考:https://cloud.google.com/speech- to-text/docs/languages(参数个数为1)(默认参数 None) From 9b60e4e17a4e6bc7ebc9cb9308cf135ba29396fd Mon Sep 17 00:00:00 2001 From: BingLingGroup <42505588+BingLingGroup@users.noreply.github.com> Date: Tue, 28 Jan 2020 12:50:52 +0800 Subject: [PATCH 05/10] Refactor cmdline_utils.py to remove extra file conversion Add Google Cloud Speech-to-Text support for .ogg --- autosub/__init__.py | 37 ++++--- autosub/cmdline_utils.py | 37 +------ autosub/constants.py | 9 +- autosub/core.py | 4 +- .../LC_MESSAGES/autosub.cmdline_utils.mo | Bin 9679 -> 9614 bytes .../LC_MESSAGES/autosub.cmdline_utils.po | 61 ++++++------ .../locale/zh_CN/LC_MESSAGES/autosub.core.mo | Bin 1740 -> 1801 bytes .../locale/zh_CN/LC_MESSAGES/autosub.core.po | 4 +- .../data/locale/zh_CN/LC_MESSAGES/autosub.mo | Bin 1283 -> 1139 bytes .../zh_CN/LC_MESSAGES/autosub.options.mo | Bin 23247 -> 23223 bytes .../zh_CN/LC_MESSAGES/autosub.options.po | 90 +++++++++--------- .../data/locale/zh_CN/LC_MESSAGES/autosub.po | 30 +++--- autosub/options.py | 9 +- 13 files changed, 134 insertions(+), 147 deletions(-) diff --git a/autosub/__init__.py b/autosub/__init__.py index c9bfa75e..d9255d4e 100644 --- a/autosub/__init__.py +++ b/autosub/__init__.py @@ -74,6 +74,9 @@ def main(): # pylint: disable=too-many-branches, too-many-statements, too-many- ffmpeg_cmd = ffmpeg_cmd + ' ' + cmdline_utils.fix_args(args, + ffmpeg_cmd=ffmpeg_cmd) + if args.audio_process: args.audio_process = {k.lower() for k in args.audio_process} args.audio_process = \ @@ -115,31 +118,39 @@ def main(): # pylint: disable=too-many-branches, too-many-statements, too-many- input_m=input_m, ffmpeg_cmd=ffmpeg_cmd ) + args.audio_split_cmd = \ + 
args.audio_split_cmd.replace( + "-vn -ac [channel] -ar [sample_rate] ", "") if not prcs_file: - no_audio_prcs = False + print(_("Audio pre-processing failed." + "\nUse default method")) else: args.input = prcs_file print(_("Audio pre-processing complete.")) - no_audio_prcs = True - elif 'n' in args.audio_process: - print(_("No extra check/conversion " - "before the speech-to-text procedure.")) - no_audio_prcs = True - else: - no_audio_prcs = False else: - no_audio_prcs = False + args.audio_split_cmd = \ + args.audio_split_cmd.replace( + "[channel]", + "{channel}".format(channel=args.api_audio_channel)) + args.audio_split_cmd = \ + args.audio_split_cmd.replace( + "[sample_rate]", + "{sample_rate}".format(sample_rate=args.api_sample_rate)) + + if args.api_suffix == ".ogg": + # regard ogg as ogg_opus + args.audio_split_cmd = \ + args.audio_split_cmd.replace( + "-vn", + "-vn -c:a libopus") cmdline_utils.validate_aovp_args(args) - cmdline_utils.fix_args(args, - ffmpeg_cmd=ffmpeg_cmd) fps = cmdline_utils.get_fps(args=args, input_m=input_m) cmdline_utils.audio_or_video_prcs(args, fps=fps, input_m=input_m, - styles_list=styles_list, - no_audio_prcs=no_audio_prcs) + styles_list=styles_list) elif validate_result == 1: cmdline_utils.validate_sp_args(args) diff --git a/autosub/cmdline_utils.py b/autosub/cmdline_utils.py index a7bd8589..400813c2 100644 --- a/autosub/cmdline_utils.py +++ b/autosub/cmdline_utils.py @@ -763,8 +763,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen args, input_m=input, fps=30.0, - styles_list=None, - no_audio_prcs=False): + styles_list=None): """ Give args and process an input audio or video file. 
""" @@ -883,34 +882,8 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen except KeyError: pass - if not no_audio_prcs: - audio_for_api_temp = tempfile.NamedTemporaryFile( - suffix=args.api_suffix, - delete=False) - audio_for_api = audio_for_api_temp.name - audio_for_api_temp.close() - command = args.audio_conversion_cmd.format( - in_=args.input, - channel=args.api_audio_channel, - sample_rate=args.api_sample_rate, - out_=audio_for_api - ) - print(_("\nConvert to \"{name}\" " - "for API.").format( - name=audio_for_api)) - print(command) - subprocess.check_output( - constants.cmd_conversion(command), - stdin=open(os.devnull)) - if not ffmpeg_utils.ffprobe_check_file(audio_for_api): - raise exceptions.AutosubException( - _("Error: Convert source audio to \"{name}\" failed.").format( - name=audio_for_api)) - else: - audio_for_api = args.input - audio_fragments = core.bulk_audio_conversion( - source_file=audio_for_api, + source_file=args.input, output=args.output, regions=regions, split_cmd=args.audio_split_cmd, @@ -927,10 +900,6 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen raise exceptions.ConversionException( _("Error: Conversion failed.")) - if not args.keep: - os.remove(audio_for_api) - print(_("\n\"{name}\" has been deleted.").format(name=audio_for_api)) - if args.audio_process and 's' in args.audio_process: raise exceptions.AutosubException( _("Audio processing complete.\nAll works done.")) @@ -958,7 +927,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen {"Content-Type": "audio/x-flac; rate={rate}".format(rate=args.api_sample_rate)} else: headers = \ - {"Content-Type": "audio/aac; rate={rate}".format(rate=args.api_sample_rate)} + {"Content-Type": "audio/ogg; rate={rate}".format(rate=args.api_sample_rate)} text_list = core.gsv2_to_text( audio_fragments=audio_fragments, diff --git a/autosub/constants.py b/autosub/constants.py index 49a2c0fc..dda842c5 100644 --- 
a/autosub/constants.py +++ b/autosub/constants.py @@ -72,7 +72,7 @@ DEFAULT_MODE_SET = {'regions', 'src', 'dst', 'bilingual'} DEFAULT_SUB_MODE_SET = {'dst', 'bilingual'} DEFAULT_LANG_MODE_SET = {'s', 'src', 'd'} -DEFAULT_AUDIO_PRCS_MODE_SET = {'o', 's', 'y', 'n'} +DEFAULT_AUDIO_PRCS_MODE_SET = {'o', 's', 'y'} DEFAULT_AUDIO_PRCS = [ "ffmpeg -hide_banner -i \"{in_}\" -af \"asplit[a],aphasemeter=video=0,\ @@ -88,10 +88,11 @@ "ffmpeg -hide_banner -y -i \"{in_}\" -vn -ac {channel} -ar {sample_rate} \"{out_}\"" DEFAULT_AUDIO_SPLT = \ - "ffmpeg -y -ss {start} -i \"{in_}\" -t {dura} -loglevel error \"{out_}\"" + "ffmpeg -y -ss {start} -i \"{in_}\" -t {dura} " \ + "-vn -ac [channel] -ar [sample_rate] -loglevel error \"{out_}\"" -DEFAULT_VIDEO_FPS_CMD = "ffprobe -v 0 -of csv=p=0 -select_streams" \ - " v:0 -show_entries stream=r_frame_rate \"{in_}\"" +DEFAULT_VIDEO_FPS_CMD = "ffprobe -v 0 -of csv=p=0 -select_streams " \ + "v:0 -show_entries stream=r_frame_rate \"{in_}\"" SPEECH_TO_TEXT_LANGUAGE_CODES = { 'af-za': 'Afrikaans (South Africa)', diff --git a/autosub/core.py b/autosub/core.py index c055a5cf..370a1b85 100644 --- a/autosub/core.py +++ b/autosub/core.py @@ -213,9 +213,9 @@ def gcsv1_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man encoding = "MP3" elif audio_fragments[0].lower().endswith(".wav"): # regard WAV as PCM - encoding = "PCM" + encoding = "LINEAR16" elif audio_fragments[0].lower().endswith(".ogg"): - encoding = "OGG" + encoding = "OGG_OPUS" else: encoding = "" diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo index 6e7fd10e746f579b5168dcb5c3ca84b0f44b4b02..2028fa8940a41b34ecbc9da4c8fd4e3624f4fba7 100644 GIT binary patch delta 1527 zcmYk+O-NKx6u|K_=GSOBYWY!`O=^CPqi@F1#wi_BNm9eU6pRXnloFvZlp>U&jG%}R z!otv%izq}4p;eKqHl?+YHj*JkxUhCX3&H;9y(e_x&hOqgZ|1&x&bjY%+vB$AYI^Lr z(3+>B1&#V10Sc;3BTz1*~;h3qM7T%m(W9V&dO|_mI%ZC)CtzqCQx5nw7#T 
zEN6ZS>v05+;9G3Qzo_fhrQ>Y|?-<;~;BG62bJ)Rr1y7)7k4P&Hq0Seuz(Ia9>>40^ zY))FR20L*-K0;mi6>3C&;sJDKSrIJF;yE<5Ra~gS5Nb#V(7-8d#3j^RrV(EaaSb23 z!w#IqVLXT)3a1QDVg=qp{r&|mU>9N1@3RP79yaC>2fZ=E1r7BW>cSsU7uvLM%;0ET zWgfr?PGJvjp>Cju?dl))Bjq9wQUCWD9>NdEt+IhyD^70KjryYubm2=_gcEoL7qA^m zh*v)j;Q+3orsk~MdeCV+%6u8Um_`_M!7y@3dXcEhebfz2p+3MHtjFkg28S8sk)T@a zLam80e1ORX{20ek7sw?Kr?C~a7$p0i@KUehrx(*T&l=fdbWOD$U2SAi z8Z3 z7RtwFtpRiJB&v5FqI%^G>H&VBDzhMo`p;%kkrXK90aS@wF$ddFrG12%IE-sBmTWrH zDpca*d}yZ^@c|Ct9BiP0w2>ZMg3nOjkKzlwn#}ffLk*8P3p-Ia-b9sr2$eXBdMeQ> z)P2>s509eWmSNnD#cWd>xP`i}4`~}4ME&juOvP}%F zBGeN!z5Aaq7&WN8@y5C5n5zp69a#)9QHy^@IP2`o_d`2UN}HRG?P+OsTALkTTT@-* uo>M-jzPZII*|e@dJuE7=zb+v?C~4&Bb1sYy^nLBV$$b3I{_}}pv3~$144ugU diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po index 61548151..afa6fb46 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-01-22 21:01+0800\n" +"POT-Creation-Date: 2020-01-28 11:58+0800\n" "PO-Revision-Date: 2020-01-23 16:51+0800\n" "Last-Translator: \n" "Language-Team: \n" @@ -263,7 +263,7 @@ msgstr "" "你输入的最大连续安静区域{mxcs}比0还小。\n" "现在重置为{dmxcs}。" -#: autosub/cmdline_utils.py:597 autosub/cmdline_utils.py:774 +#: autosub/cmdline_utils.py:597 autosub/cmdline_utils.py:773 msgid "" "\n" "No works done. Check your \"-of\"/\"--output-files\" option." @@ -271,16 +271,16 @@ msgstr "" "\n" "啥都没做。检查你的\"-of\"/\"--output-files\"选项。" -#: autosub/cmdline_utils.py:642 autosub/cmdline_utils.py:1101 +#: autosub/cmdline_utils.py:642 autosub/cmdline_utils.py:1070 msgid "Error: Translation failed." msgstr "错误:翻译失败。" -#: autosub/cmdline_utils.py:687 autosub/cmdline_utils.py:1137 +#: autosub/cmdline_utils.py:687 autosub/cmdline_utils.py:1106 msgid "Bilingual subtitles file created at \"{}\"." 
msgstr "双语字幕创建在了\"{}\"。" -#: autosub/cmdline_utils.py:691 autosub/cmdline_utils.py:881 -#: autosub/cmdline_utils.py:1072 autosub/cmdline_utils.py:1141 +#: autosub/cmdline_utils.py:691 autosub/cmdline_utils.py:880 +#: autosub/cmdline_utils.py:1041 autosub/cmdline_utils.py:1110 msgid "" "\n" "All works done." @@ -288,15 +288,15 @@ msgstr "" "\n" "做完了。" -#: autosub/cmdline_utils.py:730 autosub/cmdline_utils.py:1180 +#: autosub/cmdline_utils.py:730 autosub/cmdline_utils.py:1149 msgid "Destination language subtitles file created at \"{}\"." msgstr "目的语言字幕文件创建在了\"{}\"。" -#: autosub/cmdline_utils.py:780 +#: autosub/cmdline_utils.py:779 msgid "Use external speech regions." msgstr "使用外部时间轴。" -#: autosub/cmdline_utils.py:819 +#: autosub/cmdline_utils.py:818 msgid "" "\n" "Convert source audio to \"{name}\" to get audio length and detect audio " @@ -305,11 +305,11 @@ msgstr "" "\n" "将源音频转换为\"{name}\"来检测语音区域。" -#: autosub/cmdline_utils.py:829 autosub/cmdline_utils.py:907 +#: autosub/cmdline_utils.py:828 msgid "Error: Convert source audio to \"{name}\" failed." msgstr "错误:转换源音频至\"{name}\"失败。" -#: autosub/cmdline_utils.py:841 autosub/cmdline_utils.py:932 +#: autosub/cmdline_utils.py:840 msgid "" "\n" "\"{name}\" has been deleted." @@ -317,27 +317,19 @@ msgstr "" "\n" "\"{name}\"已被删除。" -#: autosub/cmdline_utils.py:845 +#: autosub/cmdline_utils.py:844 msgid "Error: Can't get speech regions." msgstr "错误:无法得到语音区域。" -#: autosub/cmdline_utils.py:878 autosub/cmdline_utils.py:1256 +#: autosub/cmdline_utils.py:877 autosub/cmdline_utils.py:1225 msgid "Times file created at \"{}\"." msgstr "时间轴文件创建在了\"{}\"." -#: autosub/cmdline_utils.py:898 -msgid "" -"\n" -"Convert to \"{name}\" for API." -msgstr "" -"\n" -"为API转换为\"{name}\"。" - -#: autosub/cmdline_utils.py:928 +#: autosub/cmdline_utils.py:901 msgid "Error: Conversion failed." msgstr "错误:转换失败。" -#: autosub/cmdline_utils.py:936 +#: autosub/cmdline_utils.py:905 msgid "" "Audio processing complete.\n" "All works done." 
@@ -345,22 +337,22 @@ msgstr "" "音频处理完毕。\n" "做完了。" -#: autosub/cmdline_utils.py:982 +#: autosub/cmdline_utils.py:951 msgid "Use the API key given in the option \"-skey\"/\"--speech-key\"." msgstr "使用选项\"-skey\"/\"--speech-key\"提供的API密钥。" -#: autosub/cmdline_utils.py:996 +#: autosub/cmdline_utils.py:965 msgid "" "Set the GOOGLE_APPLICATION_CREDENTIALS given in the option \"-sa\"/\"--" "service-account\"." msgstr "" "设置选项\"-sa\"/\"--service-account\"提供的GOOGLE_APPLICATION_CREDENTIALS。" -#: autosub/cmdline_utils.py:1010 +#: autosub/cmdline_utils.py:979 msgid "Use the GOOGLE_APPLICATION_CREDENTIALS in the environment variables." msgstr "使用环境变量中的GOOGLE_APPLICATION_CREDENTIALS。" -#: autosub/cmdline_utils.py:1022 +#: autosub/cmdline_utils.py:991 msgid "" "No available GOOGLE_APPLICATION_CREDENTIALS. Use \"-sa\"/\"--service-account" "\" to set one." @@ -368,7 +360,7 @@ msgstr "" "没有可用的GOOGLE_APPLICATION_CREDENTIALS。使用选项\"-sa\"/\"--service-account" "\"来设置一个。" -#: autosub/cmdline_utils.py:1030 +#: autosub/cmdline_utils.py:999 msgid "" "Error: Speech-to-text failed.\n" "All works done." @@ -376,11 +368,11 @@ msgstr "" "错误:语音转文字失败。\n" "做完了。" -#: autosub/cmdline_utils.py:1068 autosub/cmdline_utils.py:1222 +#: autosub/cmdline_utils.py:1037 autosub/cmdline_utils.py:1191 msgid "Speech language subtitles file created at \"{}\"." msgstr "语音字幕文件创建在了\"{}\"。" -#: autosub/cmdline_utils.py:1189 +#: autosub/cmdline_utils.py:1158 msgid "" "Override \"-of\"/\"--output-files\" due to your args too few.\n" "Output source subtitles file only." @@ -388,7 +380,7 @@ msgstr "" "因为你其他参数输入得太少了,忽略\"-of\"/\"--output-files\"参数。\n" "只输出源语言字幕文件。" -#: autosub/cmdline_utils.py:1227 +#: autosub/cmdline_utils.py:1196 msgid "" "Override \"-of\"/\"--output-files\" due to your args too few.\n" "Output regions subtitles file only." @@ -396,6 +388,13 @@ msgstr "" "因为你其他参数输入得太少了,忽略\"-of\"/\"--output-files\"参数。\n" "只输出时间轴文件。" +#~ msgid "" +#~ "\n" +#~ "Convert to \"{name}\" for API." 
+#~ msgstr "" +#~ "\n" +#~ "为API转换为\"{name}\"。" + #~ msgid "Bilingual subtitles file created at \"{}\"" #~ msgstr "双语字幕创建在了\"{}\"" diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.mo index 7c8c0aed8ba086221e3430c330c1784d1bf2680b..674a4784dbc4231b607a409ff49a7b7027f17eee 100644 GIT binary patch delta 210 zcmX@Z+sQYfr(T(nfngUj1A{FC1H(NQ28K`|EyK#d-~ptIfV32l-VLPrf%G*X%?qTT z0cmX@{RK!H0%-*{1_nKTp>su_V<>!N|bKK-a)f c*T_P_(8$Wba5D!BJLBYejEb9OST`^N0B{&1IRF3v delta 161 zcmeC=JHtDnr~VfM1H&$61_oOoy_JQ5!4XJ*2GSlt+LV=nK?+FM0cn09JrhXt0_l}N zS{q1j2hxT>`Z1991k$2x3=9%LIvz-i0qF`Ltp}w0fiy_pJ|N8qqu^0^g1q}WRKW;EfQZ|Du7R)A@{0|~gS0>8r z`#$yU^Yp&;xPh-RxJLYQVN8lqkAk!%H8%>=ol5kf6lFA#I5shleN_JoY~Te$7}iD7 z>pqb(wlN`6kqJwbhYe=&gzC^6Ynb$lSlC7Gh(R(vr!{luRkc}1Z-t75T*1y2 bouqA7%g*7b|1@j9_T1xZzj}_AkKtJU8doG4 delta 375 zcmXBM&r8B!9LMoz%YzNH@LJSEUb-w>Q8t5!h|WQ$PEqR#qu6Aw@TO%D5)u^_4MF=M zbx0y5D47375J8u=rME)x+-LN_o7eM;Z`+?0(w`w;fnW)!A8n%mvXHlaP%rvL9h`?Ba0XJIkRV)vVceRlVF5X}t?7(3d^8F8- zhI2S;a2D2J4{SrsBYxC<@*L)br}r2qvy`E)QYFrmt%#zjyG&1RYARi08=B6j5oa`+ zU`&aNh9(;LOcT1Mu=O4NEWj@XC0QJeieoaBreadGuJRpjv3TEnthP$^X5+y$ DHhyVI diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.mo index 9961788807a7731693962ad6f1a907a96d6dbbb3..bffb52f23c42ec5b707914e38d72531a27c3da97 100644 GIT binary patch delta 1668 zcmZ|Pe@xVM9LMq3p&W8*h6pMeemOHtIXVtV0#r&;2u->We;1d0HB3N{lQN@YB(XLM z*R7(Vvbj(f9Kn&6Vwc%&C3JvLQx6JNq9#KY61m+)(B!|LghLH`$MkDItlnnU|9 zGbFy6dbd=L^Kc6Nby$E6DN+vY$7a%pSCJCBIa5(B=i)aSTqNeFO5MbTv!p-pGo*$t zrb(`GDDJ?s^Ph-8 z+i?$Wz+p^JV7JU-4_vlDx)(c;+B%7(=px!Xx`xc9+gOKbhEul{Bg9{!z2ntc&iESS z=BX*l#Xc_Da0n-GH=g!NPh$E)i6-ri+l{3z>3s~){v)gMVdf&I;99&wycO-bi|&&y z;5%r?8?&W8yn^l$@O_(W;jpa8gfT2k{&~ZP~r#&B66ZtE@321n@s1nq~{T^4l)o+^_n zdFq!QmU0<5ox^;ZgwjW)X8a-UCRS%3$IEEqxA+W}tdVx&F$`g1g;YhoT}X;7j#4Jx zM*F>)Tjdyt#xJVk)?*e4ZCHo>7)GsiCT>ByPKQ29u!?+Ze 
zaGQ9_szo0g>_l#YqE@wY@heWFA^!>Kb1cJlbO$MdUHCi(Ih!#Y#t-qS8t0j(^734! zaIlsa4d-o=ZqdJVvvU^a^0K*!7h)PN!5Q|eQWo#1MtcDp<8gi5=i-KO7X1g%K3s3$ zB=gbS?)X&1?ss>&a$%si55F5?zq?;a z&ZGsdWNRSlRzlRf)th5@w|k9H*fg5=MjD!%LVE~y?Qb-jA}vOv8ER^1+GQMQ z-f!#(H5p;EvANYWBF~zQxrX=PL3?ytn=?FY?%UrGHtYX?cy{E@0_Cxm%vshoPi3kV z&h&Z`OrQ1UVk@zt#ts8rvGK|RYjJ*ky7g1#pl8zf@k7?pnt=OH2Zk|B delta 1639 zcmY+@e@xV69Ki9HZvgh&^;_@Qp;(zYy}&9(Jc@x6{MW3J8qiuL~Pd;i34yq?eVdA^_L`F@}0 zxjtg}am3K}mUB!*vO`5a!oV;Qqd}wwgNa?UX=5`U!;m>53i&1s!2WQN=jk7qE5c81 z^Dhe@VKDjVc_OJ8A0e`m{x|1~#Nr90g!If8r);Egl@0>~|HNkEzZQth;KvI^s3Auq zMOcHJMh{-V#kgaU$Rt+aL!QrzVlr*QXr7_p93!#~Z}|;lc5R_WScc`XPLVw{zNh1D zOn5=05M8)0K;#$H3ZyR;c?lbl+A@HoqvztGj%e z$W81-E#8|*8-KtdG`#073Fvfgq-0c*2)$CcS|550>xrlFIND!f=kY1FVRf=TE|hhA zOI(B6xQrB$P1L)IA;dGO`jo+IMY@Q4F^hcWIuUK&##cE7Y`ollP2cUuxP{nK=5 zmLZbEt{=}7N#en~CU%+u|8S^nc*(50fYoUeyU>Dd_%1%faxBT_^x!QVpkDk|{e|vx zl$OvRxm~|EseX4l{f%_I+)IPdS$8{u#06XfC@5__>0L7>S#4u69+5{fUD(n}p3D*Z6h${R9`1{}i>u zbr}Qknm_&uwFz$c<2&wqL5YSC_tT)n2zKLte}(Sl#wJ6Fd)SyBp(Qvb!cIx37P$X4 zx`MREN_WGorG_X^->fGAYjZ7@{ffh;l-mv+vQ;ULgBIm*wd0`8s@N^%7Q4k-X8AyQ z-%?#=v+q~-*$;eJX|Xyy;bEl<65_4#Y08$oe3Lm#$<0&D#Rd79ifOCaw0*THFSp2? 
zzdI){*ZtGtW$t-lksjBQIia4jFAoQ~tx3_w6eW3GTB@fdY0q=(r@Cr=m+PkLM${Wu z)Yfiq*Qq}`PUwqJ&(!;BU2218>g-kDPz#~=+%>ha-Rn8`du_e@K~A!#GG+gwQkF73 z-l2B&`-TRmx;?&$FTCfQ)WOdvdpjm4 u+O;t~Jq-nGjQ;^MTTI3P diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po index 300f0638..715ad9c4 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po @@ -7,8 +7,8 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-01-23 16:43+0800\n" -"PO-Revision-Date: 2020-01-23 16:44+0800\n" +"POT-Creation-Date: 2020-01-28 11:58+0800\n" +"PO-Revision-Date: 2020-01-28 12:04+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" @@ -146,7 +146,7 @@ msgid "List all available arguments." msgstr "列出所有可选参数。" #: autosub/options.py:102 autosub/options.py:112 autosub/options.py:122 -#: autosub/options.py:211 autosub/options.py:422 autosub/options.py:637 +#: autosub/options.py:211 autosub/options.py:422 autosub/options.py:638 msgid "path" msgstr "路径" @@ -199,7 +199,7 @@ msgstr "" "言行使用第二个。(参数个数为1或2)" #: autosub/options.py:151 autosub/options.py:163 autosub/options.py:174 -#: autosub/options.py:607 autosub/options.py:624 +#: autosub/options.py:608 autosub/options.py:625 msgid "lang_code" msgstr "语言代码" @@ -374,8 +374,8 @@ msgid "" msgstr "" "用于Speech-to-Text请求的并行数量。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:313 autosub/options.py:551 autosub/options.py:561 -#: autosub/options.py:571 +#: autosub/options.py:313 autosub/options.py:552 autosub/options.py:562 +#: autosub/options.py:572 msgid "second" msgstr "秒" @@ -495,39 +495,37 @@ msgid "" "conversion work. \"y\": pre-process the input first then start normal " "workflow. If succeed, no more conversion before the speech-to-text " "procedure. \"o\": only pre-process the input audio. (\"-k\"/\"--keep\" is " -"true) \"s\": only split the input audio. 
(\"-k\"/\"--keep\" is true) \"n\": " -"FORCED NO EXTRA CHECK/CONVERSION before the speech-to-text procedure. " -"Default command to pre-process the audio: {dft_1} | {dft_2} | {dft_3} (Ref: " -"https://github.com/stevenj/autosub/blob/master/scripts/subgen.sh https://" -"ffmpeg.org/ffmpeg-filters.html) (2 >= arg_num >= 1)" +"true) \"s\": only split the input audio. (\"-k\"/\"--keep\" is true) Default " +"command to pre-process the audio: {dft_1} | {dft_2} | {dft_3} (Ref: https://" +"github.com/stevenj/autosub/blob/master/scripts/subgen.sh https://ffmpeg.org/" +"ffmpeg-filters.html) (2 >= arg_num >= 1)" msgstr "" "控制音频处理的选项。如果没有提供选项,进行正常的格式转换工作。\"y\":它会先预" "处理输入文件,如果成功了,在语音转文字之前不会对音频进行额外的处理。\"o\":只" "会预处理输入音频。(\"-k\"/\"--keep\"选项自动置为真)\"s\":只会分割输入音" -"频。(\"-k\"/\"--keep\"选项自动置为真)\"n\":在语音转文字的步骤之前,强制去" -"除多余的格式检查或者转换工作。以下是用于处理音频的默认命令:{dft_1} | " -"{dft_2} | {dft_3}(参考:https://github.com/stevenj/autosub/blob/master/" +"频。(\"-k\"/\"--keep\"选项自动置为真)以下是用于处理音频的默认命令:{dft_1} " +"| {dft_2} | {dft_3}(参考:https://github.com/stevenj/autosub/blob/master/" "scripts/subgen.sh https://ffmpeg.org/ffmpeg-filters.html)(参数个数介于1和2" "之间)" -#: autosub/options.py:462 +#: autosub/options.py:460 msgid "Keep audio processing files to the output path. (arg_num = 0)" msgstr "将音频处理中产生的文件放在输出路径中。(参数个数为0)" -#: autosub/options.py:468 autosub/options.py:488 autosub/options.py:500 +#: autosub/options.py:466 autosub/options.py:486 autosub/options.py:501 msgid "command" msgstr "命令" -#: autosub/options.py:469 +#: autosub/options.py:467 msgid "" -"This arg will override the default audio process command. Every line of the " -"commands need to be in quotes. Input file name is {in_}. Output file name is " -"{out_}. (arg_num >= 1)" +"This arg will override the default audio pre-process command. Every line of " +"the commands need to be in quotes. Input file name is {in_}. Output file " +"name is {out_}. 
(arg_num >= 1)" msgstr "" -"这个参数会取代默认的音频处理命令。每行命令需要放在一个引号内。输入文件名写为" -"{in_}。输出文件名写为{out_}。(参数个数大于1)" +"这个参数会取代默认的音频预处理命令。每行命令需要放在一个引号内。输入文件名写" +"为{in_}。输出文件名写为{out_}。(参数个数大于1)" -#: autosub/options.py:482 +#: autosub/options.py:480 #, python-format msgid "" "Number of concurrent ffmpeg audio split process to make. (arg_num = 1) " @@ -535,16 +533,18 @@ msgid "" msgstr "" "用于ffmpeg音频切割的进程并行数量。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:489 +#: autosub/options.py:487 msgid "" "(Experimental)This arg will override the default audio conversion command. " -"Need to follow the python references keyword argument. Default command to " -"process the audio: {dft} (arg_num = 1)" +"\"[\", \"]\" are optional arguments meaning you can remove them. \"{{\", " +"\"}}\" are required arguments meaning you can't remove them. Default command " +"to process the audio: {dft} (arg_num = 1)" msgstr "" -"(实验性)这个参数会取代默认的音频转换命令。需要遵循原有的python参考关键词参" -"数写法。以下是用于处理音频的默认命令:{dft}(默认参数为1)" +"(实验性)这个参数会取代默认的音频转换命令。\"[\", \"]\" 是可选参数,可以移" +"除。\"{{\", \"}}\"是必选参数,不可移除。以下是用于处理音频的默认命令:{dft}" +"(默认参数为1)" -#: autosub/options.py:501 +#: autosub/options.py:502 msgid "" "(Experimental)This arg will override the default audio split command. Same " "attention above. Default: {dft} (arg_num = 1)" @@ -552,11 +552,11 @@ msgstr "" "(实验性)这个参数会取代默认的音频转换命令。相同的注意如上。默认:{dft}(参数" "个数为1)" -#: autosub/options.py:511 +#: autosub/options.py:512 msgid "file_suffix" msgstr "文件名后缀" -#: autosub/options.py:513 +#: autosub/options.py:514 #, python-format msgid "" "(Experimental)This arg will override the default API audio suffix. (arg_num " @@ -565,11 +565,11 @@ msgstr "" "(实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数" "为%(default)s)" -#: autosub/options.py:520 +#: autosub/options.py:521 msgid "sample_rate" msgstr "采样率" -#: autosub/options.py:523 +#: autosub/options.py:524 #, python-format msgid "" "(Experimental)This arg will override the default API audio sample rate(Hz). 
" @@ -578,11 +578,11 @@ msgstr "" "(实验性)这个参数会取代默认的给API使用的音频采样率(赫兹)。(参数个数为1)" "(默认参数为%(default)s)" -#: autosub/options.py:530 +#: autosub/options.py:531 msgid "channel_num" msgstr "声道数" -#: autosub/options.py:533 +#: autosub/options.py:534 #, python-format msgid "" "(Experimental)This arg will override the default API audio channel. (arg_num " @@ -591,11 +591,11 @@ msgstr "" "(实验性)这个参数会取代默认的给API使用的音频声道数量。(参数个数为1)(默认" "参数为%(default)s)" -#: autosub/options.py:540 +#: autosub/options.py:541 msgid "energy" msgstr "能量(相对值)" -#: autosub/options.py:543 +#: autosub/options.py:544 #, python-format msgid "" "The energy level which determines the region to be detected. Ref: https://" @@ -606,7 +606,7 @@ msgstr "" "latest/apitutorial.html#examples-using-real-audio-data(参数个数为1)(默认参" "数为%(default)s)" -#: autosub/options.py:554 +#: autosub/options.py:555 #, python-format msgid "" "Minimum region size. Same docs above. (arg_num = 1) (default: %(default)s)" @@ -614,7 +614,7 @@ msgstr "" "最小语音区域大小。同样的参考文档如上。(参数个数为1)(默认参数" "为%(default)s)" -#: autosub/options.py:564 +#: autosub/options.py:565 #, python-format msgid "" "Maximum region size. Same docs above. (arg_num = 1) (default: %(default)s)" @@ -622,7 +622,7 @@ msgstr "" "最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数" "为%(default)s)" -#: autosub/options.py:574 +#: autosub/options.py:575 #, python-format msgid "" "Maximum length of a tolerated silence within a valid audio activity. Same " @@ -631,7 +631,7 @@ msgstr "" "在一段有效的音频活动区域中可以容忍的最大(连续)安静区域。同样的参考文档如" "上。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:582 autosub/options.py:589 +#: autosub/options.py:583 autosub/options.py:590 msgid "" "Ref: https://auditok.readthedocs.io/en/latest/core.html#class-summary " "(arg_num = 0)" @@ -639,7 +639,7 @@ msgstr "" "参考:https://auditok.readthedocs.io/en/latest/core.html#class-summary(参数" "个数为0)" -#: autosub/options.py:596 +#: autosub/options.py:597 msgid "" "List all available subtitles formats. 
If your format is not supported, you " "can use ffmpeg or SubtitleEdit to convert the formats. You need to offer fps " @@ -650,7 +650,7 @@ msgstr "" "SubtitleEdit来对其进行转换。如果输出格式是\"sub\"且输入文件是音频无法获取到视" "频帧率时,你需要提供fps选项指定帧率。(参数个数为0)" -#: autosub/options.py:610 +#: autosub/options.py:611 msgid "" "List all recommended \"-S\"/\"--speech-language\" Google Speech-to-Text " "language codes. If no arg is given, list all. Or else will list get a group " @@ -668,7 +668,7 @@ msgstr "" "言)-扩展-私有(https://www.zhihu.com/question/21980689/answer/93615123)(参" "数个数为0或1)" -#: autosub/options.py:627 +#: autosub/options.py:628 msgid "" "List all available \"-SRC\"/\"--src-language\" py-googletrans translation " "language codes. Or else will list get a group of \"good match\" of the arg. " @@ -678,7 +678,7 @@ msgstr "" "言代码。否则会给出一个“好的匹配”的清单。同样的参考文档如上。(参数个数为0或" "1)" -#: autosub/options.py:638 +#: autosub/options.py:639 #, python-format msgid "" "Use py-googletrans to detect a sub file's first line language. And list a " diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.po index 6a147657..2e8c9954 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2019-07-29 18:17+0800\n" +"POT-Creation-Date: 2020-01-28 11:58+0800\n" "PO-Revision-Date: 2019-07-28 10:36+0800\n" "Last-Translator: \n" "Language-Team: \n" @@ -17,7 +17,7 @@ msgstr "" "Language: zh_CN\n" "X-Generator: Poedit 2.2.3\n" -#: autosub/__init__.py:54 autosub/__init__.py:162 +#: autosub/__init__.py:54 autosub/__init__.py:173 msgid "" "\n" "All works done." @@ -29,7 +29,7 @@ msgstr "" msgid "Error: Dependency ffmpeg not found on this machine." 
msgstr "错误:依赖ffmpeg未在这台电脑上找到。" -#: autosub/__init__.py:83 +#: autosub/__init__.py:86 msgid "" "Error: The args of \"-ap\"/\"--audio-process\" are wrong.\n" "No works done." @@ -37,11 +37,11 @@ msgstr "" "错误:\"-ap\"/\"--audio-process\"的参数有误。\n" "啥都没做。" -#: autosub/__init__.py:98 +#: autosub/__init__.py:101 msgid "No works done." msgstr "啥都没做。" -#: autosub/__init__.py:103 +#: autosub/__init__.py:106 msgid "" "Audio pre-processing complete.\n" "All works done." @@ -49,15 +49,18 @@ msgstr "" "音频预处理已完成。\n" "做完了。" -#: autosub/__init__.py:122 -msgid "Audio pre-processing complete." +#: autosub/__init__.py:125 +#, fuzzy +msgid "" +"Audio pre-processing failed.\n" +"Use default method" msgstr "音频预处理已完成。" -#: autosub/__init__.py:125 -msgid "No extra check/conversion before the speech-to-text procedure." -msgstr "在语音转文字之前不会有多余的检查/格式转换。" +#: autosub/__init__.py:129 +msgid "Audio pre-processing complete." +msgstr "音频预处理已完成。" -#: autosub/__init__.py:153 +#: autosub/__init__.py:164 msgid "" "\n" "KeyboardInterrupt. Works stopped." @@ -65,10 +68,13 @@ msgstr "" "\n" "键盘中断。操作终止。" -#: autosub/__init__.py:156 +#: autosub/__init__.py:167 msgid "" "\n" "Error: pysubs2.exceptions. Check your file format." msgstr "" "\n" "错误:pysubs2异常。检查你的文件格式。" + +#~ msgid "No extra check/conversion before the speech-to-text procedure." +#~ msgstr "在语音转文字之前不会有多余的检查/格式转换。" diff --git a/autosub/options.py b/autosub/options.py index 3219f8ee..ad700bfc 100644 --- a/autosub/options.py +++ b/autosub/options.py @@ -443,8 +443,6 @@ def get_cmd_args(): # pylint: disable=too-many-statements "(\"-k\"/\"--keep\" is true) " "\"s\": only split the input audio. " "(\"-k\"/\"--keep\" is true) " - "\"n\": FORCED NO EXTRA CHECK/CONVERSION " - "before the speech-to-text procedure. 
" "Default command to pre-process the audio: " "{dft_1} | {dft_2} | {dft_3} " "(Ref: " @@ -467,7 +465,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements '-apc', '--audio-process-cmd', nargs='*', metavar=_('command'), help=_("This arg will override the default " - "audio process command. " + "audio pre-process command. " "Every line of the commands need to be in quotes. " "Input file name is {in_}. " "Output file name is {out_}. " @@ -488,7 +486,10 @@ def get_cmd_args(): # pylint: disable=too-many-statements metavar=_('command'), help=_("(Experimental)This arg will override the default " "audio conversion command. " - "Need to follow the python references keyword argument. " + "\"[\", \"]\" are optional arguments " + "meaning you can remove them. " + "\"{{\", \"}}\" are required arguments " + "meaning you can't remove them. " "Default command to process the audio: " "{dft} " "(arg_num = 1)").format( From 27cd6c87375e8201fe98b8eaf24b2a436bc26095 Mon Sep 17 00:00:00 2001 From: BingLingGroup <42505588+BingLingGroup@users.noreply.github.com> Date: Fri, 31 Jan 2020 12:22:40 +0800 Subject: [PATCH 06/10] Add (close #72) more bilingual subtitles formats output support Fix output format limits when input is a subtitles file Docs update translations, changelog --- CHANGELOG.md | 5 + autosub/__init__.py | 20 +- autosub/cmdline_utils.py | 502 +++++++++++------- autosub/constants.py | 7 +- autosub/core.py | 196 ++++--- .../LC_MESSAGES/autosub.cmdline_utils.mo | Bin 9614 -> 9803 bytes .../LC_MESSAGES/autosub.cmdline_utils.po | 158 +++--- .../locale/zh_CN/LC_MESSAGES/autosub.core.mo | Bin 1801 -> 1758 bytes .../locale/zh_CN/LC_MESSAGES/autosub.core.po | 32 +- .../zh_CN/LC_MESSAGES/autosub.ffmpeg_utils.mo | Bin 1614 -> 1614 bytes .../zh_CN/LC_MESSAGES/autosub.ffmpeg_utils.po | 16 +- .../LC_MESSAGES/autosub.lang_code_utils.mo | Bin 808 -> 808 bytes .../LC_MESSAGES/autosub.lang_code_utils.po | 16 +- .../data/locale/zh_CN/LC_MESSAGES/autosub.mo | Bin 1139 -> 1251 bytes 
.../zh_CN/LC_MESSAGES/autosub.options.mo | Bin 23223 -> 23751 bytes .../zh_CN/LC_MESSAGES/autosub.options.po | 237 +++++---- .../data/locale/zh_CN/LC_MESSAGES/autosub.po | 29 +- autosub/ffmpeg_utils.py | 9 +- autosub/lang_code_utils.py | 6 +- autosub/metadata.py | 3 +- autosub/options.py | 196 +++---- autosub/speech_trans_api.py | 6 +- autosub/sub_utils.py | 134 ++++- docs/CHANGELOG.zh-Hans.md | 7 +- 24 files changed, 916 insertions(+), 663 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84279570..5944749f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,11 @@ Click up arrow to go back to TOC. #### Added(Unreleased) - Add Google Cloud Speech-to-Text support. [issue #10](https://github.com/BingLingGroup/autosub/issues/10) +- Add more bilingual subtitles formats output support. [issue #72](https://github.com/BingLingGroup/autosub/issues/72) + +#### Changed(Unreleased) + +- Fix output format limits when input is a subtitles file. ### [0.5.3-alpha] - 2019-12-30 diff --git a/autosub/__init__.py b/autosub/__init__.py index d9255d4e..6c3b7cb9 100644 --- a/autosub/__init__.py +++ b/autosub/__init__.py @@ -69,8 +69,7 @@ def main(): # pylint: disable=too-many-branches, too-many-statements, too-many- if not ffmpeg_cmd: raise exceptions.AutosubException( _("Error: Dependency ffmpeg" - " not found on this machine.") - ) + " not found on this machine.")) ffmpeg_cmd = ffmpeg_cmd + ' ' @@ -84,8 +83,7 @@ def main(): # pylint: disable=too-many-branches, too-many-statements, too-many- if not args.audio_process: raise exceptions.AutosubException( _("Error: The args of \"-ap\"/\"--audio-process\" are wrong." 
- "\nNo works done.") - ) + "\nNo works done.")) if 'o' in args.audio_process: args.keep = True prcs_file = ffmpeg_utils.audio_pre_prcs( @@ -94,17 +92,14 @@ def main(): # pylint: disable=too-many-branches, too-many-statements, too-many- cmds=args.audio_process_cmd, output_name=args.output, input_m=input_m, - ffmpeg_cmd=ffmpeg_cmd - ) + ffmpeg_cmd=ffmpeg_cmd) if not prcs_file: raise exceptions.AutosubException( - _("No works done.") - ) + _("No works done.")) else: args.input = prcs_file raise exceptions.AutosubException( - _("Audio pre-processing complete.\nAll works done.") - ) + _("Audio pre-processing complete.\nAll works done.")) if 's' in args.audio_process: args.keep = True @@ -116,14 +111,13 @@ def main(): # pylint: disable=too-many-branches, too-many-statements, too-many- cmds=args.audio_process_cmd, output_name=args.output, input_m=input_m, - ffmpeg_cmd=ffmpeg_cmd - ) + ffmpeg_cmd=ffmpeg_cmd) args.audio_split_cmd = \ args.audio_split_cmd.replace( "-vn -ac [channel] -ar [sample_rate] ", "") if not prcs_file: print(_("Audio pre-processing failed." 
- "\nUse default method")) + "\nUse default method.")) else: args.input = prcs_file print(_("Audio pre-processing complete.")) diff --git a/autosub/cmdline_utils.py b/autosub/cmdline_utils.py index 400813c2..bc5ee19d 100644 --- a/autosub/cmdline_utils.py +++ b/autosub/cmdline_utils.py @@ -6,24 +6,25 @@ # pylint: disable=too-many-lines # Import built-in modules from __future__ import absolute_import, print_function, unicode_literals + +import gettext import os -import tempfile import subprocess -import gettext +import tempfile # Import third-party modules -import pysubs2 import auditok import googletrans import langcodes +import pysubs2 # Any changes to the path and your own modules from autosub import constants from autosub import core +from autosub import exceptions from autosub import ffmpeg_utils -from autosub import sub_utils from autosub import lang_code_utils -from autosub import exceptions +from autosub import sub_utils CMDLINE_UTILS_TEXT = gettext.translation(domain=__name__, localedir=constants.LOCALE_PATH, @@ -134,8 +135,7 @@ def validate_io( # pylint: disable=too-many-branches, too-many-statements if args.styles_name: if len(args.styles_name) > 2: raise exceptions.AutosubException( - _("Error: Too many \"-sn\"/\"--styles-name\" arguments.") - ) + _("Error: Too many \"-sn\"/\"--styles-name\" arguments.")) else: style_obj = pysubs2.SSAFile.load(args.styles) ass_styles = style_obj.styles.get(args.styles_name[0]) @@ -148,16 +148,14 @@ def validate_io( # pylint: disable=too-many-branches, too-many-statements else: raise exceptions.AutosubException( _("Error: \"-sn\"/\"--styles-name\" " - "arguments aren't in \"{path}\".").format(path=args.styles) - ) + "arguments aren't in \"{path}\".").format(path=args.styles)) for item in styles_dict.items(): styles_list.append(item[0]) styles_list.append(item[1]) else: raise exceptions.AutosubException( _("Error: \"-sn\"/\"--styles-name\" " - "arguments aren't in \"{path}\".").format(path=args.styles) - ) + "arguments 
aren't in \"{path}\".").format(path=args.styles)) if args.ext_regions and not os.path.isfile(args.ext_regions): raise exceptions.AutosubException( @@ -215,8 +213,7 @@ def validate_io( # pylint: disable=too-many-branches, too-many-statements raise exceptions.AutosubException( _("Error: Output subtitles format \"{fmt}\" not supported. " "Run with \"-lf\"/\"--list-formats\" to see all supported formats.\n" - "Or use ffmpeg or SubtitleEdit to convert the formats.").format(fmt=args.format) - ) + "Or use ffmpeg or SubtitleEdit to convert the formats.").format(fmt=args.format)) args.output_files = set(args.output_files) if "all" in args.output_files: @@ -227,15 +224,13 @@ def validate_io( # pylint: disable=too-many-branches, too-many-statements constants.DEFAULT_MODE_SET if not args.output_files: raise exceptions.AutosubException( - _("Error: No valid \"-of\"/\"--output-files\" arguments.") - ) + _("Error: No valid \"-of\"/\"--output-files\" arguments.")) else: args.output_files = args.output_files & \ constants.DEFAULT_SUB_MODE_SET if not args.output_files: raise exceptions.AutosubException( - _("Error: No valid \"-of\"/\"--output-files\" arguments.") - ) + _("Error: No valid \"-of\"/\"--output-files\" arguments.")) if args.best_match: args.best_match = {k.lower() for k in args.best_match} @@ -259,8 +254,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret """ if args.sleep_seconds < 0 or args.lines_per_trans < 0: raise exceptions.AutosubException( - _("Error: \"-slp\"/\"--sleep-seconds\" arg is illegal.") - ) + _("Error: \"-slp\"/\"--sleep-seconds\" arg is illegal.")) if args.speech_language: # pylint: disable=too-many-nested-blocks if args.speech_api == "gsv2" or args.speech_api == "gcsv1": @@ -270,8 +264,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret print( _("Warning: Speech language \"{src}\" not recommended. 
" "Run with \"-lsc\"/\"--list-speech-codes\" " - "to see all supported languages.").format(src=args.speech_language) - ) + "to see all supported languages.").format(src=args.speech_language)) if args.best_match and 's' in args.best_match: best_result = lang_code_utils.match_print( dsr_lang=args.speech_language, @@ -289,26 +282,22 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret if args.min_confidence < 0.0 or args.min_confidence > 1.0: raise exceptions.AutosubException( - _("Error: The arg of \"-mnc\"/\"--min-confidence\" isn't legal.") - ) + _("Error: The arg of \"-mnc\"/\"--min-confidence\" isn't legal.")) else: raise exceptions.AutosubException( - _("Error: Wrong API code.") - ) + _("Error: Wrong API code.")) if args.dst_language is None: print( _("Destination language not provided. " - "Only performing speech recognition.") - ) + "Only performing speech recognition.")) else: if not args.src_language: print( _("Source language not provided. " - "Use Speech language instead.") - ) + "Use Speech language instead.")) args.src_language = args.speech_language if not args.best_match: args.best_match = {'src'} @@ -332,8 +321,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret print( _("Warning: Source language \"{src}\" not supported. " "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages.").format(src=args.src_language) - ) + "to see all supported languages.").format(src=args.src_language)) best_result = lang_code_utils.match_print( dsr_lang=args.src_language, match_list=list(googletrans.constants.LANGUAGES.keys()), @@ -354,8 +342,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret "Run with \"-lsc\"/\"--list-translation-codes\" " "to see all supported languages. 
" "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - src=args.src_language) - ) + src=args.src_language)) if not is_dst_matched: if not args.gtransv2: @@ -363,8 +350,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret print( _("Warning: Destination language \"{dst}\" not supported. " "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages.").format(dst=args.dst_language) - ) + "to see all supported languages.").format(dst=args.dst_language)) best_result = lang_code_utils.match_print( dsr_lang=args.dst_language, match_list=list(googletrans.constants.LANGUAGES.keys()), @@ -385,15 +371,13 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret "Run with \"-lsc\"/\"--list-translation-codes\" " "to see all supported languages. " "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - dst=args.dst_language) - ) + dst=args.dst_language)) if args.dst_language == args.speech_language \ or args.src_language == args.dst_language: print( _("Speech language is the same as the Destination language. " - "Only performing speech recognition.") - ) + "Only performing speech recognition.")) args.dst_language = None args.src_language = None @@ -402,22 +386,19 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret if not args.keep: raise exceptions.AutosubException( _("You've already input times. " - "No works done.") - ) + "No works done.")) else: print( _("Speech language not provided. 
" - "Only performing speech regions detection.") - ) + "Only performing speech regions detection.")) if args.styles == ' ': # when args.styles is used but without option # its value is ' ' if not args.ext_regions: raise exceptions.AutosubException( - _("Error: External speech regions file not provided.") - ) + _("Error: External speech regions file not provided.")) else: args.styles = args.ext_regions @@ -430,8 +411,7 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return if args.src_language: if args.dst_language is None: raise exceptions.AutosubException( - _("Error: Destination language not provided.") - ) + _("Error: Destination language not provided.")) is_src_matched = False is_dst_matched = False @@ -450,8 +430,7 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return print( _("Warning: Source language \"{src}\" not supported. " "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages.").format(src=args.src_language) - ) + "to see all supported languages.").format(src=args.src_language)) best_result = lang_code_utils.match_print( dsr_lang=args.src_language, match_list=list(googletrans.constants.LANGUAGES.keys()), @@ -471,8 +450,7 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return "Run with \"-lsc\"/\"--list-translation-codes\" " "to see all supported languages. " "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - src=args.src_language) - ) + src=args.src_language)) if not is_dst_matched: if not args.gtransv2: @@ -480,8 +458,7 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return print( _("Warning: Destination language \"{dst}\" not supported. 
" "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages.").format(dst=args.dst_language) - ) + "to see all supported languages.").format(dst=args.dst_language)) best_result = lang_code_utils.match_print( dsr_lang=args.dst_language, match_list=list(googletrans.constants.LANGUAGES.keys()), @@ -501,26 +478,22 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return "Run with \"-lsc\"/\"--list-translation-codes\" " "to see all supported languages. " "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - dst=args.dst_language) - ) + dst=args.dst_language)) if args.dst_language == args.src_language: raise exceptions.AutosubException( - _("Error: Source language is the same as the Destination language.") - ) + _("Error: Source language is the same as the Destination language.")) else: raise exceptions.AutosubException( - _("Error: Source language not provided.") - ) + _("Error: Source language not provided.")) if args.styles == ' ': # when args.styles is used but without option # its value is ' ' if not args.ext_regions: raise exceptions.AutosubException( - _("Error: External speech regions file not provided.") - ) + _("Error: External speech regions file not provided.")) else: args.styles = args.ext_regions @@ -535,24 +508,21 @@ def fix_args(args, print( _("Your minimum region size {mrs0} is smaller than {mrs}.\n" "Now reset to {mrs}.").format(mrs0=args.min_region_size, - mrs=constants.MIN_REGION_SIZE) - ) + mrs=constants.MIN_REGION_SIZE)) args.min_region_size = constants.MIN_REGION_SIZE if args.max_region_size > constants.MAX_EXT_REGION_SIZE: print( _("Your maximum region size {mrs0} is larger than {mrs}.\n" "Now reset to {mrs}.").format(mrs0=args.max_region_size, - mrs=constants.MAX_EXT_REGION_SIZE) - ) + mrs=constants.MAX_EXT_REGION_SIZE)) args.max_region_size = constants.MAX_EXT_REGION_SIZE if args.max_continuous_silence < 0: print( _("Your maximum continuous silence {mxcs} is smaller than 0.\n" "Now 
reset to {dmxcs}.").format(mxcs=args.max_continuous_silence, - dmxcs=constants.DEFAULT_CONTINUOUS_SILENCE) - ) + dmxcs=constants.DEFAULT_CONTINUOUS_SILENCE)) args.max_continuous_silence = constants.DEFAULT_CONTINUOUS_SILENCE if not args.audio_conversion_cmd: @@ -569,8 +539,7 @@ def fix_args(args, def get_timed_text( is_empty_dropped, regions, - text_list -): + text_list): """ Get timed text list. """ @@ -595,8 +564,7 @@ def subs_trans( # pylint: disable=too-many-branches, too-many-statements, too-m if not args.output_files: raise exceptions.AutosubException( _("\nNo works done." - " Check your \"-of\"/\"--output-files\" option.") - ) + " Check your \"-of\"/\"--output-files\" option.")) src_sub = pysubs2.SSAFile.load(args.input) text_list = [] @@ -624,8 +592,7 @@ def subs_trans( # pylint: disable=too-many-branches, too-many-statements, too-m concurrency=args.trans_concurrency, src_language=args.src_language, dst_language=args.dst_language, - lines_per_trans=args.lines_per_trans - ) + lines_per_trans=args.lines_per_trans) else: # use googletrans translated_text = core.list_to_googletrans( @@ -634,8 +601,7 @@ def subs_trans( # pylint: disable=too-many-branches, too-many-statements, too-m dst_language=args.dst_language, sleep_seconds=args.sleep_seconds, user_agent=args.user_agent, - service_urls=args.service_urls - ) + service_urls=args.service_urls) if not translated_text or len(translated_text) != len(text_list): raise exceptions.AutosubException( @@ -663,26 +629,25 @@ def subs_trans( # pylint: disable=too-many-branches, too-many-statements, too-m text_list=translated_text, style_name=styles_list[0]) - if args.format != 'ass.json': - bilingual_string = bilingual_sub.to_string(format_=args.format, fps=fps) - else: - bilingual_string = bilingual_sub.to_string(format_='json') + bilingual_string = core.ssafile_to_sub_str( + ssafile=bilingual_sub, + fps=fps, + subtitles_file_format=args.format) if args.format == 'mpl2': extension = 'mpl2.txt' else: extension = args.format 
- bilingual_name = "{base}.{nt}.{extension}".format(base=args.output, - nt=args.src_language + - '&' + args.dst_language, - extension=extension) + bilingual_name = "{base}.{nt}.{extension}".format( + base=args.output, + nt=args.src_language + '&' + args.dst_language, + extension=extension) subtitles_file_path = core.str_to_file( str_=bilingual_string, output=bilingual_name, - input_m=input_m - ) + input_m=input_m) # subtitles string to file print(_("Bilingual subtitles file " "created at \"{}\".").format(subtitles_file_path)) @@ -693,6 +658,110 @@ def subs_trans( # pylint: disable=too-many-branches, too-many-statements, too-m except KeyError: pass + try: + args.output_files.remove("dst-lf-src") + bilingual_sub = pysubs2.SSAFile() + bilingual_sub.styles = src_sub.styles + if args.styles and \ + len(styles_list) == 2 and \ + (args.format == 'ass' or + args.format == 'ssa' or + args.format == 'ass.json'): + sub_utils.pysubs2_ssa_event_add( + src_ssafile=src_sub, + dst_ssafile=bilingual_sub, + text_list=translated_text, + style_name=styles_list[2], + same_event_type=1) + else: + sub_utils.pysubs2_ssa_event_add( + src_ssafile=src_sub, + dst_ssafile=bilingual_sub, + text_list=translated_text, + style_name=styles_list[0], + same_event_type=1) + + bilingual_string = core.ssafile_to_sub_str( + ssafile=bilingual_sub, + fps=fps, + subtitles_file_format=args.format) + + if args.format == 'mpl2': + extension = 'mpl2.txt' + else: + extension = args.format + + bilingual_name = "{base}.{nt}.0.{extension}".format( + base=args.output, + nt=args.src_language + '&' + args.dst_language, + extension=extension) + + subtitles_file_path = core.str_to_file( + str_=bilingual_string, + output=bilingual_name, + input_m=input_m) + # subtitles string to file + print(_("\"dst-lf-src\" subtitles file " + "created at \"{}\".").format(subtitles_file_path)) + + if not args.output_files: + raise exceptions.AutosubException(_("\nAll works done.")) + + except KeyError: + pass + + try: + 
args.output_files.remove("src-lf-dst") + bilingual_sub = pysubs2.SSAFile() + bilingual_sub.styles = src_sub.styles + if args.styles and \ + len(styles_list) == 2 and \ + (args.format == 'ass' or + args.format == 'ssa' or + args.format == 'ass.json'): + sub_utils.pysubs2_ssa_event_add( + src_ssafile=src_sub, + dst_ssafile=bilingual_sub, + text_list=translated_text, + style_name=styles_list[2], + same_event_type=2) + else: + sub_utils.pysubs2_ssa_event_add( + src_ssafile=src_sub, + dst_ssafile=bilingual_sub, + text_list=translated_text, + style_name=styles_list[0], + same_event_type=2) + + bilingual_string = core.ssafile_to_sub_str( + ssafile=bilingual_sub, + fps=fps, + subtitles_file_format=args.format) + + if args.format == 'mpl2': + extension = 'mpl2.txt' + else: + extension = args.format + + bilingual_name = "{base}.{nt}.1.{extension}".format( + base=args.output, + nt=args.src_language + '&' + args.dst_language, + extension=extension) + + subtitles_file_path = core.str_to_file( + str_=bilingual_string, + output=bilingual_name, + input_m=input_m) + # subtitles string to file + print(_("\"src-lf-dst\" subtitles file " + "created at \"{}\".").format(subtitles_file_path)) + + if not args.output_files: + raise exceptions.AutosubException(_("\nAll works done.")) + + except KeyError: + pass + try: args.output_files.remove("dst") dst_sub = pysubs2.SSAFile() @@ -710,10 +779,11 @@ def subs_trans( # pylint: disable=too-many-branches, too-many-statements, too-m text_list=translated_text, style_name=styles_list[0]) - if args.format != 'ass.json': - dst_string = dst_sub.to_string(format_=args.format, fps=fps) - else: - dst_string = dst_sub.to_string(format_='json') + dst_string = core.ssafile_to_sub_str( + ssafile=dst_sub, + fps=fps, + subtitles_file_format=args.format) + if args.format == 'mpl2': extension = 'mpl2.txt' else: @@ -724,8 +794,7 @@ def subs_trans( # pylint: disable=too-many-branches, too-many-statements, too-m subtitles_file_path = core.str_to_file( 
str_=dst_string, output=dst_name, - input_m=input_m - ) + input_m=input_m) # subtitles string to file print(_("Destination language subtitles " "file created at \"{}\".").format(subtitles_file_path)) @@ -736,8 +805,7 @@ def subs_trans( # pylint: disable=too-many-branches, too-many-statements, too-m def get_fps( args, - input_m=input -): + input_m=input): """ Give args and get fps. """ @@ -771,8 +839,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen if not args.output_files: raise exceptions.AutosubException( _("\nNo works done." - " Check your \"-of\"/\"--output-files\" option.") - ) + " Check your \"-of\"/\"--output-files\" option.")) if args.ext_regions: # use external speech regions @@ -784,16 +851,14 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen in_=args.input, channel=1, sample_rate=16000, - out_=audio_wav - ) + out_=audio_wav) print(command) subprocess.check_output( constants.cmd_conversion(command), stdin=open(os.devnull)) regions = sub_utils.sub_to_speech_regions( audio_wav=audio_wav, - sub_file=args.ext_regions - ) + sub_file=args.ext_regions) os.remove(audio_wav) else: @@ -813,8 +878,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen in_=args.input, channel=1, sample_rate=48000, - out_=audio_wav - ) + out_=audio_wav) print(_("\nConvert source audio to \"{name}\" " "to get audio length and detect audio regions.").format( name=audio_wav)) @@ -834,15 +898,13 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen min_region_size=args.min_region_size, max_region_size=args.max_region_size, max_continuous_silence=args.max_continuous_silence, - mode=mode - ) + mode=mode) os.remove(audio_wav) print(_("\n\"{name}\" has been deleted.").format(name=audio_wav)) if not regions: raise exceptions.AutosubException( - _("Error: Can't get speech regions.") - ) + _("Error: Can't get speech regions.")) if args.speech_language or \ 
args.audio_process and 's' in args.audio_process: # process output first @@ -855,14 +917,12 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen times_string = core.list_to_ass_str( text_list=regions, styles_list=styles_list, - subtitles_file_format=args.format - ) + subtitles_file_format=args.format) else: times_string = core.list_to_sub_str( timed_text=regions, fps=fps, - subtitles_file_format=args.format - ) + subtitles_file_format=args.format) # times to subtitles string times_name = "{base}.{nt}.{extension}".format(base=args.output, nt="times", @@ -870,8 +930,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen subtitles_file_path = core.str_to_file( str_=times_string, output=times_name, - input_m=input_m - ) + input_m=input_m) # subtitles string to file print(_("Times file created at \"{}\".").format(subtitles_file_path)) @@ -889,8 +948,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen split_cmd=args.audio_split_cmd, suffix=args.api_suffix, concurrency=args.audio_concurrency, - is_keep=args.keep - ) + is_keep=args.keep) if not audio_fragments or \ len(audio_fragments) != len(regions): @@ -936,8 +994,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen regions=regions, concurrency=args.speech_concurrency, min_confidence=args.min_confidence, - is_keep=args.keep - ) + is_keep=args.keep) elif args.speech_api == "gcsv1": # Google Cloud speech-to-text V1P1Beta1 @@ -959,8 +1016,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen concurrency=args.speech_concurrency, src_language=args.speech_language, min_confidence=args.min_confidence, - is_keep=args.keep - ) + is_keep=args.keep) elif args.service_account and os.path.isfile(args.service_account): print(_("Set the GOOGLE_APPLICATION_CREDENTIALS " "given in the option \"-sa\"/\"--service-account\".")) @@ -972,8 +1028,7 @@ def audio_or_video_prcs( # pylint: 
disable=too-many-branches, too-many-statemen concurrency=args.speech_concurrency, src_language=args.speech_language, min_confidence=args.min_confidence, - is_keep=args.keep - ) + is_keep=args.keep) else: if 'GOOGLE_APPLICATION_CREDENTIALS' in os.environ: print(_("Use the GOOGLE_APPLICATION_CREDENTIALS " @@ -985,8 +1040,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen concurrency=args.speech_concurrency, src_language=args.speech_language, min_confidence=args.min_confidence, - is_keep=args.keep - ) + is_keep=args.keep) else: print(_("No available GOOGLE_APPLICATION_CREDENTIALS. " "Use \"-sa\"/\"--service-account\" to set one.")) @@ -1001,8 +1055,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen timed_text = get_timed_text( is_empty_dropped=args.drop_empty_regions, regions=regions, - text_list=text_list - ) + text_list=text_list) if args.dst_language: # process output first @@ -1015,14 +1068,12 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen src_string = core.list_to_ass_str( text_list=timed_text, styles_list=styles_list[:2], - subtitles_file_format=args.format, - ) + subtitles_file_format=args.format, ) else: src_string = core.list_to_sub_str( timed_text=timed_text, fps=fps, - subtitles_file_format=args.format - ) + subtitles_file_format=args.format) # formatting timed_text to subtitles string src_name = "{base}.{nt}.{extension}".format(base=args.output, @@ -1031,8 +1082,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen subtitles_file_path = core.str_to_file( str_=src_string, output=src_name, - input_m=input_m - ) + input_m=input_m) # subtitles string to file print(_("Speech language subtitles " "file created at \"{}\".").format(subtitles_file_path)) @@ -1052,8 +1102,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen concurrency=args.trans_concurrency, src_language=args.src_language, 
dst_language=args.dst_language, - lines_per_trans=args.lines_per_trans - ) + lines_per_trans=args.lines_per_trans) else: # use googletrans translated_text = core.list_to_googletrans( @@ -1062,19 +1111,12 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen dst_language=args.dst_language, sleep_seconds=args.sleep_seconds, user_agent=args.user_agent, - service_urls=args.service_urls - ) + service_urls=args.service_urls) if not translated_text or len(translated_text) != len(regions): raise exceptions.AutosubException( _("Error: Translation failed.")) - timed_trans = get_timed_text( - is_empty_dropped=args.drop_empty_regions, - regions=regions, - text_list=translated_text - ) - try: args.output_files.remove("bilingual") if args.styles and \ @@ -1082,26 +1124,35 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen args.format == 'ssa' or args.format == 'ass.json'): bilingual_string = core.list_to_ass_str( - text_list=[timed_text, timed_trans], + text_list=[timed_text, translated_text], styles_list=styles_list, - subtitles_file_format=args.format, - ) + subtitles_file_format=args.format, ) else: - bilingual_string = core.list_to_sub_str( - timed_text=timed_text + timed_trans, + bilingual_sub = pysubs2.SSAFile() + sub_utils.pysubs2_ssa_event_add( + src_ssafile=None, + dst_ssafile=bilingual_sub, + text_list=timed_text, + style_name=None) + sub_utils.pysubs2_ssa_event_add( + src_ssafile=bilingual_sub, + dst_ssafile=bilingual_sub, + text_list=translated_text, + style_name=None, + same_event_type=0) + bilingual_string = core.ssafile_to_sub_str( + ssafile=bilingual_sub, fps=fps, - subtitles_file_format=args.format - ) + subtitles_file_format=args.format) # formatting timed_text to subtitles string - bilingual_name = "{base}.{nt}.{extension}".format(base=args.output, - nt=args.src_language + - '&' + args.dst_language, - extension=args.format) + bilingual_name = "{base}.{nt}.{extension}".format( + base=args.output, + 
nt=args.src_language + '&' + args.dst_language, + extension=args.format) subtitles_file_path = core.str_to_file( str_=bilingual_string, output=bilingual_name, - input_m=input_m - ) + input_m=input_m) # subtitles string to file print(_("Bilingual subtitles file " "created at \"{}\".").format(subtitles_file_path)) @@ -1112,8 +1163,109 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen except KeyError: pass + try: + args.output_files.remove("dst-lf-src") + if args.styles and \ + (args.format == 'ass' or + args.format == 'ssa' or + args.format == 'ass.json'): + bilingual_string = core.list_to_ass_str( + text_list=[timed_text, translated_text], + styles_list=styles_list, + subtitles_file_format=args.format, + same_event_type=1) + else: + bilingual_sub = pysubs2.SSAFile() + src_sub = pysubs2.SSAFile() + sub_utils.pysubs2_ssa_event_add( + src_ssafile=None, + dst_ssafile=src_sub, + text_list=timed_text, + style_name=None) + sub_utils.pysubs2_ssa_event_add( + src_ssafile=src_sub, + dst_ssafile=bilingual_sub, + text_list=translated_text, + style_name=None, + same_event_type=1) + bilingual_string = core.ssafile_to_sub_str( + ssafile=bilingual_sub, + fps=fps, + subtitles_file_format=args.format) + # formatting timed_text to subtitles string + bilingual_name = "{base}.{nt}.0.{extension}".format( + base=args.output, + nt=args.src_language + '&' + args.dst_language, + extension=args.format) + subtitles_file_path = core.str_to_file( + str_=bilingual_string, + output=bilingual_name, + input_m=input_m) + # subtitles string to file + print(_("\"dst-lf-src\" subtitles file " + "created at \"{}\".").format(subtitles_file_path)) + + if not args.output_files: + raise exceptions.AutosubException(_("\nAll works done.")) + + except KeyError: + pass + + try: + args.output_files.remove("src-lf-dst") + if args.styles and \ + (args.format == 'ass' or + args.format == 'ssa' or + args.format == 'ass.json'): + bilingual_string = core.list_to_ass_str( + 
text_list=[timed_text, translated_text], + styles_list=styles_list, + subtitles_file_format=args.format, + same_event_type=2) + else: + bilingual_sub = pysubs2.SSAFile() + src_sub = pysubs2.SSAFile() + sub_utils.pysubs2_ssa_event_add( + src_ssafile=None, + dst_ssafile=src_sub, + text_list=timed_text, + style_name=None) + sub_utils.pysubs2_ssa_event_add( + src_ssafile=src_sub, + dst_ssafile=bilingual_sub, + text_list=translated_text, + style_name=None, + same_event_type=2) + bilingual_string = core.ssafile_to_sub_str( + ssafile=bilingual_sub, + fps=fps, + subtitles_file_format=args.format) + # formatting timed_text to subtitles string + bilingual_name = "{base}.{nt}.1.{extension}".format( + base=args.output, + nt=args.src_language + '&' + args.dst_language, + extension=args.format) + subtitles_file_path = core.str_to_file( + str_=bilingual_string, + output=bilingual_name, + input_m=input_m) + # subtitles string to file + print(_("\"src-lf-dst\" subtitles file " + "created at \"{}\".").format(subtitles_file_path)) + + if not args.output_files: + raise exceptions.AutosubException(_("\nAll works done.")) + + except KeyError: + pass + try: args.output_files.remove("dst") + timed_trans = get_timed_text( + is_empty_dropped=False, + regions=regions, + text_list=translated_text + ) # formatting timed_text to subtitles string if args.styles and \ (args.format == 'ass' or @@ -1123,28 +1275,25 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen dst_string = core.list_to_ass_str( text_list=timed_trans, styles_list=styles_list[2:4], - subtitles_file_format=args.format, - ) + subtitles_file_format=args.format, ) else: dst_string = core.list_to_ass_str( text_list=timed_trans, styles_list=styles_list, - subtitles_file_format=args.format, - ) + subtitles_file_format=args.format, ) else: dst_string = core.list_to_sub_str( timed_text=timed_trans, fps=fps, - subtitles_file_format=args.format - ) - dst_name = 
"{base}.{nt}.{extension}".format(base=args.output, - nt=args.dst_language, - extension=args.format) + subtitles_file_format=args.format) + dst_name = "{base}.{nt}.{extension}".format( + base=args.output, + nt=args.dst_language, + extension=args.format) subtitles_file_path = core.str_to_file( str_=dst_string, output=dst_name, - input_m=input_m - ) + input_m=input_m) # subtitles string to file print(_("Destination language subtitles " "file created at \"{}\".").format(subtitles_file_path)) @@ -1156,13 +1305,11 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen if len(args.output_files) > 1 or not ({"dst", "src"} & args.output_files): print( _("Override \"-of\"/\"--output-files\" due to your args too few." - "\nOutput source subtitles file only.") - ) + "\nOutput source subtitles file only.")) timed_text = get_timed_text( is_empty_dropped=args.drop_empty_regions, regions=regions, - text_list=text_list - ) + text_list=text_list) if args.styles and \ (args.format == 'ass' or args.format == 'ssa' or @@ -1170,14 +1317,12 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen src_string = core.list_to_ass_str( text_list=timed_text, styles_list=styles_list, - subtitles_file_format=args.format, - ) + subtitles_file_format=args.format, ) else: src_string = core.list_to_sub_str( timed_text=timed_text, fps=fps, - subtitles_file_format=args.format - ) + subtitles_file_format=args.format) # formatting timed_text to subtitles string src_name = "{base}.{nt}.{extension}".format(base=args.output, nt=args.speech_language, @@ -1185,8 +1330,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen subtitles_file_path = core.str_to_file( str_=src_string, output=src_name, - input_m=input_m - ) + input_m=input_m) # subtitles string to file print(_("Speech language subtitles " "file created at \"{}\".").format(subtitles_file_path)) @@ -1194,8 +1338,7 @@ def audio_or_video_prcs( # pylint: 
disable=too-many-branches, too-many-statemen else: print( _("Override \"-of\"/\"--output-files\" due to your args too few." - "\nOutput regions subtitles file only.") - ) + "\nOutput regions subtitles file only.")) if args.styles and \ (args.format == 'ass' or args.format == 'ssa' or @@ -1203,14 +1346,12 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen times_subtitles = core.list_to_ass_str( text_list=regions, styles_list=styles_list, - subtitles_file_format=args.format - ) + subtitles_file_format=args.format) else: times_subtitles = core.list_to_sub_str( timed_text=regions, fps=fps, - subtitles_file_format=args.format - ) + subtitles_file_format=args.format) # times to subtitles string times_name = "{base}.{nt}.{extension}".format(base=args.output, nt="times", @@ -1218,8 +1359,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen subtitles_file_path = core.str_to_file( str_=times_subtitles, output=times_name, - input_m=input_m - ) + input_m=input_m) # subtitles string to file print(_("Times file created at \"{}\".").format(subtitles_file_path)) diff --git a/autosub/constants.py b/autosub/constants.py index dda842c5..b5b51fdf 100644 --- a/autosub/constants.py +++ b/autosub/constants.py @@ -69,8 +69,8 @@ DEFAULT_SUBTITLES_FORMAT = 'srt' -DEFAULT_MODE_SET = {'regions', 'src', 'dst', 'bilingual'} -DEFAULT_SUB_MODE_SET = {'dst', 'bilingual'} +DEFAULT_MODE_SET = {'regions', 'src', 'dst', 'bilingual', 'dst-lf-src', 'src-lf-dst'} +DEFAULT_SUB_MODE_SET = {'dst', 'bilingual', 'dst-lf-src', 'src-lf-dst'} DEFAULT_LANG_MODE_SET = {'s', 'src', 'd'} DEFAULT_AUDIO_PRCS_MODE_SET = {'o', 's', 'y'} @@ -351,8 +351,7 @@ def cmd_conversion( - command -): + command): """ Give a command and return a cross-platform command """ diff --git a/autosub/core.py b/autosub/core.py index 370a1b85..c9de1bee 100644 --- a/autosub/core.py +++ b/autosub/core.py @@ -43,8 +43,7 @@ def auditok_gen_speech_regions( # pylint: 
disable=too-many-arguments min_region_size=constants.MIN_REGION_SIZE, max_region_size=constants.MAX_REGION_SIZE, max_continuous_silence=constants.DEFAULT_CONTINUOUS_SILENCE, - mode=auditok.StreamTokenizer.STRICT_MIN_LENGTH -): + mode=auditok.StreamTokenizer.STRICT_MIN_LENGTH): """ Give an input audio/video file, generate proper speech regions. """ @@ -80,8 +79,7 @@ def bulk_audio_conversion( # pylint: disable=too-many-arguments suffix, concurrency=constants.DEFAULT_CONCURRENCY, output=None, - is_keep=False -): + is_keep=False): """ Give an input audio/video file and generate short-term audio fragments. @@ -128,8 +126,7 @@ def gsv2_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-many headers, concurrency=constants.DEFAULT_CONCURRENCY, min_confidence=0.0, - is_keep=False -): + is_keep=False): """ Give a list of short-term audio fragment files and generate text_list from Google speech-to-text V2 api. @@ -184,8 +181,7 @@ def gcsv1_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man concurrency=constants.DEFAULT_CONCURRENCY, src_language=constants.DEFAULT_SRC_LANGUAGE, min_confidence=0.0, - is_keep=False -): + is_keep=False): """ Give a list of short-term audio fragment files and generate text_list from Google cloud speech-to-text V1P1Beta1 api. 
@@ -265,9 +261,9 @@ def gcsv1_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man # https://pypi.org/project/google-cloud-speech/ config = { - "language_code": src_language, - "sample_rate_hertz": sample_rate, "encoding": encoding, + "sample_rate_hertz": sample_rate, + "language_code": src_language, } i = 0 @@ -318,8 +314,7 @@ def list_to_gtv2( # pylint: disable=too-many-locals,too-many-arguments concurrency=constants.DEFAULT_CONCURRENCY, src_language=constants.DEFAULT_SRC_LANGUAGE, dst_language=constants.DEFAULT_DST_LANGUAGE, - lines_per_trans=constants.DEFAULT_LINES_PER_TRANS -): + lines_per_trans=constants.DEFAULT_LINES_PER_TRANS): """ Give a text list, generate translated text list from GoogleTranslatorV2 api. """ @@ -376,8 +371,7 @@ def list_to_googletrans( # pylint: disable=too-many-locals, too-many-arguments, size_per_trans=constants.DEFAULT_SIZE_PER_TRANS, sleep_seconds=constants.DEFAULT_SLEEP_SECONDS, user_agent=None, - service_urls=None -): + service_urls=None): """ Give a text list, generate translated text list from GoogleTranslatorV2 api. """ @@ -496,17 +490,16 @@ def list_to_googletrans( # pylint: disable=too-many-locals, too-many-arguments, except KeyboardInterrupt: pbar.finish() - print(_("Cancelling transcription.")) + print(_("Cancelling translation.")) return 1 return translated_text -def list_to_sub_str( # pylint: disable=too-many-arguments +def list_to_sub_str( timed_text, fps=30.0, - subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT -): + subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT): """ Give an input timed text list, format it to a string. 
""" @@ -520,17 +513,16 @@ def list_to_sub_str( # pylint: disable=too-many-arguments src_ssafile=None, dst_ssafile=pysubs2_obj, text_list=timed_text, - style_name=None - ) + style_name=None) formatted_subtitles = pysubs2_obj.to_string( format_=subtitles_file_format) elif subtitles_file_format == 'vtt': - formatted_subtitles = sub_utils.vtt_formatter( + formatted_subtitles = sub_utils.list_to_vtt_str( subtitles=timed_text) elif subtitles_file_format == 'json': - formatted_subtitles = sub_utils.json_formatter( + formatted_subtitles = sub_utils.list_to_json_str( subtitles=timed_text) elif subtitles_file_format == 'ass.json': @@ -539,13 +531,12 @@ def list_to_sub_str( # pylint: disable=too-many-arguments src_ssafile=None, dst_ssafile=pysubs2_obj, text_list=timed_text, - style_name=None - ) + style_name=None) formatted_subtitles = pysubs2_obj.to_string( format_='json') elif subtitles_file_format == 'txt': - formatted_subtitles = sub_utils.txt_formatter( + formatted_subtitles = sub_utils.list_to_txt_str( subtitles=timed_text) elif subtitles_file_format == 'sub': @@ -554,8 +545,7 @@ def list_to_sub_str( # pylint: disable=too-many-arguments src_ssafile=None, dst_ssafile=pysubs2_obj, text_list=timed_text, - style_name=None - ) + style_name=None) formatted_subtitles = pysubs2_obj.to_string( format_='microdvd', fps=fps) @@ -569,8 +559,7 @@ def list_to_sub_str( # pylint: disable=too-many-arguments src_ssafile=None, dst_ssafile=pysubs2_obj, text_list=timed_text, - style_name=None - ) + style_name=None) formatted_subtitles = pysubs2_obj.to_string( format_='mpl2', fps=fps) @@ -586,86 +575,125 @@ def list_to_sub_str( # pylint: disable=too-many-arguments src_ssafile=None, dst_ssafile=pysubs2_obj, text_list=timed_text, - style_name=None - ) + style_name=None) formatted_subtitles = pysubs2_obj.to_string( format_=constants.DEFAULT_SUBTITLES_FORMAT) return formatted_subtitles -def list_to_ass_str( # pylint: disable=too-many-arguments - text_list, - styles_list, - 
subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT -): +def ssafile_to_sub_str( + ssafile, + fps=30.0, + subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT): """ - Give an input timed text list, format it to an ass string. + Give an input SSAFile, format it to a string. """ - if subtitles_file_format == 'ass' \ - or subtitles_file_format == 'ssa'\ - or subtitles_file_format == 'ass.json': - pysubs2_obj = pysubs2.SSAFile() - pysubs2_obj.styles = \ - {styles_list[i]: styles_list[i + 1] for i in range(0, len(styles_list), 2)} - if not isinstance(text_list[0], list): - # text_list is [((start, end), text), ...] - # text_list provides regions - sub_utils.pysubs2_ssa_event_add( - src_ssafile=None, - dst_ssafile=pysubs2_obj, - text_list=text_list, - style_name=styles_list[0]) - else: - # text_list is [[src_list], [dst_list]] - # src_list provides regions - sub_utils.pysubs2_ssa_event_add( - src_ssafile=None, - dst_ssafile=pysubs2_obj, - text_list=text_list[0], - style_name=styles_list[0]) - if len(styles_list) == 1: - sub_utils.pysubs2_ssa_event_add( - src_ssafile=None, - dst_ssafile=pysubs2_obj, - text_list=text_list[1], - style_name=styles_list[0]) - else: - sub_utils.pysubs2_ssa_event_add( - src_ssafile=None, - dst_ssafile=pysubs2_obj, - text_list=text_list[1], - style_name=styles_list[2]) - - if subtitles_file_format != 'ass.json': - formatted_subtitles = pysubs2_obj.to_string(format_=subtitles_file_format) - else: - formatted_subtitles = pysubs2_obj.to_string(format_='json') + if subtitles_file_format == 'srt' \ + or subtitles_file_format == 'tmp'\ + or subtitles_file_format == 'ass'\ + or subtitles_file_format == 'ssa': + formatted_subtitles = ssafile.to_string( + format_=subtitles_file_format) + + elif subtitles_file_format == 'vtt': + formatted_subtitles = sub_utils.assfile_to_vtt_str( + subtitles=ssafile) + + elif subtitles_file_format == 'json': + formatted_subtitles = sub_utils.assfile_to_json_str( + subtitles=ssafile) + + elif subtitles_file_format 
== 'ass.json': + formatted_subtitles = ssafile.to_string( + format_='json') + + elif subtitles_file_format == 'txt': + formatted_subtitles = sub_utils.assfile_to_txt_str( + subtitles=ssafile) + + elif subtitles_file_format == 'sub': + formatted_subtitles = ssafile.to_string( + format_='microdvd', + fps=fps) + # sub format need fps + # ref https://pysubs2.readthedocs.io/en/latest + # /api-reference.html#supported-input-output-formats + + elif subtitles_file_format == 'mpl2.txt': + formatted_subtitles = ssafile.to_string( + format_='mpl2', + fps=fps) + else: # fallback process print(_("Format \"{fmt}\" not supported. " "Using \"{default_fmt}\" instead.").format( fmt=subtitles_file_format, default_fmt=constants.DEFAULT_SUBTITLES_FORMAT)) - pysubs2_obj = pysubs2.SSAFile() + formatted_subtitles = ssafile.to_string( + format_=constants.DEFAULT_SUBTITLES_FORMAT) + + return formatted_subtitles + + +def list_to_ass_str( + text_list, + styles_list, + subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT, + same_event_type=0): + """ + Give an input timed text list, format it to an ass string. + """ + pysubs2_obj = pysubs2.SSAFile() + pysubs2_obj.styles = \ + {styles_list[i]: styles_list[i + 1] for i in range(0, len(styles_list), 2)} + if not isinstance(text_list[0], list): + # text_list is [((start, end), text), ...] 
+ # text_list provides regions sub_utils.pysubs2_ssa_event_add( src_ssafile=None, dst_ssafile=pysubs2_obj, text_list=text_list, - style_name=None - ) - formatted_subtitles = pysubs2_obj.to_string( - format_=constants.DEFAULT_SUBTITLES_FORMAT) + style_name=styles_list[0]) + else: + # text_list is [[src_list], [dst_list]] + # src_list provides regions + sub_utils.pysubs2_ssa_event_add( + src_ssafile=None, + dst_ssafile=pysubs2_obj, + text_list=text_list[0], + style_name=styles_list[0]) + src_obj = pysubs2_obj + pysubs2_obj = pysubs2.SSAFile() + if len(styles_list) == 1: + sub_utils.pysubs2_ssa_event_add( + src_ssafile=src_obj, + dst_ssafile=pysubs2_obj, + text_list=text_list[1], + style_name=styles_list[0], + same_event_type=same_event_type) + else: + sub_utils.pysubs2_ssa_event_add( + src_ssafile=src_obj, + dst_ssafile=pysubs2_obj, + text_list=text_list[1], + style_name=styles_list[2], + same_event_type=same_event_type) + + if subtitles_file_format != 'ass.json': + formatted_subtitles = pysubs2_obj.to_string(format_=subtitles_file_format) + else: + formatted_subtitles = pysubs2_obj.to_string(format_='json') - return formatted_subtitles, subtitles_file_format + return formatted_subtitles def str_to_file( str_, output, - input_m=input -): + input_m=input): """ Give a string and write it to file """ diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo index 2028fa8940a41b34ecbc9da4c8fd4e3624f4fba7..22ba0c2b99ee8a2cdbc47debf4252a58f80696ca 100644 GIT binary patch delta 1640 zcmY+^e`w5c9LMqZY_qxDe%Nd`J3F`YbL?=pADG|Fge_TY?V>F6BP7gHq-#@tq=-c+ zk>(Hchd)fAB>u2g+7wD_@fX4$SW`;G^Yy(`+o${7zo?HLCCi*Tr z>5|b_(R}ID}c4KGtjl`Y<2&<3c=zCHOS%SDeM%n_^as)#$Z~^h{4}*CuRJhFwa2MubJ2v7Ktixg4jMeF8Y|gHtuJ1%*+GCuJy*Lwp zpfXgLVHUs&)OsT-W9{fCzCB`4k8g1~mSvi4#a7&cuTX*Kk$+|8Bx?OBu0h}USY~#j zj_NY%#_pnyv=2-03ob=Zw%H=A$7mIUiyGh?JcI~OSh@E*Wu4VoV*I{OE zY#za1%x|K0e4X^Md3%5r*n@@m2Nl@sF`I@B9>QvHoCT$*6DMO2Dy1J$ss4`op`WM} 
z&moGAEP^ZXEb46gk(UkgrX9N}oIAJ>i|`X_LoU)Y2g?iCwk~L9;R)uDKV9$|efSr3 zp^rRPVl680X;h$xsOw(hZX81Wdm?PN86TlGP{c*LZYFP(k*z`f?xrXMW#9zzUF$*> zNe^nL-%x?Qq-8qR<1RdoHTVJdV%TqX5U=3|%woH`(QUXGFJKV+QSpAGAEN>C&av1k z)Q%#k8)(6$copa4AgXF}ro?Kb7SAy6#gkY^{uN*VH)0A=RGf`*Z(tMi&zOK}Y&dFF z42tO?t&C{Ym=icDzP1WW` zjMAklEu$-iY~0Dj3Z%d-gZ{tq2XQJrExy2MIS(5hl?s;R7&Yq@A2`KINjaHPmY2li z>oe!^8g@oz$#D~LHC;usoUW?YF;TBJhOS1&og+*^9g*4sI*$$KU!?iSrh+Wjf-7*W z<6d%~JJ8&6IJj?Tu;tM9K*wNKkvqUh3#>Z}-ECv=BJj1{Go7VDBRtayCY%r S7c)Htqrb>P_s6_j8UFxem#Cot delta 1540 zcmYk+O-NKx6u|MLX*yctM@?gm*`(&zI5yvnQ#PiOWQKhy7!?dTln8~P6k!Ze1Vw}( z7KW}|^nr*W)TYQ)8?#zS8^I7FTxg3RXd&4Dym>+w?)>h3^Jeb5=bZayTOS`!Eas;T z3$2k}NDrinbYg27AGE1-5eL4;T%5x;T*Lyb*(_3v%~*wZuo5RQ3%_ADF5yvJ#Q+8} zMD~j$q?ndEAMAk~c_BrpP4oDC!^oLN69rlD6PW<^!f>tQuwh6-zOf)!i7x zFkV6rj^ZxXmpKMIxUh`6^L0Fpom)iOaRPa0N!_YH5V0U@Nh!Lp1}pF!YDDg0F^-zo zKcGfv8J$>c73si6tY>|B#^3@j;CZagNiO^ZH8Lxx*UO22Gu}f&C!bJLvxfR$1$oI7 z`mvh%Nj!|V@gTm#Cj5)KZe2d!V(^~94GeEf=5Pwzm@nWlbZr-D!G6^F43?yjUz=G2 zgpbWhGlsAocjF_}g!=&(V!Qf>JxIC8L)8C$iu>>*a;vPM){2Fjb)&un16}wcmf;9q#u;qI z3gXp+{n(33sHr*QOg`v19%4R^Zp3U))C!-#mXg1W82iyV7~Ke$ccq>*&0%jFeFPMv8&P z`uaA~Vj>X6aq{B-O?SP~G_=%^q?s2o(MsP$*Dz}&HN4cT_~_+y&9g@K2whXHM^_t} zlzI(KhR)2^V5&u=1*5s;v?>u$UrkM(t5yXtO=^~EDj spU3BMd%SL6*y;5~f?<~@?C}hJu{kn*ey2AS33{i0+u|wHssI20 diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po index afa6fb46..a99e4f0d 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po @@ -7,8 +7,8 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-01-28 11:58+0800\n" -"PO-Revision-Date: 2020-01-23 16:51+0800\n" +"POT-Creation-Date: 2020-01-31 12:02+0800\n" +"PO-Revision-Date: 2020-01-31 12:12+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" @@ -17,20 +17,20 @@ msgstr "" "Language: zh_CN\n" "X-Generator: 
Poedit 2.2.4\n" -#: autosub/cmdline_utils.py:45 +#: autosub/cmdline_utils.py:46 msgid "List of output formats:\n" msgstr "列出所有输出格式:\n" -#: autosub/cmdline_utils.py:47 autosub/cmdline_utils.py:55 +#: autosub/cmdline_utils.py:48 autosub/cmdline_utils.py:56 msgid "Format" msgstr "格式" -#: autosub/cmdline_utils.py:48 autosub/cmdline_utils.py:56 -#: autosub/cmdline_utils.py:83 autosub/cmdline_utils.py:101 +#: autosub/cmdline_utils.py:49 autosub/cmdline_utils.py:57 +#: autosub/cmdline_utils.py:84 autosub/cmdline_utils.py:102 msgid "Description" msgstr "描述" -#: autosub/cmdline_utils.py:53 +#: autosub/cmdline_utils.py:54 msgid "" "\n" "List of input formats:\n" @@ -38,36 +38,36 @@ msgstr "" "\n" "列出所有输入格式:\n" -#: autosub/cmdline_utils.py:64 +#: autosub/cmdline_utils.py:65 msgid "Use py-googletrans to detect a sub file's first line language." msgstr "使用py-googletrans来检测字幕文件第一行的语言。" -#: autosub/cmdline_utils.py:71 autosub/cmdline_utils.py:82 -#: autosub/cmdline_utils.py:100 +#: autosub/cmdline_utils.py:72 autosub/cmdline_utils.py:83 +#: autosub/cmdline_utils.py:101 msgid "Lang code" msgstr "语言代码" -#: autosub/cmdline_utils.py:72 +#: autosub/cmdline_utils.py:73 msgid "Confidence" msgstr "可信度" -#: autosub/cmdline_utils.py:80 +#: autosub/cmdline_utils.py:81 msgid "List of all lang codes for speech-to-text:\n" msgstr "列出所有语音转文字的语言代码:\n" -#: autosub/cmdline_utils.py:89 +#: autosub/cmdline_utils.py:90 msgid "Match Google Speech V2 lang codes." msgstr "匹配Google Speech V2的语言代码。" -#: autosub/cmdline_utils.py:98 +#: autosub/cmdline_utils.py:99 msgid "List of all lang codes for translation:\n" msgstr "列出所有翻译的语言代码:\n" -#: autosub/cmdline_utils.py:107 +#: autosub/cmdline_utils.py:108 msgid "Match py-googletrans lang codes." msgstr "匹配py-googletrans的语言代码。" -#: autosub/cmdline_utils.py:125 +#: autosub/cmdline_utils.py:126 msgid "" "Error: arg of \"-i\"/\"--input\": \"{path}\" isn't valid. You need to give a " "valid path." 
@@ -75,7 +75,7 @@ msgstr "" "错误:\"-i\"/\"--input\"的参数:\"{path}\"是无效的。你需要提供一个有效的路" "径。" -#: autosub/cmdline_utils.py:131 +#: autosub/cmdline_utils.py:132 msgid "" "Error: arg of \"-sty\"/\"--styles\": \"{path}\" isn't valid. You need to " "give a valid path." @@ -83,15 +83,15 @@ msgstr "" "错误:\"-sty\"/\"--styles\"的参数:\"{path}\"是无效的。你需要提供一个有效的路" "径。" -#: autosub/cmdline_utils.py:137 +#: autosub/cmdline_utils.py:138 msgid "Error: Too many \"-sn\"/\"--styles-name\" arguments." msgstr "错误:\"-sn\"/\"--styles-name\"的参数过多。" -#: autosub/cmdline_utils.py:150 autosub/cmdline_utils.py:158 +#: autosub/cmdline_utils.py:150 autosub/cmdline_utils.py:157 msgid "Error: \"-sn\"/\"--styles-name\" arguments aren't in \"{path}\"." msgstr "错误:\"-sn\"/\"--styles-name\"的参数不在 \"{path}\"文件内。" -#: autosub/cmdline_utils.py:164 +#: autosub/cmdline_utils.py:162 msgid "" "Error: arg of \"-er\"/\"--ext-regions\": \"{path}\" isn't valid. You need to " "give a valid path." @@ -99,16 +99,16 @@ msgstr "" "错误:\"-er\"/\"--ext-regions\"的参数:\"{path}\"是无效的。你需要提供一个有效" "的路径。" -#: autosub/cmdline_utils.py:181 autosub/cmdline_utils.py:199 +#: autosub/cmdline_utils.py:179 autosub/cmdline_utils.py:197 msgid "No output format specified. Use input format \"{fmt}\" for output." msgstr "没有指定输出格式。使用输入的格式\"{fmt}\"作为输出格式。" -#: autosub/cmdline_utils.py:193 +#: autosub/cmdline_utils.py:191 msgid "" "Your output is a directory not a file path. Now file path set to \"{new}\"." msgstr "你的输出路径是一个目录不是一个文件路径。现在输出路径设置为\"{new}\"。" -#: autosub/cmdline_utils.py:216 +#: autosub/cmdline_utils.py:214 msgid "" "Error: Output subtitles format \"{fmt}\" not supported. Run with \"-lf\"/\"--" "list-formats\" to see all supported formats.\n" @@ -118,19 +118,19 @@ msgstr "" "看所有支持的格式。\n" "或者使用ffmpeg或者SubtitleEdit来转换格式。" -#: autosub/cmdline_utils.py:230 autosub/cmdline_utils.py:237 +#: autosub/cmdline_utils.py:227 autosub/cmdline_utils.py:233 msgid "Error: No valid \"-of\"/\"--output-files\" arguments." 
msgstr "错误:\"-of\"/\"--output-files\"参数无效。" -#: autosub/cmdline_utils.py:249 +#: autosub/cmdline_utils.py:244 msgid "Input is a subtitles file." msgstr "输入是一个字幕文件。" -#: autosub/cmdline_utils.py:262 +#: autosub/cmdline_utils.py:257 msgid "Error: \"-slp\"/\"--sleep-seconds\" arg is illegal." msgstr "错误:\"-slp\"/\"--sleep-seconds\"参数非法。" -#: autosub/cmdline_utils.py:271 +#: autosub/cmdline_utils.py:265 msgid "" "Warning: Speech language \"{src}\" not recommended. Run with \"-lsc\"/\"--" "list-speech-codes\" to see all supported languages." @@ -138,37 +138,37 @@ msgstr "" "警告:语音语言\"{src}\"不在推荐的清单里面。用\"-lsc\"/\"--list-speech-codes" "\"选项运行来查看所有支持的语言。" -#: autosub/cmdline_utils.py:281 +#: autosub/cmdline_utils.py:274 msgid "Use langcodes-py2 to standardize the result." msgstr "使用langcodes-py2来标准化结果。" -#: autosub/cmdline_utils.py:283 autosub/cmdline_utils.py:342 -#: autosub/cmdline_utils.py:373 autosub/cmdline_utils.py:460 -#: autosub/cmdline_utils.py:490 +#: autosub/cmdline_utils.py:276 autosub/cmdline_utils.py:330 +#: autosub/cmdline_utils.py:359 autosub/cmdline_utils.py:439 +#: autosub/cmdline_utils.py:467 msgid "Use \"{lang_code}\" instead." msgstr "改用\"{lang_code}\"。" -#: autosub/cmdline_utils.py:287 +#: autosub/cmdline_utils.py:280 msgid "Match failed. Still using \"{lang_code}\"." msgstr "匹配失败。仍在使用\"{lang_code}\"。" -#: autosub/cmdline_utils.py:292 +#: autosub/cmdline_utils.py:285 msgid "Error: The arg of \"-mnc\"/\"--min-confidence\" isn't legal." msgstr "错误:\"-mnc\"/\"--min-confidence\"参数的值不合法。" -#: autosub/cmdline_utils.py:297 +#: autosub/cmdline_utils.py:289 msgid "Error: Wrong API code." msgstr "错误:错误的API代码。" -#: autosub/cmdline_utils.py:302 +#: autosub/cmdline_utils.py:293 msgid "Destination language not provided. Only performing speech recognition." msgstr "翻译目的语言未提供。只进行语音识别。" -#: autosub/cmdline_utils.py:309 +#: autosub/cmdline_utils.py:299 msgid "Source language not provided. Use Speech language instead." 
msgstr "翻译源语言未提供。改用语音语言。" -#: autosub/cmdline_utils.py:333 autosub/cmdline_utils.py:451 +#: autosub/cmdline_utils.py:322 autosub/cmdline_utils.py:431 msgid "" "Warning: Source language \"{src}\" not supported. Run with \"-lsc\"/\"--list-" "translation-codes\" to see all supported languages." @@ -176,12 +176,12 @@ msgstr "" "警告:源语言\"{src}\"不在推荐的清单里面。用 \"-lsc\"/\"--list-translation-" "codes\"选项运行来查看所有支持的语言。" -#: autosub/cmdline_utils.py:347 autosub/cmdline_utils.py:378 -#: autosub/cmdline_utils.py:464 autosub/cmdline_utils.py:494 +#: autosub/cmdline_utils.py:335 autosub/cmdline_utils.py:364 +#: autosub/cmdline_utils.py:443 autosub/cmdline_utils.py:471 msgid "Match failed. Still using \"{lang_code}\". Program stopped." msgstr "匹配失败。仍在使用\"{lang_code}\"。程序终止。" -#: autosub/cmdline_utils.py:353 autosub/cmdline_utils.py:470 +#: autosub/cmdline_utils.py:341 autosub/cmdline_utils.py:449 msgid "" "Error: Source language \"{src}\" not supported. Run with \"-lsc\"/\"--list-" "translation-codes\" to see all supported languages. Or use \"-bm\"/\"--best-" @@ -191,7 +191,7 @@ msgstr "" "codes\"选项运行来查看所有支持的语言。或者使用\"-bm\"/\"--best-match\"来获得最" "佳匹配。" -#: autosub/cmdline_utils.py:364 autosub/cmdline_utils.py:481 +#: autosub/cmdline_utils.py:351 autosub/cmdline_utils.py:459 msgid "" "Warning: Destination language \"{dst}\" not supported. Run with \"-lsc\"/\"--" "list-translation-codes\" to see all supported languages." @@ -199,7 +199,7 @@ msgstr "" "警告:不支持目的语言\"{dst}\"。用 \"-lsc\"/\"--list-translation-codes\"选项运" "行来查看所有支持的语言。" -#: autosub/cmdline_utils.py:384 autosub/cmdline_utils.py:500 +#: autosub/cmdline_utils.py:370 autosub/cmdline_utils.py:477 msgid "" "Error: Destination language \"{dst}\" not supported. Run with \"-lsc\"/\"--" "list-translation-codes\" to see all supported languages. 
Or use \"-bm\"/\"--" @@ -209,37 +209,37 @@ msgstr "" "看所有支持的格式。\n" "或者使用ffmpeg或者SubtitleEdit来转换格式。" -#: autosub/cmdline_utils.py:394 +#: autosub/cmdline_utils.py:379 msgid "" "Speech language is the same as the Destination language. Only performing " "speech recognition." msgstr "语音语言和目的语言一致。只进行语音识别。" -#: autosub/cmdline_utils.py:404 +#: autosub/cmdline_utils.py:388 msgid "You've already input times. No works done." msgstr "你已经输入了时间轴。啥都没做。" -#: autosub/cmdline_utils.py:410 +#: autosub/cmdline_utils.py:393 msgid "Speech language not provided. Only performing speech regions detection." msgstr "语音语言未提供。只生成时间轴。" -#: autosub/cmdline_utils.py:419 autosub/cmdline_utils.py:522 +#: autosub/cmdline_utils.py:401 autosub/cmdline_utils.py:496 msgid "Error: External speech regions file not provided." msgstr "错误:外部时间轴文件未提供。" -#: autosub/cmdline_utils.py:433 +#: autosub/cmdline_utils.py:414 msgid "Error: Destination language not provided." msgstr "错误:目的语言未提供。" -#: autosub/cmdline_utils.py:509 +#: autosub/cmdline_utils.py:485 msgid "Error: Source language is the same as the Destination language." msgstr "错误:源语言和目的语言一致。" -#: autosub/cmdline_utils.py:514 +#: autosub/cmdline_utils.py:489 msgid "Error: Source language not provided." msgstr "错误:源语言未提供。" -#: autosub/cmdline_utils.py:536 +#: autosub/cmdline_utils.py:509 msgid "" "Your minimum region size {mrs0} is smaller than {mrs}.\n" "Now reset to {mrs}." @@ -247,7 +247,7 @@ msgstr "" "你输入的语音区域{mrs0}比{mrs}还小。\n" "现在重置为{mrs}。" -#: autosub/cmdline_utils.py:544 +#: autosub/cmdline_utils.py:516 msgid "" "Your maximum region size {mrs0} is larger than {mrs}.\n" "Now reset to {mrs}." @@ -255,7 +255,7 @@ msgstr "" "你输入的语音区域{mrs0}比{mrs}还大。\n" "现在重置为{mrs}。" -#: autosub/cmdline_utils.py:552 +#: autosub/cmdline_utils.py:523 msgid "" "Your maximum continuous silence {mxcs} is smaller than 0.\n" "Now reset to {dmxcs}." 
@@ -263,7 +263,7 @@ msgstr "" "你输入的最大连续安静区域{mxcs}比0还小。\n" "现在重置为{dmxcs}。" -#: autosub/cmdline_utils.py:597 autosub/cmdline_utils.py:773 +#: autosub/cmdline_utils.py:566 autosub/cmdline_utils.py:841 msgid "" "\n" "No works done. Check your \"-of\"/\"--output-files\" option." @@ -271,16 +271,18 @@ msgstr "" "\n" "啥都没做。检查你的\"-of\"/\"--output-files\"选项。" -#: autosub/cmdline_utils.py:642 autosub/cmdline_utils.py:1070 +#: autosub/cmdline_utils.py:608 autosub/cmdline_utils.py:1118 msgid "Error: Translation failed." msgstr "错误:翻译失败。" -#: autosub/cmdline_utils.py:687 autosub/cmdline_utils.py:1106 +#: autosub/cmdline_utils.py:652 autosub/cmdline_utils.py:1157 msgid "Bilingual subtitles file created at \"{}\"." msgstr "双语字幕创建在了\"{}\"。" -#: autosub/cmdline_utils.py:691 autosub/cmdline_utils.py:880 -#: autosub/cmdline_utils.py:1041 autosub/cmdline_utils.py:1110 +#: autosub/cmdline_utils.py:656 autosub/cmdline_utils.py:708 +#: autosub/cmdline_utils.py:760 autosub/cmdline_utils.py:939 +#: autosub/cmdline_utils.py:1091 autosub/cmdline_utils.py:1161 +#: autosub/cmdline_utils.py:1209 autosub/cmdline_utils.py:1257 msgid "" "\n" "All works done." @@ -288,15 +290,23 @@ msgstr "" "\n" "做完了。" -#: autosub/cmdline_utils.py:730 autosub/cmdline_utils.py:1149 +#: autosub/cmdline_utils.py:704 autosub/cmdline_utils.py:1205 +msgid "\"dst-lf-src\" subtitles file created at \"{}\"." +msgstr "\"dst-lf-src\"字幕创建在了\"{}\"。" + +#: autosub/cmdline_utils.py:756 autosub/cmdline_utils.py:1253 +msgid "\"src-lf-dst\" subtitles file created at \"{}\"." +msgstr "\"src-lf-dst\"字幕创建在了\"{}\"。" + +#: autosub/cmdline_utils.py:799 autosub/cmdline_utils.py:1298 msgid "Destination language subtitles file created at \"{}\"." msgstr "目的语言字幕文件创建在了\"{}\"。" -#: autosub/cmdline_utils.py:779 +#: autosub/cmdline_utils.py:846 msgid "Use external speech regions." 
msgstr "使用外部时间轴。" -#: autosub/cmdline_utils.py:818 +#: autosub/cmdline_utils.py:882 msgid "" "\n" "Convert source audio to \"{name}\" to get audio length and detect audio " @@ -305,11 +315,11 @@ msgstr "" "\n" "将源音频转换为\"{name}\"来检测语音区域。" -#: autosub/cmdline_utils.py:828 +#: autosub/cmdline_utils.py:892 msgid "Error: Convert source audio to \"{name}\" failed." msgstr "错误:转换源音频至\"{name}\"失败。" -#: autosub/cmdline_utils.py:840 +#: autosub/cmdline_utils.py:903 msgid "" "\n" "\"{name}\" has been deleted." @@ -317,19 +327,19 @@ msgstr "" "\n" "\"{name}\"已被删除。" -#: autosub/cmdline_utils.py:844 +#: autosub/cmdline_utils.py:907 msgid "Error: Can't get speech regions." msgstr "错误:无法得到语音区域。" -#: autosub/cmdline_utils.py:877 autosub/cmdline_utils.py:1225 +#: autosub/cmdline_utils.py:936 autosub/cmdline_utils.py:1365 msgid "Times file created at \"{}\"." msgstr "时间轴文件创建在了\"{}\"." -#: autosub/cmdline_utils.py:901 +#: autosub/cmdline_utils.py:959 msgid "Error: Conversion failed." msgstr "错误:转换失败。" -#: autosub/cmdline_utils.py:905 +#: autosub/cmdline_utils.py:963 msgid "" "Audio processing complete.\n" "All works done." @@ -337,22 +347,22 @@ msgstr "" "音频处理完毕。\n" "做完了。" -#: autosub/cmdline_utils.py:951 +#: autosub/cmdline_utils.py:1008 msgid "Use the API key given in the option \"-skey\"/\"--speech-key\"." msgstr "使用选项\"-skey\"/\"--speech-key\"提供的API密钥。" -#: autosub/cmdline_utils.py:965 +#: autosub/cmdline_utils.py:1021 msgid "" "Set the GOOGLE_APPLICATION_CREDENTIALS given in the option \"-sa\"/\"--" "service-account\"." msgstr "" "设置选项\"-sa\"/\"--service-account\"提供的GOOGLE_APPLICATION_CREDENTIALS。" -#: autosub/cmdline_utils.py:979 +#: autosub/cmdline_utils.py:1034 msgid "Use the GOOGLE_APPLICATION_CREDENTIALS in the environment variables." msgstr "使用环境变量中的GOOGLE_APPLICATION_CREDENTIALS。" -#: autosub/cmdline_utils.py:991 +#: autosub/cmdline_utils.py:1045 msgid "" "No available GOOGLE_APPLICATION_CREDENTIALS. Use \"-sa\"/\"--service-account" "\" to set one." 
@@ -360,7 +370,7 @@ msgstr "" "没有可用的GOOGLE_APPLICATION_CREDENTIALS。使用选项\"-sa\"/\"--service-account" "\"来设置一个。" -#: autosub/cmdline_utils.py:999 +#: autosub/cmdline_utils.py:1053 msgid "" "Error: Speech-to-text failed.\n" "All works done." @@ -368,11 +378,11 @@ msgstr "" "错误:语音转文字失败。\n" "做完了。" -#: autosub/cmdline_utils.py:1037 autosub/cmdline_utils.py:1191 +#: autosub/cmdline_utils.py:1087 autosub/cmdline_utils.py:1335 msgid "Speech language subtitles file created at \"{}\"." msgstr "语音字幕文件创建在了\"{}\"。" -#: autosub/cmdline_utils.py:1158 +#: autosub/cmdline_utils.py:1307 msgid "" "Override \"-of\"/\"--output-files\" due to your args too few.\n" "Output source subtitles file only." @@ -380,7 +390,7 @@ msgstr "" "因为你其他参数输入得太少了,忽略\"-of\"/\"--output-files\"参数。\n" "只输出源语言字幕文件。" -#: autosub/cmdline_utils.py:1196 +#: autosub/cmdline_utils.py:1340 msgid "" "Override \"-of\"/\"--output-files\" due to your args too few.\n" "Output regions subtitles file only." diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.mo index 674a4784dbc4231b607a409ff49a7b7027f17eee..bd0e233d4b414501e9009b95692a22b9e16f4190 100644 GIT binary patch delta 242 zcmWm7F-rn*9LDkIf8BLMof<@U2nT_KgOL6U9HKzMvWP|p2Zh#X@f9@Z;>o=N*Bt*# z;45frZE<3a6Gc4m0i`^eAS$uAIys zF8UUn1(%Kv&MxBOH@m~}xjXLR2G#5F+t$^bq)kT}!$Vxd6I{V3+`=!cVK^jBVh4Bd z2&?#rbtAptD)YdV_Hn_J+L+@!zTyPFW10K(w5$frH;WMcinNVgoW%#6#%El_H!Op{ zcRzJgxv!a-auRPfOpO-nX3Dt7d3IhLI5(!f7i{Edub-Wr2CZH{P5j74Hn3q3HT`fY dX~qp3TRXDb3A*WJcJa?$?1xce3(x&<{s8k#FY^EZ diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po index e3433c69..648d61ba 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.po @@ -7,8 +7,8 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-01-23 15:38+0800\n" -"PO-Revision-Date: 2020-01-28 
12:01+0800\n" +"POT-Creation-Date: 2020-01-31 12:02+0800\n" +"PO-Revision-Date: 2020-01-31 12:11+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" @@ -17,7 +17,7 @@ msgstr "" "Language: zh_CN\n" "X-Generator: Poedit 2.2.4\n" -#: autosub/core.py:102 +#: autosub/core.py:100 msgid "" "\n" "Converting speech regions to short-term fragments." @@ -25,11 +25,11 @@ msgstr "" "\n" "按照语音区域将音频转换为多个短语音片段。" -#: autosub/core.py:103 +#: autosub/core.py:101 msgid "Converting: " msgstr "转换中: " -#: autosub/core.py:149 +#: autosub/core.py:146 msgid "" "\n" "Sending short-term fragments to Google Speech V2 API and getting result." @@ -37,11 +37,11 @@ msgstr "" "\n" "将短片段语音发送给Google Speech V2 API并得到识别结果。" -#: autosub/core.py:150 autosub/core.py:201 +#: autosub/core.py:147 autosub/core.py:197 msgid "Speech-to-Text: " msgstr "语音转文字中: " -#: autosub/core.py:171 autosub/core.py:300 +#: autosub/core.py:168 autosub/core.py:296 msgid "" "Error: Connection error happened too many times.\n" "All works done." @@ -49,7 +49,7 @@ msgstr "" "错误:过多连接错误。\n" "做完了。" -#: autosub/core.py:199 +#: autosub/core.py:195 msgid "" "\n" "Sending short-term fragments to Google Cloud Speech V1P1Beta1 API and " @@ -58,11 +58,11 @@ msgstr "" "\n" "将短片段语音发送给Google Cloud Speech V1P1Beta1 API并得到识别结果。" -#: autosub/core.py:308 +#: autosub/core.py:304 msgid "Receive something unexpected:" msgstr "收到未预期数据:" -#: autosub/core.py:337 autosub/core.py:388 +#: autosub/core.py:332 autosub/core.py:382 msgid "" "\n" "Translating text from \"{0}\" to \"{1}\"." @@ -70,24 +70,24 @@ msgstr "" "\n" "从\"{0}\"翻译为\"{1}\"。" -#: autosub/core.py:347 autosub/core.py:438 +#: autosub/core.py:342 autosub/core.py:432 msgid "Translation: " msgstr "翻译中: " -#: autosub/core.py:366 autosub/core.py:499 -msgid "Cancelling transcription." +#: autosub/core.py:493 +msgid "Cancelling translation." msgstr "取消翻译。" -#: autosub/core.py:580 autosub/core.py:647 +#: autosub/core.py:569 autosub/core.py:631 msgid "Format \"{fmt}\" not supported. 
Using \"{default_fmt}\" instead." msgstr "不支持格式\"{fmt}\"。改用\"{default_fmt}\"。" -#: autosub/core.py:676 +#: autosub/core.py:704 msgid "" "There is already a file with the same name in this location: \"{dest_name}\"." msgstr "在此处已有同名文件:\"{dest_name}\"。" -#: autosub/core.py:679 +#: autosub/core.py:707 msgid "Input a new path (including directory and file name) for output file.\n" msgstr "为输出文件输入一个新的路径(包括路径和文件名)。\n" diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.ffmpeg_utils.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.ffmpeg_utils.mo index 1de34516b57300b24ac8acead02e721eceeaa12f..7fca9a923f09ba0901f03372ed389b00a613f0c3 100644 GIT binary patch delta 27 icmX@dbB0{U1LK9LnA8#qs{9XD;WW54hK2_ delta 27 icmZ3%wt{WLLPkDAOI-tVT_XzxLlY}gtqc@P0Ma1yTbLObOc@xQ7$yN3@<4$T zKw26|zW~x2K$?XGVz4fd1{oLwq@95D1R%{1q%Q+$pm_|O6E_NrasU~uKn&E&zz)Pf zIR>tYH}~l~mZoIpD-;x^>J}8`C#Mz{XXd3Vq$OtNq^9U`g%+nOq@<=LmgbZw$%XPnWbj qpV0Pn$;KC3mOgH1;(D@o|MMv;UhbapV%?Hw({?`FJazI5=57E3l2Ri8 delta 257 zcmaFN`I)2so)F7a1|Z-BVi_P#0b*VtUIWA+@BoMffcPO08w2qtAXWlmHAV&oIUwx` zr1gMw29O5HPY2R2K>9S0<^$3KObiUlK(;)P2AS*3%)np@qyvGpJOhIfLpzWm4HQ@o zq&0x_VIU1M=nar|0@4aB3=I50IvGd<&0*k}cu-iB9mrq>VxSr@0Ln3NPJFp<@(srG t97YxjhK5$A7L(UAN%0w4>Kd5qf`kmLjLjxrXIjN*Jh_Q^&E!~?HULg`B60u# diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.mo index bffb52f23c42ec5b707914e38d72531a27c3da97..548443e3d40ed8728f80e653c2c69b057519d9e5 100644 GIT binary patch delta 2452 zcmZ|Pc~F#f7{KudOV~;iQSmHZ#2eWWG!HZjFGMs^JUVo8ftM8{PF# z21wIM2UEmxDFrqA!?9zUW^!yA9jmc+7ch;nn*K1ezQ2865odB|*w62Jp5Muy`Z(nC z=8&o;PiTn9s&J7B=KM4Jhm_%!wH z_#7*)!ZXw_4i#C0MKL1jcnj&0BxYA;q8h&w6R4jVE)s{op~^R8gvd3F^-x$x!8TIl zJZ?r+PaGvui^otM<=oLC5hT2eTd`Lx3&parBHz+qL-waI*Dex<8!;3MQMDIabpxNE zUW1RL=R*oM3P-GllgKs6DO7>qph|QVpTz5^0&k*My=|Hf^& zEuKZ-KUjr3ITkkRk;w5PKhaQvYT)V#BGXCOifW}JCYmQP4(q6YjkD;_nZ%>S^1~6f$tk zT#gdU(Sd(rDhZdRm@7Y+D)KDt?Kl8!^G%~I6D`v*n*Ll|g%wzU9oPdi7MOYR(W4qF 
zC|qR4wRl+#;5gReq%@J&u^wa5wuqhK1SR7$k;9SQi%l<~$``@E#9tVNg&4U+kzK8^V6HsK;wG%<^5(- zO5$&%)yKU^im@OxCM3;R7aH4JO~npmHh<&zd%?{*RDP2 zOjKnp*OBL3?{MmxBQHyhe$bGUr@2?_n#=K~uIU@}Jhxp-VvGvPacPd#x+6==&sH(^ zF4H7x^B>Bh(mpiLBhsp%u4##}YojK5FGd%1(`GvIv@9+^* zY_{?7T6{v{xCA5HuHCOp_Ey-}^o(e0Y-r!_ZEM`+FFD$J#t7`I^WK~KM|dO){GU@c zI_9T#y?~ChW$etl0xE&h!a!YV$Jx>c;T>mdT5FF7J}73Uw)#T^sNs^i@GaZB`#ZjF(InuCpy;rI541HM_E(A#78xeCMteJ^~9T_%mh2o zkGK}=@i=~lckm#75G_)UOJYQ*l7p6=m}3+95Ywo?#jFzX5x1GI2(J?F@lsHU-LWEH z;(1hYxEz!VGh!8#2w6D8^pe=WrwKZI5y&@!D!u zhWbTo%tIJ~n~4)Koc1QHz|(6*_{dDQxwTO_>aeo^*%V4>*ovw{Hx3dT9QntIpIB#} z>i+enLpVwM7^*|nnQQhN#C#6p*LfmI^jo}<^Fe#gCXqV)-EtSRQ*X%?^f12sL}4Fh zZ(%9%0`5iMts;dad=|Myi~AEVQh_DB6!>HTGizN|8BmBdR>j=v6c6 zr|>fl;dN{+7CC~uw~0KB6Bv(iEEC%-6`05qo{YNM~wZq)=U|#AgBrdDq{l{gyL~hd_zuPqSv}{z>0^Fc?eti-xWxyO8?EbQq4f}_^NZS zF4kF!^kRFAZG&EKj}LUs*NU+5lA_A`ustkPt(cm0ma5^ZSnZMlpvfRS|fv`&RCox3dMK}1k^Y?4dH+pIkw7R37hK2(*9mqqe!T7iZsS)e6c>wrTq1?MWd6ZL>XH%y4?XZLYav?#(g%R;Byx g$TFLr9J~I1Q*^G9+1CcAK0T%X84+wG@91{^3nx)6r~m)} diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po index 715ad9c4..4badcfb3 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po @@ -7,8 +7,8 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-01-28 11:58+0800\n" -"PO-Revision-Date: 2020-01-28 12:04+0800\n" +"POT-Creation-Date: 2020-01-31 12:02+0800\n" +"PO-Revision-Date: 2020-01-31 12:08+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" @@ -48,55 +48,55 @@ msgstr "" "Email: {email}\n" "问题反馈: https://github.com/agermanidis/autosub\n" -#: autosub/options.py:61 +#: autosub/options.py:60 msgid "Input Options" msgstr "输入选项" -#: autosub/options.py:62 +#: autosub/options.py:61 msgid "Options to control input." 
msgstr "控制输入的选项。" -#: autosub/options.py:64 +#: autosub/options.py:63 msgid "Language Options" msgstr "语言选项" -#: autosub/options.py:65 +#: autosub/options.py:64 msgid "Options to control language." msgstr "控制语言的选项。" -#: autosub/options.py:67 +#: autosub/options.py:66 msgid "Output Options" msgstr "输出选项" -#: autosub/options.py:68 +#: autosub/options.py:67 msgid "Options to control output." msgstr "控制输出的选项。" -#: autosub/options.py:70 +#: autosub/options.py:69 msgid "Speech Options" msgstr "语音选项" -#: autosub/options.py:71 +#: autosub/options.py:70 msgid "" "Options to control speech-to-text. If Speech Options not given, it will only " "generate the times." msgstr "控制语音转文字的选项。" -#: autosub/options.py:74 +#: autosub/options.py:73 msgid "py-googletrans Options" msgstr "py-googletrans选项" -#: autosub/options.py:75 +#: autosub/options.py:74 msgid "" "Options to control translation. Default method to translate. Could be " "blocked at any time." msgstr "控制翻译的选项。同时也是默认的翻译方法。可能随时会被谷歌爸爸封。" -#: autosub/options.py:79 +#: autosub/options.py:78 msgid "Google Translate V2 Options" msgstr "Google Translate V2选项" -#: autosub/options.py:80 +#: autosub/options.py:79 msgid "" "Options to control translation.(Not been tested) If the API key is given, it " "will replace the py-googletrans method." @@ -104,53 +104,53 @@ msgstr "" "控制翻译的选项。(未被测试过)如果提供了API key,它会取代py-googletrans的翻译" "方法。" -#: autosub/options.py:84 +#: autosub/options.py:83 msgid "Network Options" msgstr "网络选项" -#: autosub/options.py:85 +#: autosub/options.py:84 msgid "Options to control network." msgstr "控制网络的选项。" -#: autosub/options.py:87 +#: autosub/options.py:86 msgid "Other Options" msgstr "其他选项" -#: autosub/options.py:88 +#: autosub/options.py:87 msgid "Other options to control." msgstr "控制其他东西的选项。" -#: autosub/options.py:90 +#: autosub/options.py:89 msgid "Audio Processing Options" msgstr "音频处理选项" -#: autosub/options.py:91 +#: autosub/options.py:90 msgid "Options to control audio processing." 
msgstr "控制音频处理的选项。" -#: autosub/options.py:93 +#: autosub/options.py:92 msgid "Auditok Options" msgstr "Auditok的选项" -#: autosub/options.py:94 +#: autosub/options.py:93 msgid "" "Options to control Auditok when not using external speech regions control." msgstr "不使用外部语音区域控制时,用于控制Auditok的选项。" -#: autosub/options.py:97 +#: autosub/options.py:96 msgid "List Options" msgstr "列表选项" -#: autosub/options.py:98 +#: autosub/options.py:97 msgid "List all available arguments." msgstr "列出所有可选参数。" -#: autosub/options.py:102 autosub/options.py:112 autosub/options.py:122 -#: autosub/options.py:211 autosub/options.py:422 autosub/options.py:638 +#: autosub/options.py:101 autosub/options.py:110 autosub/options.py:119 +#: autosub/options.py:201 autosub/options.py:410 autosub/options.py:607 msgid "path" msgstr "路径" -#: autosub/options.py:103 +#: autosub/options.py:102 msgid "" "The path to the video/audio/subtitles file that needs to generate subtitles. " "When it is a subtitles file, the program will only translate it. (arg_num = " @@ -159,7 +159,7 @@ msgstr "" "用于生成字幕文件的视频/音频/字幕文件。如果输入文件是字幕文件,程序仅会对其进" "行翻译。(参数个数为1)" -#: autosub/options.py:113 +#: autosub/options.py:111 msgid "" "Path to the subtitles file which provides external speech regions, which is " "one of the formats that pysubs2 supports and overrides the default method to " @@ -168,7 +168,7 @@ msgstr "" "提供外部语音区域(时间轴)的字幕文件。该字幕文件格式需要是pysubs2所支持的。使" "用后会替换掉默认的自动寻找语音区域(时间轴)的功能。(参数个数为1)" -#: autosub/options.py:124 +#: autosub/options.py:121 msgid "" "Valid when your output format is \"ass\"/\"ssa\". Path to the subtitles file " "which provides \"ass\"/\"ssa\" styles for your output. 
If the arg_num is 0, " @@ -179,11 +179,11 @@ msgstr "" "幕文件。如果不提供参数,它会使用来自\"-esr\"/\"--external-speech-regions\"选" "项提供的样式。更多信息详见\"-sn\"/\"--styles-name\"。(参数个数为0或1)" -#: autosub/options.py:136 +#: autosub/options.py:132 msgid "style-name" msgstr "样式名" -#: autosub/options.py:137 +#: autosub/options.py:133 msgid "" "Valid when your output format is \"ass\"/\"ssa\" and \"-sty\"/\"--styles\" " "is given. Adds \"ass\"/\"ssa\" styles to your events. If not provided, " @@ -198,12 +198,12 @@ msgstr "" "数作为样式名。如果参数个数为2,源语言字幕行会使用第一个参数作为样式名。目标语" "言行使用第二个。(参数个数为1或2)" -#: autosub/options.py:151 autosub/options.py:163 autosub/options.py:174 -#: autosub/options.py:608 autosub/options.py:625 +#: autosub/options.py:146 autosub/options.py:157 autosub/options.py:167 +#: autosub/options.py:579 autosub/options.py:595 msgid "lang_code" msgstr "语言代码" -#: autosub/options.py:152 +#: autosub/options.py:147 #, python-format msgid "" "Lang code/Lang tag for speech-to-text. Recommend using the Google Cloud " @@ -215,7 +215,7 @@ msgstr "" "码。错误的输入不会终止程序。但是后果自负。参考:https://cloud.google.com/" "speech-to-text/docs/languages(参数个数为1)(默认参数: %(default)s)" -#: autosub/options.py:164 +#: autosub/options.py:158 #, python-format msgid "" "Lang code/Lang tag for translation source language. If not given, use " @@ -228,7 +228,7 @@ msgstr "" "googletrans作为翻译的方法,错误的输入会终止运行。(参数个数为1)(默认参数" "为%(default)s)" -#: autosub/options.py:175 +#: autosub/options.py:168 #, python-format msgid "" "Lang code/Lang tag for translation destination language. Same attention in " @@ -237,11 +237,11 @@ msgstr "" "用于翻译的目标语言的语言代码/语言标识符。同样的注意参考选项\"-SRC\"/\"--src-" "language\"。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:182 autosub/options.py:434 +#: autosub/options.py:174 autosub/options.py:421 msgid "mode" msgstr "模式" -#: autosub/options.py:184 +#: autosub/options.py:176 msgid "" "Allow langcodes-py2 to get a best matching lang code when your input is " "wrong. Only functional for py-googletrans and Google Speech V2. 
Available " @@ -254,7 +254,7 @@ msgstr "" "\"指\"-S\"/\"--speech-language\"。\"src\"指\"-SRC\"/\"--src-language\"。\"d" "\"指\"-D\"/\"--dst-language\"。(参数个数在1到3之间)" -#: autosub/options.py:199 +#: autosub/options.py:190 msgid "" "An integer between 0 and 100 to control the good match group of \"-lsc\"/\"--" "list-speech-codes\" or \"-ltc\"/\"--list-translation-codes\" or the match " @@ -266,7 +266,7 @@ msgstr "" "best-match\"选项中的最佳匹配结果。结果会是一组“好的匹配”,其分数需要超过这个" "参数的值。(参数个数为1)" -#: autosub/options.py:212 +#: autosub/options.py:202 msgid "" "The output path for subtitles file. (default: the \"input\" path combined " "with the proper name tails) (arg_num = 1)" @@ -274,11 +274,11 @@ msgstr "" "输出字幕文件的路径。(默认值是\"input\"路径和适当的文件名后缀的结合)(参数个" "数为1)" -#: autosub/options.py:219 +#: autosub/options.py:208 msgid "format" msgstr "格式" -#: autosub/options.py:220 +#: autosub/options.py:209 msgid "" "Destination subtitles format. If not provided, use the extension in the \"-o" "\"/\"--output\" arg. If \"-o\"/\"--output\" arg doesn't provide the " @@ -291,28 +291,33 @@ msgstr "" "果\"-i\"/\"--input\"的参数是一个字幕文件,那么使用和字幕文件相同的扩展名。" "(参数个数为1)(默认参数为{dft})" -#: autosub/options.py:234 +#: autosub/options.py:222 msgid "" "Prevent pauses and allow files to be overwritten. Stop the program when your " "args are wrong. (arg_num = 0)" msgstr "" "避免任何暂停和覆写文件的行为。如果参数有误,会直接停止程序。(参数个数为0)" -#: autosub/options.py:240 +#: autosub/options.py:227 msgid "type" msgstr "种类" -#: autosub/options.py:243 +#: autosub/options.py:230 #, python-format msgid "" -"Output more files. Available types: regions, src, dst, bilingual, all. (4 >= " -"arg_num >= 1) (default: %(default)s)" +"Output more files. Available types: regions, src, dst, bilingual, dst-lf-" +"src, src-lf-dst, all. dst-lf-src: dst language and src language in the same " +"event. And dst is ahead of src. src-lf-dst: src language and dst language in " +"the same event. And src is ahead of dst. 
(6 >= arg_num >= 1) (default: " +"%(default)s)" msgstr "" -"输出更多的文件。可选种类:regions, src, dst, bilingual, all.(时间轴,源语言" -"字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数" -"为%(default)s)" +"输出更多的文件。可选种类:regions, src, dst, bilingual, dst-lf-src, src-lf-" +"dst, all.(时间轴,源语言字幕,目标语言字幕,双语字幕,dst-lf-src,,src-lf-" +"dst,所有)dst-lf-src:目标语言和源语言在同一字幕行中,且目标语言先于源语言。" +"src-lf-dst:源语言和目标语言在同一字幕行中,且源语言先于目标语言。(参数个数" +"在6和1之间)(默认参数为%(default)s)" -#: autosub/options.py:253 +#: autosub/options.py:243 msgid "" "Valid when your output format is \"sub\". If input, it will override the fps " "check on the input file. Ref: https://pysubs2.readthedocs.io/en/latest/api-" @@ -322,18 +327,14 @@ msgstr "" "查。参考:https://pysubs2.readthedocs.io/en/latest/api-reference." "html#supported-input-output-formats(参数个数为1)" -#: autosub/options.py:264 -msgid "Drop any regions without text. (arg_num = 0)" -msgstr "删除所有没有文本的空轴。(参数个数为0)" - -#: autosub/options.py:270 +#: autosub/options.py:252 msgid "API_code" msgstr "API代码" -#: autosub/options.py:272 +#: autosub/options.py:254 #, python-format msgid "" -"Choose which Speech-to-Text API to use. Currently supported: gsv2: Google " +"Choose which Speech-to-Text API to use. Currently support: gsv2: Google " "Speech V2 (https://github.com/gillesdemey/google-speech-v2). gcsv1: Google " "Cloud Speech-to-Text V1P1Beta1 (https://cloud.google.com/speech-to-text/" "docs). (arg_num = 1) (default: %(default)s)" @@ -343,17 +344,17 @@ msgstr "" "Text V1P1Beta1 (https://cloud.google.com/speech-to-text/docs)。(参数个数为" "1)(默认参数为%(default)s)" -#: autosub/options.py:282 +#: autosub/options.py:264 msgid "" -"The API key for Speech-to-Text API. (arg_num = 1) Currently supported: gsv2: " +"The API key for Speech-to-Text API. (arg_num = 1) Currently support: gsv2: " "The API key for gsv2. (default: Free API key) gcsv1: The API key for gcsv1. 
" -"(Can be overridden by \"-sa\"/\"--service-account\")" +"(If used, override the credentials given by\"-sa\"/\"--service-account\")" msgstr "" -"API key for Speech-to-Text API. (arg_num = 1) Currently supported: gsv2: The " -"API key for gsv2. (default: Free API key) gcsv1: The API key for gcsv1. (Can " -"be overridden by \"-sa\"/\"--service-account\")" +"Speech-to-Text API的密钥。(参数个数为1)当前支持:gsv2:gsv2的API密钥。(默" +"认参数为免费API密钥)gcsv1:gcsv1的API密钥。(如果使用了,可以覆盖 \"-sa\"/" +"\"--service-account\"提供的服务账号凭据)" -#: autosub/options.py:294 +#: autosub/options.py:291 #, python-format msgid "" "API response for text confidence. A float value between 0 and 1. Confidence " @@ -366,7 +367,11 @@ msgstr "" "com/BingLingGroup/google-speech-v2#response(参数个数为1)(默认参数" "为%(default)s)" -#: autosub/options.py:307 +#: autosub/options.py:301 +msgid "Drop any regions without speech recognition result. (arg_num = 0)" +msgstr "删除所有没有语音识别结果的空轴。(参数个数为0)" + +#: autosub/options.py:309 #, python-format msgid "" "Number of concurrent Speech-to-Text requests to make. (arg_num = 1) " @@ -374,12 +379,12 @@ msgid "" msgstr "" "用于Speech-to-Text请求的并行数量。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:313 autosub/options.py:552 autosub/options.py:562 -#: autosub/options.py:572 +#: autosub/options.py:314 autosub/options.py:529 autosub/options.py:538 +#: autosub/options.py:547 msgid "second" msgstr "秒" -#: autosub/options.py:316 +#: autosub/options.py:317 #, python-format msgid "" "(Experimental)Seconds to sleep between two translation requests. (arg_num = " @@ -396,14 +401,14 @@ msgstr "" "(实验性)自定义多个请求URL。参考:https://py-googletrans.readthedocs.io/en/" "latest/(参数个数大于等于1)" -#: autosub/options.py:333 +#: autosub/options.py:332 msgid "" "(Experimental)Customize User-Agent headers. Same docs above. (arg_num = 1)" msgstr "" "(实验性)自定义用户代理(User-Agent)头部。同样的参考文档如上。(参数个数为" "1)" -#: autosub/options.py:341 +#: autosub/options.py:339 msgid "" "The Google Translate V2 API key to be used. 
If not provided, use free API " "(py-googletrans) instead. (arg_num = 1)" @@ -411,7 +416,7 @@ msgstr "" "用于Google Translate V2的API key。如果没有提供,则使用免费的API也就是py-" "googletrans。(参数个数为1)" -#: autosub/options.py:351 +#: autosub/options.py:348 #, python-format msgid "" "Number of lines per Google Translate V2 request. (arg_num = 1) (default: " @@ -419,7 +424,7 @@ msgid "" msgstr "" "Google Translate V2请求的每行行数。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:360 +#: autosub/options.py:356 #, python-format msgid "" "Number of concurrent Google translate V2 API requests to make. (arg_num = 1) " @@ -428,11 +433,11 @@ msgstr "" "Google translate V2 API请求的并行数量。(参数个数为1)(默认参数" "为%(default)s)" -#: autosub/options.py:368 +#: autosub/options.py:363 msgid "Change the Google Speech V2 API URL into the http one. (arg_num = 0)" msgstr "将Google Speech V2 API的URL改为http类型。(参数个数为0)" -#: autosub/options.py:377 +#: autosub/options.py:371 #, python-format msgid "" "Add https proxy by setting environment variables. If arg_num is 0, use const " @@ -441,7 +446,7 @@ msgstr "" "通过设置环境变量的方式添加https代理。如果参数个数是0,使用const里的代理URL。" "(参数个数为0或1)(const为%(const)s)" -#: autosub/options.py:386 +#: autosub/options.py:379 #, python-format msgid "" "Add http proxy by setting environment variables. If arg_num is 0, use const " @@ -450,46 +455,46 @@ msgstr "" "通过设置环境变量的方式添加http代理。如果参数个数是0,使用const里的代理URL。" "(参数个数为0或1)(const为%(const)s)" -#: autosub/options.py:393 +#: autosub/options.py:385 msgid "username" msgstr "用户名" -#: autosub/options.py:394 +#: autosub/options.py:386 msgid "Set proxy username. (arg_num = 1)" msgstr "设置代理用户名。(参数个数为1)" -#: autosub/options.py:400 +#: autosub/options.py:391 msgid "password" msgstr "密码" -#: autosub/options.py:401 +#: autosub/options.py:392 msgid "Set proxy password. (arg_num = 1)" msgstr "设置代理密码。(参数个数为1)" -#: autosub/options.py:408 +#: autosub/options.py:398 #, python-format msgid "Show %(prog)s help message and exit. 
(arg_num = 0)" msgstr "显示%(prog)s的帮助信息并退出。(参数个数为0)" -#: autosub/options.py:417 +#: autosub/options.py:406 #, python-format msgid "Show %(prog)s version and exit. (arg_num = 0)" msgstr "显示%(prog)s的版本信息并退出。(参数个数为0)" -#: autosub/options.py:423 +#: autosub/options.py:411 msgid "" "Set service account key environment variable. It should be the file path of " -"the JSON file that contains your service account credentials. If used, " -"override the API key options. Ref: https://cloud.google.com/docs/" -"authentication/getting-started Currently supported: gcsv1 " -"(GOOGLE_APPLICATION_CREDENTIALS) (arg_num = 1)" +"the JSON file that contains your service account credentials. Can be " +"overridden by the API key. Ref: https://cloud.google.com/docs/authentication/" +"getting-started Currently support: gcsv1 (GOOGLE_APPLICATION_CREDENTIALS) " +"(arg_num = 1)" msgstr "" -"设置服务账号密钥的环境变量。应该是包含服务帐号密钥的 JSON 文件的文件路径。如" -"果使用了,会覆盖API密钥选项。参考:https://cloud.google.com/docs/" +"设置服务账号密钥的环境变量。应该是包含服务帐号凭据的JSON文件的文件路径。如果" +"使用了,会被API密钥选项覆盖。参考:https://cloud.google.com/docs/" "authentication/getting-started 当前支持:" "gcsv1(GOOGLE_APPLICATION_CREDENTIALS)(参数个数为1)" -#: autosub/options.py:435 +#: autosub/options.py:422 msgid "" "Option to control audio process. If not given the option, do normal " "conversion work. \"y\": pre-process the input first then start normal " @@ -508,15 +513,15 @@ msgstr "" "scripts/subgen.sh https://ffmpeg.org/ffmpeg-filters.html)(参数个数介于1和2" "之间)" -#: autosub/options.py:460 +#: autosub/options.py:446 msgid "Keep audio processing files to the output path. (arg_num = 0)" msgstr "将音频处理中产生的文件放在输出路径中。(参数个数为0)" -#: autosub/options.py:466 autosub/options.py:486 autosub/options.py:501 +#: autosub/options.py:451 autosub/options.py:469 autosub/options.py:483 msgid "command" msgstr "命令" -#: autosub/options.py:467 +#: autosub/options.py:452 msgid "" "This arg will override the default audio pre-process command. Every line of " "the commands need to be in quotes. 
Input file name is {in_}. Output file " @@ -525,7 +530,7 @@ msgstr "" "这个参数会取代默认的音频预处理命令。每行命令需要放在一个引号内。输入文件名写" "为{in_}。输出文件名写为{out_}。(参数个数大于1)" -#: autosub/options.py:480 +#: autosub/options.py:464 #, python-format msgid "" "Number of concurrent ffmpeg audio split process to make. (arg_num = 1) " @@ -533,7 +538,7 @@ msgid "" msgstr "" "用于ffmpeg音频切割的进程并行数量。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:487 +#: autosub/options.py:470 msgid "" "(Experimental)This arg will override the default audio conversion command. " "\"[\", \"]\" are optional arguments meaning you can remove them. \"{{\", " @@ -544,7 +549,7 @@ msgstr "" "除。\"{{\", \"}}\"是必选参数,不可移除。以下是用于处理音频的默认命令:{dft}" "(默认参数为1)" -#: autosub/options.py:502 +#: autosub/options.py:484 msgid "" "(Experimental)This arg will override the default audio split command. Same " "attention above. Default: {dft} (arg_num = 1)" @@ -552,11 +557,11 @@ msgstr "" "(实验性)这个参数会取代默认的音频转换命令。相同的注意如上。默认:{dft}(参数" "个数为1)" -#: autosub/options.py:512 +#: autosub/options.py:493 msgid "file_suffix" msgstr "文件名后缀" -#: autosub/options.py:514 +#: autosub/options.py:495 #, python-format msgid "" "(Experimental)This arg will override the default API audio suffix. (arg_num " @@ -565,11 +570,11 @@ msgstr "" "(实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数" "为%(default)s)" -#: autosub/options.py:521 +#: autosub/options.py:501 msgid "sample_rate" msgstr "采样率" -#: autosub/options.py:524 +#: autosub/options.py:504 #, python-format msgid "" "(Experimental)This arg will override the default API audio sample rate(Hz). " @@ -578,11 +583,11 @@ msgstr "" "(实验性)这个参数会取代默认的给API使用的音频采样率(赫兹)。(参数个数为1)" "(默认参数为%(default)s)" -#: autosub/options.py:531 +#: autosub/options.py:510 msgid "channel_num" msgstr "声道数" -#: autosub/options.py:534 +#: autosub/options.py:513 #, python-format msgid "" "(Experimental)This arg will override the default API audio channel. 
(arg_num " @@ -591,11 +596,11 @@ msgstr "" "(实验性)这个参数会取代默认的给API使用的音频声道数量。(参数个数为1)(默认" "参数为%(default)s)" -#: autosub/options.py:541 +#: autosub/options.py:519 msgid "energy" msgstr "能量(相对值)" -#: autosub/options.py:544 +#: autosub/options.py:522 #, python-format msgid "" "The energy level which determines the region to be detected. Ref: https://" @@ -606,7 +611,7 @@ msgstr "" "latest/apitutorial.html#examples-using-real-audio-data(参数个数为1)(默认参" "数为%(default)s)" -#: autosub/options.py:555 +#: autosub/options.py:532 #, python-format msgid "" "Minimum region size. Same docs above. (arg_num = 1) (default: %(default)s)" @@ -614,7 +619,7 @@ msgstr "" "最小语音区域大小。同样的参考文档如上。(参数个数为1)(默认参数" "为%(default)s)" -#: autosub/options.py:565 +#: autosub/options.py:541 #, python-format msgid "" "Maximum region size. Same docs above. (arg_num = 1) (default: %(default)s)" @@ -622,7 +627,7 @@ msgstr "" "最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数" "为%(default)s)" -#: autosub/options.py:575 +#: autosub/options.py:550 #, python-format msgid "" "Maximum length of a tolerated silence within a valid audio activity. Same " @@ -631,7 +636,7 @@ msgstr "" "在一段有效的音频活动区域中可以容忍的最大(连续)安静区域。同样的参考文档如" "上。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:583 autosub/options.py:590 +#: autosub/options.py:557 autosub/options.py:563 msgid "" "Ref: https://auditok.readthedocs.io/en/latest/core.html#class-summary " "(arg_num = 0)" @@ -639,7 +644,7 @@ msgstr "" "参考:https://auditok.readthedocs.io/en/latest/core.html#class-summary(参数" "个数为0)" -#: autosub/options.py:597 +#: autosub/options.py:569 msgid "" "List all available subtitles formats. If your format is not supported, you " "can use ffmpeg or SubtitleEdit to convert the formats. You need to offer fps " @@ -650,7 +655,7 @@ msgstr "" "SubtitleEdit来对其进行转换。如果输出格式是\"sub\"且输入文件是音频无法获取到视" "频帧率时,你需要提供fps选项指定帧率。(参数个数为0)" -#: autosub/options.py:611 +#: autosub/options.py:582 msgid "" "List all recommended \"-S\"/\"--speech-language\" Google Speech-to-Text " "language codes. 
If no arg is given, list all. Or else will list get a group " @@ -668,7 +673,7 @@ msgstr "" "言)-扩展-私有(https://www.zhihu.com/question/21980689/answer/93615123)(参" "数个数为0或1)" -#: autosub/options.py:628 +#: autosub/options.py:598 msgid "" "List all available \"-SRC\"/\"--src-language\" py-googletrans translation " "language codes. Or else will list get a group of \"good match\" of the arg. " @@ -678,7 +683,7 @@ msgstr "" "言代码。否则会给出一个“好的匹配”的清单。同样的参考文档如上。(参数个数为0或" "1)" -#: autosub/options.py:639 +#: autosub/options.py:608 #, python-format msgid "" "Use py-googletrans to detect a sub file's first line language. And list a " @@ -691,6 +696,14 @@ msgstr "" "的)。参考:https://cloud.google.com/speech-to-text/docs/languages(参数个数" "为1)(默认参数 %(default)s)" +#~ msgid "" +#~ "Output more files. Available types: regions, src, dst, bilingual, all. (4 " +#~ ">= arg_num >= 1) (default: %(default)s)" +#~ msgstr "" +#~ "输出更多的文件。可选种类:regions, src, dst, bilingual, all.(时间轴,源语" +#~ "言字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数" +#~ "为%(default)s)" + #~ msgid "" #~ "The Google Speech V2 API key to be used. 
If not provided, use free API " #~ "key instead.(arg_num = 1)" diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.po index 2e8c9954..20f982a3 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.po @@ -7,17 +7,17 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-01-28 11:58+0800\n" -"PO-Revision-Date: 2019-07-28 10:36+0800\n" +"POT-Creation-Date: 2020-01-31 12:09+0800\n" +"PO-Revision-Date: 2020-01-31 12:09+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Language: zh_CN\n" -"X-Generator: Poedit 2.2.3\n" +"X-Generator: Poedit 2.2.4\n" -#: autosub/__init__.py:54 autosub/__init__.py:173 +#: autosub/__init__.py:54 autosub/__init__.py:167 msgid "" "\n" "All works done." @@ -29,7 +29,7 @@ msgstr "" msgid "Error: Dependency ffmpeg not found on this machine." msgstr "错误:依赖ffmpeg未在这台电脑上找到。" -#: autosub/__init__.py:86 +#: autosub/__init__.py:85 msgid "" "Error: The args of \"-ap\"/\"--audio-process\" are wrong.\n" "No works done." @@ -37,11 +37,11 @@ msgstr "" "错误:\"-ap\"/\"--audio-process\"的参数有误。\n" "啥都没做。" -#: autosub/__init__.py:101 +#: autosub/__init__.py:98 msgid "No works done." msgstr "啥都没做。" -#: autosub/__init__.py:106 +#: autosub/__init__.py:102 msgid "" "Audio pre-processing complete.\n" "All works done." @@ -49,18 +49,19 @@ msgstr "" "音频预处理已完成。\n" "做完了。" -#: autosub/__init__.py:125 -#, fuzzy +#: autosub/__init__.py:119 msgid "" "Audio pre-processing failed.\n" -"Use default method" -msgstr "音频预处理已完成。" +"Use default method." +msgstr "" +"音频预处理失败。\n" +"使用默认方法。" -#: autosub/__init__.py:129 +#: autosub/__init__.py:123 msgid "Audio pre-processing complete." msgstr "音频预处理已完成。" -#: autosub/__init__.py:164 +#: autosub/__init__.py:158 msgid "" "\n" "KeyboardInterrupt. Works stopped." 
@@ -68,7 +69,7 @@ msgstr "" "\n" "键盘中断。操作终止。" -#: autosub/__init__.py:167 +#: autosub/__init__.py:161 msgid "" "\n" "Error: pysubs2.exceptions. Check your file format." diff --git a/autosub/ffmpeg_utils.py b/autosub/ffmpeg_utils.py index 43577b73..9c61a45f 100644 --- a/autosub/ffmpeg_utils.py +++ b/autosub/ffmpeg_utils.py @@ -96,14 +96,12 @@ def __call__(self, region): except subprocess.CalledProcessError: raise exceptions.AutosubException( _("Error: ffmpeg can't split your file. " - "Check your audio processing options.") - ) + "Check your audio processing options.")) def ffprobe_get_fps( # pylint: disable=superfluous-parens video_file, - input_m=input -): + input_m=input): """ Return video_file's fps. """ @@ -208,8 +206,7 @@ def audio_pre_prcs( # pylint: disable=too-many-arguments, too-many-branches cmds, output_name=None, input_m=input, - ffmpeg_cmd="ffmpeg" -): + ffmpeg_cmd="ffmpeg"): """ Pre-process audio file. """ diff --git a/autosub/lang_code_utils.py b/autosub/lang_code_utils.py index 6b9d97ce..5760d6ef 100644 --- a/autosub/lang_code_utils.py +++ b/autosub/lang_code_utils.py @@ -81,8 +81,7 @@ def better_match(desired_language, def wjust( str_just, length, - is_left=True -): + is_left=True): """ Use wcwidth to just string. """ @@ -125,8 +124,7 @@ def match_print( match_scores = better_match( desired_language=dsr_lang, supported_languages=match_list, - min_score=min_score - )[0] + min_score=min_score)[0] if match_scores[0][0] == 'und': print(_("No lang codes been matched.")) return None diff --git a/autosub/metadata.py b/autosub/metadata.py index 1ebb98ac..6e67ab92 100644 --- a/autosub/metadata.py +++ b/autosub/metadata.py @@ -21,8 +21,7 @@ 'by using ffmpeg, ' 'transcribe speech based on Chrome-Web-Speech-api and ' 'translate the subtitles\' text by using py-googletrans. 
' - 'It currently not supports the latest Google Cloud APIs.') -) + 'It currently not supports the latest Google Cloud APIs.')) AUTHOR = 'Anastasis Germanidis' AUTHOR_EMAIL = 'agermanidis@gmail.com' HOMEPAGE = 'https://github.com/agermanidis/autosub' diff --git a/autosub/options.py b/autosub/options.py index ad700bfc..4372c726 100644 --- a/autosub/options.py +++ b/autosub/options.py @@ -54,8 +54,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements author=metadata.AUTHOR, email=metadata.AUTHOR_EMAIL), add_help=False, - formatter_class=argparse.RawDescriptionHelpFormatter - ) + formatter_class=argparse.RawDescriptionHelpFormatter) input_group = parser.add_argument_group( _('Input Options'), @@ -104,8 +103,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "that needs to generate subtitles. " "When it is a subtitles file, " "the program will only translate it. " - "(arg_num = 1)") - ) + "(arg_num = 1)")) input_group.add_argument( '-er', '--ext-regions', @@ -114,8 +112,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "which provides external speech regions, " "which is one of the formats that pysubs2 supports " "and overrides the default method to find speech regions. " - "(arg_num = 1)") - ) + "(arg_num = 1)")) input_group.add_argument( '-sty', '--styles', @@ -128,8 +125,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "it will use the styles from the : " "\"-esr\"/\"--external-speech-regions\". " "More info on \"-sn\"/\"--styles-name\". " - "(arg_num = 0 or 1)") - ) + "(arg_num = 0 or 1)")) input_group.add_argument( '-sn', '--styles-name', @@ -143,8 +139,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "specific style from the arg of \"-sty\"/\"--styles\". " "If the arg_num is 2, src language events use the first. " "Dst language events use the second. 
" - "(arg_num = 1 or 2)") - ) + "(arg_num = 1 or 2)")) lang_group.add_argument( '-S', '--speech-language', @@ -155,8 +150,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "WRONG INPUT WON'T STOP RUNNING. " "But use it at your own risk. " "Ref: https://cloud.google.com/speech-to-text/docs/languages" - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) lang_group.add_argument( '-SRC', '--src-language', @@ -166,16 +160,14 @@ def get_cmd_args(): # pylint: disable=too-many-statements "of the \"-S\"/\"--speech-language\". " "If using py-googletrans as the method to translate, " "WRONG INPUT STOP RUNNING. " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) lang_group.add_argument( '-D', '--dst-language', metavar=_('lang_code'), help=_("Lang code/Lang tag for translation destination language. " "Same attention in the \"-SRC\"/\"--src-language\". " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) lang_group.add_argument( '-bm', '--best-match', @@ -189,8 +181,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "\"s\" for \"-S\"/\"--speech-language\". " "\"src\" for \"-SRC\"/\"--src-language\". " "\"d\" for \"-D\"/\"--dst-language\". " - "(3 >= arg_num >= 1)") - ) + "(3 >= arg_num >= 1)")) lang_group.add_argument( '-mns', '--min-score', @@ -203,16 +194,14 @@ def get_cmd_args(): # pylint: disable=too-many-statements "or the match result in \"-bm\"/\"--best-match\". " "Result will be a group of \"good match\" " "whose score is above this arg. " - "(arg_num = 1)") - ) + "(arg_num = 1)")) output_group.add_argument( '-o', '--output', metavar=_('path'), help=_("The output path for subtitles file. 
" "(default: the \"input\" path combined " - "with the proper name tails) (arg_num = 1)") - ) + "with the proper name tails) (arg_num = 1)")) output_group.add_argument( '-F', '--format', @@ -225,15 +214,13 @@ def get_cmd_args(): # pylint: disable=too-many-statements "In this case, if \"-i\"/\"--input\" arg is a subtitles file, " "use the same extension from the subtitles file. " "(arg_num = 1) (default: {dft})").format( - dft=constants.DEFAULT_SUBTITLES_FORMAT) - ) + dft=constants.DEFAULT_SUBTITLES_FORMAT)) output_group.add_argument( '-y', '--yes', action='store_true', help=_("Prevent pauses and allow files to be overwritten. " - "Stop the program when your args are wrong. (arg_num = 0)") - ) + "Stop the program when your args are wrong. (arg_num = 0)")) output_group.add_argument( '-of', '--output-files', @@ -242,9 +229,12 @@ def get_cmd_args(): # pylint: disable=too-many-statements default=["dst", ], help=_("Output more files. " "Available types: " - "regions, src, dst, bilingual, all. " - "(4 >= arg_num >= 1) (default: %(default)s)") - ) + "regions, src, dst, bilingual, dst-lf-src, src-lf-dst, all. " + "dst-lf-src: dst language and src language in the same event. " + "And dst is ahead of src. " + "src-lf-dst: src language and dst language in the same event. " + "And src is ahead of dst. " + "(6 >= arg_num >= 1) (default: %(default)s)")) output_group.add_argument( '-fps', '--sub-fps', @@ -255,22 +245,14 @@ def get_cmd_args(): # pylint: disable=too-many-statements "on the input file. " "Ref: https://pysubs2.readthedocs.io/en/latest/api-reference.html" "#supported-input-output-formats " - "(arg_num = 1)") - ) - - output_group.add_argument( - '-der', '--drop-empty-regions', - action='store_true', - help=_("Drop any regions without text. " - "(arg_num = 0)") - ) + "(arg_num = 1)")) speech_group.add_argument( '-sapi', '--speech-api', metavar=_('API_code'), default='gsv2', help=_("Choose which Speech-to-Text API to use. 
" - "Currently supported: " + "Currently support: " "gsv2: Google Speech V2 (https://github.com/gillesdemey/google-speech-v2). " "gcsv1: Google Cloud Speech-to-Text V1P1Beta1 " "(https://cloud.google.com/speech-to-text/docs). " @@ -280,11 +262,26 @@ def get_cmd_args(): # pylint: disable=too-many-statements '-skey', '--speech-key', metavar='key', help=_("The API key for Speech-to-Text API. (arg_num = 1) " - "Currently supported: " + "Currently support: " "gsv2: The API key for gsv2. (default: Free API key) " "gcsv1: The API key for gcsv1. " - "(Can be overridden by \"-sa\"/\"--service-account\")") - ) + "(If used, override the credentials " + "given by\"-sa\"/\"--service-account\")")) + + # speech_group.add_argument( + # '-sconf', '--speech-config', + # nargs='?', metavar=_('path'), + # const='config.json', + # help=_("Use speech recognition config to send request. " + # "Override these options below: " + # "\"-S\", \"-asr\", \"-aac\". " + # "Currently support: " + # "gcsv1: Google Cloud Speech-to-Text V1P1Beta1 " + # "(https://cloud.google.com/speech-to-text/docs" + # "/reference/rest/v1p1beta1/RecognitionConfig). " + # "If arg_num is 0, use const path. " + # "(arg_num = 0 or 1) (const: %(const)s)") + # ) speech_group.add_argument( '-mnc', '--min-confidence', @@ -296,8 +293,13 @@ def get_cmd_args(): # pylint: disable=too-many-statements "Confidence bigger means the result is better. " "Input this argument will drop any result below it. " "Ref: https://github.com/BingLingGroup/google-speech-v2#response " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) + + speech_group.add_argument( + '-der', '--drop-empty-regions', + action='store_true', + help=_("Drop any regions without speech recognition result. 
" + "(arg_num = 0)")) speech_group.add_argument( '-sc', '--speech-concurrency', @@ -305,8 +307,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements type=int, default=constants.DEFAULT_CONCURRENCY, help=_("Number of concurrent Speech-to-Text requests to make. " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) pygt_group.add_argument( '-slp', '--sleep-seconds', @@ -315,8 +316,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements default=constants.DEFAULT_SLEEP_SECONDS, help=_("(Experimental)Seconds to sleep " "between two translation requests. " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) pygt_group.add_argument( '-surl', '--service-urls', @@ -324,24 +324,21 @@ def get_cmd_args(): # pylint: disable=too-many-statements nargs='*', help=_("(Experimental)Customize request urls. " "Ref: https://py-googletrans.readthedocs.io/en/latest/ " - "(arg_num >= 1)") - ) + "(arg_num >= 1)")) pygt_group.add_argument( '-ua', '--user-agent', metavar='User-Agent headers', help=_("(Experimental)Customize User-Agent headers. " "Same docs above. " - "(arg_num = 1)") - ) + "(arg_num = 1)")) gtv2_group.add_argument( '-gtv2', '--gtransv2', metavar='key', help=_("The Google Translate V2 API key to be used. " "If not provided, use free API (py-googletrans) instead. " - "(arg_num = 1)") - ) + "(arg_num = 1)")) gtv2_group.add_argument( '-lpt', '--lines-per-trans', @@ -349,8 +346,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements type=int, default=constants.DEFAULT_LINES_PER_TRANS, help=_("Number of lines per Google Translate V2 request. " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) gtv2_group.add_argument( '-tc', '--trans-concurrency', @@ -359,16 +355,14 @@ def get_cmd_args(): # pylint: disable=too-many-statements default=constants.DEFAULT_CONCURRENCY, help=_("Number of concurrent " "Google translate V2 API requests to make. 
" - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) network_group.add_argument( '-hsa', '--http-speech-api', action='store_true', help=_("Change the Google Speech V2 API " "URL into the http one. " - "(arg_num = 0)") - ) + "(arg_num = 0)")) network_group.add_argument( '-hsp', '--https-proxy', @@ -376,8 +370,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements const='https://127.0.0.1:1080', help=_("Add https proxy by setting environment variables. " "If arg_num is 0, use const proxy url. " - "(arg_num = 0 or 1) (const: %(const)s)") - ) + "(arg_num = 0 or 1) (const: %(const)s)")) network_group.add_argument( '-hp', '--http-proxy', @@ -385,28 +378,24 @@ def get_cmd_args(): # pylint: disable=too-many-statements const='http://127.0.0.1:1080', help=_("Add http proxy by setting environment variables. " "If arg_num is 0, use const proxy url. " - "(arg_num = 0 or 1) (const: %(const)s)") - ) + "(arg_num = 0 or 1) (const: %(const)s)")) network_group.add_argument( '-pu', '--proxy-username', metavar=_('username'), help=_("Set proxy username. " - "(arg_num = 1)") - ) + "(arg_num = 1)")) network_group.add_argument( '-pp', '--proxy-password', metavar=_('password'), help=_("Set proxy password. " - "(arg_num = 1)") - ) + "(arg_num = 1)")) options_group.add_argument( '-h', '--help', action='help', - help=_("Show %(prog)s help message and exit. (arg_num = 0)") - ) + help=_("Show %(prog)s help message and exit. (arg_num = 0)")) options_group.add_argument( '-V', '--version', @@ -414,8 +403,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements version='%(prog)s ' + metadata.VERSION + ' by ' + metadata.AUTHOR + ' <' + metadata.AUTHOR_EMAIL + '>', - help=_("Show %(prog)s version and exit. (arg_num = 0)") - ) + help=_("Show %(prog)s version and exit. 
(arg_num = 0)")) options_group.add_argument( '-sa', '--service-account', @@ -423,11 +411,10 @@ def get_cmd_args(): # pylint: disable=too-many-statements help=_("Set service account key environment variable. " "It should be the file path of the JSON file " "that contains your service account credentials. " - "If used, override the API key options. " + "Can be overridden by the API key. " "Ref: https://cloud.google.com/docs/authentication/getting-started " - "Currently supported: gcsv1 (GOOGLE_APPLICATION_CREDENTIALS) " - "(arg_num = 1)") - ) + "Currently support: gcsv1 (GOOGLE_APPLICATION_CREDENTIALS) " + "(arg_num = 1)")) audio_prcs_group.add_argument( '-ap', '--audio-process', @@ -451,15 +438,13 @@ def get_cmd_args(): # pylint: disable=too-many-statements "(2 >= arg_num >= 1)").format( dft_1=constants.DEFAULT_AUDIO_PRCS[0], dft_2=constants.DEFAULT_AUDIO_PRCS[1], - dft_3=constants.DEFAULT_AUDIO_PRCS[2]) - ) + dft_3=constants.DEFAULT_AUDIO_PRCS[2])) audio_prcs_group.add_argument( '-k', '--keep', action='store_true', help=_("Keep audio processing files to the output path. " - "(arg_num = 0)") - ) + "(arg_num = 0)")) audio_prcs_group.add_argument( '-apc', '--audio-process-cmd', @@ -469,8 +454,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "Every line of the commands need to be in quotes. " "Input file name is {in_}. " "Output file name is {out_}. " - "(arg_num >= 1)") - ) + "(arg_num >= 1)")) audio_prcs_group.add_argument( '-ac', '--audio-concurrency', @@ -478,8 +462,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements type=int, default=constants.DEFAULT_CONCURRENCY, help=_("Number of concurrent ffmpeg audio split process to make. 
" - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) audio_prcs_group.add_argument( '-acc', '--audio-conversion-cmd', @@ -493,8 +476,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "Default command to process the audio: " "{dft} " "(arg_num = 1)").format( - dft=constants.DEFAULT_AUDIO_CVT) - ) + dft=constants.DEFAULT_AUDIO_CVT)) audio_prcs_group.add_argument( '-asc', '--audio-split-cmd', @@ -504,8 +486,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "Same attention above. " "Default: {dft} " "(arg_num = 1)").format( - dft=constants.DEFAULT_AUDIO_SPLT) - ) + dft=constants.DEFAULT_AUDIO_SPLT)) audio_prcs_group.add_argument( '-asf', '--api-suffix', @@ -513,8 +494,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements default='.flac', help=_("(Experimental)This arg will override the default " "API audio suffix. " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) audio_prcs_group.add_argument( '-asr', '--api-sample-rate', @@ -523,8 +503,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements default=44100, help=_("(Experimental)This arg will override the default " "API audio sample rate(Hz). " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) audio_prcs_group.add_argument( '-aac', '--api-audio-channel', @@ -533,8 +512,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements default=1, help=_("(Experimental)This arg will override the default " "API audio channel. " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) auditok_group.add_argument( '-et', '--energy-threshold', @@ -544,8 +522,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements help=_("The energy level which determines the region to be detected. 
" "Ref: https://auditok.readthedocs.io/en/latest/apitutorial.html" "#examples-using-real-audio-data " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) auditok_group.add_argument( '-mnrs', '--min-region-size', @@ -554,8 +531,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements default=constants.MIN_REGION_SIZE, help=_("Minimum region size. " "Same docs above. " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) auditok_group.add_argument( '-mxrs', '--max-region-size', @@ -564,8 +540,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements default=constants.MAX_REGION_SIZE, help=_("Maximum region size. " "Same docs above. " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) auditok_group.add_argument( '-mxcs', '--max-continuous-silence', @@ -574,22 +549,19 @@ def get_cmd_args(): # pylint: disable=too-many-statements default=constants.DEFAULT_CONTINUOUS_SILENCE, help=_("Maximum length of a tolerated silence within a valid audio activity. " "Same docs above. " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) auditok_group.add_argument( '-sml', '--strict-min-length', action='store_true', help=_("Ref: https://auditok.readthedocs.io/en/latest/core.html#class-summary " - "(arg_num = 0)") - ) + "(arg_num = 0)")) auditok_group.add_argument( '-dts', '--drop-trailing-silence', action='store_true', help=_("Ref: https://auditok.readthedocs.io/en/latest/core.html#class-summary " - "(arg_num = 0)") - ) + "(arg_num = 0)")) list_group.add_argument( '-lf', '--list-formats', @@ -600,8 +572,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "You need to offer fps option " "when input is an audio file " "and output is \"sub\" format. 
" - "(arg_num = 0)") - ) + "(arg_num = 0)")) list_group.add_argument( '-lsc', '--list-speech-codes', @@ -617,8 +588,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "Ref: https://tools.ietf.org/html/bcp47 " "https://github.com/LuminosoInsight/langcodes/blob/master/langcodes/__init__.py " "lang code example: language-script-region-variant-extension-privateuse " - "(arg_num = 0 or 1)") - ) + "(arg_num = 0 or 1)")) list_group.add_argument( '-ltc', '--list-translation-codes', @@ -630,8 +600,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements "Or else will list get a group of \"good match\" " "of the arg. " "Same docs above. " - "(arg_num = 0 or 1)") - ) + "(arg_num = 0 or 1)")) list_group.add_argument( '-dsl', '--detect-sub-language', @@ -640,7 +609,6 @@ def get_cmd_args(): # pylint: disable=too-many-statements "And list a group of matched language in recommended " "\"-S\"/\"--speech-language\" Google Speech-to-Text language codes. " "Ref: https://cloud.google.com/speech-to-text/docs/languages " - "(arg_num = 1) (default: %(default)s)") - ) + "(arg_num = 1) (default: %(default)s)")) return parser.parse_args() diff --git a/autosub/speech_trans_api.py b/autosub/speech_trans_api.py index 2d50e6a7..429c327b 100644 --- a/autosub/speech_trans_api.py +++ b/autosub/speech_trans_api.py @@ -93,8 +93,7 @@ def gcsv1p1beta1_service_client( filename, is_keep, config, - min_confidence -): + min_confidence): """ Function for performing speech-to-text using Google Cloud Speech V1P1Beta1 API for an input FLAC file. 
@@ -253,8 +252,7 @@ def __call__(self, trans_list): result = self.service.translations().list( # pylint: disable=no-member source=self.src, target=self.dst, - q=[trans_str] - ).execute() + q=[trans_str]).execute() if 'translations' in result and result['translations'] and \ 'translatedText' in result['translations'][0]: diff --git a/autosub/sub_utils.py b/autosub/sub_utils.py index b41c96b9..72a13ced 100644 --- a/autosub/sub_utils.py +++ b/autosub/sub_utils.py @@ -18,8 +18,7 @@ def sub_to_speech_regions( audio_wav, sub_file, - ext_max_size_ms=constants.MAX_EXT_REGION_SIZE * 1000 -): + ext_max_size_ms=constants.MAX_EXT_REGION_SIZE * 1000): """ Give an input audio_wav file and subtitles file and generate proper speech regions. """ @@ -58,14 +57,14 @@ def sub_to_speech_regions( return regions -def pysubs2_ssa_event_add( +def pysubs2_ssa_event_add( # pylint: disable=too-many-branches, too-many-statements src_ssafile, dst_ssafile, text_list, style_name, -): + same_event_type=0,): """ - Serialize a list of subtitles according to the SRT format. + Serialize a list of subtitles using pysubs2. """ if not style_name: style_name = 'Default' @@ -96,14 +95,72 @@ def pysubs2_ssa_event_add( # text_list is [text, text, ...] 
i = 0 length = len(text_list) - while i < length: - event = pysubs2.SSAEvent() - event.start = src_ssafile.events[i].start - event.end = src_ssafile.events[i].end - event.text = text_list[i] - event.style = style_name - dst_ssafile.events.append(event) - i = i + 1 + if same_event_type == 0: + # append text_list to new events + while i < length: + event = pysubs2.SSAEvent() + event.start = src_ssafile.events[i].start + event.end = src_ssafile.events[i].end + event.text = text_list[i] + event.style = style_name + dst_ssafile.events.append(event) + i = i + 1 + elif same_event_type == 1: + # add text_list to src_ssafile + # before the existing text in event + if src_ssafile.events[0].style == style_name: + # same style + while i < length: + event = pysubs2.SSAEvent() + event.start = src_ssafile.events[i].start + event.end = src_ssafile.events[i].end + event.text = \ + text_list[i] + "\\N" + src_ssafile.events[i].text + event.style = style_name + dst_ssafile.events.append(event) + i = i + 1 + else: + # different style + while i < length: + event = pysubs2.SSAEvent() + event.start = src_ssafile.events[i].start + event.end = src_ssafile.events[i].end + event.text = \ + text_list[i] + \ + "\\N{{\\r{style_name}}}".format( + style_name=src_ssafile.events[i].style) + \ + src_ssafile.events[i].text + event.style = style_name + dst_ssafile.events.append(event) + i = i + 1 + elif same_event_type == 2: + # add text_list to src_ssafile + # after the existing text in event + if src_ssafile.events[0].style == style_name: + # same style + while i < length: + event = pysubs2.SSAEvent() + event.start = src_ssafile.events[i].start + event.end = src_ssafile.events[i].end + event.text = \ + src_ssafile.events[i].text + "\\N" + text_list[i] + event.style = style_name + dst_ssafile.events.append(event) + i = i + 1 + else: + # different style + while i < length: + event = pysubs2.SSAEvent() + event.start = src_ssafile.events[i].start + event.end = src_ssafile.events[i].end + event.text = \ + 
src_ssafile.events[i].text + \ + "\\N{{\\r{style_name}}}".format( + style_name=style_name) + \ + text_list[i] + event.style = style_name + dst_ssafile.events.append(event) + i = i + 1 else: # src_ssafile provides regions only i = 0 @@ -117,7 +174,7 @@ def pysubs2_ssa_event_add( i = i + 1 -def vtt_formatter(subtitles): +def list_to_vtt_str(subtitles): """ Serialize a list of subtitles according to the VTT format. """ @@ -126,8 +183,7 @@ def vtt_formatter(subtitles): src_ssafile=None, dst_ssafile=pysubs2_obj, text_list=subtitles, - style_name=None - ) + style_name=None) formatted_subtitles = pysubs2_obj.to_string( format_='srt') i = 0 @@ -143,7 +199,26 @@ def vtt_formatter(subtitles): return formatted_subtitles -def json_formatter(subtitles): +def assfile_to_vtt_str(subtitles): + """ + Serialize ASSFile according to the VTT format. + """ + formatted_subtitles = subtitles.to_string( + format_='srt') + i = 0 + lines = formatted_subtitles.split('\n') + new_lines = [] + for line in lines: + if i % 4 == 1: + line = line.replace(',', '.') + new_lines.append(line) + i = i + 1 + formatted_subtitles = '\n'.join(new_lines) + formatted_subtitles = 'WEBVTT\n\n' + formatted_subtitles + return formatted_subtitles + + +def list_to_json_str(subtitles): """ Serialize a list of subtitles as a JSON blob. """ @@ -173,7 +248,23 @@ def json_formatter(subtitles): return json.dumps(subtitle_dicts, indent=4, ensure_ascii=False) -def txt_formatter(subtitles): +def assfile_to_json_str(subtitles): + """ + Serialize ASSFile as a JSON blob. + """ + subtitle_dicts = [ + { + 'start': event.start / 1000.0, + 'end': event.end / 1000.0, + 'content': event.text + } + for event + in subtitles.events + ] + return json.dumps(subtitle_dicts, indent=4, ensure_ascii=False) + + +def list_to_txt_str(subtitles): """ Serialize a list of subtitles as a newline-delimited string. 
""" @@ -191,3 +282,10 @@ def txt_formatter(subtitles): end=end / 1000.0) result = result + '\n' + line return result + + +def assfile_to_txt_str(subtitles): + """ + Serialize ASSFile as a newline-delimited string. + """ + return '\n'.join(event.text for event in subtitles.events) diff --git a/docs/CHANGELOG.zh-Hans.md b/docs/CHANGELOG.zh-Hans.md index e8a375bd..d71fbba9 100644 --- a/docs/CHANGELOG.zh-Hans.md +++ b/docs/CHANGELOG.zh-Hans.md @@ -32,7 +32,12 @@ #### 添加(未发布) -- 添加Google Cloud Speech-to-Text支持。 [issue #10](https://github.com/BingLingGroup/autosub/issues/10) +- 添加Google Cloud Speech-to-Text支持。[issue #10](https://github.com/BingLingGroup/autosub/issues/10) +- 添加更多格式的双语字幕输出支持。[issue #72](https://github.com/BingLingGroup/autosub/issues/72) + +#### 改动(未发布) + +- 修复输入是字幕文件时的输出格式限制。 ### [0.5.3-alpha] - 2019-12-30 From 8ac3fbe69ee4f670811686e8bc2a6d7fd98d42df Mon Sep 17 00:00:00 2001 From: BingLingGroup <42505588+BingLingGroup@users.noreply.github.com> Date: Sat, 1 Feb 2020 12:31:43 +0800 Subject: [PATCH 07/10] Remove gtransv2 support --- autosub/cmdline_utils.py | 243 +++++++-------- autosub/core.py | 56 ---- .../LC_MESSAGES/autosub.cmdline_utils.mo | Bin 9803 -> 9844 bytes .../LC_MESSAGES/autosub.cmdline_utils.po | 92 +++--- .../locale/zh_CN/LC_MESSAGES/autosub.core.mo | Bin 1758 -> 1799 bytes .../locale/zh_CN/LC_MESSAGES/autosub.core.po | 7 +- .../zh_CN/LC_MESSAGES/autosub.options.mo | Bin 23751 -> 22700 bytes .../zh_CN/LC_MESSAGES/autosub.options.po | 292 +++++++++--------- autosub/options.py | 34 +- autosub/speech_trans_api.py | 34 -- setup.py | 1 - 11 files changed, 307 insertions(+), 452 deletions(-) diff --git a/autosub/cmdline_utils.py b/autosub/cmdline_utils.py index bc5ee19d..eb4ec566 100644 --- a/autosub/cmdline_utils.py +++ b/autosub/cmdline_utils.py @@ -316,62 +316,60 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret is_dst_matched = True if not is_src_matched: - if not args.gtransv2: - if args.best_match and 'src' 
in args.best_match: - print( - _("Warning: Source language \"{src}\" not supported. " - "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages.").format(src=args.src_language)) - best_result = lang_code_utils.match_print( - dsr_lang=args.src_language, - match_list=list(googletrans.constants.LANGUAGES.keys()), - min_score=args.min_score) - if best_result: - print(_("Use \"{lang_code}\" instead.").format( - lang_code=best_result[0])) - args.src_language = best_result[0] - else: - raise exceptions.AutosubException( - _("Match failed. Still using \"{lang_code}\". " - "Program stopped.").format( - lang_code=args.src_language)) - + if args.best_match and 'src' in args.best_match: + print( + _("Warning: Source language \"{src}\" not supported. " + "Run with \"-lsc\"/\"--list-translation-codes\" " + "to see all supported languages.").format(src=args.src_language)) + best_result = lang_code_utils.match_print( + dsr_lang=args.src_language, + match_list=list(googletrans.constants.LANGUAGES.keys()), + min_score=args.min_score) + if best_result: + print(_("Use \"{lang_code}\" instead.").format( + lang_code=best_result[0])) + args.src_language = best_result[0] else: raise exceptions.AutosubException( - _("Error: Source language \"{src}\" not supported. " - "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages. " - "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - src=args.src_language)) + _("Match failed. Still using \"{lang_code}\". " + "Program stopped.").format( + lang_code=args.src_language)) - if not is_dst_matched: - if not args.gtransv2: - if args.best_match and 'd' in args.best_match: - print( - _("Warning: Destination language \"{dst}\" not supported. 
" - "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages.").format(dst=args.dst_language)) - best_result = lang_code_utils.match_print( - dsr_lang=args.dst_language, - match_list=list(googletrans.constants.LANGUAGES.keys()), - min_score=args.min_score) - if best_result: - print(_("Use \"{lang_code}\" instead.").format( - lang_code=best_result[0])) - args.dst_language = best_result[0] - else: - raise exceptions.AutosubException( - _("Match failed. Still using \"{lang_code}\". " - "Program stopped.").format( - lang_code=args.dst_language)) + else: + raise exceptions.AutosubException( + _("Error: Source language \"{src}\" not supported. " + "Run with \"-lsc\"/\"--list-translation-codes\" " + "to see all supported languages. " + "Or use \"-bm\"/\"--best-match\" to get a best match.").format( + src=args.src_language)) + if not is_dst_matched: + if args.best_match and 'd' in args.best_match: + print( + _("Warning: Destination language \"{dst}\" not supported. " + "Run with \"-lsc\"/\"--list-translation-codes\" " + "to see all supported languages.").format(dst=args.dst_language)) + best_result = lang_code_utils.match_print( + dsr_lang=args.dst_language, + match_list=list(googletrans.constants.LANGUAGES.keys()), + min_score=args.min_score) + if best_result: + print(_("Use \"{lang_code}\" instead.").format( + lang_code=best_result[0])) + args.dst_language = best_result[0] else: raise exceptions.AutosubException( - _("Error: Destination language \"{dst}\" not supported. " - "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages. " - "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - dst=args.dst_language)) + _("Match failed. Still using \"{lang_code}\". " + "Program stopped.").format( + lang_code=args.dst_language)) + + else: + raise exceptions.AutosubException( + _("Error: Destination language \"{dst}\" not supported. 
" + "Run with \"-lsc\"/\"--list-translation-codes\" " + "to see all supported languages. " + "Or use \"-bm\"/\"--best-match\" to get a best match.").format( + dst=args.dst_language)) if args.dst_language == args.speech_language \ or args.src_language == args.dst_language: @@ -425,60 +423,58 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return is_dst_matched = True if not is_src_matched: - if not args.gtransv2: - if args.best_match and 'src' in args.best_match: - print( - _("Warning: Source language \"{src}\" not supported. " - "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages.").format(src=args.src_language)) - best_result = lang_code_utils.match_print( - dsr_lang=args.src_language, - match_list=list(googletrans.constants.LANGUAGES.keys()), - min_score=args.min_score) - if best_result: - print(_("Use \"{lang_code}\" instead.").format(lang_code=best_result[0])) - args.src_language = best_result[0] - else: - raise exceptions.AutosubException( - _("Match failed. Still using \"{lang_code}\". " - "Program stopped.").format( - lang_code=args.src_language)) - + if args.best_match and 'src' in args.best_match: + print( + _("Warning: Source language \"{src}\" not supported. " + "Run with \"-lsc\"/\"--list-translation-codes\" " + "to see all supported languages.").format(src=args.src_language)) + best_result = lang_code_utils.match_print( + dsr_lang=args.src_language, + match_list=list(googletrans.constants.LANGUAGES.keys()), + min_score=args.min_score) + if best_result: + print(_("Use \"{lang_code}\" instead.").format(lang_code=best_result[0])) + args.src_language = best_result[0] else: raise exceptions.AutosubException( - _("Error: Source language \"{src}\" not supported. " - "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages. " - "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - src=args.src_language)) + _("Match failed. Still using \"{lang_code}\". 
" + "Program stopped.").format( + lang_code=args.src_language)) - if not is_dst_matched: - if not args.gtransv2: - if args.best_match and 'd' in args.best_match: - print( - _("Warning: Destination language \"{dst}\" not supported. " - "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages.").format(dst=args.dst_language)) - best_result = lang_code_utils.match_print( - dsr_lang=args.dst_language, - match_list=list(googletrans.constants.LANGUAGES.keys()), - min_score=args.min_score) - if best_result: - print(_("Use \"{lang_code}\" instead.").format(lang_code=best_result[0])) - args.dst_language = best_result[0] - else: - raise exceptions.AutosubException( - _("Match failed. Still using \"{lang_code}\". " - "Program stopped.").format( - lang_code=args.dst_language)) + else: + raise exceptions.AutosubException( + _("Error: Source language \"{src}\" not supported. " + "Run with \"-lsc\"/\"--list-translation-codes\" " + "to see all supported languages. " + "Or use \"-bm\"/\"--best-match\" to get a best match.").format( + src=args.src_language)) + if not is_dst_matched: + if args.best_match and 'd' in args.best_match: + print( + _("Warning: Destination language \"{dst}\" not supported. " + "Run with \"-lsc\"/\"--list-translation-codes\" " + "to see all supported languages.").format(dst=args.dst_language)) + best_result = lang_code_utils.match_print( + dsr_lang=args.dst_language, + match_list=list(googletrans.constants.LANGUAGES.keys()), + min_score=args.min_score) + if best_result: + print(_("Use \"{lang_code}\" instead.").format(lang_code=best_result[0])) + args.dst_language = best_result[0] else: raise exceptions.AutosubException( - _("Error: Destination language \"{dst}\" not supported. " - "Run with \"-lsc\"/\"--list-translation-codes\" " - "to see all supported languages. " - "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - dst=args.dst_language)) + _("Match failed. Still using \"{lang_code}\". 
" + "Program stopped.").format( + lang_code=args.dst_language)) + + else: + raise exceptions.AutosubException( + _("Error: Destination language \"{dst}\" not supported. " + "Run with \"-lsc\"/\"--list-translation-codes\" " + "to see all supported languages. " + "Or use \"-bm\"/\"--best-match\" to get a best match.").format( + dst=args.dst_language)) if args.dst_language == args.src_language: raise exceptions.AutosubException( @@ -584,24 +580,14 @@ def subs_trans( # pylint: disable=too-many-branches, too-many-statements, too-m text_list.append(event.text) # text translation - if args.gtransv2: - # use gtransv2 - translated_text = core.list_to_gtv2( - text_list=text_list, - api_key=args.gtransv2, - concurrency=args.trans_concurrency, - src_language=args.src_language, - dst_language=args.dst_language, - lines_per_trans=args.lines_per_trans) - else: - # use googletrans - translated_text = core.list_to_googletrans( - text_list, - src_language=args.src_language, - dst_language=args.dst_language, - sleep_seconds=args.sleep_seconds, - user_agent=args.user_agent, - service_urls=args.service_urls) + # use googletrans + translated_text = core.list_to_googletrans( + text_list, + src_language=args.src_language, + dst_language=args.dst_language, + sleep_seconds=args.sleep_seconds, + user_agent=args.user_agent, + service_urls=args.service_urls) if not translated_text or len(translated_text) != len(text_list): raise exceptions.AutosubException( @@ -1094,24 +1080,13 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen pass # text translation - if args.gtransv2: - # use gtransv2 - translated_text = core.list_to_gtv2( - text_list=text_list, - api_key=args.gtransv2, - concurrency=args.trans_concurrency, - src_language=args.src_language, - dst_language=args.dst_language, - lines_per_trans=args.lines_per_trans) - else: - # use googletrans - translated_text = core.list_to_googletrans( - text_list, - src_language=args.src_language, - 
dst_language=args.dst_language, - sleep_seconds=args.sleep_seconds, - user_agent=args.user_agent, - service_urls=args.service_urls) + translated_text = core.list_to_googletrans( + text_list, + src_language=args.src_language, + dst_language=args.dst_language, + sleep_seconds=args.sleep_seconds, + user_agent=args.user_agent, + service_urls=args.service_urls) if not translated_text or len(translated_text) != len(regions): raise exceptions.AutosubException( diff --git a/autosub/core.py b/autosub/core.py index c9de1bee..12f70245 100644 --- a/autosub/core.py +++ b/autosub/core.py @@ -308,62 +308,6 @@ def gcsv1_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man return text_list -def list_to_gtv2( # pylint: disable=too-many-locals,too-many-arguments - text_list, - api_key=None, - concurrency=constants.DEFAULT_CONCURRENCY, - src_language=constants.DEFAULT_SRC_LANGUAGE, - dst_language=constants.DEFAULT_DST_LANGUAGE, - lines_per_trans=constants.DEFAULT_LINES_PER_TRANS): - """ - Give a text list, generate translated text list from GoogleTranslatorV2 api. 
- """ - - if not text_list: - return None - - pool = multiprocessing.Pool(concurrency) - google_translate_api_key = api_key - translator = \ - speech_trans_api.GoogleTranslatorV2(api_key=google_translate_api_key, - src=src_language, - dst=dst_language) - - print(_("\nTranslating text from \"{0}\" to \"{1}\".").format( - src_language, - dst_language)) - - if len(text_list) > lines_per_trans: - trans_list =\ - [text_list[i:i + lines_per_trans] for i in range(0, len(text_list), lines_per_trans)] - else: - trans_list = [text_list] - - widgets = [_("Translation: "), - progressbar.Percentage(), ' ', - progressbar.Bar(), ' ', - progressbar.ETA()] - pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(trans_list)).start() - - try: - translated_text = [] - for i, transcript in enumerate(pool.imap(translator, trans_list)): - if transcript: - translated_text.append(transcript) - else: - translated_text.append([""] * len(trans_list[i])) - pbar.update(i) - pbar.finish() - except KeyboardInterrupt: - pbar.finish() - pool.terminate() - pool.join() - print(_("Cancelling transcription.")) - return 1 - - return translated_text - - def list_to_googletrans( # pylint: disable=too-many-locals, too-many-arguments, too-many-branches, too-many-statements text_list, src_language=constants.DEFAULT_SRC_LANGUAGE, diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo index 22ba0c2b99ee8a2cdbc47debf4252a58f80696ca..f3d628b99d512356202284c0265667bfd02b483a 100644 GIT binary patch delta 611 zcmXZZOG^S#6u|M5ND%Zgu(YhEha%d<)u_y*MF~O)Ap|Xgz(}$Oy@(b)m`Dg*^bJ~u ztBAr@t=dFULC`lSXe~j|B7*)Gg<*d8%)Oj*&za}sck(K3vwEsT&YdDI5qWGDsl#_1 z!Vv<{As>VcKLmNf1daE{GR9iFt5$ii7NX`M2@^7KHP5-X}~iaKnL6K z7l+Yk7K!3q#R7&|A7VQ?Sci{TgRiLfd}0^&kfawa9K&s&*b-qbpXK@h=XyOAdW1k%qSz)q=ljx>g zedz*@;0`9RgqrXNgBYgo{1q8Tt;j-sz$%X7E)L>7CehO&lE*Y|;T3LRe474if;*hT ZTC${YzTyF9SU*RLOVzo?;f-~pryfV32l-U+1nf%Fw1%?qTT 
z0BLO?{Rv1L0%yfb=XNEe51F18F@VeF{i}^!)_VK8UMq%b8mdxHie!Y*uS))<6Ey=BlyP`aI_xT|T?d`TU;W`MsUz<#)e2 z9XN3^5c?@J;dLQhXdZeYPKv?dL>|asP%3~2;l1!U+y{S!QP>`m`rs$<09?92>V-#O zTtNB@4r0$IOG(&IE|eImXBM%CZ8b(?gbt-hg$x*lHu0@gDTnTM-jde6^!6>`}pMq!M_T|z5%vd2!z%gi^Dp)CHve3717+x*mfbg+W=_2n3 zR!f9go6989MC~vM?u6F=h;Q#J!~f)m2IxqDL(r1rS>JvM5(OQGHt{`Z3!Q}IlRkwu z?>w}5({L{bxd=I%B5S-4=z-5;&%sf+BP_iLFNI^$Q5p|bcmuD&pRspWdILA&rwuIl zIkbcRv)1!Ou$lzJeg`)4zM+OZfro1G4E^WVc}ey?+=BfZ%;o)>2$cp~)_eVL$LcH# z835bib!Y<{?~>lf-hH>U2|Gke+ql*Z9=qWjb`)CfL^gW!I$=wkw7&3d8q&+mEa4(#Ott=4c*mm-vt<*I90$Mr#;&a|-R*TnP28(##1bbjF zv<;@A&Hv7~ufs3-pa3Ot$?AW))w?(*+xS`rrf!o~!+uD(^oEfMUqe1lf8XaZg1c;k zLHH*;1ZUvz{nFF0c{?Qt&%%#k#{*s{LzKoe>os>$F@YFy>XNQ8pm3)b6p`xz>uf;FQSw=Cmr_g14Ogvpf%PeQFM z&vc&@hbvb5gxSp#>!`8^lJ3EMQjzYZMxLtK?=SXgcLQm_ZEL*!4lLY?3@ecFLdTJM}o;iefyt?9(c4j zI^s;`MgrsO^WJsV=AVjpb`_mUbVgU63no{US5y{OR@GHhkGF)21GP?ncq+vyt-l=l e|Lj#;CIaK7TW6fEmgDixo^78cIg_2Gq5lA;_~o_$ delta 2747 zcma)-X-rgC6o3zI(5e+tQ91w8-g3wt!f(`;5kekW^iV3uS03Ak}7Ja zXcVKB8jID4IxbOMn$&1D#-!G!iD_+`!ib5MXiR@tyL|V(X;TuK^k&Zc-nq-U=bdxz zont3MuAdL_-jC=|Ewm*_Kjc|E5ih*ekq7OoP>~pT1Lnbp@NJk8CZfXwuo!lHNu(4y zpcVcI+l7eayezT?J+HG!C-g5NOXOx3kqv6AZ+Ll!j{L478WV0qwXo$Ckv{Z4?Iyxc zI){r)hS5+NS_U(qwYx|*{re&qgZCjOFmEc{iaxEU$RF@q7_L6`ZZ8ph zkS6GdPho#}I8vk$UWIosaHO}$K)9i=$U6EDM41tZVpkOz9V$mpz(V*1Tnys~>s!zZ z=ffXhBvBs0CV481I(oxkkrem~R0jLSi+m50;Yt`b zMC1xw2bG?gAW{R5K^4hqi6Rl$djU7VZpkoP%oJZK&+rgVdk=1l77nP_26cm*dFuRIdomi)EPk z1(mP@{WrJ~ZXAvi@OQWaZXzt|OCv{6;cyF7^NKS>vM~4nDo5jAH6t+$?nA!?Co^sv zMNz`zqlpatkG!v`Dirw>PKDu_A>(Fbn zL?)t7A8*d9nqaa9-eJ5Bs+wA#ZLag~<|UVEJT*}ym4z=RiyUP_GTu1hcR}Xln1;?l zb?&di0+=+7P{M7{0-wR@*jtcmI$k?nWB~mSp*qfCGfc*s^h##1V=jzkf*me{RnP@n zU>8_0(=_CSO0R;~aJUBERpW%C6pqRlSqkf5G7Ou;hrtX*<`+VOBOkr4=p`(?D1#B4 ziCY`G6#UiSy{e=%{)l|OXkgkXdAs1h(Mv9I91Oj6mt4S~$8G@)rs)h9b(?;?} z;mZVsGLw#oYGlZ3RJ~F{)g#y_dZP{J8IN$vj0mV(ZwfLDQ8^rns1}8gR^#jwbtAl- zFr$$jqmAWBW~GOz$|9I$e!qE1kXEBm)Hui0XyH5)g72JA;})deWJa)$i_vI_JW0QC zv{Xk5(jQ4fRMk^rM(#5phr{?|qF&9j-%q{JIP#L?8y*@HtMR5Xt=cR^Wp^Nwgm7s~ 
zx_@P8|IX1lPN&tbYx7(dhud!P=-PsGZFZ^0=5)CI+aflG#us}$rS8nMG^@>1Tvm`; z=qyRISany4#bGP5xzj9V9;dskz~3usbV%=@G0(q^W#{MmmPF6=&5S;jHmj_pKzC`* z<(l2*(A`=oN?Q)N^p$10+vCgbHN^iU`c!-0ya5ru#W7vxCCqYqv;tjsXdcG(qC_os zx#lU>HFnWf=xa5bTeI3$>5ieA&7-Zh+3lK3FST0=bz@fP+7zqty@rQciS8+O7WqGk zIn&YJmK%Q?^*Gs6D|72bscJt5K9stgt87Jj(NNW`EqCdr3kj`vNYrc&+_bPR!Q!$m zb(EE8Q&MWX_HDz9hvk(I51#i2GVg``-(1|B zRP*1tSUY9DZ^e|W32kiETz@Wbvf{zs_XG9Yn-6Z~7pOkr>zDbTZ6)hLH1onO2TzfQd=ELUt>gFq(TP{{M9j*xM-xt`vqw#W04#%)&g diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po index 4badcfb3..1c8bf217 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po @@ -7,8 +7,8 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-01-31 12:02+0800\n" -"PO-Revision-Date: 2020-01-31 12:08+0800\n" +"POT-Creation-Date: 2020-02-01 12:04+0800\n" +"PO-Revision-Date: 2020-01-31 16:08+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" @@ -37,7 +37,7 @@ msgid "" "the number of the arguments is required.\n" "Author: {author}\n" "Email: {email}\n" -"Bug report: https://github.com/agermanidis/autosub\n" +"Bug report: {homepage}\n" msgstr "" "确保有空格的参数被引号包围。\n" "默认参数指的是,\n" @@ -46,111 +46,99 @@ msgstr "" "该选项所需要的参数个数。\n" "作者: {author}\n" "Email: {email}\n" -"问题反馈: https://github.com/agermanidis/autosub\n" +"问题反馈: {homepage}\n" -#: autosub/options.py:60 +#: autosub/options.py:61 msgid "Input Options" msgstr "输入选项" -#: autosub/options.py:61 +#: autosub/options.py:62 msgid "Options to control input." msgstr "控制输入的选项。" -#: autosub/options.py:63 +#: autosub/options.py:64 msgid "Language Options" msgstr "语言选项" -#: autosub/options.py:64 +#: autosub/options.py:65 msgid "Options to control language." 
msgstr "控制语言的选项。" -#: autosub/options.py:66 +#: autosub/options.py:67 msgid "Output Options" msgstr "输出选项" -#: autosub/options.py:67 +#: autosub/options.py:68 msgid "Options to control output." msgstr "控制输出的选项。" -#: autosub/options.py:69 +#: autosub/options.py:70 msgid "Speech Options" msgstr "语音选项" -#: autosub/options.py:70 +#: autosub/options.py:71 msgid "" "Options to control speech-to-text. If Speech Options not given, it will only " "generate the times." msgstr "控制语音转文字的选项。" -#: autosub/options.py:73 +#: autosub/options.py:74 msgid "py-googletrans Options" msgstr "py-googletrans选项" -#: autosub/options.py:74 +#: autosub/options.py:75 msgid "" "Options to control translation. Default method to translate. Could be " "blocked at any time." msgstr "控制翻译的选项。同时也是默认的翻译方法。可能随时会被谷歌爸爸封。" -#: autosub/options.py:78 -msgid "Google Translate V2 Options" -msgstr "Google Translate V2选项" - #: autosub/options.py:79 -msgid "" -"Options to control translation.(Not been tested) If the API key is given, it " -"will replace the py-googletrans method." -msgstr "" -"控制翻译的选项。(未被测试过)如果提供了API key,它会取代py-googletrans的翻译" -"方法。" - -#: autosub/options.py:83 msgid "Network Options" msgstr "网络选项" -#: autosub/options.py:84 +#: autosub/options.py:80 msgid "Options to control network." msgstr "控制网络的选项。" -#: autosub/options.py:86 +#: autosub/options.py:82 msgid "Other Options" msgstr "其他选项" -#: autosub/options.py:87 +#: autosub/options.py:83 msgid "Other options to control." msgstr "控制其他东西的选项。" -#: autosub/options.py:89 +#: autosub/options.py:85 msgid "Audio Processing Options" msgstr "音频处理选项" -#: autosub/options.py:90 +#: autosub/options.py:86 msgid "Options to control audio processing." msgstr "控制音频处理的选项。" -#: autosub/options.py:92 +#: autosub/options.py:88 msgid "Auditok Options" msgstr "Auditok的选项" -#: autosub/options.py:93 +#: autosub/options.py:89 msgid "" "Options to control Auditok when not using external speech regions control." 
msgstr "不使用外部语音区域控制时,用于控制Auditok的选项。" -#: autosub/options.py:96 +#: autosub/options.py:92 msgid "List Options" msgstr "列表选项" -#: autosub/options.py:97 +#: autosub/options.py:93 msgid "List all available arguments." msgstr "列出所有可选参数。" -#: autosub/options.py:101 autosub/options.py:110 autosub/options.py:119 -#: autosub/options.py:201 autosub/options.py:410 autosub/options.py:607 +#: autosub/options.py:97 autosub/options.py:106 autosub/options.py:115 +#: autosub/options.py:197 autosub/options.py:382 autosub/options.py:579 msgid "path" msgstr "路径" -#: autosub/options.py:102 +#: autosub/options.py:98 msgid "" "The path to the video/audio/subtitles file that needs to generate subtitles. " "When it is a subtitles file, the program will only translate it. (arg_num = " @@ -159,7 +147,7 @@ msgstr "" "用于生成字幕文件的视频/音频/字幕文件。如果输入文件是字幕文件,程序仅会对其进" "行翻译。(参数个数为1)" -#: autosub/options.py:111 +#: autosub/options.py:107 msgid "" "Path to the subtitles file which provides external speech regions, which is " "one of the formats that pysubs2 supports and overrides the default method to " @@ -168,7 +156,7 @@ msgstr "" "提供外部语音区域(时间轴)的字幕文件。该字幕文件格式需要是pysubs2所支持的。使" "用后会替换掉默认的自动寻找语音区域(时间轴)的功能。(参数个数为1)" -#: autosub/options.py:121 +#: autosub/options.py:117 msgid "" "Valid when your output format is \"ass\"/\"ssa\". Path to the subtitles file " "which provides \"ass\"/\"ssa\" styles for your output. If the arg_num is 0, " @@ -179,11 +167,11 @@ msgstr "" "幕文件。如果不提供参数,它会使用来自\"-esr\"/\"--external-speech-regions\"选" "项提供的样式。更多信息详见\"-sn\"/\"--styles-name\"。(参数个数为0或1)" -#: autosub/options.py:132 +#: autosub/options.py:128 msgid "style-name" msgstr "样式名" -#: autosub/options.py:133 +#: autosub/options.py:129 msgid "" "Valid when your output format is \"ass\"/\"ssa\" and \"-sty\"/\"--styles\" " "is given. Adds \"ass\"/\"ssa\" styles to your events. 
If not provided, " @@ -198,24 +186,24 @@ msgstr "" "数作为样式名。如果参数个数为2,源语言字幕行会使用第一个参数作为样式名。目标语" "言行使用第二个。(参数个数为1或2)" -#: autosub/options.py:146 autosub/options.py:157 autosub/options.py:167 -#: autosub/options.py:579 autosub/options.py:595 +#: autosub/options.py:142 autosub/options.py:153 autosub/options.py:163 +#: autosub/options.py:551 autosub/options.py:567 msgid "lang_code" msgstr "语言代码" -#: autosub/options.py:147 +#: autosub/options.py:143 #, python-format msgid "" "Lang code/Lang tag for speech-to-text. Recommend using the Google Cloud " "Speech reference lang codes. WRONG INPUT WON'T STOP RUNNING. But use it at " -"your own risk. Ref: https://cloud.google.com/speech-to-text/docs/" -"languages(arg_num = 1) (default: %(default)s)" +"your own risk. Ref: https://cloud.google.com/speech-to-text/docs/languages" +"(arg_num = 1) (default: %(default)s)" msgstr "" "用于语音识别的语言代码/语言标识符。推荐使用Google Cloud Speech的参考语言代" "码。错误的输入不会终止程序。但是后果自负。参考:https://cloud.google.com/" "speech-to-text/docs/languages(参数个数为1)(默认参数: %(default)s)" -#: autosub/options.py:158 +#: autosub/options.py:154 #, python-format msgid "" "Lang code/Lang tag for translation source language. If not given, use " @@ -225,10 +213,10 @@ msgid "" msgstr "" "用于翻译的源语言的语言代码/语言标识符。如果没有提供,会使用langcodes-py2从列" "表里获取一个最佳匹配选项\"-S\"/\"--speech-language\"的语言代码。如果使用py-" -"googletrans作为翻译的方法,错误的输入会终止运行。(参数个数为1)(默认参数" -"为%(default)s)" +"googletrans作为翻译的方法,错误的输入会终止运行。(参数个数为1)(默认参数为%" +"(default)s)" -#: autosub/options.py:168 +#: autosub/options.py:164 #, python-format msgid "" "Lang code/Lang tag for translation destination language. 
Same attention in " @@ -237,11 +225,11 @@ msgstr "" "用于翻译的目标语言的语言代码/语言标识符。同样的注意参考选项\"-SRC\"/\"--src-" "language\"。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:174 autosub/options.py:421 +#: autosub/options.py:170 autosub/options.py:393 msgid "mode" msgstr "模式" -#: autosub/options.py:176 +#: autosub/options.py:172 msgid "" "Allow langcodes-py2 to get a best matching lang code when your input is " "wrong. Only functional for py-googletrans and Google Speech V2. Available " @@ -254,7 +242,7 @@ msgstr "" "\"指\"-S\"/\"--speech-language\"。\"src\"指\"-SRC\"/\"--src-language\"。\"d" "\"指\"-D\"/\"--dst-language\"。(参数个数在1到3之间)" -#: autosub/options.py:190 +#: autosub/options.py:186 msgid "" "An integer between 0 and 100 to control the good match group of \"-lsc\"/\"--" "list-speech-codes\" or \"-ltc\"/\"--list-translation-codes\" or the match " @@ -266,7 +254,7 @@ msgstr "" "best-match\"选项中的最佳匹配结果。结果会是一组“好的匹配”,其分数需要超过这个" "参数的值。(参数个数为1)" -#: autosub/options.py:202 +#: autosub/options.py:198 msgid "" "The output path for subtitles file. (default: the \"input\" path combined " "with the proper name tails) (arg_num = 1)" @@ -274,11 +262,11 @@ msgstr "" "输出字幕文件的路径。(默认值是\"input\"路径和适当的文件名后缀的结合)(参数个" "数为1)" -#: autosub/options.py:208 +#: autosub/options.py:204 msgid "format" msgstr "格式" -#: autosub/options.py:209 +#: autosub/options.py:205 msgid "" "Destination subtitles format. If not provided, use the extension in the \"-o" "\"/\"--output\" arg. If \"-o\"/\"--output\" arg doesn't provide the " @@ -291,25 +279,25 @@ msgstr "" "果\"-i\"/\"--input\"的参数是一个字幕文件,那么使用和字幕文件相同的扩展名。" "(参数个数为1)(默认参数为{dft})" -#: autosub/options.py:222 +#: autosub/options.py:218 msgid "" "Prevent pauses and allow files to be overwritten. Stop the program when your " "args are wrong. 
(arg_num = 0)" msgstr "" "避免任何暂停和覆写文件的行为。如果参数有误,会直接停止程序。(参数个数为0)" -#: autosub/options.py:227 +#: autosub/options.py:223 msgid "type" msgstr "种类" -#: autosub/options.py:230 +#: autosub/options.py:226 #, python-format msgid "" "Output more files. Available types: regions, src, dst, bilingual, dst-lf-" "src, src-lf-dst, all. dst-lf-src: dst language and src language in the same " "event. And dst is ahead of src. src-lf-dst: src language and dst language in " -"the same event. And src is ahead of dst. (6 >= arg_num >= 1) (default: " -"%(default)s)" +"the same event. And src is ahead of dst. (6 >= arg_num >= 1) (default: %" +"(default)s)" msgstr "" "输出更多的文件。可选种类:regions, src, dst, bilingual, dst-lf-src, src-lf-" "dst, all.(时间轴,源语言字幕,目标语言字幕,双语字幕,dst-lf-src,,src-lf-" @@ -317,7 +305,7 @@ msgstr "" "src-lf-dst:源语言和目标语言在同一字幕行中,且源语言先于目标语言。(参数个数" "在6和1之间)(默认参数为%(default)s)" -#: autosub/options.py:243 +#: autosub/options.py:239 msgid "" "Valid when your output format is \"sub\". If input, it will override the fps " "check on the input file. Ref: https://pysubs2.readthedocs.io/en/latest/api-" @@ -327,11 +315,11 @@ msgstr "" "查。参考:https://pysubs2.readthedocs.io/en/latest/api-reference." "html#supported-input-output-formats(参数个数为1)" -#: autosub/options.py:252 +#: autosub/options.py:248 msgid "API_code" msgstr "API代码" -#: autosub/options.py:254 +#: autosub/options.py:250 #, python-format msgid "" "Choose which Speech-to-Text API to use. Currently support: gsv2: Google " @@ -344,7 +332,7 @@ msgstr "" "Text V1P1Beta1 (https://cloud.google.com/speech-to-text/docs)。(参数个数为" "1)(默认参数为%(default)s)" -#: autosub/options.py:264 +#: autosub/options.py:260 msgid "" "The API key for Speech-to-Text API. (arg_num = 1) Currently support: gsv2: " "The API key for gsv2. (default: Free API key) gcsv1: The API key for gcsv1. 
" @@ -354,7 +342,7 @@ msgstr "" "认参数为免费API密钥)gcsv1:gcsv1的API密钥。(如果使用了,可以覆盖 \"-sa\"/" "\"--service-account\"提供的服务账号凭据)" -#: autosub/options.py:291 +#: autosub/options.py:287 #, python-format msgid "" "API response for text confidence. A float value between 0 and 1. Confidence " @@ -364,14 +352,14 @@ msgid "" msgstr "" "API用于识别可信度的回应参数。一个介于0和1之间的浮点数。可信度越高意味着结果越" "好。输入这个参数会导致所有低于这个结果的识别结果被删除。参考:https://github." -"com/BingLingGroup/google-speech-v2#response(参数个数为1)(默认参数" -"为%(default)s)" +"com/BingLingGroup/google-speech-v2#response(参数个数为1)(默认参数为%" +"(default)s)" -#: autosub/options.py:301 +#: autosub/options.py:297 msgid "Drop any regions without speech recognition result. (arg_num = 0)" msgstr "删除所有没有语音识别结果的空轴。(参数个数为0)" -#: autosub/options.py:309 +#: autosub/options.py:305 #, python-format msgid "" "Number of concurrent Speech-to-Text requests to make. (arg_num = 1) " @@ -379,21 +367,21 @@ msgid "" msgstr "" "用于Speech-to-Text请求的并行数量。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:314 autosub/options.py:529 autosub/options.py:538 -#: autosub/options.py:547 +#: autosub/options.py:310 autosub/options.py:501 autosub/options.py:510 +#: autosub/options.py:519 msgid "second" msgstr "秒" -#: autosub/options.py:317 +#: autosub/options.py:313 #, python-format msgid "" "(Experimental)Seconds to sleep between two translation requests. (arg_num = " "1) (default: %(default)s)" msgstr "" -"(实验性)在两次翻译请求之间睡眠(暂停)的时间。(参数个数为1)(默认参数" -"为%(default)s)" +"(实验性)在两次翻译请求之间睡眠(暂停)的时间。(参数个数为1)(默认参数为%" +"(default)s)" -#: autosub/options.py:325 +#: autosub/options.py:321 msgid "" "(Experimental)Customize request urls. Ref: https://py-googletrans." "readthedocs.io/en/latest/ (arg_num >= 1)" @@ -401,43 +389,18 @@ msgstr "" "(实验性)自定义多个请求URL。参考:https://py-googletrans.readthedocs.io/en/" "latest/(参数个数大于等于1)" -#: autosub/options.py:332 +#: autosub/options.py:328 msgid "" "(Experimental)Customize User-Agent headers. Same docs above. 
(arg_num = 1)" msgstr "" "(实验性)自定义用户代理(User-Agent)头部。同样的参考文档如上。(参数个数为" "1)" -#: autosub/options.py:339 -msgid "" -"The Google Translate V2 API key to be used. If not provided, use free API " -"(py-googletrans) instead. (arg_num = 1)" -msgstr "" -"用于Google Translate V2的API key。如果没有提供,则使用免费的API也就是py-" -"googletrans。(参数个数为1)" - -#: autosub/options.py:348 -#, python-format -msgid "" -"Number of lines per Google Translate V2 request. (arg_num = 1) (default: " -"%(default)s)" -msgstr "" -"Google Translate V2请求的每行行数。(参数个数为1)(默认参数为%(default)s)" - -#: autosub/options.py:356 -#, python-format -msgid "" -"Number of concurrent Google translate V2 API requests to make. (arg_num = 1) " -"(default: %(default)s)" -msgstr "" -"Google translate V2 API请求的并行数量。(参数个数为1)(默认参数" -"为%(default)s)" - -#: autosub/options.py:363 +#: autosub/options.py:335 msgid "Change the Google Speech V2 API URL into the http one. (arg_num = 0)" msgstr "将Google Speech V2 API的URL改为http类型。(参数个数为0)" -#: autosub/options.py:371 +#: autosub/options.py:343 #, python-format msgid "" "Add https proxy by setting environment variables. If arg_num is 0, use const " @@ -446,7 +409,7 @@ msgstr "" "通过设置环境变量的方式添加https代理。如果参数个数是0,使用const里的代理URL。" "(参数个数为0或1)(const为%(const)s)" -#: autosub/options.py:379 +#: autosub/options.py:351 #, python-format msgid "" "Add http proxy by setting environment variables. If arg_num is 0, use const " @@ -455,33 +418,33 @@ msgstr "" "通过设置环境变量的方式添加http代理。如果参数个数是0,使用const里的代理URL。" "(参数个数为0或1)(const为%(const)s)" -#: autosub/options.py:385 +#: autosub/options.py:357 msgid "username" msgstr "用户名" -#: autosub/options.py:386 +#: autosub/options.py:358 msgid "Set proxy username. (arg_num = 1)" msgstr "设置代理用户名。(参数个数为1)" -#: autosub/options.py:391 +#: autosub/options.py:363 msgid "password" msgstr "密码" -#: autosub/options.py:392 +#: autosub/options.py:364 msgid "Set proxy password. 
(arg_num = 1)" msgstr "设置代理密码。(参数个数为1)" -#: autosub/options.py:398 +#: autosub/options.py:370 #, python-format msgid "Show %(prog)s help message and exit. (arg_num = 0)" msgstr "显示%(prog)s的帮助信息并退出。(参数个数为0)" -#: autosub/options.py:406 +#: autosub/options.py:378 #, python-format msgid "Show %(prog)s version and exit. (arg_num = 0)" msgstr "显示%(prog)s的版本信息并退出。(参数个数为0)" -#: autosub/options.py:411 +#: autosub/options.py:383 msgid "" "Set service account key environment variable. It should be the file path of " "the JSON file that contains your service account credentials. Can be " @@ -491,10 +454,10 @@ msgid "" msgstr "" "设置服务账号密钥的环境变量。应该是包含服务帐号凭据的JSON文件的文件路径。如果" "使用了,会被API密钥选项覆盖。参考:https://cloud.google.com/docs/" -"authentication/getting-started 当前支持:" -"gcsv1(GOOGLE_APPLICATION_CREDENTIALS)(参数个数为1)" +"authentication/getting-started 当前支持:gcsv1" +"(GOOGLE_APPLICATION_CREDENTIALS)(参数个数为1)" -#: autosub/options.py:422 +#: autosub/options.py:394 msgid "" "Option to control audio process. If not given the option, do normal " "conversion work. \"y\": pre-process the input first then start normal " @@ -513,15 +476,15 @@ msgstr "" "scripts/subgen.sh https://ffmpeg.org/ffmpeg-filters.html)(参数个数介于1和2" "之间)" -#: autosub/options.py:446 +#: autosub/options.py:418 msgid "Keep audio processing files to the output path. (arg_num = 0)" msgstr "将音频处理中产生的文件放在输出路径中。(参数个数为0)" -#: autosub/options.py:451 autosub/options.py:469 autosub/options.py:483 +#: autosub/options.py:423 autosub/options.py:441 autosub/options.py:455 msgid "command" msgstr "命令" -#: autosub/options.py:452 +#: autosub/options.py:424 msgid "" "This arg will override the default audio pre-process command. Every line of " "the commands need to be in quotes. Input file name is {in_}. 
Output file " @@ -530,7 +493,7 @@ msgstr "" "这个参数会取代默认的音频预处理命令。每行命令需要放在一个引号内。输入文件名写" "为{in_}。输出文件名写为{out_}。(参数个数大于1)" -#: autosub/options.py:464 +#: autosub/options.py:436 #, python-format msgid "" "Number of concurrent ffmpeg audio split process to make. (arg_num = 1) " @@ -538,18 +501,18 @@ msgid "" msgstr "" "用于ffmpeg音频切割的进程并行数量。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:470 +#: autosub/options.py:442 msgid "" "(Experimental)This arg will override the default audio conversion command. " -"\"[\", \"]\" are optional arguments meaning you can remove them. \"{{\", " -"\"}}\" are required arguments meaning you can't remove them. Default command " -"to process the audio: {dft} (arg_num = 1)" +"\"[\", \"]\" are optional arguments meaning you can remove them. \"{{\", \"}}" +"\" are required arguments meaning you can't remove them. Default command to " +"process the audio: {dft} (arg_num = 1)" msgstr "" "(实验性)这个参数会取代默认的音频转换命令。\"[\", \"]\" 是可选参数,可以移" "除。\"{{\", \"}}\"是必选参数,不可移除。以下是用于处理音频的默认命令:{dft}" "(默认参数为1)" -#: autosub/options.py:484 +#: autosub/options.py:456 msgid "" "(Experimental)This arg will override the default audio split command. Same " "attention above. Default: {dft} (arg_num = 1)" @@ -557,24 +520,24 @@ msgstr "" "(实验性)这个参数会取代默认的音频转换命令。相同的注意如上。默认:{dft}(参数" "个数为1)" -#: autosub/options.py:493 +#: autosub/options.py:465 msgid "file_suffix" msgstr "文件名后缀" -#: autosub/options.py:495 +#: autosub/options.py:467 #, python-format msgid "" "(Experimental)This arg will override the default API audio suffix. (arg_num " "= 1) (default: %(default)s)" msgstr "" -"(实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数" -"为%(default)s)" +"(实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数为%(default)" +"s)" -#: autosub/options.py:501 +#: autosub/options.py:473 msgid "sample_rate" msgstr "采样率" -#: autosub/options.py:504 +#: autosub/options.py:476 #, python-format msgid "" "(Experimental)This arg will override the default API audio sample rate(Hz). 
" @@ -583,11 +546,11 @@ msgstr "" "(实验性)这个参数会取代默认的给API使用的音频采样率(赫兹)。(参数个数为1)" "(默认参数为%(default)s)" -#: autosub/options.py:510 +#: autosub/options.py:482 msgid "channel_num" msgstr "声道数" -#: autosub/options.py:513 +#: autosub/options.py:485 #, python-format msgid "" "(Experimental)This arg will override the default API audio channel. (arg_num " @@ -596,11 +559,11 @@ msgstr "" "(实验性)这个参数会取代默认的给API使用的音频声道数量。(参数个数为1)(默认" "参数为%(default)s)" -#: autosub/options.py:519 +#: autosub/options.py:491 msgid "energy" msgstr "能量(相对值)" -#: autosub/options.py:522 +#: autosub/options.py:494 #, python-format msgid "" "The energy level which determines the region to be detected. Ref: https://" @@ -611,23 +574,23 @@ msgstr "" "latest/apitutorial.html#examples-using-real-audio-data(参数个数为1)(默认参" "数为%(default)s)" -#: autosub/options.py:532 +#: autosub/options.py:504 #, python-format msgid "" "Minimum region size. Same docs above. (arg_num = 1) (default: %(default)s)" msgstr "" -"最小语音区域大小。同样的参考文档如上。(参数个数为1)(默认参数" -"为%(default)s)" +"最小语音区域大小。同样的参考文档如上。(参数个数为1)(默认参数为%(default)" +"s)" -#: autosub/options.py:541 +#: autosub/options.py:513 #, python-format msgid "" "Maximum region size. Same docs above. (arg_num = 1) (default: %(default)s)" msgstr "" -"最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数" -"为%(default)s)" +"最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数为%(default)" +"s)" -#: autosub/options.py:550 +#: autosub/options.py:522 #, python-format msgid "" "Maximum length of a tolerated silence within a valid audio activity. 
Same " @@ -636,7 +599,7 @@ msgstr "" "在一段有效的音频活动区域中可以容忍的最大(连续)安静区域。同样的参考文档如" "上。(参数个数为1)(默认参数为%(default)s)" -#: autosub/options.py:557 autosub/options.py:563 +#: autosub/options.py:529 autosub/options.py:535 msgid "" "Ref: https://auditok.readthedocs.io/en/latest/core.html#class-summary " "(arg_num = 0)" @@ -644,7 +607,7 @@ msgstr "" "参考:https://auditok.readthedocs.io/en/latest/core.html#class-summary(参数" "个数为0)" -#: autosub/options.py:569 +#: autosub/options.py:541 msgid "" "List all available subtitles formats. If your format is not supported, you " "can use ffmpeg or SubtitleEdit to convert the formats. You need to offer fps " @@ -655,7 +618,7 @@ msgstr "" "SubtitleEdit来对其进行转换。如果输出格式是\"sub\"且输入文件是音频无法获取到视" "频帧率时,你需要提供fps选项指定帧率。(参数个数为0)" -#: autosub/options.py:582 +#: autosub/options.py:554 msgid "" "List all recommended \"-S\"/\"--speech-language\" Google Speech-to-Text " "language codes. If no arg is given, list all. Or else will list get a group " @@ -673,7 +636,7 @@ msgstr "" "言)-扩展-私有(https://www.zhihu.com/question/21980689/answer/93615123)(参" "数个数为0或1)" -#: autosub/options.py:598 +#: autosub/options.py:570 msgid "" "List all available \"-SRC\"/\"--src-language\" py-googletrans translation " "language codes. Or else will list get a group of \"good match\" of the arg. " @@ -683,7 +646,7 @@ msgstr "" "言代码。否则会给出一个“好的匹配”的清单。同样的参考文档如上。(参数个数为0或" "1)" -#: autosub/options.py:608 +#: autosub/options.py:580 #, python-format msgid "" "Use py-googletrans to detect a sub file's first line language. And list a " @@ -696,13 +659,44 @@ msgstr "" "的)。参考:https://cloud.google.com/speech-to-text/docs/languages(参数个数" "为1)(默认参数 %(default)s)" +#~ msgid "Google Translate V2 Options" +#~ msgstr "Google Translate V2选项" + +#~ msgid "" +#~ "Options to control translation.(Not been tested) If the API key is given, " +#~ "it will replace the py-googletrans method." 
+#~ msgstr "" +#~ "控制翻译的选项。(未被测试过)如果提供了API key,它会取代py-googletrans的" +#~ "翻译方法。" + +#~ msgid "" +#~ "The Google Translate V2 API key to be used. If not provided, use free API " +#~ "(py-googletrans) instead. (arg_num = 1)" +#~ msgstr "" +#~ "用于Google Translate V2的API key。如果没有提供,则使用免费的API也就是py-" +#~ "googletrans。(参数个数为1)" + +#~ msgid "" +#~ "Number of lines per Google Translate V2 request. (arg_num = 1) (default: %" +#~ "(default)s)" +#~ msgstr "" +#~ "Google Translate V2请求的每行行数。(参数个数为1)(默认参数为%(default)" +#~ "s)" + +#~ msgid "" +#~ "Number of concurrent Google translate V2 API requests to make. (arg_num = " +#~ "1) (default: %(default)s)" +#~ msgstr "" +#~ "Google translate V2 API请求的并行数量。(参数个数为1)(默认参数为%" +#~ "(default)s)" + #~ msgid "" #~ "Output more files. Available types: regions, src, dst, bilingual, all. (4 " #~ ">= arg_num >= 1) (default: %(default)s)" #~ msgstr "" #~ "输出更多的文件。可选种类:regions, src, dst, bilingual, all.(时间轴,源语" -#~ "言字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数" -#~ "为%(default)s)" +#~ "言字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数为%" +#~ "(default)s)" #~ msgid "" #~ "The Google Speech V2 API key to be used. If not provided, use free API " diff --git a/autosub/options.py b/autosub/options.py index 4372c726..62800581 100644 --- a/autosub/options.py +++ b/autosub/options.py @@ -50,9 +50,10 @@ def get_cmd_args(): # pylint: disable=too-many-statements "the number of the arguments is required.\n" "Author: {author}\n" "Email: {email}\n" - "Bug report: https://github.com/agermanidis/autosub\n").format( + "Bug report: {homepage}\n").format( author=metadata.AUTHOR, - email=metadata.AUTHOR_EMAIL), + email=metadata.AUTHOR_EMAIL, + homepage=metadata.HOMEPAGE), add_help=False, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -74,11 +75,6 @@ def get_cmd_args(): # pylint: disable=too-many-statements _('Options to control translation. ' 'Default method to translate. 
' 'Could be blocked at any time.')) - gtv2_group = parser.add_argument_group( - _('Google Translate V2 Options'), - _('Options to control translation.(Not been tested) ' - 'If the API key is given, ' - 'it will replace the py-googletrans method.')) network_group = parser.add_argument_group( _('Network Options'), _('Options to control network.')) @@ -333,30 +329,6 @@ def get_cmd_args(): # pylint: disable=too-many-statements "Same docs above. " "(arg_num = 1)")) - gtv2_group.add_argument( - '-gtv2', '--gtransv2', - metavar='key', - help=_("The Google Translate V2 API key to be used. " - "If not provided, use free API (py-googletrans) instead. " - "(arg_num = 1)")) - - gtv2_group.add_argument( - '-lpt', '--lines-per-trans', - metavar='integer', - type=int, - default=constants.DEFAULT_LINES_PER_TRANS, - help=_("Number of lines per Google Translate V2 request. " - "(arg_num = 1) (default: %(default)s)")) - - gtv2_group.add_argument( - '-tc', '--trans-concurrency', - metavar='integer', - type=int, - default=constants.DEFAULT_CONCURRENCY, - help=_("Number of concurrent " - "Google translate V2 API requests to make. " - "(arg_num = 1) (default: %(default)s)")) - network_group.add_argument( '-hsa', '--http-speech-api', action='store_true', diff --git a/autosub/speech_trans_api.py b/autosub/speech_trans_api.py index 429c327b..c2038a62 100644 --- a/autosub/speech_trans_api.py +++ b/autosub/speech_trans_api.py @@ -15,7 +15,6 @@ # Import third-party modules import requests -from googleapiclient.discovery import build from google.cloud import speech_v1p1beta1 from google.protobuf.json_format import MessageToDict @@ -229,36 +228,3 @@ def __call__(self, filename): return None return None - - -class GoogleTranslatorV2(object): # pylint: disable=too-few-public-methods - """ - Class for GoogleTranslatorV2 translating text from one language to another. 
- """ - def __init__(self, api_key, src, dst): - self.api_key = api_key - self.service = build('translate', 'v2', - developerKey=self.api_key) - self.src = src - self.dst = dst - - def __call__(self, trans_list): - try: - if not trans_list: - return None - - trans_str = '\n'.join(trans_list) - - result = self.service.translations().list( # pylint: disable=no-member - source=self.src, - target=self.dst, - q=[trans_str]).execute() - - if 'translations' in result and result['translations'] and \ - 'translatedText' in result['translations'][0]: - return '\n'.split(result['translations'][0]['translatedText']) - - return None - - except KeyboardInterrupt: - return None diff --git a/setup.py b/setup.py index a4602208..9a59fbf4 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,6 @@ }, package_data={str('autosub'): [str('data/locale/zh_CN/LC_MESSAGES/*mo')]}, install_requires=[ - 'google-api-python-client>=1.7.11', 'requests>=2.3.0', 'pysubs2>=0.2.4', 'progressbar2>=3.34.3', From 9abd0d18b6b46b1a2ca8c302c0a61aaf1b545226 Mon Sep 17 00:00:00 2001 From: BingLingGroup <42505588+BingLingGroup@users.noreply.github.com> Date: Sun, 2 Feb 2020 14:54:50 +0800 Subject: [PATCH 08/10] Fix pylint issues Docs update translations, changelog, readme, build scripts --- CHANGELOG.md | 17 +- README.md | 190 +++++++++++------- autosub/__init__.py | 8 +- autosub/cmdline_utils.py | 66 +++--- autosub/core.py | 10 +- .../LC_MESSAGES/autosub.cmdline_utils.mo | Bin 9844 -> 9742 bytes .../LC_MESSAGES/autosub.cmdline_utils.po | 5 +- .../locale/zh_CN/LC_MESSAGES/autosub.core.mo | Bin 1799 -> 1799 bytes .../locale/zh_CN/LC_MESSAGES/autosub.core.po | 23 +-- .../zh_CN/LC_MESSAGES/autosub.ffmpeg_utils.mo | Bin 1614 -> 1614 bytes .../zh_CN/LC_MESSAGES/autosub.ffmpeg_utils.po | 16 +- .../zh_CN/LC_MESSAGES/autosub.metadata.mo | Bin 1126 -> 1007 bytes .../zh_CN/LC_MESSAGES/autosub.metadata.po | 15 +- .../zh_CN/LC_MESSAGES/autosub.options.mo | Bin 22700 -> 22643 bytes .../zh_CN/LC_MESSAGES/autosub.options.po | 
85 ++++---- autosub/ffmpeg_utils.py | 3 +- autosub/metadata.py | 13 +- autosub/options.py | 4 +- autosub/speech_trans_api.py | 39 ++-- docs/CHANGELOG.zh-Hans.md | 15 +- docs/README.zh-Hans.md | 160 ++++++++++----- requirements.txt | 19 +- scripts/create_release.py | 28 +-- scripts/nuitka_build.bat | 1 - scripts/pyinstaller_build.bat | 2 +- scripts/pyinstaller_build.spec | 2 +- 26 files changed, 413 insertions(+), 308 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5944749f..47aee475 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## TOC - [Unreleased](#unreleased) +- [0.5.4-alpha - 2020-01-31](#054-alpha---2020-01-31) + - [Added](#added054-alpha) + - [Changed](#changed054-alpha) - [0.5.3-alpha - 2019-12-30](#053-alpha---2019-12-30) - [Changed](#changed053-alpha) - [0.5.2-alpha - 2019-11-05](#052-alpha---2019-11-05) @@ -29,16 +32,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Click up arrow to go back to TOC. -### [Unreleased] +### Unreleased -#### Added(Unreleased) +### [0.5.4-alpha] - 2020-01-31 -- Add Google Cloud Speech-to-Text support. [issue #10](https://github.com/BingLingGroup/autosub/issues/10) +#### Added(0.5.4-alpha) + +- Add basic Google Cloud Speech-to-Text support. [issue #10](https://github.com/BingLingGroup/autosub/issues/10) - Add more bilingual subtitles formats output support. [issue #72](https://github.com/BingLingGroup/autosub/issues/72) -#### Changed(Unreleased) +#### Changed(0.5.4-alpha) - Fix output format limits when input is a subtitles file. +- Remove gtransv2 support. ### [0.5.3-alpha] - 2019-12-30 @@ -165,7 +171,8 @@ Click up arrow to go back to TOC.  
↑  -[Unreleased]: https://github.com/BingLingGroup/autosub/compare/0.5.3-alpha...HEAD +[Unreleased]: https://github.com/BingLingGroup/autosub/compare/0.5.4-alpha...HEAD +[0.5.4-alpha]: https://github.com/BingLingGroup/autosub/compare/0.5.3-alpha...0.5.4-alpha [0.5.3-alpha]: https://github.com/BingLingGroup/autosub/compare/0.5.2-alpha...0.5.3-alpha [0.5.2-alpha]: https://github.com/BingLingGroup/autosub/compare/0.5.1-alpha...0.5.2-alpha [0.5.1-alpha]: https://github.com/BingLingGroup/autosub/compare/0.5.0-alpha...0.5.1-alpha diff --git a/README.md b/README.md index be126b27..6c3223ac 100644 --- a/README.md +++ b/README.md @@ -53,9 +53,9 @@ Click up arrow to go back to TOC. ### Description -Autosub is an automatic subtitles generating utility. It can detect speech regions automatically by using Auditok, split the audio files according to regions by using ffmpeg, transcribe speech based on [Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2)([Chrome-Web-Speech-api](https://github.com/agermanidis/autosub/issues/1)) and translate the subtitles' text by using py-googletrans. It currently not supports the latest Google Cloud APIs. +Autosub is an automatic subtitles generating utility. It can detect speech regions automatically by using Auditok, split the audio files according to regions by using ffmpeg, transcribe speech based on several APIs and translate the subtitles' text by using py-googletrans. -The new features mentioned above are only available after autosub-0.5.0a (0.5.0-alpha). Not available on PyPI or the original repo. +The new features mentioned above are only available in the latest alpha branch. Not available on PyPI or the original repo. ### License @@ -191,15 +191,44 @@ PyPI version(autosub-0.3.12) is not recommended using on windows because it just A video/audio/subtitles file. -If it is a video or audio file, use ffmpeg to convert the format into [the proper one](https://github.com/gillesdemey/google-speech-v2#data) for API. 
Current default format for [Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2) is 24bit/44100Hz/mono flac. API only accept mono audio files varied from the data from the repo mentioned above. +If it is a video or audio file, use ffmpeg to convert the format into [the proper one](https://github.com/gillesdemey/google-speech-v2#data) for API. Any format supported by ffmpeg is OK to input, but the output or processed format for API is limited by API and autosub codes. -Also, you can use the built-in audio pre-processing function. The default [pre-processing commands](https://github.com/agermanidis/autosub/issues/40#issuecomment-509928060) depend on the ffmpeg-normalize and ffmpeg. The commands include three commands. The [first](https://trac.ffmpeg.org/wiki/AudioChannelManipulation) is for converting stereo to mono. The [second](https://superuser.com/questions/733061/reduce-background-noise-and-optimize-the-speech-from-an-audio-clip-using-ffmpeg) is for filtering out the sound not in the frequency of speech. The third is to normalize the audio to make sure it is not too loud or too quiet. If you are not satisfied with the default commands, you can also modified them yourself by input `-apc` option. +Supported formats below: + +[Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2) + +- 24bit/44100Hz/mono FLAC(default) +- Other format like OGG_OPUS isn't supported by API. (I've tried modifying requests headers or json requests and it just didn't work) Or format like PCM has less bits per sample but more storage usage than FLAC. Although the API support it but I think it's unnecessary to modify codes to support it. 
+ +[Google Cloud Speech-to-Text API](https://cloud.google.com/speech-to-text/docs/encoding) + +- Supported + - 24bit/44100Hz/mono FLAC(default) +- Supported but not default args (more info on [Transcribe Audio To Subtitles](#transcribe-audio-to-subtitles)) + - 8000Hz|12000Hz|16000Hz|24000Hz|48000Hz/mono OGG_OPUS + - MP3 + - 16bit/mono PCM + +Also, you can use the built-in audio pre-processing function.(Though Google [doesn't recommend](https://cloud.google.com/speech-to-text/docs/best-practices) doing this.) The default [pre-processing commands](https://github.com/agermanidis/autosub/issues/40#issuecomment-509928060) depend on the ffmpeg-normalize and ffmpeg. The commands include three commands. The [first](https://trac.ffmpeg.org/wiki/AudioChannelManipulation) is for converting stereo to mono. The [second](https://superuser.com/questions/733061/reduce-background-noise-and-optimize-the-speech-from-an-audio-clip-using-ffmpeg) is for filtering out the sound not in the frequency of speech. The third is to normalize the audio to make sure it is not too loud or too quiet. If you are not satisfied with the default commands, you can also modified them yourself by input `-apc` option. Still, it currently only supports 24bit/44100Hz/mono FLAC format. If it is a subtitles file and you give the proper arguments, only translate it by py-googletrans. #### Split -Since this Speech-to-Text API only accept short-term audio which is not longer than [10 to 15 seconds](https://github.com/gillesdemey/google-speech-v2#caveats), we need to split one audio file into many small pieces which contain the speech parts. Autosub uses Auditok to detect speech regions. +Audio length limits: + +[Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2) + +- No longer than [10 to 15 seconds](https://github.com/gillesdemey/google-speech-v2#caveats). +- In autosub it is set as the [10-seconds-limit](https://github.com/BingLingGroup/autosub/blob/dev/autosub/constants.py#L61). 
+ +[Google Cloud Speech-to-Text API](https://cloud.google.com/speech-to-text/docs/encoding) + +- No longer than [1 minute](https://cloud.google.com/speech-to-text/docs/sync-recognize). +- In autosub it is currently set the same as the [10-seconds-limit](https://github.com/BingLingGroup/autosub/blob/dev/autosub/constants.py#L61). +- Currently only support sync-recognize means only short-term audio supported. + +Autosub uses Auditok to detect speech regions. And then use them to split as well as convert the video/audio into many audio fragments. Each fragment per region per API request. All these audio fragments are converted directly from input to avoid any extra quality loss. Or uses external regions from the file that pysubs2 supports like `.ass` or `.srt`. This will allow you to manually adjust the regions to get better recognition result. @@ -223,7 +252,7 @@ To manually match or see the full list of the lang codes, run the utility with t To get a subtitles first line language, you can use `-dsl` to detect. -- Currently, if autosub is using [Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2) as the method to transcribe, it allows to send the lang codes not from the `--list-speech-codes`, which means in this case the program won't stop. +- Currently, autosub allows to send the lang codes not from the `--list-speech-codes`, which means in this case the program won't stop. - Though you can input the speech lang code whatever you want, need to point out that if not using the codes on the list but somehow the API accept it, [Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2) recognizes your audio in the ways that depend on your IP address which is uncontrollable by yourself. This is a known issue and I ask for a [pull request](https://github.com/agermanidis/autosub/pull/136) in the original repo. @@ -334,18 +363,36 @@ autosub -i input_file -k ...(other options) Speech audio fragments to speech language subtitles. 
-Speech language subtitles only. +Use default [Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2) to transcribe speech language subtitles only. ``` autosub -i input_file -S lang_code ``` -Speech language subtitles as a part. +Use default [Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2) to transcribe speech language subtitles as a part. ``` autosub -i input_file -S lang_code -of src ...(other options) ``` +Use Google Cloud Speech-to-Text API service account(GOOGLE_APPLICATION_CREDENTIALS has already been set in system environment variable) to transcribe. + +``` +autosub -i input_file -sapi gcsv1 -S lang_code ...(other options) +``` + +Use Google Cloud Speech-to-Text API service account(GOOGLE_APPLICATION_CREDENTIALS is set by `-sa`) to transcribe. + +``` +autosub -i input_file -sapi gcsv1 -S lang_code -sa path_to_key_file ...(other options) +``` + +Use Google Cloud Speech-to-Text API key to transcribe. + +``` +autosub -i input_file -sapi gcsv1 -S lang_code -skey API_key ...(other options) +``` +  ↑  ##### Translate Subtitles @@ -433,10 +480,10 @@ Language Options: language". (3 >= arg_num >= 1) -mns integer, --min-score integer An integer between 0 and 100 to control the good match - group of "-lsc"/"--list-speech-codes" or "-ltc - "/"--list-translation-codes" or the match result in - "-bm"/"--best-match". Result will be a group of "good - match" whose score is above this arg. (arg_num = 1) + group of "-lsc"/"--list-speech-codes" or "-ltc"/"-- + list-translation-codes" or the match result in "-bm"/" + --best-match". Result will be a group of "good match" + whose score is above this arg. (arg_num = 1) Output Options: Options to control output. @@ -456,38 +503,43 @@ Output Options: the program when your args are wrong. (arg_num = 0) -of [type [type ...]], --output-files [type [type ...]] Output more files. Available types: regions, src, dst, - bilingual, all. 
(4 >= arg_num >= 1) (default: - [u'dst']) + bilingual, dst-lf-src, src-lf-dst, all. dst-lf-src: + dst language and src language in the same event. And + dst is ahead of src. src-lf-dst: src language and dst + language in the same event. And src is ahead of dst. + (6 >= arg_num >= 1) (default: ['dst']) -fps float, --sub-fps float Valid when your output format is "sub". If input, it will override the fps check on the input file. Ref: https://pysubs2.readthedocs.io/en/latest/api- reference.html#supported-input-output-formats (arg_num = 1) - -der, --drop-empty-regions - Drop any regions without text. (arg_num = 0) Speech Options: Options to control speech-to-text. If Speech Options not given, it will only generate the times. -sapi API_code, --speech-api API_code Choose which Speech-to-Text API to use. Currently - supported: gsv2: Google Speech V2 + support: gsv2: Google Speech V2 (https://github.com/gillesdemey/google-speech-v2). gcsv1: Google Cloud Speech-to-Text V1P1Beta1 (https://cloud.google.com/speech-to-text/docs). (arg_num = 1) (default: gsv2) -skey key, --speech-key key The API key for Speech-to-Text API. (arg_num = 1) - Currently supported: gsv2: The API key for gsv2. + Currently support: gsv2: The API key for gsv2. (default: Free API key) gcsv1: The API key for gcsv1. - (Can be overridden by "-sa"/"--service-account") + (If used, override the credentials given by"-sa"/"-- + service-account") -mnc float, --min-confidence float API response for text confidence. A float value between 0 and 1. Confidence bigger means the result is better. Input this argument will drop any result below it. Ref: https://github.com/BingLingGroup/google- speech-v2#response (arg_num = 1) (default: 0.0) + -der, --drop-empty-regions + Drop any regions without speech recognition result. + (arg_num = 0) -sc integer, --speech-concurrency integer Number of concurrent Speech-to-Text requests to make. 
(arg_num = 1) (default: 10) @@ -505,20 +557,6 @@ py-googletrans Options: (Experimental)Customize User-Agent headers. Same docs above. (arg_num = 1) -Google Translate V2 Options: - Options to control translation.(Not been tested) If the API key is given, it will replace the py-googletrans method. - - -gtv2 key, --gtransv2 key - The Google Translate V2 API key to be used. If not - provided, use free API (py-googletrans) instead. - (arg_num = 1) - -lpt integer, --lines-per-trans integer - Number of lines per Google Translate V2 request. - (arg_num = 1) (default: 15) - -tc integer, --trans-concurrency integer - Number of concurrent Google translate V2 API requests - to make. (arg_num = 1) (default: 10) - Network Options: Options to control network. @@ -546,10 +584,10 @@ Other Options: -sa path, --service-account path Set service account key environment variable. It should be the file path of the JSON file that contains - your service account credentials. If used, override - the API key options. Ref: + your service account credentials. Can be overridden by + the API key. Ref: https://cloud.google.com/docs/authentication/getting- - started Currently supported: gcsv1 + started Currently support: gcsv1 (GOOGLE_APPLICATION_CREDENTIALS) (arg_num = 1) Audio Processing Options: @@ -562,23 +600,21 @@ Audio Processing Options: succeed, no more conversion before the speech-to-text procedure. "o": only pre-process the input audio. ("-k"/"--keep" is true) "s": only split the input - audio. ("-k"/"--keep" is true) "n": FORCED NO EXTRA - CHECK/CONVERSION before the speech-to-text procedure. 
- Default command to pre-process the audio: ffmpeg - -hide_banner -i "{in_}" -af "asplit[a],aphasemeter=vid - eo=0,ametadata=select:key=lavfi.aphasemeter.phase:valu - e=-0.005:function=less,pan=1c|c0=c0,aresample=async=1: - first_pts=0,[a]amix" -ac 1 -f flac "{out_}" | ffmpeg - -hide_banner -i "{in_}" -af lowpass=3000,highpass=200 - "{out_}" | ffmpeg-normalize -v "{in_}" -ar 44100 -ofmt - flac -c:a flac -pr -p -o "{out_}" (Ref: https://github - .com/stevenj/autosub/blob/master/scripts/subgen.sh - https://ffmpeg.org/ffmpeg-filters.html) (2 >= arg_num - >= 1) + audio. ("-k"/"--keep" is true) Default command to pre- + process the audio: ffmpeg -hide_banner -i "{in_}" -af + "asplit[a],aphasemeter=video=0,ametadata=select:key=la + vfi.aphasemeter.phase:value=-0.005:function=less,pan=1 + c|c0=c0,aresample=async=1:first_pts=0,[a]amix" -ac 1 + -f flac "{out_}" | ffmpeg -hide_banner -i "{in_}" -af + lowpass=3000,highpass=200 "{out_}" | ffmpeg-normalize + -v "{in_}" -ar 44100 -ofmt flac -c:a flac -pr -p -o + "{out_}" (Ref: https://github.com/stevenj/autosub/blob + /master/scripts/subgen.sh https://ffmpeg.org/ffmpeg- + filters.html) (2 >= arg_num >= 1) -k, --keep Keep audio processing files to the output path. (arg_num = 0) -apc [command [command ...]], --audio-process-cmd [command [command ...]] - This arg will override the default audio process + This arg will override the default audio pre-process command. Every line of the commands need to be in quotes. Input file name is {in_}. Output file name is {out_}. (arg_num >= 1) @@ -587,16 +623,18 @@ Audio Processing Options: make. (arg_num = 1) (default: 10) -acc command, --audio-conversion-cmd command (Experimental)This arg will override the default audio - conversion command. Need to follow the python - references keyword argument. Default command to - process the audio: ffmpeg -hide_banner -y -i "{in_}" - -vn -ac {channel} -ar {sample_rate} "{out_}" (arg_num - = 1) + conversion command. 
"[", "]" are optional arguments + meaning you can remove them. "{", "}" are required + arguments meaning you can't remove them. Default + command to process the audio: ffmpeg -hide_banner -y + -i "{in_}" -vn -ac {channel} -ar {sample_rate} + "{out_}" (arg_num = 1) -asc command, --audio-split-cmd command (Experimental)This arg will override the default audio split command. Same attention above. Default: ffmpeg - -y -ss {start} -i "{in_}" -t {dura} -loglevel error - "{out_}" (arg_num = 1) + -y -ss {start} -i "{in_}" -t {dura} -vn -ac [channel] + -ar [sample_rate] -loglevel error "{out_}" (arg_num = + 1) -asf file_suffix, --api-suffix file_suffix (Experimental)This arg will override the default API audio suffix. (arg_num = 1) (default: .flac) @@ -626,13 +664,11 @@ Auditok Options: audio activity. Same docs above. (arg_num = 1) (default: 0.3) -sml, --strict-min-length - Ref: - https://auditok.readthedocs.io/en/latest/core.html - #class-summary (arg_num = 0) + Ref: https://auditok.readthedocs.io/en/latest/core.htm + l#class-summary (arg_num = 0) -dts, --drop-trailing-silence - Ref: - https://auditok.readthedocs.io/en/latest/core.html - #class-summary (arg_num = 0) + Ref: https://auditok.readthedocs.io/en/latest/core.htm + l#class-summary (arg_num = 0) List Options: List all available arguments. @@ -661,18 +697,18 @@ List Options: Use py-googletrans to detect a sub file's first line language. And list a group of matched language in recommended "-S"/"--speech-language" Google Speech-to- - Text language codes. Ref: https://cloud.google.com - /speech-to-text/docs/languages (arg_num = 1) (default: - None) + Text language codes. Ref: + https://cloud.google.com/speech-to-text/docs/languages + (arg_num = 1) (default: None) Make sure the argument with space is in quotes. The default value is used when the option is not given at the command line. "(arg_num)" means if the option is given, the number of the arguments is required. 
-Author: Anastasis Germanidis +Author: Bing Ling Email: agermanidis@gmail.com -Bug report: https://github.com/agermanidis/autosub +Bug report: https://github.com/BingLingGroup/autosub ```  ↑  @@ -734,16 +770,30 @@ Bugs and suggestions can be reported at [issues](https://github.com/BingLingGrou I only write the scripts for building standalone executable files on windows, [Nuitka script](scripts/nuitka_build.bat) and [pyinstaller script](scripts/pyinstaller_build.bat). -Nuitka build is pretty tricky. These two environments I tried and worked. +The version after 0.5.4a doesn't support Nuitka build since 0.5.4a import google.cloud package and it contains `pkg_resources.get_distribution` which is not supported by Nuitka due to this [Nuitka issue #146](https://github.com/Nuitka/Nuitka/issues/146). You can manually remove the codes include the google.cloud package and build it. I will consider remove the codes to support Nuitka build in the future version. + +Nuitka build is pretty tricky. These environments I tried and worked. 1. Anaconda recommended by [Nuitka readme](https://github.com/Nuitka/Nuitka#id6). - Python version 3.5 - mingw-w64 package [m2w64-gcc](https://anaconda.org/msys2/m2w64-gcc) (No need to set the environment variables separately if you run it on Anaconda Prompt) 2. Use other C Compiler rather than the [m2w64-gcc](https://anaconda.org/msys2/m2w64-gcc) by setting the value of environment variable `CC` which is the path to the C Compiler, including the executable name. For example, you install [MingW-W64-builds](http://mingw-w64.org/doku.php/download/mingw-builds) somewhere on your storage and you want Nuitka to use it. In this case, Python 3.5 is still recommended. +3. Nuitka 0.6.6 is the latest stable version I tried. Other later version like 0.6.7 doesn't support windows icon input. And for those whose os language is not `en_US`, please set it to `en_US` and then start to build. 
Otherwise you may encounter this [known issue](https://github.com/Nuitka/Nuitka/issues/193). -Pyinstaller build is more robust. No issues encountered when the spec file is written in the proper way. +About Pyinstaller build, you need to manually hook the gcloud module. [Source](https://stackoverflow.com/questions/40076795/pyinstaller-file-fails-to-execute-script-distributionnotfound). + +> . So you need to create a hook file for that names +> +> Python_Path\Lib\site-packages\PyInstaller\hooks\hook-gcloud.py +> +> File contents: + +```Python +from PyInstaller.utils.hooks import copy_metadata +datas = copy_metadata('gcloud') +``` [create_release.py](scripts/create_release.py) is used to make the two release packages. You need to create a `binaries` folder containing ffmpeg and ffmpeg-normalize executable files if you want to create a "fully" standalone release like the one I release. `ffmpeg.exe` and `ffprobe.exe` are from [Zeranoe ffmpeg windows build](https://ffmpeg.zeranoe.com/builds/). `ffmpeg-normalize.exe` built in the same way as I mentioned [above](#install-on-windows). 
diff --git a/autosub/__init__.py b/autosub/__init__.py index 6c3b7cb9..8070ee53 100644 --- a/autosub/__init__.py +++ b/autosub/__init__.py @@ -96,10 +96,10 @@ def main(): # pylint: disable=too-many-branches, too-many-statements, too-many- if not prcs_file: raise exceptions.AutosubException( _("No works done.")) - else: - args.input = prcs_file - raise exceptions.AutosubException( - _("Audio pre-processing complete.\nAll works done.")) + + args.input = prcs_file + raise exceptions.AutosubException( + _("Audio pre-processing complete.\nAll works done.")) if 's' in args.audio_process: args.keep = True diff --git a/autosub/cmdline_utils.py b/autosub/cmdline_utils.py index eb4ec566..96dcc3c8 100644 --- a/autosub/cmdline_utils.py +++ b/autosub/cmdline_utils.py @@ -136,26 +136,26 @@ def validate_io( # pylint: disable=too-many-branches, too-many-statements if len(args.styles_name) > 2: raise exceptions.AutosubException( _("Error: Too many \"-sn\"/\"--styles-name\" arguments.")) + + style_obj = pysubs2.SSAFile.load(args.styles) + ass_styles = style_obj.styles.get(args.styles_name[0]) + if ass_styles: + styles_dict = {args.styles_name[0]: ass_styles} + if len(args.styles_name) == 2: + ass_styles = style_obj.styles.get(args.styles_name[1]) + if ass_styles: + styles_dict[args.styles_name[1]] = ass_styles + else: + raise exceptions.AutosubException( + _("Error: \"-sn\"/\"--styles-name\" " + "arguments aren't in \"{path}\".").format(path=args.styles)) + for item in styles_dict.items(): + styles_list.append(item[0]) + styles_list.append(item[1]) else: - style_obj = pysubs2.SSAFile.load(args.styles) - ass_styles = style_obj.styles.get(args.styles_name[0]) - if ass_styles: - styles_dict = {args.styles_name[0]: ass_styles} - if len(args.styles_name) == 2: - ass_styles = style_obj.styles.get(args.styles_name[1]) - if ass_styles: - styles_dict[args.styles_name[1]] = ass_styles - else: - raise exceptions.AutosubException( - _("Error: \"-sn\"/\"--styles-name\" " - "arguments aren't in 
\"{path}\".").format(path=args.styles)) - for item in styles_dict.items(): - styles_list.append(item[0]) - styles_list.append(item[1]) - else: - raise exceptions.AutosubException( - _("Error: \"-sn\"/\"--styles-name\" " - "arguments aren't in \"{path}\".").format(path=args.styles)) + raise exceptions.AutosubException( + _("Error: \"-sn\"/\"--styles-name\" " + "arguments aren't in \"{path}\".").format(path=args.styles)) if args.ext_regions and not os.path.isfile(args.ext_regions): raise exceptions.AutosubException( @@ -271,7 +271,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret match_list=list(constants.SPEECH_TO_TEXT_LANGUAGE_CODES.keys()), min_score=args.min_score) if best_result: - print(_("Use langcodes-py2 to standardize the result.")) + print(_("Use langcodes to standardize the result.")) args.speech_language = langcodes.standardize_tag(best_result[0]) print(_("Use \"{lang_code}\" instead.").format( lang_code=args.speech_language)) @@ -333,7 +333,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret raise exceptions.AutosubException( _("Match failed. Still using \"{lang_code}\". " "Program stopped.").format( - lang_code=args.src_language)) + lang_code=args.src_language)) else: raise exceptions.AutosubException( @@ -341,7 +341,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret "Run with \"-lsc\"/\"--list-translation-codes\" " "to see all supported languages. " "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - src=args.src_language)) + src=args.src_language)) if not is_dst_matched: if args.best_match and 'd' in args.best_match: @@ -361,7 +361,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret raise exceptions.AutosubException( _("Match failed. Still using \"{lang_code}\". 
" "Program stopped.").format( - lang_code=args.dst_language)) + lang_code=args.dst_language)) else: raise exceptions.AutosubException( @@ -369,7 +369,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret "Run with \"-lsc\"/\"--list-translation-codes\" " "to see all supported languages. " "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - dst=args.dst_language)) + dst=args.dst_language)) if args.dst_language == args.speech_language \ or args.src_language == args.dst_language: @@ -397,8 +397,8 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret if not args.ext_regions: raise exceptions.AutosubException( _("Error: External speech regions file not provided.")) - else: - args.styles = args.ext_regions + + args.styles = args.ext_regions def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return-statements, too-many-statements @@ -439,7 +439,7 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return raise exceptions.AutosubException( _("Match failed. Still using \"{lang_code}\". " "Program stopped.").format( - lang_code=args.src_language)) + lang_code=args.src_language)) else: raise exceptions.AutosubException( @@ -447,7 +447,7 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return "Run with \"-lsc\"/\"--list-translation-codes\" " "to see all supported languages. " "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - src=args.src_language)) + src=args.src_language)) if not is_dst_matched: if args.best_match and 'd' in args.best_match: @@ -466,7 +466,7 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return raise exceptions.AutosubException( _("Match failed. Still using \"{lang_code}\". 
" "Program stopped.").format( - lang_code=args.dst_language)) + lang_code=args.dst_language)) else: raise exceptions.AutosubException( @@ -474,7 +474,7 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return "Run with \"-lsc\"/\"--list-translation-codes\" " "to see all supported languages. " "Or use \"-bm\"/\"--best-match\" to get a best match.").format( - dst=args.dst_language)) + dst=args.dst_language)) if args.dst_language == args.src_language: raise exceptions.AutosubException( @@ -490,8 +490,8 @@ def validate_sp_args(args): # pylint: disable=too-many-branches,too-many-return if not args.ext_regions: raise exceptions.AutosubException( _("Error: External speech regions file not provided.")) - else: - args.styles = args.ext_regions + + args.styles = args.ext_regions def fix_args(args, @@ -544,7 +544,7 @@ def get_timed_text( timed_text = [(region, text) for region, text in zip(regions, text_list) if text] else: # keep empty regions - timed_text = [(region, text) for region, text in zip(regions, text_list)] + timed_text = list(zip(regions, text_list)) return timed_text diff --git a/autosub/core.py b/autosub/core.py index 12f70245..dcbb33cc 100644 --- a/autosub/core.py +++ b/autosub/core.py @@ -448,10 +448,7 @@ def list_to_sub_str( Give an input timed text list, format it to a string. """ - if subtitles_file_format == 'srt' \ - or subtitles_file_format == 'tmp'\ - or subtitles_file_format == 'ass'\ - or subtitles_file_format == 'ssa': + if subtitles_file_format in ('srt', 'tmp', 'ass', 'ssa'): pysubs2_obj = pysubs2.SSAFile() sub_utils.pysubs2_ssa_event_add( src_ssafile=None, @@ -534,10 +531,7 @@ def ssafile_to_sub_str( Give an input SSAFile, format it to a string. 
""" - if subtitles_file_format == 'srt' \ - or subtitles_file_format == 'tmp'\ - or subtitles_file_format == 'ass'\ - or subtitles_file_format == 'ssa': + if subtitles_file_format in ('srt', 'tmp', 'ass', 'ssa'): formatted_subtitles = ssafile.to_string( format_=subtitles_file_format) diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo index f3d628b99d512356202284c0265667bfd02b483a..f2e7ae676f1972ed40360e00a5718fa363b4d4fd 100644 GIT binary patch delta 1268 zcmXZbT}YE*6u|Mb++2QascE+P(dN{$G+T*Z*)%7M#li|S3bO>m#EVEOLx}gzK8(6} z*+mk8Qjj1kx0?_$p{t566zQT5D=a9YDVI-CKJV{&pFO^3=bUFhA~zz&QjRq$ zTy2a(M$;OR9_+Gm9CouyD zFeGBg5R)1fX0aMqun~Q0MLgJzE;LX-zlAY;hWg@*X(E1{NVxnPlne1SpJX6GtM{olU;!(VchjAG@u-3uL@jB{$6NxD?+=h>E3w}baki#ib zg5{{|VbqEZqKEi0#iSEoVhfh86FH8junV7}2KG>Yt;_|~^=aIP&g|97bfUIu1oewe zptkfe2JkiRMpw>iyF1X(FEGYr7~SjXBSuks=g3{{bv=f8zJmL4A^BXCN1H4neW;m^ zQZ|y7yI6~HEW)p-f$jMsWf)A_pHKa@G&fn;gmKhTE})k9E$WMwQA_J4inc0*tvH0* z(;4JX-gB$OUl_wO4vrTWQ4{)!+tEvT^z(ym(w}4Umj&H0NBxTN8|ub%>eq-3sDaO+ z2D*p3Zx&DEJJerIh;)1LK57CEHtD`xZX6%kh5FtW)JmK&nDD<6MLisG)J)%?2DVd{ za%{yu?8gYs;{XMc=23Tj@ohYW&ruI!AyL#`?7=CtpkA72kWs<#Gq}vlR+pvK9JdB7 zVt%&zTzXD4@fwCELU`H3XfeIE&6a=}wiRUW<(UTkkFP~*GSAz5X>H83#?6Uw+kwnP aLC$=NeW%CoZ}itCR`MQXCSH_Gru+lw*^E5^ delta 1343 zcmY+^TS(JU9KiA4bjvQ9%~JD{{d1XCS}x1fEK8@78B&m9H(0ssGM0HMGGd{i30<&} z5Lg72V1$v29x6f~B#ACyg!NERq(KjvB=`_Q-=FiPKifX%9M8`0{C?-mE2R^q-Y*g6 z>%vpWJD#^ANW_V5Gao#^Eh2F^jteksmPi>U;9~S(Hnw6a_RshWSJ96P5m|{vI2UVh z9-hM@5wEn;SkJ&)T#Lc8MRG6$4fJ3PUPb+TH}>Es)P*n35wT8%)KYxCNslL^feLdL1;bX#<5)RKm?nwB~Sbs+<_ z#1*JV){Oc181-x?k(b1CVKn0n*n@{~8CofeCX|6|@L()y>x9b;^kFXb(+N|UfGN~Z zCl+H4o<J8%Vhu^PLu6z3ZvCvYF0!k1Wv*`%u*ZNprAifK5B8ZU`*8|d9ZgX0l5 zYDUed8|cI>IDi|`Yzy4lTx_I&8ryIjTkst9*8oARz6;Y)FK5$?uh2<9f-Fp^4+#f} z!*|S_Y%1`zn+r@iz7J+wjP6(Or~X&a9N5A1R<`-9mSj_&FWVAl&1Z}=nhvSk-RyH& zY;y`}X?eH$+bugolif~F_2H9MuDZ0Ev+IodlSW;=(^KWFtvc4|GU|`Ij9OP+gS+1U 
w(K;L)VmIveoDEt2Cy@_Bm;U$7(VO>1Z?}%Lwv1fr`0>m)daKjFFVP?T4~!nJ)&Kwi diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po index 010e2264..895ba9fb 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-02-01 11:58+0800\n" +"POT-Creation-Date: 2020-02-02 11:17+0800\n" "PO-Revision-Date: 2020-01-31 12:12+0800\n" "Last-Translator: \n" "Language-Team: \n" @@ -139,7 +139,8 @@ msgstr "" "\"选项运行来查看所有支持的语言。" #: autosub/cmdline_utils.py:274 -msgid "Use langcodes-py2 to standardize the result." +#, fuzzy +msgid "Use langcodes to standardize the result." msgstr "使用langcodes-py2来标准化结果。" #: autosub/cmdline_utils.py:276 autosub/cmdline_utils.py:329 diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.core.mo index 13ceb6f9eeff6a69959324ff5fdfd264587ed841..ef11daba2d0cd826c498d80268056b3ae23fa1fd 100644 GIT binary patch delta 22 dcmZqYYvKYp=7@Aobm`=84l0p?So1DzFiqT{;AM6r-qfMp1rls&05{l5TK8 zYHD(ZZel^^8Qh2+wrqSU;SoJxhf{1S!Y(t?8gqLN~Tl8jV^oWzpU;t~b- z{QUHsR0Zdp{L&N!#{kdC>lt163@vpH%yo?{6bwzQOpPZqGD*RN%oU6btPBk%Ycj23 zG@g8g={ODtEMfLCdOmyI)6U*!vznew-}Yoj@3SfEpY=38o73=Y+J@(|TF@BCKmT)1d;ON{ z#Z6c5*~Ao=)Rirz;Ws%_p`>rU5+D7cMLK3SEqk z;#u0iQ?Lh_qAMi{iCnrzM;kszlQ33l-|TBN3$G%Z zqn|NKflsiXwr85O9q-|K3{IB_)$RB_n)!nmq5t*AQUEKd%H(TJ5d1-~69;fr8QX)I zGdT`Cie{@OmrJ=MdWgsH`78>=&t_8$<8i+WDRh5UllCQs15KqYq5zKmTsqf$DB-(fHM zL+|G$bemv|j$qhM%weAvlkgmxN*~R!J28qEX@4=?4|6w!cFR=YPs_Z4~!+P$Y1_;bV??TR*g|ipg0-j!cf6pL%{S6tp89mJeZd22k0KVng%-O0!JfP6Vk>@q&vApOhu-_7iQ zCc(%?d&$O--v<4KUVQEJXSLV|=b^cbKF4y$Am-B_MU!tej>9IW-GXLayVH&%b?}$$ zPA_tKlb6yryNZ)<(vP~Dl84>%UB2WkcW1Vlz@dp7CVwjI`P`klkX`OcUdS#?4d?p{ xeSyY~1M$xIp51|(E&DsFe34Kj6bME5_`>0;@Pbrrpe!S`FZ#-p+EpDZeg_Tkxtst1 delta 1458 
zcmZA1Ur5zw9LMp`&*|~h@=s-HR(g!JVy9pI&dEQgQ)cCH=5({t%A!qcoSI!&mTa2Q ziK1d7qdIrB*f4^@ge7MeMX+GOa2bQF4T>UysNHli79$3Y-cQf(Y9hX#@Avupd;a+M zWoKm6Iejt}a->6fQWk!bFO^6dE|B=uHU1=U90SxB7D|;^JXKmvzGa$3SDnII{2CqV z@8e102h*khu)B!9_}L6;3JzcfkIzs_b1B4UN>8u@Cm5i%*q?Bgbbx$usq_J!^?itD zfeU9#TQPNq?bfA1ssJ!8T+l_2V2oi!{*>WhsfVG)|%mpQ0JCvD|*yFq)2+ zk| z)OV%xcofZ6l`oK{G0+`6h))-?K>YMQ={EJfVd-tGUnJ#XD+X{gn*1kzyc;JG_hL4t z1}Ow79QG4mBB!8JXgXd(Gtekb#w%z#-aylD40p1S+sN7!TWmj|3y%;_;33?&L^^?Y z@icDp-n2{UIfX|gIwQ7W9s9JB0e?cX(pO9EPUKc{!H6$ng8Efe+!H)j&CZa&QDZOJ zANW4;Gc2WkaZK8TP0MWl+qk`i&#;yIrxz4V!^CpwJ34l*kk$|fxzbaJn^#&paDq6A z=JLeqY`=DF%98f1vPX1vjr2YBx9Z7Zf5Q4Fn(;asq#c;Voy@N>3e60#gk_tR53ZHU z@Mkp7@v-l}zSEm*ei2qs-+*1X3(W*$X!`%|$1m^~KFHxo+%@&h(q6pK!ufYn$p26Z zV-Ip%I%lNAugJ&g=2Z{opq(ihmco+pUc@keaiV2?7$(->pC#&%u| z7H*QBQy<#Q{!gNi-(fFV3G%-|t??OY#qx> diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po index 1c8bf217..458bdaef 100644 --- a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po +++ b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.po @@ -7,8 +7,8 @@ msgid "" msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2020-02-01 12:04+0800\n" -"PO-Revision-Date: 2020-01-31 16:08+0800\n" +"POT-Creation-Date: 2020-02-02 11:17+0800\n" +"PO-Revision-Date: 2020-02-02 11:19+0800\n" "Last-Translator: \n" "Language-Team: \n" "MIME-Version: 1.0\n" @@ -196,8 +196,8 @@ msgstr "语言代码" msgid "" "Lang code/Lang tag for speech-to-text. Recommend using the Google Cloud " "Speech reference lang codes. WRONG INPUT WON'T STOP RUNNING. But use it at " -"your own risk. Ref: https://cloud.google.com/speech-to-text/docs/languages" -"(arg_num = 1) (default: %(default)s)" +"your own risk. 
Ref: https://cloud.google.com/speech-to-text/docs/" +"languages(arg_num = 1) (default: %(default)s)" msgstr "" "用于语音识别的语言代码/语言标识符。推荐使用Google Cloud Speech的参考语言代" "码。错误的输入不会终止程序。但是后果自负。参考:https://cloud.google.com/" @@ -207,14 +207,14 @@ msgstr "" #, python-format msgid "" "Lang code/Lang tag for translation source language. If not given, use " -"langcodes-py2 to get a best matching of the \"-S\"/\"--speech-language\". If " +"langcodes to get a best matching of the \"-S\"/\"--speech-language\". If " "using py-googletrans as the method to translate, WRONG INPUT STOP RUNNING. " "(arg_num = 1) (default: %(default)s)" msgstr "" -"用于翻译的源语言的语言代码/语言标识符。如果没有提供,会使用langcodes-py2从列" -"表里获取一个最佳匹配选项\"-S\"/\"--speech-language\"的语言代码。如果使用py-" -"googletrans作为翻译的方法,错误的输入会终止运行。(参数个数为1)(默认参数为%" -"(default)s)" +"用于翻译的源语言的语言代码/语言标识符。如果没有提供,会使用langcodes从列表里" +"获取一个最佳匹配选项\"-S\"/\"--speech-language\"的语言代码。如果使用py-" +"googletrans作为翻译的方法,错误的输入会终止运行。(参数个数为1)(默认参数" +"为%(default)s)" #: autosub/options.py:164 #, python-format @@ -231,16 +231,15 @@ msgstr "模式" #: autosub/options.py:172 msgid "" -"Allow langcodes-py2 to get a best matching lang code when your input is " -"wrong. Only functional for py-googletrans and Google Speech V2. Available " -"modes: s, src, d, all. \"s\" for \"-S\"/\"--speech-language\". \"src\" for " -"\"-SRC\"/\"--src-language\". \"d\" for \"-D\"/\"--dst-language\". (3 >= " -"arg_num >= 1)" +"Allow langcodes to get a best matching lang code when your input is wrong. " +"Only functional for py-googletrans and Google Speech V2. Available modes: s, " +"src, d, all. \"s\" for \"-S\"/\"--speech-language\". \"src\" for \"-SRC\"/" +"\"--src-language\". \"d\" for \"-D\"/\"--dst-language\". 
(3 >= arg_num >= 1)" msgstr "" -"在输入有误的情况下,允许langcodes-py2为输入获取一个最佳匹配的语言代码。仅在使" -"用py-googletrans和Google Speech V2时起作用。可选的模式:s, src, d, all。\"s" -"\"指\"-S\"/\"--speech-language\"。\"src\"指\"-SRC\"/\"--src-language\"。\"d" -"\"指\"-D\"/\"--dst-language\"。(参数个数在1到3之间)" +"在输入有误的情况下,允许langcodes为输入获取一个最佳匹配的语言代码。仅在使用" +"py-googletrans和Google Speech V2时起作用。可选的模式:s, src, d, all。\"s\"指" +"\"-S\"/\"--speech-language\"。\"src\"指\"-SRC\"/\"--src-language\"。\"d\"指" +"\"-D\"/\"--dst-language\"。(参数个数在1到3之间)" #: autosub/options.py:186 msgid "" @@ -296,8 +295,8 @@ msgid "" "Output more files. Available types: regions, src, dst, bilingual, dst-lf-" "src, src-lf-dst, all. dst-lf-src: dst language and src language in the same " "event. And dst is ahead of src. src-lf-dst: src language and dst language in " -"the same event. And src is ahead of dst. (6 >= arg_num >= 1) (default: %" -"(default)s)" +"the same event. And src is ahead of dst. (6 >= arg_num >= 1) (default: " +"%(default)s)" msgstr "" "输出更多的文件。可选种类:regions, src, dst, bilingual, dst-lf-src, src-lf-" "dst, all.(时间轴,源语言字幕,目标语言字幕,双语字幕,dst-lf-src,,src-lf-" @@ -352,8 +351,8 @@ msgid "" msgstr "" "API用于识别可信度的回应参数。一个介于0和1之间的浮点数。可信度越高意味着结果越" "好。输入这个参数会导致所有低于这个结果的识别结果被删除。参考:https://github." -"com/BingLingGroup/google-speech-v2#response(参数个数为1)(默认参数为%" -"(default)s)" +"com/BingLingGroup/google-speech-v2#response(参数个数为1)(默认参数" +"为%(default)s)" #: autosub/options.py:297 msgid "Drop any regions without speech recognition result. (arg_num = 0)" @@ -378,8 +377,8 @@ msgid "" "(Experimental)Seconds to sleep between two translation requests. 
(arg_num = " "1) (default: %(default)s)" msgstr "" -"(实验性)在两次翻译请求之间睡眠(暂停)的时间。(参数个数为1)(默认参数为%" -"(default)s)" +"(实验性)在两次翻译请求之间睡眠(暂停)的时间。(参数个数为1)(默认参数" +"为%(default)s)" #: autosub/options.py:321 msgid "" @@ -454,8 +453,8 @@ msgid "" msgstr "" "设置服务账号密钥的环境变量。应该是包含服务帐号凭据的JSON文件的文件路径。如果" "使用了,会被API密钥选项覆盖。参考:https://cloud.google.com/docs/" -"authentication/getting-started 当前支持:gcsv1" -"(GOOGLE_APPLICATION_CREDENTIALS)(参数个数为1)" +"authentication/getting-started 当前支持:" +"gcsv1(GOOGLE_APPLICATION_CREDENTIALS)(参数个数为1)" #: autosub/options.py:394 msgid "" @@ -504,9 +503,9 @@ msgstr "" #: autosub/options.py:442 msgid "" "(Experimental)This arg will override the default audio conversion command. " -"\"[\", \"]\" are optional arguments meaning you can remove them. \"{{\", \"}}" -"\" are required arguments meaning you can't remove them. Default command to " -"process the audio: {dft} (arg_num = 1)" +"\"[\", \"]\" are optional arguments meaning you can remove them. \"{{\", " +"\"}}\" are required arguments meaning you can't remove them. Default command " +"to process the audio: {dft} (arg_num = 1)" msgstr "" "(实验性)这个参数会取代默认的音频转换命令。\"[\", \"]\" 是可选参数,可以移" "除。\"{{\", \"}}\"是必选参数,不可移除。以下是用于处理音频的默认命令:{dft}" @@ -530,8 +529,8 @@ msgid "" "(Experimental)This arg will override the default API audio suffix. (arg_num " "= 1) (default: %(default)s)" msgstr "" -"(实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数为%(default)" -"s)" +"(实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数" +"为%(default)s)" #: autosub/options.py:473 msgid "sample_rate" @@ -579,16 +578,16 @@ msgstr "" msgid "" "Minimum region size. Same docs above. (arg_num = 1) (default: %(default)s)" msgstr "" -"最小语音区域大小。同样的参考文档如上。(参数个数为1)(默认参数为%(default)" -"s)" +"最小语音区域大小。同样的参考文档如上。(参数个数为1)(默认参数" +"为%(default)s)" #: autosub/options.py:513 #, python-format msgid "" "Maximum region size. Same docs above. 
(arg_num = 1) (default: %(default)s)" msgstr "" -"最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数为%(default)" -"s)" +"最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数" +"为%(default)s)" #: autosub/options.py:522 #, python-format @@ -677,26 +676,26 @@ msgstr "" #~ "googletrans。(参数个数为1)" #~ msgid "" -#~ "Number of lines per Google Translate V2 request. (arg_num = 1) (default: %" -#~ "(default)s)" +#~ "Number of lines per Google Translate V2 request. (arg_num = 1) (default: " +#~ "%(default)s)" #~ msgstr "" -#~ "Google Translate V2请求的每行行数。(参数个数为1)(默认参数为%(default)" -#~ "s)" +#~ "Google Translate V2请求的每行行数。(参数个数为1)(默认参数" +#~ "为%(default)s)" #~ msgid "" #~ "Number of concurrent Google translate V2 API requests to make. (arg_num = " #~ "1) (default: %(default)s)" #~ msgstr "" -#~ "Google translate V2 API请求的并行数量。(参数个数为1)(默认参数为%" -#~ "(default)s)" +#~ "Google translate V2 API请求的并行数量。(参数个数为1)(默认参数" +#~ "为%(default)s)" #~ msgid "" #~ "Output more files. Available types: regions, src, dst, bilingual, all. (4 " #~ ">= arg_num >= 1) (default: %(default)s)" #~ msgstr "" #~ "输出更多的文件。可选种类:regions, src, dst, bilingual, all.(时间轴,源语" -#~ "言字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数为%" -#~ "(default)s)" +#~ "言字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数" +#~ "为%(default)s)" #~ msgid "" #~ "The Google Speech V2 API key to be used. 
If not provided, use free API " diff --git a/autosub/ffmpeg_utils.py b/autosub/ffmpeg_utils.py index 9c61a45f..dc8dc173 100644 --- a/autosub/ffmpeg_utils.py +++ b/autosub/ffmpeg_utils.py @@ -112,7 +112,8 @@ def ffprobe_get_fps( # pylint: disable=superfluous-parens constants.cmd_conversion(command), stdin=open(os.devnull)) num_list = map(int, re.findall(r'\d+', input_str.decode(sys.stdout.encoding))) - if len(list(num_list)) == 2: + num_list = list(num_list) + if len(num_list) == 2: fps = float(num_list[0]) / float(num_list[1]) else: raise ValueError diff --git a/autosub/metadata.py b/autosub/metadata.py index 6e67ab92..682a62a0 100644 --- a/autosub/metadata.py +++ b/autosub/metadata.py @@ -12,16 +12,15 @@ # For gettext po files NAME = 'autosub' -VERSION = '0.5.3-alpha' +VERSION = '0.5.4-alpha' DESCRIPTION = _('Auto-generate subtitles for video/audio/subtitles file.') LONG_DESCRIPTION = ( _('Autosub is an automatic subtitles generating utility. ' 'It can detect speech regions automatically ' 'by using Auditok, split the audio files according to regions ' 'by using ffmpeg, ' - 'transcribe speech based on Chrome-Web-Speech-api and ' - 'translate the subtitles\' text by using py-googletrans. ' - 'It currently not supports the latest Google Cloud APIs.')) -AUTHOR = 'Anastasis Germanidis' -AUTHOR_EMAIL = 'agermanidis@gmail.com' -HOMEPAGE = 'https://github.com/agermanidis/autosub' + 'transcribe speech based on several speech APIs and ' + 'translate the subtitles\' text by using py-googletrans.')) +AUTHOR = 'Bing Ling' +AUTHOR_EMAIL = 'binglinggroup@outlook.com' +HOMEPAGE = 'https://github.com/BingLingGroup/autosub' diff --git a/autosub/options.py b/autosub/options.py index 62800581..56b8ee6f 100644 --- a/autosub/options.py +++ b/autosub/options.py @@ -152,7 +152,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements '-SRC', '--src-language', metavar=_('lang_code'), help=_("Lang code/Lang tag for translation source language. 
" - "If not given, use langcodes-py2 to get a best matching " + "If not given, use langcodes to get a best matching " "of the \"-S\"/\"--speech-language\". " "If using py-googletrans as the method to translate, " "WRONG INPUT STOP RUNNING. " @@ -169,7 +169,7 @@ def get_cmd_args(): # pylint: disable=too-many-statements '-bm', '--best-match', metavar=_('mode'), nargs="*", - help=_("Allow langcodes-py2 to get a best matching lang code " + help=_("Allow langcodes to get a best matching lang code " "when your input is wrong. " "Only functional for py-googletrans and Google Speech V2. " "Available modes: " diff --git a/autosub/speech_trans_api.py b/autosub/speech_trans_api.py index c2038a62..66ead730 100644 --- a/autosub/speech_trans_api.py +++ b/autosub/speech_trans_api.py @@ -71,12 +71,11 @@ def __call__(self, filename): return result return None - else: - # can't find confidence in json - # means it's 100% confident - result = line[:1].upper() + line[1:] - result = result.replace('’', '\'') - return result + # can't find confidence in json + # means it's 100% confident + result = line[:1].upper() + line[1:] + result = result.replace('’', '\'') + return result except (JSONDecodeError, ValueError, IndexError): # no result @@ -136,13 +135,13 @@ def gcsv1p1beta1_service_client( result = result.replace('’', '\'') return result return None - else: - # can't find confidence in json - # means it's 100% confident - result_dict = result_dict['transcript'] - result = result_dict[:1].upper() + result_dict[1:] - result = result.replace('’', '\'') - return result + + # can't find confidence in json + # means it's 100% confident + result_dict = result_dict['transcript'] + result = result_dict[:1].upper() + result_dict[1:] + result = result.replace('’', '\'') + return result except KeyboardInterrupt: return None @@ -216,13 +215,13 @@ def __call__(self, filename): result = result.replace('’', '\'') return result return None - else: - # can't find confidence in json - # means it's 100% 
confident - result_dict = result_dict['transcript'] - result = result_dict[:1].upper() + result_dict[1:] - result = result.replace('’', '\'') - return result + + # can't find confidence in json + # means it's 100% confident + result_dict = result_dict['transcript'] + result = result_dict[:1].upper() + result_dict[1:] + result = result.replace('’', '\'') + return result except KeyboardInterrupt: return None diff --git a/docs/CHANGELOG.zh-Hans.md b/docs/CHANGELOG.zh-Hans.md index d71fbba9..eb698fa8 100644 --- a/docs/CHANGELOG.zh-Hans.md +++ b/docs/CHANGELOG.zh-Hans.md @@ -9,6 +9,9 @@ ## 目录 - [未发布](#未发布) +- [0.5.4-alpha - 2020-01-31](#054-alpha---2020-01-31) + - [添加](#添加054-alpha) + - [改动](#改动054-alpha) - [0.5.3-alpha - 2019-12-30](#053-alpha---2019-12-30) - [改动](#改动053-alpha) - [0.5.2-alpha - 2019-11-05](#052-alpha---2019-11-05) @@ -30,14 +33,17 @@ ### [未发布] -#### 添加(未发布) +### [0.5.4-alpha] - 2020-01-31 -- 添加Google Cloud Speech-to-Text支持。[issue #10](https://github.com/BingLingGroup/autosub/issues/10) +#### 添加(0.5.4-alpha) + +- 添加Google Cloud Speech-to-Text基础支持。[issue #10](https://github.com/BingLingGroup/autosub/issues/10) - 添加更多格式的双语字幕输出支持。[issue #72](https://github.com/BingLingGroup/autosub/issues/72) -#### 改动(未发布) +#### 改动(0.5.4-alpha) - 修复输入是字幕文件时的输出格式限制。 +- 删除gtransv2支持。 ### [0.5.3-alpha] - 2019-12-30 @@ -158,7 +164,8 @@  ↑  -[未发布]: https://github.com/BingLingGroup/autosub/compare/0.5.3-alpha...HEAD +[未发布]: https://github.com/BingLingGroup/autosub/compare/0.5.4-alpha...HEAD +[0.5.4-alpha]: https://github.com/BingLingGroup/autosub/compare/0.5.3-alpha...0.5.4-alpha [0.5.3-alpha]: https://github.com/BingLingGroup/autosub/compare/0.5.2-alpha...0.5.3-alpha [0.5.2-alpha]: https://github.com/BingLingGroup/autosub/compare/0.5.1-alpha...0.5.2-alpha [0.5.1-alpha]: https://github.com/BingLingGroup/autosub/compare/0.5.0-alpha...0.5.1-alpha diff --git a/docs/README.zh-Hans.md b/docs/README.zh-Hans.md index 1f9911a2..a9267ab5 100644 --- a/docs/README.zh-Hans.md +++ 
b/docs/README.zh-Hans.md @@ -53,9 +53,9 @@ ### 介绍 -Autosub是一个字幕自动生成工具。它能使用Auditok来自动检测语音区域,通过ffmpeg根据语音区域来切割音频,通过[Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2)([Chrome-Web-Speech-api](https://github.com/agermanidis/autosub/issues/1))将语音转为文字,以及通过py-googletrans将字幕文本翻译。目前暂时不支持最新的Google Cloud API。 +Autosub是一个字幕自动生成工具。它能使用Auditok来自动检测语音区域,通过ffmpeg根据语音区域来切割音频,通过多个API将语音转为文字,以及通过py-googletrans将字幕文本翻译。 -上方提到的一些新功能仅在autosub-0.5.0a(0.5.0-alpha)后提供。PyPI或者原仓库中的代码并没有这些功能。 +上方提到的一些新功能仅在最新的alpha分支中提供。PyPI或者原仓库中的代码并没有这些功能。 ### 证书 @@ -191,15 +191,44 @@ PyPI的版本(autosub-0.3.12)不推荐在windows上使用,因为它无法 一个视频/音频/字幕文件。 -如果是一个视频或者音频文件,使用ffmpeg来将格式转换为[API支持的格式](https://github.com/gillesdemey/google-speech-v2#data)。当前提供给[Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2)的默认格式是24bit/44100Hz/单声道flac。API只接受单声道文件,而非那个仓库里所说的那样。 +如果是一个视频或者音频文件,使用ffmpeg来将格式转换为[API支持的格式](https://github.com/gillesdemey/google-speech-v2#data)。任何ffmpeg支持输入的格式皆可,但是输出或者处理后给API的格式是受API和autosub代码的限制的。 -你也可以使用自带的音频预处理功能。默认的[音频预处理指令](https://github.com/agermanidis/autosub/issues/40)同时依赖于ffmpeg和ffmpeg-normalize。这些命令包含三个子命令。[第一个](https://trac.ffmpeg.org/wiki/AudioChannelManipulation)是用来把双声道的音频转换为单声道的。[第二个](https://superuser.com/questions/733061/reduce-background-noise-and-optimize-the-speech-from-an-audio-clip-using-ffmpeg)是通过人声的频率范围来过滤噪音的。第三个则是正常化音频的音量来确保它的音量不是太大或者太小。如果你对默认指令的效果不满意,你也可以通过输入`-apc`选项来自行修改。 +当前支持的格式: + +[Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2) + +- 24bit/44100Hz/单声道 FLAC(默认参数) +- 其他格式像OGG_OPUS是API不支持的。(我尝试过修改请求头或者修改为json请求都不行)或者像PCM这种有着比FLAC单位采样更少位宽但更大存储消耗的格式。虽然API支持,但我觉得没必要支持就没改代码去支持。 + +[Google Cloud Speech-to-Text API](https://cloud.google.com/speech-to-text/docs/encoding) + +- 支持 + - 24bit/44100Hz/单声道 FLAC(默认参数) +- 支持但不是默认参数 (详见[语音转文字/翻译API请求](#语音转文字翻译api请求)) + - 8000Hz|12000Hz|16000Hz|24000Hz|48000Hz/单声道 OGG_OPUS + - MP3 + - 16bit/单声道 PCM + 
+你也可以使用自带的音频预处理功能。默认的[音频预处理指令](https://github.com/agermanidis/autosub/issues/40)同时依赖于ffmpeg和ffmpeg-normalize。这些命令包含三个子命令。[第一个](https://trac.ffmpeg.org/wiki/AudioChannelManipulation)是用来把双声道的音频转换为单声道的。[第二个](https://superuser.com/questions/733061/reduce-background-noise-and-optimize-the-speech-from-an-audio-clip-using-ffmpeg)是通过人声的频率范围来过滤噪音的。第三个则是正常化音频的音量来确保它的音量不是太大或者太小。如果你对默认指令的效果不满意,你也可以通过输入`-apc`选项来自行修改。当然,它仍然只支持24bit/44100Hz/单声道 FLAC格式。 如果输入是字幕文件,同时你提供的参数适合,程序仅会将其通过py-googletrans来翻译。 #### 分割 -因为语音转文字API只支持10到15秒这样的[短片段音频](https://github.com/gillesdemey/google-speech-v2#caveats),我们需要将音频文件分割为若干包含语音的小片段。Autosub使用Auditok来检测语音区域。 +音频长度限制: + +[Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2) + +- 不超过[10到15秒](https://github.com/gillesdemey/google-speech-v2#caveats)。 +- 在autosub里面,按照[10秒](https://github.com/BingLingGroup/autosub/blob/dev/autosub/constants.py#L61)来限制。 + +[Google Cloud Speech-to-Text API](https://cloud.google.com/speech-to-text/docs/encoding) + +- 不超过[1分钟](https://cloud.google.com/speech-to-text/docs/sync-recognize)。 +- 在autosub里面,同样按照[10秒](https://github.com/BingLingGroup/autosub/blob/dev/autosub/constants.py#L61)来限制。 +- 现在只支持同步语言识别意味着只支持短语音识别。 + +Autosub使用Auditok来检测语音区域。通过语音区域来分割并转换视频/音频为许多短语音片段。每个区域对应一个片段一个API请求。所有这些片段都是直接从输入转换的,避免任何多余的损失。 或者使用外部文件提供的时间码来作为语音区域输入,支持pysubs2支持的文件格式,如`.ass`或者`.srt`。这样你就可以使用外部工具先制作时间轴然后让程序使用并得到精确度更高的识别结果。 @@ -223,7 +252,7 @@ PyPI的版本(autosub-0.3.12)不推荐在windows上使用,因为它无法 为了得到某个字幕文件第一行的语言,你可以使用`-dsl`选项去检测。 -- 现在,如果autosub使用[Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2)作为语音转文字的方法,它会允许发送不在`--list-speech-codes`清单中的语言代码,意味着在这种情况下程序不会终止运行。 +- 现在,autosub会允许发送不在`--list-speech-codes`清单中的语言代码,意味着在这种情况下程序不会终止运行。 - 尽管你可以输入任何你想输入的语言代码,需要指出的是如果你使用了不在清单上的语言代码但是API接受了,[Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2)可能会按照你的IP地址机型个性化识别,而这是不受你控制的。这是一个已知的问题,我已经在原仓库申请了[拉取请求](https://github.com/agermanidis/autosub/pull/136)。 @@ -334,18 +363,36 @@ autosub -i 输入文件 -k ...(其他选项) 语音音频片段转为语音语言字幕。 
-仅得到语音语言字幕。 +使用默认的[Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2)仅转录得到语音语言字幕。 ``` autosub -i 输入文件 -S 语言代码 ``` -得到语音语言字幕只是处理过程中的一部分。 +使用默认的[Google-Speech-v2](https://github.com/gillesdemey/google-speech-v2)得到语音语言字幕只是处理过程中的一部分。 ``` autosub -i 输入文件 -S 语言代码 -of src ...(其他选项) ``` +使用Google Cloud Speech-to-Text API服务账号(GOOGLE_APPLICATION_CREDENTIALS环境变量已经设置好了)来转录字幕。 + +``` +autosub -i 输入文件 -sapi gcsv1 -S 语言代码 ...(其他选项) +``` + +使用Google Cloud Speech-to-Text API服务账号(GOOGLE_APPLICATION_CREDENTIALS环境变量由选项`-sa`设置)来转录字幕。 + +``` +autosub -i 输入文件 -sapi gcsv1 -S 语言代码 -sa 服务账号凭据文件路径 ...(其他选项) +``` + +使用Google Cloud Speech-to-Text API密钥来转录字幕。 + +``` +autosub -i 输入文件 -sapi gcsv1 -S 语言代码 -skey API密钥 ...(其他选项) +``` +  ↑  ##### 翻译字幕 @@ -417,7 +464,7 @@ usage: 一个介于0和100之间的整数用于控制以下两个选项的匹配结果组,"-lsc"/"--list-speech- codes"以及"-ltc"/"--list-translation-codes"或者在"-bm"/"-- best- - match"选项中的最佳匹配结果。结果会是一组“好的匹配”,其分数需要超过这个参数的值。(参数个数为1) + match"选项中的最佳匹配结果。结果会是一组“好的匹配”,其分数需要超过这个参数的值。(参数个数为1 ) 输出选项: 控制输出的选项。 @@ -428,14 +475,15 @@ usage: input"的参数是一个字幕文件,那么使用和字幕文件相同的扩展名。(参数个数为1)(默认参数为srt) -y, --yes 避免任何暂停和覆写文件的行为。如果参数有误,会直接停止程序。(参数个数为0) -of [种类 [种类 ...]], --output-files [种类 [种类 ...]] - 输出更多的文件。可选种类:regions, src, dst, bilingual, all.(时间轴,源语 - 言字幕,目标语言字幕,双语字幕,所有)(参数个数在4和1之间)(默认参数为['dst']) + 输出更多的文件。可选种类:regions, src, dst, bilingual, dst-lf-src, + src-lf-dst, all.(时间轴,源语言字幕,目标语言字幕,双语字幕,dst-lf- + src,,src-lf-dst,所有)dst-lf- + src:目标语言和源语言在同一字幕行中,且目标语言先于源语言。src-lf-dst:源语言和目标语言在同一字 + 幕行中,且源语言先于目标语言。(参数个数在6和1之间)(默认参数为['dst']) -fps float, --sub-fps float 当输出格式为"sub"时有效。如果提供了该参数,它会取代原有的对输入文件的帧率检查。参考:https://p ysubs2.readthedocs.io/en/latest/api- reference.html#supported-input-output-formats(参数个数为1) - -der, --drop-empty-regions - 删除所有没有文本的空轴。(参数个数为0) 语音选项: 控制语音转文字的选项。 @@ -447,14 +495,15 @@ usage: (https://cloud.google.com/speech-to- text/docs)。(参数个数为1)(默认参数为gsv2) -skey key, --speech-key key - API key for Speech-to-Text API. (arg_num = 1) - Currently supported: gsv2: The API key for gsv2. 
- (default: Free API key) gcsv1: The API key for gcsv1. - (Can be overridden by "-sa"/"--service-account") + Speech-to-Text API的密钥。(参数个数为1)当前支持:gsv2:gsv2的API密钥。(默认 + 参数为免费API密钥)gcsv1:gcsv1的API密钥。(如果使用了,可以覆盖 "-sa"/"-- + service-account"提供的服务账号凭据) -mnc float, --min-confidence float - API用于识别可信度的回应参数。一个介于0和1之间的浮点数。可信度越高意味着结果越好。输入这个参数会导致所有 + API用于识别可信度的回应参数。一个介于0和1之间的浮点数。可信度越高意味着结果越好。输入这个参数会 导致所有 低于这个结果的识别结果被删除。参考:https://github.com/BingLingGroup/goo gle-speech-v2#response(参数个数为1)(默认参数为0.0) + -der, --drop-empty-regions + 删除所有没有语音识别结果的空轴。(参数个数为0) -sc integer, --speech-concurrency integer 用于Speech-to-Text请求的并行数量。(参数个数为1)(默认参数为10) @@ -469,17 +518,6 @@ py-googletrans选项: -ua User-Agent headers, --user-agent User-Agent headers (实验性)自定义用户代理(User-Agent)头部。同样的参考文档如上。(参数个数为1) -Google Translate V2选项: - 控制翻译的选项。(未被测试过)如果提供了API key,它会取代py-googletrans的翻译方法。 - - -gtv2 key, --gtransv2 key - 用于Google Translate V2的API key。如果没有提供,则使用免费的API也就是py- - googletrans。(参数个数为1) - -lpt integer, --lines-per-trans integer - Google Translate V2请求的每行行数。(参数个数为1)(默认参数为15) - -tc integer, --trans-concurrency integer - Google translate V2 API请求的并行数量。(参数个数为1)(默认参数为10) - 网络选项: 控制网络的选项。 @@ -502,41 +540,43 @@ Google Translate V2选项: -h, --help 显示autosub的帮助信息并退出。(参数个数为0) -V, --version 显示autosub的版本信息并退出。(参数个数为0) -sa 路径, --service-account 路径 - 设置服务账号密钥的环境变量。应该是包含服务帐号密钥的 JSON 文件的文件路径。如果使用了,会覆盖API密钥 - 选项。参考:https://cloud.google.com/docs/authentication/get - ting-started + 设置服务账号密钥的环境变量。应该是包含服务帐号凭据的JSON文件的文件路径。如果使用了,会被API密钥 选项覆 + 盖。参考:https://cloud.google.com/docs/authentication/gett + ing-started 当前支持:gcsv1(GOOGLE_APPLICATION_CREDENTIALS)(参数个数为1) 音频处理选项: 控制音频处理的选项。 -ap [模式 [模式 ...]], --audio-process [模式 [模式 ...]] - 控制音频处理的选项。如果没有提供选项,进行正常的格式转换工作。"y":它会先预处理输入文件,如果成功了,在语 + 控制音频处理的选项。如果没有提供选项,进行正常的格式转换工作。"y":它会先预处理输入文件,如果成 功了,在语 音转文字之前不会对音频进行额外的处理。"o":只会预处理输入音频。("-k"/"-- - keep"选项自动置为真)"s":只会分割输入音频。("-k"/"--keep"选项自动置为真)"n":在语 - 音转文字的步骤之前,强制去除多余的格式检查或者转换工作。以下是用于处理音频的默认命令:ffmpeg - -hide_banner -i "{in_}" -af 
"asplit[a],aphasemeter=vid - eo=0,ametadata=select:key=lavfi.aphasemeter.phase:valu - e=-0.005:function=less,pan=1c|c0=c0,aresample=async=1: - first_pts=0,[a]amix" -ac 1 -f flac "{out_}" | ffmpeg - -hide_banner -i "{in_}" -af lowpass=3000,highpass=200 - "{out_}" | ffmpeg-normalize -v "{in_}" -ar 44100 -ofmt - flac -c:a flac -pr -p -o "{out_}"(参考:https://github.co - m/stevenj/autosub/blob/master/scripts/subgen.sh + keep"选项自动置为真)"s":只会分割输入音频。("-k"/"-- + keep"选项自动置为真)以下是用于处理音频的默认命令:ffmpeg -hide_banner -i + "{in_}" -af "asplit[a],aphasemeter=video=0,ametadata=s + elect:key=lavfi.aphasemeter.phase:value=-0.005:functio + n=less,pan=1c|c0=c0,aresample=async=1:first_pts=0,[a]a + mix" -ac 1 -f flac "{out_}" | ffmpeg -hide_banner -i + "{in_}" -af lowpass=3000,highpass=200 "{out_}" | + ffmpeg-normalize -v "{in_}" -ar 44100 -ofmt flac -c:a + flac -pr -p -o "{out_}"(参考:https://github.com/stevenj/ + autosub/blob/master/scripts/subgen.sh https://ffmpeg.org/ffmpeg-filters.html)(参数个数介于1和2之间) -k, --keep 将音频处理中产生的文件放在输出路径中。(参数个数为0) -apc [命令 [命令 ...]], --audio-process-cmd [命令 [命令 ...]] - 这个参数会取代默认的音频处理命令。每行命令需要放在一个引号内。输入文件名写为{in_}。输出文件名写为{ou - t_}。(参数个数大于1) + 这个参数会取代默认的音频预处理命令。每行命令需要放在一个引号内。输入文件名写为{in_}。输出文件名 写为{o + ut_}。(参数个数大于1) -ac integer, --audio-concurrency integer 用于ffmpeg音频切割的进程并行数量。(参数个数为1)(默认参数为10) -acc 命令, --audio-conversion-cmd 命令 - (实验性)这个参数会取代默认的音频转换命令。需要遵循原有的python参考关键词参数写法。以下是用于处理音频 - 的默认命令:ffmpeg -hide_banner -y -i "{in_}" -vn -ac - {channel} -ar {sample_rate} "{out_}"(默认参数为1) + (实验性)这个参数会取代默认的音频转换命令。"[", "]" 是可选参数,可以移除。"{", + "}"是必选参数,不可移除。以下是用于处理音频的默认命令:ffmpeg -hide_banner -y -i + "{in_}" -vn -ac {channel} -ar {sample_rate} + "{out_}"(默认参数为1) -asc 命令, --audio-split-cmd 命令 (实验性)这个参数会取代默认的音频转换命令。相同的注意如上。默认:ffmpeg -y -ss {start} - -i "{in_}" -t {dura} -loglevel error "{out_}"(参数个数为1) + -i "{in_}" -t {dura} -vn -ac [channel] -ar + [sample_rate] -loglevel error "{out_}"(参数个数为1) -asf 文件名后缀, --api-suffix 文件名后缀 (实验性)这个参数会取代默认的给API使用的音频文件后缀。(默认参数为.flac) -asr 采样率, 
--api-sample-rate 采样率 @@ -556,7 +596,7 @@ Auditok的选项: -mxrs 秒, --max-region-size 秒 最大音频区域大小。同样的参考文档如上。(参数个数为1)(默认参数为6.0) -mxcs 秒, --max-continuous-silence 秒 - 在一段有效的音频活动区域中可以容忍的最大(连续)安静区域。同样的参考文档如上。(参数个数为1)(默认参数为0 + 在一段有效的音频活动区域中可以容忍的最大(连续)安静区域。同样的参考文档如上。(参数个数为1)( 默认参数为0 .3) -sml, --strict-min-length 参考:https://auditok.readthedocs.io/en/latest/core.html# @@ -593,7 +633,7 @@ Auditok的选项: 如果选项没有在命令行中提供时会使用的参数。 "参数个数"指的是如果提供了选项, 该选项所需要的参数个数。 -作者: Anastasis Germanidis +作者: Bing Ling Email: agermanidis@gmail.com 问题反馈: https://github.com/agermanidis/autosub ``` @@ -657,16 +697,30 @@ for /f "delims=^" %%i in ('dir /b %in_format%') do ( 我只写了在windows上构建独立运行程序的脚本,一个用于[Nuitka](../scripts/nuitka_build.bat)一个用于[pyinstaller](../scripts/pyinstaller_build.bat)。 -Nuitka的构建有点麻烦。以下是我尝试成功的两种环境。 +0.5.4a以后的版本不支持Nuitka构建,因为0.5.4a导入了google.cloud包,里面包含的`pkg_resources.get_distribution`并不被Nuitka支持,详见[Nuitka issue #146](https://github.com/Nuitka/Nuitka/issues/146)。你可以手动移除和google.cloud包有关的代码并进行构建。我会考虑在以后的版本中移除这部分代码以支持Nuitka构建。 + +Nuitka的构建有点麻烦。以下是我成功尝试的环境。 1. [Nuitka readme](https://github.com/Nuitka/Nuitka#id6)推荐的Anaconda环境。 - Python 3.5版本 - mingw-w64的包[m2w64-gcc](https://anaconda.org/msys2/m2w64-gcc) (只要你是用Anaconda命令行启动的,就不需要单独设置环境变量) 2. 设置环境变量`CC`的值为相应C编译器可执行文件的目录(包括其名称)来使用其他C的编译器,而不是[m2w64-gcc](https://anaconda.org/msys2/m2w64-gcc)。譬如,你想让Nuitka使用你存储上安装的[MingW-W64-builds](http://mingw-w64.org/doku.php/download/mingw-builds)。在这种情况下,依然推荐使用Python 3.5。 +3. Nuikta版本0.6.6是我尝试的最新的稳定版本。其他更新的版本如0.6.7不支持windows图标输入。 其他的C编译器或者Python环境可能在编译时失败,原因不明。对于那些操作系统语言不是`en_US`,请在构建前先设置成`en_US`。否则你会遇到这个[已知问题](https://github.com/Nuitka/Nuitka/issues/193)。 -Pyinstaller的构建就比较稳定。只要配置文件写好了就没遇到问题。 +Pyinstaller的构建则是,你需要手动挂钩gcloud模组。[来源](https://stackoverflow.com/questions/40076795/pyinstaller-file-fails-to-execute-script-distributionnotfound)。 + +> . 
So you need to create a hook file for that names +> +> Python_Path\Lib\site-packages\PyInstaller\hooks\hook-gcloud.py +> +> File contents: + +```Python +from PyInstaller.utils.hooks import copy_metadata +datas = copy_metadata('gcloud') +``` [create_release.py](../scripts/create_release.py)是用来创建两个发布包的。如果你想创建一个“完全”独立运行的发布包像我制作的那样,你需要创建一个`binaries`文件夹,里面包含ffmpeg和ffmpeg-normalize的可执行文件。我使用的`ffmpeg.exe`和`ffprobe.exe`都来自[Zeranoe的ffmpeg windows构建](https://ffmpeg.zeranoe.com/builds/)。`ffmpeg-normalize.exe` 使用[如上](#在Windows上安装)所说的相同的方式进行构建。 diff --git a/requirements.txt b/requirements.txt index 175a9b14..409887d5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,8 @@ -setuptools==40.8.0 -wcwidth==0.1.7 -requests==2.21.0 -pysubs2==0.2.4 -auditok==0.1.5 -langcodes_py2==1.2.0 -googletrans==2.4.0 -protobuf==3.11.2 -google_api_python_client==1.7.11 -langcodes==1.4.1 -progressbar2==3.47.0 +requests>=2.3.0 +pysubs2>=0.2.4 +progressbar2>=3.34.3 +auditok>=0.1.5 +googletrans>=2.4.0 +langcodes-py2>=1.2.0 +wcwidth>=0.1.7 +google-cloud-speech>=1.3.1 diff --git a/scripts/create_release.py b/scripts/create_release.py index cdd451e5..568f2904 100644 --- a/scripts/create_release.py +++ b/scripts/create_release.py @@ -69,20 +69,20 @@ def copytree(src, if os.path.isdir(target_pyi): shutil.rmtree(target_pyi) os.makedirs(target_pyi) - command = "pipreqs --encoding=utf-8 --force --savepath requirements.txt {}".format(package_name) - print(command) - if sys.platform.startswith('win'): - args = command - else: - args = shlex.split(command) - p = subprocess.Popen(args, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - out, err = p.communicate() - if out: - print(out.decode(sys.stdout.encoding)) - if err: - print(err.decode(sys.stdout.encoding)) + # command = "pipreqs --encoding=utf-8 --force --savepath requirements.txt {}".format(package_name) + # print(command) + # if sys.platform.startswith('win'): + # args = command + # else: + # args = shlex.split(command) + # p = 
subprocess.Popen(args, + # stdout=subprocess.PIPE, + # stderr=subprocess.PIPE) + # out, err = p.communicate() + # if out: + # print(out.decode(sys.stdout.encoding)) + # if err: + # print(err.decode(sys.stdout.encoding)) copytree(src=here, dst=target, ext=[".md", ".txt"]) target_docs = os.path.join(target, "docs") os.makedirs(target_docs) diff --git a/scripts/nuitka_build.bat b/scripts/nuitka_build.bat index 76ff50b8..e9db854b 100644 --- a/scripts/nuitka_build.bat +++ b/scripts/nuitka_build.bat @@ -5,7 +5,6 @@ set "package_name=autosub" @echo on cd %~dp0 -call update_requirements.bat cd ..\ pip install -r requirements.txt nuitka "%package_name%" --standalone --output-dir %output_dir% --show-progress --show-scons --show-modules --windows-icon=%icon_dir% --recurse-to=multiprocessing --plugin-enable=multiprocessing --assume-yes-for-downloads \ No newline at end of file diff --git a/scripts/pyinstaller_build.bat b/scripts/pyinstaller_build.bat index 87e1264d..7a09b565 100644 --- a/scripts/pyinstaller_build.bat +++ b/scripts/pyinstaller_build.bat @@ -4,4 +4,4 @@ set dist_dir="..\.build_and_dist\pyinstaller.build" @echo on cd %~dp0 pyinstaller pyinstaller_build.spec --clean --distpath %dist_dir% --workpath %dist_dir% -call cmd \ No newline at end of file +pause \ No newline at end of file diff --git a/scripts/pyinstaller_build.spec b/scripts/pyinstaller_build.spec index f653df77..307e3432 100644 --- a/scripts/pyinstaller_build.spec +++ b/scripts/pyinstaller_build.spec @@ -18,7 +18,7 @@ a = Analysis([r"..\autosub\__main__.py", pathex=[r'C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\x64'], binaries=[], datas=[], - hiddenimports=[], + hiddenimports=["google.cloud.speech"], hookspath=[], runtime_hooks=[], excludes=[], From a7791ca9cf2d36f4b2c9f1544f1910bb0a1d4da6 Mon Sep 17 00:00:00 2001 From: BingLingGroup <42505588+BingLingGroup@users.noreply.github.com> Date: Sun, 2 Feb 2020 16:08:25 +0800 Subject: [PATCH 09/10] Docs update readme --- README.md | 24 
++++++++++++++++++++++-- docs/README.zh-Hans.md | 24 ++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6c3223ac..e51917e2 100644 --- a/README.md +++ b/README.md @@ -200,7 +200,7 @@ Supported formats below: - 24bit/44100Hz/mono FLAC(default) - Other format like OGG_OPUS isn't supported by API. (I've tried modifying requests headers or json requests and it just didn't work) Or format like PCM has less bits per sample but more storage usage than FLAC. Although the API support it but I think it's unnecessary to modify codes to support it. -[Google Cloud Speech-to-Text API](https://cloud.google.com/speech-to-text/docs/encoding) +[Google Cloud Speech-to-Text API](https://cloud.google.com/speech-to-text/docs/encoding) [v1p1beta1](https://cloud.google.com/speech-to-text/docs/reference/rest/v1p1beta1/RecognitionConfig#AudioEncoding) - Supported - 24bit/44100Hz/mono FLAC(default) @@ -288,7 +288,10 @@ DEFAULT_MODE_SET = { 'regions', 'src', 'dst', - 'bilingual'} + 'bilingual', + 'dst-lf-src', + 'src-lf-dst' +} ```  ↑  @@ -393,6 +396,17 @@ Use Google Cloud Speech-to-Text API key to transcribe. autosub -i input_file -sapi gcsv1 -S lang_code -skey API_key ...(other options) ``` +Use 48000Hz OGG_OPUS in Google Cloud Speech-to-Text API. The conversion commands will be automatically modified by this [code](https://github.com/BingLingGroup/autosub/blob/alpha/autosub/__init__.py#L135-L140). + +``` +autosub -i input_file -sapi gcsv1 -asf .ogg -asr 48000 ...(other options) +``` + +Use MP3 in Google Cloud Speech-to-Text API. (Not recommended because OGG_OPUS is better than MP3) +``` +autosub -i input_file -sapi gcsv1 -asf .mp3 ...(other options) +``` +  ↑  ##### Translate Subtitles @@ -411,6 +425,12 @@ Translate subtitles from a subtitles file. autosub -i input_file -SRC lang_code -D lang_code ``` +Translate subtitles via "translate.google.cn", which is directly accessible from some regions.
+ +``` +autosub -i input_file -surl "translate.google.cn" ...(other options) +``` +  ↑  #### Options diff --git a/docs/README.zh-Hans.md b/docs/README.zh-Hans.md index a9267ab5..724ae56e 100644 --- a/docs/README.zh-Hans.md +++ b/docs/README.zh-Hans.md @@ -200,7 +200,7 @@ PyPI的版本(autosub-0.3.12)不推荐在windows上使用,因为它无法 - 24bit/44100Hz/单声道 FLAC(默认参数) - 其他格式像OGG_OPUS是API不支持的。(我尝试过修改请求头或者修改为json请求都不行)或者像PCM这种有着比FLAC单位采样更少位宽但更大存储消耗的格式。虽然API支持,但我觉得没必要支持就没改代码去支持。 -[Google Cloud Speech-to-Text API](https://cloud.google.com/speech-to-text/docs/encoding) +[Google Cloud Speech-to-Text API](https://cloud.google.com/speech-to-text/docs/encoding) [v1p1beta1](https://cloud.google.com/speech-to-text/docs/reference/rest/v1p1beta1/RecognitionConfig#AudioEncoding) - 支持 - 24bit/44100Hz/单声道 FLAC(默认参数) @@ -288,7 +288,10 @@ DEFAULT_MODE_SET = { 'regions', 'src', 'dst', - 'bilingual'} + 'bilingual', + 'dst-lf-src', + 'src-lf-dst' +} ```  ↑  @@ -393,6 +396,17 @@ autosub -i 输入文件 -sapi gcsv1 -S 语言代码 -sa 服务账号凭据文件 autosub -i 输入文件 -sapi gcsv1 -S 语言代码 -skey API密钥 ...(其他选项) ``` +在Google Cloud Speech-to-Text API中使用48000Hz OGG_OPUS格式。转换指令在[此代码](https://github.com/BingLingGroup/autosub/blob/alpha/autosub/__init__.py#L135-L140)中会被自动替换。 + +``` +autosub -i 输入文件 -sapi gcsv1 -asf .ogg -asr 48000 ...(其他选项) +``` + +在Google Cloud Speech-to-Text API中使用MP3格式。(不推荐这样用,因为OGG_OPUS比MP3更好) +``` +autosub -i 输入文件 -sapi gcsv1 -asf .mp3 ...(其他选项) +``` +  ↑  ##### 翻译字幕 @@ -411,6 +425,12 @@ autosub -i 输入文件 -S 语言代码 (-SRC 语言代码) -D 语言代码 autosub -i 输入文件 -SRC 语言代码 -D 语言代码 ``` +使用"translate.google.cn"翻译字幕,"translate.google.cn"可被某地直连。 + +``` +autosub -i 输入文件 -surl "translate.google.cn" ...(其他选项) +``` +  ↑  #### 选项 From d9a474b0a8b642e8d3d07a7810d5f805fc9a476c Mon Sep 17 00:00:00 2001 From: BingLingGroup <42505588+BingLingGroup@users.noreply.github.com> Date: Sun, 2 Feb 2020 18:13:32 +0800 Subject: [PATCH 10/10] Fix gtransv2 options check --- README.md | 4 ++-- autosub/cmdline_utils.py | 2 +- autosub/constants.py | 1 -
.../zh_CN/LC_MESSAGES/autosub.options.mo | Bin 22643 -> 22676 bytes .../zh_CN/LC_MESSAGES/autosub.options.po | 12 ++++++------ autosub/options.py | 4 ++-- docs/README.zh-Hans.md | 2 +- 7 files changed, 12 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e51917e2..6d04c14f 100644 --- a/README.md +++ b/README.md @@ -727,7 +727,7 @@ when the option is not given at the command line. "(arg_num)" means if the option is given, the number of the arguments is required. Author: Bing Ling -Email: agermanidis@gmail.com +Email: binglinggroup@outlook.com Bug report: https://github.com/BingLingGroup/autosub ``` @@ -790,7 +790,7 @@ Bugs and suggestions can be reported at [issues](https://github.com/BingLingGrou I only write the scripts for building standalone executable files on windows, [Nuitka script](scripts/nuitka_build.bat) and [pyinstaller script](scripts/pyinstaller_build.bat). -The version after 0.5.4a doesn't support Nuitka build since 0.5.4a import google.cloud package and it contains `pkg_resources.get_distribution` which is not supported by Nuitka due to this [Nuitka issue #146](https://github.com/Nuitka/Nuitka/issues/146). You can manually remove the codes include the google.cloud package and build it. I will consider remove the codes to support Nuitka build in the future version. +The version after 0.5.4a doesn't support Nuitka build since 0.5.4a import google.cloud package and it contains `pkg_resources.get_distribution` which is not supported by Nuitka due to this [Nuitka issue #146](https://github.com/Nuitka/Nuitka/issues/146). You can manually remove the codes include the google.cloud package and build it. I will consider removing the codes to support Nuitka build in the future version. Nuitka build is pretty tricky. These environments I tried and worked. 
diff --git a/autosub/cmdline_utils.py b/autosub/cmdline_utils.py index 96dcc3c8..c2f3baf6 100644 --- a/autosub/cmdline_utils.py +++ b/autosub/cmdline_utils.py @@ -252,7 +252,7 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret Check that the commandline arguments passed to autosub are valid for audio or video processing. """ - if args.sleep_seconds < 0 or args.lines_per_trans < 0: + if args.sleep_seconds < 0: raise exceptions.AutosubException( _("Error: \"-slp\"/\"--sleep-seconds\" arg is illegal.")) diff --git a/autosub/constants.py b/autosub/constants.py index b5b51fdf..91238dfb 100644 --- a/autosub/constants.py +++ b/autosub/constants.py @@ -63,7 +63,6 @@ # when using external speech region control DEFAULT_DST_LANGUAGE = 'en-US' -DEFAULT_LINES_PER_TRANS = 15 DEFAULT_SIZE_PER_TRANS = 4000 DEFAULT_SLEEP_SECONDS = 5 diff --git a/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.mo b/autosub/data/locale/zh_CN/LC_MESSAGES/autosub.options.mo index b67d057dedb05bff9d4208d3c990f42f5fe214dc..08c8b4db73cbdc6cae7a3d257dcebdce74a5feb4 100644 GIT binary patch delta 1301 zcmZ|PT};h!9LMp`ADujwdFUj{LDU?l{+(1*C>7Gf35gj~sCgJk9v0S_n%T^SIb5;P z71O5LVKXfzX4sggxndsX#zmWr#pZ(d&;NgGYkU2^zu))w{eAzx|Nr#8*E-*4^^UT_ zJ(Aj|O39d!Abr6~d~QiuUde-+CLM*?BxyBvxIV*p;>79FCQQdI*og@vq)%w_e^ERY zOH%%o(oUQ~+>xTN$kAmcsHJ|a$G2z-=FOBQ;5noRUBNiKgE=(z1pA5q_@v#~KTBGN z^JYs7b=36|n*6&MWd6q-DS&CIVW;4F+Iz>u4!noObEOuv=1CW^6{8rRCQYKy13Zpz z(`gXb&6i%X-r$$UVfF%U3zwk>SE8A(cH>PrhPWZjU=)KrI2!l6aU1d$)Q)E31vG`a za4cR!voDHf-)(H7k^4w*O3!c}pawgL2eBQ?GNn`a5YJ+`(smNx;|C@}Sx#am_q3FP zH_&uClq(2GI?P$@%%91Z#&f=2 zGYFCR5ly09z|#>|6gmz3KoXU`)UgO(5m%x4c6`g6eT7&)LfTN|yrQny^2osTVm_2h-n9MFqq z|9~66!$&;eSA1jED|p?UualM1Rvbn@)>ZLS#111H`jCgy$7)9(*JTc<$6?%yPjN?$ zbObZk@Otnn-o%2n&P{&9$JEQL$(3S5=L)u-tjPch_zATUxH#W9+T0`2hbe6 z(2a{-SGk683hQ+^2KV4dJmki0XzFyj@#&3<9E+K2r9`@7CR?*2w_?&PU*vtvMN5%3 zPoCwE^m)RJvtqNY+Gu0!iMYssH#HiU&}~Hry!lhhYXc<*8XFGoYTg$pZ8+GN?+e;N jJ75R-^JV4c2Xp-PBHJEY6AG+tJiM#*|9MewV6*=(JH5Dc delta 
1242 zcmXZcTTISz9LMp`KRrDn&ts@4IuMzso+OW)vvNu(Bn=JCVPSK+aL7D#kzo$o-<7y% zY?uqx!&SD>W;twD#F)cPV=i2^*=%ij|NQC@AUoue!r*d?apAE(-!YUW0KA! zOH*)8iZp;#*yl)*R4D;RPLfWM52i_L@S+lk$#7V!)#;2iQp)1-VX$cfqo>nQIFiG$dM<5x3$Q%qfu0<88c#(a>x=@Ckk=5iYa?1Dw-R7QBq6(l^zZVKf_*aBP2YExy9CD(N@|Ht^Nh zjE^vFqdm#D*u#2(YEBtHZsz=dGvH^9z2p8Z_K|%@{w3%K264#CC)L`G$D?_SS)RF` zVVpodf{C~WM_{!V*P@wM@5K$ZavOcq9l5Q(@%(-4%XKo`uf7(i+C7<2>}0v^2~j8H u7W#{v?ARXvxj@{$J+Xo0UI*QQ