diff --git a/README.md b/README.md index e4fafe6..28f9246 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ The software used to generate Morse Code Ninja practice sets as found on These must be installed and available in your Shell's PATH. * [ebook2cw](https://fkurz.net/ham/ebook2cw.html) * [ffmpeg](https://ffmpeg.org) +* [sox](https://sourceforge.net/projects/sox/) * [lame](https://lame.sourceforge.io/) * [Perl 5](https://www.perl.org) * [Python 3](https://www.python.org) @@ -64,12 +65,14 @@ Uses AWS Polly and requires valid credentials in the aws.properties file.
. Default 1 second. + -sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second. + -ss, --silencesets length of silence between courtesy tone and next practice set. Default 1 second. + -sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second. + -sc, --silencecontext length of silence between spoken context and morse code. Default 1 second. + -st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone . Default 1 second. -x, --extraspace 0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc + --precise trim AWS Polly and ebook2cw audio -- useful when specifying very short time with -sm, --silencemorse length of silence between Morse code and spoken voice. + ****Be sure*** to clear the cache directory if you are switching between precise and non-precise timing. -l, --lang language: ENGLISH or SWEDISH # General Notes diff --git a/generators/generate-single-letter-number.pl b/generators/generate-single-letter-number.pl index ca1cebb..a28ac52 100755 --- a/generators/generate-single-letter-number.pl +++ b/generators/generate-single-letter-number.pl @@ -11,7 +11,13 @@ # # Mind Melt # ./render.pl -i single-letter-number-mind-melt.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.2 -sm 0.2 -sv 0.2 - +# +# Warp -- Be sure to clear cache +# ./render.pl -i single-letter-number-warp.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.5 -sv 0.5 --precise +# +# ICR Territory -- Be sure to clear cache +# ./render.pl -i single-letter-number-icr-territory.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.2 -sv 0.5 --precise +# # Check distribution ./generate-single-letter-number.pl | sort | uniq -c my $number_of_runs = 5000; diff --git a/generators/generate-single-letter.pl b/generators/generate-single-letter.pl index 
f45b33d..2bac882 100755 --- a/generators/generate-single-letter.pl +++ b/generators/generate-single-letter.pl @@ -11,6 +11,12 @@ # # Mind Melt # ./render.pl -i single-letters-mind-melt.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.2 -sm 0.2 -sv 0.2 +# +# Warp -- Be sure to clear cache +# ./render.pl -i single-letters-warp.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.5 -sv 0.5 --precise +# +# ICR Territory -- Be sure to clear cache +# ./render.pl -i single-letters-icr-territory.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.0 -sm 0.2 -sv 0.5 --precise my $number_of_runs = 5000; diff --git a/render.pl b/render.pl index 2b65df7..5d8f6de 100755 --- a/render.pl +++ b/render.pl @@ -38,6 +38,7 @@ 'sv|silencevoice=s' => \(my $silence_between_voice_and_repeat = "1"), # typically 1 second 'sc|silencecontext=s' => \(my $silence_between_context_and_morse_code = "1"), 'x|extraspace=s' => \(my $extra_word_spacing = 0), # 0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc + 'precise' => \(my $precise = ''), # flag. 1 = precise timing -- useful if using very tight times between morse code and spoken answer 'l|lang=s' => \(my $lang = "ENGLISH"), # ENGLISH | GERMAN | SWEDISH 'p|pitchtone=i' => \(my $pitch_tone = 700), # tone in Hz for pitch 'pr|pitchrandom' => \(my $pitch_tone_random = '0'), # flag. 
0 == false, random pitch tone @@ -55,6 +56,14 @@ print_usage(); } +# There is some overhead in the concatenation process, so we'll subtract it out +if($precise) { + $silence_between_morse_code_and_spoken_voice -= "0.11"; + if($silence_between_morse_code_and_spoken_voice <= 0) { + $silence_between_morse_code_and_spoken_voice = "0.03"; + } +} + my $speed_racing_multiplier = 1.5; my $speed_racing_iterations = 3; @@ -435,7 +444,11 @@ sub split_on_spoken_directive { while ($exit_code != 0 && (!$no_spoken || $filename_map_key =~ m/context/)) { my $textFile = File::Spec->rel2abs("$filename_base-${counter}"); - my $cmd = "./text2speech.py \"$textFile\" $text_to_speech_engine $lang $cache_directory"; + my $trim_silence = 0; + if($precise) { + $trim_silence = 1; + } + my $cmd = "./text2speech.py \"$textFile\" $text_to_speech_engine $lang $cache_directory $trim_silence"; print "execute $cmd\n"; my $output = `$cmd`; @@ -569,7 +582,14 @@ sub split_on_spoken_directive { $pitch_tone = $random_tones[ rand @random_tones ]; } - my $ebookCmdBase = "ebook2cw $lang_option -R $rise_and_fall_time -F $rise_and_fall_time " . + my $ebookCmdBase = ""; + if($precise) { + $ebookCmdBase = "./ebook2cw-trim.bash "; + } else { + $ebookCmdBase = "ebook2cw "; + } + + $ebookCmdBase = $ebookCmdBase . "$lang_option -R $rise_and_fall_time -F $rise_and_fall_time " . "$extra_word_spacing_option -f $pitch_tone -w $speed -s 44100 "; if ($farnsworth != 0) { $ebookCmdBase = $ebookCmdBase . 
"-e $farnsworth "; @@ -750,7 +770,11 @@ sub split_on_spoken_directive { rename("$output_directory/sentence.txt ", '$filename_base-$counter-full.txt'); my $exit_code = -1; while($exit_code != 0 && $no_spoken != 0) { - my $cmd = './text2speech.py '."$filename_base-${counter}-full $text_to_speech_engine $lang $cache_directory"; + my $trim_silence = 0; + if($precise) { + $trim_silence = 1; + } + my $cmd = './text2speech.py '."$filename_base-${counter}-full $text_to_speech_engine $lang $cache_directory $trim_silence"; my $output = `$cmd`; $output =~ m/^Cached filename:(.*)\n/; my $full_voiced_filename = $1; @@ -1010,7 +1034,9 @@ sub split_on_spoken_directive { else { $speed = $_; } - unlink "$output_directory/sentence-${speed}0000.mp3", "$output_directory/sentence-repeat-${speed}0000.mp3", "$filename_base-list-${speed}wpm.txt", "$output_directory/silence.mp3"; + unlink "$output_directory/sentence-${speed}0000.mp3", "$output_directory/sentence-repeat-${speed}0000.mp3", + "$filename_base-list-${speed}wpm.txt", "$output_directory/silence.mp3", "$output_directory/sentence-${speed}.txt", + "$output_directory/sentence-${speed}-orig0000.mp3"; } unlink "$filename_base-structure.txt", "$filename_base-sentences.txt"; unlink glob("$output_directory/silence*.mp3"); @@ -1052,12 +1078,14 @@ sub print_usage { print " --nospoken exclude spoken\n"; print " --nocourtesytone exclude the courtesy tone\n"; print " -e, --engine name of Polly speech engine to use: NEURAL or STANDARD\n"; - print " --sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.\n"; - print " --ss, --silencesets length of silence between courtesy tone and next practice set. Default 1 second.\n"; - print " --sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.\n"; - print " --sc, --silencecontext length of silence between spoken context and morse code. 
Default 1 second.\n"; - print " --st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone . Default 1 second.\n"; + print " -sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.\n"; + print " -ss, --silencesets length of silence between courtesy tone and next practice set. Default 1 second.\n"; + print " -sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.\n"; + print " -sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.\n"; + print " -st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone . Default 1 second.\n"; print " -x, --extraspace 0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc\n"; + print " --precise trim AWS Polly and ebook2cw audio -- useful when specifying very short time with -sm, --silencemorse length of silence between Morse code and spoken voice.\n"; + print " ****Be sure*** to clear the cache directory if you are switching between precise and non-precise timing.\n"; print " -l, --lang language: ENGLISH, GERMAN, or SWEDISH\n\n"; die ""; } diff --git a/text2speech.py b/text2speech.py index 85f082a..1fab0e3 100755 --- a/text2speech.py +++ b/text2speech.py @@ -6,7 +6,6 @@ import hashlib import os.path from os import environ -import shutil import subprocess sentence_filename = sys.argv[1] @@ -14,6 +13,7 @@ language = sys.argv[3] #cache_directory = 'cache/' cache_directory = sys.argv[4] +trim_silence = sys.argv[5] # ERROR return codes (coordinate with render.pl for intelligent error handling) ioError = 2 @@ -57,7 +57,8 @@ hex_digest = hashlib.sha256(sentence.encode('utf-8')).hexdigest() base_filename = engine_type + '-' + hex_digest + ".mp3" -temp_filename = cache_directory + engine_type + "-" + hex_digest + "-temp.mp3" +temp_resample_filename = cache_directory 
+ engine_type + "-" + hex_digest + "-temp-resample.mp3" +temp_sox_filename = cache_directory + engine_type + "-" + hex_digest + "-temp-sox.mp3" cache_filename = cache_directory + hex_digest + ".mp3" def render(cache_filename, voice_id, text_type, text): @@ -71,17 +72,29 @@ def render(cache_filename, voice_id, text_type, text): response = polly_client.synthesize_speech(Engine=engine_type, VoiceId=voice_id, OutputFormat='mp3', TextType=text_type, Text=text) - file = open(temp_filename, 'wb') - file.write(response['AudioStream'].read()) - file.close() + if trim_silence == 1: + file = open(temp_sox_filename, 'wb') + file.write(response['AudioStream'].read()) + file.close() + + result = subprocess.run(['sox', temp_sox_filename, temp_resample_filename, 'silence', '1', '0.001', '1%'], + stdout=subprocess.PIPE, + universal_newlines=True) + os.remove(temp_sox_filename) + else: + file = open(temp_resample_filename, 'wb') + file.write(response['AudioStream'].read()) + file.close() + subprocess.run(['lame', '--resample', '44.1', '-a', '-b', '256', - temp_filename, + temp_resample_filename, cache_filename], stdout=subprocess.PIPE, universal_newlines=True) - os.remove(temp_filename) + os.remove(temp_resample_filename) + print("Cached filename:" + cache_filename) @@ -90,7 +103,7 @@ def render(cache_filename, voice_id, text_type, text): # render.pl if language == "ENGLISH": # short individual words are easier to understand spoken more slowly - if re.match(r".*?", sentence): + if re.match(r"\s*.*?\s*", sentence): print("Pronouncing exactly as specified") ssml = sentence cache_filename = cache_directory + "Mathew-exact-" + base_filename