diff --git a/README.md b/README.md
index e4fafe6..28f9246 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@ The software used to generate Morse Code Ninja practice sets as found on
These must be installed and available in your Shell's PATH.
* [ebook2cw](https://fkurz.net/ham/ebook2cw.html)
* [ffmpeg](https://ffmpeg.org)
+* [sox](https://sourceforge.net/projects/sox/)
* [lame](https://lame.sourceforge.io/)
* [Perl 5](https://www.perl.org)
* [Python 3](https://www.python.org)
@@ -64,12 +65,14 @@ Uses AWS Polly and requires valid credentials in the aws.properties file.
. Default 1 second.
+ -sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.
+ -ss, --silencesets length of silence between courtesy tone and next practice set. Default 1 second.
+ -sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.
+ -sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.
+ -st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone . Default 1 second.
-x, --extraspace 0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc
+ --precise trim AWS Polly and ebook2cw audio -- useful when specifying very short time with -sm, --silencemorse length of silence between Morse code and spoken voice.
+ ****Be sure*** to clear the cache directory if you are switching between precise and non-precise timing.
-l, --lang language: ENGLISH or SWEDISH
# General Notes
diff --git a/generators/generate-single-letter-number.pl b/generators/generate-single-letter-number.pl
index ca1cebb..a28ac52 100755
--- a/generators/generate-single-letter-number.pl
+++ b/generators/generate-single-letter-number.pl
@@ -11,7 +11,13 @@
#
# Mind Melt
# ./render.pl -i single-letter-number-mind-melt.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.2 -sm 0.2 -sv 0.2
-
+#
+# Warp -- Be sure to clear cache
+# ./render.pl -i single-letter-number-warp.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.5 -sv 0.5 --precise
+#
+# ICR Territory -- Be sure to clear cache
+# ./render.pl -i single-letter-number-icr-territory.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.2 -sv 0.5 --precise
+#
# Check distribution ./generate-single-letter-number.pl | sort | uniq -c
my $number_of_runs = 5000;
diff --git a/generators/generate-single-letter.pl b/generators/generate-single-letter.pl
index f45b33d..2bac882 100755
--- a/generators/generate-single-letter.pl
+++ b/generators/generate-single-letter.pl
@@ -11,6 +11,12 @@
#
# Mind Melt
# ./render.pl -i single-letters-mind-melt.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.2 -sm 0.2 -sv 0.2
+#
+# Warp -- Be sure to clear cache
+# ./render.pl -i single-letters-warp.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.5 -sv 0.5 --precise
+#
+# ICR Territory -- Be sure to clear cache
+# ./render.pl -i single-letters-icr-territory.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.0 -sm 0.2 -sv 0.5 --precise
my $number_of_runs = 5000;
diff --git a/render.pl b/render.pl
index 2b65df7..5d8f6de 100755
--- a/render.pl
+++ b/render.pl
@@ -38,6 +38,7 @@
'sv|silencevoice=s' => \(my $silence_between_voice_and_repeat = "1"), # typically 1 second
'sc|silencecontext=s' => \(my $silence_between_context_and_morse_code = "1"),
'x|extraspace=s' => \(my $extra_word_spacing = 0), # 0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc
+ 'precise' => \(my $precise = ''), # flag. 1 = precise timing -- useful if using very tight times between morse code and spoken answer
'l|lang=s' => \(my $lang = "ENGLISH"), # ENGLISH | GERMAN | SWEDISH
'p|pitchtone=i' => \(my $pitch_tone = 700), # tone in Hz for pitch
'pr|pitchrandom' => \(my $pitch_tone_random = '0'), # flag. 0 == false, random pitch tone
@@ -55,6 +56,14 @@
print_usage();
}
+# There is some overhead in the concatenation process, so we'll subtract it out
+if($precise) {
+ $silence_between_morse_code_and_spoken_voice -= "0.11";
+ if($silence_between_morse_code_and_spoken_voice <= 0) {
+ $silence_between_morse_code_and_spoken_voice = "0.03";
+ }
+}
+
my $speed_racing_multiplier = 1.5;
my $speed_racing_iterations = 3;
@@ -435,7 +444,11 @@ sub split_on_spoken_directive {
while ($exit_code != 0 && (!$no_spoken || $filename_map_key =~ m/context/)) {
my $textFile = File::Spec->rel2abs("$filename_base-${counter}");
- my $cmd = "./text2speech.py \"$textFile\" $text_to_speech_engine $lang $cache_directory";
+ my $trim_silence = 0;
+ if($precise) {
+ $trim_silence = 1;
+ }
+ my $cmd = "./text2speech.py \"$textFile\" $text_to_speech_engine $lang $cache_directory $trim_silence";
print "execute $cmd\n";
my $output = `$cmd`;
@@ -569,7 +582,14 @@ sub split_on_spoken_directive {
$pitch_tone = $random_tones[ rand @random_tones ];
}
- my $ebookCmdBase = "ebook2cw $lang_option -R $rise_and_fall_time -F $rise_and_fall_time " .
+ my $ebookCmdBase = "";
+ if($precise) {
+ $ebookCmdBase = "./ebook2cw-trim.bash ";
+ } else {
+ $ebookCmdBase = "ebook2cw ";
+ }
+
+ $ebookCmdBase = $ebookCmdBase . "$lang_option -R $rise_and_fall_time -F $rise_and_fall_time " .
"$extra_word_spacing_option -f $pitch_tone -w $speed -s 44100 ";
if ($farnsworth != 0) {
$ebookCmdBase = $ebookCmdBase . "-e $farnsworth ";
@@ -750,7 +770,11 @@ sub split_on_spoken_directive {
rename("$output_directory/sentence.txt ", '$filename_base-$counter-full.txt');
my $exit_code = -1;
while($exit_code != 0 && $no_spoken != 0) {
- my $cmd = './text2speech.py '."$filename_base-${counter}-full $text_to_speech_engine $lang $cache_directory";
+ my $trim_silence = 0;
+ if($precise) {
+ $trim_silence = 1;
+ }
+ my $cmd = './text2speech.py '."$filename_base-${counter}-full $text_to_speech_engine $lang $cache_directory $trim_silence";
my $output = `$cmd`;
$output =~ m/^Cached filename:(.*)\n/;
my $full_voiced_filename = $1;
@@ -1010,7 +1034,9 @@ sub split_on_spoken_directive {
else {
$speed = $_;
}
- unlink "$output_directory/sentence-${speed}0000.mp3", "$output_directory/sentence-repeat-${speed}0000.mp3", "$filename_base-list-${speed}wpm.txt", "$output_directory/silence.mp3";
+ unlink "$output_directory/sentence-${speed}0000.mp3", "$output_directory/sentence-repeat-${speed}0000.mp3",
+ "$filename_base-list-${speed}wpm.txt", "$output_directory/silence.mp3", "$output_directory/sentence-${speed}.txt",
+ "$output_directory/sentence-${speed}-orig0000.mp3";
}
unlink "$filename_base-structure.txt", "$filename_base-sentences.txt";
unlink glob("$output_directory/silence*.mp3");
@@ -1052,12 +1078,14 @@ sub print_usage {
print " --nospoken exclude spoken\n";
print " --nocourtesytone exclude the courtesy tone\n";
print " -e, --engine name of Polly speech engine to use: NEURAL or STANDARD\n";
- print " --sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.\n";
- print " --ss, --silencesets length of silence between courtesy tone and next practice set. Default 1 second.\n";
- print " --sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.\n";
- print " --sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.\n";
- print " --st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone . Default 1 second.\n";
+ print " -sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.\n";
+ print " -ss, --silencesets length of silence between courtesy tone and next practice set. Default 1 second.\n";
+ print " -sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.\n";
+ print " -sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.\n";
+ print " -st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone . Default 1 second.\n";
print " -x, --extraspace 0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc\n";
+ print " --precise trim AWS Polly and ebook2cw audio -- useful when specifying very short time with -sm, --silencemorse length of silence between Morse code and spoken voice.\n";
+ print " ****Be sure*** to clear the cache directory if you are switching between precise and non-precise timing.\n";
print " -l, --lang language: ENGLISH, GERMAN, or SWEDISH\n\n";
die "";
}
diff --git a/text2speech.py b/text2speech.py
index 85f082a..1fab0e3 100755
--- a/text2speech.py
+++ b/text2speech.py
@@ -6,7 +6,6 @@
import hashlib
import os.path
from os import environ
-import shutil
import subprocess
sentence_filename = sys.argv[1]
@@ -14,6 +13,7 @@
language = sys.argv[3]
#cache_directory = 'cache/'
cache_directory = sys.argv[4]
+trim_silence = int(sys.argv[5])  # render.pl passes 0 or 1; parse as int so the `trim_silence == 1` check can match (a str never equals 1)
# ERROR return codes (coordinate with render.pl for intelligent error handling)
ioError = 2
@@ -57,7 +57,8 @@
hex_digest = hashlib.sha256(sentence.encode('utf-8')).hexdigest()
base_filename = engine_type + '-' + hex_digest + ".mp3"
-temp_filename = cache_directory + engine_type + "-" + hex_digest + "-temp.mp3"
+temp_resample_filename = cache_directory + engine_type + "-" + hex_digest + "-temp-resample.mp3"
+temp_sox_filename = cache_directory + engine_type + "-" + hex_digest + "-temp-sox.mp3"
cache_filename = cache_directory + hex_digest + ".mp3"
def render(cache_filename, voice_id, text_type, text):
@@ -71,17 +72,29 @@ def render(cache_filename, voice_id, text_type, text):
response = polly_client.synthesize_speech(Engine=engine_type, VoiceId=voice_id, OutputFormat='mp3',
TextType=text_type, Text=text)
- file = open(temp_filename, 'wb')
- file.write(response['AudioStream'].read())
- file.close()
+ if trim_silence == 1:
+ file = open(temp_sox_filename, 'wb')
+ file.write(response['AudioStream'].read())
+ file.close()
+
+ result = subprocess.run(['sox', temp_sox_filename, temp_resample_filename, 'silence', '1', '0.001', '1%'],
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ os.remove(temp_sox_filename)
+ else:
+ file = open(temp_resample_filename, 'wb')
+ file.write(response['AudioStream'].read())
+ file.close()
+
subprocess.run(['lame', '--resample', '44.1', '-a', '-b', '256',
- temp_filename,
+ temp_resample_filename,
cache_filename],
stdout=subprocess.PIPE,
universal_newlines=True)
- os.remove(temp_filename)
+ os.remove(temp_resample_filename)
+
print("Cached filename:" + cache_filename)
@@ -90,7 +103,7 @@ def render(cache_filename, voice_id, text_type, text):
# render.pl
if language == "ENGLISH":
# short individual words are easier to understand spoken more slowly
- if re.match(r".*?", sentence):
+ if re.match(r"\s*.*?\s*", sentence):
print("Pronouncing exactly as specified")
ssml = sentence
cache_filename = cache_directory + "Mathew-exact-" + base_filename