From 37992bd8a2974b2d19ed6ba9a989a2bf94839dfa Mon Sep 17 00:00:00 2001
From: Madhusudanan Kandasamy <madhusudanan@in.ibm.com>
Date: Fri, 8 Jan 2016 09:58:42 +0530
Subject: [PATCH 1/7] google speech API V2 needs audio file in mono format

---
 VoiceCommand/speech-recog.sh  | 2 +-
 VoiceCommand/voicecommand.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/VoiceCommand/speech-recog.sh b/VoiceCommand/speech-recog.sh
index 09f45f3..95c064b 100755
--- a/VoiceCommand/speech-recog.sh
+++ b/VoiceCommand/speech-recog.sh
@@ -32,6 +32,6 @@ done
 #sox -r 16000 -t alsa $hardware /dev/shm/out.flac silence 1 0.3 1% 1 0.5 1%
 #wget -q -U "rate=16000" -O - --post-file /dev/shm/out.flac --header="Content-Type: audio/x-flac; rate=16000" "http://www.google.com/speech-api/v1/recognize?lang=en&client=Mozilla/5.0" | sed -e 's/[{}]/''/g'| awk -v k="text" '{n=split($0,a,","); for (i=1; i<=n; i++) print a[i]; exit }' | awk -F: 'NR==3 { print $3; exit }'
 #arecord -D $hardware -f cd -t wav -d $duration -r 16000 | flac - -f --best --sample-rate 16000 -o /dev/shm/out.flac 1>/dev/shm/voice.log 2>/dev/shm/voice.log; wget -O - -o /dev/null --post-file /dev/shm/out.flac --header="Content-Type: audio/x-flac; rate=16000" http://www.google.com/speech-api/v1/recognize?lang="$lang" | sed -e 's/[{}]/''/g'| awk -v k="text" '{n=split($0,a,","); for (i=1; i<=n; i++) print a[i]; exit }' | awk -F: 'NR==3 { print $3; exit }'
-arecord -D $hardware -f cd -t wav -d $duration -r 16000 | flac - -f --best --sample-rate 16000 -o /dev/shm/out.flac 1>/dev/shm/voice.log 2>/dev/shm/voice.log; curl -X POST --data-binary @/dev/shm/out.flac --user-agent 'Mozilla/5.0' --header 'Content-Type: audio/x-flac; rate=16000;' "https://www.google.com/speech-api/v2/recognize?output=json&lang=$lang&key=AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw&client=Mozilla/5.0" | sed -e 's/[{}]/''/g' | awk -F":" '{print $4}' | awk -F"," '{print $1}' | tr -d '\n'
+arecord -D $hardware -t wav -d $duration -r 16000 | flac - -f --best --sample-rate 16000 -o /dev/shm/out.flac 1>/dev/shm/voice.log 2>/dev/shm/voice.log; curl -X POST --data-binary @/dev/shm/out.flac --user-agent 'Mozilla/5.0' --header 'Content-Type: audio/x-flac; rate=16000;' "https://www.google.com/speech-api/v2/recognize?output=json&lang=$lang&key=AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw&client=Mozilla/5.0" | sed -e 's/[{}]/''/g' | awk -F":" '{print $4}' | awk -F"," '{print $1}' | tr -d '\n'
 
 rm /dev/shm/out.flac
diff --git a/VoiceCommand/voicecommand.cpp b/VoiceCommand/voicecommand.cpp
index 442abd1..5b033fd 100644
--- a/VoiceCommand/voicecommand.cpp
+++ b/VoiceCommand/voicecommand.cpp
@@ -31,7 +31,7 @@ inline float GetVolume(string recordHW, string com_duration, bool nullout) {
     float vol = 0.0f;
     string run = "arecord -D ";
     run += recordHW;
-    run += " -f cd -t wav -d ";
+    run += " -t wav -d ";
     run += com_duration;
     run += " -r 16000 /dev/shm/noise.wav";
     if(nullout)

From 18d48d539b5ceb5caade2e9bb74589ea92e53601 Mon Sep 17 00:00:00 2001
From: Jeramy Morrill <jeramy@jeramy-macbookpro.roam.corp.google.com>
Date: Wed, 27 Apr 2016 12:39:31 -0400
Subject: [PATCH 2/7] removed no longer functioning google tts and replaced
 with @irsx02 solution for pico2wave

---
 VoiceCommand/tts | 65 ++++++++++++++++++++----------------------------
 1 file changed, 27 insertions(+), 38 deletions(-)

diff --git a/VoiceCommand/tts b/VoiceCommand/tts
index 7bbcfe7..1f77ca7 100755
--- a/VoiceCommand/tts
+++ b/VoiceCommand/tts
@@ -1,42 +1,31 @@
 #!/bin/bash
+#since google ended TTS, this wrapper-script replaces tts with pico2wave.
+#version 0.2 -now rudimentarily handles language -l param.
 
-#for the Raspberry Pi, we need to insert some sort of FILLER here since it cuts off the first bit of audio
-
-string=$@
-lang="en"
-if [ "$1" == "-l" ] ; then
-    lang="$2"
-    string=`echo "$string" | sed -r 's/^.{6}//'`
+if [ $# -lt 1 ]
+then                  #no text given: speak a fixed placeholder sentence, then stop
+   /usr/bin/pico2wave -w /tmp/tempsound.wav "I have nothing to say."
+   /usr/bin/aplay -q /tmp/tempsound.wav
+   rm /tmp/tempsound.wav
+   exit 0
 fi
 
-#empty the original file
-echo "" > "/dev/shm/speak.mp3"
-
-len=${#string}
-while [ $len -ge 100 ] ;
-do
-    #lets split this up so that its a maximum of 99 characters
-    tmp=${string:0:100}
-    string=${string:100}
-    
-    #now we need to make sure there aren't split words, let's find the last space and the string after it
-    lastspace=${tmp##* }
-    tmplen=${#lastspace}
-
-    #here we are shortening the tmp string
-    tmplen=`expr 100 - $tmplen` 
-    tmp=${tmp:0:tmplen}
-    
-    #now we concatenate and the string is reconstructed
-    string="$lastspace$string"
-    len=${#string}
-    
-    #get the first 100 characters
-    wget -q -U Mozilla -O "/dev/shm/tmp.mp3" "https://translate.google.com/translate_tts?tl=${lang}&q=$tmp&ie=UTF-8&total=1&idx=0&client=t"
-    cat "/dev/shm/tmp.mp3" >> "/dev/shm/speak.mp3"
-done
-#this will get the last remnants
-wget -q -U Mozilla -O "/dev/shm/tmp.mp3" "https://translate.google.com/translate_tts?tl=${lang}&q=$string&ie=UTF-8&total=1&idx=0&client=t"
-cat "/dev/shm/tmp.mp3" >> "/dev/shm/speak.mp3"
-#now we finally say the whole thing
-cat "/dev/shm/speak.mp3" | mpg123 - 1>>/dev/shm/voice.log 2>>/dev/shm/voice.log
+lang_opt=()           #extra pico2wave arguments selecting the voice, if any
+if [ "$1" = "-l" ]    #-l in event where user explicitly defines language.
+then                  # Note: assumes $2 is 'en' or a language pico2wave accepts.
+   if [ $# -lt 2 ]    #-l was given without a language after it
+   then
+      echo "tts: option -l requires a language argument" >&2
+      exit 1
+   fi
+   lang=$2
+   if [ "$lang" = "en" ]   #plain 'en' is spoken with the en-US voice
+   then
+      lang="en-US"
+   fi
+   lang_opt=(-l "$lang")
+   shift 2
+fi
+/usr/bin/pico2wave "${lang_opt[@]}" -w /tmp/tempsound.wav "$*"   #all remaining words form one utterance
+/usr/bin/aplay -q /tmp/tempsound.wav
+rm /tmp/tempsound.wav

From f433c18b5b5ad5dddda971c46856779d44de3e9c Mon Sep 17 00:00:00 2001
From: Jeramy Morrill <jeramy@jeramy-macbookpro.roam.corp.google.com>
Date: Wed, 27 Apr 2016 17:05:21 -0400
Subject: [PATCH 3/7] replace Google Speech API key in speech-recog.sh

---
 VoiceCommand/speech-recog.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VoiceCommand/speech-recog.sh b/VoiceCommand/speech-recog.sh
index 95c064b..201a0c4 100755
--- a/VoiceCommand/speech-recog.sh
+++ b/VoiceCommand/speech-recog.sh
@@ -32,6 +32,6 @@ done
 #sox -r 16000 -t alsa $hardware /dev/shm/out.flac silence 1 0.3 1% 1 0.5 1%
 #wget -q -U "rate=16000" -O - --post-file /dev/shm/out.flac --header="Content-Type: audio/x-flac; rate=16000" "http://www.google.com/speech-api/v1/recognize?lang=en&client=Mozilla/5.0" | sed -e 's/[{}]/''/g'| awk -v k="text" '{n=split($0,a,","); for (i=1; i<=n; i++) print a[i]; exit }' | awk -F: 'NR==3 { print $3; exit }'
 #arecord -D $hardware -f cd -t wav -d $duration -r 16000 | flac - -f --best --sample-rate 16000 -o /dev/shm/out.flac 1>/dev/shm/voice.log 2>/dev/shm/voice.log; wget -O - -o /dev/null --post-file /dev/shm/out.flac --header="Content-Type: audio/x-flac; rate=16000" http://www.google.com/speech-api/v1/recognize?lang="$lang" | sed -e 's/[{}]/''/g'| awk -v k="text" '{n=split($0,a,","); for (i=1; i<=n; i++) print a[i]; exit }' | awk -F: 'NR==3 { print $3; exit }'
-arecord -D $hardware -t wav -d $duration -r 16000 | flac - -f --best --sample-rate 16000 -o /dev/shm/out.flac 1>/dev/shm/voice.log 2>/dev/shm/voice.log; curl -X POST --data-binary @/dev/shm/out.flac --user-agent 'Mozilla/5.0' --header 'Content-Type: audio/x-flac; rate=16000;' "https://www.google.com/speech-api/v2/recognize?output=json&lang=$lang&key=AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw&client=Mozilla/5.0" | sed -e 's/[{}]/''/g' | awk -F":" '{print $4}' | awk -F"," '{print $1}' | tr -d '\n'
+arecord -D $hardware -t wav -d $duration -r 16000 | flac - -f --best --sample-rate 16000 -o /dev/shm/out.flac 1>/dev/shm/voice.log 2>/dev/shm/voice.log; curl -X POST --data-binary @/dev/shm/out.flac --user-agent 'Mozilla/5.0' --header 'Content-Type: audio/x-flac; rate=16000;' "https://www.google.com/speech-api/v2/recognize?output=json&lang=$lang&key=AIzaSyALX8AqZZBN-TjQlu8WKAkkNw9Go5NxfQs&client=Mozilla/5.0" | sed -e 's/[{}]/''/g' | awk -F":" '{print $4}' | awk -F"," '{print $1}' | tr -d '\n'
 
 rm /dev/shm/out.flac

From ecf144af50ad3c8fc2ad38358e37d98ab38b0b71 Mon Sep 17 00:00:00 2001
From: Jeramy Morrill <jeramy@jeramy-macbookpro.roam.corp.google.com>
Date: Wed, 27 Apr 2016 17:08:48 -0400
Subject: [PATCH 4/7] use 'chromium' as user agent and client in speech-recog.sh

---
 VoiceCommand/speech-recog.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VoiceCommand/speech-recog.sh b/VoiceCommand/speech-recog.sh
index 201a0c4..cc0f7a4 100755
--- a/VoiceCommand/speech-recog.sh
+++ b/VoiceCommand/speech-recog.sh
@@ -32,6 +32,6 @@ done
 #sox -r 16000 -t alsa $hardware /dev/shm/out.flac silence 1 0.3 1% 1 0.5 1%
 #wget -q -U "rate=16000" -O - --post-file /dev/shm/out.flac --header="Content-Type: audio/x-flac; rate=16000" "http://www.google.com/speech-api/v1/recognize?lang=en&client=Mozilla/5.0" | sed -e 's/[{}]/''/g'| awk -v k="text" '{n=split($0,a,","); for (i=1; i<=n; i++) print a[i]; exit }' | awk -F: 'NR==3 { print $3; exit }'
 #arecord -D $hardware -f cd -t wav -d $duration -r 16000 | flac - -f --best --sample-rate 16000 -o /dev/shm/out.flac 1>/dev/shm/voice.log 2>/dev/shm/voice.log; wget -O - -o /dev/null --post-file /dev/shm/out.flac --header="Content-Type: audio/x-flac; rate=16000" http://www.google.com/speech-api/v1/recognize?lang="$lang" | sed -e 's/[{}]/''/g'| awk -v k="text" '{n=split($0,a,","); for (i=1; i<=n; i++) print a[i]; exit }' | awk -F: 'NR==3 { print $3; exit }'
-arecord -D $hardware -t wav -d $duration -r 16000 | flac - -f --best --sample-rate 16000 -o /dev/shm/out.flac 1>/dev/shm/voice.log 2>/dev/shm/voice.log; curl -X POST --data-binary @/dev/shm/out.flac --user-agent 'Mozilla/5.0' --header 'Content-Type: audio/x-flac; rate=16000;' "https://www.google.com/speech-api/v2/recognize?output=json&lang=$lang&key=AIzaSyALX8AqZZBN-TjQlu8WKAkkNw9Go5NxfQs&client=Mozilla/5.0" | sed -e 's/[{}]/''/g' | awk -F":" '{print $4}' | awk -F"," '{print $1}' | tr -d '\n'
+arecord -D $hardware -t wav -d $duration -r 16000 | flac - -f --best --sample-rate 16000 -o /dev/shm/out.flac 1>/dev/shm/voice.log 2>/dev/shm/voice.log; curl -X POST --data-binary @/dev/shm/out.flac --user-agent 'chromium' --header 'Content-Type: audio/x-flac; rate=16000;' "https://www.google.com/speech-api/v2/recognize?output=json&lang=$lang&key=AIzaSyALX8AqZZBN-TjQlu8WKAkkNw9Go5NxfQs&client=chromium" | sed -e 's/[{}]/''/g' | awk -F":" '{print $4}' | awk -F"," '{print $1}' | tr -d '\n'
 
 rm /dev/shm/out.flac

From 74d78a87690c9e5a86dd8c46ffaf708a75aed2a8 Mon Sep 17 00:00:00 2001
From: Jeramy Morrill <jeramy@jeramy-macbookpro.roam.corp.google.com>
Date: Wed, 27 Apr 2016 17:16:13 -0400
Subject: [PATCH 5/7] adding test

---
 VoiceCommand/speech-rec-test.sh | 126 ++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 VoiceCommand/speech-rec-test.sh

diff --git a/VoiceCommand/speech-rec-test.sh b/VoiceCommand/speech-rec-test.sh
new file mode 100644
index 0000000..607477c
--- /dev/null
+++ b/VoiceCommand/speech-rec-test.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+
+# Print the usage text (script name, options and their defaults) to stdout.
+show_help() {
+cat << EOF
+  Usage: ${0##*/} [-h] [-i INFILE] [-d DURATION] [-r RATE] [-l LANGUAGE] [-k KEY]
+
+  Record an utterance and send audio data to Google for speech recognition.
+
+       -h|--help               display this help and exit.
+       -i|--input     INFILE   use INFILE instead of recording a stream with sox or parecord.
+       -d|--duration  FLOAT    recording duration in seconds (Default: 3).
+       -l|--language  STRING   set transcription language (Default: en_US).
+                               Other languages: fr_FR, de_DE, es_ES, ...
+       -r|--rate      INTEGER  Sampling rate of audio data (Default: 16000, if data is to be recorded).
+                               If -i|--input is used, the sampling rate must be supplied by the user.
+       -k|--key       STRING   Google Speech Recognition Key.
+
+EOF
+}
+
+DURATION=3
+LANGUAGE=en_US
+# Please replace this with your own key
+KEY=AIzaSyAcalCzUvPmmJ7CZBFOEWx2Z1ZSn4Vs1gg
+
+
+record() {
+    DURATION=$1
+    SRATE=$2
+    INFILE=$3
+
+    if hash rec 2>/dev/null; then
+    # try to record audio with sox
+        rec -q -c 1 -r $SRATE $INFILE trim 0 $DURATION
+    else
+    # fallback to parecord
+        timeout $DURATION parecord $INFILE --file-format=flac --rate=$SRATE --channels=1
+    fi
+}
+
+# parse parameters: every option except -h takes its value from the following word
+while [[ $# -ge 1 ]]
+do
+   opt="$1"
+   case "$opt" in
+       -h|--help)
+       show_help
+       exit 0
+       ;;
+       -i|--input)
+       INFILE="$2"
+       shift
+       ;;
+       -d|--duration)
+       DURATION="$2"
+       shift
+       ;;
+       -r|--rate)
+       SRATE="$2"
+       shift
+       ;;
+       -l|--language)
+       LANGUAGE="$2"
+       shift
+       ;;
+       -k|--key)
+       KEY="$2"
+       shift
+       ;;
+       *)
+       >&2 echo "Unknown parameter '$opt'. Type $0 -h for more information."
+       exit 1
+       ;;
+   esac
+   shift   # together with the shift inside the case arm, skips the option and its value
+done
+
+if [[ ! "$DURATION" ]]   # only empty when -d was passed an empty value (default is set above)
+   then
+     >&2 echo "ERROR: empty or invalid value for duration."
+     exit 1
+fi
+
+if [[ ! "$LANGUAGE" ]]   # only empty when -l was passed an empty value (default is set above)
+   then
+     >&2 echo "ERROR: empty value for language."
+     exit 1
+fi
+
+if [[ ! "$INFILE" ]]
+   then
+      # no input file given: record a new utterance into a timestamped FLAC file
+      INFILE="record_$(date "+%Y%b%d_%H-%M-%S").flac"
+      if  [[ ! "$SRATE" ]]
+         then
+            SRATE=16000
+      fi
+      echo "Say something..."
+      echo ""
+      record "$DURATION" "$SRATE" "$INFILE"
+else
+      if  [[ ! "$SRATE" ]]
+      then
+           >&2 echo "ERROR: no sampling rate specified for input file."
+           exit 1
+      fi
+      [[ -r "$INFILE" ]] || { >&2 echo "ERROR: cannot read input file '$INFILE'."; exit 1; }
+      echo "Try to recognize speech from file $INFILE"
+      echo ""
+fi
+
+# upload the audio file to the Speech API and capture the raw reply
+RESULT=$(wget -q --post-file "$INFILE" --header="Content-Type: audio/x-flac; rate=$SRATE" -O - "https://www.google.com/speech-api/v2/recognize?client=chromium&lang=$LANGUAGE&key=$KEY")
+# reduce the reply to its transcript/confidence fields, one per line
+FILTERED=$(echo "$RESULT" | grep "transcript.*}" | sed 's/,/\n/g;s/[{,},"]//g;s/\[//g;s/\]//g;s/:/: /g' | grep -o -i -e "transcript.*" -e "confidence:.*")
+
+if [[ ! "$FILTERED" ]]
+  then
+     >&2 echo "Google was unable to recognize any speech in audio data"
+else
+    echo "Recognition result:"
+    echo ""
+    echo "$FILTERED"
+fi
+exit 0

From 606d67158e7f2019154cad9f87ae58bf5571e6e1 Mon Sep 17 00:00:00 2001
From: Jeramy Morrill <jeramy@jeramy-macbookpro.roam.corp.google.com>
Date: Wed, 27 Apr 2016 17:16:40 -0400
Subject: [PATCH 6/7] switch default API key in speech-rec-test.sh

---
 VoiceCommand/speech-rec-test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VoiceCommand/speech-rec-test.sh b/VoiceCommand/speech-rec-test.sh
index 607477c..eb0ab9a 100644
--- a/VoiceCommand/speech-rec-test.sh
+++ b/VoiceCommand/speech-rec-test.sh
@@ -22,7 +22,7 @@ EOF
 DURATION=3
 LANGUAGE=en_US
 # Please replace this with your own key
-KEY=AIzaSyAcalCzUvPmmJ7CZBFOEWx2Z1ZSn4Vs1gg
+KEY=AIzaSyALX8AqZZBN-TjQlu8WKAkkNw9Go5NxfQs
 
 
 record() {

From 7e2dc6094df50bd4f49714c6e31ffb4f5fc9421b Mon Sep 17 00:00:00 2001
From: Jeramy Morrill <jeramy@jeramy-macbookpro.roam.corp.google.com>
Date: Wed, 27 Apr 2016 17:18:32 -0400
Subject: [PATCH 7/7] speech-rec-test.sh: record with arecord instead of sox rec

---
 VoiceCommand/speech-rec-test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VoiceCommand/speech-rec-test.sh b/VoiceCommand/speech-rec-test.sh
index eb0ab9a..e61d581 100644
--- a/VoiceCommand/speech-rec-test.sh
+++ b/VoiceCommand/speech-rec-test.sh
@@ -32,7 +32,7 @@ record() {
 
-    if hash rec 2>/dev/null; then
-    # try to record audio with sox
-        rec -q -c 1 -r $SRATE $INFILE trim 0 $DURATION
+    if hash arecord 2>/dev/null && hash flac 2>/dev/null; then
+    # arecord cannot write FLAC itself, so pipe its WAV output through flac; -d takes whole seconds
+        arecord -q -c 1 -f S16_LE -t wav -r "$SRATE" -d "$DURATION" | flac - -f -s -o "$INFILE"
     else
     # fallback to parecord
         timeout $DURATION parecord $INFILE --file-format=flac --rate=$SRATE --channels=1