diff --git a/package.xml b/package.xml index 11f6063..d67362f 100644 --- a/package.xml +++ b/package.xml @@ -1,13 +1,13 @@ itf_talk - 0.0.0 + 0.0.2 The itf_talk package - alex + Alex van der Peet diff --git a/src/SoundFile.py b/src/SoundFile.py index bd7802e..1bb48eb 100644 --- a/src/SoundFile.py +++ b/src/SoundFile.py @@ -64,4 +64,4 @@ def __init__(self, filename): self.gletplayer.set_handler("on_eos", self.stop) # Load file to pydub, for getting the current power (volume) in the sound file. - self.dubsegment = pydub.AudioSegment.from_mp3(filename) + self.dubsegment = pydub.AudioSegment.from_wav(filename) diff --git a/src/itf_talk.py b/src/itf_talk.py index f7e687e..e828bf8 100755 --- a/src/itf_talk.py +++ b/src/itf_talk.py @@ -1,9 +1,10 @@ #!/usr/bin/env python +import os import rospy import urllib, pycurl from threading import Thread -from std_msgs.msg import String, Float32, Bool +from std_msgs.msg import String, Float64, Bool import subprocess import pydub import math @@ -14,12 +15,13 @@ class ITFTalker(Thread): NODE_NAME = 'itf_talker' pub = rospy.Publisher('itf_next_sentence', String, queue_size=1) - pub_speech_strength = rospy.Publisher('speech_strength', Float32, queue_size=1) + pub_speech_strength = rospy.Publisher('speech_strength', Float64, queue_size=1) pub_speech_active = rospy.Publisher('speech_active', Bool, queue_size=1) soundfile = None rms_params = {"scale": 1.0/5000, "min": 0.0, "max": 1.0} gletplayer = None stop_request_received = False + jaw_inactive = True def __init__(self): Thread.__init__(self) @@ -85,13 +87,15 @@ def speakSpeechFromText(self, phrase): googleSpeechURL = self.getGoogleSpeechURL(section) print "Downloading " + googleSpeechURL + " to " + "tts" + str(index).zfill(index) + ".mp3\n" self.downloadFile(googleSpeechURL,"tts" + str(index).zfill(index) + ".mp3") + fileName = "tts" + str(index).zfill(index) + os.system("avconv -y -i "+fileName+".mp3 "+ fileName+".wav" ) print index, section totalDuration = 0 for index, section in enumerate(phraseSections): - fileName = 'tts' + str(index).zfill(index) + '.mp3' - dubsegment = pydub.AudioSegment.from_mp3(fileName) + fileName = 'tts' + str(index).zfill(index) + '.wav' + dubsegment = pydub.AudioSegment.from_wav(fileName) totalDuration += dubsegment.__len__() dubsegment = None @@ -110,9 +114,9 @@ def speakSpeechFromText(self, phrase): for index, section in enumerate(phraseSections): if (not self.stop_request_received): - fileName = 'tts' + str(index).zfill(index) + '.mp3' + fileName = 'tts' + str(index).zfill(index) + fileName = fileName + '.wav' print 'Calling SoundPlayer with parameter ' + fileName - self.play(fileName) if not (self.soundfile is None): @@ -142,7 +146,14 @@ def hit(self, rms): p = self.rms_params jaw_coeff = min(max(math.sqrt(rms * p["scale"]), p["min"]), p["max"]) - self.pub_speech_strength.publish(jaw_coeff) + if jaw_coeff > 0: + self.pub_speech_strength.publish(jaw_coeff) + self.jaw_inactive = False + else: + if not self.jaw_inactive: + self.pub_speech_strength.publish(jaw_coeff) + self.jaw_inactive = True + # Copy pau expression message stored during handle_face_in(), # modify jaw and publish. @@ -158,6 +169,7 @@ def stop(self): def play(self, filename): self.stop() + self.soundfile = SoundFile(filename) self.soundfile.on_playmore = self.hit # Set callback self.soundfile.play()