11package org .myrobotlab .service ;
22
3- import org .myrobotlab .framework .Service ;
4- import org .myrobotlab .service .config .ServiceConfig ;
3+ import io .github .givimad .whisperjni .WhisperContext ;
4+ import io .github .givimad .whisperjni .WhisperFullParams ;
5+ import io .github .givimad .whisperjni .WhisperJNI ;
6+ import org .myrobotlab .framework .Platform ;
7+ import org .myrobotlab .service .abstracts .AbstractSpeechRecognizer ;
8+ import org .myrobotlab .service .config .LlamaConfig ;
9+ import org .myrobotlab .service .config .WhisperConfig ;
10+ import org .myrobotlab .service .data .Locale ;
11+
12+ import javax .sound .sampled .AudioFormat ;
13+ import javax .sound .sampled .AudioSystem ;
14+ import javax .sound .sampled .Line ;
15+ import javax .sound .sampled .LineUnavailableException ;
16+ import javax .sound .sampled .Mixer ;
17+ import javax .sound .sampled .TargetDataLine ;
18+ import java .io .File ;
19+ import java .io .FileOutputStream ;
20+ import java .io .IOException ;
21+ import java .net .URL ;
22+ import java .nio .ByteBuffer ;
23+ import java .nio .ByteOrder ;
24+ import java .nio .ShortBuffer ;
25+ import java .nio .channels .Channels ;
26+ import java .nio .channels .FileChannel ;
27+ import java .nio .channels .ReadableByteChannel ;
28+ import java .nio .file .Path ;
29+ import java .util .Map ;
30+
31+ public class Whisper extends AbstractSpeechRecognizer <WhisperConfig > {
32+ private transient WhisperJNI whisper ;
33+
34+ private transient WhisperContext ctx ;
35+
36+ private transient WhisperFullParams params ;
37+
38+ private transient Thread listeningThread = new Thread ();
39+
540
6- public class Whisper extends Service <ServiceConfig > {
741 /**
 * Constructor of the service; reservedKey is typically the service's name and
 * inId will be its process id
@@ -14,4 +48,144 @@ public class Whisper extends Service<ServiceConfig> {
public Whisper(String reservedKey, String inId) {
  // Nothing Whisper-specific happens here; both arguments go straight to the superclass.
  super(reservedKey, inId);
}
51+
52+ public void loadModel (String modelPath ) {
53+ try {
54+ whisper = new WhisperJNI ();
55+ WhisperJNI .loadLibrary ();
56+ ctx = whisper .init (Path .of (modelPath ));
57+ } catch (IOException e ) {
58+ throw new RuntimeException (e );
59+ }
60+
61+ params = new WhisperFullParams ();
62+ params .nThreads = Platform .getLocalInstance ().getNumPhysicalProcessors ();
63+ params .printRealtime = true ;
64+ params .printProgress = true ;
65+
66+ }
67+
68+ public String findModelPath (String modelName ) {
69+ // First, we loop over all user-defined
70+ // model directories
71+ for (String dir : config .modelPaths ) {
72+ File path = new File (dir + fs + modelName );
73+ if (path .exists ()) {
74+ return path .getAbsolutePath ();
75+ }
76+ }
77+
78+ // Now, we check our data directory for any downloaded models
79+ File path = new File (getDataDir () + fs + modelName );
80+ if (path .exists ()) {
81+ return path .getAbsolutePath ();
82+ } else if (config .modelUrls .containsKey (modelName )) {
83+ // Model was not in data but we do have a URL for it
84+ try (FileOutputStream fileOutputStream = new FileOutputStream (path )) {
85+ ReadableByteChannel readableByteChannel = Channels .newChannel (new URL (config .modelUrls .get (modelName )).openStream ());
86+ FileChannel fileChannel = fileOutputStream .getChannel ();
87+ info ("Downloading model %s to path %s from URL %s" , modelName , path , config .modelUrls .get (modelName ));
88+ fileChannel .transferFrom (readableByteChannel , 0 , Long .MAX_VALUE );
89+ } catch (IOException e ) {
90+ throw new RuntimeException (e );
91+ }
92+ return path .getAbsolutePath ();
93+ }
94+ // Cannot find the model anywhere
95+ error ("Could not locate model {}, add its URL to download it or add a directory where it is located" , modelName );
96+ return null ;
97+ }
98+
99+ @ Override
100+ public void startListening () {
101+
102+ listeningThread = new Thread (() -> {
103+ AudioFormat format = new AudioFormat (16000.0f , 16 , 1 , true , false );
104+ TargetDataLine microphone = null ;
105+
106+ Mixer .Info [] mixerInfos = AudioSystem .getMixerInfo ();
107+ for (Mixer .Info info : mixerInfos ){
108+ Mixer m = AudioSystem .getMixer (info );
109+ Line .Info [] lineInfos = m .getTargetLineInfo ();
110+ for (Line .Info lineInfo :lineInfos ){
111+ System .out .println (info .getName ()+"---" +lineInfo );
112+ // Hard-code for my mic right now
113+ if (info .getName ().contains ("U0x46d0x825" )) {
114+ try {
115+ microphone = (TargetDataLine ) m .getLine (lineInfo );
116+ microphone .open (format );
117+ System .out .println ("Sample rate: " + format .getSampleRate ());
118+ } catch (LineUnavailableException e ) {
119+ throw new RuntimeException (e );
120+ }
121+ }
122+
123+ }
124+
125+ }
126+
127+ int numBytesRead ;
128+
129+ microphone .start ();
130+ while (config .listening ) {
131+ int CHUNK_SIZE = (int )((format .getFrameSize () * format .getFrameRate ())) * 5 ;
132+ ByteBuffer captureBuffer = ByteBuffer .allocate (CHUNK_SIZE );
133+ captureBuffer .order (ByteOrder .LITTLE_ENDIAN );
134+ numBytesRead = microphone .read (captureBuffer .array (), 0 , CHUNK_SIZE );
135+ System .out .println ("Num bytes read=" + numBytesRead );
136+ ShortBuffer shortBuffer = captureBuffer .asShortBuffer ();
137+ // transform the samples to f32 samples
138+ float [] samples = new float [captureBuffer .capacity () / 2 ];
139+ int index = 0 ;
140+ shortBuffer .position (0 );
141+ while (shortBuffer .hasRemaining ()) {
142+ samples [index ++] = Float .max (-1f , Float .min (((float ) shortBuffer .get ()) / (float ) Short .MAX_VALUE , 1f ));
143+ }
144+ int result = whisper .full (ctx , params , samples , samples .length );
145+ if (result != 0 ) {
146+ throw new RuntimeException ("Transcription failed with code " + result );
147+ }
148+ int numSegments = whisper .fullNSegments (ctx );
149+ System .out .println ("Inference done, numSegments=" + numSegments );
150+ for (int i = 0 ; i < numSegments ; i ++) {
151+ System .out .println (whisper .fullGetSegmentText (ctx , i ));
152+ invoke ("publishRecognized" , whisper .fullGetSegmentText (ctx , i ));
153+ }
154+
155+ }
156+ microphone .close ();
157+ });
158+ super .startListening ();
159+
160+ listeningThread .start ();
161+ }
162+
163+ @ Override
164+ public WhisperConfig apply (WhisperConfig c ) {
165+ super .apply (c );
166+
167+ if (config .selectedModel != null && !config .selectedModel .isEmpty ()) {
168+ String modelPath = findModelPath (config .selectedModel );
169+ if (modelPath != null ) {
170+ loadModel (modelPath );
171+ } else {
172+ error ("Could not find selected model {}" , config .selectedModel );
173+ }
174+ }
175+
176+ return config ;
177+ }
178+
179+ /**
180+ * locales this service supports - implementation can simply get
181+ * runtime.getLocales() if acceptable or create their own locales
182+ *
183+ * @return map of string to locale
184+ */
185+ @ Override
186+ public Map <String , Locale > getLocales () {
187+ return null ;
188+ }
189+
190+
17191}
0 commit comments