pannous · pannous · commit 3f608b931bcc · 2016-12-09T23:27:19.000+01:00
diff --git a/README.md b/README.md
@@ -48,6 +48,6 @@ Update: [Sphinx starts using tensorflow LSTMs](http://cmusphinx.sourceforge.net/
 Even though this project is far from finished we hope it gives you some starting points.
 
 Looking for a tensorflow consultant / deep learning contractor? Reach out to info@pannous.com
-
+<!-- 
 ### Warning / Attention
-Google keeps [deliberately breaking the tensorflow API](https://github.com/tensorflow/tensorflow/issues/4283) so you always need the latest tensorflow release if you want current examples to run (and can't run old tensorflow stuff simultaneously.)
+Google keeps [deliberately breaking the tensorflow API](https://github.com/tensorflow/tensorflow/issues/4283) so you always need the latest tensorflow release if you want current examples to run (and can't run old tensorflow stuff simultaneously.) -->
diff --git a/lstm_ctc_to_chars.py b/lstm_ctc_to_chars.py
@@ -5,7 +5,7 @@
 arrays of Mel-Frequency Cepstral Coefficients.  This is test code to run on the
 8-item data set in the "sample_data" directory, for those without access to TIMIT.
 
-Author: Jon Rein
+Original Author: Jon Rein
 '''
 
 from __future__ import absolute_import
@@ -17,6 +17,8 @@
 from tensorflow.python.ops import rnn_cell
 from tensorflow.python.ops.rnn import bidirectional_rnn
 import numpy as np
+import re
+
 from bdlstm_utils import load_batched_data
 
 INPUT_PATH = '/data/ctc/sample_data/mfcc'  # directory of MFCC nFeatures x nFrames 2-D array .npy files
@@ -87,10 +89,25 @@
 	reduced_sum = tf.reduce_sum(tf.edit_distance(predictions, targetY, normalize=False))
 	errorRate = reduced_sum / tf.to_float(tf.size(targetY.values))
 
+saver = tf.train.Saver()  # defaults to saving all variables
+ckpt = tf.train.get_checkpoint_state('./checkpoints')
 ####Run session
 with tf.Session(graph=graph) as session:
-	print('Initializing')
-	tf.initialize_all_variables().run()
+	merged = tf.merge_all_summaries()
+	writer = tf.train.SummaryWriter("/tmp/basic_new", session.graph)
+
+	start = 0  # step of the checkpoint to resume from; 0 means initialise from scratch
+	if ckpt and ckpt.model_checkpoint_path:
+		# Raw-string search(): the recorded path need not begin with "./checkpoints/",
+		# and an anchored match() returning None would crash on m.group(1).
+		m = re.search(r'model\.ckpt-([0-9]+)', ckpt.model_checkpoint_path)
+		start = int(m.group(1)) if m else 0
+	if start > 0:
+		saver.restore(session, ckpt.model_checkpoint_path)  # restore from the recorded path itself
+		print("Model %d restored." % start)
+	else:
+		print('Initializing')
+		session.run(tf.initialize_all_variables())
 	for epoch in range(nEpochs):
 		print('Epoch', epoch + 1, '...')
 		batchErrors = np.zeros(len(batchedData))
@@ -109,3 +126,6 @@
 			batchErrors[batch] = er * len(batchSeqLengths)
 		epochErrorRate = batchErrors.sum() / totalN
 		print('Epoch', epoch + 1, 'error rate:', epochErrorRate)
+		saver.save(session, 'checkpoints/model.ckpt', global_step=start + epoch + 1)  # offset by the restored step so numbering continues after a resume
+	print('Learning finished')
+
diff --git a/speech_data.py b/speech_data.py
@@ -57,6 +57,32 @@ class Target(Enum):  # labels
 	# test_word=9 # use 5 even for speaker etc
 
 
+num_characters = 32  # size of the integer label alphabet: string_to_int_word reduces every code modulo this
+# num_characters=60 #  only one case, Including numbers
+# num_characters=128 #
+# num_characters=256 #  including special characters
+# offset=0  # 1:1 mapping ++
+# offset=32 # starting with ' ' space
+# offset=48 # starting with  numbers
+offset = 64  # subtracted from ord(c) in string_to_int_word; 64 is '@', so 'A' encodes as 1
+max_word_length = 20  # default target length used by pad()
+terminal_symbol = 0  # filler value pad() appends after the real symbols
+
+def pad(vec, pad_to=max_word_length, one_hot=False):
+	"""Extend `vec` in place with terminal entries up to length `pad_to`, then return it."""
+	shortfall = pad_to - len(vec)  # zero or negative means nothing is appended
+	for _ in range(shortfall):
+		# A fresh row per slot when one_hot, so padded rows never alias each other.
+		vec.append([terminal_symbol] * num_characters if one_hot else terminal_symbol)
+	return vec
+
+
+def string_to_int_word(word, pad_to):
+	"""Encode `word` as ints, `(ord(c) - offset) % num_characters` per character, padded to `pad_to`."""
+	codes = [(ord(c) - offset) % num_characters for c in word]
+	# Forward pad_to: previously pad() was called without it, so the argument was silently ignored.
+	return pad(codes, pad_to)
+
 
 def progresshook(blocknum, blocksize, totalsize):
 		readsofar = blocknum * blocksize
@@ -173,14 +199,14 @@ def mfcc_batch_generator(batch_size=10, source=Source.DIGIT_WAVES, target=Target
 		for wav in files:
 			if not wav.endswith(".wav"): continue
 			wave, sr = librosa.load(path+wav, mono=True)
+			mfcc = librosa.feature.mfcc(wave, sr)
 			if target==Target.speaker: label=one_hot_from_item(speaker(wav), speakers)
 			elif target==Target.digits:  label=dense_to_one_hot(int(wav[0]),10)
 			elif target==Target.first_letter:  label=dense_to_one_hot((ord(wav[0]) - 48) % 32,32)
 			elif target == Target.hotword: label = one_hot_word(wav, pad_to=20)  # max_output_length
 			elif target == Target.word:  label = string_to_int_word(wav, pad_to=20)  # max_output_length
 			else: raise Exception("todo : labels for Target!")
 			labels.append(label)
-			mfcc = librosa.feature.mfcc(wave, sr)
 			# print(np.array(mfcc).shape)
 			mfcc=np.pad(mfcc,((0,0),(0,80-len(mfcc[0]))), mode='constant', constant_values=0)
 			batch_features.append(np.array(mfcc))
@@ -322,27 +348,6 @@ def one_hot_from_item(item, items):
 	x[i]=1
 	return x
 
-num_characters=32
-# num_characters=60 #  only one case, Including numbers
-# num_characters=128 #
-# num_characters=256 #  including special characters
-# offset=0  # 1:1 mapping ++
-# offset=32 # starting with ' ' space
-# offset=48 # starting with  numbers
-offset=64 # starting with characters
-max_word_length=20
-
-def pad(vec,pad_to=max_word_length):
-	for i in range(0, pad_to - len(vec)):
-		vec.append([-1] * num_characters)  # Terminal 'symbol'
-	return vec
-
-def string_to_int_word(word, pad_to):
-	z = map(lambda x: (ord(x) - offset) % num_characters, word)
-	z= list(z)
-	z=pad(z)
-	return z
-
 
 def one_hot_word(word,pad_to=max_word_length):
 	vec=[]
diff --git a/word_to_phonemes.swift b/word_to_phonemes.swift
@@ -0,0 +1,17 @@
+#!/usr/bin/env swift
+import AppKit
+// Prints the phoneme string the Vicki voice produces for the command-line words.
+let tts=NSSpeechSynthesizer.init(voice:"com.apple.speech.synthesis.voice.Vicki")!
+var text=""
+// dropFirst() skips the program name; unlike `1 ... count-1` it does not trap when no argument is given.
+for s in CommandLine.arguments.dropFirst() {
+  text+=s+" "
+}
+extension NSString {
+    func split(pattern: String) -> [String] {return self.components(separatedBy:pattern);    }
+    var strip:String { return self.trimmingCharacters(in:NSCharacterSet.whitespacesAndNewlines)}
+    func replace(pattern: String,with:String)->String{return self.replacingOccurrences(of:pattern,with:with);}
+}
+
+let phon:String=tts.phonemes(from:text)
+print(phon)