Skip to content

Commit 3f608b9

Browse files
committed
1 parent ea54fd9 commit 3f608b9

File tree

4 files changed

+69
-27
lines changed

4 files changed

+69
-27
lines changed

Diff for: README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,6 @@ Update: [Sphinx starts using tensorflow LSTMs](http://cmusphinx.sourceforge.net/
4848
Even though this project is far from finished, we hope it gives you some starting points.
4949

5050
Looking for a tensorflow consultant / deep learning contractor? Reach out to [email protected]
51-
51+
<!--
5252
### Warning / Attention
53-
Google keeps [deliberately breaking the tensorflow API](https://github.com/tensorflow/tensorflow/issues/4283) so you always need the latest tensorflow release if you want current examples to run (and can't run old tensorflow stuff simultaneously.)
53+
Google keeps [deliberately breaking the tensorflow API](https://github.com/tensorflow/tensorflow/issues/4283) so you always need the latest tensorflow release if you want current examples to run (and can't run old tensorflow stuff simultaneously.) -->

Diff for: lstm_ctc_to_chars.py

+23-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
arrays of Mel-Frequency Cepstral Coefficients. This is test code to run on the
66
8-item data set in the "sample_data" directory, for those without access to TIMIT.
77
8-
Author: Jon Rein
8+
Original Author: Jon Rein
99
'''
1010

1111
from __future__ import absolute_import
@@ -17,6 +17,8 @@
1717
from tensorflow.python.ops import rnn_cell
1818
from tensorflow.python.ops.rnn import bidirectional_rnn
1919
import numpy as np
20+
import re
21+
2022
from bdlstm_utils import load_batched_data
2123

2224
INPUT_PATH = '/data/ctc/sample_data/mfcc' # directory of MFCC nFeatures x nFrames 2-D array .npy files
@@ -87,10 +89,25 @@
8789
reduced_sum = tf.reduce_sum(tf.edit_distance(predictions, targetY, normalize=False))
8890
errorRate = reduced_sum / tf.to_float(tf.size(targetY.values))
8991

92+
saver = tf.train.Saver() # defaults to saving all variables
93+
ckpt = tf.train.get_checkpoint_state('./checkpoints')
9094
####Run session
9195
with tf.Session(graph=graph) as session:
92-
print('Initializing')
93-
tf.initialize_all_variables().run()
96+
merged = tf.merge_all_summaries()
97+
writer = tf.train.SummaryWriter("/tmp/basic_new", session.graph)
98+
99+
start = 0
100+
if ckpt and ckpt.model_checkpoint_path:
101+
p = re.compile('\./checkpoints/model\.ckpt-([0-9]+)')
102+
m = p.match(ckpt.model_checkpoint_path)
103+
start = int(m.group(1))
104+
if start > 0:
105+
# Restore variables from disk.
106+
saver.restore(session, "./checkpoints/model.ckpt-%d" % start)
107+
print("Model %d restored." % start)
108+
else:
109+
print('Initializing')
110+
session.run(tf.initialize_all_variables())
94111
for epoch in range(nEpochs):
95112
print('Epoch', epoch + 1, '...')
96113
batchErrors = np.zeros(len(batchedData))
@@ -109,3 +126,6 @@
109126
batchErrors[batch] = er * len(batchSeqLengths)
110127
epochErrorRate = batchErrors.sum() / totalN
111128
print('Epoch', epoch + 1, 'error rate:', epochErrorRate)
129+
saver.save(session, 'checkpoints/model.ckpt', global_step=epoch + 1)
130+
print('Learning finished')
131+

Diff for: speech_data.py

+27-22
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,32 @@ class Target(Enum): # labels
5757
# test_word=9 # use 5 even for speaker etc
5858

5959

60+
# Character-encoding configuration shared by the word/label helpers below.
num_characters = 32  # size of the integer alphabet that characters are folded into
# num_characters=60 # only one case, Including numbers
# num_characters=128 #
# num_characters=256 # including special characters
# offset=0  # 1:1 mapping ++
# offset=32 # starting with ' ' space
# offset=48 # starting with numbers
offset = 64  # starting with characters
max_word_length = 20  # default padded length of an encoded word
terminal_symbol = 0  # value used to fill the padding positions


def pad(vec, pad_to=max_word_length, one_hot=False):
    """Pad ``vec`` in place with terminal symbols up to ``pad_to`` entries.

    Args:
        vec: list to extend; it is mutated and also returned.
        pad_to: target length. A ``vec`` that is already at least this long
            is returned unchanged (it is never truncated).
        one_hot: when true, each padding entry is a list of
            ``num_characters`` terminal symbols instead of a single symbol.

    Returns:
        The same list object that was passed in.
    """
    for _ in range(pad_to - len(vec)):
        if one_hot:
            vec.append([terminal_symbol] * num_characters)
        else:
            vec.append(terminal_symbol)
    return vec


def string_to_int_word(word, pad_to):
    """Encode ``word`` as a list of integer character codes padded to ``pad_to``.

    Each character is mapped to ``(ord(ch) - offset) % num_characters``; the
    result is then padded with ``terminal_symbol`` via ``pad``.

    Args:
        word: string to encode.
        pad_to: target length of the returned list. Words longer than this
            are returned at their full length.

    Returns:
        A new list of ints.
    """
    codes = [(ord(ch) - offset) % num_characters for ch in word]
    # Honour the caller's requested length; previously ``pad_to`` was ignored
    # and the module default ``max_word_length`` was always used.
    return pad(codes, pad_to)
85+
6086

6187
def progresshook(blocknum, blocksize, totalsize):
6288
readsofar = blocknum * blocksize
@@ -173,14 +199,14 @@ def mfcc_batch_generator(batch_size=10, source=Source.DIGIT_WAVES, target=Target
173199
for wav in files:
174200
if not wav.endswith(".wav"): continue
175201
wave, sr = librosa.load(path+wav, mono=True)
202+
mfcc = librosa.feature.mfcc(wave, sr)
176203
if target==Target.speaker: label=one_hot_from_item(speaker(wav), speakers)
177204
elif target==Target.digits: label=dense_to_one_hot(int(wav[0]),10)
178205
elif target==Target.first_letter: label=dense_to_one_hot((ord(wav[0]) - 48) % 32,32)
179206
elif target == Target.hotword: label = one_hot_word(wav, pad_to=20) # max_output_length
180207
elif target == Target.word: label = string_to_int_word(wav, pad_to=20) # max_output_length
181208
else: raise Exception("todo : labels for Target!")
182209
labels.append(label)
183-
mfcc = librosa.feature.mfcc(wave, sr)
184210
# print(np.array(mfcc).shape)
185211
mfcc=np.pad(mfcc,((0,0),(0,80-len(mfcc[0]))), mode='constant', constant_values=0)
186212
batch_features.append(np.array(mfcc))
@@ -322,27 +348,6 @@ def one_hot_from_item(item, items):
322348
x[i]=1
323349
return x
324350

325-
num_characters=32
326-
# num_characters=60 # only one case, Including numbers
327-
# num_characters=128 #
328-
# num_characters=256 # including special characters
329-
# offset=0 # 1:1 mapping ++
330-
# offset=32 # starting with ' ' space
331-
# offset=48 # starting with numbers
332-
offset=64 # starting with characters
333-
max_word_length=20
334-
335-
def pad(vec,pad_to=max_word_length):
336-
for i in range(0, pad_to - len(vec)):
337-
vec.append([-1] * num_characters) # Terminal 'symbol'
338-
return vec
339-
340-
def string_to_int_word(word, pad_to):
341-
z = map(lambda x: (ord(x) - offset) % num_characters, word)
342-
z= list(z)
343-
z=pad(z)
344-
return z
345-
346351

347352
def one_hot_word(word,pad_to=max_word_length):
348353
vec=[]

Diff for: word_to_phonemes.swift

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env swift
2+
import AppKit
3+
4+
// Prints the phoneme transcription of the text given on the command line,
// using the macOS speech synthesizer's text-to-phoneme conversion.

// NOTE(review): the force-unwrap aborts if the "Vicki" voice is unavailable — confirm that is acceptable.
let tts = NSSpeechSynthesizer.init(voice: "com.apple.speech.synthesis.voice.Vicki")!

// Every argument after the program name is part of the text to transcribe.
// Iterating dropFirst() avoids building the range 1...0 when no arguments
// are passed, which would trap at runtime.
let words = CommandLine.arguments.dropFirst()
if words.isEmpty {
    FileHandle.standardError.write("usage: word_to_phonemes.swift <text>...\n".data(using: .utf8)!)
    exit(1)
}

var text = ""
for s in words {
    text += s + " "
}

// Small string conveniences on NSString.
extension NSString {
    /// Splits the string on every occurrence of `pattern`.
    func split(pattern: String) -> [String] { return self.components(separatedBy: pattern) }
    /// The string with leading and trailing whitespace and newlines removed.
    var strip: String { return self.trimmingCharacters(in: NSCharacterSet.whitespacesAndNewlines) }
    /// Replaces every occurrence of `pattern` with `with`.
    func replace(pattern: String, with: String) -> String { return self.replacingOccurrences(of: pattern, with: with) }
}

let phon: String = tts.phonemes(from: text)
print(phon)

0 commit comments

Comments
 (0)