Skip to content

Commit fee7eaa

Browse files
author
Suwon Shon
committed
update phone_en model and training script
1 parent b9351aa commit fee7eaa

File tree

3 files changed

+743
-140
lines changed

3 files changed

+743
-140
lines changed

fusion_results.ipynb

+55-140
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@
430430
},
431431
{
432432
"cell_type": "code",
433-
"execution_count": 7,
433+
"execution_count": null,
434434
"metadata": {},
435435
"outputs": [
436436
{
@@ -488,7 +488,7 @@
488488
},
489489
{
490490
"cell_type": "code",
491-
"execution_count": 8,
491+
"execution_count": null,
492492
"metadata": {},
493493
"outputs": [
494494
{
@@ -502,24 +502,7 @@
502502
"(?, 1500)\n",
503503
"(?, 600)\n",
504504
"71600\n",
505-
"INFO:tensorflow:Restoring parameters from snnmodel_chars/model71600.ckpt\n",
506-
"Final accurary on test dataset : 0.582\n",
507-
"Confusion matrix\n",
508-
"[[ 170. 32. 32. 11. 34.]\n",
509-
" [ 30. 108. 30. 24. 35.]\n",
510-
" [ 57. 44. 214. 24. 50.]\n",
511-
" [ 17. 32. 16. 175. 24.]\n",
512-
" [ 28. 34. 42. 28. 201.]]\n",
513-
"Precision\n",
514-
"[ 0.609319 0.47577093 0.55012853 0.66287879 0.6036036 ]\n",
515-
"Recall\n",
516-
"[ 0.56291391 0.432 0.64071856 0.66793893 0.58430233]\n",
517-
"\n",
518-
"\n",
519-
"<Performance evaluation on Test dataset>\n",
520-
"Accurary : 0.582\n",
521-
"Precision : 0.580\n",
522-
"Recall : 0.578\n"
505+
"INFO:tensorflow:Restoring parameters from snnmodel_chars/model71600.ckpt\n"
523506
]
524507
}
525508
],
@@ -581,18 +564,9 @@
581564
},
582565
{
583566
"cell_type": "code",
584-
"execution_count": 9,
567+
"execution_count": null,
585568
"metadata": {},
586-
"outputs": [
587-
{
588-
"name": "stdout",
589-
"output_type": "stream",
590-
"text": [
591-
"(13825, 50320) (1524, 50320) (1492, 50320)\n",
592-
"((13825, 50320), (1524, 50320), (5, 50320), (1492, 50320))\n"
593-
]
594-
}
595-
],
569+
"outputs": [],
596570
"source": [
597571
"import siamese_model_phone_hu as siamese_model\n",
598572
"\n",
@@ -640,41 +614,9 @@
640614
},
641615
{
642616
"cell_type": "code",
643-
"execution_count": 10,
617+
"execution_count": null,
644618
"metadata": {},
645-
"outputs": [
646-
{
647-
"name": "stdout",
648-
"output_type": "stream",
649-
"text": [
650-
"(?, 91520)\n",
651-
"(?, 1500)\n",
652-
"(?, 600)\n",
653-
"(?, 91520)\n",
654-
"(?, 1500)\n",
655-
"(?, 600)\n",
656-
"60400\n",
657-
"INFO:tensorflow:Restoring parameters from snnmodel_phone_hu_backup/model60400.ckpt\n",
658-
"Final accurary on test dataset : 0.548\n",
659-
"Confusion matrix\n",
660-
"[[ 170. 34. 73. 19. 50.]\n",
661-
" [ 25. 132. 62. 21. 27.]\n",
662-
" [ 46. 55. 137. 19. 51.]\n",
663-
" [ 17. 11. 15. 187. 25.]\n",
664-
" [ 44. 18. 47. 16. 191.]]\n",
665-
"Precision\n",
666-
"[ 0.49132948 0.49438202 0.44480519 0.73333333 0.60443038]\n",
667-
"Recall\n",
668-
"[ 0.56291391 0.528 0.41017964 0.71374046 0.55523256]\n",
669-
"\n",
670-
"\n",
671-
"<Performance evaluation on Test dataset>\n",
672-
"Accurary : 0.548\n",
673-
"Precision : 0.554\n",
674-
"Recall : 0.554\n"
675-
]
676-
}
677-
],
619+
"outputs": [],
678620
"source": [
679621
"# init variables\n",
680622
"sess = tf.InteractiveSession()\n",
@@ -733,29 +675,24 @@
733675
},
734676
{
735677
"cell_type": "code",
736-
"execution_count": 11,
678+
"execution_count": null,
737679
"metadata": {},
738-
"outputs": [
739-
{
740-
"name": "stdout",
741-
"output_type": "stream",
742-
"text": [
743-
"\n",
744-
"\n",
745-
"<Performance evaluation on Test dataset>\n",
746-
"Accurary : 0.755\n",
747-
"Precision : 0.554\n",
748-
"Recall : 0.554\n"
749-
]
750-
}
751-
],
680+
"outputs": [],
752681
"source": [
753682
"# Fusion 1 : ivector + chars\n",
754683
"\n",
755684
"tst_scores = tst_scores_ivectors + tst_scores_chars\n",
756685
"hypo_lang = np.argmax(tst_scores,axis = 0)\n",
757686
"temp = ((tst_labels-1) - hypo_lang)\n",
758687
"acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
688+
"confusionmat = np.zeros((5,5))\n",
689+
"for i,lang in enumerate(languages):\n",
690+
" hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
691+
" hist_bylang = np.histogram(hypo_bylang,5)\n",
692+
" confusionmat[:,i] = hist_bylang[0]\n",
693+
"\n",
694+
"precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) #precision\n",
695+
"recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # recall\n",
759696
"\n",
760697
"print '\\n\\n<Performance evaluation on Test dataset>'\n",
761698
"print 'Accurary : %0.3f' %(acc)\n",
@@ -765,29 +702,24 @@
765702
},
766703
{
767704
"cell_type": "code",
768-
"execution_count": 12,
705+
"execution_count": null,
769706
"metadata": {},
770-
"outputs": [
771-
{
772-
"name": "stdout",
773-
"output_type": "stream",
774-
"text": [
775-
"\n",
776-
"\n",
777-
"<Performance evaluation on Test dataset>\n",
778-
"Accurary : 0.751\n",
779-
"Precision : 0.554\n",
780-
"Recall : 0.554\n"
781-
]
782-
}
783-
],
707+
"outputs": [],
784708
"source": [
785709
"# Fusion 2 : ivector + words\n",
786710
"\n",
787711
"tst_scores = tst_scores_ivectors + tst_scores_words\n",
788712
"hypo_lang = np.argmax(tst_scores,axis = 0)\n",
789713
"temp = ((tst_labels-1) - hypo_lang)\n",
790714
"acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
715+
"confusionmat = np.zeros((5,5))\n",
716+
"for i,lang in enumerate(languages):\n",
717+
" hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
718+
" hist_bylang = np.histogram(hypo_bylang,5)\n",
719+
" confusionmat[:,i] = hist_bylang[0]\n",
720+
"\n",
721+
"precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) #precision\n",
722+
"recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # recall\n",
791723
"\n",
792724
"print '\\n\\n<Performance evaluation on Test dataset>'\n",
793725
"print 'Accurary : %0.3f' %(acc)\n",
@@ -797,29 +729,24 @@
797729
},
798730
{
799731
"cell_type": "code",
800-
"execution_count": 13,
732+
"execution_count": null,
801733
"metadata": {},
802-
"outputs": [
803-
{
804-
"name": "stdout",
805-
"output_type": "stream",
806-
"text": [
807-
"\n",
808-
"\n",
809-
"<Performance evaluation on Test dataset>\n",
810-
"Accurary : 0.716\n",
811-
"Precision : 0.554\n",
812-
"Recall : 0.554\n"
813-
]
814-
}
815-
],
734+
"outputs": [],
816735
"source": [
817736
"# Fusion 3 : ivector + phone_hu\n",
818737
"\n",
819738
"tst_scores = tst_scores_ivectors + tst_scores_phone_hu\n",
820739
"hypo_lang = np.argmax(tst_scores,axis = 0)\n",
821740
"temp = ((tst_labels-1) - hypo_lang)\n",
822741
"acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
742+
"confusionmat = np.zeros((5,5))\n",
743+
"for i,lang in enumerate(languages):\n",
744+
" hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
745+
" hist_bylang = np.histogram(hypo_bylang,5)\n",
746+
" confusionmat[:,i] = hist_bylang[0]\n",
747+
"\n",
748+
"precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) #precision\n",
749+
"recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # recall\n",
823750
"\n",
824751
"print '\\n\\n<Performance evaluation on Test dataset>'\n",
825752
"print 'Accurary : %0.3f' %(acc)\n",
@@ -829,30 +756,24 @@
829756
},
830757
{
831758
"cell_type": "code",
832-
"execution_count": 14,
759+
"execution_count": null,
833760
"metadata": {},
834-
"outputs": [
835-
{
836-
"name": "stdout",
837-
"output_type": "stream",
838-
"text": [
839-
"\n",
840-
"\n",
841-
"<Performance evaluation on Test dataset>\n",
842-
"Accurary : 0.752\n",
843-
"Precision : 0.554\n",
844-
"Recall : 0.554\n"
845-
]
846-
}
847-
],
761+
"outputs": [],
848762
"source": [
849763
"# Fusion 4 : All\n",
850764
"\n",
851765
"tst_scores = 1*tst_scores_ivectors + 1*tst_scores_words + 1*tst_scores_chars + 1*tst_scores_phone_hu\n",
852766
"hypo_lang = np.argmax(tst_scores,axis = 0)\n",
853767
"temp = ((tst_labels-1) - hypo_lang)\n",
854768
"acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
769+
"confusionmat = np.zeros((5,5))\n",
770+
"for i,lang in enumerate(languages):\n",
771+
" hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
772+
" hist_bylang = np.histogram(hypo_bylang,5)\n",
773+
" confusionmat[:,i] = hist_bylang[0]\n",
855774
"\n",
775+
"precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) #precision\n",
776+
"recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # recall\n",
856777
"print '\\n\\n<Performance evaluation on Test dataset>'\n",
857778
"print 'Accurary : %0.3f' %(acc)\n",
858779
"print 'Precision : %0.3f' %(np.mean(precision))\n",
@@ -861,30 +782,24 @@
861782
},
862783
{
863784
"cell_type": "code",
864-
"execution_count": 15,
785+
"execution_count": null,
865786
"metadata": {},
866-
"outputs": [
867-
{
868-
"name": "stdout",
869-
"output_type": "stream",
870-
"text": [
871-
"\n",
872-
"\n",
873-
"<Performance evaluation on Test dataset>\n",
874-
"Accurary : 0.775\n",
875-
"Precision : 0.554\n",
876-
"Recall : 0.554\n"
877-
]
878-
}
879-
],
787+
"outputs": [],
880788
"source": [
881789
"# Fusion - : All + linear combination\n",
882790
"\n",
883791
"tst_scores = 2.5*tst_scores_ivectors + 1*tst_scores_words + 1*tst_scores_chars + 1*tst_scores_phone_hu\n",
884792
"hypo_lang = np.argmax(tst_scores,axis = 0)\n",
885793
"temp = ((tst_labels-1) - hypo_lang)\n",
886794
"acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
795+
"confusionmat = np.zeros((5,5))\n",
796+
"for i,lang in enumerate(languages):\n",
797+
" hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
798+
" hist_bylang = np.histogram(hypo_bylang,5)\n",
799+
" confusionmat[:,i] = hist_bylang[0]\n",
887800
"\n",
801+
"precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) #precision\n",
802+
"recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # recall\n",
888803
"print '\\n\\n<Performance evaluation on Test dataset>'\n",
889804
"print 'Accurary : %0.3f' %(acc)\n",
890805
"print 'Precision : %0.3f' %(np.mean(precision))\n",

scripts/siamese_model_phone_en.py

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import tensorflow as tf
2+
import numpy as np
3+
class siamese:
    """Two-tower (siamese) network with shared weights, built on TF1 graph ops.

    Each tower is: 1-D convolution -> ReLU -> flatten -> fc(1500) -> ReLU
    -> fc(600) -> ReLU -> fc(200).  Both towers are created inside the same
    ``"siamese"`` variable scope with reuse enabled, so they share every
    variable.

    Attributes set by ``__init__``:
        x1, x2: float32 placeholders of shape ``[None, input_dim]``.
        a1, b1, o1: pre-activation outputs of fc1, fc2, fc3 for ``x1``.
        a2, b2, o2: pre-activation outputs of fc1, fc2, fc3 for ``x2``.
        y_: float32 placeholder of shape ``[None]`` holding the pair targets.
        loss: scalar tensor returned by ``loss_with_cds``.
    """

    # Create model
    def __init__(self, input_dim):
        """Build both towers and the loss for inputs of width ``input_dim``."""
        self.x1 = tf.placeholder(tf.float32, [None, input_dim])
        self.x2 = tf.placeholder(tf.float32, [None, input_dim])

        with tf.variable_scope("siamese") as scope:
            self.a1, self.b1, self.o1 = self.network(self.x1)
            # Second tower must reuse the variables created by the first.
            scope.reuse_variables()
            # Store under a2: the previous code assigned to self.a1 here,
            # overwriting the first tower's fc1 output and never defining a2.
            self.a2, self.b2, self.o2 = self.network(self.x2)

        # Create loss
        self.y_ = tf.placeholder(tf.float32, [None])
        self.loss = self.loss_with_cds()

    @staticmethod
    def _same_padding_length(n_input, stride):
        """Return ceil(n_input / stride) using integer arithmetic.

        This is the number of output steps of a strided convolution with
        ``padding='SAME'``.  Integer floor-division keeps the result
        identical under Python 2 and Python 3.
        """
        return -(-int(n_input) // int(stride))

    def network(self, x):
        """Build one tower on top of ``x`` and return ``(fc1, fc2, fc3)``.

        The returned tensors are the fully connected outputs *before* ReLU.
        """
        kernel_size = 180
        stride = 22
        depth = 40
        conv1 = self.conv_layer(x, kernel_size, stride, depth, 'conv1')
        conv1r = tf.nn.relu(conv1)

        # Flatten [batch, steps, depth] -> [batch, steps * depth].  With SAME
        # padding, steps == ceil(width / stride); rounding or flooring the
        # quotient gives the wrong width when it is not a multiple of stride.
        n_prev_weight = int(x.get_shape()[1])
        n_steps = self._same_padding_length(n_prev_weight, stride)
        conv1_d = tf.reshape(conv1r, [-1, n_steps * depth])

        fc1 = self.fc_layer(conv1_d, 1500, "fc1")
        ac1 = tf.nn.relu(fc1)
        fc2 = self.fc_layer(ac1, 600, "fc2")
        ac2 = tf.nn.relu(fc2)
        fc3 = self.fc_layer(ac2, 200, "fc3")
        return fc1, fc2, fc3

    def fc_layer(self, bottom, n_weight, name):
        """Return ``bottom @ W + b`` with ``n_weight`` output units.

        Variables are named ``name + 'W'`` (Xavier initialised) and
        ``name + 'b'`` (uniform in [-0.001, 0.001]).  The static shape of
        ``bottom`` is printed as a graph-construction trace.
        """
        print( bottom.get_shape())
        n_prev_weight = bottom.get_shape()[1]
        W = tf.get_variable(name+'W', dtype=tf.float32, shape=[n_prev_weight, n_weight], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.random_uniform([n_weight],-0.001,0.001, dtype=tf.float32))
        fc = tf.nn.bias_add(tf.matmul(bottom, W), b)
        return fc

    def conv_layer(self, bottom, kernel_size, stride, depth, name):
        """Apply a strided 1-D convolution (SAME padding) plus bias.

        ``bottom`` of shape ``[batch, width]`` is reshaped to a single-channel
        sequence ``[batch, width, 1]`` and convolved with ``depth`` filters of
        length ``kernel_size``.  Variables are named ``name + 'W'`` (Xavier
        initialised) and ``name + 'b'`` (constant 0.001).
        """
        n_prev_weight = int(bottom.get_shape()[1])
        num_channels = 1 # for 1 dimension
        inputlayer = tf.reshape(bottom, [-1, n_prev_weight, num_channels])
        W = tf.get_variable(name+'W', dtype=tf.float32, shape=[kernel_size, num_channels, depth], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.001, shape=[depth*num_channels], dtype=tf.float32))

        conv = tf.nn.bias_add( tf.nn.conv1d(inputlayer, W, stride, padding='SAME'), b)
        return conv

    def loss_with_cds(self):
        """Return the RMS error between ``y_`` and the dot product of o1, o2.

        The per-pair score is ``sum(o1 * o2, axis=1)``; the outputs are not
        length-normalised here.
        NOTE(review): "cds" presumably stands for a cosine-distance score,
        which would require unit-norm embeddings -- confirm with the caller.
        """
        labels_t = self.y_
        cds = tf.reduce_sum(tf.multiply(self.o1, self.o2), 1)
        eucd2 = tf.reduce_mean(tf.pow(tf.subtract(labels_t, cds), 2))
        eucd = tf.sqrt(eucd2, name="eucd")
        return eucd
61+
62+

0 commit comments

Comments
 (0)