update phone_en model and training script

Suwon Shon · Suwon Shon · commit fee7eaa1087f · 2017-10-25T10:09:27.000-04:00
diff --git a/fusion_results.ipynb b/fusion_results.ipynb
@@ -430,7 +430,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -488,7 +488,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -502,24 +502,7 @@
       "(?, 1500)\n",
       "(?, 600)\n",
       "71600\n",
-      "INFO:tensorflow:Restoring parameters from snnmodel_chars/model71600.ckpt\n",
-      "Final accurary on test dataset : 0.582\n",
-      "Confusion matrix\n",
-      "[[ 170.   32.   32.   11.   34.]\n",
-      " [  30.  108.   30.   24.   35.]\n",
-      " [  57.   44.  214.   24.   50.]\n",
-      " [  17.   32.   16.  175.   24.]\n",
-      " [  28.   34.   42.   28.  201.]]\n",
-      "Precision\n",
-      "[ 0.609319    0.47577093  0.55012853  0.66287879  0.6036036 ]\n",
-      "Recall\n",
-      "[ 0.56291391  0.432       0.64071856  0.66793893  0.58430233]\n",
-      "\n",
-      "\n",
-      "<Performance evaluation on Test dataset>\n",
-      "Accurary  : 0.582\n",
-      "Precision : 0.580\n",
-      "Recall    : 0.578\n"
+      "INFO:tensorflow:Restoring parameters from snnmodel_chars/model71600.ckpt\n"
      ]
     }
    ],
@@ -581,18 +564,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(13825, 50320) (1524, 50320) (1492, 50320)\n",
-      "((13825, 50320), (1524, 50320), (5, 50320), (1492, 50320))\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import siamese_model_phone_hu as siamese_model\n",
     "\n",
@@ -640,41 +614,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(?, 91520)\n",
-      "(?, 1500)\n",
-      "(?, 600)\n",
-      "(?, 91520)\n",
-      "(?, 1500)\n",
-      "(?, 600)\n",
-      "60400\n",
-      "INFO:tensorflow:Restoring parameters from snnmodel_phone_hu_backup/model60400.ckpt\n",
-      "Final accurary on test dataset : 0.548\n",
-      "Confusion matrix\n",
-      "[[ 170.   34.   73.   19.   50.]\n",
-      " [  25.  132.   62.   21.   27.]\n",
-      " [  46.   55.  137.   19.   51.]\n",
-      " [  17.   11.   15.  187.   25.]\n",
-      " [  44.   18.   47.   16.  191.]]\n",
-      "Precision\n",
-      "[ 0.49132948  0.49438202  0.44480519  0.73333333  0.60443038]\n",
-      "Recall\n",
-      "[ 0.56291391  0.528       0.41017964  0.71374046  0.55523256]\n",
-      "\n",
-      "\n",
-      "<Performance evaluation on Test dataset>\n",
-      "Accurary  : 0.548\n",
-      "Precision : 0.554\n",
-      "Recall    : 0.554\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# init variables\n",
     "sess = tf.InteractiveSession()\n",
@@ -733,29 +675,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "<Performance evaluation on Test dataset>\n",
-      "Accurary  : 0.755\n",
-      "Precision : 0.554\n",
-      "Recall    : 0.554\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Fusion 1 : ivector + chars\n",
     "\n",
     "tst_scores = tst_scores_ivectors + tst_scores_chars\n",
     "hypo_lang = np.argmax(tst_scores,axis = 0)\n",
     "temp = ((tst_labels-1) - hypo_lang)\n",
     "acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
+    "confusionmat = np.zeros((5,5))\n",
+    "for i,lang in enumerate(languages):\n",
+    "    hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
+    "    # explicit range: default bin edges follow this subset's min/max, which misplaces counts if class 0 or 4 is never predicted\n",
+    "    confusionmat[:,i] = np.histogram(hypo_bylang,bins=5,range=(0,5))[0]\n",
+    "\n",
+    "precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) # diagonal / row sums (rows = predicted class)\n",
+    "recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # diagonal / column sums (columns = true language)\n",
     "\n",
     "print '\\n\\n<Performance evaluation on Test dataset>'\n",
     "print 'Accurary  : %0.3f' %(acc)\n",
@@ -765,29 +702,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "<Performance evaluation on Test dataset>\n",
-      "Accurary  : 0.751\n",
-      "Precision : 0.554\n",
-      "Recall    : 0.554\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Fusion 2 : ivector + words\n",
     "\n",
     "tst_scores = tst_scores_ivectors + tst_scores_words\n",
     "hypo_lang = np.argmax(tst_scores,axis = 0)\n",
     "temp = ((tst_labels-1) - hypo_lang)\n",
     "acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
+    "confusionmat = np.zeros((5,5))\n",
+    "for i,lang in enumerate(languages):\n",
+    "    hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
+    "    # explicit range: default bin edges follow this subset's min/max, which misplaces counts if class 0 or 4 is never predicted\n",
+    "    confusionmat[:,i] = np.histogram(hypo_bylang,bins=5,range=(0,5))[0]\n",
+    "\n",
+    "precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) # diagonal / row sums (rows = predicted class)\n",
+    "recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # diagonal / column sums (columns = true language)\n",
     "\n",
     "print '\\n\\n<Performance evaluation on Test dataset>'\n",
     "print 'Accurary  : %0.3f' %(acc)\n",
@@ -797,29 +729,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "<Performance evaluation on Test dataset>\n",
-      "Accurary  : 0.716\n",
-      "Precision : 0.554\n",
-      "Recall    : 0.554\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Fusion 3 : ivector + phone_hu\n",
     "\n",
     "tst_scores = tst_scores_ivectors + tst_scores_phone_hu\n",
     "hypo_lang = np.argmax(tst_scores,axis = 0)\n",
     "temp = ((tst_labels-1) - hypo_lang)\n",
     "acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
+    "confusionmat = np.zeros((5,5))\n",
+    "for i,lang in enumerate(languages):\n",
+    "    hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
+    "    # explicit range: default bin edges follow this subset's min/max, which misplaces counts if class 0 or 4 is never predicted\n",
+    "    confusionmat[:,i] = np.histogram(hypo_bylang,bins=5,range=(0,5))[0]\n",
+    "\n",
+    "precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) # diagonal / row sums (rows = predicted class)\n",
+    "recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # diagonal / column sums (columns = true language)\n",
     "\n",
     "print '\\n\\n<Performance evaluation on Test dataset>'\n",
     "print 'Accurary  : %0.3f' %(acc)\n",
@@ -829,30 +756,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "<Performance evaluation on Test dataset>\n",
-      "Accurary  : 0.752\n",
-      "Precision : 0.554\n",
-      "Recall    : 0.554\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Fusion 4 : All\n",
     "\n",
     "tst_scores = 1*tst_scores_ivectors + 1*tst_scores_words + 1*tst_scores_chars + 1*tst_scores_phone_hu\n",
     "hypo_lang = np.argmax(tst_scores,axis = 0)\n",
     "temp = ((tst_labels-1) - hypo_lang)\n",
     "acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
+    "confusionmat = np.zeros((5,5))\n",
+    "for i,lang in enumerate(languages):\n",
+    "    hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
+    "    # explicit range: default bin edges follow this subset's min/max, which misplaces counts if class 0 or 4 is never predicted\n",
+    "    confusionmat[:,i] = np.histogram(hypo_bylang,bins=5,range=(0,5))[0]\n",
     "\n",
+    "precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) # diagonal / row sums (rows = predicted class)\n",
+    "recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # diagonal / column sums (columns = true language)\n",
     "print '\\n\\n<Performance evaluation on Test dataset>'\n",
     "print 'Accurary  : %0.3f' %(acc)\n",
     "print 'Precision : %0.3f' %(np.mean(precision))\n",
@@ -861,30 +782,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "<Performance evaluation on Test dataset>\n",
-      "Accurary  : 0.775\n",
-      "Precision : 0.554\n",
-      "Recall    : 0.554\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Fusion - : All + linear combination\n",
     "\n",
     "tst_scores = 2.5*tst_scores_ivectors + 1*tst_scores_words + 1*tst_scores_chars + 1*tst_scores_phone_hu\n",
     "hypo_lang = np.argmax(tst_scores,axis = 0)\n",
     "temp = ((tst_labels-1) - hypo_lang)\n",
     "acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
+    "confusionmat = np.zeros((5,5))\n",
+    "for i,lang in enumerate(languages):\n",
+    "    hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
+    "    # explicit range: default bin edges follow this subset's min/max, which misplaces counts if class 0 or 4 is never predicted\n",
+    "    confusionmat[:,i] = np.histogram(hypo_bylang,bins=5,range=(0,5))[0]\n",
     "\n",
+    "precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) # diagonal / row sums (rows = predicted class)\n",
+    "recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # diagonal / column sums (columns = true language)\n",
     "print '\\n\\n<Performance evaluation on Test dataset>'\n",
     "print 'Accurary  : %0.3f' %(acc)\n",
     "print 'Precision : %0.3f' %(np.mean(precision))\n",
diff --git a/scripts/siamese_model_phone_en.py b/scripts/siamese_model_phone_en.py
@@ -0,0 +1,105 @@
+import tensorflow as tf
+import numpy as np
+
+
+class siamese:
+    """Siamese network made of two weight-sharing conv + dense branches.
+
+    Both branches are built by `network` inside the "siamese" variable
+    scope, with variable reuse switched on before the second call.  The
+    loss compares the dot product of the two branch outputs with the
+    targets fed through `y_`.
+
+    Attributes:
+        x1, x2: float32 placeholders of shape [None, input_dim].
+        a1, b1, o1: fc1, fc2 and fc3 outputs of the branch fed by x1.
+        a2, b2, o2: fc1, fc2 and fc3 outputs of the branch fed by x2.
+        y_: float32 placeholder of shape [None] holding the targets.
+        loss: scalar tensor returned by `loss_with_cds`.
+    """
+
+    # Create model
+    def __init__(self, input_dim):
+        """Build both branches and the loss for inputs of width input_dim."""
+        self.x1 = tf.placeholder(tf.float32, [None, input_dim])
+        self.x2 = tf.placeholder(tf.float32, [None, input_dim])
+
+        with tf.variable_scope("siamese") as scope:
+            self.a1, self.b1, self.o1 = self.network(self.x1)
+            # The second branch reuses the variables created just above.
+            scope.reuse_variables()
+            # Keep fc1 of the second branch in a2 instead of overwriting a1.
+            self.a2, self.b2, self.o2 = self.network(self.x2)
+
+        # Create loss
+        self.y_ = tf.placeholder(tf.float32, [None])
+        self.loss = self.loss_with_cds()
+
+    def network(self, x):
+        """Build one branch: conv1d, ReLU, then three dense layers.
+
+        Args:
+            x: 2-D float tensor [batch, input_dim]; input_dim must be
+                statically known.
+
+        Returns:
+            Tuple (fc1, fc2, fc3) of the dense layer outputs before any
+            ReLU, with 1500, 600 and 200 units respectively.
+        """
+        kernel_size = 180
+        stride = 22
+        depth = 40
+        conv1 = self.conv_layer(x, kernel_size, stride, depth, 'conv1')
+        conv1r = tf.nn.relu(conv1)
+        # Flatten with the conv output's own static length.  Under 'SAME'
+        # padding that length is ceil(input_dim / stride), which the former
+        # round(input_dim / stride) did not always reproduce, leaving the
+        # reshape with the wrong row width.
+        n_steps = int(conv1r.get_shape()[1])
+        conv1_d = tf.reshape(conv1r, [-1, n_steps * depth])
+
+        fc1 = self.fc_layer(conv1_d, 1500, "fc1")
+        ac1 = tf.nn.relu(fc1)
+        fc2 = self.fc_layer(ac1, 600, "fc2")
+        ac2 = tf.nn.relu(fc2)
+        fc3 = self.fc_layer(ac2, 200, "fc3")
+        return fc1, fc2, fc3
+
+    def fc_layer(self, bottom, n_weight, name):
+        """Return bottom * W + b for a dense layer with n_weight units.
+
+        Creates (or reuses) variables name+'W' (Xavier initialised) and
+        name+'b' (uniform between -0.001 and 0.001).  Also prints the
+        static shape of `bottom`.
+        """
+        print(bottom.get_shape())
+        n_prev_weight = int(bottom.get_shape()[1])
+        W = tf.get_variable(name+'W', dtype=tf.float32, shape=[n_prev_weight, n_weight], initializer=tf.contrib.layers.xavier_initializer())
+        b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.random_uniform([n_weight],-0.001,0.001, dtype=tf.float32))
+        fc = tf.nn.bias_add(tf.matmul(bottom, W), b)
+        return fc
+
+    def conv_layer(self, bottom, kernel_size, stride, depth, name):
+        """Apply a single-channel 1-D convolution plus bias to bottom.
+
+        `bottom` ([batch, width]) is reshaped to [batch, width, 1] and
+        convolved with `depth` Xavier-initialised filters of length
+        kernel_size using 'SAME' padding; biases start at 0.001.
+        """
+        n_prev_weight = int(bottom.get_shape()[1])
+        num_channels = 1  # for 1 dimension
+        inputlayer = tf.reshape(bottom, [-1, n_prev_weight, num_channels])
+        W = tf.get_variable(name+'W', dtype=tf.float32, shape=[kernel_size, num_channels, depth], initializer=tf.contrib.layers.xavier_initializer())
+        b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.001, shape=[depth*num_channels], dtype=tf.float32))
+        conv = tf.nn.bias_add(tf.nn.conv1d(inputlayer, W, stride, padding='SAME'), b)
+        return conv
+
+    def loss_with_cds(self):
+        """Return the RMS error between targets and branch dot products.
+
+        The similarity is the plain inner product of o1 and o2 along
+        axis 1; no length normalisation is applied in this class.
+        """
+        cds = tf.reduce_sum(tf.multiply(self.o1, self.o2), 1)
+        eucd2 = tf.reduce_mean(tf.pow(tf.subtract(self.y_, cds), 2))
+        return tf.sqrt(eucd2, name="eucd")
diff --git a/train_phone_en.ipynb b/train_phone_en.ipynb