update baseline

Suwon Shon · Suwon Shon · commit b9351aa6cf5e · 2017-10-24T17:01:10.000-04:00
diff --git a/train_ivector_lda.ipynb b/train_ivector_lda.ipynb
@@ -0,0 +1,291 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from tensorflow.contrib.learn.python.learn.datasets import base\n",
+    "\n",
+    "import tensorflow as tf\n",
+    "import numpy as np\n",
+    "import os,sys\n",
+    "sys.path.insert(0, './scripts')\n",
+    "dataDir ='./data'\n",
+    "\n",
+    "\n",
+    "import py_compile\n",
+    "py_compile.compile('scripts/ivector_dataset.py')\n",
+    "py_compile.compile('scripts/ivector_tools.py')\n",
+    "py_compile.compile('scripts/siamese_model.py')\n",
+    "import ivector_dataset\n",
+    "import siamese_model\n",
+    "import ivector_tools as it"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# write prototxt for siamese network\n",
+    "\n",
+    "languages = ['EGY','GLF','LAV','MSA','NOR']\n",
+    "trn_labels = []\n",
+    "trn_names = []\n",
+    "trn_ivectors = np.empty((0,400))\n",
+    "dev_labels = []\n",
+    "dev_names = []\n",
+    "dev_ivectors = np.empty((0,400))\n",
+    "\n",
+    "\n",
+    "for i,lang in enumerate(languages):\n",
+    "    #load train.vardial2017\n",
+    "    filename = dataDir+'/train.vardial2017/%s.ivec' % lang\n",
+    "    name   = np.loadtxt(filename,usecols=[0],dtype='string')\n",
+    "    ivector = np.loadtxt(filename,usecols=range(1,401),dtype='float32')\n",
+    "    trn_labels = np.append(trn_labels, np.ones(np.size(name))*(i+1))\n",
+    "    trn_names=np.append(trn_names,name)\n",
+    "    trn_ivectors = np.append(trn_ivectors, ivector,axis=0)\n",
+    "\n",
+    "    #load dev.vardial2017\n",
+    "    filename = dataDir+'/dev.vardial2017/%s.ivec' % lang\n",
+    "    name   = np.loadtxt(filename,usecols=[0],dtype='string')\n",
+    "    ivector = np.loadtxt(filename,usecols=range(1,401),dtype='float32')\n",
+    "    dev_names=np.append(dev_names,name)\n",
+    "    dev_ivectors = np.append(dev_ivectors, ivector,axis=0)\n",
+    "    dev_labels = np.append(dev_labels, np.ones(np.size(name))*(i+1))\n",
+    "    \n",
+    "# load test.MGB3\n",
+    "filename = dataDir+'/test.MGB3/ivec_features'\n",
+    "tst_names   = np.loadtxt(filename,usecols=[0],dtype='string')\n",
+    "tst_ivectors = np.loadtxt(filename,usecols=range(1,401),dtype='float32')\n",
+    "\n",
+    "# merge trn+dev\n",
+    "trndev_ivectors = np.append(trn_ivectors, dev_ivectors,axis=0)\n",
+    "trndev_labels = np.append(trn_labels,dev_labels)\n",
+    "trndev_name = np.append(trn_names,dev_names)\n",
+    "\n",
+    "\n",
+    "# load tst.MGB3 labels\n",
+    "filename = 'data/test.MGB3/reference'\n",
+    "tst_ref_names = np.loadtxt(filename,usecols=[0],dtype='string')\n",
+    "tst_ref_labels = np.loadtxt(filename,usecols=[1],dtype='int')\n",
+    "\n",
+    "tst_ref_labels_index = []\n",
+    "for i, name_ref in enumerate(tst_ref_names):\n",
+    "    for j, name in enumerate(tst_names):\n",
+    "        if name == name_ref:\n",
+    "            tst_ref_labels_index = np.append(tst_ref_labels_index,int(j))\n",
+    "\n",
+    "tst_labels = tst_ref_labels\n",
+    "tst_ivectors = tst_ivectors[ map(int,tst_ref_labels_index),:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "((13825, 400), (1524, 400), (5, 400), (1492, 400))\n",
+      "Final accurary on test dataset : 0.603\n",
+      "Confusion matrix\n",
+      "[[ 192.   14.   40.   10.   46.]\n",
+      " [  15.  118.   34.    8.   20.]\n",
+      " [  65.   83.  221.   16.  102.]\n",
+      " [  23.   28.   24.  225.   32.]\n",
+      " [   7.    7.   15.    3.  144.]]\n",
+      "Precision\n",
+      "[ 0.63576159  0.60512821  0.45379877  0.67771084  0.81818182]\n",
+      "Recall\n",
+      "[ 0.63576159  0.472       0.66167665  0.85877863  0.41860465]\n",
+      "\n",
+      "\n",
+      "<Performance evaluation on Test dataset : CDS (baseline) >\n",
+      "Accurary  : 0.603\n",
+      "Precision : 0.638\n",
+      "Recall    : 0.609\n"
+     ]
+    }
+   ],
+   "source": [
+    "#center and length norm.\n",
+    "m=np.mean(trn_ivectors,axis=0)\n",
+    "A = np.cov(trn_ivectors.transpose())\n",
+    "[a,D,V] = np.linalg.svd(A)\n",
+    "V= V.transpose()\n",
+    "W= np.dot(V, np.diag(1./( np.sqrt(D) + 0.0000000001 )))\n",
+    "\n",
+    "trn_ivectors = np.dot( np.subtract( trn_ivectors, m), W)\n",
+    "trndev_ivectors = np.dot( np.subtract( trndev_ivectors, m), W)\n",
+    "dev_ivectors = np.dot( np.subtract( dev_ivectors, m), W)\n",
+    "tst_ivectors = np.dot( np.subtract( tst_ivectors, m), W)\n",
+    "\n",
+    "trn_ivectors = it.length_norm(trn_ivectors)\n",
+    "trndev_ivectors = it.length_norm(trndev_ivectors)\n",
+    "dev_ivectors = it.length_norm(dev_ivectors)\n",
+    "tst_ivectors = it.length_norm(tst_ivectors)\n",
+    "\n",
+    "#language modeling\n",
+    "lang_mean=[]\n",
+    "for i, lang in enumerate(languages):\n",
+    "    lang_mean.append(np.mean(np.append(trn_ivectors[np.nonzero(trn_labels == i+1)] ,dev_ivectors[np.nonzero(dev_labels == i+1)],axis=0),axis=0))\n",
+    "#    lang_mean.append(np.mean(trn_ivectors[np.nonzero(trn_labels == i+1)],axis=0))\n",
+    "\n",
+    "lang_mean = np.array(lang_mean)\n",
+    "lang_mean = it.length_norm(lang_mean)\n",
+    "\n",
+    "print( np.shape(trn_ivectors), np.shape(dev_ivectors), np.shape(lang_mean),np.shape(tst_ivectors) )\n",
+    "\n",
+    "\n",
+    "tst_scores = lang_mean.dot(tst_ivectors.transpose() )\n",
+    "# print(tst_scores.shape)\n",
+    "hypo_lang = np.argmax(tst_scores,axis = 0)\n",
+    "temp = ((tst_labels-1) - hypo_lang)\n",
+    "acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
+    "print 'Final accurary on test dataset : %0.3f' %(acc)\n",
+    "\n",
+    "confusionmat = np.zeros((5,5))\n",
+    "for i,lang in enumerate(languages):\n",
+    "    hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
+    "    hist_bylang = np.histogram(hypo_bylang,5)\n",
+    "    confusionmat[:,i] = hist_bylang[0]\n",
+    "\n",
+    "precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) #precision\n",
+    "recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # recall\n",
+    "    \n",
+    "print 'Confusion matrix'\n",
+    "print confusionmat\n",
+    "print 'Precision'\n",
+    "print precision\n",
+    "print 'Recall'\n",
+    "print recall\n",
+    "\n",
+    "print '\\n\\n<Performance evaluation on Test dataset : CDS (baseline) >'\n",
+    "print 'Accurary  : %0.3f' %(acc)\n",
+    "print 'Precision : %0.3f' %(np.mean(precision))\n",
+    "print 'Recall    : %0.3f' %(np.mean(recall))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "((13825, 4), (1524, 4), (5, 4), (1492, 4))\n",
+      "Final accurary on test dataset : 0.628\n",
+      "Confusion matrix\n",
+      "[[ 200.   22.   46.   13.   40.]\n",
+      " [  17.  145.   62.   10.   27.]\n",
+      " [  47.   49.  172.    9.   54.]\n",
+      " [  22.   23.   26.  224.   27.]\n",
+      " [  16.   11.   28.    6.  196.]]\n",
+      "Precision\n",
+      "[ 0.62305296  0.55555556  0.51963746  0.69565217  0.76264591]\n",
+      "Recall\n",
+      "[ 0.66225166  0.58        0.51497006  0.85496183  0.56976744]\n",
+      "\n",
+      "\n",
+      "<Performance evaluation on Test dataset : LDA+CDS>\n",
+      "Accurary  : 0.628\n",
+      "Precision : 0.631\n",
+      "Recall    : 0.636\n"
+     ]
+    }
+   ],
+   "source": [
+    "#LDA\n",
+    "[languages,train_languages_num] = np.unique(trndev_labels,return_inverse=True)\n",
+    "V = it.lda2(trndev_ivectors,train_languages_num)\n",
+    "V = np.real(V[:,0:4])\n",
+    "trn_ivectors = np.matmul(trn_ivectors,V)\n",
+    "dev_ivectors = np.matmul(dev_ivectors,V)\n",
+    "tst_ivectors = np.matmul(tst_ivectors,V)\n",
+    "trndev_ivectors = np.matmul(trndev_ivectors,V)\n",
+    "\n",
+    "\n",
+    "\n",
+    "trn_ivectors = it.length_norm(trn_ivectors)\n",
+    "trndev_ivectors = it.length_norm(trndev_ivectors)\n",
+    "dev_ivectors = it.length_norm(dev_ivectors)\n",
+    "tst_ivectors = it.length_norm(tst_ivectors)\n",
+    "\n",
+    "\n",
+    "#language modeling\n",
+    "lang_mean=[]\n",
+    "for i, lang in enumerate(languages):\n",
+    "    lang_mean.append(np.mean(np.append(trn_ivectors[np.nonzero(trn_labels == i+1)] ,dev_ivectors[np.nonzero(dev_labels == i+1)],axis=0),axis=0))\n",
+    "#    lang_mean.append(np.mean(trn_ivectors[np.nonzero(trn_labels == i+1)],axis=0))\n",
+    "\n",
+    "lang_mean = np.array(lang_mean)\n",
+    "lang_mean = it.length_norm(lang_mean)\n",
+    "\n",
+    "print( np.shape(trn_ivectors), np.shape(dev_ivectors), np.shape(lang_mean),np.shape(tst_ivectors) )\n",
+    "\n",
+    "\n",
+    "tst_scores = lang_mean.dot(tst_ivectors.transpose() )\n",
+    "# print(tst_scores.shape)\n",
+    "hypo_lang = np.argmax(tst_scores,axis = 0)\n",
+    "temp = ((tst_labels-1) - hypo_lang)\n",
+    "acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))\n",
+    "print 'Final accurary on test dataset : %0.3f' %(acc)\n",
+    "\n",
+    "confusionmat = np.zeros((5,5))\n",
+    "for i,lang in enumerate(languages):\n",
+    "    hypo_bylang = hypo_lang[ tst_labels == i+1]\n",
+    "    hist_bylang = np.histogram(hypo_bylang,5)\n",
+    "    confusionmat[:,i] = hist_bylang[0]\n",
+    "\n",
+    "precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) #precision\n",
+    "recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # recall\n",
+    "    \n",
+    "print 'Confusion matrix'\n",
+    "print confusionmat\n",
+    "print 'Precision'\n",
+    "print precision\n",
+    "print 'Recall'\n",
+    "print recall\n",
+    "\n",
+    "print '\\n\\n<Performance evaluation on Test dataset : LDA+CDS>'\n",
+    "print 'Accurary  : %0.3f' %(acc)\n",
+    "print 'Precision : %0.3f' %(np.mean(precision))\n",
+    "print 'Recall    : %0.3f' %(np.mean(recall))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}