1
+
2
+ # coding: utf-8
3
+
4
+
5
+ import numpy as np
6
+ import os, re,gzip,struct
7
+ import scipy as sc
8
+
9
def read_ivector_text(file):
    """Read vectors from a text ark file (one bracketed vector per line).

    Each non-blank line is expected to contain ``[ v1 v2 ... vN ]``; whatever
    precedes the opening bracket (presumably the utterance key) is ignored.

    Args:
        file: Path of the text ark file.

    Returns:
        A list holding one 1-D ``float32`` numpy array per non-blank line,
        in file order.

    Raises:
        IndexError: If a non-blank line contains no ``[``.
        ValueError: If a token inside the brackets is not a number.
    """
    ivector = []
    # The context manager closes the handle even if a malformed line raises.
    with open(file, 'r') as ark:
        for line in ark:
            if not line.strip():
                # Tolerate blank / trailing empty lines.
                continue
            # Keep the text between the brackets and split on any run of
            # whitespace so repeated spaces do not produce empty tokens.
            tokens = line.split('[', 1)[1].split(']', 1)[0].split()
            ivector.append(np.array(tokens).astype('float32'))

    # Single-argument form prints the same text under Python 2 and 3.
    print(np.shape(ivector))
    return ivector
27
def read_ivector_binary(file, point):
    """Read one binary vector stored at a byte offset inside an ark file.

    Starting at ``point`` the following fields are consumed: a 2-byte binary
    flag, a 3-byte type token (``'FV '`` for 4-byte floats, ``'DV '`` for
    8-byte floats), one byte (the "int-size" marker), a little-endian int32
    element count, and finally that many raw samples.

    Args:
        file: Path of the binary ark file.
        point: Byte offset of the binary flag of the wanted vector.

    Returns:
        A 1-D numpy array of dtype ``float32`` or ``float64``.

    Raises:
        ValueError: If the type token is neither ``'FV '`` nor ``'DV '``.
        struct.error: If the file ends before the element count is complete.
    """
    sample_types = {b'FV ': 'float32', b'DV ': 'float64'}

    # Binary mode: text mode may translate or decode the raw bytes.
    with open(file, 'rb') as ivecfile:
        # Jump straight to the record instead of reading `point` bytes
        # into memory only to discard them.
        ivecfile.seek(point)

        ivecfile.read(2)  # binary flag (skipped, not validated)

        type_token = ivecfile.read(3)
        if type_token not in sample_types:
            raise ValueError(
                'unsupported vector type token: %r' % (type_token,))
        sample_type = sample_types[type_token]

        ivecfile.read(1)  # int-size marker preceding the element count
        vec_size = struct.unpack('<i', ivecfile.read(4))[0]

        ivector = np.fromfile(ivecfile, sample_type, vec_size)

    return ivector
61
+
62
+
63
def read_ivector_key(file):
    """Parse an scp index file into keys and integer offsets.

    Every non-blank line is split on spaces, colons and newlines; the first
    field is taken as the key and the third field as the offset, which
    matches lines of the form ``<key> <path>:<offset>``.

    Args:
        file: Path of the scp file.

    Returns:
        A tuple ``(spkid, ivec_pointer, total_num)``: the list of keys, the
        list of offsets as ints, and the number of entries parsed.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If the third field is not an integer.
    """
    spkid = []
    ivec_pointer = []
    # The original never closed this handle; the context manager does.
    with open(file, 'r') as scpfile:
        for line in scpfile:
            if not line.strip():
                # Tolerate blank / trailing empty lines.
                continue
            fields = re.split(' |:|\n', line)
            spkid.append(fields[0])
            ivec_pointer.append(int(fields[2]))

    return spkid, ivec_pointer, len(spkid)
76
def length_norm(mat):
    """Scale every column of a matrix to unit Euclidean length.

    Args:
        mat: 2-D array-like whose columns are the vectors to normalise.

    Returns:
        An array of the same shape as ``mat`` in which each column has been
        divided by its L2 norm. An all-zero column yields NaN entries
        (division by zero), as the earlier per-column loop did.
    """
    mat = np.asarray(mat)
    # One vectorised division replaces the per-column Python loop and the
    # `np.math` alias, which no longer exists in NumPy 2.0.
    return mat / np.linalg.norm(mat, axis=0)
84
+
85
+
86
+
87
def lda(mat, label):
    """Compute an LDA-style projection matrix from class-ordered observations.

    Args:
        mat: 2-D array of shape (observations, dim) whose rows are grouped
            by class in the order given by ``label`` (for example 8x600 for
            8 observations of 600-dim i-vectors).
        label: Sequence with the number of consecutive rows that belong to
            each class (for example ``[2, 4, 2]`` for 8 observations).

    Returns:
        A (dim, dim) array whose columns are the eigenvectors of
        ``inv(Sw).dot(Sb)``, ordered by descending eigenvalue and passed
        through ``length_norm``. The result is complex whenever
        ``np.linalg.eig`` returns complex eigenpairs.

    Raises:
        numpy.linalg.LinAlgError: If ``Sw`` is singular.
        IndexError: If ``label`` accounts for more rows than ``mat`` has.
    """
    # `np.math` was only an alias of the stdlib module and is gone in
    # NumPy 2.0; import the real module locally.
    import math

    # NOTE: Sw is formed from all rows without subtracting any mean.
    Sw = np.dot(mat.transpose(), mat)

    # One row per class: the class mean scaled by sqrt(class size), so that
    # mu_c^T mu_c weights each class by its number of observations.
    mu_c = []
    start = 0
    for count in label:
        idx = np.arange(start, start + count)
        start += count
        mu_c.append(math.sqrt(count) * np.mean(mat[idx], axis=0))
    mu_c = np.array(mu_c)
    Sb = np.dot(mu_c.transpose(), mu_c)

    D, V = np.linalg.eig(np.linalg.inv(Sw).dot(Sb))

    # Reorder eigenvectors from largest to smallest eigenvalue.
    descend_idx = (-D).argsort()
    V = V[:, descend_idx]

    return length_norm(V)
119
+
120
+
121
def load_ivector_fromtextark(foldername, num_arks):
    """Load i-vectors from numbered text ark files together with their index.

    Reads ``foldername + 'ivector.scp'`` via ``read_ivector_key`` and
    ``foldername + 'ivector.<k>.ark'`` for k = 1..num_arks via
    ``read_ivector_text``.

    Args:
        foldername: Prefix prepended verbatim to the file names, so a
            directory must include its trailing separator (for example
            '../db_ivectors/data/ivectors_callmynet_enrollment_gmm_2048/').
        num_arks: Number of ark files, numbered from 1. Ark 1 is always
            read, even if ``num_arks`` is smaller than 1.

    Returns:
        A tuple ``(spk_ivectors, spkid, point, total_num)``.
        ``spk_ivectors`` is the row-wise stack of all arks as a numpy array
        when more than one ark is read, and the plain list returned by
        ``read_ivector_text`` when only ark 1 is read. The other three
        values come unchanged from ``read_ivector_key``.
    """
    spkid, point, total_num = read_ivector_key(foldername + 'ivector.scp')

    spk_ivectors = read_ivector_text(foldername + 'ivector.1.ark')
    remaining = [
        read_ivector_text(foldername + 'ivector.' + str(ark_no) + '.ark')
        for ark_no in np.arange(2, num_arks + 1)
    ]
    if remaining:
        # A single concatenation avoids re-copying the accumulated data on
        # every iteration, which repeated np.append calls would do.
        spk_ivectors = np.concatenate([spk_ivectors] + remaining, axis=0)

    # One pre-formatted string prints identically under Python 2 and 3.
    print('total %s ivector were saved on spk_ivector variable(shape is %s  )'
          % (total_num, np.shape(spk_ivectors)))
    return spk_ivectors, spkid, point, total_num
0 commit comments