-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDataLoader.py
27 lines (20 loc) · 1.01 KB
/
DataLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import numpy as np
from sklearn.preprocessing import OneHotEncoder
def classificationDataLoader(fileName, labelCol=-1, delimiter=',', sparsify=False):
    """
    Load integer-coded classification data and one-hot encode its features.

    Every value in the file is parsed as int32. The column at ``labelCol``
    becomes the label vector; all remaining columns are passed through
    ``OneHotEncoder().fit_transform``.

    :param
        fileName: data file, one sample per line
        labelCol: column position of the label (negative values count from
            the end), default last column
        delimiter: feature separator within each sample, default ','
        sparsify: if true, return X exactly as produced by the encoder
            (sparse with the encoder's default settings); otherwise convert
            it with ``.toarray()`` to a dense array
    :returns
        X: one-hot feature matrix (n_samples, n_encoded_features)
        Y: np label vector (n_samples,)
    """
    # Honour the caller's delimiter; ndmin=2 keeps a single-sample file
    # two-dimensional so the column indexing below still works.
    data = np.loadtxt(fileName, dtype='int32', delimiter=delimiter, ndmin=2)

    Y = data[:, labelCol]
    # Remove exactly the label column, wherever it sits, so the label never
    # leaks into the features and no genuine feature column is dropped.
    X = np.delete(data, labelCol, axis=1)

    # Fit the encoder once and reuse the result for both return forms.
    encoded = OneHotEncoder().fit_transform(X)
    if sparsify:
        return encoded, Y
    return encoded.toarray(), Y
# X, Y = classificationDataLoader('dataset/test.data')
# print X.shape, Y.shape
# print X, Y