Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 27 additions & 16 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,38 @@
"""Packaging script for the skfeature feature-selection library.

Fixes an unresolved merge in the ``setup(...)`` call: the old single-line
keyword arguments and the new multi-line ones were both present, which is a
SyntaxError ("keyword argument repeated"). Also moves from the deprecated
``distutils.core`` (removed in Python 3.12) to ``setuptools``, which is
required anyway for ``test_suite`` support.
"""
from setuptools import setup

NAME = "skfeature"

DESCRIPTION = "Feature Selection Repository in Python (DMML Lab@ASU)"

KEYWORDS = "Feature Selection Repository"

AUTHOR = "Jundong Li, Kewei Cheng, Suhang Wang"

AUTHOR_EMAIL = "[email protected], [email protected], [email protected]"

URL = "https://github.com/jundongl/scikit-feature"

VERSION = "1.0.0"


setup(
    name=NAME,
    version=VERSION,
    description=DESCRIPTION,
    keywords=KEYWORDS,
    author=AUTHOR,
    author_email=AUTHOR_EMAIL,
    url=URL,
    packages=[
        'skfeature',
        'skfeature.utility',
        'skfeature.function',
        'skfeature.function.information_theoretical_based',
        'skfeature.function.similarity_based',
        'skfeature.function.sparse_learning_based',
        'skfeature.function.statistical_based',
        'skfeature.function.streaming',
        'skfeature.function.structure',
        'skfeature.function.wrapper',
    ],
    # Test directory discovered by `setup.py test` (setuptools-only feature).
    test_suite='tests',
    # Classifiers declaring the Python versions supported.
    classifiers=[
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Operating System :: OS Independent',
    ],
)
Binary file modified skfeature/data/RELATHE.mat
Binary file not shown.
Binary file modified skfeature/data/SMK-CAN-187.mat
Binary file not shown.
Binary file modified skfeature/data/TOX-171.mat
Binary file not shown.
Binary file modified skfeature/data/USPS.mat
Binary file not shown.
Binary file modified skfeature/data/Yale.mat
Binary file not shown.
Binary file modified skfeature/data/arcene.mat
Binary file not shown.
Binary file modified skfeature/data/colon.mat
Binary file not shown.
Binary file modified skfeature/data/gisette.mat
Binary file not shown.
Binary file modified skfeature/data/leukemia.mat
Binary file not shown.
Binary file modified skfeature/data/lung.mat
Binary file not shown.
Binary file modified skfeature/data/lung_small.mat
Binary file not shown.
Binary file modified skfeature/data/lymphoma.mat
Binary file not shown.
Binary file modified skfeature/data/madelon.mat
Binary file not shown.
Binary file modified skfeature/data/nci9.mat
Binary file not shown.
Binary file modified skfeature/data/orlraws10P.mat
Binary file not shown.
Binary file modified skfeature/data/pixraw10P.mat
Binary file not shown.
Binary file modified skfeature/data/warpAR10P.mat
Binary file not shown.
Binary file modified skfeature/data/warpPIE10P.mat
Binary file not shown.
46 changes: 0 additions & 46 deletions skfeature/example/test_CFS.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import accuracy_score
from skfeature.function.statistical_based import CFS


def main():
    """Evaluate CFS feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects features with CFS on each training
    fold, trains a linear SVM on the selected features, and prints the mean
    test accuracy over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API (local import so the
    # file-level imports stay untouched).
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 100           # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the index of selected features on the training set only
        # (avoids leaking test information into feature selection)
        idx = CFS.cfs(X[train], y[train])

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(selected_features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_CIFE.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import CIFE


def main():
    """Evaluate CIFE feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects ``num_fea`` features with CIFE on
    each training fold, trains a linear SVM on them, and prints the mean
    test accuracy over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API.
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10            # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the ranked feature indices on the training set only
        idx, _, _ = CIFE.cife(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_CMIM.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import CMIM


def main():
    """Evaluate CMIM feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects ``num_fea`` features with CMIM on
    each training fold, trains a linear SVM on them, and prints the mean
    test accuracy over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API.
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10            # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the ranked feature indices on the training set only
        idx, _, _ = CMIM.cmim(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_DISR.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import DISR


def main():
    """Evaluate DISR feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects ``num_fea`` features with DISR on
    each training fold, trains a linear SVM on them, and prints the mean
    test accuracy over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API.
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10            # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the ranked feature indices on the training set only
        idx, _, _ = DISR.disr(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_FCBF.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import FCBF


def main():
    """Evaluate FCBF feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects features with FCBF on each training
    fold, trains a linear SVM on them, and prints the mean test accuracy
    over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API.
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10            # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the selected feature indices on the training set only
        # (FCBF returns the index array directly, unlike the MI-based
        # selectors that return a 3-tuple)
        idx = FCBF.fcbf(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_ICAP.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import ICAP


def main():
    """Evaluate ICAP feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects ``num_fea`` features with ICAP on
    each training fold, trains a linear SVM on them, and prints the mean
    test accuracy over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API.
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10            # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the ranked feature indices on the training set only
        idx, _, _ = ICAP.icap(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_JMI.py

This file was deleted.

Loading