Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 27 additions & 16 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,38 @@
"""Packaging script for the skfeature feature-selection library.

Fixes an unresolved merge in the ``setup(...)`` call: the old single-line
keyword arguments and the new multi-line ones were both present, which is a
SyntaxError ("keyword argument repeated"). Also moves from the deprecated
``distutils.core`` (removed in Python 3.12) to ``setuptools``, which is
required anyway for ``test_suite`` support.
"""
from setuptools import setup

NAME = "skfeature"

DESCRIPTION = "Feature Selection Repository in Python (DMML Lab@ASU)"

KEYWORDS = "Feature Selection Repository"

AUTHOR = "Jundong Li, Kewei Cheng, Suhang Wang"

AUTHOR_EMAIL = "[email protected], [email protected], [email protected]"

URL = "https://github.com/jundongl/scikit-feature"

VERSION = "1.0.0"


setup(
    name=NAME,
    version=VERSION,
    description=DESCRIPTION,
    keywords=KEYWORDS,
    author=AUTHOR,
    author_email=AUTHOR_EMAIL,
    url=URL,
    packages=[
        'skfeature',
        'skfeature.utility',
        'skfeature.function',
        'skfeature.function.information_theoretical_based',
        'skfeature.function.similarity_based',
        'skfeature.function.sparse_learning_based',
        'skfeature.function.statistical_based',
        'skfeature.function.streaming',
        'skfeature.function.structure',
        'skfeature.function.wrapper',
    ],
    # Test directory discovered by `setup.py test` (setuptools-only feature).
    test_suite='tests',
    # Classifiers declaring the Python versions supported.
    classifiers=[
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Operating System :: OS Independent',
    ],
)
Binary file modified skfeature/data/RELATHE.mat
Binary file not shown.
Binary file modified skfeature/data/SMK-CAN-187.mat
Binary file not shown.
Binary file modified skfeature/data/TOX-171.mat
Binary file not shown.
Binary file modified skfeature/data/USPS.mat
Binary file not shown.
Binary file modified skfeature/data/Yale.mat
Binary file not shown.
Binary file modified skfeature/data/arcene.mat
Binary file not shown.
Binary file modified skfeature/data/colon.mat
Binary file not shown.
Binary file modified skfeature/data/gisette.mat
Binary file not shown.
Binary file modified skfeature/data/leukemia.mat
Binary file not shown.
Binary file modified skfeature/data/lung.mat
Binary file not shown.
Binary file modified skfeature/data/lung_small.mat
Binary file not shown.
Binary file modified skfeature/data/lymphoma.mat
Binary file not shown.
Binary file modified skfeature/data/madelon.mat
Binary file not shown.
Binary file modified skfeature/data/nci9.mat
Binary file not shown.
Binary file modified skfeature/data/orlraws10P.mat
Binary file not shown.
Binary file modified skfeature/data/pixraw10P.mat
Binary file not shown.
Binary file modified skfeature/data/warpAR10P.mat
Binary file not shown.
Binary file modified skfeature/data/warpPIE10P.mat
Binary file not shown.
46 changes: 0 additions & 46 deletions skfeature/example/test_CFS.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import accuracy_score
from skfeature.function.statistical_based import CFS


def main():
    """Evaluate CFS feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects features with CFS on each training
    fold, trains a linear SVM on the selected features, and prints the mean
    test accuracy over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API (local import so the
    # file-level imports stay untouched).
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 100           # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the index of selected features on the training set only
        # (avoids leaking test information into feature selection)
        idx = CFS.cfs(X[train], y[train])

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(selected_features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_CIFE.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import CIFE


def main():
    """Evaluate CIFE feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects ``num_fea`` features with CIFE on
    each training fold, trains a linear SVM on them, and prints the mean
    test accuracy over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API.
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10            # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the ranked feature indices on the training set only
        idx, _, _ = CIFE.cife(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_CMIM.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import CMIM


def main():
    """Evaluate CMIM feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects ``num_fea`` features with CMIM on
    each training fold, trains a linear SVM on them, and prints the mean
    test accuracy over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API.
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10            # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the ranked feature indices on the training set only
        idx, _, _ = CMIM.cmim(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_DISR.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import DISR


def main():
    """Evaluate DISR feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects ``num_fea`` features with DISR on
    each training fold, trains a linear SVM on them, and prints the mean
    test accuracy over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API.
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10            # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the ranked feature indices on the training set only
        idx, _, _ = DISR.disr(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_FCBF.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import FCBF


def main():
    """Evaluate FCBF feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects features with FCBF on each training
    fold, trains a linear SVM on them, and prints the mean test accuracy
    over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API.
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10            # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the selected feature indices on the training set only
        # (FCBF returns the index array directly, unlike the MI-based
        # selectors that return a 3-tuple)
        idx = FCBF.fcbf(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_ICAP.py
Original file line number Diff line number Diff line change
@@ -1,46 +0,0 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import ICAP


def main():
    """Evaluate ICAP feature selection on the colon dataset with 10-fold CV.

    Loads ``../data/colon.mat``, selects ``num_fea`` features with ICAP on
    each training fold, trains a linear SVM on them, and prints the mean
    test accuracy over the 10 folds.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.KFold is the replacement API.
    from sklearn.model_selection import KFold

    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']            # data matrix
    X = X.astype(float)
    y = mat['Y']            # labels; column vector in the .mat file
    y = y[:, 0]
    n_samples, n_features = X.shape

    # split data into 10 folds
    kf = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10            # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the ranked feature indices on the training set only
        idx, _, _ = ICAP.icap(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test fold
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all 10 folds
    # (print() call: the original Py2 print statement is a SyntaxError on Py3)
    print('Accuracy:', float(correct) / 10)


if __name__ == '__main__':
    main()
46 changes: 0 additions & 46 deletions skfeature/example/test_JMI.py

This file was deleted.

Loading