Skip to content

Commit 6291f66

Browse files
authored
Merge pull request #15 from max-acc/4-code-cleanup
4 code cleanup
2 parents 1b27144 + 4e4b74c commit 6291f66

10 files changed

+27434
-69
lines changed

Dry_Bean_Dataset.csv

Lines changed: 13612 additions & 0 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,44 @@
1313
### Installing
1414

1515

16-
### Executing program
17-
18-
19-
### Error Codes
16+
### Executing the program
17+
#### Setting up the classification model
18+
Firstly you have to import a package called "classification" that contains all important functions for classifying a
19+
dataset consisting of float values:
20+
```import classification.ClassificationOfFloatValues;```<br>
21+
The next step is to create an object for this classification (ob is used as a default name for an object):
22+
```ClassificationOfFloatValues ob = new ClassificationOfFloatValues(dataset);```
23+
The ```dataset``` variable should contain, as a string, the file name of the dataset that should be classified.
24+
The dataset has to be in the same folder as the main file.<br>
25+
If the dataset has an index or a header (or both), this has to be indicated by the user.
26+
If there is an index and/or a header, you have to call ```ob.setIndex(true);``` and/or ```ob.setHeader(true);``` respectively.
27+
The default value for these is ```false``` because it is expected that the dataset does not have an index or header.
28+
Most datasets do have a header and an index so make sure, if your dataset has a header or an index, to include this part in your program.
29+
<br><br>
30+
31+
#### Processing the data
32+
The following functions are required for classifying the data.
33+
Call the following functions in this order: ```ob.dataProcessing();```
34+
```ob.dataSubdivision();```
35+
```ob.distanceClassification();```
36+
<br><br>
37+
38+
#### Evaluating the Results
39+
For evaluating the predicted results you can call ```ob.evaluateResults();```.
40+
There are multiple ways in which the results can be displayed.
41+
The ```ob.setEvaluation(model)``` function sets the evaluation models which are going to be calculated and printed.
42+
```model``` should contain one of the names below as a string.<br>
43+
**Confusion Matrix**: Printing a normal confusion matrix for every class (size: class x class).
44+
**Simple Confusion Matrix**: Printing a simplified confusion matrix for every class with true positives and false positives (size: class x 2).
45+
**Normalized Confusion Matrix**: Printing a normalized confusion matrix with the format of the confusion matrix as explained
46+
above. The values that are displayed are normalized (values between 0 and 1).
2047

2148

2249
## Scripts
23-
24-
50+
There is a script that explains the program's function and also explains the data manipulation in detail.
51+
You can find the description here.
2552
## Help
26-
53+
If you need help applying the algorithm to your projects, feel free to ask.
2754

2855
## Authors
2956

@@ -33,6 +60,10 @@ Contributors names and contact info
3360
* [@max-acc](https://github.com/max-acc)
3461

3562
## Version History
63+
### Build v-0.1
64+
The current build is v-0.1.
65+
It is possible to classify a dataset which contains only float values.
66+
It is important to consider that the weight for every class is the same.
3667

3768

3869
## License

script/Dokumentation.pdf

1.44 MB
Binary file not shown.

src/Dry_Bean_Dataset.csv

Lines changed: 13612 additions & 0 deletions
Large diffs are not rendered by default.

src/Main.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,26 @@ public class Main {
77
public static void main (String[] args) throws Exception {
88
// Creating of instance of second class present
99
// in the same project
10-
boolean index = true;
11-
boolean header = true;
1210
String dataset = "Iris_unordered_2.csv";
11+
dataset = "Dry_Bean_Dataset.csv";
12+
13+
1314

1415
ClassificationOfFloatValues ob = new ClassificationOfFloatValues(dataset);
1516
ob.setIndex(true);
1617
ob.setHeader(true);
1718

18-
// traingingData = percentage of training data <1
19+
// trainingData = percentage of training data <1
1920
ob.dataValidation(0.7f);
2021
ob.dataProcessing();
2122
ob.dataSubdivision();
22-
//System.out.println(ob.feedback()[0][2]);
23+
2324
ob.distanceClassification();
2425

26+
ob.setEvaluation("Confusion Matrix");
27+
ob.setEvaluation("Simple Confusion Matrix");
28+
ob.setEvaluation("Normalized Confusion Matrix");
29+
2530
ob.evaluateResults();
2631
}
27-
2832
}

src/classification/CSVread.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ static float[] getRecordFromLine (String line, boolean index, int rowCount) {
6868
// Using , as a delimiter for separating the line input
6969
rowScanner.useDelimiter(",");
7070
for (int i = 0; rowScanner.hasNext(); i++) {
71+
//System.out.println(rowScanner);
7172
// Skip if there is an index
7273
if ((index == true && skip == true) || i == rowCount) {
7374
skip = false;
@@ -77,7 +78,10 @@ static float[] getRecordFromLine (String line, boolean index, int rowCount) {
7778
if (i == rowCount) {
7879
break;
7980
}
80-
}values[i] = Float.parseFloat(rowScanner.next());
81+
}
82+
String tempVar = rowScanner.next();
83+
//System.out.println(tempVar);
84+
values[i] = Float.parseFloat(tempVar);
8185
}
8286
}
8387
return values;

src/classification/ClassificationOfFloatValues.java

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ public class ClassificationOfFloatValues {
3232
private int[][] sortedProbability;
3333
private int numberOfClasses;
3434

35+
private boolean[] validationModel = {false, false, false};
36+
3537

3638
// Function to add the members of the class
3739
public float[][] output() { return this.predictorData; }
@@ -47,12 +49,12 @@ public ClassificationOfFloatValues(String dataset) throws Exception {
4749
this.datasetName = dataset;
4850
}
4951

50-
// --- Function for creating
52+
// --- Function for processing the data (reading and writing the data to an array)
5153
public void dataProcessing() throws Exception {
54+
// Get row and column count
5255
this.rowCount = CSVread.calcRowCount(this.datasetName, this.index);
5356
this.columnCount = CSVread.calcColumnCount(this.datasetName, this.index);
54-
//System.out.println(this.rowCount);
55-
//System.out.println(this.columnCount);
57+
// Get predictor and result data
5658
this.predictorData = CSVread.transformPredictorData(this.datasetName, this.index, this.header, this.columnCount, this.rowCount);
5759
this.resultData = CSVread.transformResultData(this.datasetName, this.index, this.header, this.columnCount, this.rowCount);
5860
this.dataProcessingBool = true;
@@ -77,56 +79,85 @@ public void dataSubdivision () {
7779

7880

7981
// --- Functions for additional user control -----------------------------------------------------------------------
80-
// --- Function for changing the ratio between training and testing data
82+
// --- Functions for setting the index and header data
8183
public void setIndex (boolean index) { this.index = index; }
84+
8285
public void setHeader (boolean header) { this.header = header; }
86+
87+
// --- Function for changing the data density to clear extreme values | Not working
8388
public void setDensity (float density) {this.density = density;}
89+
90+
// --- Function for changing the ratio between training and testing data
8491
public void dataValidation (float trainingData) {
8592
this.validation[0] = trainingData;
8693
this.validation[1] = 1- trainingData;
8794
}
95+
// --- Function for setting the validation model
96+
public void setEvaluation(String evaluationName) {
97+
if (evaluationName.equals("Confusion Matrix")) {
98+
this.validationModel[0] = true;
99+
}else if (evaluationName.equals("Simple Confusion Matrix")) {
100+
this.validationModel[1] = true;
101+
}else if (evaluationName.equals("Normalized Confusion Matrix")) {
102+
this.validationModel[2] = true;
103+
}
104+
}
88105

89106

90107
// --- Functions for evaluating the machine learning results -------------------------------------------------------
108+
// --- Function for printing confusion matrices
91109
public void evaluateResults() {
110+
// Creating an object to calculate confusion matrices
92111
DATA_evaluation evaluationObject = new DATA_evaluation(this.testDataResults,
93112
this.columnCount - this.numberOfTrainingData,
94113
this.predictedTestData,
95114
this.sortedProbability,
96115
this.numberOfClasses);
97-
int[][] asdf = evaluationObject.getConfusionMatrixSimple();
98-
evaluationObject.getConfusionMatrixNormalized();
99-
}
100-
public void confusionMatrix() {
101-
if (this.MLAlgorithm == "DistanceClassification") {
102-
System.out.println("nice confusion");
116+
117+
// Printing a basic confusion matrix
118+
if (this.validationModel[0] == true) {
119+
System.out.println("\nConfusion Matrix");
120+
evaluationObject.getConfusionMatrix();
121+
}
122+
// Printing a simplified confusion matrix
123+
if (this.validationModel[1] == true) {
124+
System.out.println("\nSimple Confusion Matrix");
125+
evaluationObject.getConfusionMatrixSimple();
103126
}
104-
else {
105-
System.out.println("There is no algorithm");
127+
// Printing a normalized confusion matrix
128+
if (this.validationModel[2] == true) {
129+
System.out.println("\nNormalized Confusion Matrix");
130+
evaluationObject.getConfusionMatrixNormalized();
106131
}
132+
107133
}
108134

109135

110136
// --- Functions for private calculations --------------------------------------------------------------------------
111137
// --- Function for returning usable index data
112138
private int returnIndex() {
113139
// If there is an index it returns -1 because the usable data of the processed data has one element less
140+
// The index does not belong to the important data
114141
if (this.index == true) { return -1; } else { return 0; }
115142
}
116143

117-
//
144+
// --- Function for checking if all required processes have been completed successfully before starting the
145+
// classification algorithms
118146
private boolean checkRequiredProcesses () {
147+
// dataProcessingBool: Process the CSV data (reading)
148+
// dataSubdivisionBool: Dividing the data into training and testing data
119149
if (this.dataProcessingBool == true &&
120150
this.dataSubdivisionBool == true) {
121151
return true;
152+
}else {
153+
if (this.dataProcessingBool == false) {
154+
System.out.println("Error 310 | The data has not been divided into training and testing data!");
155+
}
156+
if (this.dataSubdivisionBool == false) {
157+
System.out.println("Error 311 | The data has not been divided into training and testing data!");
158+
}
159+
return false;
122160
}
123-
if (this.dataProcessingBool == false) {
124-
System.out.println("Error 310 | The data has not been divided into training and testing data!");
125-
}
126-
if (this.dataSubdivisionBool == false) {
127-
System.out.println("Error 311 | The data has not been divided into training and testing data!");
128-
}
129-
return false;
130161
}
131162

132163

@@ -149,9 +180,10 @@ public void distanceClassification (){
149180
classificationObject.setTestData(this.testDataPredictors, this.testDataResults, this.rowCount, this.columnCount - this.numberOfTrainingData);
150181
classificationObject.testModel();
151182

183+
// Return the number of found classes
152184
this.numberOfClasses = classificationObject.getNumberOfClasses();
153185

154-
// Get the test data
186+
// Get the predicted test data
155187
this.predictedTestData = classificationObject.getPredictedTestData();
156188
this.sortedProbability = classificationObject.getSortedProbability();
157189
}

0 commit comments

Comments
 (0)