Skip to content

Commit 1ed81a4

Browse files
author
mkalisch
committed
Merge sort algorithm added. FIX: error when parameters in ATTRIBUTE are separated by tab
1 parent 586c702 commit 1ed81a4

File tree

4 files changed

+138
-1
lines changed

4 files changed

+138
-1
lines changed

adaa.analytics.rules/src/main/java/adaa/analytics/rules/data/DataTable.java

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ public boolean setRole(String columnName, String role) {
204204
}
205205

206206
public void sortBy(String columnName, EColumnSortDirections sortDir) {
207-
customSort(columnName, sortDir, ESortAlgorithm.QuickSort);
207+
customSort(columnName, sortDir, ESortAlgorithm.MergeSort);
208208
}
209209

210210
public boolean isDoubleColumnSorted(String columnName, EColumnSortDirections sortDir) {
@@ -242,6 +242,9 @@ else if (table.column(getColumnIndex(columnName)) instanceof DoubleColumn) {
242242
else if(sortAlgorithm == ESortAlgorithm.QuickSort) {
243243
sortDoubleColumnQuick(dCol, sortDir, 0, table.rowCount() - 1);
244244
}
245+
else if(sortAlgorithm == ESortAlgorithm.MergeSort) {
246+
sortDoubleColumnMerge(dCol, sortDir);
247+
}
245248
} else {
246249
throw new IllegalArgumentException("Unsupported column type for custom sorting");
247250
}
@@ -264,6 +267,70 @@ private void sortDoubleColumnBubble(DoubleColumn column, EColumnSortDirections s
264267
} while (swapped);
265268
}
266269

270+
private void sortDoubleColumnMerge(DoubleColumn column, EColumnSortDirections sortDir) {
271+
mergeSort(column.name(), 0, table.rowCount()-1, sortDir);
272+
}
273+
274+
private void mergeSort(String columnName, int low, int high, EColumnSortDirections sortDir) {
275+
if (low < high) {
276+
int mid = (low + high) / 2;
277+
mergeSort(columnName, low, mid, sortDir);
278+
mergeSort(columnName, mid + 1, high, sortDir);
279+
merge(columnName, low, mid, high, sortDir);
280+
}
281+
}
282+
283+
private void merge(String columnName, int low, int mid, int high, EColumnSortDirections sortDir) {
284+
int n1 = mid - low + 1;
285+
int n2 = high - mid;
286+
287+
Table leftTable = table.emptyCopy();
288+
Table rightTable = table.emptyCopy();
289+
290+
for (int i = 0; i < n1; i++) {
291+
leftTable.addRow(table.row(low + i));
292+
}
293+
294+
for (int j = 0; j < n2; j++) {
295+
rightTable.addRow(table.row(mid + 1 + j));
296+
}
297+
298+
int i = 0, j = 0;
299+
int k = low;
300+
while (i < n1 && j < n2) {
301+
double leftValue = leftTable.doubleColumn(columnName).get(i);
302+
double rightValue = rightTable.doubleColumn(columnName).get(j);
303+
boolean comparison = sortDir == EColumnSortDirections.INCREASING ? leftValue <= rightValue : leftValue >= rightValue;
304+
305+
if (comparison) {
306+
copyRow(table, leftTable, k, i);
307+
i++;
308+
} else {
309+
copyRow(table, rightTable, k, j);
310+
j++;
311+
}
312+
k++;
313+
}
314+
315+
while (i < n1) {
316+
copyRow(table, leftTable, k, i);
317+
i++;
318+
k++;
319+
}
320+
321+
while (j < n2) {
322+
copyRow(table, rightTable, k, j);
323+
j++;
324+
k++;
325+
}
326+
}
327+
328+
private void copyRow(Table destinationTable, Table sourceTable, int destIndex, int sourceIndex) {
329+
for (Column<?> column : sourceTable.columns()) {
330+
((DoubleColumn)destinationTable.column(column.name())).set(destIndex, ((DoubleColumn)column).get(sourceIndex));
331+
}
332+
}
333+
267334
private void sortDoubleColumnQuick(DoubleColumn column, EColumnSortDirections sortDir, int low, int high) {
268335
if (low < high) {
269336
int pi = partitionDouble(column, sortDir, low, high);

adaa.analytics.rules/src/main/java/adaa/analytics/rules/data/metadata/ESortAlgorithm.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@
33
/**
 * Sorting algorithms that can be requested when custom-sorting a data table.
 */
public enum ESortAlgorithm {

    /** Bubble sort. */
    BubbleSort,

    /** Quick sort. */
    QuickSort,

    /** Merge sort. */
    MergeSort,

    /** Presumably delegates to the sort built into Tablesaw - TODO confirm against the caller. */
    NativeTableSaw
}

adaa.analytics.rules/src/main/java/ioutils/AttributeInfo.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ public class AttributeInfo {
1717

1818
public AttributeInfo(String arffAttributeLine) {
1919

20+
arffAttributeLine = arffAttributeLine.replace('\t', ' ');
21+
2022
Pattern pattern = Pattern.compile("\\{(.*?)\\}");
2123
Matcher matcher = pattern.matcher(arffAttributeLine);
2224

adaa.analytics.rules/test/adaa/analytics/rules/data/DataTableTest.java

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,73 @@ public void QuickSortTest() {
344344
Assert.assertEquals(1.0, dt.getExample(9).getValue("c2"), 0.0000001);
345345
}
346346

347+
@Test
348+
public void MergeSortTest() {
349+
350+
Object [][] data = new Object[][]{
351+
{1.0, "val1"},
352+
{4.0, "val2"},
353+
{10.0, "val1"},
354+
{3.0, "val2"},
355+
{7.0, "val1"},
356+
{11.0, "val2"},
357+
{2.0, "val1"},
358+
{5.0, "val2"},
359+
{4.0, "val1"},
360+
{3.0, "val2"}
361+
};
362+
363+
String [] colNames = new String[]{ "c1", "c2" };
364+
365+
DataTable dt = new DataTable(data, colNames, "c2", "c1", null);
366+
367+
Assert.assertFalse(dt.isDoubleColumnSorted("c2", EColumnSortDirections.INCREASING));
368+
369+
dt.customSort("c2", EColumnSortDirections.INCREASING, ESortAlgorithm.MergeSort);
370+
dt.customSort("c2", EColumnSortDirections.INCREASING, ESortAlgorithm.MergeSort);
371+
372+
Assert.assertTrue(dt.isDoubleColumnSorted("c2", EColumnSortDirections.INCREASING));
373+
Assert.assertEquals(11.0, dt.getExample(2).getValue("c1"), 0.0000001);
374+
Assert.assertEquals(0.0, dt.getExample(2).getValue("c2"), 0.0000001);
375+
Assert.assertEquals(1.0, dt.getExample(5).getValue("c1"), 0.0000001);
376+
Assert.assertEquals(1.0, dt.getExample(5).getValue("c2"), 0.0000001);
377+
Assert.assertEquals(4.0, dt.getExample(9).getValue("c1"), 0.0000001);
378+
Assert.assertEquals(1.0, dt.getExample(9).getValue("c2"), 0.0000001);
379+
Assert.assertFalse(dt.isDoubleColumnSorted("c1", EColumnSortDirections.INCREASING));
380+
381+
dt.sortBy("c1", EColumnSortDirections.INCREASING);
382+
383+
Assert.assertTrue(dt.isDoubleColumnSorted("c1", EColumnSortDirections.INCREASING));
384+
Assert.assertEquals(3.0, dt.getExample(2).getValue("c1"), 0.0000001);
385+
Assert.assertEquals(0.0, dt.getExample(2).getValue("c2"), 0.0000001);
386+
Assert.assertEquals(4.0, dt.getExample(5).getValue("c1"), 0.0000001);
387+
Assert.assertEquals(1.0, dt.getExample(5).getValue("c2"), 0.0000001);
388+
Assert.assertEquals(11.0, dt.getExample(9).getValue("c1"), 0.0000001);
389+
Assert.assertEquals(0.0, dt.getExample(9).getValue("c2"), 0.0000001);
390+
Assert.assertFalse(dt.isDoubleColumnSorted("c2", EColumnSortDirections.DECREASING));
391+
392+
dt.sortBy("c2", EColumnSortDirections.DECREASING);
393+
394+
Assert.assertTrue(dt.isDoubleColumnSorted("c2", EColumnSortDirections.DECREASING));
395+
Assert.assertEquals(4.0, dt.getExample(2).getValue("c1"), 0.0000001);
396+
Assert.assertEquals(1.0, dt.getExample(2).getValue("c2"), 0.0000001);
397+
Assert.assertEquals(3.0, dt.getExample(5).getValue("c1"), 0.0000001);
398+
Assert.assertEquals(0.0, dt.getExample(5).getValue("c2"), 0.0000001);
399+
Assert.assertEquals(11.0, dt.getExample(9).getValue("c1"), 0.0000001);
400+
Assert.assertEquals(0.0, dt.getExample(9).getValue("c2"), 0.0000001);
401+
Assert.assertFalse(dt.isDoubleColumnSorted("c1", EColumnSortDirections.DECREASING));
402+
403+
dt.sortBy("c1", EColumnSortDirections.DECREASING);
404+
405+
Assert.assertTrue(dt.isDoubleColumnSorted("c1", EColumnSortDirections.DECREASING));
406+
Assert.assertEquals(7.0, dt.getExample(2).getValue("c1"), 0.0000001);
407+
Assert.assertEquals(1.0, dt.getExample(2).getValue("c2"), 0.0000001);
408+
Assert.assertEquals(4.0, dt.getExample(5).getValue("c1"), 0.0000001);
409+
Assert.assertEquals(0.0, dt.getExample(5).getValue("c2"), 0.0000001);
410+
Assert.assertEquals(1.0, dt.getExample(9).getValue("c1"), 0.0000001);
411+
Assert.assertEquals(1.0, dt.getExample(9).getValue("c2"), 0.0000001);
412+
}
413+
347414
@Test
348415
public void FirstElementAsNullTest() {
349416

0 commit comments

Comments
 (0)