Skip to content

Commit 24243cd

Browse files
committed
Fixed multiple bugs in the expert mode:
- crash when adjusting conditions with nominal attributes (issue 126).
- `mincov` parameter not set to the number of uncovered examples in the expert classification rules.
- complementary conditions not supported in the expert knowledge.
1 parent b4472d9 commit 24243cd

16 files changed

+166
-82
lines changed

adaa.analytics.rules/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ plugins {
55
id 'java'
66
}
77

8-
version = '2.1.22'
8+
version = '2.1.23'
99
java {
1010
sourceCompatibility = JavaVersion.VERSION_1_8
1111
}

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/AbstractFinder.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import adaa.analytics.rules.logic.representation.condition.ElementaryCondition;
3131
import adaa.analytics.rules.logic.representation.rule.Rule;
3232
import adaa.analytics.rules.utils.Logger;
33+
import adaa.analytics.rules.utils.Pair;
3334

3435
/**
3536
* Abstract base class for growing and pruning procedures for all types of rules (classification, regression, survival).
@@ -350,7 +351,7 @@ public void postprocess(
350351

351352
/**
352353
* Abstract method representing all procedures which induce an elementary condition.
353-
*
354+
*
354355
* @param rule Current rule.
355356
* @param trainSet Training set.
356357
* @param uncoveredByRuleset Set of examples uncovered by the model.
@@ -360,12 +361,12 @@ public void postprocess(
360361
* @return Induced elementary condition.
361362
*/
362363
protected abstract ElementaryCondition induceCondition(
363-
final Rule rule,
364-
final IExampleSet trainSet,
365-
final Set<Integer> uncoveredByRuleset,
366-
final Set<Integer> coveredByRule,
367-
final Set<IAttribute> allowedAttributes,
368-
Object... extraParams);
364+
final Rule rule,
365+
final IExampleSet trainSet,
366+
final Set<Integer> uncoveredByRuleset,
367+
final Set<Integer> coveredByRule,
368+
final Set<IAttribute> allowedAttributes,
369+
Pair<String, Object>... extraParams);
369370

370371
/**
371372
* Maps a set of attribute names to a set of attributes.

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/ApproximateClassificationFinder.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import adaa.analytics.rules.logic.representation.valueset.SingletonSet;
1212
import adaa.analytics.rules.logic.representation.valueset.SingletonSetComplement;
1313
import adaa.analytics.rules.utils.Logger;
14+
import adaa.analytics.rules.utils.Pair;
1415
import tech.tablesaw.api.DoubleColumn;
1516

1617
import java.util.*;
@@ -289,7 +290,7 @@ protected ElementaryCondition induceCondition(
289290
Set<Integer> uncoveredPositives,
290291
Set<Integer> coveredByRule,
291292
Set<IAttribute> allowedAttributes,
292-
Object... extraParams) {
293+
Pair<String,Object>... extraParams) {
293294

294295
if (allowedAttributes.size() == 0) {
295296
return null;

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/ClassificationExpertFinder.java

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import adaa.analytics.rules.logic.representation.valueset.SingletonSet;
2828
import adaa.analytics.rules.logic.representation.valueset.Universum;
2929
import adaa.analytics.rules.utils.Logger;
30+
import adaa.analytics.rules.utils.Pair;
3031
import org.apache.commons.lang3.SerializationUtils;
3132
import tech.tablesaw.api.DoubleColumn;
3233

@@ -85,17 +86,20 @@ public void adjust(
8586
CompoundCondition expertPremise = rule.getPremise();
8687
rule.setPremise(new CompoundCondition());
8788

88-
HashSet<Integer> covered = new HashSet<Integer>();
89+
//HashSet<Integer> covered = new HashSet<Integer>();
90+
Set<Integer> covered = new IntegerBitSet(dataset.size());
8991

9092
// bit vectors for faster operations on coverings
9193

9294
covered.addAll(rule.getCoveredPositives());
9395
covered.addAll(rule.getCoveredNegatives());
94-
96+
9597
for (ConditionBase cnd : expertPremise.getSubconditions()) {
9698
ElementaryCondition ec = (ElementaryCondition)cnd;
9799
ElementaryCondition newCondition;
98-
100+
101+
boolean alreadyAdded = false;
102+
99103
if (ec.isAdjustable()) {
100104
// determine attribute
101105
Set<IAttribute> attr = new TreeSet<IAttribute>(new AttributeComparator());
@@ -113,13 +117,24 @@ public void adjust(
113117
ec.evaluate(dataset, mustBeCovered);
114118
mustBeCovered.retainAll(rule.getCoveredPositives());
115119
}
116-
117-
newCondition = induceCondition(
118-
rule, dataset, mustBeCovered, covered, attr);
119-
newCondition.setType(Type.FORCED);
120-
tryAddCondition(rule, null, newCondition, dataset, covered, uncoveredPositives);
121-
122-
} else {
120+
121+
if (mustBeCovered.size() > 0) {
122+
newCondition = induceCondition(
123+
rule, dataset, mustBeCovered, covered, attr, new Pair<String, Object>("disable_precision_control", (Object) true));
124+
125+
// use original condition if adjustment fails
126+
if (newCondition != null) {
127+
newCondition.setType(Type.FORCED);
128+
tryAddCondition(rule, null, newCondition, dataset, covered, mustBeCovered);
129+
alreadyAdded = true;
130+
} else {
131+
Logger.log("Unable to adjust condition: " + ec, Level.FINE);
132+
}
133+
} else {
134+
Logger.log("Adjustable condition does not cover any positives: " + ec, Level.FINE);
135+
}
136+
}
137+
if (!alreadyAdded) {
123138
// add condition as it is without verification
124139
IntegerBitSet conditionCovered = new IntegerBitSet(dataset.size());
125140
newCondition = SerializationUtils.clone(ec);
@@ -305,7 +320,9 @@ public int grow(
305320
int preferredCounter = knowledge.getPreferredAttributesPerRule();
306321

307322
do {
308-
ElementaryCondition condition = induceCondition(rule, dataset, uncoveredPositives, covered, localAllowed, rule.getCoveredPositives());
323+
ElementaryCondition condition = induceCondition(rule, dataset, uncoveredPositives, covered, localAllowed,
324+
new Pair<String,Object>("covered_positives", rule.getCoveredPositives()));
325+
309326
carryOn = tryAddCondition(rule, null, condition, dataset, covered,uncoveredPositives);
310327
// fixme: we are not sure if condition was added
311328
if (carryOn) {
@@ -340,7 +357,8 @@ public int grow(
340357

341358
do {
342359
ElementaryCondition condition = induceCondition(
343-
rule, dataset, uncoveredPositives, covered, allowedAttributes, rule.getCoveredPositives());
360+
rule, dataset, uncoveredPositives, covered, allowedAttributes,
361+
new Pair<String,Object>("covered_positives", rule.getCoveredPositives()));
344362

345363
if (params.getSelectBestCandidate()) {
346364
carryOn = tryAddCondition(currentRule, rule, condition, dataset, covered, uncoveredPositives);
@@ -367,9 +385,8 @@ public int grow(
367385
*/
368386
@Override
369387
protected boolean checkCandidate(ElementaryCondition cnd, double classId, double p, double n, double new_p, double P,double uncoveredSize, int ruleOrderNum) {
370-
return new_p >= params.getAbsoluteMinimumCovered(P)
371-
&& p >= params.getAbsoluteMinimumCoveredAll(P)
372-
&& !knowledge.isForbidden(cnd.getAttribute(), cnd.getValueSet(), (int)classId);
388+
boolean ok = super.checkCandidate(cnd, classId, p, n, new_p, P, uncoveredSize, ruleOrderNum);
389+
return ok && !knowledge.isForbidden(cnd.getAttribute(), cnd.getValueSet(), (int)classId);
373390

374391
}
375392

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/ClassificationExpertSnC.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,9 +229,18 @@ public ClassificationRuleSet run(IExampleSet dataset)
229229
uncoveredPositives.removeAll(rule.getCoveredPositives());
230230
uncovered.removeAll(rule.getCoveredPositives());
231231
uncovered.removeAll(rule.getCoveredNegatives());
232-
232+
233+
double uncovered_p = 0;
234+
if (dataset.getAttributes().getWeight() == null) {
235+
uncovered_p = uncoveredPositives.size();
236+
} else {
237+
for (int id : uncoveredPositives) {
238+
uncovered_p += weightDataColumnDoubleAdapter.getDouble(id);
239+
}
240+
}
241+
233242
// stop if no positive examples remaining
234-
if (uncoveredPositives.size() == 0) {
243+
if (uncovered_p <= params.getMaximumUncoveredFraction() * weighted_P) {
235244
carryOn = false;
236245
}
237246

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/ClassificationFinder.java

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import adaa.analytics.rules.logic.representation.valueset.SingletonSet;
3333
import adaa.analytics.rules.logic.representation.valueset.SingletonSetComplement;
3434
import adaa.analytics.rules.utils.Logger;
35+
import adaa.analytics.rules.utils.Pair;
3536
import tech.tablesaw.api.DoubleColumn;
3637

3738

@@ -465,7 +466,7 @@ public void postprocess(
465466

466467
/**
467468
* Induces an elementary condition.
468-
*
469+
*
469470
* @param rule Current rule.
470471
* @param trainSet Training set.
471472
* @param uncoveredPositives Set of positive examples uncovered by the model.
@@ -476,12 +477,12 @@ public void postprocess(
476477
*/
477478
@Override
478479
protected ElementaryCondition induceCondition(
479-
Rule rule,
480-
IExampleSet trainSet,
481-
Set<Integer> uncoveredPositives,
482-
Set<Integer> coveredByRule,
483-
Set<IAttribute> allowedAttributes,
484-
Object... extraParams) {
480+
Rule rule,
481+
IExampleSet trainSet,
482+
Set<Integer> uncoveredPositives,
483+
Set<Integer> coveredByRule,
484+
Set<IAttribute> allowedAttributes,
485+
Pair<String, Object>... extraParams) {
485486

486487
if (allowedAttributes.size() == 0) {
487488
return null;
@@ -495,7 +496,17 @@ protected ElementaryCondition induceCondition(
495496
double P = rule.getWeighted_P();
496497
double N = rule.getWeighted_N();
497498

498-
double apriori_prec = params.isControlAprioriPrecision()
499+
500+
boolean controlPrecision = params.isControlAprioriPrecision();
501+
// override precision verification
502+
for (Pair<String, Object> param : extraParams) {
503+
if (param.getFirst().equals("disable_precision_control") && (Boolean)param.getSecond()) {
504+
controlPrecision = false;
505+
break;
506+
}
507+
}
508+
509+
double apriori_prec = controlPrecision
499510
? P / (P + N)
500511
: Double.MIN_VALUE;
501512

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/ClassificationSnC.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,12 @@ public RuleSetBase run(IExampleSet dataset) {
165165
uncovered.removeAll(rule.getCoveredNegatives());
166166

167167
uncovered_p = 0;
168-
169-
for (int id : uncoveredPositives) {
170-
uncovered_p += dataset.getAttributes().getWeight() == null ? 1.0 : weightDataColumnDoubleAdapter.getDouble(id);
168+
if (dataset.getAttributes().getWeight() == null) {
169+
uncovered_p = uncoveredPositives.size();
170+
} else {
171+
for (int id : uncoveredPositives) {
172+
uncovered_p += weightDataColumnDoubleAdapter.getDouble(id);
173+
}
171174
}
172175

173176
Logger.log("Uncovered positives" + uncovered_p + "\n", Level.FINER);

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/RegressionExpertFinder.java

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ public void adjust(
6262

6363
for (ConditionBase cnd : expertPremise.getSubconditions()) {
6464
ElementaryCondition ec = (ElementaryCondition)cnd;
65+
boolean alreadyAdded = false;
66+
6567
if (ec.isAdjustable()) {
6668

6769
// update covering information - needed for automatic induction
@@ -84,23 +86,26 @@ public void adjust(
8486

8587
} else {
8688
// condition in other form - find best condition using this attribute with non-empty intersection with specified condition
87-
mustBeCovered = new HashSet<Integer>();
88-
for (int i : covered) {
89-
if (ec.evaluate(dataset.getExample(i))) {
90-
mustBeCovered.add(i);
91-
}
92-
}
89+
mustBeCovered = new IntegerBitSet(dataset.size());
90+
ec.evaluate(dataset, mustBeCovered);
91+
mustBeCovered.retainAll(rule.getCoveredPositives());
9392
}
94-
95-
ElementaryCondition newCondition = induceCondition(
96-
rule, dataset, mustBeCovered, covered, attr);
97-
98-
if (newCondition != null) {
99-
newCondition.setType(Type.FORCED);
100-
rule.getPremise().addSubcondition(newCondition);
93+
94+
if (!mustBeCovered.isEmpty()) {
95+
ElementaryCondition newCondition = induceCondition(
96+
rule, dataset, mustBeCovered, covered, attr);
97+
98+
if (newCondition != null) {
99+
newCondition.setType(Type.FORCED);
100+
rule.getPremise().addSubcondition(newCondition);
101+
alreadyAdded = true;
102+
}
103+
} else {
104+
Logger.log("Adjustable condition does not cover any positives: " + ec, Level.FINE);
101105
}
102-
103-
} else {
106+
}
107+
108+
if (!alreadyAdded) {
104109
rule.getPremise().addSubcondition((ElementaryCondition)SerializationUtils.clone(ec));
105110
}
106111
}

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/induction/RegressionFinder.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
******************************************************************************/
1515
package adaa.analytics.rules.logic.induction;
1616

17-
import adaa.analytics.rules.data.metadata.EColumnSortDirections;
1817
import adaa.analytics.rules.logic.representation.*;
1918

2019
import adaa.analytics.rules.data.IAttribute;
@@ -29,6 +28,7 @@
2928
import adaa.analytics.rules.logic.representation.valueset.SingletonSet;
3029
import adaa.analytics.rules.logic.representation.valueset.SingletonSetComplement;
3130
import adaa.analytics.rules.utils.Logger;
31+
import adaa.analytics.rules.utils.Pair;
3232
import tech.tablesaw.api.DoubleColumn;
3333

3434
import java.security.InvalidParameterException;
@@ -55,7 +55,7 @@ protected ElementaryCondition induceCondition_mean(
5555
final Set<Integer> uncovered,
5656
final Set<Integer> covered,
5757
final Set<IAttribute> allowedAttributes,
58-
Object... extraParams) {
58+
Pair<String,Object>... extraParams) {
5959

6060
RegressionExampleSet set = (dataset instanceof RegressionExampleSet) ? (RegressionExampleSet)dataset : null;
6161
if (set == null) {
@@ -300,12 +300,12 @@ class Stats{
300300

301301
@Override
302302
protected ElementaryCondition induceCondition(
303-
final Rule rule,
304-
final IExampleSet dataset,
305-
final Set<Integer> uncovered,
306-
final Set<Integer> covered,
307-
final Set<IAttribute> allowedAttributes,
308-
Object... extraParams) {
303+
final Rule rule,
304+
final IExampleSet dataset,
305+
final Set<Integer> uncovered,
306+
final Set<Integer> covered,
307+
final Set<IAttribute> allowedAttributes,
308+
Pair<String, Object>... extraParams) {
309309

310310
if (allowedAttributes.size() == 0) {
311311
return null;

adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/representation/RuleParser.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,9 +173,10 @@ public static ElementaryCondition parseElementaryCondition(String s, IAttributes
173173
if (Pattern.compile("Any").matcher(valueString).find()) {
174174
valueSet = new Universum();
175175
} else if (attributeMeta.isNominal()) {
176-
regex = Pattern.compile("\\{(?<discrete>.+)\\}");
176+
regex = Pattern.compile("(?<negation>(!?))\\{(?<discrete>.+)\\}");
177177
matcher = regex.matcher(valueString);
178178
if (matcher.find()) {
179+
String negation = matcher.group("negation");
179180
String value = matcher.group("discrete");
180181

181182
if (value.equals("NaN") && isSurvival) {
@@ -188,7 +189,11 @@ public static ElementaryCondition parseElementaryCondition(String s, IAttributes
188189
Logger.log("Invalid value <" + value + "> of the nominal attribute <" + attribute + ">" + "\n", Level.WARNING);
189190
return null;
190191
}
191-
valueSet = new SingletonSet(v, mapping);
192+
if (negation.isEmpty()) {
193+
valueSet = new SingletonSet(v, mapping);
194+
} else {
195+
valueSet = new SingletonSetComplement(v, mapping);
196+
}
192197
}
193198

194199
}

0 commit comments

Comments
 (0)