Skip to content

Commit

Permalink
distributed FDs
Browse files Browse the repository at this point in the history
  • Loading branch information
kraftp committed May 24, 2018
1 parent 9dc864d commit a4fb4d9
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ private DistributedBatchSummarizer getDistributedSummarizer(String outlierColumn
summarizer.setAttributes(attributes);
summarizer.setMinSupport(minSupport);
summarizer.setMinRatioMetric(minRiskRatio);
summarizer.setFDUsage(useFDs);
summarizer.setFDValues(functionalDependencies);
summarizer.setNumPartitions(distributedNumPartitions);
return summarizer;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ public abstract class DistributedBatchSummarizer implements Operator<Distributed
protected List<String> attributes = new ArrayList<>();
protected int numThreads = Runtime.getRuntime().availableProcessors();
protected String ratioMetric = "global_ratio";
protected boolean useFDs = false;
protected int[] functionalDependencies;

/**
* Adjust this to tune the significance (e.g. number of rows affected) of the results returned.
Expand Down Expand Up @@ -64,4 +66,15 @@ public DistributedBatchSummarizer setRatioMetric(final String ratioMetric) {
this.ratioMetric = ratioMetric;
return this;
}

/**
 * Turns the use of functional dependencies (FDs) on or off for this summarizer.
 * The flag is stored in {@code useFDs}, which is {@code false} until this is called.
 * NOTE(review): presumably pairs with {@link #setFDValues(int[])}, which supplies
 * the dependencies themselves -- confirm against the code that reads both fields.
 *
 * @param useFDs {@code true} to enable FD handling, {@code false} to disable it
 * @return this summarizer, so that setter calls can be chained
 */
public DistributedBatchSummarizer setFDUsage(final boolean useFDs) {
this.useFDs = useFDs;
return this;
}

/**
 * Sets the functional-dependency values used by this summarizer.
 * The array is stored by reference, not copied, so later modifications made by
 * the caller are visible through {@code functionalDependencies}. The field has no
 * initializer, so it stays {@code null} until this method is called.
 * NOTE(review): looks like a per-column bitmask, where bit {@code j} of entry
 * {@code i} marks columns {@code i} and {@code j} as functionally dependent --
 * confirm against the code that consumes the array.
 *
 * @param functionalDependencies the FD values to store; no validation is performed here
 * @return this summarizer, so that setter calls can be chained
 */
public DistributedBatchSummarizer setFDValues(final int[] functionalDependencies) {
this.functionalDependencies = functionalDependencies;
return this;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,9 @@ private void processInternal(JavaPairRDD<String[], double[]> partitionedDataFram
encoder.getOutlierList(),
encoder.getColCardinalities(),
qualityMetricList,
thresholds
thresholds,
useFDs,
functionalDependencies
);
log.info("Number of results: {}", aplResults.size());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ public static List<APLExplanationResult> explain(
ArrayList<Integer>[] outlierList,
int[] colCardinalities,
List<QualityMetric> argQualityMetrics,
List<Double> argThresholds
List<Double> argThresholds,
boolean useFDs,
int[] functionalDependencies
) {

Logger log = LoggerFactory.getLogger("APLSummarizerDistributed");
Expand Down Expand Up @@ -189,6 +191,10 @@ public static List<APLExplanationResult> explain(
for (int colNumOne = 0; colNumOne < numColumns; colNumOne++) {
int[] curColumnOneAttributes = attributesForThread[colNumOne];
for (int colNumTwo = colNumOne + 1; colNumTwo < numColumns; colNumTwo++) {
//if FDs are enabled, and these two attribute cols are FDs, skip
if (useFDs && ((functionalDependencies[colNumOne] & (1<<colNumTwo)) == (1<<colNumTwo))) {
continue;
}
int[] curColumnTwoAttributes = attributesForThread[colNumTwo];
if (colCardinalities[colNumOne] < AttributeEncoder.cardinalityThreshold &&
colCardinalities[colNumOne] < AttributeEncoder.cardinalityThreshold &&
Expand All @@ -210,8 +216,17 @@ public static List<APLExplanationResult> explain(
for (int colNumOne = 0; colNumOne < numColumns; colNumOne++) {
int[] curColumnOneAttributes = attributesForThread[colNumOne % numColumns];
for (int colNumTwo = colNumOne + 1; colNumTwo < numColumns; colNumTwo++) {
//if FD on and attributes 1 and 2 are FDs, skip
if (useFDs && ((functionalDependencies[colNumOne] & (1<<colNumTwo)) == (1<<colNumTwo))) {
continue;
}
int[] curColumnTwoAttributes = attributesForThread[colNumTwo % numColumns];
for (int colNumThree = colNumTwo + 1; colNumThree < numColumns; colNumThree++) {
//if FD on and attribute 3 is FD w/ 1 or 2, skip
if (useFDs && (((functionalDependencies[colNumOne] & (1 << colNumThree)) == (1 << colNumThree))
|| ((functionalDependencies[colNumTwo] & (1 << colNumThree)) == (1 << colNumThree)))) {
continue;
}
int[] curColumnThreeAttributes = attributesForThread[colNumThree % numColumns];
if (colCardinalities[colNumOne] < AttributeEncoder.cardinalityThreshold &&
colCardinalities[colNumOne] < AttributeEncoder.cardinalityThreshold &&
Expand Down

0 comments on commit a4fb4d9

Please sign in to comment.