Skip to content

Commit

Permalink
WITH MIN SUPPORT/MIN RATIO syntax for min ratio metric and min support
Browse files Browse the repository at this point in the history
  • Loading branch information
fabuzaid21 committed Jan 10, 2018
1 parent 054be69 commit 53eb13e
Show file tree
Hide file tree
Showing 11 changed files with 226 additions and 19 deletions.
12 changes: 12 additions & 0 deletions sql/src/main/antlr4/edu/stanford/futuredata/macrobase/SqlBase.g4
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ diffQuerySpecification
: SELECT setQuantifier? selectItem (',' selectItem)*
FROM DIFF queryNoWith qualifiedName? (',' queryNoWith qualifiedName?)?
ON columnAliases
(WITH minRatioExpression? minSupportExpression?)?
(COMPARE BY ratioMetricExpression)?
(MAX COMBO maxCombo=INTEGER_VALUE)?
(WHERE where=booleanExpression)?
Expand All @@ -183,13 +184,22 @@ diffQuerySpecification
exportExpression?
FROM DIFF queryNoWith qualifiedName? (',' queryNoWith qualifiedName?)?
ON columnAliases
// The WITH clause is optional, exactly as in the first alternative above.
// Without the trailing '?', this alternative would require the WITH keyword.
(WITH minRatioExpression? minSupportExpression?)?
(COMPARE BY ratioMetricExpression)?
(MAX COMBO maxCombo=INTEGER_VALUE)?
(WHERE where=booleanExpression)?
(ORDER BY sortItem (',' sortItem)*)?
(LIMIT limit=(INTEGER_VALUE | ALL))?
;

// Minimum-ratio threshold of a DIFF query's WITH clause: MIN RATIO <decimal>.
// The decimal token is captured under the label minRatio.
minRatioExpression
: MIN RATIO minRatio=DECIMAL_VALUE
;

// Minimum-support threshold of a DIFF query's WITH clause: MIN SUPPORT <decimal>.
// The decimal token is captured under the label minSupport.
// NOTE(review): the SUPPORT lexer rule is commented out in the keyword list of this
// grammar — confirm the SUPPORT token is actually defined before relying on this rule.
minSupportExpression
: MIN SUPPORT minSupport=DECIMAL_VALUE
;

// Ratio metric named in a DIFF query's COMPARE BY clause, written as
// an identifier applied to an aggregate expression: identifier(aggregateExpression).
ratioMetricExpression
: identifier '(' aggregateExpression ')'
;
Expand Down Expand Up @@ -655,6 +665,7 @@ PRIVILEGES: 'PRIVILEGES';
PROPERTIES: 'PROPERTIES';
PUBLIC: 'PUBLIC';
RANGE: 'RANGE';
RATIO: 'RATIO';
READ: 'READ';
RECURSIVE: 'RECURSIVE';
RENAME: 'RENAME';
Expand Down Expand Up @@ -683,6 +694,7 @@ STARTING: 'STARTING';
STATS: 'STATS';
SUBSTRING: 'SUBSTRING';
SUM: 'SUM';
// SUPPORT: 'SUPPORT'; TODO: figure out how to include this
SYSTEM: 'SYSTEM';
TABLE: 'TABLE';
TABLES: 'TABLES';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,13 @@ private DataFrame executeDiffQuerySpec(final DiffQuerySpecification diffQuery)
throws MacrobaseException {
assert (diffQuery.getSecond().isPresent()); // TODO: support single DataFrame queries
// Extract parameters for Diff query
// TODO: too many get's; too many fields are Optional that shouldn't be
final Query first = diffQuery.getFirst().get();
final Query second = diffQuery.getSecond().get();
final List<String> explainCols = diffQuery.getAttributeCols().stream().map(Identifier::toString)
.collect(toImmutableList());
final double minRatioMetric = diffQuery.getMinRatioExpression().get().getMinRatio();
final double minSupport = diffQuery.getMinSupportExpression().get().getMinSupport();
final ExplanationMetric ratioMetric = ExplanationMetric
.getMetricFn(diffQuery.getRatioMetricExpr().get().getFuncName().toString());
final long order = diffQuery.getMaxCombo().get().getValue();
Expand All @@ -109,13 +112,14 @@ private DataFrame executeDiffQuerySpec(final DiffQuerySpecification diffQuery)
}
// execute diff
// TODO: add support for "ON *"
DataFrame df = diff(firstDf, secondDf, explainCols, ratioMetric, (int) order);
DataFrame df = diff(firstDf, secondDf, explainCols, minRatioMetric, minSupport, ratioMetric,
(int) order);

return evaluateSQLClauses(diffQuery, df);
}

private DataFrame diff(final DataFrame outliers, final DataFrame inliers,
final List<String> cols,
final List<String> cols, final double minRatioMetric, final double minSupport,
final ExplanationMetric ratioMetric, final int order) throws MacrobaseException {

final String outlierColName = "outlier_col";
Expand All @@ -127,11 +131,10 @@ private DataFrame diff(final DataFrame outliers, final DataFrame inliers,
DataFrame combined = DataFrame.unionAll(Lists.newArrayList(outliers, inliers));

final APrioriSummarizer summarizer = new APrioriSummarizer();
// TODO: figure out a better way to handle default minRatioMetric and minSupport
summarizer.setRatioMetric(ratioMetric)
.setMaxOrder(order)
.setMinRatioMetric(1.5)
.setMinSupport(0.2)
.setMinRatioMetric(minRatioMetric)
.setMinSupport(minSupport)
.setOutlierColumn(outlierColName)
.setAttributes(cols);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import edu.stanford.futuredata.macrobase.SqlBaseBaseVisitor;
import edu.stanford.futuredata.macrobase.SqlBaseLexer;
import edu.stanford.futuredata.macrobase.SqlBaseParser;
import edu.stanford.futuredata.macrobase.SqlBaseParser.MinRatioExpressionContext;
import edu.stanford.futuredata.macrobase.sql.tree.Aggregate;
import edu.stanford.futuredata.macrobase.sql.tree.AggregateExpression;
import edu.stanford.futuredata.macrobase.sql.tree.AliasedRelation;
Expand Down Expand Up @@ -80,6 +81,8 @@
import edu.stanford.futuredata.macrobase.sql.tree.LikePredicate;
import edu.stanford.futuredata.macrobase.sql.tree.LogicalBinaryExpression;
import edu.stanford.futuredata.macrobase.sql.tree.LongLiteral;
import edu.stanford.futuredata.macrobase.sql.tree.MinRatioExpression;
import edu.stanford.futuredata.macrobase.sql.tree.MinSupportExpression;
import edu.stanford.futuredata.macrobase.sql.tree.NaturalJoin;
import edu.stanford.futuredata.macrobase.sql.tree.Node;
import edu.stanford.futuredata.macrobase.sql.tree.NodeLocation;
Expand Down Expand Up @@ -218,6 +221,8 @@ public Node visitQueryNoWith(SqlBaseParser.QueryNoWithContext context) {
diffQuery.getFirst(),
diffQuery.getSecond(),
diffQuery.getAttributeCols(),
diffQuery.getMinRatioExpression(),
diffQuery.getMinSupportExpression(),
diffQuery.getRatioMetricExpr(),
diffQuery.getMaxCombo(),
diffQuery.getWhere(),
Expand Down Expand Up @@ -276,6 +281,16 @@ public Node visitAggregateExpression(SqlBaseParser.AggregateExpressionContext co
return new AggregateExpression(getLocation(context), (Aggregate) visit(context.aggregate()));
}

/**
 * Builds the AST node for a {@code MIN RATIO <decimal>} clause.
 *
 * @param context parse-tree context whose {@code minRatio} token carries the decimal text
 * @return a {@link MinRatioExpression} wrapping that text as a {@link DecimalLiteral}
 */
@Override
public Node visitMinRatioExpression(SqlBaseParser.MinRatioExpressionContext context) {
    final String ratioText = context.minRatio.getText();
    return new MinRatioExpression(getLocation(context), new DecimalLiteral(ratioText));
}

/**
 * Builds the AST node for a {@code MIN SUPPORT <decimal>} clause.
 *
 * @param context parse-tree context whose {@code minSupport} token carries the decimal text
 * @return a {@link MinSupportExpression} wrapping that text as a {@link DecimalLiteral}
 */
@Override
public Node visitMinSupportExpression(SqlBaseParser.MinSupportExpressionContext context) {
    final String supportText = context.minSupport.getText();
    return new MinSupportExpression(getLocation(context), new DecimalLiteral(supportText));
}

@Override
public Node visitRatioMetricExpression(SqlBaseParser.RatioMetricExpressionContext context) {
return new RatioMetricExpression(getLocation(context), (Identifier) visit(context.identifier()),
Expand All @@ -290,14 +305,18 @@ public Node visitDiffQuerySpecification(SqlBaseParser.DiffQuerySpecificationCont

List<Query> subqueries = visit(context.queryNoWith(), Query.class);
check(subqueries.size() > 0 && subqueries.size() <= 2,
"At most two relations required for diff query", context);
"At least one and at most two relations required for diff query", context);

first = Optional.of(subqueries.get(0));
check(true, "At least one relation required for diff query", context);
if (subqueries.size() == 2) {
second = Optional.of(subqueries.get(1));
}

Optional<MinRatioExpression> minRatioExpr = visitIfPresent(context.minRatioExpression(),
MinRatioExpression.class);
Optional<MinSupportExpression> minSupportExpr = visitIfPresent(context.minSupportExpression(),
MinSupportExpression.class);

Optional<RatioMetricExpression> ratioMetricExpr = visitIfPresent(
context.ratioMetricExpression(), RatioMetricExpression.class);
List<Identifier> attributeCols = visit(context.columnAliases().identifier(), Identifier.class);
Expand All @@ -320,6 +339,8 @@ public Node visitDiffQuerySpecification(SqlBaseParser.DiffQuerySpecificationCont
first,
second,
attributeCols,
minRatioExpr,
minSupportExpr,
ratioMetricExpr,
maxCombo,
visitIfPresent(context.where, Expression.class),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -468,4 +468,12 @@ public R visitImportCsv(ImportCsv node, C context) {
/**
 * Visits a {@link DelimiterExpression}. This default implementation applies no
 * node-specific handling and simply delegates to {@code visitNode}.
 */
public R visitDelimiterExpression(DelimiterExpression node, C context) {
return visitNode(node, context);
}

/**
 * Visits a {@link MinRatioExpression}. This default implementation applies no
 * node-specific handling and simply delegates to {@code visitNode}.
 */
public R visitMinRatioExpression(MinRatioExpression node, C context) {
return visitNode(node, context);
}

/**
 * Visits a {@link MinSupportExpression}. This default implementation applies no
 * node-specific handling and simply delegates to {@code visitNode}.
 */
public R visitMinSupportExpression(MinSupportExpression node, C context) {
return visitNode(node, context);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@
import java.util.Objects;
import java.util.Optional;

public class DecimalLiteral
extends Literal {
public class DecimalLiteral extends Literal {

private final String value;
private final double value;

public DecimalLiteral(String value) {
this(Optional.empty(), value);
Expand All @@ -33,10 +32,11 @@ public DecimalLiteral(NodeLocation location, String value) {

public DecimalLiteral(Optional<NodeLocation> location, String value) {
super(location);
this.value = requireNonNull(value, "value is null");
requireNonNull(value, "value is null");
this.value = Double.parseDouble(value);
}

public String getValue() {
public double getValue() {
return value;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ public class DiffQuerySpecification extends QueryBody {
private final Optional<Query> first;
private final Optional<Query> second;
private final List<Identifier> attributeCols;
private final Optional<MinRatioExpression> minRatioExpr;
private final Optional<MinSupportExpression> minSupportExpr;
private final Optional<RatioMetricExpression> ratioMetricExpr;
private final Optional<LongLiteral> maxCombo;
private final Optional<Expression> where;
Expand All @@ -22,6 +24,10 @@ public class DiffQuerySpecification extends QueryBody {
private final Optional<ExportExpression> exportExpr;

private static final LongLiteral DEFAULT_MAX_COMBO = new LongLiteral("3");
private static final MinRatioExpression DEFAULT_MIN_RATIO_EXPRESSION = new MinRatioExpression(
new DecimalLiteral("1.5"));
private static final MinSupportExpression DEFAULT_MIN_SUPPORT_EXPRESSION = new MinSupportExpression(
new DecimalLiteral("0.2"));
private static final RatioMetricExpression DEFAULT_RATIO_METRIC_EXPRESSION =
new RatioMetricExpression(new Identifier("global_ratio"),
new AggregateExpression(new Aggregate("COUNT")));
Expand All @@ -31,13 +37,16 @@ public DiffQuerySpecification(
Optional<Query> first,
Optional<Query> second,
List<Identifier> attributeCols,
Optional<MinRatioExpression> minRatioExpr,
Optional<MinSupportExpression> minSupportExpr,
Optional<RatioMetricExpression> ratioMetricExpr,
Optional<LongLiteral> maxCombo,
Optional<Expression> where,
Optional<OrderBy> orderBy,
Optional<String> limit,
Optional<ExportExpression> exportExpr) {
this(Optional.empty(), select, first, second, attributeCols, ratioMetricExpr, maxCombo, where,
this(Optional.empty(), select, first, second, attributeCols, minRatioExpr, minSupportExpr,
ratioMetricExpr, maxCombo, where,
orderBy, limit, exportExpr);
}

Expand All @@ -47,13 +56,16 @@ public DiffQuerySpecification(
Optional<Query> first,
Optional<Query> second,
List<Identifier> attributeCols,
Optional<MinRatioExpression> minRatioExpr,
Optional<MinSupportExpression> minSupportExpr,
Optional<RatioMetricExpression> ratioMetricExpr,
Optional<LongLiteral> maxCombo,
Optional<Expression> where,
Optional<OrderBy> orderBy,
Optional<String> limit,
Optional<ExportExpression> exportExpr) {
this(Optional.of(location), select, first, second, attributeCols, ratioMetricExpr, maxCombo,
this(Optional.of(location), select, first, second, attributeCols, minRatioExpr, minSupportExpr,
ratioMetricExpr, maxCombo,
where, orderBy, limit, exportExpr);
}

Expand All @@ -63,6 +75,8 @@ private DiffQuerySpecification(
Optional<Query> first,
Optional<Query> second,
List<Identifier> attributeCols,
Optional<MinRatioExpression> minRatioExpr,
Optional<MinSupportExpression> minSupportExpr,
Optional<RatioMetricExpression> ratioMetricExpr,
Optional<LongLiteral> maxCombo,
Optional<Expression> where,
Expand All @@ -74,6 +88,8 @@ private DiffQuerySpecification(
requireNonNull(first, "first is null");
requireNonNull(second, "second is null");
requireNonNull(attributeCols, "attributeCols is null");
requireNonNull(minRatioExpr, "minRatioExpr is null");
requireNonNull(minSupportExpr, "minSupportExpr is null");
requireNonNull(ratioMetricExpr, "ratioMetricExpr is null");
requireNonNull(maxCombo, "maxCombo is null");
requireNonNull(where, "where is null");
Expand All @@ -85,6 +101,8 @@ private DiffQuerySpecification(
this.first = first;
this.second = second;
this.attributeCols = attributeCols;
this.minRatioExpr = Optional.of(minRatioExpr.orElse(DEFAULT_MIN_RATIO_EXPRESSION));
this.minSupportExpr = Optional.of(minSupportExpr.orElse(DEFAULT_MIN_SUPPORT_EXPRESSION));
this.ratioMetricExpr = Optional.of(ratioMetricExpr.orElse(DEFAULT_RATIO_METRIC_EXPRESSION));
this.maxCombo = Optional.of(maxCombo.orElse(DEFAULT_MAX_COMBO));
this.where = where;
Expand All @@ -109,6 +127,14 @@ public List<Identifier> getAttributeCols() {
return attributeCols;
}

/**
 * Returns the minimum-ratio expression of this DIFF query.
 *
 * <p>The constructor substitutes the default expression (literal 1.5) when none is
 * supplied, so the returned {@code Optional} always holds a value.
 */
public Optional<MinRatioExpression> getMinRatioExpression() {
return minRatioExpr;
}

/**
 * Returns the minimum-support expression of this DIFF query.
 *
 * <p>The constructor substitutes the default expression (literal 0.2) when none is
 * supplied, so the returned {@code Optional} always holds a value.
 */
public Optional<MinSupportExpression> getMinSupportExpression() {
return minSupportExpr;
}

/**
 * Returns the ratio-metric expression of this DIFF query.
 *
 * <p>The constructor substitutes the default expression ({@code global_ratio} over
 * {@code COUNT}) when none is supplied, so the returned {@code Optional} always holds a value.
 */
public Optional<RatioMetricExpression> getRatioMetricExpr() {
return ratioMetricExpr;
}
Expand Down Expand Up @@ -145,10 +171,13 @@ public List<Node> getChildren() {
first.ifPresent(nodes::add);
second.ifPresent(nodes::add);
nodes.addAll(attributeCols);
// The constructor always populates the next four Optionals (defaults are substituted),
// so get() is safe here.
nodes.add(minRatioExpr.get());
nodes.add(minSupportExpr.get());
nodes.add(ratioMetricExpr.get());
// Add the stored literal itself. Concatenating the Optional into a string
// ("" + maxCombo) produces text of the form "Optional[...]", which is not a number.
nodes.add(maxCombo.get());
where.ifPresent(nodes::add);
orderBy.ifPresent(nodes::add);
limit.ifPresent((str) -> nodes.add(new StringLiteral(str)));
exportExpr.ifPresent(nodes::add);
return nodes.build();
}
Expand All @@ -160,6 +189,8 @@ public String toString() {
.add("first", first)
.add("second", second.orElse(null))
.add("attributeCols", attributeCols)
.add("minRatioExpr", minRatioExpr)
.add("minSupportExpr", minSupportExpr)
.add("ratioMetricExpr", ratioMetricExpr)
.add("maxCombo", maxCombo)
.add("where", where.orElse(null))
Expand All @@ -182,6 +213,8 @@ public boolean equals(Object obj) {
Objects.equals(first, o.first) &&
Objects.equals(second, o.second) &&
Objects.equals(attributeCols, o.attributeCols) &&
Objects.equals(minRatioExpr, o.minRatioExpr) &&
Objects.equals(minSupportExpr, o.minSupportExpr) &&
Objects.equals(ratioMetricExpr, o.ratioMetricExpr) &&
Objects.equals(maxCombo, o.maxCombo) &&
Objects.equals(where, o.where) &&
Expand All @@ -193,7 +226,8 @@ public boolean equals(Object obj) {
@Override
public int hashCode() {
return Objects
.hash(select, first, second, attributeCols, ratioMetricExpr, maxCombo, where, orderBy,
.hash(select, first, second, attributeCols, minRatioExpr, minSupportExpr, ratioMetricExpr,
maxCombo, where, orderBy,
limit, exportExpr);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ private ExportExpression(
requireNonNull(filename, "filename is null");

this.fieldDelimiter = fieldDelimiter.orElse(new DelimiterExpression(",")).toString();
// TODO: change this to throw a parseError
if (this.fieldDelimiter.length() != 1) {
throw new IllegalArgumentException("fieldDelimiter's length not equal to 1");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
import edu.stanford.futuredata.macrobase.sql.ExpressionFormatter;
import java.util.Optional;

public abstract class Expression
extends Node {
public abstract class Expression extends Node {

protected Expression(Optional<NodeLocation> location) {
super(location);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
import edu.stanford.futuredata.macrobase.sql.parser.ParsingException;
import java.util.Optional;

public class LongLiteral
extends Literal {
public class LongLiteral extends Literal {

private final long value;

Expand Down
Loading

0 comments on commit 53eb13e

Please sign in to comment.