Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Add query field rule #2672

Open
wants to merge 7 commits into
base: integration
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions properties/default.properties
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ event.query.filters.enabled=false
event.query.filters.classnames=
event.query.filters.options=
event.query.filters.index.classnames=
event.query.field.rule.class.name=
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only one? Do we want to be able to have a list of rule classes?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to just start with one for now. Pretty easy to update if we need multiple, but haven't seen that use case yet.


# Default set of decorators
event.query.data.decorators=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,7 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
* table in order to NOT be considered a field index hole. The value must be between 0.0-1.0, where 1.0 is equivalent to 100%.
*/
private double fieldIndexHoleMinThreshold = 1.0d;
private String fieldRuleClassName;

/**
* The set of date types that, if the query's end date is the current date, will NOT result in any date range adjustments or the addition of a
Expand Down Expand Up @@ -635,6 +636,7 @@ public void copyFrom(ShardQueryConfiguration other) {
this.setUseFilters(other.getUseFilters());
this.setFilterClassNames(null == other.getFilterClassNames() ? null : Lists.newArrayList(other.getFilterClassNames()));
this.setIndexFilteringClassNames(null == other.getIndexFilteringClassNames() ? null : Lists.newArrayList(other.getIndexFilteringClassNames()));
// Copy-paste fix: the guard previously tested other.getIndexFilteringClassNames() instead of the
// field being copied, so a config with a rule class but null index-filtering classes lost its rule.
// Strings are immutable, so no defensive copy is needed; the setter already normalizes null/empty.
this.setFieldRuleClassName(other.getFieldRuleClassName());
this.setNonEventKeyPrefixes(null == other.getNonEventKeyPrefixes() ? null : Sets.newHashSet(other.getNonEventKeyPrefixes()));
this.setUnevaluatedFields(null == other.getUnevaluatedFields() ? null : Sets.newHashSet(other.getUnevaluatedFields()));
this.setDatatypeFilter(null == other.getDatatypeFilter() ? null
Expand Down Expand Up @@ -1280,6 +1282,14 @@ public void setFilterClassNames(List<String> filterClassNames) {
this.filterClassNames = new ArrayList<>((filterClassNames != null ? filterClassNames : Collections.EMPTY_LIST));
}

/**
 * Returns the fully-qualified class name of the configured field rule, or {@code null} when no rule
 * is configured (the setter normalizes empty strings to {@code null}).
 *
 * @return the field rule class name, or {@code null} if unset
 */
public String getFieldRuleClassName() {
return fieldRuleClassName;
}

/**
 * Sets the fully-qualified class name of the field rule to apply during query planning.
 * Blank input ({@code null} or empty string) is normalized to {@code null} so callers can
 * rely on a simple null check to detect "no rule configured".
 *
 * @param fieldRuleClassName the rule class name; may be {@code null} or empty
 */
public void setFieldRuleClassName(String fieldRuleClassName) {
    if (fieldRuleClassName == null || fieldRuleClassName.isEmpty()) {
        this.fieldRuleClassName = null;
    } else {
        this.fieldRuleClassName = fieldRuleClassName;
    }
}

/**
* Gets any predicate-based filters to apply when iterating through the field index. These filters will be "anded" with the default data type filter, if
* any, used to construct the IndexIterator, particularly via the TLDQueryIterator.
Expand Down Expand Up @@ -2973,6 +2983,7 @@ public boolean equals(Object o) {
Objects.equals(getEnricherClassNames(), that.getEnricherClassNames()) &&
Objects.equals(getUseFilters(), that.getUseFilters()) &&
Objects.equals(getFilterClassNames(), that.getFilterClassNames()) &&
Objects.equals(getFieldRuleClassName(), that.getFieldRuleClassName()) &&
Objects.equals(getIndexFilteringClassNames(), that.getIndexFilteringClassNames()) &&
Objects.equals(getNonEventKeyPrefixes(), that.getNonEventKeyPrefixes()) &&
Objects.equals(getUnevaluatedFields(), that.getUnevaluatedFields()) &&
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package datawave.query.jexl.visitors;

import java.util.HashSet;

import org.apache.commons.jexl3.parser.ASTJexlScript;

import datawave.query.model.QueryModel;
import datawave.query.planner.QueryPlanningStage;
import datawave.query.util.MetadataHelper;

/**
 * Visitor scaffold for applying field rules to a JEXL query tree at a given planning stage.
 * Currently performs only the default {@link BaseVisitor} traversal; the helper, model, and
 * stage are captured for use by rule logic.
 */
public class QueryFieldsRuleVisitor extends BaseVisitor {
    private final MetadataHelper helper;
    private final QueryModel model;
    private final QueryPlanningStage stage;

    public QueryFieldsRuleVisitor(MetadataHelper helper, QueryModel model, QueryPlanningStage stage) {
        this.helper = helper;
        this.model = model;
        this.stage = stage;
    }

    /**
     * Convenience entry point: builds a visitor and walks {@code script} with it.
     *
     * @param script the query tree to visit
     * @param helper metadata access for rule decisions
     * @param model the query model in effect
     * @param stage the planning stage at which rules are being applied
     * @return the visited script
     */
    public static ASTJexlScript applyRules(ASTJexlScript script, MetadataHelper helper, QueryModel model, QueryPlanningStage stage) {
        // The HashSet passed as visitor data starts empty; BaseVisitor threads it through the traversal.
        return (ASTJexlScript) script.jjtAccept(new QueryFieldsRuleVisitor(helper, model, stage), new HashSet<>());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@
import datawave.query.planner.comparator.GeoWaveQueryPlanComparator;
import datawave.query.planner.pushdown.PushDownVisitor;
import datawave.query.planner.pushdown.rules.PushDownRule;
import datawave.query.planner.rules.FieldTransformRule;
import datawave.query.planner.rules.FieldTransformRuleVisitor;
import datawave.query.planner.rules.NodeTransformRule;
import datawave.query.planner.rules.NodeTransformVisitor;
import datawave.query.postprocessing.tf.Function;
Expand Down Expand Up @@ -988,6 +990,12 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard
config.setQueryTree(timedExpandAnyFieldRegexNodes(timers, config.getQueryTree(), config, metadataHelper, scannerFactory, settings.getQuery()));
}

if (null != config.getFieldRuleClassName()) {
FieldTransformRule rule = new FieldTransformRule();
rule.setupRules(config);
config.setQueryTree(FieldTransformRuleVisitor.transform(config.getQueryTree(), Collections.singletonList(rule), config, metadataHelper));
}

if (reduceQuery) {
config.setQueryTree(timedReduce(timers, "Reduce Query After ANYFIELD Expansions", config.getQueryTree()));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package datawave.query.planner;

/**
 * Holder for the planning-stage enum used to identify where in query planning a field rule runs.
 */
public class QueryPlanningStage {
// NOTE(review): a class whose only member is an enum adds a nesting level for every caller
// (QueryPlanningStage.PLAN_STAGE.X); consider promoting this to a top-level enum, and naming
// it PlanStage per Java type-naming conventions.
public enum PLAN_STAGE {
PRE_MODEL_EXPANSION, POST_MODEL_EXPANSION, POST_INDEX_EXPANSION
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package datawave.query.planner.rules;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.commons.jexl3.parser.JexlNode;

import datawave.core.query.configuration.GenericQueryConfiguration;
import datawave.query.util.MetadataHelper;

/**
 * Base class for field-based query rules. A concrete rule parses its configuration in
 * {@link #parseRules} (invoked from this constructor) and then decides, per JEXL node,
 * whether the node should be pruned from the query or rewritten.
 */
public abstract class FieldRule {
// Fields whose nodes should be pruned outright. NOTE(review): package-private and mutable;
// consider private final with accessor methods if subclasses don't need direct writes.
Set<String> pruneFields = new HashSet<>();
// Field -> set of values; presumably prunes only specific field/value pairs — confirm intended semantics.
Map<String,Set<String>> pruneFVPairs = new HashMap<>();

// WARNING(review): calling the overridable parseRules() from a constructor means subclass
// fields are not yet initialized when it runs; subclasses must not rely on their own state there.
public FieldRule(GenericQueryConfiguration config) {
parseRules(config);
}

abstract void parseRules(GenericQueryConfiguration config);

public abstract boolean shouldPrune(JexlNode node, MetadataHelper helper);

public abstract boolean shouldModify(JexlNode node, MetadataHelper helper);

public abstract JexlNode modify(JexlNode node, MetadataHelper helper);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package datawave.query.planner.rules;

import java.lang.reflect.InvocationTargetException;

import org.apache.commons.jexl3.parser.ASTFalseNode;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.ParserTreeConstants;

import datawave.core.query.configuration.GenericQueryConfiguration;
import datawave.query.config.ShardQueryConfiguration;
import datawave.query.util.MetadataHelper;

/**
 * A {@link NodeTransformRule} that delegates per-node decisions to a reflectively loaded
 * {@link FieldRule}: pruned nodes are replaced with a literal {@code false} node, and
 * modifiable nodes are rewritten by the rule.
 */
public class FieldTransformRule implements NodeTransformRule {
    // Populated by setupRules(); apply() will NPE if called before setup completes.
    FieldRule rule;

    @Override
    public JexlNode apply(JexlNode node, ShardQueryConfiguration config, MetadataHelper helper) {
        if (rule.shouldPrune(node, helper)) {
            // Return a FRESH node per prune. The previous code reused one shared ASTFalseNode
            // instance for every pruned node; a single node object placed at multiple tree
            // positions gets its parent pointer clobbered each time it is re-attached.
            return new ASTFalseNode(ParserTreeConstants.JJTFALSENODE);
        }
        if (rule.shouldModify(node, helper)) {
            node = rule.modify(node, helper);
        }
        return node;
    }

    /**
     * Loads and instantiates the configured {@link FieldRule} subclass by reflection.
     *
     * @param config supplies the rule class name and is passed to the rule's constructor
     * @throws RuntimeException if the class cannot be found, is not a FieldRule, or cannot be constructed
     */
    public void setupRules(ShardQueryConfiguration config) {
        try {
            Class<? extends FieldRule> ruleClass = Class.forName(config.getFieldRuleClassName()).asSubclass(FieldRule.class);
            rule = ruleClass.getDeclaredConstructor(GenericQueryConfiguration.class).newInstance(config);
        } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException | ClassNotFoundException e) {
            // Preserve the cause: without it the underlying reflection/classloading failure is undiagnosable.
            throw new RuntimeException("Unable to load pruning rules for " + config.getFieldRuleClassName(), e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package datawave.query.planner.rules;

import static com.google.common.collect.Lists.newArrayList;
import static org.apache.commons.jexl3.parser.JexlNodes.newInstanceOfType;
import static org.apache.commons.jexl3.parser.JexlNodes.setChildren;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.jexl3.parser.ASTAndNode;
import org.apache.commons.jexl3.parser.ASTEQNode;
import org.apache.commons.jexl3.parser.ASTERNode;
import org.apache.commons.jexl3.parser.ASTFunctionNode;
import org.apache.commons.jexl3.parser.ASTGENode;
import org.apache.commons.jexl3.parser.ASTGTNode;
import org.apache.commons.jexl3.parser.ASTJexlScript;
import org.apache.commons.jexl3.parser.ASTLENode;
import org.apache.commons.jexl3.parser.ASTLTNode;
import org.apache.commons.jexl3.parser.ASTNENode;
import org.apache.commons.jexl3.parser.ASTNRNode;
import org.apache.commons.jexl3.parser.ASTNotNode;
import org.apache.commons.jexl3.parser.ASTOrNode;
import org.apache.commons.jexl3.parser.ASTReferenceExpression;
import org.apache.commons.jexl3.parser.JexlNode;

import datawave.query.config.ShardQueryConfiguration;
import datawave.query.jexl.nodes.QueryPropertyMarker;
import datawave.query.jexl.visitors.RebuildingVisitor;
import datawave.query.util.MetadataHelper;

/**
 * A {@link NodeTransformVisitor} variant that rebuilds the tree bottom-up via a local
 * {@link #copy} (which can DROP children — a child visit returning {@code null} removes that
 * child from the rebuilt node) and applies the configured transforms to each rebuilt node.
 */
public class FieldTransformRuleVisitor extends NodeTransformVisitor {

public FieldTransformRuleVisitor(ShardQueryConfiguration config, MetadataHelper helper, List<NodeTransformRule> rules) {
super(config, helper, rules);
}

/**
 * Builds a visitor and applies it to {@code tree}, returning the transformed script.
 */
public static ASTJexlScript transform(ASTJexlScript tree, List<NodeTransformRule> rules, ShardQueryConfiguration config, MetadataHelper helper) {
FieldTransformRuleVisitor visitor = new FieldTransformRuleVisitor(config, helper, rules);
return visitor.apply(tree);
}

// Rebuilds `node` as a new instance of the same type, recursing into children first.
// Children whose visit returns null are omitted, so transforms that prune a subtree
// (e.g. by returning null) shrink the rebuilt node. The original parent pointer is kept
// so the copy stays attached to the (old) lineage during rebuilding.
private <T extends JexlNode> T copy(T node, Object data) {
T newNode = newInstanceOfType(node);
// keep lineage
newNode.jjtSetParent(node.jjtGetParent());
ArrayList<JexlNode> children = newArrayList();
for (int i = 0; i < node.jjtGetNumChildren(); i++) {
JexlNode copiedChild = (JexlNode) node.jjtGetChild(i).jjtAccept(this, data);
if (copiedChild != null) {
children.add(copiedChild);
}
}
return setChildren(newNode, children.toArray(new JexlNode[children.size()]));
}

@Override
public Object visit(ASTOrNode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTAndNode node, Object data) {
// do not recurse on a marker node
if (QueryPropertyMarker.findInstance(node).isAnyType()) {
return applyTransforms(RebuildingVisitor.copy(node));
} else {
return applyTransforms(copy(node, data));
}
}

@Override
public Object visit(ASTEQNode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTNENode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTLTNode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTGTNode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTLENode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTGENode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTERNode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTNRNode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTNotNode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTFunctionNode node, Object data) {
return applyTransforms(copy(node, data));
}

@Override
public Object visit(ASTReferenceExpression node, Object data) {
return applyTransforms(copy(node, data));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public Object visit(ASTReferenceExpression node, Object data) {
return applyTransforms(super.visit(node, data));
}

private Object applyTransforms(Object node) {
protected Object applyTransforms(Object node) {
for (NodeTransformRule rule : rules) {
node = rule.apply((JexlNode) node, config, helper);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1934,6 +1934,14 @@ public void setFilterClassNames(List<String> filterClassNames) {
getConfig().setFilterClassNames(filterClassNames);
}

/**
 * Delegates to the underlying configuration; see {@code ShardQueryConfiguration#getFieldRuleClassName()}.
 *
 * @return the configured field rule class name, or {@code null} if unset
 */
public String getFieldRuleClassName() {
return getConfig().getFieldRuleClassName();
}

/**
 * Delegates to the underlying configuration; see {@code ShardQueryConfiguration#setFieldRuleClassName(String)}.
 *
 * @param fieldRuleClassName the rule class name; empty values are normalized to {@code null} by the config
 */
public void setFieldRuleClassName(String fieldRuleClassName) {
getConfig().setFieldRuleClassName(fieldRuleClassName);
}

public List<String> getIndexFilteringClassNames() {
return getConfig().getIndexFilteringClassNames();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ public void setUp() throws Exception {
updatedValues.put("useFilters", true);
defaultValues.put("indexFilteringClassNames", Lists.newArrayList());
updatedValues.put("indexFilteringClassNames", Lists.newArrayList("proj.datawave.query.filter.someIndexFilterClass"));
defaultValues.put("fieldRuleClassName", null);
updatedValues.put("fieldRuleClassName", "proj.datawave.query.planner.rule.someFieldRuleClass");
defaultValues.put("indexHoles", Lists.newArrayList());
updatedValues.put("indexHoles", Lists.newArrayList(new IndexHole()));
defaultValues.put("indexedFields", Sets.newHashSet());
Expand Down
Loading
Loading