Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ tantivy = "0.25.0"
tantivy-common = "0.10.0"
tantivy-fst = "0.5"
tantivy-jieba = "0.17.0"
tantivy-query-grammar = "0.25.0"
temp-env = "0.3.0"
tempfile = "3.4.0"
terminal_size = "0.4.2"
Expand Down Expand Up @@ -659,6 +660,7 @@ state-machine-api = { git = "https://github.com/databendlabs/state-machine-api.g
sub-cache = { git = "https://github.com/databendlabs/sub-cache", tag = "v0.2.1" }
tantivy = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d" }
tantivy-common = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d", package = "tantivy-common" }
tantivy-query-grammar = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d", package = "tantivy-query-grammar" }
tantivy-jieba = { git = "https://github.com/datafuse-extras/tantivy-jieba", rev = "ac27464" }
watcher = { git = "https://github.com/databendlabs/watcher", tag = "v0.4.2" }
xorfilter-rs = { git = "https://github.com/datafuse-extras/xorfilter", tag = "databend-alpha.4" }
1 change: 1 addition & 0 deletions src/query/sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ serde_json = { workspace = true }
sha2 = { workspace = true }
similar = { workspace = true }
simsearch = { workspace = true }
tantivy-query-grammar = { workspace = true }
unicase = { workspace = true }
url = { workspace = true }

Expand Down
72 changes: 56 additions & 16 deletions src/query/sql/src/planner/semantic/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ use jsonb::keypath::KeyPaths;
use serde_json::json;
use serde_json::to_string;
use simsearch::SimSearch;
use tantivy_query_grammar::parse_query;
use tantivy_query_grammar::UserInputAst;
use tantivy_query_grammar::UserInputLeaf;
use unicase::Ascii;

use super::name_resolution::NameResolutionContext;
Expand Down Expand Up @@ -2657,28 +2660,65 @@ impl<'a> TypeChecker<'a> {
.set_span(query_scalar.span()));
};

let field_strs: Vec<&str> = query_text.split(' ').collect();
let mut column_refs = Vec::with_capacity(field_strs.len());
for field_str in field_strs {
if !field_str.contains(':') {
continue;
// Returns the part of a query field name that precedes its first `.`,
// or the whole name when it contains no dot.
//
// Queries on a JSON-typed field may address nested keys with dot separators.
// For example, if the `info` field holds
// `{"tags":{"id":10,"env":"prod","name":"test"}}`, a query can be written as
// `info.tags.env:prod`, and the field `info` is what gets extracted.
fn extract_first_subfield(field: &str) -> String {
    let head = match field.split_once('.') {
        Some((before_dot, _)) => before_dot,
        None => field,
    };
    head.to_owned()
}

// Recursively walks a parsed query AST and inserts into `fields` the first
// dot-separated segment of every field name referenced by a leaf.
// Leaves that carry no field name add nothing to the set.
fn collect_fields(ast: &UserInputAst, fields: &mut HashSet<String>) {
match ast {
// A clause holds a sequence of pairs; the first element of each pair is
// ignored and only the nested AST is visited.
UserInputAst::Clause(clauses) => {
for (_, sub_ast) in clauses {
collect_fields(sub_ast, fields);
}
}
// The second component of a boost is ignored; only the wrapped AST is visited.
UserInputAst::Boost(inner_ast, _) => {
collect_fields(inner_ast, fields);
}
UserInputAst::Leaf(leaf) => match &**leaf {
UserInputLeaf::Literal(literal) => {
// `field_name` is optional; a literal without one contributes nothing.
if let Some(field) = &literal.field_name {
fields.insert(extract_first_subfield(field));
}
}
UserInputLeaf::Range { field, .. } => {
if let Some(field) = field {
fields.insert(extract_first_subfield(field));
}
}
UserInputLeaf::Set { field, .. } => {
if let Some(field) = field {
fields.insert(extract_first_subfield(field));
}
}
// Unlike the other leaf kinds, `Exists` is matched with a non-optional field.
UserInputLeaf::Exists { field } => {
fields.insert(extract_first_subfield(field));
}
UserInputLeaf::Regex { field, .. } => {
if let Some(field) = field {
fields.insert(extract_first_subfield(field));
}
}
// This leaf kind names no field, so there is nothing to record.
UserInputLeaf::All => {}
},
}
// NOTE(review): the statements from here to the closing brace reference
// `field_str`, which is not defined in this function. They look like
// removed-side lines of the diff (the earlier split-on-':' logic) rather
// than part of `collect_fields` — confirm against the actual file.
let field_names: Vec<&str> = field_str.split(':').collect();
// if the field is JSON type, must specify the key path in the object
// for example:
// the field `info` has the value: `{"tags":{"id":10,"env":"prod","name":"test"}}`
// a query can be written like this `info.tags.env:prod`
let field_name = field_names[0].trim();
let sub_field_names: Vec<&str> = field_name.split('.').collect();
}

let query_ast = parse_query(query_text).unwrap();
let mut fields = HashSet::new();
collect_fields(&query_ast, &mut fields);

let mut column_refs = Vec::with_capacity(fields.len());
for field in fields.into_iter() {
let column_expr = Expr::ColumnRef {
span: query_scalar.span(),
column: ColumnRef {
database: None,
table: None,
column: ColumnID::Name(Identifier::from_name(
query_scalar.span(),
sub_field_names[0].trim(),
)),
column: ColumnID::Name(Identifier::from_name(query_scalar.span(), field)),
},
};
let box (field_scalar, _) = self.resolve(&column_expr)?;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ SHOW CREATE TABLE t
----
t CREATE TABLE t ( id INT NULL, content VARCHAR NULL, SYNC INVERTED INDEX idx1 (content) filters = 'english_stop,english_stemmer,chinese_stop', tokenizer = 'chinese' ) ENGINE=FUSE

query
query IFT
SELECT id, score(), content FROM t WHERE match(content, 'test')
----

Expand Down Expand Up @@ -437,6 +437,17 @@ SELECT id, score(), body FROM t1 WHERE query('body.metadata.tags:technology')
3 2.411387 {"metadata":{"author":"Quincy","price":42.92,"publishedDate":"2022-05-05","tags":["autonomous vehicles","future","technology"]},"title":"The Future of Autonomous Vehicles"}
5 2.411387 {"metadata":{"author":"Samuel","price":69.99,"publishedDate":"2023-12-15","tags":["IoT","applications","technology"]},"title":"Internet of Things Applications"}

query IFT
SELECT id, score(), body FROM t1 WHERE query('body.metadata.tags:technology AND body.metadata.author:Quincy')
----
3 5.654169 {"metadata":{"author":"Quincy","price":42.92,"publishedDate":"2022-05-05","tags":["autonomous vehicles","future","technology"]},"title":"The Future of Autonomous Vehicles"}

query IFT
SELECT id, score(), body FROM t1 WHERE query('(body.metadata.tags:technology AND body.metadata.author:Quincy) OR body.metadata.author:Oliver')
----
1 3.2427819 {"metadata":{"author":"Oliver","price":15.44,"publishedDate":"2021-06-15","tags":["psychology","persuasion","behavior"]},"title":"The Psychology of Persuasion"}
3 5.654169 {"metadata":{"author":"Quincy","price":42.92,"publishedDate":"2022-05-05","tags":["autonomous vehicles","future","technology"]},"title":"The Future of Autonomous Vehicles"}

query IFT
SELECT id, score(), body FROM t1 WHERE query('body.metadata.tags:技术')
----
Expand Down
Loading