# NOTE(review): this file is a unified diff (git patch), not a Rust source file.
# Its line breaks were restored from a flattened copy. Leading indentation and
# blank context lines were reconstructed from the hunk headers' line counts and
# should be confirmed against the target tree -- TODO confirm. If context lines
# fail to match on whitespace, try `git apply --ignore-whitespace`.
diff --git a/COMPAT.md b/COMPAT.md
index d40214a3..79a037a5 100644
--- a/COMPAT.md
+++ b/COMPAT.md
@@ -500,3 +500,13 @@ UUID's in Limbo are `blobs` by default.
 | uuid7_timestamp_ms(X) | Yes | Convert a UUID v7 to milliseconds since epoch |
 | uuid_str(X) | Yes | Convert a valid UUID to string |
 | uuid_blob(X) | Yes | Convert a valid UUID to blob |
+
+### REGEXP
+
+| Function | Status | Comment |
+|------------------------------------------------|--------|---------|
+| regexp(pattern, source) | Yes | |
+| regexp_like(source, pattern) | Yes | |
+| regexp_substr(source, pattern) | Yes | |
+| regexp_capture(source, pattern[, n]) | No | |
+| regexp_replace(source, pattern, replacement) | No | |
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index ee2ca746..6284a9e8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1274,6 +1274,15 @@ dependencies = [
  "syn 2.0.96",
 ]
 
+[[package]]
+name = "limbo_regexp"
+version = "0.0.12"
+dependencies = [
+ "limbo_ext",
+ "log",
+ "regex",
+]
+
 [[package]]
 name = "limbo_sim"
 version = "0.0.12"
diff --git a/Cargo.toml b/Cargo.toml
index 22787b9f..a6741a7d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,6 +11,7 @@ members = [
     "core",
     "extensions/core",
     "extensions/uuid",
+    "extensions/regexp",
     "macros",
     "simulator",
     "sqlite3",
diff --git a/core/translate/expr.rs b/core/translate/expr.rs
index cd9b6b28..47e6c910 100644
--- a/core/translate/expr.rs
+++ b/core/translate/expr.rs
@@ -742,14 +742,16 @@ pub fn translate_expr(
                 }
                 Func::External(_) => {
                     let regs = program.alloc_registers(args_count);
-                    for (i, arg_expr) in args.iter().enumerate() {
-                        translate_expr(
-                            program,
-                            referenced_tables,
-                            &arg_expr[i],
-                            regs + i,
-                            resolver,
-                        )?;
+                    if let Some(args) = args {
+                        for (i, arg_expr) in args.iter().enumerate() {
+                            translate_expr(
+                                program,
+                                referenced_tables,
+                                &arg_expr,
+                                regs + i,
+                                resolver,
+                            )?;
+                        }
                     }
                     program.emit_insn(Insn::Function {
                         constant_mask: 0,
diff --git a/extensions/regexp/Cargo.toml b/extensions/regexp/Cargo.toml
new file mode 100644
index 00000000..dc2f87c3
--- /dev/null
+++ b/extensions/regexp/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "limbo_regexp"
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+
+[lib]
+crate-type = ["cdylib", "lib"]
+
+
+[dependencies]
+limbo_ext = { path = "../core"}
+regex = "1.11.1"
+log = "0.4.20"
diff --git a/extensions/regexp/src/lib.rs b/extensions/regexp/src/lib.rs
new file mode 100644
index 00000000..b5c87aa4
--- /dev/null
+++ b/extensions/regexp/src/lib.rs
@@ -0,0 +1,65 @@
+use limbo_ext::{export_scalar, register_extension, register_scalar_functions, Value, ValueType};
+use regex::Regex;
+
+register_extension! {
+    scalars: {
+        "regexp" => regexp,
+        "regexp_like" => regexp_like,
+        "regexp_substr" => regexp_substr,
+    },
+}
+
+#[export_scalar]
+#[args(2)]
+fn regexp(args: &[Value]) -> Value {
+    regex(&args[0], &args[1])
+}
+
+#[export_scalar]
+#[args(2)]
+fn regexp_like(args: &[Value]) -> Value {
+    regex(&args[1], &args[0])
+}
+
+fn regex(pattern: &Value, haystack: &Value) -> Value {
+    match (pattern.value_type(), haystack.value_type()) {
+        (ValueType::Text, ValueType::Text) => {
+            let Some(pattern) = pattern.to_text() else {
+                return Value::null();
+            };
+            let Some(haystack) = haystack.to_text() else {
+                return Value::null();
+            };
+            let re = match Regex::new(&pattern) {
+                Ok(re) => re,
+                Err(_) => return Value::null(),
+            };
+            Value::from_integer(re.is_match(&haystack) as i64)
+        }
+        _ => Value::null(),
+    }
+}
+
+#[export_scalar]
+#[args(2)]
+fn regexp_substr(args: &[Value]) -> Value {
+    return match (args[0].value_type(), args[1].value_type()) {
+        (ValueType::Text, ValueType::Text) => {
+            let Some(haystack) = &args[0].to_text() else {
+                return Value::null();
+            };
+            let Some(pattern) = &args[1].to_text() else {
+                return Value::null();
+            };
+            let re = match Regex::new(pattern) {
+                Ok(re) => re,
+                Err(_) => return Value::null(),
+            };
+            match re.find(haystack) {
+                Some(mat) => Value::from_text(mat.as_str().to_string()),
+                None => Value::null(),
+            }
+        }
+        _ => Value::null(),
+    };
+}
diff --git a/testing/extensions.py b/testing/extensions.py
index 74383be9..fdf4eaf1 100755
--- a/testing/extensions.py
+++ b/testing/extensions.py
@@ -77,6 +77,11 @@ def run_test(pipe, sql, validator=None):
             raise Exception("Validation failed")
     print("Test PASSED")
 
+def validate_true(result):
+    return result == "1"
+
+def validate_false(result):
+    return result == "0"
 
 def validate_blob(result):
     # HACK: blobs are difficult to test because the shell
@@ -100,33 +105,54 @@ def assert_now_unixtime(result):
 def assert_specific_time(result):
     return result == "1736720789"
 
-
-def main():
+def test_uuid(pipe):
     specific_time = "01945ca0-3189-76c0-9a8f-caf310fc8b8e"
     extension_path = "./target/debug/liblimbo_uuid.so"
+
+    # before extension loads, assert no function
+    run_test(pipe, "SELECT uuid4();", returns_null)
+    run_test(pipe, "SELECT uuid4_str();", returns_null)
+    run_test(pipe, f".load {extension_path}", returns_null)
+    print(f"Extension {extension_path} loaded successfully.")
+    run_test(pipe, "SELECT hex(uuid4());", validate_blob)
+    run_test(pipe, "SELECT uuid4_str();", validate_string_uuid)
+    run_test(pipe, "SELECT hex(uuid7());", validate_blob)
+    run_test(
+        pipe,
+        "SELECT uuid7_timestamp_ms(uuid7()) / 1000;",
+    )
+    run_test(pipe, "SELECT uuid7_str();", validate_string_uuid)
+    run_test(pipe, "SELECT uuid_str(uuid7());", validate_string_uuid)
+    run_test(pipe, "SELECT hex(uuid_blob(uuid7_str()));", validate_blob)
+    run_test(pipe, "SELECT uuid_str(uuid_blob(uuid7_str()));", validate_string_uuid)
+    run_test(
+        pipe,
+        f"SELECT uuid7_timestamp_ms('{specific_time}') / 1000;",
+        assert_specific_time,
+    )
+
+def test_regexp(pipe):
+    extension_path = "./target/debug/liblimbo_regexp.so"
+
+    # before extension loads, assert no function
+    run_test(pipe, "SELECT regexp('a.c', 'abc');", returns_null)
+    run_test(pipe, f".load {extension_path}", returns_null)
+    print(f"Extension {extension_path} loaded successfully.")
+    run_test(pipe, "SELECT regexp('a.c', 'abc');", validate_true)
+    run_test(pipe, "SELECT regexp('a.c', 'ac');", validate_false)
+    run_test(pipe, "SELECT regexp('[0-9]+', 'the year is 2021');", validate_true)
+    run_test(pipe, "SELECT regexp('[0-9]+', 'the year is unknow');", validate_false)
+    run_test(pipe, "SELECT regexp_like('the year is 2021', '[0-9]+');", validate_true)
+    run_test(pipe, "SELECT regexp_like('the year is unknow', '[0-9]+');", validate_false)
+    run_test(pipe, "SELECT regexp_substr('the year is 2021', '[0-9]+') = '2021';", validate_true)
+    run_test(pipe, "SELECT regexp_substr('the year is unknow', '[0-9]+');", returns_null)
+
+
+def main():
     pipe = init_limbo()
     try:
-        # before extension loads, assert no function
-        run_test(pipe, "SELECT uuid4();", returns_null)
-        run_test(pipe, "SELECT uuid4_str();", returns_null)
-        run_test(pipe, f".load {extension_path}", returns_null)
-        print("Extension loaded successfully.")
-        run_test(pipe, "SELECT hex(uuid4());", validate_blob)
-        run_test(pipe, "SELECT uuid4_str();", validate_string_uuid)
-        run_test(pipe, "SELECT hex(uuid7());", validate_blob)
-        run_test(
-            pipe,
-            "SELECT uuid7_timestamp_ms(uuid7()) / 1000;",
-        )
-        run_test(pipe, "SELECT uuid7_str();", validate_string_uuid)
-        run_test(pipe, "SELECT uuid_str(uuid7());", validate_string_uuid)
-        run_test(pipe, "SELECT hex(uuid_blob(uuid7_str()));", validate_blob)
-        run_test(pipe, "SELECT uuid_str(uuid_blob(uuid7_str()));", validate_string_uuid)
-        run_test(
-            pipe,
-            f"SELECT uuid7_timestamp_ms('{specific_time}') / 1000;",
-            assert_specific_time,
-        )
+        test_regexp(pipe)
+        test_uuid(pipe)
     except Exception as e:
         print(f"Test FAILED: {e}")
         pipe.terminate()