Skip to content

Commit

Permalink
Implement regexp extension
Browse files Browse the repository at this point in the history
  • Loading branch information
psvri committed Jan 16, 2025
1 parent 3e9f3ae commit e43271f
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 23 deletions.
10 changes: 10 additions & 0 deletions COMPAT.md
Original file line number Diff line number Diff line change
Expand Up @@ -500,3 +500,13 @@ UUID's in Limbo are `blobs` by default.
| uuid7_timestamp_ms(X) | Yes | Convert a UUID v7 to milliseconds since epoch |
| uuid_str(X) | Yes | Convert a valid UUID to string |
| uuid_blob(X) | Yes | Convert a valid UUID to blob |

### REGEXP

| Function | Status | Comment |
|------------------------------------------------|--------|---------|
| regexp(pattern, source) | Yes | |
| regexp_like(source, pattern) | Yes | |
| regexp_substr(source, pattern) | Yes | |
| regexp_capture(source, pattern[, n]) | No | |
| regexp_replace(source, pattern, replacement) | No | |
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ members = [
"core",
"extensions/core",
"extensions/uuid",
"extensions/regexp",
"macros",
"simulator",
"sqlite3",
Expand Down
16 changes: 16 additions & 0 deletions extensions/regexp/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[package]
name = "limbo_regexp"
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true

[lib]
crate-type = ["cdylib", "lib"]


[dependencies]
limbo_ext = { path = "../core"}
regex = "1.11.1"
log = "0.4.20"
65 changes: 65 additions & 0 deletions extensions/regexp/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
use limbo_ext::{export_scalar, register_extension, register_scalar_functions, Value, ValueType};
use regex::Regex;

// Declares the scalar functions this extension provides. Each entry pairs the
// name used to call the function from SQL (left) with the Rust function in
// this file that implements it (right).
// NOTE(review): the exact registration mechanics live in `limbo_ext`'s
// `register_extension!` macro, which is not visible here — confirm there.
register_extension! {
scalars: {
"regexp" => regexp,
"regexp_like" => regexp_like,
"regexp_substr" => regexp_substr,
},
}

// `regexp(pattern, source)`: reports whether `pattern` matches anywhere in
// `source`.
//
// Takes exactly two arguments, pattern first. The result is whatever the
// shared `regex` helper produces: integer 1 on a match, integer 0 on no match,
// and NULL when either argument is not text or the pattern does not compile.
#[export_scalar]
#[args(2)]
fn regexp(args: &[Value]) -> Value {
    let (pattern, source) = (&args[0], &args[1]);
    regex(pattern, source)
}

// `regexp_like(source, pattern)`: same check as `regexp`, but with the
// arguments in the opposite order (text to search first, pattern second).
//
// The arguments are swapped before delegating because the shared `regex`
// helper expects the pattern as its first parameter.
#[export_scalar]
#[args(2)]
fn regexp_like(args: &[Value]) -> Value {
    let (source, pattern) = (&args[0], &args[1]);
    regex(pattern, source)
}

/// Shared implementation behind `regexp` and `regexp_like`.
///
/// Returns integer `1` when `pattern` matches somewhere in `haystack` and
/// integer `0` when it does not. Returns NULL when either value is not of
/// type text, when the text cannot be extracted, or when `pattern` is not a
/// valid regular expression.
fn regex(pattern: &Value, haystack: &Value) -> Value {
    // Only text/text pairs are supported; every other combination is NULL.
    if !matches!(
        (pattern.value_type(), haystack.value_type()),
        (ValueType::Text, ValueType::Text)
    ) {
        return Value::null();
    }

    let Some(pattern_text) = pattern.to_text() else {
        return Value::null();
    };
    let Some(subject) = haystack.to_text() else {
        return Value::null();
    };

    // The pattern is compiled on every call; an invalid pattern is reported
    // as NULL rather than as an error.
    match Regex::new(&pattern_text) {
        Ok(compiled) => Value::from_integer(compiled.is_match(&subject) as i64),
        Err(_) => Value::null(),
    }
}

// `regexp_substr(source, pattern)`: returns the first substring of `source`
// that matches `pattern`.
//
// Takes exactly two arguments, the text to search first and the pattern
// second. Returns the matched text, or NULL when there is no match, when
// either argument is not text, or when the pattern does not compile.
#[export_scalar]
#[args(2)]
fn regexp_substr(args: &[Value]) -> Value {
    let (source, pattern) = (&args[0], &args[1]);

    // Only text/text pairs are supported; every other combination is NULL.
    if !matches!(
        (source.value_type(), pattern.value_type()),
        (ValueType::Text, ValueType::Text)
    ) {
        return Value::null();
    }

    let Some(haystack) = source.to_text() else {
        return Value::null();
    };
    let Some(pattern) = pattern.to_text() else {
        return Value::null();
    };

    // An invalid pattern is reported as NULL rather than as an error.
    let Ok(re) = Regex::new(&pattern) else {
        return Value::null();
    };

    // `find` yields the leftmost match, if any; copy it into an owned string
    // for the returned value.
    re.find(&haystack)
        .map_or_else(Value::null, |mat| Value::from_text(mat.as_str().to_string()))
}
72 changes: 49 additions & 23 deletions testing/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ def run_test(pipe, sql, validator=None):
raise Exception("Validation failed")
print("Test PASSED")

def validate_true(result):
    """Return True when the shell output is exactly the string "1"."""
    expected = "1"
    return result == expected

def validate_false(result):
    """Return True when the shell output is exactly the string "0"."""
    expected = "0"
    return result == expected

def validate_blob(result):
# HACK: blobs are difficult to test because the shell
Expand All @@ -100,33 +105,54 @@ def assert_now_unixtime(result):
def assert_specific_time(result):
    """Return True when the output equals the fixed timestamp "1736720789"."""
    expected = "1736720789"
    return result == expected


def main():
def test_uuid(pipe):
    """Run the uuid extension checks against the shell behind `pipe`.

    First asserts that the uuid functions yield the `returns_null` result
    before the extension is loaded, then loads the shared library and runs
    each query with its validator.
    """
    specific_time = "01945ca0-3189-76c0-9a8f-caf310fc8b8e"
    extension_path = "./target/debug/liblimbo_uuid.so"

    # before extension loads, assert no function
    for sql in ("SELECT uuid4();", "SELECT uuid4_str();"):
        run_test(pipe, sql, returns_null)

    run_test(pipe, f".load {extension_path}", returns_null)
    print(f"Extension {extension_path} loaded successfully.")

    # (sql, validator) pairs, executed in order. A validator of None is
    # run_test's own default, i.e. the query is run without validation.
    cases = [
        ("SELECT hex(uuid4());", validate_blob),
        ("SELECT uuid4_str();", validate_string_uuid),
        ("SELECT hex(uuid7());", validate_blob),
        ("SELECT uuid7_timestamp_ms(uuid7()) / 1000;", None),
        ("SELECT uuid7_str();", validate_string_uuid),
        ("SELECT uuid_str(uuid7());", validate_string_uuid),
        ("SELECT hex(uuid_blob(uuid7_str()));", validate_blob),
        ("SELECT uuid_str(uuid_blob(uuid7_str()));", validate_string_uuid),
        (
            f"SELECT uuid7_timestamp_ms('{specific_time}') / 1000;",
            assert_specific_time,
        ),
    ]
    for sql, validator in cases:
        run_test(pipe, sql, validator)

def test_regexp(pipe):
    """Run the regexp extension checks against the shell behind `pipe`.

    First asserts that `regexp` yields the `returns_null` result before the
    extension is loaded, then loads the shared library and runs each query
    with its validator.
    """
    extension_path = "./target/debug/liblimbo_regexp.so"
    probe = "SELECT regexp('a.c', 'abc');"

    # before extension loads, assert no function
    run_test(pipe, probe, returns_null)
    run_test(pipe, f".load {extension_path}", returns_null)
    print(f"Extension {extension_path} loaded successfully.")

    # (sql, validator) pairs, executed in order.
    cases = [
        (probe, validate_true),
        ("SELECT regexp('a.c', 'ac');", validate_false),
        ("SELECT regexp('[0-9]+', 'the year is 2021');", validate_true),
        ("SELECT regexp('[0-9]+', 'the year is unknow');", validate_false),
        ("SELECT regexp_like('the year is 2021', '[0-9]+');", validate_true),
        ("SELECT regexp_like('the year is unknow', '[0-9]+');", validate_false),
        (
            "SELECT regexp_substr('the year is 2021', '[0-9]+') = '2021';",
            validate_true,
        ),
        ("SELECT regexp_substr('the year is unknow', '[0-9]+');", returns_null),
    ]
    for sql, validator in cases:
        run_test(pipe, sql, validator)


def main():
pipe = init_limbo()
try:
# before extension loads, assert no function
run_test(pipe, "SELECT uuid4();", returns_null)
run_test(pipe, "SELECT uuid4_str();", returns_null)
run_test(pipe, f".load {extension_path}", returns_null)
print("Extension loaded successfully.")
run_test(pipe, "SELECT hex(uuid4());", validate_blob)
run_test(pipe, "SELECT uuid4_str();", validate_string_uuid)
run_test(pipe, "SELECT hex(uuid7());", validate_blob)
run_test(
pipe,
"SELECT uuid7_timestamp_ms(uuid7()) / 1000;",
)
run_test(pipe, "SELECT uuid7_str();", validate_string_uuid)
run_test(pipe, "SELECT uuid_str(uuid7());", validate_string_uuid)
run_test(pipe, "SELECT hex(uuid_blob(uuid7_str()));", validate_blob)
run_test(pipe, "SELECT uuid_str(uuid_blob(uuid7_str()));", validate_string_uuid)
run_test(
pipe,
f"SELECT uuid7_timestamp_ms('{specific_time}') / 1000;",
assert_specific_time,
)
test_regexp(pipe)
test_uuid(pipe)
except Exception as e:
print(f"Test FAILED: {e}")
pipe.terminate()
Expand Down

0 comments on commit e43271f

Please sign in to comment.