From c51de732c82efff745247e18b8ed1dcec347ebe8 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Fri, 13 Dec 2024 07:51:47 -0500 Subject: [PATCH 001/144] - added Arbitrary and ArbitraryOf traits for mroe centralized generation - implemented random generation for tables and structured queries --- simulator/main.rs | 425 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 388 insertions(+), 37 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 3c71bfef5..714c0cd15 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -2,6 +2,7 @@ use limbo_core::{Connection, Database, File, OpenFlags, PlatformIO, Result, RowR use rand::prelude::*; use rand_chacha::ChaCha8Rng; use std::cell::RefCell; +use std::fmt::Display; use std::rc::Rc; use std::sync::Arc; use tempfile::TempDir; @@ -36,12 +37,32 @@ struct SimulatorOpts { page_size: usize, } +trait Arbitrary { + fn arbitrary(rng: &mut R) -> Self; +} + +trait ArbitraryOf { + fn arbitrary_of(rng: &mut R, t: &T) -> Self; +} + struct Table { rows: Vec>, name: String, columns: Vec, } +impl Arbitrary for Table { + fn arbitrary(rng: &mut R) -> Self { + let name = gen_random_name(rng); + let columns = gen_columns(rng); + Table { + rows: Vec::new(), + name, + columns, + } + } +} + #[derive(Clone)] struct Column { name: String, @@ -50,6 +71,19 @@ struct Column { unique: bool, } +impl Arbitrary for Column { + fn arbitrary(rng: &mut R) -> Self { + let name = gen_random_name(rng); + let column_type = ColumnType::arbitrary(rng); + Column { + name, + column_type, + primary: false, + unique: false, + } + } +} + #[derive(Clone)] enum ColumnType { Integer, @@ -58,7 +92,19 @@ enum ColumnType { Blob, } -#[derive(Debug, PartialEq)] +impl Arbitrary for ColumnType { + fn arbitrary(rng: &mut R) -> Self { + match rng.gen_range(0..4) { + 0 => ColumnType::Integer, + 1 => ColumnType::Float, + 2 => ColumnType::Text, + 3 => ColumnType::Blob, + _ => unreachable!(), + } + } +} + +#[derive(Clone, Debug, PartialEq)] enum Value { Null, 
Integer(i64), @@ -67,6 +113,312 @@ enum Value { Blob(Vec), } +impl ArbitraryOf> for Value { + fn arbitrary_of(rng: &mut R, t: &Vec<&Value>) -> Self { + if t.is_empty() { + return Value::Null; + } + + let index = rng.gen_range(0..t.len()); + t[index].clone() + } +} + +impl ArbitraryOf for Value { + fn arbitrary_of(rng: &mut R, t: &ColumnType) -> Self { + match t { + ColumnType::Integer => Value::Integer(rng.gen_range(i64::MIN..i64::MAX)), + ColumnType::Float => Value::Float(rng.gen_range(-1e10..1e10)), + ColumnType::Text => Value::Text(gen_random_text(rng)), + ColumnType::Blob => Value::Blob(gen_random_text(rng).as_bytes().to_vec()), + } + } +} + +struct LTValue(Value); + + +impl ArbitraryOf> for LTValue { + fn arbitrary_of(rng: &mut R, t: &Vec<&Value>) -> Self { + if t.is_empty() { + return LTValue(Value::Null); + } + + let index = rng.gen_range(0..t.len()); + LTValue::arbitrary_of(rng, t[index]) + } +} + +impl ArbitraryOf for LTValue { + fn arbitrary_of(rng: &mut R, t: &Value) -> Self { + match t { + Value::Integer(i) => LTValue(Value::Integer(rng.gen_range(i64::MIN..*i - 1))), + Value::Float(f) => LTValue(Value::Float(rng.gen_range(-1e10..*f - 1.0))), + Value::Text(t) => { + // Either shorten the string, or make at least one character smaller and mutate the rest + let mut t = t.clone(); + if rng.gen_bool(0.01) { + t.pop(); + LTValue(Value::Text(t)) + } else { + let index = rng.gen_range(0..t.len()); + let mut t = t.into_bytes(); + t[index] -= 1; + // Mutate the rest of the string + for i in (index+1)..t.len() { + t[i] = rng.gen_range(0..=255); + } + LTValue(Value::Text(String::from_utf8(t).unwrap())) + } + } + Value::Blob(b) => todo!(), + _ => unreachable!(), + } + } +} + + +struct GTValue(Value); + +impl ArbitraryOf> for GTValue { + fn arbitrary_of(rng: &mut R, t: &Vec<&Value>) -> Self { + if t.is_empty() { + return GTValue(Value::Null); + } + + let index = rng.gen_range(0..t.len()); + GTValue::arbitrary_of(rng, t[index]) + } +} + +impl ArbitraryOf for GTValue { 
+ fn arbitrary_of(rng: &mut R, t: &Value) -> Self { + match t { + Value::Integer(i) => GTValue(Value::Integer(rng.gen_range(*i..i64::MAX))), + Value::Float(f) => GTValue(Value::Float(rng.gen_range(*f..1e10))), + Value::Text(t) => { + // Either lengthen the string, or make at least one character smaller and mutate the rest + let mut t = t.clone(); + if rng.gen_bool(0.01) { + t.push(rng.gen_range(0..=255) as u8 as char); + GTValue(Value::Text(t)) + } else { + let index = rng.gen_range(0..t.len()); + let mut t = t.into_bytes(); + t[index] += 1; + // Mutate the rest of the string + for i in (index+1)..t.len() { + t[i] = rng.gen_range(0..=255); + } + GTValue(Value::Text(String::from_utf8(t).unwrap())) + } + } + Value::Blob(b) => todo!(), + _ => unreachable!(), + } + } +} + + +enum Predicate { + And(Vec), + Or(Vec), + Eq(String, Value), + Gt(String, Value), + Lt(String, Value), +} + +enum Query { + Create { table: Table }, + Select { table: String, guard: Predicate }, + Insert { table: String, values: Vec }, + Delete { table: String, guard: Predicate }, +} + +impl ArbitraryOf for Query { + fn arbitrary_of(rng: &mut R, t: &Table) -> Self { + match rng.gen_range(0..=200) { + 0 => Query::Create { + table: Table::arbitrary(rng), + }, + 1..=100 => Query::Select { + table: t.name.clone(), + guard: Predicate::arbitrary_of(rng, t), + }, + 101..=200 => Query::Insert { + table: t.name.clone(), + values: t + .columns + .iter() + .map(|c| Value::arbitrary_of(rng, &c.column_type)) + .collect(), + }, + 201..=300 => Query::Delete { + table: t.name.clone(), + guard: Predicate::arbitrary_of(rng, t), + }, + _ => unreachable!(), + } + } +} + +struct CompoundPredicate(Predicate); +struct SimplePredicate(Predicate); + +impl ArbitraryOf<(&Table, bool)> for SimplePredicate { + fn arbitrary_of(rng: &mut R, (t, b): &(&Table, bool)) -> Self { + // Pick a random column + let column_index = rng.gen_range(0..t.columns.len()); + let column = &t.columns[column_index]; + let column_values = 
t.rows.iter().map(|r| &r[column_index]).collect::>(); + // Pick an operator + let operator = match rng.gen_range(0..3) { + 0 => { + if *b { + Predicate::Eq(column.name.clone(), Value::arbitrary_of(rng, &column_values)) + } else { + Predicate::Eq(column.name.clone(), Value::arbitrary_of(rng, &column.column_type)) + } + } + 1 => Predicate::Gt(column.name.clone(), + match b { + true => GTValue::arbitrary_of(rng, &column_values).0, + false => LTValue::arbitrary_of(rng, &column_values).0, + }), + 2 => Predicate::Lt(column.name.clone(), + match b { + true => LTValue::arbitrary_of(rng, &column_values).0, + false => GTValue::arbitrary_of(rng, &column_values).0, + }), + _ => unreachable!(), + }; + + SimplePredicate(operator) + } +} + + + +impl ArbitraryOf<(&Table, bool)> for CompoundPredicate { + fn arbitrary_of(rng: &mut R, (t, b): &(&Table, bool)) -> Self { + // Decide if you want to create an AND or an OR + CompoundPredicate(if rng.gen_bool(0.7) { + // An AND for true requires each of its children to be true + // An AND for false requires at least one of its children to be false + if *b { + Predicate::And( + (0..rng.gen_range(1..=3)) + .map(|_| SimplePredicate::arbitrary_of(rng, &(*t, true)).0) + .collect(), + ) + } else { + // Create a vector of random booleans + let mut booleans = (0..rng.gen_range(1..=3)) + .map(|_| rng.gen_bool(0.5)) + .collect::>(); + + let len = booleans.len(); + + // Make sure at least one of them is false + if booleans.iter().all(|b| *b) { + booleans[rng.gen_range(0..len)] = false; + } + + Predicate::And( + booleans + .iter() + .map(|b| SimplePredicate::arbitrary_of(rng, &(*t, *b)).0) + .collect(), + ) + } + } else { + // An OR for true requires at least one of its children to be true + // An OR for false requires each of its children to be false + if *b { + // Create a vector of random booleans + let mut booleans = (0..rng.gen_range(1..=3)) + .map(|_| rng.gen_bool(0.5)) + .collect::>(); + let len = booleans.len(); + // Make sure at least one of 
them is true + if booleans.iter().all(|b| !*b) { + booleans[rng.gen_range(0..len)] = true; + } + + Predicate::Or( + booleans + .iter() + .map(|b| SimplePredicate::arbitrary_of(rng, &(*t, *b)).0) + .collect(), + ) + } else { + Predicate::Or( + (0..rng.gen_range(1..=3)) + .map(|_| SimplePredicate::arbitrary_of(rng, &(*t, false)).0) + .collect(), + ) + } + }) + } +} + +impl ArbitraryOf
for Predicate { + fn arbitrary_of(rng: &mut R, t: &Table) -> Self { + let b= rng.gen_bool(0.5); + CompoundPredicate::arbitrary_of(rng, &(t, b)).0 + } +} + +impl Display for Predicate { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Predicate::And(predicates) => { + write!(f, "(")?; + for (i, p) in predicates.iter().enumerate() { + if i != 0 { + write!(f, " AND ")?; + } + write!(f, "{}", p)?; + } + write!(f, ")") + } + Predicate::Or(predicates) => { + write!(f, "(")?; + for (i, p) in predicates.iter().enumerate() { + if i != 0 { + write!(f, " OR ")?; + } + write!(f, "{}", p)?; + } + write!(f, ")") + } + Predicate::Eq(name, value) => write!(f, "{} = {}", name, value), + Predicate::Gt(name, value) => write!(f, "{} > {}", name, value), + Predicate::Lt(name, value) => write!(f, "{} < {}", name, value), + } + } +} + +impl Display for Query { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Query::Create { table } => write!(f, "{}", table.to_create_str()), + Query::Select { table, guard } => write!(f, "SELECT * FROM {} WHERE {}", table, guard), + Query::Insert { table, values } => { + write!(f, "INSERT INTO {} VALUES (", table)?; + for (i, v) in values.iter().enumerate() { + if i != 0 { + write!(f, ", ")?; + } + write!(f, "{}", v)?; + } + write!(f, ")") + } + Query::Delete { table, guard } => write!(f, "DELETE FROM {} WHERE {}", table, guard), + } + } +} + #[allow(clippy::arc_with_non_send_sync)] fn main() { let _ = env_logger::try_init(); @@ -160,17 +512,21 @@ fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc) -> Resu } else if env.tables.is_empty() { maybe_add_table(env, conn)?; } else { - let roll = env.rng.gen_range(0..100); - if roll < env.opts.read_percent { - // read - do_select(env, conn)?; - } else if roll < env.opts.read_percent + env.opts.write_percent { - // write - do_write(env, conn)?; - } else { - // delete - // TODO - } + let query = Query::arbitrary_of(&mut env.rng, 
&env.tables[0]); + log::info!("running query '{}'", query); + let rows = get_all_rows(env, conn, query.to_string().as_str())?; + log::debug!("{:?}", rows); + // let roll = env.rng.gen_range(0..100); + // if roll < env.opts.read_percent { + // // read + // do_select(env, conn)?; + // } else if roll < env.opts.read_percent + env.opts.write_percent { + // // write + // do_write(env, conn)?; + // } else { + // // delete + // // TODO + // } } Ok(()) } @@ -201,12 +557,7 @@ fn do_write(env: &mut SimulatorEnv, conn: &mut Rc) -> Result<()> { // gen insert query for column in &columns { - let value = match column.column_type { - ColumnType::Integer => Value::Integer(env.rng.gen_range(i64::MIN..i64::MAX)), - ColumnType::Float => Value::Float(env.rng.gen_range(-1e10..1e10)), - ColumnType::Text => Value::Text(gen_random_text(env)), - ColumnType::Blob => Value::Blob(gen_random_text(env).as_bytes().to_vec()), - }; + let value = Value::arbitrary_of(&mut env.rng, &column.column_type); query.push_str(value.to_string().as_str()); query.push(','); @@ -237,8 +588,8 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc) -> Result< if env.tables.len() < env.opts.max_tables { let table = Table { rows: Vec::new(), - name: gen_random_name(env), - columns: gen_columns(env), + name: gen_random_name(&mut env.rng), + columns: gen_columns(&mut env.rng), }; let rows = get_all_rows(env, conn, table.to_create_str().as_str())?; log::debug!("{:?}", rows); @@ -266,32 +617,32 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc) -> Result< Ok(()) } -fn gen_random_name(env: &mut SimulatorEnv) -> String { - let name = readable_name_custom("_", &mut env.rng); +fn gen_random_name(rng: &mut T) -> String { + let name = readable_name_custom("_", rng); name.replace("-", "_") } -fn gen_random_text(env: &mut SimulatorEnv) -> String { - let big_text = env.rng.gen_ratio(1, 1000); +fn gen_random_text(rng: &mut T) -> String { + let big_text = rng.gen_ratio(1, 1000); if big_text { let max_size: u64 = 2 * 
1024 * 1024 * 1024; - let size = env.rng.gen_range(1024..max_size); + let size = rng.gen_range(1024..max_size); let mut name = String::new(); for i in 0..size { name.push(((i % 26) as u8 + b'A') as char); } name } else { - let name = readable_name_custom("_", &mut env.rng); + let name = readable_name_custom("_", rng); name.replace("-", "_") } } -fn gen_columns(env: &mut SimulatorEnv) -> Vec { - let mut column_range = env.rng.gen_range(1..128); +fn gen_columns(rng: &mut T) -> Vec { + let mut column_range = rng.gen_range(1..128); let mut columns = Vec::new(); while column_range > 0 { - let column_type = match env.rng.gen_range(0..4) { + let column_type = match rng.gen_range(0..4) { 0 => ColumnType::Integer, 1 => ColumnType::Float, 2 => ColumnType::Text, @@ -299,7 +650,7 @@ fn gen_columns(env: &mut SimulatorEnv) -> Vec { _ => unreachable!(), }; let column = Column { - name: gen_random_name(env), + name: gen_random_name(rng), column_type, primary: false, unique: false, @@ -565,14 +916,14 @@ impl Table { } } -impl Value { - pub fn to_string(&self) -> String { +impl Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Value::Null => "NULL".to_string(), - Value::Integer(i) => i.to_string(), - Value::Float(f) => f.to_string(), - Value::Text(t) => format!("'{}'", t.clone()), - Value::Blob(vec) => to_sqlite_blob(vec), + Value::Null => write!(f, "NULL"), + Value::Integer(i) => write!(f, "{}", i), + Value::Float(fl) => write!(f, "{}", fl), + Value::Text(t) => write!(f, "'{}'", t), + Value::Blob(b) => write!(f, "{}", to_sqlite_blob(b)), } } } From 6029fc6303a17e6236fa2470e0f170d46c229722 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Fri, 13 Dec 2024 07:56:41 -0500 Subject: [PATCH 002/144] fix formatting errors --- simulator/main.rs | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 714c0cd15..5edfab452 100644 --- a/simulator/main.rs 
+++ b/simulator/main.rs @@ -137,7 +137,6 @@ impl ArbitraryOf for Value { struct LTValue(Value); - impl ArbitraryOf> for LTValue { fn arbitrary_of(rng: &mut R, t: &Vec<&Value>) -> Self { if t.is_empty() { @@ -165,7 +164,7 @@ impl ArbitraryOf for LTValue { let mut t = t.into_bytes(); t[index] -= 1; // Mutate the rest of the string - for i in (index+1)..t.len() { + for i in (index + 1)..t.len() { t[i] = rng.gen_range(0..=255); } LTValue(Value::Text(String::from_utf8(t).unwrap())) @@ -177,7 +176,6 @@ impl ArbitraryOf for LTValue { } } - struct GTValue(Value); impl ArbitraryOf> for GTValue { @@ -207,7 +205,7 @@ impl ArbitraryOf for GTValue { let mut t = t.into_bytes(); t[index] += 1; // Mutate the rest of the string - for i in (index+1)..t.len() { + for i in (index + 1)..t.len() { t[i] = rng.gen_range(0..=255); } GTValue(Value::Text(String::from_utf8(t).unwrap())) @@ -219,7 +217,6 @@ impl ArbitraryOf for GTValue { } } - enum Predicate { And(Vec), Or(Vec), @@ -275,21 +272,31 @@ impl ArbitraryOf<(&Table, bool)> for SimplePredicate { let operator = match rng.gen_range(0..3) { 0 => { if *b { - Predicate::Eq(column.name.clone(), Value::arbitrary_of(rng, &column_values)) + Predicate::Eq( + column.name.clone(), + Value::arbitrary_of(rng, &column_values), + ) } else { - Predicate::Eq(column.name.clone(), Value::arbitrary_of(rng, &column.column_type)) + Predicate::Eq( + column.name.clone(), + Value::arbitrary_of(rng, &column.column_type), + ) } } - 1 => Predicate::Gt(column.name.clone(), + 1 => Predicate::Gt( + column.name.clone(), match b { true => GTValue::arbitrary_of(rng, &column_values).0, false => LTValue::arbitrary_of(rng, &column_values).0, - }), - 2 => Predicate::Lt(column.name.clone(), + }, + ), + 2 => Predicate::Lt( + column.name.clone(), match b { true => LTValue::arbitrary_of(rng, &column_values).0, false => GTValue::arbitrary_of(rng, &column_values).0, - }), + }, + ), _ => unreachable!(), }; @@ -297,8 +304,6 @@ impl ArbitraryOf<(&Table, bool)> for SimplePredicate { 
} } - - impl ArbitraryOf<(&Table, bool)> for CompoundPredicate { fn arbitrary_of(rng: &mut R, (t, b): &(&Table, bool)) -> Self { // Decide if you want to create an AND or an OR @@ -364,7 +369,7 @@ impl ArbitraryOf<(&Table, bool)> for CompoundPredicate { impl ArbitraryOf
for Predicate { fn arbitrary_of(rng: &mut R, t: &Table) -> Self { - let b= rng.gen_bool(0.5); + let b = rng.gen_bool(0.5); CompoundPredicate::arbitrary_of(rng, &(t, b)).0 } } From 2d712d2b358e8860ed0715c02b0990dc65d9e785 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Fri, 13 Dec 2024 15:59:16 -0500 Subject: [PATCH 003/144] update simulator to randomly pick an action and check its postconditions --- simulator/main.rs | 229 ++++++++++++++++++++++++++-------------------- 1 file changed, 132 insertions(+), 97 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 5edfab452..02a15b9a0 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -3,6 +3,7 @@ use rand::prelude::*; use rand_chacha::ChaCha8Rng; use std::cell::RefCell; use std::fmt::Display; +use std::ops::Deref; use std::rc::Rc; use std::sync::Arc; use tempfile::TempDir; @@ -53,8 +54,10 @@ struct Table { impl Arbitrary for Table { fn arbitrary(rng: &mut R) -> Self { - let name = gen_random_name(rng); - let columns = gen_columns(rng); + let name = Name::arbitrary(rng).0; + let columns = (1..rng.gen_range(1..128)) + .map(|_| Column::arbitrary(rng)) + .collect(); Table { rows: Vec::new(), name, @@ -73,7 +76,7 @@ struct Column { impl Arbitrary for Column { fn arbitrary(rng: &mut R) -> Self { - let name = gen_random_name(rng); + let name = Name::arbitrary(rng).0; let column_type = ColumnType::arbitrary(rng); Column { name, @@ -374,28 +377,48 @@ impl ArbitraryOf
for Predicate { } } +impl ArbitraryOf<(&str, &Value)> for Predicate { + fn arbitrary_of(rng: &mut R, (c, t): &(&str, &Value)) -> Self { + match rng.gen_range(0..3) { + 0 => Predicate::Eq(c.to_string(), (*t).clone()), + 1 => Predicate::Gt(c.to_string(), LTValue::arbitrary_of(rng, *t).0), + 2 => Predicate::Lt(c.to_string(), LTValue::arbitrary_of(rng, *t).0), + _ => unreachable!(), + } + } +} + impl Display for Predicate { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Predicate::And(predicates) => { - write!(f, "(")?; - for (i, p) in predicates.iter().enumerate() { - if i != 0 { - write!(f, " AND ")?; + if predicates.is_empty() { + // todo: Make this TRUE when the bug is fixed + write!(f, "1 = 1") + } else { + write!(f, "(")?; + for (i, p) in predicates.iter().enumerate() { + if i != 0 { + write!(f, " AND ")?; + } + write!(f, "{}", p)?; } - write!(f, "{}", p)?; + write!(f, ")") } - write!(f, ")") } Predicate::Or(predicates) => { - write!(f, "(")?; - for (i, p) in predicates.iter().enumerate() { - if i != 0 { - write!(f, " OR ")?; + if predicates.is_empty() { + write!(f, "FALSE") + } else { + write!(f, "(")?; + for (i, p) in predicates.iter().enumerate() { + if i != 0 { + write!(f, " OR ")?; + } + write!(f, "{}", p)?; } - write!(f, "{}", p)?; + write!(f, ")") } - write!(f, ")") } Predicate::Eq(name, value) => write!(f, "{} = {}", name, value), Predicate::Gt(name, value) => write!(f, "{} > {}", name, value), @@ -509,73 +532,94 @@ fn main() { env.io.print_stats(); } -fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc) -> Result<()> { - let management = env.rng.gen_ratio(1, 100); - if management { - // for now create table only - maybe_add_table(env, conn)?; - } else if env.tables.is_empty() { - maybe_add_table(env, conn)?; - } else { - let query = Query::arbitrary_of(&mut env.rng, &env.tables[0]); - log::info!("running query '{}'", query); - let rows = get_all_rows(env, conn, query.to_string().as_str())?; - log::debug!("{:?}", 
rows); - // let roll = env.rng.gen_range(0..100); - // if roll < env.opts.read_percent { - // // read - // do_select(env, conn)?; - // } else if roll < env.opts.read_percent + env.opts.write_percent { - // // write - // do_write(env, conn)?; - // } else { - // // delete - // // TODO - // } +fn property_insert_select(env: &mut SimulatorEnv, conn: &mut Rc) { + // Get a random table + let table = env.rng.gen_range(0..env.tables.len()); + + // let table = &env.tables[table]; + + // Pick a random column + let column_index = env.rng.gen_range(0..env.tables[table].columns.len()); + let column = &env.tables[table].columns[column_index].clone(); + + let mut rng = env.rng.clone(); + + // Generate a random value of the column type + let value = Value::arbitrary_of(&mut rng, &column.column_type); + + // Create a whole new row + let mut row = Vec::new(); + for (i, column) in env.tables[table].columns.iter().enumerate() { + if i == column_index { + row.push(value.clone()); + } else { + let value = Value::arbitrary_of(&mut rng, &column.column_type); + row.push(value); + } } - Ok(()) -} -fn do_select(env: &mut SimulatorEnv, conn: &mut Rc) -> Result<()> { - let table = env.rng.gen_range(0..env.tables.len()); - let table_name = { - let table = &env.tables[table]; - table.name.clone() + // Insert the row + let query = Query::Insert { + table: env.tables[table].name.clone(), + values: row.clone(), + }; + let _ = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); + // Shadow operation on the table + env.tables[table].rows.push(row.clone()); + + // Create a query that selects the row + let query = Query::Select { + table: env.tables[table].name.clone(), + guard: Predicate::Eq(column.name.clone(), value), }; - let rows = get_all_rows(env, conn, format!("SELECT * FROM {}", table_name).as_str())?; - let table = &env.tables[table]; - compare_equal_rows(&table.rows, &rows); - Ok(()) + // Get all rows + let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); + + // 
Check that the row is there + assert!(rows.iter().any(|r| r == &row)); } -fn do_write(env: &mut SimulatorEnv, conn: &mut Rc) -> Result<()> { - let mut query = String::new(); +fn property_select_all(env: &mut SimulatorEnv, conn: &mut Rc) { + // Get a random table let table = env.rng.gen_range(0..env.tables.len()); - { - let table = &env.tables[table]; - query.push_str(format!("INSERT INTO {} VALUES (", table.name).as_str()); - } - let columns = env.tables[table].columns.clone(); - let mut row = Vec::new(); + // Create a query that selects all rows + let query = Query::Select { + table: env.tables[table].name.clone(), + guard: Predicate::And(Vec::new()), + }; - // gen insert query - for column in &columns { - let value = Value::arbitrary_of(&mut env.rng, &column.column_type); + // Get all rows + let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); - query.push_str(value.to_string().as_str()); - query.push(','); - row.push(value); + // Check that all rows are there + assert_eq!(rows.len(), env.tables[table].rows.len()); + for row in &env.tables[table].rows { + assert!(rows.iter().any(|r| r == row)); } +} - let table = &mut env.tables[table]; - table.rows.push(row); - - query.pop(); - query.push_str(");"); +fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc) -> Result<()> { + if env.tables.is_empty() { + maybe_add_table(env, conn)?; + } - let _ = get_all_rows(env, conn, query.as_str())?; + match env.rng.gen_range(0..2) { + // Randomly insert a value and check that the select result contains it. + 0 => property_insert_select(env, conn), + // Check that the current state of the in-memory table is the same as the one in the + // database. + 1 => property_select_all(env, conn), + // Perform a random query, update the in-memory table with the result. 
+ 2 => { + let table_index = env.rng.gen_range(0..env.tables.len()); + let query = Query::arbitrary_of(&mut env.rng, &env.tables[table_index]); + let rows = get_all_rows(env, conn, query.to_string().as_str())?; + env.tables[table_index].rows = rows; + } + _ => unreachable!(), + } Ok(()) } @@ -593,8 +637,10 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc) -> Result< if env.tables.len() < env.opts.max_tables { let table = Table { rows: Vec::new(), - name: gen_random_name(&mut env.rng), - columns: gen_columns(&mut env.rng), + name: Name::arbitrary(&mut env.rng).0, + columns: (1..env.rng.gen_range(1..128)) + .map(|_| Column::arbitrary(&mut env.rng)) + .collect(), }; let rows = get_all_rows(env, conn, table.to_create_str().as_str())?; log::debug!("{:?}", rows); @@ -622,9 +668,21 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc) -> Result< Ok(()) } -fn gen_random_name(rng: &mut T) -> String { - let name = readable_name_custom("_", rng); - name.replace("-", "_") +struct Name(String); + +impl Arbitrary for Name { + fn arbitrary(rng: &mut R) -> Self { + let name = readable_name_custom("_", rng); + Name(name.replace("-", "_")) + } +} + +impl Deref for Name { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } } fn gen_random_text(rng: &mut T) -> String { @@ -643,29 +701,6 @@ fn gen_random_text(rng: &mut T) -> String { } } -fn gen_columns(rng: &mut T) -> Vec { - let mut column_range = rng.gen_range(1..128); - let mut columns = Vec::new(); - while column_range > 0 { - let column_type = match rng.gen_range(0..4) { - 0 => ColumnType::Integer, - 1 => ColumnType::Float, - 2 => ColumnType::Text, - 3 => ColumnType::Blob, - _ => unreachable!(), - }; - let column = Column { - name: gen_random_name(rng), - column_type, - primary: false, - unique: false, - }; - columns.push(column); - column_range -= 1; - } - columns -} - fn get_all_rows( env: &mut SimulatorEnv, conn: &mut Rc, From ab556032f5c5be33956597a2cadde974c6c51846 Mon Sep 17 00:00:00 
2001 From: alpaylan Date: Fri, 13 Dec 2024 16:07:56 -0500 Subject: [PATCH 004/144] change the names of guard to predicate, arbitrary_of to arbitrary_from, ArbitraryOf to ArbitraryFrom --- simulator/main.rs | 102 +++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 02a15b9a0..99b760271 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -42,8 +42,8 @@ trait Arbitrary { fn arbitrary(rng: &mut R) -> Self; } -trait ArbitraryOf { - fn arbitrary_of(rng: &mut R, t: &T) -> Self; +trait ArbitraryFrom { + fn arbitrary_from(rng: &mut R, t: &T) -> Self; } struct Table { @@ -116,8 +116,8 @@ enum Value { Blob(Vec), } -impl ArbitraryOf> for Value { - fn arbitrary_of(rng: &mut R, t: &Vec<&Value>) -> Self { +impl ArbitraryFrom> for Value { + fn arbitrary_from(rng: &mut R, t: &Vec<&Value>) -> Self { if t.is_empty() { return Value::Null; } @@ -127,8 +127,8 @@ impl ArbitraryOf> for Value { } } -impl ArbitraryOf for Value { - fn arbitrary_of(rng: &mut R, t: &ColumnType) -> Self { +impl ArbitraryFrom for Value { + fn arbitrary_from(rng: &mut R, t: &ColumnType) -> Self { match t { ColumnType::Integer => Value::Integer(rng.gen_range(i64::MIN..i64::MAX)), ColumnType::Float => Value::Float(rng.gen_range(-1e10..1e10)), @@ -140,19 +140,19 @@ impl ArbitraryOf for Value { struct LTValue(Value); -impl ArbitraryOf> for LTValue { - fn arbitrary_of(rng: &mut R, t: &Vec<&Value>) -> Self { +impl ArbitraryFrom> for LTValue { + fn arbitrary_from(rng: &mut R, t: &Vec<&Value>) -> Self { if t.is_empty() { return LTValue(Value::Null); } let index = rng.gen_range(0..t.len()); - LTValue::arbitrary_of(rng, t[index]) + LTValue::arbitrary_from(rng, t[index]) } } -impl ArbitraryOf for LTValue { - fn arbitrary_of(rng: &mut R, t: &Value) -> Self { +impl ArbitraryFrom for LTValue { + fn arbitrary_from(rng: &mut R, t: &Value) -> Self { match t { Value::Integer(i) => 
LTValue(Value::Integer(rng.gen_range(i64::MIN..*i - 1))), Value::Float(f) => LTValue(Value::Float(rng.gen_range(-1e10..*f - 1.0))), @@ -181,19 +181,19 @@ impl ArbitraryOf for LTValue { struct GTValue(Value); -impl ArbitraryOf> for GTValue { - fn arbitrary_of(rng: &mut R, t: &Vec<&Value>) -> Self { +impl ArbitraryFrom> for GTValue { + fn arbitrary_from(rng: &mut R, t: &Vec<&Value>) -> Self { if t.is_empty() { return GTValue(Value::Null); } let index = rng.gen_range(0..t.len()); - GTValue::arbitrary_of(rng, t[index]) + GTValue::arbitrary_from(rng, t[index]) } } -impl ArbitraryOf for GTValue { - fn arbitrary_of(rng: &mut R, t: &Value) -> Self { +impl ArbitraryFrom for GTValue { + fn arbitrary_from(rng: &mut R, t: &Value) -> Self { match t { Value::Integer(i) => GTValue(Value::Integer(rng.gen_range(*i..i64::MAX))), Value::Float(f) => GTValue(Value::Float(rng.gen_range(*f..1e10))), @@ -230,32 +230,32 @@ enum Predicate { enum Query { Create { table: Table }, - Select { table: String, guard: Predicate }, + Select { table: String, predicate: Predicate }, Insert { table: String, values: Vec }, - Delete { table: String, guard: Predicate }, + Delete { table: String, predicate: Predicate }, } -impl ArbitraryOf
for Query { - fn arbitrary_of(rng: &mut R, t: &Table) -> Self { +impl ArbitraryFrom
for Query { + fn arbitrary_from(rng: &mut R, t: &Table) -> Self { match rng.gen_range(0..=200) { 0 => Query::Create { table: Table::arbitrary(rng), }, 1..=100 => Query::Select { table: t.name.clone(), - guard: Predicate::arbitrary_of(rng, t), + predicate: Predicate::arbitrary_from(rng, t), }, 101..=200 => Query::Insert { table: t.name.clone(), values: t .columns .iter() - .map(|c| Value::arbitrary_of(rng, &c.column_type)) + .map(|c| Value::arbitrary_from(rng, &c.column_type)) .collect(), }, 201..=300 => Query::Delete { table: t.name.clone(), - guard: Predicate::arbitrary_of(rng, t), + predicate: Predicate::arbitrary_from(rng, t), }, _ => unreachable!(), } @@ -265,8 +265,8 @@ impl ArbitraryOf
for Query { struct CompoundPredicate(Predicate); struct SimplePredicate(Predicate); -impl ArbitraryOf<(&Table, bool)> for SimplePredicate { - fn arbitrary_of(rng: &mut R, (t, b): &(&Table, bool)) -> Self { +impl ArbitraryFrom<(&Table, bool)> for SimplePredicate { + fn arbitrary_from(rng: &mut R, (t, b): &(&Table, bool)) -> Self { // Pick a random column let column_index = rng.gen_range(0..t.columns.len()); let column = &t.columns[column_index]; @@ -277,27 +277,27 @@ impl ArbitraryOf<(&Table, bool)> for SimplePredicate { if *b { Predicate::Eq( column.name.clone(), - Value::arbitrary_of(rng, &column_values), + Value::arbitrary_from(rng, &column_values), ) } else { Predicate::Eq( column.name.clone(), - Value::arbitrary_of(rng, &column.column_type), + Value::arbitrary_from(rng, &column.column_type), ) } } 1 => Predicate::Gt( column.name.clone(), match b { - true => GTValue::arbitrary_of(rng, &column_values).0, - false => LTValue::arbitrary_of(rng, &column_values).0, + true => GTValue::arbitrary_from(rng, &column_values).0, + false => LTValue::arbitrary_from(rng, &column_values).0, }, ), 2 => Predicate::Lt( column.name.clone(), match b { - true => LTValue::arbitrary_of(rng, &column_values).0, - false => GTValue::arbitrary_of(rng, &column_values).0, + true => LTValue::arbitrary_from(rng, &column_values).0, + false => GTValue::arbitrary_from(rng, &column_values).0, }, ), _ => unreachable!(), @@ -307,8 +307,8 @@ impl ArbitraryOf<(&Table, bool)> for SimplePredicate { } } -impl ArbitraryOf<(&Table, bool)> for CompoundPredicate { - fn arbitrary_of(rng: &mut R, (t, b): &(&Table, bool)) -> Self { +impl ArbitraryFrom<(&Table, bool)> for CompoundPredicate { + fn arbitrary_from(rng: &mut R, (t, b): &(&Table, bool)) -> Self { // Decide if you want to create an AND or an OR CompoundPredicate(if rng.gen_bool(0.7) { // An AND for true requires each of its children to be true @@ -316,7 +316,7 @@ impl ArbitraryOf<(&Table, bool)> for CompoundPredicate { if *b { Predicate::And( 
(0..rng.gen_range(1..=3)) - .map(|_| SimplePredicate::arbitrary_of(rng, &(*t, true)).0) + .map(|_| SimplePredicate::arbitrary_from(rng, &(*t, true)).0) .collect(), ) } else { @@ -335,7 +335,7 @@ impl ArbitraryOf<(&Table, bool)> for CompoundPredicate { Predicate::And( booleans .iter() - .map(|b| SimplePredicate::arbitrary_of(rng, &(*t, *b)).0) + .map(|b| SimplePredicate::arbitrary_from(rng, &(*t, *b)).0) .collect(), ) } @@ -356,13 +356,13 @@ impl ArbitraryOf<(&Table, bool)> for CompoundPredicate { Predicate::Or( booleans .iter() - .map(|b| SimplePredicate::arbitrary_of(rng, &(*t, *b)).0) + .map(|b| SimplePredicate::arbitrary_from(rng, &(*t, *b)).0) .collect(), ) } else { Predicate::Or( (0..rng.gen_range(1..=3)) - .map(|_| SimplePredicate::arbitrary_of(rng, &(*t, false)).0) + .map(|_| SimplePredicate::arbitrary_from(rng, &(*t, false)).0) .collect(), ) } @@ -370,19 +370,19 @@ impl ArbitraryOf<(&Table, bool)> for CompoundPredicate { } } -impl ArbitraryOf
for Predicate { - fn arbitrary_of(rng: &mut R, t: &Table) -> Self { +impl ArbitraryFrom
for Predicate { + fn arbitrary_from(rng: &mut R, t: &Table) -> Self { let b = rng.gen_bool(0.5); - CompoundPredicate::arbitrary_of(rng, &(t, b)).0 + CompoundPredicate::arbitrary_from(rng, &(t, b)).0 } } -impl ArbitraryOf<(&str, &Value)> for Predicate { - fn arbitrary_of(rng: &mut R, (c, t): &(&str, &Value)) -> Self { +impl ArbitraryFrom<(&str, &Value)> for Predicate { + fn arbitrary_from(rng: &mut R, (c, t): &(&str, &Value)) -> Self { match rng.gen_range(0..3) { 0 => Predicate::Eq(c.to_string(), (*t).clone()), - 1 => Predicate::Gt(c.to_string(), LTValue::arbitrary_of(rng, *t).0), - 2 => Predicate::Lt(c.to_string(), LTValue::arbitrary_of(rng, *t).0), + 1 => Predicate::Gt(c.to_string(), LTValue::arbitrary_from(rng, *t).0), + 2 => Predicate::Lt(c.to_string(), LTValue::arbitrary_from(rng, *t).0), _ => unreachable!(), } } @@ -431,7 +431,7 @@ impl Display for Query { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Query::Create { table } => write!(f, "{}", table.to_create_str()), - Query::Select { table, guard } => write!(f, "SELECT * FROM {} WHERE {}", table, guard), + Query::Select { table, predicate: guard } => write!(f, "SELECT * FROM {} WHERE {}", table, guard), Query::Insert { table, values } => { write!(f, "INSERT INTO {} VALUES (", table)?; for (i, v) in values.iter().enumerate() { @@ -442,7 +442,7 @@ impl Display for Query { } write!(f, ")") } - Query::Delete { table, guard } => write!(f, "DELETE FROM {} WHERE {}", table, guard), + Query::Delete { table, predicate: guard } => write!(f, "DELETE FROM {} WHERE {}", table, guard), } } } @@ -545,7 +545,7 @@ fn property_insert_select(env: &mut SimulatorEnv, conn: &mut Rc) { let mut rng = env.rng.clone(); // Generate a random value of the column type - let value = Value::arbitrary_of(&mut rng, &column.column_type); + let value = Value::arbitrary_from(&mut rng, &column.column_type); // Create a whole new row let mut row = Vec::new(); @@ -553,7 +553,7 @@ fn property_insert_select(env: 
&mut SimulatorEnv, conn: &mut Rc) { if i == column_index { row.push(value.clone()); } else { - let value = Value::arbitrary_of(&mut rng, &column.column_type); + let value = Value::arbitrary_from(&mut rng, &column.column_type); row.push(value); } } @@ -570,7 +570,7 @@ fn property_insert_select(env: &mut SimulatorEnv, conn: &mut Rc) { // Create a query that selects the row let query = Query::Select { table: env.tables[table].name.clone(), - guard: Predicate::Eq(column.name.clone(), value), + predicate: Predicate::Eq(column.name.clone(), value), }; // Get all rows @@ -587,7 +587,7 @@ fn property_select_all(env: &mut SimulatorEnv, conn: &mut Rc) { // Create a query that selects all rows let query = Query::Select { table: env.tables[table].name.clone(), - guard: Predicate::And(Vec::new()), + predicate: Predicate::And(Vec::new()), }; // Get all rows @@ -614,7 +614,7 @@ fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc) -> Resu // Perform a random query, update the in-memory table with the result. 
2 => { let table_index = env.rng.gen_range(0..env.tables.len()); - let query = Query::arbitrary_of(&mut env.rng, &env.tables[table_index]); + let query = Query::arbitrary_from(&mut env.rng, &env.tables[table_index]); let rows = get_all_rows(env, conn, query.to_string().as_str())?; env.tables[table_index].rows = rows; } From 8cb7086bfc3e8081048f7557cdbe3bb42b7a0daa Mon Sep 17 00:00:00 2001 From: alpaylan Date: Fri, 13 Dec 2024 16:08:50 -0500 Subject: [PATCH 005/144] formatting changes --- simulator/main.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 99b760271..d0544d3b6 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -431,7 +431,10 @@ impl Display for Query { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Query::Create { table } => write!(f, "{}", table.to_create_str()), - Query::Select { table, predicate: guard } => write!(f, "SELECT * FROM {} WHERE {}", table, guard), + Query::Select { + table, + predicate: guard, + } => write!(f, "SELECT * FROM {} WHERE {}", table, guard), Query::Insert { table, values } => { write!(f, "INSERT INTO {} VALUES (", table)?; for (i, v) in values.iter().enumerate() { @@ -442,7 +445,10 @@ impl Display for Query { } write!(f, ")") } - Query::Delete { table, predicate: guard } => write!(f, "DELETE FROM {} WHERE {}", table, guard), + Query::Delete { + table, + predicate: guard, + } => write!(f, "DELETE FROM {} WHERE {}", table, guard), } } } From 31fcdb8727da3828765aaf32d28b5aef747ca2c7 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Sat, 14 Dec 2024 12:44:23 -0500 Subject: [PATCH 006/144] add workload percentage back to the simulator, fix the smaller/larger UTF8 string generator --- simulator/main.rs | 256 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 198 insertions(+), 58 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 2373cb8ed..ed2fb14a7 100644 --- a/simulator/main.rs +++ 
b/simulator/main.rs @@ -25,6 +25,12 @@ enum SimConnection { Disconnected, } +#[derive(Debug, Copy, Clone)] +enum SimulatorMode { + Random, + Workload, +} + #[derive(Debug)] struct SimulatorOpts { ticks: usize, @@ -35,6 +41,7 @@ struct SimulatorOpts { read_percent: usize, write_percent: usize, delete_percent: usize, + mode: SimulatorMode, page_size: usize, } @@ -164,16 +171,32 @@ impl ArbitraryFrom for LTValue { LTValue(Value::Text(t)) } else { let index = rng.gen_range(0..t.len()); - let mut t = t.into_bytes(); + let mut t = t.chars().map(|c| c as u32).collect::>(); t[index] -= 1; // Mutate the rest of the string for i in (index + 1)..t.len() { t[i] = rng.gen_range(0..=255); } - LTValue(Value::Text(String::from_utf8(t).unwrap())) + let t = t.into_iter().map(|c| c as u8 as char).collect::(); + LTValue(Value::Text(t)) + } + } + Value::Blob(b) => { + // Either shorten the blob, or make at least one byte smaller and mutate the rest + let mut b = b.clone(); + if rng.gen_bool(0.01) { + b.pop(); + LTValue(Value::Blob(b)) + } else { + let index = rng.gen_range(0..b.len()); + b[index] -= 1; + // Mutate the rest of the blob + for i in (index + 1)..b.len() { + b[i] = rng.gen_range(0..=255); + } + LTValue(Value::Blob(b)) } } - Value::Blob(b) => todo!(), _ => unreachable!(), } } @@ -205,16 +228,32 @@ impl ArbitraryFrom for GTValue { GTValue(Value::Text(t)) } else { let index = rng.gen_range(0..t.len()); - let mut t = t.into_bytes(); + let mut t = t.chars().map(|c| c as u32).collect::>(); t[index] += 1; // Mutate the rest of the string for i in (index + 1)..t.len() { t[i] = rng.gen_range(0..=255); } - GTValue(Value::Text(String::from_utf8(t).unwrap())) + let t = t.into_iter().map(|c| c as u8 as char).collect::(); + GTValue(Value::Text(t)) + } + } + Value::Blob(b) => { + // Either lengthen the blob, or make at least one byte smaller and mutate the rest + let mut b = b.clone(); + if rng.gen_bool(0.01) { + b.push(rng.gen_range(0..=255)); + GTValue(Value::Blob(b)) + } else { + let 
index = rng.gen_range(0..b.len()); + b[index] += 1; + // Mutate the rest of the blob + for i in (index + 1)..b.len() { + b[i] = rng.gen_range(0..=255); + } + GTValue(Value::Blob(b)) } } - Value::Blob(b) => todo!(), _ => unreachable!(), } } @@ -229,34 +268,91 @@ enum Predicate { } enum Query { - Create { table: Table }, - Select { table: String, predicate: Predicate }, - Insert { table: String, values: Vec }, - Delete { table: String, predicate: Predicate }, + Create(Create), + Select(Select), + Insert(Insert), + Delete(Delete), +} + +struct Create { + table: Table, +} + +impl Arbitrary for Create { + fn arbitrary(rng: &mut R) -> Self { + Create { + table: Table::arbitrary(rng), + } + } +} + +struct Select { + table: String, + predicate: Predicate, +} + +impl ArbitraryFrom> for Select { + fn arbitrary_from(rng: &mut R, t: &Vec
) -> Self { + let table = rng.gen_range(0..t.len()); + Select { + table: t[table].name.clone(), + predicate: Predicate::arbitrary_from(rng, &t[table]), + } + } +} + +impl ArbitraryFrom> for Select { + fn arbitrary_from(rng: &mut R, t: &Vec<&Table>) -> Self { + let table = rng.gen_range(0..t.len()); + Select { + table: t[table].name.clone(), + predicate: Predicate::arbitrary_from(rng, t[table]), + } + } +} + +struct Insert { + table: String, + values: Vec, +} + +impl ArbitraryFrom
for Insert { + fn arbitrary_from(rng: &mut R, t: &Table) -> Self { + let values = t + .columns + .iter() + .map(|c| Value::arbitrary_from(rng, &c.column_type)) + .collect(); + Insert { + table: t.name.clone(), + values, + } + } +} + +struct Delete { + table: String, + predicate: Predicate, +} + +impl ArbitraryFrom
for Delete { + fn arbitrary_from(rng: &mut R, t: &Table) -> Self { + Delete { + table: t.name.clone(), + predicate: Predicate::arbitrary_from(rng, t), + } + } } impl ArbitraryFrom
for Query { fn arbitrary_from(rng: &mut R, t: &Table) -> Self { match rng.gen_range(0..=200) { - 0 => Query::Create { - table: Table::arbitrary(rng), - }, - 1..=100 => Query::Select { - table: t.name.clone(), - predicate: Predicate::arbitrary_from(rng, t), - }, - 101..=200 => Query::Insert { - table: t.name.clone(), - values: t - .columns - .iter() - .map(|c| Value::arbitrary_from(rng, &c.column_type)) - .collect(), - }, - 201..=300 => Query::Delete { - table: t.name.clone(), - predicate: Predicate::arbitrary_from(rng, t), - }, + 0 => Query::Create(Create::arbitrary(rng)), + 1..=100 => Query::Select(Select::arbitrary_from(rng, &vec![t])), + 101..=200 => Query::Insert(Insert::arbitrary_from(rng, t)), + // todo: This branch is currently never taken, as DELETE is not yet implemented. + // Change this when DELETE is implemented. + 201..=300 => Query::Delete(Delete::arbitrary_from(rng, t)), _ => unreachable!(), } } @@ -394,7 +490,7 @@ impl Display for Predicate { Predicate::And(predicates) => { if predicates.is_empty() { // todo: Make this TRUE when the bug is fixed - write!(f, "1 = 1") + write!(f, "TRUE") } else { write!(f, "(")?; for (i, p) in predicates.iter().enumerate() { @@ -430,12 +526,12 @@ impl Display for Predicate { impl Display for Query { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Query::Create { table } => write!(f, "{}", table.to_create_str()), - Query::Select { + Query::Create(Create { table }) => write!(f, "{}", table.to_create_str()), + Query::Select(Select { table, predicate: guard, - } => write!(f, "SELECT * FROM {} WHERE {}", table, guard), - Query::Insert { table, values } => { + }) => write!(f, "SELECT * FROM {} WHERE {}", table, guard), + Query::Insert(Insert { table, values }) => { write!(f, "INSERT INTO {} VALUES (", table)?; for (i, v) in values.iter().enumerate() { if i != 0 { @@ -445,10 +541,10 @@ impl Display for Query { } write!(f, ")") } - Query::Delete { + Query::Delete(Delete { table, predicate: 
guard, - } => write!(f, "DELETE FROM {} WHERE {}", table, guard), + }) => write!(f, "DELETE FROM {} WHERE {}", table, guard), } } } @@ -481,6 +577,7 @@ fn main() { read_percent, write_percent, delete_percent, + mode: SimulatorMode::Workload, page_size: 4096, // TODO: randomize this too }; let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap()); @@ -542,8 +639,6 @@ fn property_insert_select(env: &mut SimulatorEnv, conn: &mut Rc) { // Get a random table let table = env.rng.gen_range(0..env.tables.len()); - // let table = &env.tables[table]; - // Pick a random column let column_index = env.rng.gen_range(0..env.tables[table].columns.len()); let column = &env.tables[table].columns[column_index].clone(); @@ -565,19 +660,19 @@ fn property_insert_select(env: &mut SimulatorEnv, conn: &mut Rc) { } // Insert the row - let query = Query::Insert { + let query = Query::Insert(Insert { table: env.tables[table].name.clone(), values: row.clone(), - }; + }); let _ = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); // Shadow operation on the table env.tables[table].rows.push(row.clone()); // Create a query that selects the row - let query = Query::Select { + let query = Query::Select(Select { table: env.tables[table].name.clone(), predicate: Predicate::Eq(column.name.clone(), value), - }; + }); // Get all rows let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); @@ -591,10 +686,10 @@ fn property_select_all(env: &mut SimulatorEnv, conn: &mut Rc) { let table = env.rng.gen_range(0..env.tables.len()); // Create a query that selects all rows - let query = Query::Select { + let query = Query::Select(Select { table: env.tables[table].name.clone(), predicate: Predicate::And(Vec::new()), - }; + }); // Get all rows let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); @@ -611,20 +706,65 @@ fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc) -> Resu maybe_add_table(env, conn)?; } - match env.rng.gen_range(0..2) { - // 
Randomly insert a value and check that the select result contains it. - 0 => property_insert_select(env, conn), - // Check that the current state of the in-memory table is the same as the one in the - // database. - 1 => property_select_all(env, conn), - // Perform a random query, update the in-memory table with the result. - 2 => { - let table_index = env.rng.gen_range(0..env.tables.len()); - let query = Query::arbitrary_from(&mut env.rng, &env.tables[table_index]); - let rows = get_all_rows(env, conn, query.to_string().as_str())?; - env.tables[table_index].rows = rows; - } - _ => unreachable!(), + match env.opts.mode { + SimulatorMode::Random => { + match env.rng.gen_range(0..2) { + // Randomly insert a value and check that the select result contains it. + 0 => property_insert_select(env, conn), + // Check that the current state of the in-memory table is the same as the one in the + // database. + 1 => property_select_all(env, conn), + // Perform a random query, update the in-memory table with the result. 
+ 2 => { + let table_index = env.rng.gen_range(0..env.tables.len()); + let query = Query::arbitrary_from(&mut env.rng, &env.tables[table_index]); + let rows = get_all_rows(env, conn, query.to_string().as_str())?; + env.tables[table_index].rows = rows; + } + _ => unreachable!(), + } + } + SimulatorMode::Workload => { + let picked = env.rng.gen_range(0..100); + + if env.rng.gen_ratio(1, 100) { + maybe_add_table(env, conn)?; + } + + if picked < env.opts.read_percent { + let query = Select::arbitrary_from(&mut env.rng, &env.tables); + + let _ = get_all_rows(env, conn, Query::Select(query).to_string().as_str())?; + } else if picked < env.opts.read_percent + env.opts.write_percent { + let table_index = env.rng.gen_range(0..env.tables.len()); + let column_index = env.rng.gen_range(0..env.tables[table_index].columns.len()); + let column = &env.tables[table_index].columns[column_index].clone(); + let mut rng = env.rng.clone(); + let value = Value::arbitrary_from(&mut rng, &column.column_type); + let mut row = Vec::new(); + for (i, column) in env.tables[table_index].columns.iter().enumerate() { + if i == column_index { + row.push(value.clone()); + } else { + let value = Value::arbitrary_from(&mut rng, &column.column_type); + row.push(value); + } + } + let query = Query::Insert(Insert { + table: env.tables[table_index].name.clone(), + values: row.clone(), + }); + let _ = get_all_rows(env, conn, query.to_string().as_str())?; + env.tables[table_index].rows.push(row.clone()); + } else { + let table_index = env.rng.gen_range(0..env.tables.len()); + let query = Query::Select(Select { + table: env.tables[table_index].name.clone(), + predicate: Predicate::And(Vec::new()), + }); + let _ = get_all_rows(env, conn, query.to_string().as_str())?; + } + } } Ok(()) From a1b2ab3f54dcd64518f7e052318dfa9650183413 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Sat, 14 Dec 2024 14:42:24 -0500 Subject: [PATCH 007/144] change names to more descriptive versions, fix the equal row comparison to check 
the ordering --- simulator/main.rs | 131 ++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 67 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index ed2fb14a7..8a7f10d24 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -124,19 +124,19 @@ enum Value { } impl ArbitraryFrom> for Value { - fn arbitrary_from(rng: &mut R, t: &Vec<&Value>) -> Self { - if t.is_empty() { + fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { + if values.is_empty() { return Value::Null; } - let index = rng.gen_range(0..t.len()); - t[index].clone() + let index = rng.gen_range(0..values.len()); + values[index].clone() } } impl ArbitraryFrom for Value { - fn arbitrary_from(rng: &mut R, t: &ColumnType) -> Self { - match t { + fn arbitrary_from(rng: &mut R, column_type: &ColumnType) -> Self { + match column_type { ColumnType::Integer => Value::Integer(rng.gen_range(i64::MIN..i64::MAX)), ColumnType::Float => Value::Float(rng.gen_range(-1e10..1e10)), ColumnType::Text => Value::Text(gen_random_text(rng)), @@ -148,19 +148,19 @@ impl ArbitraryFrom for Value { struct LTValue(Value); impl ArbitraryFrom> for LTValue { - fn arbitrary_from(rng: &mut R, t: &Vec<&Value>) -> Self { - if t.is_empty() { + fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { + if values.is_empty() { return LTValue(Value::Null); } - let index = rng.gen_range(0..t.len()); - LTValue::arbitrary_from(rng, t[index]) + let index = rng.gen_range(0..values.len()); + LTValue::arbitrary_from(rng, values[index]) } } impl ArbitraryFrom for LTValue { - fn arbitrary_from(rng: &mut R, t: &Value) -> Self { - match t { + fn arbitrary_from(rng: &mut R, value: &Value) -> Self { + match value { Value::Integer(i) => LTValue(Value::Integer(rng.gen_range(i64::MIN..*i - 1))), Value::Float(f) => LTValue(Value::Float(rng.gen_range(-1e10..*f - 1.0))), Value::Text(t) => { @@ -205,19 +205,19 @@ impl ArbitraryFrom for LTValue { struct GTValue(Value); impl ArbitraryFrom> for GTValue { - 
fn arbitrary_from(rng: &mut R, t: &Vec<&Value>) -> Self { - if t.is_empty() { + fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { + if values.is_empty() { return GTValue(Value::Null); } - let index = rng.gen_range(0..t.len()); - GTValue::arbitrary_from(rng, t[index]) + let index = rng.gen_range(0..values.len()); + GTValue::arbitrary_from(rng, values[index]) } } impl ArbitraryFrom for GTValue { - fn arbitrary_from(rng: &mut R, t: &Value) -> Self { - match t { + fn arbitrary_from(rng: &mut R, value: &Value) -> Self { + match value { Value::Integer(i) => GTValue(Value::Integer(rng.gen_range(*i..i64::MAX))), Value::Float(f) => GTValue(Value::Float(rng.gen_range(*f..1e10))), Value::Text(t) => { @@ -260,13 +260,14 @@ impl ArbitraryFrom for GTValue { } enum Predicate { - And(Vec), - Or(Vec), - Eq(String, Value), - Gt(String, Value), - Lt(String, Value), + And(Vec), // p1 AND p2 AND p3... AND pn + Or(Vec), // p1 OR p2 OR p3... OR pn + Eq(String, Value), // column = Value + Gt(String, Value), // column > Value + Lt(String, Value), // column < Value } +// This type represents the potential queries on the database. enum Query { Create(Create), Select(Select), @@ -292,21 +293,21 @@ struct Select { } impl ArbitraryFrom> for Select { - fn arbitrary_from(rng: &mut R, t: &Vec
) -> Self { - let table = rng.gen_range(0..t.len()); + fn arbitrary_from(rng: &mut R, tables: &Vec
) -> Self { + let table = rng.gen_range(0..tables.len()); Select { - table: t[table].name.clone(), - predicate: Predicate::arbitrary_from(rng, &t[table]), + table: tables[table].name.clone(), + predicate: Predicate::arbitrary_from(rng, &tables[table]), } } } impl ArbitraryFrom> for Select { - fn arbitrary_from(rng: &mut R, t: &Vec<&Table>) -> Self { - let table = rng.gen_range(0..t.len()); + fn arbitrary_from(rng: &mut R, tables: &Vec<&Table>) -> Self { + let table = rng.gen_range(0..tables.len()); Select { - table: t[table].name.clone(), - predicate: Predicate::arbitrary_from(rng, t[table]), + table: tables[table].name.clone(), + predicate: Predicate::arbitrary_from(rng, tables[table]), } } } @@ -317,14 +318,14 @@ struct Insert { } impl ArbitraryFrom
for Insert { - fn arbitrary_from(rng: &mut R, t: &Table) -> Self { - let values = t + fn arbitrary_from(rng: &mut R, table: &Table) -> Self { + let values = table .columns .iter() .map(|c| Value::arbitrary_from(rng, &c.column_type)) .collect(); Insert { - table: t.name.clone(), + table: table.name.clone(), values, } } @@ -336,23 +337,23 @@ struct Delete { } impl ArbitraryFrom
for Delete { - fn arbitrary_from(rng: &mut R, t: &Table) -> Self { + fn arbitrary_from(rng: &mut R, table: &Table) -> Self { Delete { - table: t.name.clone(), - predicate: Predicate::arbitrary_from(rng, t), + table: table.name.clone(), + predicate: Predicate::arbitrary_from(rng, table), } } } impl ArbitraryFrom
for Query { - fn arbitrary_from(rng: &mut R, t: &Table) -> Self { + fn arbitrary_from(rng: &mut R, table: &Table) -> Self { match rng.gen_range(0..=200) { 0 => Query::Create(Create::arbitrary(rng)), - 1..=100 => Query::Select(Select::arbitrary_from(rng, &vec![t])), - 101..=200 => Query::Insert(Insert::arbitrary_from(rng, t)), + 1..=100 => Query::Select(Select::arbitrary_from(rng, &vec![table])), + 101..=200 => Query::Insert(Insert::arbitrary_from(rng, table)), // todo: This branch is currently never taken, as DELETE is not yet implemented. // Change this when DELETE is implemented. - 201..=300 => Query::Delete(Delete::arbitrary_from(rng, t)), + 201..=300 => Query::Delete(Delete::arbitrary_from(rng, table)), _ => unreachable!(), } } @@ -362,15 +363,15 @@ struct CompoundPredicate(Predicate); struct SimplePredicate(Predicate); impl ArbitraryFrom<(&Table, bool)> for SimplePredicate { - fn arbitrary_from(rng: &mut R, (t, b): &(&Table, bool)) -> Self { + fn arbitrary_from(rng: &mut R, (table, predicate_value): &(&Table, bool)) -> Self { // Pick a random column - let column_index = rng.gen_range(0..t.columns.len()); - let column = &t.columns[column_index]; - let column_values = t.rows.iter().map(|r| &r[column_index]).collect::>(); + let column_index = rng.gen_range(0..table.columns.len()); + let column = &table.columns[column_index]; + let column_values = table.rows.iter().map(|r| &r[column_index]).collect::>(); // Pick an operator let operator = match rng.gen_range(0..3) { 0 => { - if *b { + if *predicate_value { Predicate::Eq( column.name.clone(), Value::arbitrary_from(rng, &column_values), @@ -384,14 +385,14 @@ impl ArbitraryFrom<(&Table, bool)> for SimplePredicate { } 1 => Predicate::Gt( column.name.clone(), - match b { + match predicate_value { true => GTValue::arbitrary_from(rng, &column_values).0, false => LTValue::arbitrary_from(rng, &column_values).0, }, ), 2 => Predicate::Lt( column.name.clone(), - match b { + match predicate_value { true => 
LTValue::arbitrary_from(rng, &column_values).0, false => GTValue::arbitrary_from(rng, &column_values).0, }, @@ -404,15 +405,15 @@ impl ArbitraryFrom<(&Table, bool)> for SimplePredicate { } impl ArbitraryFrom<(&Table, bool)> for CompoundPredicate { - fn arbitrary_from(rng: &mut R, (t, b): &(&Table, bool)) -> Self { + fn arbitrary_from(rng: &mut R, (table, predicate_value): &(&Table, bool)) -> Self { // Decide if you want to create an AND or an OR CompoundPredicate(if rng.gen_bool(0.7) { // An AND for true requires each of its children to be true // An AND for false requires at least one of its children to be false - if *b { + if *predicate_value { Predicate::And( (0..rng.gen_range(1..=3)) - .map(|_| SimplePredicate::arbitrary_from(rng, &(*t, true)).0) + .map(|_| SimplePredicate::arbitrary_from(rng, &(*table, true)).0) .collect(), ) } else { @@ -431,14 +432,14 @@ impl ArbitraryFrom<(&Table, bool)> for CompoundPredicate { Predicate::And( booleans .iter() - .map(|b| SimplePredicate::arbitrary_from(rng, &(*t, *b)).0) + .map(|b| SimplePredicate::arbitrary_from(rng, &(*table, *b)).0) .collect(), ) } } else { // An OR for true requires at least one of its children to be true // An OR for false requires each of its children to be false - if *b { + if *predicate_value { // Create a vector of random booleans let mut booleans = (0..rng.gen_range(1..=3)) .map(|_| rng.gen_bool(0.5)) @@ -452,13 +453,13 @@ impl ArbitraryFrom<(&Table, bool)> for CompoundPredicate { Predicate::Or( booleans .iter() - .map(|b| SimplePredicate::arbitrary_from(rng, &(*t, *b)).0) + .map(|b| SimplePredicate::arbitrary_from(rng, &(*table, *b)).0) .collect(), ) } else { Predicate::Or( (0..rng.gen_range(1..=3)) - .map(|_| SimplePredicate::arbitrary_from(rng, &(*t, false)).0) + .map(|_| SimplePredicate::arbitrary_from(rng, &(*table, false)).0) .collect(), ) } @@ -467,18 +468,18 @@ impl ArbitraryFrom<(&Table, bool)> for CompoundPredicate { } impl ArbitraryFrom
for Predicate { - fn arbitrary_from(rng: &mut R, t: &Table) -> Self { + fn arbitrary_from(rng: &mut R, table: &Table) -> Self { let b = rng.gen_bool(0.5); - CompoundPredicate::arbitrary_from(rng, &(t, b)).0 + CompoundPredicate::arbitrary_from(rng, &(table, b)).0 } } impl ArbitraryFrom<(&str, &Value)> for Predicate { - fn arbitrary_from(rng: &mut R, (c, t): &(&str, &Value)) -> Self { + fn arbitrary_from(rng: &mut R, (column_name, value): &(&str, &Value)) -> Self { match rng.gen_range(0..3) { - 0 => Predicate::Eq(c.to_string(), (*t).clone()), - 1 => Predicate::Gt(c.to_string(), LTValue::arbitrary_from(rng, *t).0), - 2 => Predicate::Lt(c.to_string(), LTValue::arbitrary_from(rng, *t).0), + 0 => Predicate::Eq(column_name.to_string(), (*value).clone()), + 1 => Predicate::Gt(column_name.to_string(), LTValue::arbitrary_from(rng, *value).0), + 2 => Predicate::Lt(column_name.to_string(), LTValue::arbitrary_from(rng, *value).0), _ => unreachable!(), } } @@ -694,11 +695,8 @@ fn property_select_all(env: &mut SimulatorEnv, conn: &mut Rc) { // Get all rows let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); - // Check that all rows are there - assert_eq!(rows.len(), env.tables[table].rows.len()); - for row in &env.tables[table].rows { - assert!(rows.iter().any(|r| r == row)); - } + // Make sure the rows are the same + compare_equal_rows(&rows, &env.tables[table].rows); } fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc) -> Result<()> { @@ -733,7 +731,6 @@ fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc) -> Resu if picked < env.opts.read_percent { let query = Select::arbitrary_from(&mut env.rng, &env.tables); - let _ = get_all_rows(env, conn, Query::Select(query).to_string().as_str())?; } else if picked < env.opts.read_percent + env.opts.write_percent { let table_index = env.rng.gen_range(0..env.tables.len()); From 8e094de6aae887abf2530cdefd802fbfd2f138b1 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Sat, 14 Dec 2024 14:57:32 -0500 
Subject: [PATCH 008/144] fix random character generation --- simulator/main.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 8a7f10d24..30d3a0176 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -170,14 +170,14 @@ impl ArbitraryFrom for LTValue { t.pop(); LTValue(Value::Text(t)) } else { - let index = rng.gen_range(0..t.len()); let mut t = t.chars().map(|c| c as u32).collect::>(); + let index = rng.gen_range(0..t.len()); t[index] -= 1; // Mutate the rest of the string for i in (index + 1)..t.len() { - t[i] = rng.gen_range(0..=255); + t[i] = rng.gen_range('a' as u32..='z' as u32); } - let t = t.into_iter().map(|c| c as u8 as char).collect::(); + let t = t.into_iter().map(|c| char::from_u32(c).unwrap_or('z')).collect::(); LTValue(Value::Text(t)) } } @@ -227,14 +227,14 @@ impl ArbitraryFrom for GTValue { t.push(rng.gen_range(0..=255) as u8 as char); GTValue(Value::Text(t)) } else { - let index = rng.gen_range(0..t.len()); let mut t = t.chars().map(|c| c as u32).collect::>(); + let index = rng.gen_range(0..t.len()); t[index] += 1; // Mutate the rest of the string for i in (index + 1)..t.len() { - t[i] = rng.gen_range(0..=255); + t[i] = rng.gen_range('a' as u32..='z' as u32); } - let t = t.into_iter().map(|c| c as u8 as char).collect::(); + let t = t.into_iter().map(|c| char::from_u32(c).unwrap_or('a')).collect::(); GTValue(Value::Text(t)) } } From f1937ee47fc68d149cf6af446b2a3a1cb7096e38 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sun, 15 Dec 2024 10:43:47 +0200 Subject: [PATCH 009/144] sqlite3: Add sqlite3_wal_checkpoint_*() API This wires up checkpointing to the SQLite C API. We don't respect the checkpointing mode because core does not have that nor do we report back some stats. 
Refs: #478 --- sqlite3/src/lib.rs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index 6a86a04fe..12b53ad82 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -28,6 +28,11 @@ pub const SQLITE_STATE_OPEN: u8 = 0x76; pub const SQLITE_STATE_SICK: u8 = 0xba; pub const SQLITE_STATE_BUSY: u8 = 0x6d; +pub const SQLITE_CHECKPOINT_PASSIVE: ffi::c_int = 0; +pub const SQLITE_CHECKPOINT_FULL: ffi::c_int = 1; +pub const SQLITE_CHECKPOINT_RESTART: ffi::c_int = 2; +pub const SQLITE_CHECKPOINT_TRUNCATE: ffi::c_int = 3; + pub mod util; use util::sqlite3_safety_check_sick_or_ok; @@ -918,3 +923,36 @@ fn sqlite3_errstr_impl(rc: i32) -> *const std::ffi::c_char { } } } + +#[no_mangle] +pub unsafe extern "C" fn sqlite3_wal_checkpoint( + _db: *mut sqlite3, + _db_name: *const std::ffi::c_char, +) -> ffi::c_int { + sqlite3_wal_checkpoint_v2( + _db, + _db_name, + SQLITE_CHECKPOINT_PASSIVE, + std::ptr::null_mut(), + std::ptr::null_mut(), + ) +} + +#[no_mangle] +pub unsafe extern "C" fn sqlite3_wal_checkpoint_v2( + db: *mut sqlite3, + _db_name: *const std::ffi::c_char, + _mode: ffi::c_int, + _log_size: *mut ffi::c_int, + _checkpoint_count: *mut ffi::c_int, +) -> ffi::c_int { + if db.is_null() { + return SQLITE_MISUSE; + } + let db: &mut sqlite3 = &mut *db; + // TODO: Checkpointing modes and reporting back log size and checkpoint count to caller. 
+ if let Err(e) = db.conn.checkpoint() { + return SQLITE_ERROR; + } + SQLITE_OK +} From ec1c796650c1412b8b55ce5c53960f05b1b40403 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Sun, 15 Dec 2024 12:50:38 -0500 Subject: [PATCH 010/144] change the boolean name to a more descriptive version --- simulator/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 30d3a0176..fcb6fc116 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -469,8 +469,8 @@ impl ArbitraryFrom<(&Table, bool)> for CompoundPredicate { impl ArbitraryFrom
for Predicate { fn arbitrary_from(rng: &mut R, table: &Table) -> Self { - let b = rng.gen_bool(0.5); - CompoundPredicate::arbitrary_from(rng, &(table, b)).0 + let predicate_value = rng.gen_bool(0.5); + CompoundPredicate::arbitrary_from(rng, &(table, predicate_value)).0 } } From 0172c512ac01dff0480b58a19eb948da4b7d5ba4 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Sun, 15 Dec 2024 12:51:01 -0500 Subject: [PATCH 011/144] fix formatting --- simulator/main.rs | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index fcb6fc116..48318f299 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -177,7 +177,10 @@ impl ArbitraryFrom for LTValue { for i in (index + 1)..t.len() { t[i] = rng.gen_range('a' as u32..='z' as u32); } - let t = t.into_iter().map(|c| char::from_u32(c).unwrap_or('z')).collect::(); + let t = t + .into_iter() + .map(|c| char::from_u32(c).unwrap_or('z')) + .collect::(); LTValue(Value::Text(t)) } } @@ -234,7 +237,10 @@ impl ArbitraryFrom for GTValue { for i in (index + 1)..t.len() { t[i] = rng.gen_range('a' as u32..='z' as u32); } - let t = t.into_iter().map(|c| char::from_u32(c).unwrap_or('a')).collect::(); + let t = t + .into_iter() + .map(|c| char::from_u32(c).unwrap_or('a')) + .collect::(); GTValue(Value::Text(t)) } } @@ -260,11 +266,11 @@ impl ArbitraryFrom for GTValue { } enum Predicate { - And(Vec), // p1 AND p2 AND p3... AND pn - Or(Vec), // p1 OR p2 OR p3... OR pn - Eq(String, Value), // column = Value - Gt(String, Value), // column > Value - Lt(String, Value), // column < Value + And(Vec), // p1 AND p2 AND p3... AND pn + Or(Vec), // p1 OR p2 OR p3... OR pn + Eq(String, Value), // column = Value + Gt(String, Value), // column > Value + Lt(String, Value), // column < Value } // This type represents the potential queries on the database. 
@@ -367,7 +373,11 @@ impl ArbitraryFrom<(&Table, bool)> for SimplePredicate { // Pick a random column let column_index = rng.gen_range(0..table.columns.len()); let column = &table.columns[column_index]; - let column_values = table.rows.iter().map(|r| &r[column_index]).collect::>(); + let column_values = table + .rows + .iter() + .map(|r| &r[column_index]) + .collect::>(); // Pick an operator let operator = match rng.gen_range(0..3) { 0 => { @@ -478,8 +488,14 @@ impl ArbitraryFrom<(&str, &Value)> for Predicate { fn arbitrary_from(rng: &mut R, (column_name, value): &(&str, &Value)) -> Self { match rng.gen_range(0..3) { 0 => Predicate::Eq(column_name.to_string(), (*value).clone()), - 1 => Predicate::Gt(column_name.to_string(), LTValue::arbitrary_from(rng, *value).0), - 2 => Predicate::Lt(column_name.to_string(), LTValue::arbitrary_from(rng, *value).0), + 1 => Predicate::Gt( + column_name.to_string(), + LTValue::arbitrary_from(rng, *value).0, + ), + 2 => Predicate::Lt( + column_name.to_string(), + LTValue::arbitrary_from(rng, *value).0, + ), _ => unreachable!(), } } From da781dffa0bfab98f864d7a8549bcea9307b50d9 Mon Sep 17 00:00:00 2001 From: KaguraMilet Date: Mon, 16 Dec 2024 20:34:25 +0800 Subject: [PATCH 012/144] feat(optimizer): eliminate between statement --- core/translate/optimizer.rs | 94 +++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 3e3946996..45d4a8299 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -15,6 +15,7 @@ use super::plan::{ * but having them separate makes them easier to understand */ pub fn optimize_plan(mut select_plan: Plan) -> Result { + eliminate_between(&mut select_plan.source, &mut select_plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constants(&mut select_plan.source, &mut select_plan.where_clause)? 
{ @@ -500,6 +501,46 @@ fn push_scan_direction(operator: &mut SourceOperator, direction: &Direction) { } } +fn eliminate_between( + operator: &mut SourceOperator, + where_clauses: &mut Option>, +) -> Result<()> { + if let Some(predicates) = where_clauses { + *predicates = predicates.drain(..).map(convert_between_expr).collect(); + } + + match operator { + SourceOperator::Join { + left, + right, + predicates, + .. + } => { + eliminate_between(left, where_clauses)?; + eliminate_between(right, where_clauses)?; + + if let Some(predicates) = predicates { + *predicates = predicates.drain(..).map(convert_between_expr).collect(); + } + } + SourceOperator::Scan { + predicates: Some(preds), + .. + } => { + *preds = preds.drain(..).map(convert_between_expr).collect(); + } + SourceOperator::Search { + predicates: Some(preds), + .. + } => { + *preds = preds.drain(..).map(convert_between_expr).collect(); + } + _ => (), + } + + Ok(()) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ConstantPredicate { AlwaysTrue, @@ -807,6 +848,59 @@ pub fn try_extract_index_search_expression( } } +fn convert_between_expr(expr: ast::Expr) -> ast::Expr { + match expr { + ast::Expr::Between { + lhs, + not, + start, + end, + } => { + let lower_bound = ast::Expr::Binary(start, ast::Operator::LessEquals, lhs.clone()); + let upper_bound = ast::Expr::Binary(lhs, ast::Operator::LessEquals, end); + + if not { + // Convert NOT BETWEEN to NOT (x <= start AND y <= end) + ast::Expr::Unary( + ast::UnaryOperator::Not, + Box::new(ast::Expr::Binary( + Box::new(lower_bound), + ast::Operator::And, + Box::new(upper_bound), + )), + ) + } else { + // Convert BETWEEN to (start <= y AND y <= end) + ast::Expr::Binary( + Box::new(lower_bound), + ast::Operator::And, + Box::new(upper_bound), + ) + } + } + // Process other expressions recursively + ast::Expr::Binary(lhs, op, rhs) => ast::Expr::Binary( + Box::new(convert_between_expr(*lhs)), + op, + Box::new(convert_between_expr(*rhs)), + ), + 
ast::Expr::FunctionCall { + name, + distinctness, + args, + order_by, + filter_over, + } => ast::Expr::FunctionCall { + name, + distinctness, + args: args.map(|args| args.into_iter().map(convert_between_expr).collect()), + order_by, + filter_over, + }, + _ => expr, + } +} + trait TakeOwnership { fn take_ownership(&mut self) -> Self; } From 1df3189db63d086140ed7e524c8e41aaee1f0748 Mon Sep 17 00:00:00 2001 From: KaguraMilet Date: Mon, 16 Dec 2024 23:28:54 +0800 Subject: [PATCH 013/144] feat(optimizer): support NOT BETWEEN AND with De Morgan's Laws. --- core/translate/optimizer.rs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 45d4a8299..962e2ea80 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -856,21 +856,24 @@ fn convert_between_expr(expr: ast::Expr) -> ast::Expr { start, end, } => { - let lower_bound = ast::Expr::Binary(start, ast::Operator::LessEquals, lhs.clone()); - let upper_bound = ast::Expr::Binary(lhs, ast::Operator::LessEquals, end); + // Convert `y NOT BETWEEN x AND z` to `x > y OR y > z` + let (lower_op, upper_op) = if not { + (ast::Operator::Greater, ast::Operator::Greater) + } else { + // Convert `y BETWEEN x AND z` to `x <= y AND y <= z` + (ast::Operator::LessEquals, ast::Operator::LessEquals) + }; + + let lower_bound = ast::Expr::Binary(start, lower_op, lhs.clone()); + let upper_bound = ast::Expr::Binary(lhs, upper_op, end); if not { - // Convert NOT BETWEEN to NOT (x <= start AND y <= end) - ast::Expr::Unary( - ast::UnaryOperator::Not, - Box::new(ast::Expr::Binary( - Box::new(lower_bound), - ast::Operator::And, - Box::new(upper_bound), - )), + ast::Expr::Binary( + Box::new(lower_bound), + ast::Operator::Or, + Box::new(upper_bound), ) } else { - // Convert BETWEEN to (start <= y AND y <= end) ast::Expr::Binary( Box::new(lower_bound), ast::Operator::And, From be18c6e8f04ac2c9b4c681816d86413428c04473 Mon Sep 17 
00:00:00 2001 From: alpaylan Date: Mon, 16 Dec 2024 10:49:05 -0500 Subject: [PATCH 014/144] break simulator into parts, add readme --- simulator/README.md | 2 + simulator/generation.rs | 32 ++ simulator/generation/query.rs | 213 +++++++++++ simulator/generation/table.rs | 196 ++++++++++ simulator/main.rs | 667 +--------------------------------- simulator/model.rs | 3 + simulator/model/query.rs | 111 ++++++ simulator/model/table.rs | 91 +++++ simulator/properties.rs | 78 ++++ 9 files changed, 733 insertions(+), 660 deletions(-) create mode 100644 simulator/README.md create mode 100644 simulator/generation.rs create mode 100644 simulator/generation/query.rs create mode 100644 simulator/generation/table.rs create mode 100644 simulator/model.rs create mode 100644 simulator/model/query.rs create mode 100644 simulator/model/table.rs create mode 100644 simulator/properties.rs diff --git a/simulator/README.md b/simulator/README.md new file mode 100644 index 000000000..cdaf20c6f --- /dev/null +++ b/simulator/README.md @@ -0,0 +1,2 @@ +# Simulator + diff --git a/simulator/generation.rs b/simulator/generation.rs new file mode 100644 index 000000000..73f84c11c --- /dev/null +++ b/simulator/generation.rs @@ -0,0 +1,32 @@ +use anarchist_readable_name_generator_lib::readable_name_custom; +use rand::Rng; + +pub mod table; +pub mod query; + +pub trait Arbitrary { + fn arbitrary(rng: &mut R) -> Self; +} + +pub trait ArbitraryFrom { + fn arbitrary_from(rng: &mut R, t: &T) -> Self; +} + + +fn gen_random_text(rng: &mut T) -> String { + let big_text = rng.gen_ratio(1, 1000); + if big_text { + let max_size: u64 = 2 * 1024 * 1024 * 1024; + let size = rng.gen_range(1024..max_size); + let mut name = String::new(); + for i in 0..size { + name.push(((i % 26) as u8 + b'A') as char); + } + name + } else { + let name = readable_name_custom("_", rng); + name.replace("-", "_") + } +} + + diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs new file mode 100644 index 
000000000..d8fc4ae16 --- /dev/null +++ b/simulator/generation/query.rs @@ -0,0 +1,213 @@ + +use crate::generation::{Arbitrary, ArbitraryFrom}; +use crate::generation::table::{LTValue, GTValue}; + +use crate::model::query::{Create, Delete, Insert, Predicate, Query, Select}; +use crate::model::table::{Table, Value}; +use rand::Rng; + + +impl Arbitrary for Create { + fn arbitrary(rng: &mut R) -> Self { + Create { + table: Table::arbitrary(rng), + } + } +} + + +impl ArbitraryFrom> for Select { + fn arbitrary_from(rng: &mut R, tables: &Vec
<Table>) -> Self { + let table = rng.gen_range(0..tables.len()); + Select { + table: tables[table].name.clone(), + predicate: Predicate::arbitrary_from(rng, &tables[table]), + } + } +} + +impl ArbitraryFrom<Vec<&Table>> for Select { + fn arbitrary_from<R: Rng>(rng: &mut R, tables: &Vec<&Table>) -> Self { + let table = rng.gen_range(0..tables.len()); + Select { + table: tables[table].name.clone(), + predicate: Predicate::arbitrary_from(rng, tables[table]), + } + } +} + + +impl ArbitraryFrom
<Table> for Insert { + fn arbitrary_from<R: Rng>(rng: &mut R, table: &Table) -> Self { + let values = table + .columns + .iter() + .map(|c| Value::arbitrary_from(rng, &c.column_type)) + .collect(); + Insert { + table: table.name.clone(), + values, + } + } +} + + +impl ArbitraryFrom
<Table> for Delete { + fn arbitrary_from<R: Rng>(rng: &mut R, table: &Table) -> Self { + Delete { + table: table.name.clone(), + predicate: Predicate::arbitrary_from(rng, table), + } + } +} + +impl ArbitraryFrom
<Table> for Query { + fn arbitrary_from<R: Rng>(rng: &mut R, table: &Table) -> Self { + match rng.gen_range(0..=200) { + 0 => Query::Create(Create::arbitrary(rng)), + 1..=100 => Query::Select(Select::arbitrary_from(rng, &vec![table])), + 101..=200 => Query::Insert(Insert::arbitrary_from(rng, table)), + // todo: This branch is currently never taken, as DELETE is not yet implemented. + // Change this when DELETE is implemented. + 201..=300 => Query::Delete(Delete::arbitrary_from(rng, table)), + _ => unreachable!(), + } + } +} + + +struct CompoundPredicate(Predicate); +struct SimplePredicate(Predicate); + +impl ArbitraryFrom<(&Table, bool)> for SimplePredicate { + fn arbitrary_from<R: Rng>(rng: &mut R, (table, predicate_value): &(&Table, bool)) -> Self { + // Pick a random column + let column_index = rng.gen_range(0..table.columns.len()); + let column = &table.columns[column_index]; + let column_values = table + .rows + .iter() + .map(|r| &r[column_index]) + .collect::<Vec<_>>(); + // Pick an operator + let operator = match rng.gen_range(0..3) { + 0 => { + if *predicate_value { + Predicate::Eq( + column.name.clone(), + Value::arbitrary_from(rng, &column_values), + ) + } else { + Predicate::Eq( + column.name.clone(), + Value::arbitrary_from(rng, &column.column_type), + ) + } + } + 1 => Predicate::Gt( + column.name.clone(), + match predicate_value { + true => GTValue::arbitrary_from(rng, &column_values).0, + false => LTValue::arbitrary_from(rng, &column_values).0, + }, + ), + 2 => Predicate::Lt( + column.name.clone(), + match predicate_value { + true => LTValue::arbitrary_from(rng, &column_values).0, + false => GTValue::arbitrary_from(rng, &column_values).0, + }, + ), + _ => unreachable!(), + }; + + SimplePredicate(operator) + } +} + +impl ArbitraryFrom<(&Table, bool)> for CompoundPredicate { + fn arbitrary_from<R: Rng>(rng: &mut R, (table, predicate_value): &(&Table, bool)) -> Self { + // Decide if you want to create an AND or an OR + CompoundPredicate(if rng.gen_bool(0.7) { + // An AND for true requires
each of its children to be true + // An AND for false requires at least one of its children to be false + if *predicate_value { + Predicate::And( + (0..rng.gen_range(1..=3)) + .map(|_| SimplePredicate::arbitrary_from(rng, &(*table, true)).0) + .collect(), + ) + } else { + // Create a vector of random booleans + let mut booleans = (0..rng.gen_range(1..=3)) + .map(|_| rng.gen_bool(0.5)) + .collect::>(); + + let len = booleans.len(); + + // Make sure at least one of them is false + if booleans.iter().all(|b| *b) { + booleans[rng.gen_range(0..len)] = false; + } + + Predicate::And( + booleans + .iter() + .map(|b| SimplePredicate::arbitrary_from(rng, &(*table, *b)).0) + .collect(), + ) + } + } else { + // An OR for true requires at least one of its children to be true + // An OR for false requires each of its children to be false + if *predicate_value { + // Create a vector of random booleans + let mut booleans = (0..rng.gen_range(1..=3)) + .map(|_| rng.gen_bool(0.5)) + .collect::>(); + let len = booleans.len(); + // Make sure at least one of them is true + if booleans.iter().all(|b| !*b) { + booleans[rng.gen_range(0..len)] = true; + } + + Predicate::Or( + booleans + .iter() + .map(|b| SimplePredicate::arbitrary_from(rng, &(*table, *b)).0) + .collect(), + ) + } else { + Predicate::Or( + (0..rng.gen_range(1..=3)) + .map(|_| SimplePredicate::arbitrary_from(rng, &(*table, false)).0) + .collect(), + ) + } + }) + } +} + +impl ArbitraryFrom
for Predicate { + fn arbitrary_from(rng: &mut R, table: &Table) -> Self { + let predicate_value = rng.gen_bool(0.5); + CompoundPredicate::arbitrary_from(rng, &(table, predicate_value)).0 + } +} + +impl ArbitraryFrom<(&str, &Value)> for Predicate { + fn arbitrary_from(rng: &mut R, (column_name, value): &(&str, &Value)) -> Self { + match rng.gen_range(0..3) { + 0 => Predicate::Eq(column_name.to_string(), (*value).clone()), + 1 => Predicate::Gt( + column_name.to_string(), + LTValue::arbitrary_from(rng, *value).0, + ), + 2 => Predicate::Lt( + column_name.to_string(), + LTValue::arbitrary_from(rng, *value).0, + ), + _ => unreachable!(), + } + } +} diff --git a/simulator/generation/table.rs b/simulator/generation/table.rs new file mode 100644 index 000000000..8b1d6d421 --- /dev/null +++ b/simulator/generation/table.rs @@ -0,0 +1,196 @@ + +use rand::Rng; + +use crate::generation::{Arbitrary, ArbitraryFrom, readable_name_custom, gen_random_text}; +use crate::model::table::{Column, ColumnType, Name, Table, Value}; + +impl Arbitrary for Name { + fn arbitrary(rng: &mut R) -> Self { + let name = readable_name_custom("_", rng); + Name(name.replace("-", "_")) + } +} + + +impl Arbitrary for Table { + fn arbitrary(rng: &mut R) -> Self { + let name = Name::arbitrary(rng).0; + let columns = (1..rng.gen_range(1..128)) + .map(|_| Column::arbitrary(rng)) + .collect(); + Table { + rows: Vec::new(), + name, + columns, + } + } +} + +impl Arbitrary for Column { + fn arbitrary(rng: &mut R) -> Self { + let name = Name::arbitrary(rng).0; + let column_type = ColumnType::arbitrary(rng); + Column { + name, + column_type, + primary: false, + unique: false, + } + } +} + + +impl Arbitrary for ColumnType { + fn arbitrary(rng: &mut R) -> Self { + match rng.gen_range(0..4) { + 0 => ColumnType::Integer, + 1 => ColumnType::Float, + 2 => ColumnType::Text, + 3 => ColumnType::Blob, + _ => unreachable!(), + } + } +} + +impl ArbitraryFrom> for Value { + fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> 
Self { + if values.is_empty() { + return Value::Null; + } + + let index = rng.gen_range(0..values.len()); + values[index].clone() + } +} + +impl ArbitraryFrom for Value { + fn arbitrary_from(rng: &mut R, column_type: &ColumnType) -> Self { + match column_type { + ColumnType::Integer => Value::Integer(rng.gen_range(i64::MIN..i64::MAX)), + ColumnType::Float => Value::Float(rng.gen_range(-1e10..1e10)), + ColumnType::Text => Value::Text(gen_random_text(rng)), + ColumnType::Blob => Value::Blob(gen_random_text(rng).as_bytes().to_vec()), + } + } +} + +pub(crate) struct LTValue(pub(crate) Value); + +impl ArbitraryFrom> for LTValue { + fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { + if values.is_empty() { + return LTValue(Value::Null); + } + + let index = rng.gen_range(0..values.len()); + LTValue::arbitrary_from(rng, values[index]) + } +} + +impl ArbitraryFrom for LTValue { + fn arbitrary_from(rng: &mut R, value: &Value) -> Self { + match value { + Value::Integer(i) => LTValue(Value::Integer(rng.gen_range(i64::MIN..*i - 1))), + Value::Float(f) => LTValue(Value::Float(rng.gen_range(-1e10..*f - 1.0))), + Value::Text(t) => { + // Either shorten the string, or make at least one character smaller and mutate the rest + let mut t = t.clone(); + if rng.gen_bool(0.01) { + t.pop(); + LTValue(Value::Text(t)) + } else { + let mut t = t.chars().map(|c| c as u32).collect::>(); + let index = rng.gen_range(0..t.len()); + t[index] -= 1; + // Mutate the rest of the string + for i in (index + 1)..t.len() { + t[i] = rng.gen_range('a' as u32..='z' as u32); + } + let t = t + .into_iter() + .map(|c| char::from_u32(c).unwrap_or('z')) + .collect::(); + LTValue(Value::Text(t)) + } + } + Value::Blob(b) => { + // Either shorten the blob, or make at least one byte smaller and mutate the rest + let mut b = b.clone(); + if rng.gen_bool(0.01) { + b.pop(); + LTValue(Value::Blob(b)) + } else { + let index = rng.gen_range(0..b.len()); + b[index] -= 1; + // Mutate the rest of the blob + for i 
in (index + 1)..b.len() { + b[i] = rng.gen_range(0..=255); + } + LTValue(Value::Blob(b)) + } + } + _ => unreachable!(), + } + } +} + +pub(crate) struct GTValue(pub(crate) Value); + +impl ArbitraryFrom> for GTValue { + fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { + if values.is_empty() { + return GTValue(Value::Null); + } + + let index = rng.gen_range(0..values.len()); + GTValue::arbitrary_from(rng, values[index]) + } +} + +impl ArbitraryFrom for GTValue { + fn arbitrary_from(rng: &mut R, value: &Value) -> Self { + match value { + Value::Integer(i) => GTValue(Value::Integer(rng.gen_range(*i..i64::MAX))), + Value::Float(f) => GTValue(Value::Float(rng.gen_range(*f..1e10))), + Value::Text(t) => { + // Either lengthen the string, or make at least one character smaller and mutate the rest + let mut t = t.clone(); + if rng.gen_bool(0.01) { + t.push(rng.gen_range(0..=255) as u8 as char); + GTValue(Value::Text(t)) + } else { + let mut t = t.chars().map(|c| c as u32).collect::>(); + let index = rng.gen_range(0..t.len()); + t[index] += 1; + // Mutate the rest of the string + for i in (index + 1)..t.len() { + t[i] = rng.gen_range('a' as u32..='z' as u32); + } + let t = t + .into_iter() + .map(|c| char::from_u32(c).unwrap_or('a')) + .collect::(); + GTValue(Value::Text(t)) + } + } + Value::Blob(b) => { + // Either lengthen the blob, or make at least one byte smaller and mutate the rest + let mut b = b.clone(); + if rng.gen_bool(0.01) { + b.push(rng.gen_range(0..=255)); + GTValue(Value::Blob(b)) + } else { + let index = rng.gen_range(0..b.len()); + b[index] += 1; + // Mutate the rest of the blob + for i in (index + 1)..b.len() { + b[i] = rng.gen_range(0..=255); + } + GTValue(Value::Blob(b)) + } + } + _ => unreachable!(), + } + } +} + diff --git a/simulator/main.rs b/simulator/main.rs index 48318f299..bfc9b658f 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,14 +1,18 @@ +use generation::{Arbitrary, ArbitraryFrom}; use limbo_core::{Connection, 
Database, File, OpenFlags, PlatformIO, Result, RowResult, IO}; +use model::table::{Column, Name, Table, Value}; +use model::query::{Insert, Predicate, Query, Select}; +use properties::{property_insert_select, property_select_all}; use rand::prelude::*; use rand_chacha::ChaCha8Rng; use std::cell::RefCell; -use std::fmt::Display; -use std::ops::Deref; use std::rc::Rc; use std::sync::Arc; use tempfile::TempDir; -use anarchist_readable_name_generator_lib::readable_name_custom; +mod generation; +mod properties; +mod model; struct SimulatorEnv { opts: SimulatorOpts, @@ -45,526 +49,7 @@ struct SimulatorOpts { page_size: usize, } -trait Arbitrary { - fn arbitrary(rng: &mut R) -> Self; -} - -trait ArbitraryFrom { - fn arbitrary_from(rng: &mut R, t: &T) -> Self; -} - -struct Table { - rows: Vec>, - name: String, - columns: Vec, -} - -impl Arbitrary for Table { - fn arbitrary(rng: &mut R) -> Self { - let name = Name::arbitrary(rng).0; - let columns = (1..rng.gen_range(1..128)) - .map(|_| Column::arbitrary(rng)) - .collect(); - Table { - rows: Vec::new(), - name, - columns, - } - } -} - -#[derive(Clone)] -struct Column { - name: String, - column_type: ColumnType, - primary: bool, - unique: bool, -} - -impl Arbitrary for Column { - fn arbitrary(rng: &mut R) -> Self { - let name = Name::arbitrary(rng).0; - let column_type = ColumnType::arbitrary(rng); - Column { - name, - column_type, - primary: false, - unique: false, - } - } -} - -#[derive(Clone)] -enum ColumnType { - Integer, - Float, - Text, - Blob, -} - -impl Arbitrary for ColumnType { - fn arbitrary(rng: &mut R) -> Self { - match rng.gen_range(0..4) { - 0 => ColumnType::Integer, - 1 => ColumnType::Float, - 2 => ColumnType::Text, - 3 => ColumnType::Blob, - _ => unreachable!(), - } - } -} - -#[derive(Clone, Debug, PartialEq)] -enum Value { - Null, - Integer(i64), - Float(f64), - Text(String), - Blob(Vec), -} - -impl ArbitraryFrom> for Value { - fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { - if 
values.is_empty() { - return Value::Null; - } - - let index = rng.gen_range(0..values.len()); - values[index].clone() - } -} - -impl ArbitraryFrom for Value { - fn arbitrary_from(rng: &mut R, column_type: &ColumnType) -> Self { - match column_type { - ColumnType::Integer => Value::Integer(rng.gen_range(i64::MIN..i64::MAX)), - ColumnType::Float => Value::Float(rng.gen_range(-1e10..1e10)), - ColumnType::Text => Value::Text(gen_random_text(rng)), - ColumnType::Blob => Value::Blob(gen_random_text(rng).as_bytes().to_vec()), - } - } -} - -struct LTValue(Value); - -impl ArbitraryFrom> for LTValue { - fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { - if values.is_empty() { - return LTValue(Value::Null); - } - let index = rng.gen_range(0..values.len()); - LTValue::arbitrary_from(rng, values[index]) - } -} - -impl ArbitraryFrom for LTValue { - fn arbitrary_from(rng: &mut R, value: &Value) -> Self { - match value { - Value::Integer(i) => LTValue(Value::Integer(rng.gen_range(i64::MIN..*i - 1))), - Value::Float(f) => LTValue(Value::Float(rng.gen_range(-1e10..*f - 1.0))), - Value::Text(t) => { - // Either shorten the string, or make at least one character smaller and mutate the rest - let mut t = t.clone(); - if rng.gen_bool(0.01) { - t.pop(); - LTValue(Value::Text(t)) - } else { - let mut t = t.chars().map(|c| c as u32).collect::>(); - let index = rng.gen_range(0..t.len()); - t[index] -= 1; - // Mutate the rest of the string - for i in (index + 1)..t.len() { - t[i] = rng.gen_range('a' as u32..='z' as u32); - } - let t = t - .into_iter() - .map(|c| char::from_u32(c).unwrap_or('z')) - .collect::(); - LTValue(Value::Text(t)) - } - } - Value::Blob(b) => { - // Either shorten the blob, or make at least one byte smaller and mutate the rest - let mut b = b.clone(); - if rng.gen_bool(0.01) { - b.pop(); - LTValue(Value::Blob(b)) - } else { - let index = rng.gen_range(0..b.len()); - b[index] -= 1; - // Mutate the rest of the blob - for i in (index + 1)..b.len() { - b[i] = 
rng.gen_range(0..=255); - } - LTValue(Value::Blob(b)) - } - } - _ => unreachable!(), - } - } -} - -struct GTValue(Value); - -impl ArbitraryFrom> for GTValue { - fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { - if values.is_empty() { - return GTValue(Value::Null); - } - - let index = rng.gen_range(0..values.len()); - GTValue::arbitrary_from(rng, values[index]) - } -} - -impl ArbitraryFrom for GTValue { - fn arbitrary_from(rng: &mut R, value: &Value) -> Self { - match value { - Value::Integer(i) => GTValue(Value::Integer(rng.gen_range(*i..i64::MAX))), - Value::Float(f) => GTValue(Value::Float(rng.gen_range(*f..1e10))), - Value::Text(t) => { - // Either lengthen the string, or make at least one character smaller and mutate the rest - let mut t = t.clone(); - if rng.gen_bool(0.01) { - t.push(rng.gen_range(0..=255) as u8 as char); - GTValue(Value::Text(t)) - } else { - let mut t = t.chars().map(|c| c as u32).collect::>(); - let index = rng.gen_range(0..t.len()); - t[index] += 1; - // Mutate the rest of the string - for i in (index + 1)..t.len() { - t[i] = rng.gen_range('a' as u32..='z' as u32); - } - let t = t - .into_iter() - .map(|c| char::from_u32(c).unwrap_or('a')) - .collect::(); - GTValue(Value::Text(t)) - } - } - Value::Blob(b) => { - // Either lengthen the blob, or make at least one byte smaller and mutate the rest - let mut b = b.clone(); - if rng.gen_bool(0.01) { - b.push(rng.gen_range(0..=255)); - GTValue(Value::Blob(b)) - } else { - let index = rng.gen_range(0..b.len()); - b[index] += 1; - // Mutate the rest of the blob - for i in (index + 1)..b.len() { - b[i] = rng.gen_range(0..=255); - } - GTValue(Value::Blob(b)) - } - } - _ => unreachable!(), - } - } -} - -enum Predicate { - And(Vec), // p1 AND p2 AND p3... AND pn - Or(Vec), // p1 OR p2 OR p3... OR pn - Eq(String, Value), // column = Value - Gt(String, Value), // column > Value - Lt(String, Value), // column < Value -} - -// This type represents the potential queries on the database. 
-enum Query { - Create(Create), - Select(Select), - Insert(Insert), - Delete(Delete), -} - -struct Create { - table: Table, -} - -impl Arbitrary for Create { - fn arbitrary(rng: &mut R) -> Self { - Create { - table: Table::arbitrary(rng), - } - } -} - -struct Select { - table: String, - predicate: Predicate, -} - -impl ArbitraryFrom> for Select { - fn arbitrary_from(rng: &mut R, tables: &Vec
) -> Self { - let table = rng.gen_range(0..tables.len()); - Select { - table: tables[table].name.clone(), - predicate: Predicate::arbitrary_from(rng, &tables[table]), - } - } -} - -impl ArbitraryFrom> for Select { - fn arbitrary_from(rng: &mut R, tables: &Vec<&Table>) -> Self { - let table = rng.gen_range(0..tables.len()); - Select { - table: tables[table].name.clone(), - predicate: Predicate::arbitrary_from(rng, tables[table]), - } - } -} - -struct Insert { - table: String, - values: Vec, -} - -impl ArbitraryFrom
for Insert { - fn arbitrary_from(rng: &mut R, table: &Table) -> Self { - let values = table - .columns - .iter() - .map(|c| Value::arbitrary_from(rng, &c.column_type)) - .collect(); - Insert { - table: table.name.clone(), - values, - } - } -} - -struct Delete { - table: String, - predicate: Predicate, -} - -impl ArbitraryFrom
for Delete { - fn arbitrary_from(rng: &mut R, table: &Table) -> Self { - Delete { - table: table.name.clone(), - predicate: Predicate::arbitrary_from(rng, table), - } - } -} - -impl ArbitraryFrom
for Query { - fn arbitrary_from(rng: &mut R, table: &Table) -> Self { - match rng.gen_range(0..=200) { - 0 => Query::Create(Create::arbitrary(rng)), - 1..=100 => Query::Select(Select::arbitrary_from(rng, &vec![table])), - 101..=200 => Query::Insert(Insert::arbitrary_from(rng, table)), - // todo: This branch is currently never taken, as DELETE is not yet implemented. - // Change this when DELETE is implemented. - 201..=300 => Query::Delete(Delete::arbitrary_from(rng, table)), - _ => unreachable!(), - } - } -} - -struct CompoundPredicate(Predicate); -struct SimplePredicate(Predicate); - -impl ArbitraryFrom<(&Table, bool)> for SimplePredicate { - fn arbitrary_from(rng: &mut R, (table, predicate_value): &(&Table, bool)) -> Self { - // Pick a random column - let column_index = rng.gen_range(0..table.columns.len()); - let column = &table.columns[column_index]; - let column_values = table - .rows - .iter() - .map(|r| &r[column_index]) - .collect::>(); - // Pick an operator - let operator = match rng.gen_range(0..3) { - 0 => { - if *predicate_value { - Predicate::Eq( - column.name.clone(), - Value::arbitrary_from(rng, &column_values), - ) - } else { - Predicate::Eq( - column.name.clone(), - Value::arbitrary_from(rng, &column.column_type), - ) - } - } - 1 => Predicate::Gt( - column.name.clone(), - match predicate_value { - true => GTValue::arbitrary_from(rng, &column_values).0, - false => LTValue::arbitrary_from(rng, &column_values).0, - }, - ), - 2 => Predicate::Lt( - column.name.clone(), - match predicate_value { - true => LTValue::arbitrary_from(rng, &column_values).0, - false => GTValue::arbitrary_from(rng, &column_values).0, - }, - ), - _ => unreachable!(), - }; - - SimplePredicate(operator) - } -} - -impl ArbitraryFrom<(&Table, bool)> for CompoundPredicate { - fn arbitrary_from(rng: &mut R, (table, predicate_value): &(&Table, bool)) -> Self { - // Decide if you want to create an AND or an OR - CompoundPredicate(if rng.gen_bool(0.7) { - // An AND for true requires each 
of its children to be true - // An AND for false requires at least one of its children to be false - if *predicate_value { - Predicate::And( - (0..rng.gen_range(1..=3)) - .map(|_| SimplePredicate::arbitrary_from(rng, &(*table, true)).0) - .collect(), - ) - } else { - // Create a vector of random booleans - let mut booleans = (0..rng.gen_range(1..=3)) - .map(|_| rng.gen_bool(0.5)) - .collect::>(); - - let len = booleans.len(); - - // Make sure at least one of them is false - if booleans.iter().all(|b| *b) { - booleans[rng.gen_range(0..len)] = false; - } - - Predicate::And( - booleans - .iter() - .map(|b| SimplePredicate::arbitrary_from(rng, &(*table, *b)).0) - .collect(), - ) - } - } else { - // An OR for true requires at least one of its children to be true - // An OR for false requires each of its children to be false - if *predicate_value { - // Create a vector of random booleans - let mut booleans = (0..rng.gen_range(1..=3)) - .map(|_| rng.gen_bool(0.5)) - .collect::>(); - let len = booleans.len(); - // Make sure at least one of them is true - if booleans.iter().all(|b| !*b) { - booleans[rng.gen_range(0..len)] = true; - } - - Predicate::Or( - booleans - .iter() - .map(|b| SimplePredicate::arbitrary_from(rng, &(*table, *b)).0) - .collect(), - ) - } else { - Predicate::Or( - (0..rng.gen_range(1..=3)) - .map(|_| SimplePredicate::arbitrary_from(rng, &(*table, false)).0) - .collect(), - ) - } - }) - } -} - -impl ArbitraryFrom
for Predicate { - fn arbitrary_from(rng: &mut R, table: &Table) -> Self { - let predicate_value = rng.gen_bool(0.5); - CompoundPredicate::arbitrary_from(rng, &(table, predicate_value)).0 - } -} - -impl ArbitraryFrom<(&str, &Value)> for Predicate { - fn arbitrary_from(rng: &mut R, (column_name, value): &(&str, &Value)) -> Self { - match rng.gen_range(0..3) { - 0 => Predicate::Eq(column_name.to_string(), (*value).clone()), - 1 => Predicate::Gt( - column_name.to_string(), - LTValue::arbitrary_from(rng, *value).0, - ), - 2 => Predicate::Lt( - column_name.to_string(), - LTValue::arbitrary_from(rng, *value).0, - ), - _ => unreachable!(), - } - } -} - -impl Display for Predicate { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Predicate::And(predicates) => { - if predicates.is_empty() { - // todo: Make this TRUE when the bug is fixed - write!(f, "TRUE") - } else { - write!(f, "(")?; - for (i, p) in predicates.iter().enumerate() { - if i != 0 { - write!(f, " AND ")?; - } - write!(f, "{}", p)?; - } - write!(f, ")") - } - } - Predicate::Or(predicates) => { - if predicates.is_empty() { - write!(f, "FALSE") - } else { - write!(f, "(")?; - for (i, p) in predicates.iter().enumerate() { - if i != 0 { - write!(f, " OR ")?; - } - write!(f, "{}", p)?; - } - write!(f, ")") - } - } - Predicate::Eq(name, value) => write!(f, "{} = {}", name, value), - Predicate::Gt(name, value) => write!(f, "{} > {}", name, value), - Predicate::Lt(name, value) => write!(f, "{} < {}", name, value), - } - } -} - -impl Display for Query { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Query::Create(Create { table }) => write!(f, "{}", table.to_create_str()), - Query::Select(Select { - table, - predicate: guard, - }) => write!(f, "SELECT * FROM {} WHERE {}", table, guard), - Query::Insert(Insert { table, values }) => { - write!(f, "INSERT INTO {} VALUES (", table)?; - for (i, v) in values.iter().enumerate() { - if i != 0 { - 
write!(f, ", ")?; - } - write!(f, "{}", v)?; - } - write!(f, ")") - } - Query::Delete(Delete { - table, - predicate: guard, - }) => write!(f, "DELETE FROM {} WHERE {}", table, guard), - } - } -} #[allow(clippy::arc_with_non_send_sync)] fn main() { @@ -652,68 +137,6 @@ fn main() { env.io.print_stats(); } -fn property_insert_select(env: &mut SimulatorEnv, conn: &mut Rc) { - // Get a random table - let table = env.rng.gen_range(0..env.tables.len()); - - // Pick a random column - let column_index = env.rng.gen_range(0..env.tables[table].columns.len()); - let column = &env.tables[table].columns[column_index].clone(); - - let mut rng = env.rng.clone(); - - // Generate a random value of the column type - let value = Value::arbitrary_from(&mut rng, &column.column_type); - - // Create a whole new row - let mut row = Vec::new(); - for (i, column) in env.tables[table].columns.iter().enumerate() { - if i == column_index { - row.push(value.clone()); - } else { - let value = Value::arbitrary_from(&mut rng, &column.column_type); - row.push(value); - } - } - - // Insert the row - let query = Query::Insert(Insert { - table: env.tables[table].name.clone(), - values: row.clone(), - }); - let _ = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); - // Shadow operation on the table - env.tables[table].rows.push(row.clone()); - - // Create a query that selects the row - let query = Query::Select(Select { - table: env.tables[table].name.clone(), - predicate: Predicate::Eq(column.name.clone(), value), - }); - - // Get all rows - let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); - - // Check that the row is there - assert!(rows.iter().any(|r| r == &row)); -} - -fn property_select_all(env: &mut SimulatorEnv, conn: &mut Rc) { - // Get a random table - let table = env.rng.gen_range(0..env.tables.len()); - - // Create a query that selects all rows - let query = Query::Select(Select { - table: env.tables[table].name.clone(), - predicate: 
Predicate::And(Vec::new()), - }); - - // Get all rows - let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); - - // Make sure the rows are the same - compare_equal_rows(&rows, &env.tables[table].rows); -} fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc) -> Result<()> { if env.tables.is_empty() { @@ -827,38 +250,7 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc) -> Result< Ok(()) } -struct Name(String); -impl Arbitrary for Name { - fn arbitrary(rng: &mut R) -> Self { - let name = readable_name_custom("_", rng); - Name(name.replace("-", "_")) - } -} - -impl Deref for Name { - type Target = str; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -fn gen_random_text(rng: &mut T) -> String { - let big_text = rng.gen_ratio(1, 1000); - if big_text { - let max_size: u64 = 2 * 1024 * 1024 * 1024; - let size = rng.gen_range(1024..max_size); - let mut name = String::new(); - for i in 0..size { - name.push(((i % 26) as u8 + b'A') as char); - } - name - } else { - let name = readable_name_custom("_", rng); - name.replace("-", "_") - } -} fn get_all_rows( env: &mut SimulatorEnv, @@ -1086,48 +478,3 @@ impl Drop for SimulatorFile { } } -impl ColumnType { - pub fn as_str(&self) -> &str { - match self { - ColumnType::Integer => "INTEGER", - ColumnType::Float => "FLOAT", - ColumnType::Text => "TEXT", - ColumnType::Blob => "BLOB", - } - } -} - -impl Table { - pub fn to_create_str(&self) -> String { - let mut out = String::new(); - - out.push_str(format!("CREATE TABLE {} (", self.name).as_str()); - - assert!(!self.columns.is_empty()); - for column in &self.columns { - out.push_str(format!("{} {},", column.name, column.column_type.as_str()).as_str()); - } - // remove last comma - out.pop(); - - out.push_str(");"); - out - } -} - -impl Display for Value { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Value::Null => write!(f, "NULL"), - Value::Integer(i) => write!(f, "{}", i), - Value::Float(fl) 
=> write!(f, "{}", fl), - Value::Text(t) => write!(f, "'{}'", t), - Value::Blob(b) => write!(f, "{}", to_sqlite_blob(b)), - } - } -} - -fn to_sqlite_blob(bytes: &[u8]) -> String { - let hex: String = bytes.iter().map(|b| format!("{:02X}", b)).collect(); - format!("X'{}'", hex) -} diff --git a/simulator/model.rs b/simulator/model.rs new file mode 100644 index 000000000..5c8b5f5e0 --- /dev/null +++ b/simulator/model.rs @@ -0,0 +1,3 @@ + +pub mod table; +pub mod query; diff --git a/simulator/model/query.rs b/simulator/model/query.rs new file mode 100644 index 000000000..26dc75982 --- /dev/null +++ b/simulator/model/query.rs @@ -0,0 +1,111 @@ +use std::fmt::Display; + +use crate::model::table::{Table, Value}; + + +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum Predicate { + And(Vec), // p1 AND p2 AND p3... AND pn + Or(Vec), // p1 OR p2 OR p3... OR pn + Eq(String, Value), // column = Value + Gt(String, Value), // column > Value + Lt(String, Value), // column < Value +} + +impl Display for Predicate { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Predicate::And(predicates) => { + if predicates.is_empty() { + // todo: Make this TRUE when the bug is fixed + write!(f, "TRUE") + } else { + write!(f, "(")?; + for (i, p) in predicates.iter().enumerate() { + if i != 0 { + write!(f, " AND ")?; + } + write!(f, "{}", p)?; + } + write!(f, ")") + } + } + Predicate::Or(predicates) => { + if predicates.is_empty() { + write!(f, "FALSE") + } else { + write!(f, "(")?; + for (i, p) in predicates.iter().enumerate() { + if i != 0 { + write!(f, " OR ")?; + } + write!(f, "{}", p)?; + } + write!(f, ")") + } + } + Predicate::Eq(name, value) => write!(f, "{} = {}", name, value), + Predicate::Gt(name, value) => write!(f, "{} > {}", name, value), + Predicate::Lt(name, value) => write!(f, "{} < {}", name, value), + } + } +} + +// This type represents the potential queries on the database. 
+#[derive(Debug)] +pub(crate) enum Query { + Create(Create), + Select(Select), + Insert(Insert), + Delete(Delete), +} + +#[derive(Debug)] +pub(crate) struct Create { + pub(crate) table: Table, +} + +#[derive(Clone, Debug, PartialEq)] +pub(crate) struct Select { + pub(crate) table: String, + pub(crate) predicate: Predicate, +} + +#[derive(Clone, Debug, PartialEq)] +pub(crate) struct Insert { + pub(crate) table: String, + pub(crate) values: Vec, +} + +#[derive(Clone, Debug, PartialEq)] +pub(crate) struct Delete { + pub(crate) table: String, + pub(crate) predicate: Predicate, +} + +impl Display for Query { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Query::Create(Create { table }) => write!(f, "{}", table.to_create_str()), + Query::Select(Select { + table, + predicate: guard, + }) => write!(f, "SELECT * FROM {} WHERE {}", table, guard), + Query::Insert(Insert { table, values }) => { + write!(f, "INSERT INTO {} VALUES (", table)?; + for (i, v) in values.iter().enumerate() { + if i != 0 { + write!(f, ", ")?; + } + write!(f, "{}", v)?; + } + write!(f, ")") + } + Query::Delete(Delete { + table, + predicate: guard, + }) => write!(f, "DELETE FROM {} WHERE {}", table, guard), + } + } +} + diff --git a/simulator/model/table.rs b/simulator/model/table.rs new file mode 100644 index 000000000..1797e54bf --- /dev/null +++ b/simulator/model/table.rs @@ -0,0 +1,91 @@ + +use std::{fmt::Display, ops::Deref}; + +pub(crate) struct Name(pub(crate) String); + +impl Deref for Name { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[derive(Debug)] +pub(crate) struct Table { + pub(crate) rows: Vec>, + pub(crate) name: String, + pub(crate) columns: Vec, +} + +impl Table { + pub fn to_create_str(&self) -> String { + let mut out = String::new(); + + out.push_str(format!("CREATE TABLE {} (", self.name).as_str()); + + assert!(!self.columns.is_empty()); + for column in &self.columns { + out.push_str(format!("{} {},", 
column.name, column.column_type.as_str()).as_str()); + } + // remove last comma + out.pop(); + + out.push_str(");"); + out + } +} + +#[derive(Debug, Clone)] +pub(crate) struct Column { + pub(crate) name: String, + pub(crate) column_type: ColumnType, + pub(crate) primary: bool, + pub(crate) unique: bool, +} + +#[derive(Debug, Clone)] +pub(crate) enum ColumnType { + Integer, + Float, + Text, + Blob, +} + +impl ColumnType { + pub fn as_str(&self) -> &str { + match self { + ColumnType::Integer => "INTEGER", + ColumnType::Float => "FLOAT", + ColumnType::Text => "TEXT", + ColumnType::Blob => "BLOB", + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum Value { + Null, + Integer(i64), + Float(f64), + Text(String), + Blob(Vec), +} + +fn to_sqlite_blob(bytes: &[u8]) -> String { + let hex: String = bytes.iter().map(|b| format!("{:02X}", b)).collect(); + format!("X'{}'", hex) +} + +impl Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Value::Null => write!(f, "NULL"), + Value::Integer(i) => write!(f, "{}", i), + Value::Float(fl) => write!(f, "{}", fl), + Value::Text(t) => write!(f, "'{}'", t), + Value::Blob(b) => write!(f, "{}", to_sqlite_blob(b)), + } + } +} + diff --git a/simulator/properties.rs b/simulator/properties.rs new file mode 100644 index 000000000..a6536d1d8 --- /dev/null +++ b/simulator/properties.rs @@ -0,0 +1,78 @@ +use std::rc::Rc; + +use limbo_core::Connection; +use rand::Rng; + +use crate::{ + compare_equal_rows, + generation::ArbitraryFrom, + get_all_rows, + model::{ + query::{Insert, Predicate, Query, Select}, + table::Value, + }, + SimulatorEnv, +}; + +pub fn property_insert_select(env: &mut SimulatorEnv, conn: &mut Rc) { + // Get a random table + let table = env.rng.gen_range(0..env.tables.len()); + + // Pick a random column + let column_index = env.rng.gen_range(0..env.tables[table].columns.len()); + let column = &env.tables[table].columns[column_index].clone(); + + let mut rng = 
env.rng.clone(); + + // Generate a random value of the column type + let value = Value::arbitrary_from(&mut rng, &column.column_type); + + // Create a whole new row + let mut row = Vec::new(); + for (i, column) in env.tables[table].columns.iter().enumerate() { + if i == column_index { + row.push(value.clone()); + } else { + let value = Value::arbitrary_from(&mut rng, &column.column_type); + row.push(value); + } + } + + // Insert the row + let query = Query::Insert(Insert { + table: env.tables[table].name.clone(), + values: row.clone(), + }); + let _ = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); + // Shadow operation on the table + env.tables[table].rows.push(row.clone()); + + // Create a query that selects the row + let query = Query::Select(Select { + table: env.tables[table].name.clone(), + predicate: Predicate::Eq(column.name.clone(), value), + }); + + // Get all rows + let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); + + // Check that the row is there + assert!(rows.iter().any(|r| r == &row)); +} + +pub fn property_select_all(env: &mut SimulatorEnv, conn: &mut Rc) { + // Get a random table + let table = env.rng.gen_range(0..env.tables.len()); + + // Create a query that selects all rows + let query = Query::Select(Select { + table: env.tables[table].name.clone(), + predicate: Predicate::And(Vec::new()), + }); + + // Get all rows + let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); + + // Make sure the rows are the same + compare_equal_rows(&rows, &env.tables[table].rows); +} From 53ecedaceb492c7b7dacc105d80001ed5c66e850 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Mon, 16 Dec 2024 10:49:56 -0500 Subject: [PATCH 015/144] fix formatting --- simulator/README.md | 74 ++++++++++++++++++++++++++++++++++- simulator/generation.rs | 5 +-- simulator/generation/query.rs | 8 +--- simulator/generation/table.rs | 6 +-- simulator/main.rs | 10 +---- simulator/model.rs | 3 +- simulator/model/query.rs | 2 - 
simulator/model/table.rs | 2 - 8 files changed, 79 insertions(+), 31 deletions(-) diff --git a/simulator/README.md b/simulator/README.md index cdaf20c6f..ed1f688b9 100644 --- a/simulator/README.md +++ b/simulator/README.md @@ -1,2 +1,74 @@ -# Simulator +# Limbo Simulator +The Limbo simulator uses randomized deterministic simulations to test the behavior of the Limbo database. + +Each simulation begins with a random configuration: + +- the database workload distribution (percentages of reads, writes, deletes...), +- database parameters (page size), +- number of readers or writers, etc. + +Based on these parameters, we randomly generate **interaction plans**. Interaction plans consist of statements/queries and assertions that will be executed in order. The building blocks of interaction plans are: + +- Randomly generated SQL queries satisfying the workload distribution, +- Properties, which contain multiple matching queries with assertions indicating the expected result. + +An example of a property is the following: + +```json +{ + "name": "Read your own writes", + "queries": [ + "INSERT INTO t1 (id) VALUES (1)", + "SELECT * FROM t1 WHERE id = 1" + ], + "assertions": [ + "result.rows.length == 1", + "result.rows[0].id == 1" + ] +} +``` + +The simulator executes the interaction plans in a loop and checks the assertions. It can add random queries unrelated to the properties without +breaking the property invariants, in order to reach more diverse states and respect the configured workload distribution. + +The simulator code is broken into 4 main parts: + +- **Simulator(main.rs)**: The main entry point of the simulator. It generates random configurations and interaction plans, and executes them. +- **Model(model.rs, model/table.rs, model/query.rs)**: A simpler model of the database. It contains atomic actions for insertion and selection; we use this model when deciding the next actions.
+- **Generation(generation.rs, generation/table.rs, generation/query.rs, generation/plan.rs)**: Random generation functions for the database model and interaction plans. +- **Properties(properties.rs)**: Contains the properties that we want to test. + +## Running the simulator + +To run the simulator, you can use the following command: + +```bash +cargo run +``` + +In the future, this command will invoke a clap command-line interface for configuring the simulator. For now, the simulator runs with the default configuration; to change it, edit the `main.rs` file. If you want to see the logs, you can set the `RUST_LOG` environment variable. + +```bash +RUST_LOG=info cargo run --bin limbo_sim +``` + +## Adding new properties + +Todo + +## Adding new generation functions + +Todo + +## Adding new models + +Todo + +## Coverage with Limbo + +Todo + +## Automatic Compatibility Testing with SQLite + +Todo \ No newline at end of file diff --git a/simulator/generation.rs b/simulator/generation.rs index 73f84c11c..ece3a2c3b 100644 --- a/simulator/generation.rs +++ b/simulator/generation.rs @@ -1,8 +1,8 @@ use anarchist_readable_name_generator_lib::readable_name_custom; use rand::Rng; -pub mod table; pub mod query; +pub mod table; pub trait Arbitrary { fn arbitrary(rng: &mut R) -> Self; @@ -12,7 +12,6 @@ pub trait ArbitraryFrom { fn arbitrary_from(rng: &mut R, t: &T) -> Self; } - fn gen_random_text(rng: &mut T) -> String { let big_text = rng.gen_ratio(1, 1000); if big_text { @@ -28,5 +27,3 @@ fn gen_random_text(rng: &mut T) -> String { name.replace("-", "_") } } - - diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index d8fc4ae16..748409972 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -1,12 +1,10 @@ - +use crate::generation::table::{GTValue, LTValue}; use crate::generation::{Arbitrary, ArbitraryFrom}; -use crate::generation::table::{LTValue, GTValue}; use crate::model::query::{Create, Delete, Insert, Predicate, Query, Select};
use crate::model::table::{Table, Value}; use rand::Rng; - impl Arbitrary for Create { fn arbitrary(rng: &mut R) -> Self { Create { @@ -15,7 +13,6 @@ impl Arbitrary for Create { } } - impl ArbitraryFrom> for Select { fn arbitrary_from(rng: &mut R, tables: &Vec
) -> Self { let table = rng.gen_range(0..tables.len()); @@ -36,7 +33,6 @@ impl ArbitraryFrom> for Select { } } - impl ArbitraryFrom
for Insert { fn arbitrary_from(rng: &mut R, table: &Table) -> Self { let values = table @@ -51,7 +47,6 @@ impl ArbitraryFrom
for Insert { } } - impl ArbitraryFrom
for Delete { fn arbitrary_from(rng: &mut R, table: &Table) -> Self { Delete { @@ -75,7 +70,6 @@ impl ArbitraryFrom
for Query { } } - struct CompoundPredicate(Predicate); struct SimplePredicate(Predicate); diff --git a/simulator/generation/table.rs b/simulator/generation/table.rs index 8b1d6d421..8d5d70e98 100644 --- a/simulator/generation/table.rs +++ b/simulator/generation/table.rs @@ -1,7 +1,6 @@ - use rand::Rng; -use crate::generation::{Arbitrary, ArbitraryFrom, readable_name_custom, gen_random_text}; +use crate::generation::{gen_random_text, readable_name_custom, Arbitrary, ArbitraryFrom}; use crate::model::table::{Column, ColumnType, Name, Table, Value}; impl Arbitrary for Name { @@ -11,7 +10,6 @@ impl Arbitrary for Name { } } - impl Arbitrary for Table { fn arbitrary(rng: &mut R) -> Self { let name = Name::arbitrary(rng).0; @@ -39,7 +37,6 @@ impl Arbitrary for Column { } } - impl Arbitrary for ColumnType { fn arbitrary(rng: &mut R) -> Self { match rng.gen_range(0..4) { @@ -193,4 +190,3 @@ impl ArbitraryFrom for GTValue { } } } - diff --git a/simulator/main.rs b/simulator/main.rs index bfc9b658f..11c171b25 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,7 +1,7 @@ use generation::{Arbitrary, ArbitraryFrom}; use limbo_core::{Connection, Database, File, OpenFlags, PlatformIO, Result, RowResult, IO}; -use model::table::{Column, Name, Table, Value}; use model::query::{Insert, Predicate, Query, Select}; +use model::table::{Column, Name, Table, Value}; use properties::{property_insert_select, property_select_all}; use rand::prelude::*; use rand_chacha::ChaCha8Rng; @@ -11,8 +11,8 @@ use std::sync::Arc; use tempfile::TempDir; mod generation; -mod properties; mod model; +mod properties; struct SimulatorEnv { opts: SimulatorOpts, @@ -49,8 +49,6 @@ struct SimulatorOpts { page_size: usize, } - - #[allow(clippy::arc_with_non_send_sync)] fn main() { let _ = env_logger::try_init(); @@ -137,7 +135,6 @@ fn main() { env.io.print_stats(); } - fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc) -> Result<()> { if env.tables.is_empty() { maybe_add_table(env, conn)?; @@ 
-250,8 +247,6 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc) -> Result< Ok(()) } - - fn get_all_rows( env: &mut SimulatorEnv, conn: &mut Rc, @@ -477,4 +472,3 @@ impl Drop for SimulatorFile { self.inner.unlock_file().expect("Failed to unlock file"); } } - diff --git a/simulator/model.rs b/simulator/model.rs index 5c8b5f5e0..a29f56382 100644 --- a/simulator/model.rs +++ b/simulator/model.rs @@ -1,3 +1,2 @@ - -pub mod table; pub mod query; +pub mod table; diff --git a/simulator/model/query.rs b/simulator/model/query.rs index 26dc75982..20058aead 100644 --- a/simulator/model/query.rs +++ b/simulator/model/query.rs @@ -2,7 +2,6 @@ use std::fmt::Display; use crate::model::table::{Table, Value}; - #[derive(Clone, Debug, PartialEq)] pub(crate) enum Predicate { And(Vec), // p1 AND p2 AND p3... AND pn @@ -108,4 +107,3 @@ impl Display for Query { } } } - diff --git a/simulator/model/table.rs b/simulator/model/table.rs index 1797e54bf..bc018e132 100644 --- a/simulator/model/table.rs +++ b/simulator/model/table.rs @@ -1,4 +1,3 @@ - use std::{fmt::Display, ops::Deref}; pub(crate) struct Name(pub(crate) String); @@ -88,4 +87,3 @@ impl Display for Value { } } } - From 9e0a9b5490ea64682686a3f97faaf1cd0a7c7b20 Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Sun, 24 Nov 2024 22:27:26 +0200 Subject: [PATCH 016/144] Add mathematical functions table to COMPAT.md --- COMPAT.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/COMPAT.md b/COMPAT.md index ec9473d48..e65b05c67 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -160,6 +160,41 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). 
| upper(X) | Yes | | | zeroblob(N) | Yes | | +### Mathematical functions + +| Function | Status | Comment | +| ---------- | ------ | ------- | +| acos(X) | No | | +| acosh(X) | No | | +| asin(X) | No | | +| asinh(X) | No | | +| atan(X) | No | | +| atan2(Y,X) | No | | +| atanh(X) | No | | +| ceil(X) | No | | +| ceiling(X) | No | | +| cos(X) | No | | +| cosh(X) | No | | +| degrees(X) | No | | +| exp(X) | No | | +| floor(X) | No | | +| ln(X) | No | | +| log(B,X) | No | | +| log(X) | No | | +| log10(X) | No | | +| log2(X) | No | | +| mod(X,Y) | No | | +| pi() | No | | +| pow(X,Y) | No | | +| power(X,Y) | No | | +| radians(X) | No | | +| sin(X) | No | | +| sinh(X) | No | | +| sqrt(X) | No | | +| tan(X) | No | | +| tanh(X) | No | | +| trunc(X) | No | | + ### Aggregate functions | Function | Status | Comment | From 9720f63a55db370fe2da6da2107ba81c7602a513 Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Sun, 24 Nov 2024 23:24:28 +0200 Subject: [PATCH 017/144] Add types for mathematical functions --- core/function.rs | 146 +++++++++++++++++++++++++++++++++++++++++ core/translate/expr.rs | 5 +- core/vdbe/mod.rs | 5 +- 3 files changed, 154 insertions(+), 2 deletions(-) diff --git a/core/function.rs b/core/function.rs index 9e2dca0f3..c4885925f 100644 --- a/core/function.rs +++ b/core/function.rs @@ -139,10 +139,126 @@ impl Display for ScalarFunc { } } +#[derive(Debug, Clone, PartialEq)] +pub enum MathFunc { + Acos, + Acosh, + Asin, + Asinh, + Atan, + Atan2, + Atanh, + Ceil, + Ceiling, + Cos, + Cosh, + Degrees, + Exp, + Floor, + Ln, + Log, + Log10, + Log2, + Mod, + Pi, + Pow, + Power, + Radians, + Sin, + Sinh, + Sqrt, + Tan, + Tanh, + Trunc, +} + +pub enum MathFuncArity { + Nullary, + Unary, + Binary, + UnaryOrBinary, +} + +impl MathFunc { + pub fn arity(&self) -> MathFuncArity { + match self { + MathFunc::Pi => MathFuncArity::Nullary, + + MathFunc::Acos + | MathFunc::Acosh + | MathFunc::Asin + | MathFunc::Asinh + | MathFunc::Atan + | MathFunc::Atanh + | MathFunc::Ceil + | 
MathFunc::Ceiling + | MathFunc::Cos + | MathFunc::Cosh + | MathFunc::Degrees + | MathFunc::Exp + | MathFunc::Floor + | MathFunc::Ln + | MathFunc::Log10 + | MathFunc::Log2 + | MathFunc::Radians + | MathFunc::Sin + | MathFunc::Sinh + | MathFunc::Sqrt + | MathFunc::Tan + | MathFunc::Tanh + | MathFunc::Trunc => MathFuncArity::Unary, + + MathFunc::Atan2 | MathFunc::Mod | MathFunc::Pow | MathFunc::Power => { + MathFuncArity::Binary + } + + MathFunc::Log => MathFuncArity::UnaryOrBinary, + } + } +} + +impl Display for MathFunc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let str = match self { + MathFunc::Acos => "acos".to_string(), + MathFunc::Acosh => "acosh".to_string(), + MathFunc::Asin => "asin".to_string(), + MathFunc::Asinh => "asinh".to_string(), + MathFunc::Atan => "atan".to_string(), + MathFunc::Atan2 => "atan2".to_string(), + MathFunc::Atanh => "atanh".to_string(), + MathFunc::Ceil => "ceil".to_string(), + MathFunc::Ceiling => "ceiling".to_string(), + MathFunc::Cos => "cos".to_string(), + MathFunc::Cosh => "cosh".to_string(), + MathFunc::Degrees => "degrees".to_string(), + MathFunc::Exp => "exp".to_string(), + MathFunc::Floor => "floor".to_string(), + MathFunc::Ln => "ln".to_string(), + MathFunc::Log => "log".to_string(), + MathFunc::Log10 => "log10".to_string(), + MathFunc::Log2 => "log2".to_string(), + MathFunc::Mod => "mod".to_string(), + MathFunc::Pi => "pi".to_string(), + MathFunc::Pow => "pow".to_string(), + MathFunc::Power => "power".to_string(), + MathFunc::Radians => "radians".to_string(), + MathFunc::Sin => "sin".to_string(), + MathFunc::Sinh => "sinh".to_string(), + MathFunc::Sqrt => "sqrt".to_string(), + MathFunc::Tan => "tan".to_string(), + MathFunc::Tanh => "tanh".to_string(), + MathFunc::Trunc => "trunc".to_string(), + }; + write!(f, "{}", str) + } +} + #[derive(Debug)] pub enum Func { Agg(AggFunc), Scalar(ScalarFunc), + Math(MathFunc), #[cfg(feature = "json")] Json(JsonFunc), } @@ -152,6 +268,7 @@ impl Display for 
Func { match self { Func::Agg(agg_func) => write!(f, "{}", agg_func.to_string()), Func::Scalar(scalar_func) => write!(f, "{}", scalar_func), + Func::Math(math_func) => write!(f, "{}", math_func), #[cfg(feature = "json")] Func::Json(json_func) => write!(f, "{}", json_func), } @@ -216,6 +333,35 @@ impl Func { "unhex" => Ok(Func::Scalar(ScalarFunc::Unhex)), "zeroblob" => Ok(Func::Scalar(ScalarFunc::ZeroBlob)), "soundex" => Ok(Func::Scalar(ScalarFunc::Soundex)), + "acos" => Ok(Func::Math(MathFunc::Acos)), + "acosh" => Ok(Func::Math(MathFunc::Acosh)), + "asin" => Ok(Func::Math(MathFunc::Asin)), + "asinh" => Ok(Func::Math(MathFunc::Asinh)), + "atan" => Ok(Func::Math(MathFunc::Atan)), + "atan2" => Ok(Func::Math(MathFunc::Atan2)), + "atanh" => Ok(Func::Math(MathFunc::Atanh)), + "ceil" => Ok(Func::Math(MathFunc::Ceil)), + "ceiling" => Ok(Func::Math(MathFunc::Ceiling)), + "cos" => Ok(Func::Math(MathFunc::Cos)), + "cosh" => Ok(Func::Math(MathFunc::Cosh)), + "degrees" => Ok(Func::Math(MathFunc::Degrees)), + "exp" => Ok(Func::Math(MathFunc::Exp)), + "floor" => Ok(Func::Math(MathFunc::Floor)), + "ln" => Ok(Func::Math(MathFunc::Ln)), + "log" => Ok(Func::Math(MathFunc::Log)), + "log10" => Ok(Func::Math(MathFunc::Log10)), + "log2" => Ok(Func::Math(MathFunc::Log2)), + "mod" => Ok(Func::Math(MathFunc::Mod)), + "pi" => Ok(Func::Math(MathFunc::Pi)), + "pow" => Ok(Func::Math(MathFunc::Pow)), + "power" => Ok(Func::Math(MathFunc::Power)), + "radians" => Ok(Func::Math(MathFunc::Radians)), + "sin" => Ok(Func::Math(MathFunc::Sin)), + "sinh" => Ok(Func::Math(MathFunc::Sinh)), + "sqrt" => Ok(Func::Math(MathFunc::Sqrt)), + "tan" => Ok(Func::Math(MathFunc::Tan)), + "tanh" => Ok(Func::Math(MathFunc::Tanh)), + "trunc" => Ok(Func::Math(MathFunc::Trunc)), _ => Err(()), } } diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 04ea1fc98..45ceb6ec3 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -2,7 +2,7 @@ use sqlite3_parser::ast::{self, UnaryOperator}; #[cfg(feature 
= "json")] use crate::function::JsonFunc; -use crate::function::{AggFunc, Func, FuncCtx, ScalarFunc}; +use crate::function::{AggFunc, Func, FuncCtx, MathFuncArity, ScalarFunc}; use crate::schema::Type; use crate::util::normalize_ident; use crate::vdbe::{builder::ProgramBuilder, BranchOffset, Insn}; @@ -1603,6 +1603,9 @@ pub fn translate_expr( } } } + Func::Math(mfs) => match mfs { + _ => unimplemented!(), + }, } } ast::Expr::FunctionCallStar { .. } => todo!(), diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index fa07fa69e..0da9926f9 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -24,7 +24,7 @@ pub mod sorter; mod datetime; use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY}; -use crate::function::{AggFunc, FuncCtx, ScalarFunc}; +use crate::function::{AggFunc, FuncCtx, MathFunc, MathFuncArity, ScalarFunc}; use crate::pseudo::PseudoCursor; use crate::schema::Table; use crate::storage::sqlite3_ondisk::DatabaseHeader; @@ -2491,6 +2491,9 @@ impl Program { state.registers[*dest] = exec_replace(source, pattern, replacement); } }, + crate::function::Func::Math(math_func) => match math_func { + _ => unimplemented!(), + }, crate::function::Func::Agg(_) => { unreachable!("Aggregate functions should not be handled here") } From 793a85a14cdcc0fae3280638ae0b2a6362f7e46e Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Thu, 28 Nov 2024 19:28:00 +0200 Subject: [PATCH 018/144] Tolerate floating point minor differences in compatibility tests --- testing/tester.tcl | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/testing/tester.tcl b/testing/tester.tcl index 5ccb2165b..1aae417ca 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -32,3 +32,36 @@ proc do_execsql_test_on_specific_db {db_name test_name sql_statements expected_o set combined_expected_output [join $expected_outputs "\n"] run_test $::sqlite_exec $db_name $combined_sql $combined_expected_output } + +proc within_tolerance {actual expected tolerance} { + expr 
{abs($actual - $expected) <= $tolerance} +} + +proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolerance} { + puts "Running test: $test_name" + set combined_sql [string trim $sql_statements] + set actual_output [evaluate_sql $::sqlite_exec $combined_sql] + set actual_values [split $actual_output "\n"] + set expected_values [split $expected_outputs "\n"] + + if {[llength $actual_values] != [llength $expected_values]} { + puts "Test FAILED: '$sql_statements'" + puts "returned '$actual_output'" + puts "expected '$expected_outputs'" + exit 1 + } + + for {set i 0} {$i < [llength $actual_values]} {incr i} { + set actual [lindex $actual_values $i] + set expected [lindex $expected_values $i] + + if {![within_tolerance $actual $expected $tolerance]} { + set lower_bound [expr {$expected - $tolerance}] + set upper_bound [expr {$expected + $tolerance}] + puts "Test FAILED: '$sql_statements'" + puts "returned '$actual'" + puts "expected a value within the range \[$lower_bound, $upper_bound\]" + exit 1 + } + } +} From f69fdc1645d64f4b5b474111a594536994ad0732 Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Mon, 25 Nov 2024 00:03:06 +0200 Subject: [PATCH 019/144] Support unary math functions --- COMPAT.md | 46 ++--- core/translate/expr.rs | 33 +++- core/vdbe/mod.rs | 64 +++++- testing/math.test | 430 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 548 insertions(+), 25 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index e65b05c67..c3eb70e00 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -164,36 +164,36 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). 
| Function | Status | Comment | | ---------- | ------ | ------- | -| acos(X) | No | | -| acosh(X) | No | | -| asin(X) | No | | -| asinh(X) | No | | -| atan(X) | No | | +| acos(X) | Yes | | +| acosh(X) | Yes | | +| asin(X) | Yes | | +| asinh(X) | Yes | | +| atan(X) | Yes | | | atan2(Y,X) | No | | -| atanh(X) | No | | -| ceil(X) | No | | -| ceiling(X) | No | | -| cos(X) | No | | -| cosh(X) | No | | -| degrees(X) | No | | -| exp(X) | No | | -| floor(X) | No | | -| ln(X) | No | | +| atanh(X) | Yes | | +| ceil(X) | Yes | | +| ceiling(X) | Yes | | +| cos(X) | Yes | | +| cosh(X) | Yes | | +| degrees(X) | Yes | | +| exp(X) | Yes | | +| floor(X) | Yes | | +| ln(X) | Yes | | | log(B,X) | No | | | log(X) | No | | -| log10(X) | No | | -| log2(X) | No | | +| log10(X) | Yes | | +| log2(X) | Yes | | | mod(X,Y) | No | | | pi() | No | | | pow(X,Y) | No | | | power(X,Y) | No | | -| radians(X) | No | | -| sin(X) | No | | -| sinh(X) | No | | -| sqrt(X) | No | | -| tan(X) | No | | -| tanh(X) | No | | -| trunc(X) | No | | +| radians(X) | Yes | | +| sin(X) | Yes | | +| sinh(X) | Yes | | +| sqrt(X) | Yes | | +| tan(X) | Yes | | +| tanh(X) | Yes | | +| trunc(X) | Yes | | ### Aggregate functions diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 45ceb6ec3..da2c260b8 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1603,7 +1603,38 @@ pub fn translate_expr( } } } - Func::Math(mfs) => match mfs { + Func::Math(math_func) => match math_func.arity() { + MathFuncArity::Unary => { + let args = if let Some(args) = args { + if args.len() != 1 { + crate::bail_parse_error!( + "{} function with not exactly 1 argument", + math_func + ); + } + args + } else { + crate::bail_parse_error!("{} function with no arguments", math_func); + }; + + let reg = program.alloc_register(); + + translate_expr( + program, + referenced_tables, + &args[0], + reg, + precomputed_exprs_to_registers, + )?; + + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: reg, + dest: 
target_register, + func: func_ctx, + }); + Ok(target_register) + } _ => unimplemented!(), }, } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 0da9926f9..c97aa6202 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2491,7 +2491,12 @@ impl Program { state.registers[*dest] = exec_replace(source, pattern, replacement); } }, - crate::function::Func::Math(math_func) => match math_func { + crate::function::Func::Math(math_func) => match math_func.arity() { + MathFuncArity::Unary => { + let reg_value = &state.registers[*start_reg]; + let result = exec_math_unary(reg_value, math_func); + state.registers[*dest] = result; + } _ => unimplemented!(), }, crate::function::Func::Agg(_) => { @@ -3600,6 +3605,63 @@ fn execute_sqlite_version(version_integer: i64) -> String { format!("{}.{}.{}", major, minor, release) } +fn to_f64(reg: &OwnedValue) -> Option { + match reg { + OwnedValue::Integer(i) => Some(*i as f64), + OwnedValue::Float(f) => Some(*f), + OwnedValue::Text(t) => t.parse::().ok(), + OwnedValue::Agg(ctx) => to_f64(ctx.final_value()), + _ => None, + } +} + +fn exec_math_unary(reg: &OwnedValue, function: &MathFunc) -> OwnedValue { + // In case of some functions and integer input, return the input as is + if let OwnedValue::Integer(_) = reg { + if matches! 
{ function, MathFunc::Ceil | MathFunc::Ceiling | MathFunc::Floor | MathFunc::Trunc } + { + return reg.clone(); + } + } + + let f = match to_f64(reg) { + Some(f) => f, + None => return OwnedValue::Null, + }; + + let result = match function { + MathFunc::Acos => f.acos(), + MathFunc::Acosh => f.acosh(), + MathFunc::Asin => f.asin(), + MathFunc::Asinh => f.asinh(), + MathFunc::Atan => f.atan(), + MathFunc::Atanh => f.atanh(), + MathFunc::Ceil | MathFunc::Ceiling => f.ceil(), + MathFunc::Cos => f.cos(), + MathFunc::Cosh => f.cosh(), + MathFunc::Degrees => f.to_degrees(), + MathFunc::Exp => f.exp(), + MathFunc::Floor => f.floor(), + MathFunc::Ln => f.ln(), + MathFunc::Log10 => f.log10(), + MathFunc::Log2 => f.log2(), + MathFunc::Radians => f.to_radians(), + MathFunc::Sin => f.sin(), + MathFunc::Sinh => f.sinh(), + MathFunc::Sqrt => f.sqrt(), + MathFunc::Tan => f.tan(), + MathFunc::Tanh => f.tanh(), + MathFunc::Trunc => f.trunc(), + _ => unreachable!("Unexpected mathematical unary function {:?}", function), + }; + + if result.is_nan() { + OwnedValue::Null + } else { + OwnedValue::Float(result) + } +} + #[cfg(test)] mod tests { diff --git a/testing/math.test b/testing/math.test index af477fca4..bd795ef15 100644 --- a/testing/math.test +++ b/testing/math.test @@ -361,3 +361,433 @@ do_execsql_test bitwise-not-zero { SELECT ~0 } {-1} + +set tolerance 1e-13 + +do_execsql_test_tolerance acos-int { + SELECT acos(1) +} {0.0} $tolerance + +do_execsql_test_tolerance acos-float { + SELECT acos(-0.5) +} {2.0943951023931957} $tolerance + +do_execsql_test_tolerance acos-str { + SELECT acos('-0.5') +} {2.0943951023931957} $tolerance + +do_execsql_test_tolerance acos-null { + SELECT acos(null) +} {} $tolerance + + +do_execsql_test_tolerance acosh-int { + SELECT acosh(1) +} {0.0} $tolerance + +do_execsql_test_tolerance acosh-float { + SELECT acosh(1.5) +} {0.962423650119207} $tolerance + +do_execsql_test_tolerance acosh-str { + SELECT acosh('1.5') +} {0.962423650119207} $tolerance + 
+do_execsql_test_tolerance acosh-invalid { + SELECT acosh(0.99) +} {} $tolerance + +do_execsql_test_tolerance acosh-null { + SELECT acosh(null) +} {} $tolerance + + +do_execsql_test_tolerance asin-int { + SELECT asin(1) +} {1.5707963267948966} $tolerance + +do_execsql_test_tolerance asin-float { + SELECT asin(-0.5) +} {-0.5235987755982989} $tolerance + +do_execsql_test_tolerance asin-str { + SELECT asin('-0.5') +} {-0.5235987755982989} $tolerance + +do_execsql_test_tolerance asin-null { + SELECT asin(null) +} {} $tolerance + + +do_execsql_test_tolerance sin-int { + SELECT sin(1) +} {0.841470984807897} $tolerance + +do_execsql_test_tolerance sin-float { + SELECT sin(-0.5) +} {-0.479425538604203} $tolerance + +do_execsql_test_tolerance sin-str { + SELECT sin('-0.5') +} {-0.479425538604203} $tolerance + +do_execsql_test_tolerance sin-null { + SELECT sin(null) +} {} $tolerance + +do_execsql_test_tolerance sin-products-id { + SELECT sin(id) from products +} {0.8414709848078965 +0.9092974268256817 +0.1411200080598672 +-0.7568024953079282 +-0.9589242746631385 +-0.27941549819892586 +0.6569865987187891 +0.9893582466233818 +0.4121184852417566 +-0.5440211108893698 +-0.9999902065507035} $tolerance + + +do_execsql_test_tolerance asinh-int { + SELECT asinh(1) +} {0.881373587019543} $tolerance + +do_execsql_test_tolerance asinh-float { + SELECT asinh(-0.5) +} {-0.48121182505960347} $tolerance + +do_execsql_test_tolerance asinh-str { + SELECT asinh('-0.5') +} {-0.48121182505960347} $tolerance + +do_execsql_test_tolerance asinh-null { + SELECT asinh(null) +} {} $tolerance + + +do_execsql_test_tolerance atan-int { + SELECT atan(1) +} {0.7853981633974483} $tolerance + +do_execsql_test_tolerance atan-float { + SELECT atan(-0.5) +} {-0.4636476090008061} $tolerance + +do_execsql_test_tolerance atan-str { + SELECT atan('-0.5') +} {-0.4636476090008061} $tolerance + +do_execsql_test_tolerance atan-null { + SELECT atan(null) +} {} $tolerance + + +do_execsql_test_tolerance tan-int { + SELECT 
tan(1) +} {1.5574077246549} $tolerance + +do_execsql_test_tolerance tan-float { + SELECT tan(-0.5) +} {-0.54630248984379} $tolerance + +do_execsql_test_tolerance tan-str { + SELECT tan('-0.5') +} {-0.54630248984379} $tolerance + +do_execsql_test_tolerance tan-null { + SELECT tan(null) +} {} $tolerance + + +do_execsql_test_tolerance atanh-int { + SELECT atanh(0) +} {0.0} $tolerance + +do_execsql_test_tolerance atanh-float { + SELECT atanh(-0.5) +} {-0.5493061443340548} $tolerance + +do_execsql_test_tolerance atanh-str { + SELECT atanh('-0.5') +} {-0.5493061443340548} $tolerance + +do_execsql_test_tolerance atanh-null { + SELECT atanh(null) +} {} $tolerance + + +do_execsql_test ceil-int { + SELECT ceil(1) +} {1} + +do_execsql_test ceil-float { + SELECT ceil(-1.5) +} {-1.0} + +do_execsql_test ceil-str { + SELECT ceil('1.5') +} {2.0} + +do_execsql_test ceil-null { + SELECT ceil(null) +} {} + + +do_execsql_test ceiling-int { + SELECT ceiling(1) +} {1} + +do_execsql_test ceiling-float { + SELECT ceiling(-1.5) +} {-1.0} + +do_execsql_test ceiling-str { + SELECT ceiling('1.5') +} {2.0} + +do_execsql_test ceiling-null { + SELECT ceiling(null) +} {} + + +do_execsql_test_tolerance cos-int { + SELECT cos(1) +} {0.54030230586814} $tolerance + +do_execsql_test_tolerance cos-float { + SELECT cos(-0.5) +} {0.877582561890373} $tolerance + +do_execsql_test_tolerance cos-str { + SELECT cos('-0.5') +} {0.877582561890373} $tolerance + +do_execsql_test_tolerance cos-null { + SELECT cos(null) +} {} $tolerance + + +do_execsql_test_tolerance cosh-int { + SELECT cosh(1) +} {1.54308063481524} $tolerance + +do_execsql_test_tolerance cosh-float { + SELECT cosh(-0.5) +} {1.12762596520638} $tolerance + +do_execsql_test_tolerance cosh-str { + SELECT cosh('-0.5') +} {1.12762596520638} $tolerance + +do_execsql_test_tolerance cosh-null { + SELECT cosh(null) +} {} $tolerance + + +do_execsql_test_tolerance degrees-int { + SELECT degrees(1) +} {57.2957795130823} $tolerance + +do_execsql_test_tolerance 
degrees-float { + SELECT degrees(-0.5) +} {-28.6478897565412} $tolerance + +do_execsql_test_tolerance degrees-str { + SELECT degrees('-0.5') +} {-28.6478897565412} $tolerance + +do_execsql_test_tolerance degrees-null { + SELECT degrees(null) +} {} $tolerance + + +do_execsql_test_tolerance exp-int { + SELECT exp(1) +} {2.71828182845905} $tolerance + +do_execsql_test_tolerance exp-float { + SELECT exp(-0.5) +} {0.606530659712633} $tolerance + +do_execsql_test_tolerance exp-str { + SELECT exp('-0.5') +} {0.606530659712633} $tolerance + +do_execsql_test_tolerance exp-null { + SELECT exp(null) +} {} $tolerance + + +do_execsql_test floor-int { + SELECT floor(1) +} {1} + +do_execsql_test floor-float { + SELECT floor(-1.5) +} {-2.0} + +do_execsql_test floor-str { + SELECT floor('1.5') +} {1.0} + +do_execsql_test floor-null { + SELECT floor(null) +} {} + + +do_execsql_test_tolerance ln-int { + SELECT ln(1) +} {0.0} $tolerance + +do_execsql_test_tolerance ln-float { + SELECT ln(0.5) +} {-0.693147180559945} $tolerance + +do_execsql_test_tolerance ln-str { + SELECT ln('0.5') +} {-0.693147180559945} $tolerance + +do_execsql_test_tolerance ln-negative { + SELECT ln(-0.5) +} {} $tolerance + +do_execsql_test_tolerance ln-null { + SELECT ln(null) +} {} $tolerance + + +do_execsql_test_tolerance log10-int { + SELECT log10(1) +} {0.0} $tolerance + +do_execsql_test_tolerance log10-float { + SELECT log10(0.5) +} {-0.301029995663981} $tolerance + +do_execsql_test_tolerance log10-str { + SELECT log10('0.5') +} {-0.301029995663981} $tolerance + +do_execsql_test_tolerance log10-negative { + SELECT log10(-0.5) +} {} $tolerance + +do_execsql_test_tolerance log10-null { + SELECT log10(null) +} {} $tolerance + + +do_execsql_test_tolerance log2-int { + SELECT log2(1) +} {0.0} $tolerance + +do_execsql_test_tolerance log2-float { + SELECT log2(0.5) +} {-1.0} $tolerance + +do_execsql_test_tolerance log2-str { + SELECT log2('0.5') +} {-1.0} $tolerance + +do_execsql_test_tolerance log2-negative { + 
SELECT log2(-0.5) +} {} $tolerance + +do_execsql_test_tolerance log2-null { + SELECT log2(null) +} {} $tolerance + + +do_execsql_test_tolerance radians-int { + SELECT radians(1) +} {0.0174532925199433} $tolerance + +do_execsql_test_tolerance radians-float { + SELECT radians(-0.5) +} {-0.00872664625997165} $tolerance + +do_execsql_test_tolerance radians-str { + SELECT radians('-0.5') +} {-0.00872664625997165} $tolerance + +do_execsql_test_tolerance radians-null { + SELECT radians(null) +} {} $tolerance + + +do_execsql_test_tolerance sinh-int { + SELECT sinh(1) +} {1.1752011936438} $tolerance + +do_execsql_test_tolerance sinh-float { + SELECT sinh(-0.5) +} {-0.521095305493747} $tolerance + +do_execsql_test_tolerance sinh-str { + SELECT sinh('-0.5') +} {-0.521095305493747} $tolerance + +do_execsql_test_tolerance sinh-null { + SELECT sinh(null) +} {} $tolerance + + +do_execsql_test_tolerance sqrt-int { + SELECT sqrt(1) +} {1.0} $tolerance + +do_execsql_test_tolerance sqrt-float { + SELECT sqrt(0.5) +} {0.707106781186548} $tolerance + +do_execsql_test_tolerance sqrt-str { + SELECT sqrt('0.5') +} {0.707106781186548} $tolerance + +do_execsql_test_tolerance sqrt-negative { + SELECT sqrt(-0.5) +} {} $tolerance + +do_execsql_test_tolerance sqrt-null { + SELECT sqrt(null) +} {} $tolerance + + +do_execsql_test_tolerance tanh-int { + SELECT tanh(1) +} {0.761594155955765} $tolerance + +do_execsql_test_tolerance tanh-float { + SELECT tanh(-0.5) +} {-0.46211715726001} $tolerance + +do_execsql_test_tolerance tanh-str { + SELECT tanh('-0.5') +} {-0.46211715726001} $tolerance + +do_execsql_test_tolerance tanh-null { + SELECT tanh(null) +} {} $tolerance + + +do_execsql_test trunc-int { + SELECT trunc(1) +} {1} + +do_execsql_test trunc-float { + SELECT trunc(2.5) +} {2.0} + +do_execsql_test trunc-float-negative { + SELECT trunc(-2.5) +} {-2.0} + +do_execsql_test trunc-str { + SELECT trunc('2.5') +} {2.0} + +do_execsql_test trunc-null { + SELECT trunc(null) +} {} From 
5e426a7624c59f1c158bc227cb9858ebaedc6c22 Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Tue, 26 Nov 2024 00:27:39 +0200 Subject: [PATCH 020/144] Support binary math functions --- COMPAT.md | 8 +- core/translate/expr.rs | 47 +++++++++++ core/vdbe/mod.rs | 32 ++++++++ testing/math.test | 180 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 263 insertions(+), 4 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index c3eb70e00..9580799fd 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -169,7 +169,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | asin(X) | Yes | | | asinh(X) | Yes | | | atan(X) | Yes | | -| atan2(Y,X) | No | | +| atan2(Y,X) | Yes | | | atanh(X) | Yes | | | ceil(X) | Yes | | | ceiling(X) | Yes | | @@ -183,10 +183,10 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | log(X) | No | | | log10(X) | Yes | | | log2(X) | Yes | | -| mod(X,Y) | No | | +| mod(X,Y) | Yes | | | pi() | No | | -| pow(X,Y) | No | | -| power(X,Y) | No | | +| pow(X,Y) | Yes | | +| power(X,Y) | Yes | | | radians(X) | Yes | | | sin(X) | Yes | | | sinh(X) | Yes | | diff --git a/core/translate/expr.rs b/core/translate/expr.rs index da2c260b8..7bc08f2c2 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1635,6 +1635,53 @@ pub fn translate_expr( }); Ok(target_register) } + + MathFuncArity::Binary => { + let args = if let Some(args) = args { + if args.len() != 2 { + crate::bail_parse_error!( + "{} function with not exactly 2 arguments", + math_func + ); + } + args + } else { + crate::bail_parse_error!("{} function with no arguments", math_func); + }; + + let reg1 = program.alloc_register(); + let reg2 = program.alloc_register(); + + translate_expr( + program, + referenced_tables, + &args[0], + reg1, + precomputed_exprs_to_registers, + )?; + if let ast::Expr::Literal(_) = &args[0] { + program.mark_last_insn_constant(); + } + + translate_expr( + program, + referenced_tables, + &args[1], + reg2, + 
precomputed_exprs_to_registers, + )?; + if let ast::Expr::Literal(_) = &args[1] { + program.mark_last_insn_constant(); + } + + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: target_register + 1, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } _ => unimplemented!(), }, } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index c97aa6202..09f52d892 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2497,6 +2497,13 @@ impl Program { let result = exec_math_unary(reg_value, math_func); state.registers[*dest] = result; } + + MathFuncArity::Binary => { + let lhs = &state.registers[*start_reg]; + let rhs = &state.registers[*start_reg + 1]; + let result = exec_math_binary(lhs, rhs, math_func); + state.registers[*dest] = result; + } _ => unimplemented!(), }, crate::function::Func::Agg(_) => { @@ -3662,6 +3669,31 @@ fn exec_math_unary(reg: &OwnedValue, function: &MathFunc) -> OwnedValue { } } +fn exec_math_binary(lhs: &OwnedValue, rhs: &OwnedValue, function: &MathFunc) -> OwnedValue { + let lhs = match to_f64(lhs) { + Some(f) => f, + None => return OwnedValue::Null, + }; + + let rhs = match to_f64(rhs) { + Some(f) => f, + None => return OwnedValue::Null, + }; + + let result = match function { + MathFunc::Atan2 => lhs.atan2(rhs), + MathFunc::Mod => lhs % rhs, + MathFunc::Pow | MathFunc::Power => lhs.powf(rhs), + _ => unreachable!("Unexpected mathematical binary function {:?}", function), + }; + + if result.is_nan() { + OwnedValue::Null + } else { + OwnedValue::Float(result) + } +} + #[cfg(test)] mod tests { diff --git a/testing/math.test b/testing/math.test index bd795ef15..9b43b95f9 100644 --- a/testing/math.test +++ b/testing/math.test @@ -791,3 +791,183 @@ do_execsql_test trunc-str { do_execsql_test trunc-null { SELECT trunc(null) } {} + + +do_execsql_test_tolerance atan2-int-int { + SELECT atan2(5, -1) +} {1.76819188664478} $tolerance + +do_execsql_test_tolerance atan2-int-float { + SELECT atan2(5, -1.5) +} 
{1.86225312127276} $tolerance + +do_execsql_test_tolerance atan2-int-str { + SELECT atan2(5, '-1.5') +} {1.86225312127276} $tolerance + +do_execsql_test_tolerance atan2-float-int { + SELECT atan2(5.5, 10) +} {0.502843210927861} $tolerance + +do_execsql_test_tolerance atan2-float-float { + SELECT atan2(5.5, -1.5) +} {1.83704837594582} $tolerance + +do_execsql_test_tolerance atan2-float-str { + SELECT atan2(5.5, '-1.5') +} {1.83704837594582} $tolerance + +do_execsql_test_tolerance atan2-str-str { + SELECT atan2('5.5', '-1.5') +} {1.83704837594582} $tolerance + +do_execsql_test atan2-null-int { + SELECT atan2(null, 5) +} {} + +do_execsql_test atan2-int-null { + SELECT atan2(5, null) +} {} + + +do_execsql_test_tolerance mod-int-int { + SELECT mod(10, -3) +} {1.0} $tolerance + +do_execsql_test_tolerance mod-int-float { + SELECT mod(5, -1.5) +} {0.5} $tolerance + +do_execsql_test_tolerance mod-int-str { + SELECT mod(5, '-1.5') +} {0.5} $tolerance + +do_execsql_test_tolerance mod-float-int { + SELECT mod(5.5, 2) +} {1.5} $tolerance + +do_execsql_test_tolerance mod-float-float { + SELECT mod(5.5, -1.5) +} {1.0} $tolerance + +do_execsql_test_tolerance mod-float-str { + SELECT mod(5.5, '-1.5') +} {1.0} $tolerance + +do_execsql_test_tolerance mod-str-str { + SELECT mod('5.5', '-1.5') +} {1.0} $tolerance + +do_execsql_test mod-null-int { + SELECT mod(null, 5) +} {} + +do_execsql_test mod-int-null { + SELECT mod(5, null) +} {} + +do_execsql_test mod-float-zero { + SELECT mod(1.5, 0) +} {} + +do_execsql_test mod-products-id { + SELECT mod(products.id, 3) from products +} {1.0 +2.0 +0.0 +1.0 +2.0 +0.0 +1.0 +2.0 +0.0 +1.0 +2.0} + +do_execsql_test mod-products-price-id { + SELECT mod(products.price, products.id) from products +} {0.0 +0.0 +0.0 +1.0 +4.0 +4.0 +1.0 +2.0 +1.0 +3.0 +4.0} + + +do_execsql_test_tolerance pow-int-int { + SELECT pow(5, -1) +} {0.2} $tolerance + +do_execsql_test_tolerance pow-int-float { + SELECT pow(5, -1.5) +} {0.0894427190999916} $tolerance + 
+do_execsql_test_tolerance pow-int-str { + SELECT pow(5, '-1.5') +} {0.0894427190999916} $tolerance + +do_execsql_test_tolerance pow-float-int { + SELECT pow(5.5, 2) +} {30.25} $tolerance + +do_execsql_test_tolerance pow-float-float { + SELECT pow(5.5, -1.5) +} {0.077527533220222} $tolerance + +do_execsql_test_tolerance pow-float-str { + SELECT pow(5.5, '-1.5') +} {0.077527533220222} $tolerance + +do_execsql_test_tolerance pow-str-str { + SELECT pow('5.5', '-1.5') +} {0.077527533220222} $tolerance + +do_execsql_test pow-null-int { + SELECT pow(null, 5) +} {} + +do_execsql_test pow-int-null { + SELECT pow(5, null) +} {} + + +do_execsql_test_tolerance power-int-int { + SELECT power(5, -1) +} {0.2} $tolerance + +do_execsql_test_tolerance power-int-float { + SELECT power(5, -1.5) +} {0.0894427190999916} $tolerance + +do_execsql_test_tolerance power-int-str { + SELECT power(5, '-1.5') +} {0.0894427190999916} $tolerance + +do_execsql_test_tolerance power-float-int { + SELECT power(5.5, 2) +} {30.25} $tolerance + +do_execsql_test_tolerance power-float-float { + SELECT power(5.5, -1.5) +} {0.077527533220222} $tolerance + +do_execsql_test_tolerance power-float-str { + SELECT power(5.5, '-1.5') +} {0.077527533220222} $tolerance + +do_execsql_test_tolerance power-str-str { + SELECT power('5.5', '-1.5') +} {0.077527533220222} $tolerance + +do_execsql_test power-null-int { + SELECT power(null, 5) +} {} + +do_execsql_test power-int-null { + SELECT power(5, null) +} {} From 89d0289444da695730a8d565eea41b9027cdf834 Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Thu, 12 Dec 2024 22:54:28 +0200 Subject: [PATCH 021/144] Support `pi()` function --- COMPAT.md | 2 +- core/translate/expr.rs | 14 ++++++++++++++ core/vdbe/mod.rs | 5 +++++ testing/math.test | 5 +++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/COMPAT.md b/COMPAT.md index 9580799fd..25dae3614 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -184,7 +184,7 @@ Feature support of [sqlite expr 
syntax](https://www.sqlite.org/lang_expr.html). | log10(X) | Yes | | | log2(X) | Yes | | | mod(X,Y) | Yes | | -| pi() | No | | +| pi() | Yes | | | pow(X,Y) | Yes | | | power(X,Y) | Yes | | | radians(X) | Yes | | diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 7bc08f2c2..c8e66821d 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1604,6 +1604,20 @@ pub fn translate_expr( } } Func::Math(math_func) => match math_func.arity() { + MathFuncArity::Nullary => { + if args.is_some() { + crate::bail_parse_error!("{} function with arguments", math_func); + } + + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: 0, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } + MathFuncArity::Unary => { let args = if let Some(args) = args { if args.len() != 1 { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 09f52d892..796eeff94 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2492,6 +2492,10 @@ impl Program { } }, crate::function::Func::Math(math_func) => match math_func.arity() { + MathFuncArity::Nullary => { + state.registers[*dest] = OwnedValue::Float(std::f64::consts::PI); + } + MathFuncArity::Unary => { let reg_value = &state.registers[*start_reg]; let result = exec_math_unary(reg_value, math_func); @@ -2504,6 +2508,7 @@ impl Program { let result = exec_math_binary(lhs, rhs, math_func); state.registers[*dest] = result; } + _ => unimplemented!(), }, crate::function::Func::Agg(_) => { diff --git a/testing/math.test b/testing/math.test index 9b43b95f9..c75c387e6 100644 --- a/testing/math.test +++ b/testing/math.test @@ -364,6 +364,11 @@ do_execsql_test bitwise-not-zero { set tolerance 1e-13 +do_execsql_test_tolerance pi { + SELECT pi() +} {3.14159265358979} $tolerance + + do_execsql_test_tolerance acos-int { SELECT acos(1) } {0.0} $tolerance From e69ee80fac5eaab156559a5c38464b2606df74ed Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Sun, 15 Dec 2024 22:30:04 +0200 Subject: [PATCH 
022/144] Support `log(X)` and `log(B,X)` math functions --- COMPAT.md | 4 +-- core/translate/expr.rs | 35 ++++++++++++++++++++++- core/vdbe/mod.rs | 40 +++++++++++++++++++++++++- testing/math.test | 65 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 4 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 25dae3614..f02c25b66 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -179,8 +179,8 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | exp(X) | Yes | | | floor(X) | Yes | | | ln(X) | Yes | | -| log(B,X) | No | | -| log(X) | No | | +| log(B,X) | Yes | | +| log(X) | Yes | | | log10(X) | Yes | | | log2(X) | Yes | | | mod(X,Y) | Yes | | diff --git a/core/translate/expr.rs b/core/translate/expr.rs index c8e66821d..b1db3ac92 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1696,7 +1696,40 @@ pub fn translate_expr( }); Ok(target_register) } - _ => unimplemented!(), + + MathFuncArity::UnaryOrBinary => { + let args = if let Some(args) = args { + if args.len() > 2 { + crate::bail_parse_error!( + "{} function with more than 2 arguments", + math_func + ); + } + args + } else { + crate::bail_parse_error!("{} function with no arguments", math_func); + }; + + let regs = program.alloc_registers(args.len()); + + for (i, arg) in args.iter().enumerate() { + translate_expr( + program, + referenced_tables, + arg, + regs + i, + precomputed_exprs_to_registers, + )?; + } + + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: regs, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } }, } } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 796eeff94..547cde53b 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2509,7 +2509,24 @@ impl Program { state.registers[*dest] = result; } - _ => unimplemented!(), + MathFuncArity::UnaryOrBinary => match math_func { + MathFunc::Log => { + let lhs = &state.registers[*start_reg]; + let rhs = state.registers.get(*start_reg + 1); 
+ + let result = if let Some(arg) = rhs { + exec_math_log(arg, Some(lhs)) + } else { + exec_math_log(lhs, None) + }; + + state.registers[*dest] = result; + } + _ => unreachable!( + "Unexpected mathematical UnaryOrBinary function {:?}", + math_func + ), + }, }, crate::function::Func::Agg(_) => { unreachable!("Aggregate functions should not be handled here") @@ -3699,6 +3716,27 @@ fn exec_math_binary(lhs: &OwnedValue, rhs: &OwnedValue, function: &MathFunc) -> } } +fn exec_math_log(arg: &OwnedValue, base: Option<&OwnedValue>) -> OwnedValue { + let f = match to_f64(arg) { + Some(f) => f, + None => return OwnedValue::Null, + }; + + let base = match base { + Some(base) => match to_f64(base) { + Some(f) => f, + None => return OwnedValue::Null, + }, + None => 10.0, + }; + + if f <= 0.0 || base <= 0.0 || base == 1.0 { + return OwnedValue::Null; + } + + OwnedValue::Float(f.log(base)) +} + #[cfg(test)] mod tests { diff --git a/testing/math.test b/testing/math.test index c75c387e6..064de41e3 100644 --- a/testing/math.test +++ b/testing/math.test @@ -976,3 +976,68 @@ do_execsql_test power-null-int { do_execsql_test power-int-null { SELECT power(5, null) } {} + + +do_execsql_test_tolerance log-int { + SELECT log(1) +} {0.0} $tolerance + +do_execsql_test_tolerance log-float { + SELECT log(1.5) +} {0.176091259055681} $tolerance + +do_execsql_test_tolerance log-str { + SELECT log('1.5') +} {0.176091259055681} $tolerance + +do_execsql_test log-negative { + SELECT log(-1.5) +} {} + +do_execsql_test log-null { + SELECT log(null) +} {} + +do_execsql_test_tolerance log-int-int { + SELECT log(5, 1) +} {0.0} $tolerance + +do_execsql_test_tolerance log-int-float { + SELECT log(5, 1.5) +} {0.251929636412592} $tolerance + +do_execsql_test_tolerance log-int-str { + SELECT log(5, '1.5') +} {0.251929636412592} $tolerance + +do_execsql_test_tolerance log-float-int { + SELECT log(5.5, 10) +} {1.35068935021985} $tolerance + +do_execsql_test_tolerance log-float-float { + SELECT log(5.5, 1.5) +} 
{0.237844588273313} $tolerance + +do_execsql_test_tolerance log-float-str { + SELECT log(5.5, '1.5') +} {0.237844588273313} $tolerance + +do_execsql_test_tolerance log-str-str { + SELECT log('5.5', '1.5') +} {0.237844588273313} $tolerance + +do_execsql_test log-negative-negative { + SELECT log(-1.5, -1.5) +} {} + +do_execsql_test log-float-negative { + SELECT log(1.5, -1.5) +} {} + +do_execsql_test log-null-int { + SELECT log(null, 5) +} {} + +do_execsql_test log-int-null { + SELECT log(5, null) +} {} From f5c82503f949a84f06c23f5d416a9e693fc74aa2 Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Mon, 16 Dec 2024 19:51:13 +0200 Subject: [PATCH 023/144] Be more explicit with `pi()` being the only nullary math function --- core/vdbe/mod.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 547cde53b..154d6f7e6 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2492,9 +2492,18 @@ impl Program { } }, crate::function::Func::Math(math_func) => match math_func.arity() { - MathFuncArity::Nullary => { - state.registers[*dest] = OwnedValue::Float(std::f64::consts::PI); - } + MathFuncArity::Nullary => match math_func { + MathFunc::Pi => { + state.registers[*dest] = + OwnedValue::Float(std::f64::consts::PI); + } + _ => { + unreachable!( + "Unexpected mathematical Nullary function {:?}", + math_func + ); + } + }, MathFuncArity::Unary => { let reg_value = &state.registers[*start_reg]; From aa821647173781d0e1a0c772d8d4b089e3c1d41e Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Mon, 16 Dec 2024 20:05:13 +0200 Subject: [PATCH 024/144] Add FIXME comments about floating point comparison tolerance --- testing/math.test | 6 ++++-- testing/tester.tcl | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/testing/math.test b/testing/math.test index 064de41e3..f2faa4cd2 100644 --- a/testing/math.test +++ b/testing/math.test @@ -3,6 +3,10 @@ set testdir [file dirname $argv0] source 
$testdir/tester.tcl +# Tolerance for floating point comparisons +# FIXME: When Limbo's floating point presentation matches to SQLite, this could/should be removed +set tolerance 1e-13 + do_execsql_test add-int { SELECT 10 + 1 } {11} @@ -362,8 +366,6 @@ do_execsql_test bitwise-not-zero { } {-1} -set tolerance 1e-13 - do_execsql_test_tolerance pi { SELECT pi() } {3.14159265358979} $tolerance diff --git a/testing/tester.tcl b/testing/tester.tcl index 1aae417ca..1ce0a3755 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -37,6 +37,8 @@ proc within_tolerance {actual expected tolerance} { expr {abs($actual - $expected) <= $tolerance} } +# This function is used to test floating point values within a tolerance +# FIXME: When Limbo's floating point presentation matches to SQLite, this could/should be removed proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolerance} { puts "Running test: $test_name" set combined_sql [string trim $sql_statements] From ca418c2674d436aa03d86d92328780fb6c175ef1 Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Mon, 16 Dec 2024 21:55:55 +0200 Subject: [PATCH 025/144] Run `do_execsql_test_tolerance` for each database --- testing/tester.tcl | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/testing/tester.tcl b/testing/tester.tcl index 1ce0a3755..04a43c3eb 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -40,30 +40,32 @@ proc within_tolerance {actual expected tolerance} { # This function is used to test floating point values within a tolerance # FIXME: When Limbo's floating point presentation matches to SQLite, this could/should be removed proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolerance} { - puts "Running test: $test_name" - set combined_sql [string trim $sql_statements] - set actual_output [evaluate_sql $::sqlite_exec $combined_sql] - set actual_values [split $actual_output "\n"] - set expected_values 
[split $expected_outputs "\n"] - - if {[llength $actual_values] != [llength $expected_values]} { - puts "Test FAILED: '$sql_statements'" - puts "returned '$actual_output'" - puts "expected '$expected_outputs'" - exit 1 - } - - for {set i 0} {$i < [llength $actual_values]} {incr i} { - set actual [lindex $actual_values $i] - set expected [lindex $expected_values $i] + foreach db $::test_dbs { + puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + set actual_output [evaluate_sql $::sqlite_exec $db $combined_sql] + set actual_values [split $actual_output "\n"] + set expected_values [split $expected_outputs "\n"] - if {![within_tolerance $actual $expected $tolerance]} { - set lower_bound [expr {$expected - $tolerance}] - set upper_bound [expr {$expected + $tolerance}] + if {[llength $actual_values] != [llength $expected_values]} { puts "Test FAILED: '$sql_statements'" - puts "returned '$actual'" - puts "expected a value within the range \[$lower_bound, $upper_bound\]" + puts "returned '$actual_output'" + puts "expected '$expected_outputs'" exit 1 } + + for {set i 0} {$i < [llength $actual_values]} {incr i} { + set actual [lindex $actual_values $i] + set expected [lindex $expected_values $i] + + if {![within_tolerance $actual $expected $tolerance]} { + set lower_bound [expr {$expected - $tolerance}] + set upper_bound [expr {$expected + $tolerance}] + puts "Test FAILED: '$sql_statements'" + puts "returned '$actual'" + puts "expected a value within the range \[$lower_bound, $upper_bound\]" + exit 1 + } + } } } From fe429302391000bb4749f86fda2f6dc09a7e22bc Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Mon, 16 Dec 2024 23:48:58 +0200 Subject: [PATCH 026/144] Take `log` function argument count from function context --- core/vdbe/mod.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs 
index 154d6f7e6..069782c9a 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2520,15 +2520,21 @@ impl Program { MathFuncArity::UnaryOrBinary => match math_func { MathFunc::Log => { - let lhs = &state.registers[*start_reg]; - let rhs = state.registers.get(*start_reg + 1); - - let result = if let Some(arg) = rhs { - exec_math_log(arg, Some(lhs)) - } else { - exec_math_log(lhs, None) + let result = match arg_count { + 1 => { + let arg = &state.registers[*start_reg]; + exec_math_log(arg, None) + } + 2 => { + let base = &state.registers[*start_reg]; + let arg = &state.registers[*start_reg + 1]; + exec_math_log(arg, Some(base)) + } + _ => unreachable!( + "{:?} function with unexpected number of arguments", + math_func + ), }; - state.registers[*dest] = result; } _ => unreachable!( From a1c77af8a8a14ef939543c3ffb21922cb8a01479 Mon Sep 17 00:00:00 2001 From: Lauri Virtanen Date: Tue, 17 Dec 2024 00:03:48 +0200 Subject: [PATCH 027/144] Limit `sin` and `mod` tests rows --- testing/math.test | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/testing/math.test b/testing/math.test index f2faa4cd2..7b27495e3 100644 --- a/testing/math.test +++ b/testing/math.test @@ -443,18 +443,12 @@ do_execsql_test_tolerance sin-null { } {} $tolerance do_execsql_test_tolerance sin-products-id { - SELECT sin(id) from products + SELECT sin(id) from products limit 5 } {0.8414709848078965 0.9092974268256817 0.1411200080598672 -0.7568024953079282 --0.9589242746631385 --0.27941549819892586 -0.6569865987187891 -0.9893582466233818 -0.4121184852417566 --0.5440211108893698 --0.9999902065507035} $tolerance +-0.9589242746631385} $tolerance do_execsql_test_tolerance asinh-int { @@ -878,31 +872,19 @@ do_execsql_test mod-float-zero { } {} do_execsql_test mod-products-id { - SELECT mod(products.id, 3) from products + SELECT mod(products.id, 3) from products limit 5 } {1.0 2.0 0.0 1.0 -2.0 -0.0 -1.0 -2.0 -0.0 -1.0 2.0} do_execsql_test mod-products-price-id { - 
SELECT mod(products.price, products.id) from products + SELECT mod(products.price, products.id) from products limit 5 } {0.0 0.0 0.0 1.0 -4.0 -4.0 -1.0 -2.0 -1.0 -3.0 4.0} From 8d18263fd637ee1e00bfa0555cf3b6333ef936cf Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 15 Dec 2024 22:29:44 -0500 Subject: [PATCH 028/144] Replace vec args with slices where possible, clippy warnings --- core/translate/emitter.rs | 43 ++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index ce2b4d0b2..d276adb3b 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -216,7 +216,7 @@ pub fn emit_program( // Clean up and close the main execution loop close_loop( &mut program, - &mut plan.source, + &plan.source, &mut metadata, &plan.referenced_tables, )?; @@ -235,7 +235,7 @@ pub fn emit_program( group_by, plan.order_by.as_ref(), &plan.aggregates, - plan.limit.clone(), + plan.limit, &plan.referenced_tables, &mut metadata, )?; @@ -259,7 +259,7 @@ pub fn emit_program( &mut program, order_by, &plan.result_columns, - plan.limit.clone(), + plan.limit, &mut metadata, )?; } @@ -274,7 +274,7 @@ pub fn emit_program( /// Initialize resources needed for ORDER BY processing fn init_order_by( program: &mut ProgramBuilder, - order_by: &Vec<(ast::Expr, Direction)>, + order_by: &[(ast::Expr, Direction)], metadata: &mut Metadata, ) -> Result<()> { metadata @@ -301,7 +301,7 @@ fn init_order_by( fn init_group_by( program: &mut ProgramBuilder, group_by: &GroupBy, - aggregates: &Vec, + aggregates: &[Aggregate], metadata: &mut Metadata, ) -> Result<()> { let agg_final_label = program.allocate_label(); @@ -866,8 +866,8 @@ fn inner_loop_emit( /// See the InnerLoopEmitTarget enum for more details. 
fn inner_loop_source_emit( program: &mut ProgramBuilder, - result_columns: &Vec, - aggregates: &Vec, + result_columns: &[ResultSetColumn], + aggregates: &[Aggregate], metadata: &mut Metadata, emit_target: InnerLoopEmitTarget, referenced_tables: &[BTreeTableReference], @@ -927,7 +927,7 @@ fn inner_loop_source_emit( order_by, result_columns, &mut metadata.result_column_indexes_in_orderby_sorter, - &metadata.sort_metadata.as_ref().unwrap(), + metadata.sort_metadata.as_ref().unwrap(), None, )?; Ok(()) @@ -1123,12 +1123,13 @@ fn close_loop( /// Emits the bytecode for processing a GROUP BY clause. /// This is called when the main query execution loop has finished processing, /// and we now have data in the GROUP BY sorter. +#[allow(clippy::too_many_arguments)] fn group_by_emit( program: &mut ProgramBuilder, - result_columns: &Vec, + result_columns: &[ResultSetColumn], group_by: &GroupBy, order_by: Option<&Vec<(ast::Expr, Direction)>>, - aggregates: &Vec, + aggregates: &[Aggregate], limit: Option, referenced_tables: &[BTreeTableReference], metadata: &mut Metadata, @@ -1437,7 +1438,7 @@ fn group_by_emit( order_by, result_columns, &mut metadata.result_column_indexes_in_orderby_sorter, - &metadata.sort_metadata.as_ref().unwrap(), + metadata.sort_metadata.as_ref().unwrap(), Some(&precomputed_exprs_to_register), )?; } @@ -1474,9 +1475,9 @@ fn group_by_emit( /// and we can now materialize the aggregate results. fn agg_without_group_by_emit( program: &mut ProgramBuilder, - referenced_tables: &Vec, - result_columns: &Vec, - aggregates: &Vec, + referenced_tables: &[BTreeTableReference], + result_columns: &[ResultSetColumn], + aggregates: &[Aggregate], metadata: &mut Metadata, ) -> Result<()> { let agg_start_reg = metadata.aggregation_start_register.unwrap(); @@ -1513,8 +1514,8 @@ fn agg_without_group_by_emit( /// and we can now emit rows from the ORDER BY sorter. 
fn order_by_emit( program: &mut ProgramBuilder, - order_by: &Vec<(ast::Expr, Direction)>, - result_columns: &Vec, + order_by: &[(ast::Expr, Direction)], + result_columns: &[ResultSetColumn], limit: Option, metadata: &mut Metadata, ) -> Result<()> { @@ -1693,8 +1694,8 @@ fn sorter_insert( fn order_by_sorter_insert( program: &mut ProgramBuilder, referenced_tables: &[BTreeTableReference], - order_by: &Vec<(ast::Expr, Direction)>, - result_columns: &Vec, + order_by: &[(ast::Expr, Direction)], + result_columns: &[ResultSetColumn], result_column_indexes_in_orderby_sorter: &mut HashMap, sort_metadata: &SortMetadata, precomputed_exprs_to_register: Option<&Vec<(&ast::Expr, usize)>>, @@ -1760,8 +1761,8 @@ fn order_by_sorter_insert( /// /// If any result columns can be skipped, this returns list of 2-tuples of (SkippedResultColumnIndex: usize, ResultColumnIndexInOrderBySorter: usize) fn order_by_deduplicate_result_columns( - order_by: &Vec<(ast::Expr, Direction)>, - result_columns: &Vec, + order_by: &[(ast::Expr, Direction)], + result_columns: &[ResultSetColumn], ) -> Option> { let mut result_column_remapping: Option> = None; for (i, rc) in result_columns.iter().enumerate() { @@ -1781,5 +1782,5 @@ fn order_by_deduplicate_result_columns( } } - return result_column_remapping; + result_column_remapping } From 25772ee1f3cb5fabc9f5cd93e08857c8c927bdf7 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 15 Dec 2024 23:29:56 -0500 Subject: [PATCH 029/144] Implement custom expression equality checking --- core/translate/emitter.rs | 7 +- core/translate/mod.rs | 2 +- vendored/sqlite3-parser/src/parser/ast/mod.rs | 268 ++++++++++++++++++ 3 files changed, 271 insertions(+), 6 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index d276adb3b..f7658ccfa 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -5,7 +5,7 @@ use std::cell::RefCell; use std::collections::HashMap; use std::rc::{Rc, Weak}; -use sqlite3_parser::ast; +use 
sqlite3_parser::ast::{self, exprs_are_equivalent}; use crate::schema::{Column, PseudoTable, Table}; use crate::storage::sqlite3_ondisk::DatabaseHeader; @@ -1766,13 +1766,10 @@ fn order_by_deduplicate_result_columns( ) -> Option> { let mut result_column_remapping: Option> = None; for (i, rc) in result_columns.iter().enumerate() { - // TODO: implement a custom equality check for expressions - // there are lots of examples where this breaks, even simple ones like - // sum(x) != SUM(x) let found = order_by .iter() .enumerate() - .find(|(_, (expr, _))| expr == &rc.expr); + .find(|(_, (expr, _))| exprs_are_equivalent(expr, &rc.expr)); if let Some((j, _)) = found { if let Some(ref mut v) = result_column_remapping { v.push((i, j)); diff --git a/core/translate/mod.rs b/core/translate/mod.rs index cb2463239..e7ce74d4b 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -392,7 +392,7 @@ fn update_pragma(name: &str, value: i64, header: Rc>, pa struct TableFormatter<'a> { body: &'a ast::CreateTableBody, } -impl<'a> Display for TableFormatter<'a> { +impl<'a> Display for TableFormatter<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.body.to_fmt(f) } diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index da1798cff..3613cd6eb 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -498,6 +498,112 @@ impl Expr { } } +/// This function is used to determine whether two expressions are logically +/// equivalent in the context of queries, even if their representations +/// differ. 
e.g.: `SUM(x)` and `sum(x)`, `x + y` and `y + x` +pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { + use Expr::*; + match (expr1, expr2) { + ( + Between { + lhs: lhs1, + not: not1, + start: start1, + end: end1, + }, + Between { + lhs: lhs2, + not: not2, + start: start2, + end: end2, + }, + ) => { + *not1 == *not2 + && exprs_are_equivalent(lhs1, lhs2) + && exprs_are_equivalent(start1, start2) + && exprs_are_equivalent(end1, end2) + } + + (Binary(lhs1, op1, rhs1), Binary(lhs2, op2, rhs2)) => { + op1 == op2 + && ((exprs_are_equivalent(lhs1, lhs2) && exprs_are_equivalent(rhs1, rhs2)) + || (op1.is_commutative() + && exprs_are_equivalent(lhs1, rhs2) + && exprs_are_equivalent(rhs1, lhs2))) + } + ( + Case { + base: base1, + when_then_pairs: pairs1, + else_expr: else1, + }, + Case { + base: base2, + when_then_pairs: pairs2, + else_expr: else2, + }, + ) => { + base1 == base2 + && pairs1.len() == pairs2.len() + && pairs1.iter().zip(pairs2).all(|((w1, t1), (w2, t2))| { + exprs_are_equivalent(w1, w2) && exprs_are_equivalent(t1, t2) + }) + && else1 == else2 + } + ( + Cast { + expr: expr1, + type_name: type1, + }, + Cast { + expr: expr2, + type_name: type2, + }, + ) => exprs_are_equivalent(expr1, expr2) && type1 == type2, + (Collate(expr1, collation1), Collate(expr2, collation2)) => { + exprs_are_equivalent(expr1, expr2) && collation1.eq_ignore_ascii_case(collation2) + } + ( + FunctionCall { + name: name1, + distinctness: distinct1, + args: args1, + order_by: order1, + filter_over: filter1, + }, + FunctionCall { + name: name2, + distinctness: distinct2, + args: args2, + order_by: order2, + filter_over: filter2, + }, + ) => { + name1.0.eq_ignore_ascii_case(&name2.0) + && distinct1 == distinct2 + && args1 == args2 + && order1 == order2 + && filter1 == filter2 + } + (Literal(lit1), Literal(lit2)) => lit1 == lit2, + (Id(id1), Id(id2)) => id1.0.eq_ignore_ascii_case(&id2.0), + (Unary(op1, expr1), Unary(op2, expr2)) => op1 == op2 && exprs_are_equivalent(expr1, expr2), + 
(Variable(var1), Variable(var2)) => var1 == var2, + (Parenthesized(exprs1), Parenthesized(exprs2)) => { + exprs1.len() == exprs2.len() + && exprs1 + .iter() + .zip(exprs2) + .all(|(e1, e2)| exprs_are_equivalent(e1, e2)) + } + (Parenthesized(exprs1), exprs2) => { + exprs1.len() == 1 && exprs_are_equivalent(&exprs1[0], exprs2) + } + // fall back to naive equality check + _ => expr1 == expr2, + } +} + /// SQL literal #[derive(Clone, Debug, PartialEq, Eq)] pub enum Literal { @@ -534,6 +640,19 @@ impl Literal { unreachable!() } } + pub fn is_equal(&self, other: &Self) -> bool { + match (self, other) { + (Self::Numeric(n1), Self::Numeric(n2)) => n1 == n2, + (Self::String(s1), Self::String(s2)) => s1 == s2, + (Self::Blob(b1), Self::Blob(b2)) => b1 == b2, + (Self::Keyword(k1), Self::Keyword(k2)) => k1 == k2, + (Self::Null, Self::Null) => true, + (Self::CurrentDate, Self::CurrentDate) => true, + (Self::CurrentTime, Self::CurrentTime) => true, + (Self::CurrentTimestamp, Self::CurrentTimestamp) => true, + _ => false, + } + } } /// Textual comparison operator in an expression @@ -648,6 +767,20 @@ impl From for Operator { } } +impl Operator { + pub fn is_commutative(&self) -> bool { + matches!( + self, + Operator::Add + | Operator::Multiply + | Operator::BitwiseAnd + | Operator::BitwiseOr + | Operator::Equals + | Operator::NotEquals + ) + } +} + /// Unary operators #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum UnaryOperator { @@ -1871,4 +2004,139 @@ mod test { fn name(s: &'static str) -> Name { Name(s.to_owned()) } + + #[test] + fn test_exprs_are_equivalent() { + use super::{Expr, Id, Literal, Operator::*}; + + // commutative addition + let expr1 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("1".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("2".to_string()))), + ); + let expr2 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("2".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("1".to_string()))), + ); + 
assert!(super::exprs_are_equivalent(&expr1, &expr2)); + + // non-commutative subtraction + let expr3 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("3".to_string()))), + Subtract, + Box::new(Expr::Literal(Literal::Numeric("2".to_string()))), + ); + let expr4 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("2".to_string()))), + Subtract, + Box::new(Expr::Literal(Literal::Numeric("3".to_string()))), + ); + assert!(!super::exprs_are_equivalent(&expr3, &expr4)); + + // case-insensitive function calls + let func1 = Expr::FunctionCall { + name: Id("SUM".to_string()), + distinctness: None, + args: Some(vec![Expr::Id(Id("x".to_string()))]), + order_by: None, + filter_over: None, + }; + let func2 = Expr::FunctionCall { + name: Id("sum".to_string()), + distinctness: None, + args: Some(vec![Expr::Id(Id("x".to_string()))]), + order_by: None, + filter_over: None, + }; + assert!(super::exprs_are_equivalent(&func1, &func2)); + + // DISTINCT function argument mismatch + let func3 = Expr::FunctionCall { + name: Id("SUM".to_string()), + distinctness: Some(super::Distinctness::Distinct), + args: Some(vec![Expr::Id(Id("x".to_string()))]), + order_by: None, + filter_over: None, + }; + assert!(!super::exprs_are_equivalent(&func1, &func3)); + + // commutative multiplication + let expr5 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("4".to_string()))), + Multiply, + Box::new(Expr::Literal(Literal::Numeric("5".to_string()))), + ); + let expr6 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("5".to_string()))), + Multiply, + Box::new(Expr::Literal(Literal::Numeric("4".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr5, &expr6)); + + // parenthesized expressions + let expr7 = Expr::Parenthesized(vec![Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("6".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("7".to_string()))), + )]); + let expr8 = Expr::Binary( + 
Box::new(Expr::Literal(Literal::Numeric("6".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("7".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr7, &expr8)); + + // LIKE expressions with escape clauses + let expr9 = Expr::Like { + lhs: Box::new(Expr::Id(Id("name".to_string()))), + not: false, + op: super::LikeOperator::Like, + rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), + escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), + }; + let expr10 = Expr::Like { + lhs: Box::new(Expr::Id(Id("name".to_string()))), + not: false, + op: super::LikeOperator::Like, + rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), + escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), + }; + assert!(super::exprs_are_equivalent(&expr9, &expr10)); + + // differing escape clauses in LIKE + let expr11 = Expr::Like { + lhs: Box::new(Expr::Id(Id("name".to_string()))), + not: false, + op: super::LikeOperator::Like, + rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), + escape: Some(Box::new(Expr::Literal(Literal::String("#".to_string())))), + }; + assert!(!super::exprs_are_equivalent(&expr9, &expr11)); + + // BETWEEN expressions + let expr12 = Expr::Between { + lhs: Box::new(Expr::Id(Id("age".to_string()))), + not: false, + start: Box::new(Expr::Literal(Literal::Numeric("18".to_string()))), + end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), + }; + let expr13 = Expr::Between { + lhs: Box::new(Expr::Id(Id("age".to_string()))), + not: false, + start: Box::new(Expr::Literal(Literal::Numeric("18".to_string()))), + end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), + }; + assert!(super::exprs_are_equivalent(&expr12, &expr13)); + + // differing BETWEEN bounds + let expr14 = Expr::Between { + lhs: Box::new(Expr::Id(Id("age".to_string()))), + not: false, + start: Box::new(Expr::Literal(Literal::Numeric("20".to_string()))), + end: 
Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), + }; + assert!(!super::exprs_are_equivalent(&expr12, &expr14)); + } } From 2f647001bc7c6c9cbfe7be92e13d08ef0cc81313 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 16 Dec 2024 12:02:12 -0500 Subject: [PATCH 030/144] Add cases to expr equality check, normalize numeric strings --- vendored/sqlite3-parser/src/parser/ast/mod.rs | 239 ++++++++++++++---- 1 file changed, 195 insertions(+), 44 deletions(-) diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index 3613cd6eb..d32e2d8a6 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -523,7 +523,6 @@ pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { && exprs_are_equivalent(start1, start2) && exprs_are_equivalent(end1, end2) } - (Binary(lhs1, op1, rhs1), Binary(lhs2, op2, rhs2)) => { op1 == op2 && ((exprs_are_equivalent(lhs1, lhs2) && exprs_are_equivalent(rhs1, rhs2)) @@ -585,7 +584,7 @@ pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { && order1 == order2 && filter1 == filter2 } - (Literal(lit1), Literal(lit2)) => lit1 == lit2, + (Literal(lit1), Literal(lit2)) => lit1.is_equivalent(lit2), (Id(id1), Id(id2)) => id1.0.eq_ignore_ascii_case(&id2.0), (Unary(op1, expr1), Unary(op2, expr2)) => op1 == op2 && exprs_are_equivalent(expr1, expr2), (Variable(var1), Variable(var2)) => var1 == var2, @@ -596,9 +595,35 @@ pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { .zip(exprs2) .all(|(e1, e2)| exprs_are_equivalent(e1, e2)) } - (Parenthesized(exprs1), exprs2) => { + (Parenthesized(exprs1), exprs2) | (exprs2, Parenthesized(exprs1)) => { exprs1.len() == 1 && exprs_are_equivalent(&exprs1[0], exprs2) } + ( + InList { + lhs: lhs1, + not: not1, + rhs: rhs1, + }, + InList { + lhs: lhs2, + not: not2, + rhs: rhs2, + }, + ) => { + *not1 == *not2 + && exprs_are_equivalent(lhs1, lhs2) + && rhs1 + .as_ref() + 
.zip(rhs2.as_ref()) + .map(|(list1, list2)| { + list1.len() == list2.len() + && list1 + .iter() + .zip(list2) + .all(|(e1, e2)| exprs_are_equivalent(e1, e2)) + }) + .unwrap_or(false) + } // fall back to naive equality check _ => expr1 == expr2, } @@ -627,6 +652,20 @@ pub enum Literal { CurrentTimestamp, } +/// normalization for comparison of numeric literals +fn normalize_numeric_str(num_str: &str) -> Option { + if let Ok(value) = num_str.parse::() { + let canonical = if value.fract() == 0.0 { + format!("{}", value as i64) + } else { + format!("{}", value) + }; + Some(canonical) + } else { + None + } +} + impl Literal { /// Constructor pub fn from_ctime_kw(token: Token) -> Self { @@ -640,12 +679,20 @@ impl Literal { unreachable!() } } - pub fn is_equal(&self, other: &Self) -> bool { + + /// checks if two literal values are equivalent + fn is_equivalent(&self, other: &Self) -> bool { match (self, other) { - (Self::Numeric(n1), Self::Numeric(n2)) => n1 == n2, - (Self::String(s1), Self::String(s2)) => s1 == s2, + (Self::Numeric(n1), Self::Numeric(n2)) => { + match (normalize_numeric_str(n1), normalize_numeric_str(n2)) { + (Some(canonical), Some(canonical2)) => canonical == canonical2, + _ => false, + } + } + // TODO: check for quoted == unquoted strings? 
+ (Self::String(s1), Self::String(s2)) => s1.eq_ignore_ascii_case(s2), (Self::Blob(b1), Self::Blob(b2)) => b1 == b2, - (Self::Keyword(k1), Self::Keyword(k2)) => k1 == k2, + (Self::Keyword(k1), Self::Keyword(k2)) => k1.eq_ignore_ascii_case(k2), (Self::Null, Self::Null) => true, (Self::CurrentDate, Self::CurrentDate) => true, (Self::CurrentTime, Self::CurrentTime) => true, @@ -768,6 +815,7 @@ impl From for Operator { } impl Operator { + /// returns whether order of operations can be ignored pub fn is_commutative(&self) -> bool { matches!( self, @@ -2006,36 +2054,72 @@ mod test { } #[test] - fn test_exprs_are_equivalent() { - use super::{Expr, Id, Literal, Operator::*}; + fn test_basic_addition_exprs_are_equivalent() { + use super::{Expr, Literal, Operator::*}; + let expr1 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("826".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("389".to_string()))), + ); + let expr2 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("389".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("826".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr1, &expr2)); + } - // commutative addition + #[test] + fn test_addition_expressions_equivalent_normalized() { + use super::{Expr, Literal, Operator::*}; let expr1 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("1".to_string()))), + Box::new(Expr::Literal(Literal::Numeric("123.0".to_string()))), Add, - Box::new(Expr::Literal(Literal::Numeric("2".to_string()))), + Box::new(Expr::Literal(Literal::Numeric("243".to_string()))), ); let expr2 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("2".to_string()))), + Box::new(Expr::Literal(Literal::Numeric("243.0".to_string()))), Add, - Box::new(Expr::Literal(Literal::Numeric("1".to_string()))), + Box::new(Expr::Literal(Literal::Numeric("123".to_string()))), ); assert!(super::exprs_are_equivalent(&expr1, &expr2)); + } - // non-commutative subtraction + #[test] + fn 
test_subtraction_expressions_not_equivalent() { + use super::{Expr, Literal, Operator::*}; let expr3 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("3".to_string()))), + Box::new(Expr::Literal(Literal::Numeric("364".to_string()))), Subtract, - Box::new(Expr::Literal(Literal::Numeric("2".to_string()))), + Box::new(Expr::Literal(Literal::Numeric("22.0".to_string()))), ); let expr4 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("2".to_string()))), + Box::new(Expr::Literal(Literal::Numeric("22.0".to_string()))), Subtract, - Box::new(Expr::Literal(Literal::Numeric("3".to_string()))), + Box::new(Expr::Literal(Literal::Numeric("364".to_string()))), ); assert!(!super::exprs_are_equivalent(&expr3, &expr4)); + } + + #[test] + fn test_subtraction_expressions_normalized() { + use super::{Expr, Literal, Operator::*}; + let expr3 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("66.0".to_string()))), + Subtract, + Box::new(Expr::Literal(Literal::Numeric("22".to_string()))), + ); + let expr4 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("66".to_string()))), + Subtract, + Box::new(Expr::Literal(Literal::Numeric("22.0".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr3, &expr4)); + } - // case-insensitive function calls + #[test] + fn test_expressions_equivalent_case_insensitive_functioncalls() { + use super::{Expr, Id}; let func1 = Expr::FunctionCall { name: Id("SUM".to_string()), distinctness: None, @@ -2052,7 +2136,6 @@ mod test { }; assert!(super::exprs_are_equivalent(&func1, &func2)); - // DISTINCT function argument mismatch let func3 = Expr::FunctionCall { name: Id("SUM".to_string()), distinctness: Some(super::Distinctness::Distinct), @@ -2061,21 +2144,62 @@ mod test { filter_over: None, }; assert!(!super::exprs_are_equivalent(&func1, &func3)); + } + + #[test] + fn test_expressions_equivalent_identical_fn_with_distinct() { + use super::{Expr, Id}; + let sum = Expr::FunctionCall { + name: Id("SUM".to_string()), + 
distinctness: None, + args: Some(vec![Expr::Id(Id("x".to_string()))]), + order_by: None, + filter_over: None, + }; + let sum_distinct = Expr::FunctionCall { + name: Id("SUM".to_string()), + distinctness: Some(super::Distinctness::Distinct), + args: Some(vec![Expr::Id(Id("x".to_string()))]), + order_by: None, + filter_over: None, + }; + assert!(!super::exprs_are_equivalent(&sum, &sum_distinct)); + } - // commutative multiplication - let expr5 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("4".to_string()))), + #[test] + fn test_expressions_equivalent_multiplicaiton() { + use super::{Expr, Literal, Operator::*}; + let expr1 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("42.0".to_string()))), Multiply, - Box::new(Expr::Literal(Literal::Numeric("5".to_string()))), + Box::new(Expr::Literal(Literal::Numeric("38".to_string()))), ); - let expr6 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("5".to_string()))), + let expr2 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("38.0".to_string()))), Multiply, - Box::new(Expr::Literal(Literal::Numeric("4".to_string()))), + Box::new(Expr::Literal(Literal::Numeric("42".to_string()))), ); - assert!(super::exprs_are_equivalent(&expr5, &expr6)); + assert!(super::exprs_are_equivalent(&expr1, &expr2)); + } - // parenthesized expressions + #[test] + fn test_expressions_both_parenthesized_equivalent() { + use super::{Expr, Literal, Operator::*}; + let expr1 = Expr::Parenthesized(vec![Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("683".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("799.0".to_string()))), + )]); + let expr2 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("799".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("683".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr1, &expr2)); + } + #[test] + fn test_expressions_parenthesized_equivalent() { + use super::{Expr, Literal, Operator::*}; let expr7 = 
Expr::Parenthesized(vec![Expr::Binary( Box::new(Expr::Literal(Literal::Numeric("6".to_string()))), Add, @@ -2087,56 +2211,83 @@ mod test { Box::new(Expr::Literal(Literal::Numeric("7".to_string()))), ); assert!(super::exprs_are_equivalent(&expr7, &expr8)); + } - // LIKE expressions with escape clauses - let expr9 = Expr::Like { + #[test] + fn test_like_expressions_equivalent() { + use super::{Expr, Id, Literal}; + let expr1 = Expr::Like { lhs: Box::new(Expr::Id(Id("name".to_string()))), not: false, op: super::LikeOperator::Like, rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), }; - let expr10 = Expr::Like { + let expr2 = Expr::Like { lhs: Box::new(Expr::Id(Id("name".to_string()))), not: false, op: super::LikeOperator::Like, rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), }; - assert!(super::exprs_are_equivalent(&expr9, &expr10)); + assert!(super::exprs_are_equivalent(&expr1, &expr2)); + } - // differing escape clauses in LIKE - let expr11 = Expr::Like { + #[test] + fn test_expressions_equivalent_like_escaped() { + use super::{Expr, Id, Literal}; + let expr1 = Expr::Like { + lhs: Box::new(Expr::Id(Id("name".to_string()))), + not: false, + op: super::LikeOperator::Like, + rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), + escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), + }; + let expr2 = Expr::Like { lhs: Box::new(Expr::Id(Id("name".to_string()))), not: false, op: super::LikeOperator::Like, rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), escape: Some(Box::new(Expr::Literal(Literal::String("#".to_string())))), }; - assert!(!super::exprs_are_equivalent(&expr9, &expr11)); - - // BETWEEN expressions - let expr12 = Expr::Between { + assert!(!super::exprs_are_equivalent(&expr1, &expr2)); + } + #[test] + fn 
test_expressions_equivalent_between() { + use super::{Expr, Id, Literal}; + let expr1 = Expr::Between { lhs: Box::new(Expr::Id(Id("age".to_string()))), not: false, start: Box::new(Expr::Literal(Literal::Numeric("18".to_string()))), end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), }; - let expr13 = Expr::Between { + let expr2 = Expr::Between { lhs: Box::new(Expr::Id(Id("age".to_string()))), not: false, start: Box::new(Expr::Literal(Literal::Numeric("18".to_string()))), end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), }; - assert!(super::exprs_are_equivalent(&expr12, &expr13)); + assert!(super::exprs_are_equivalent(&expr1, &expr2)); // differing BETWEEN bounds - let expr14 = Expr::Between { + let expr3 = Expr::Between { lhs: Box::new(Expr::Id(Id("age".to_string()))), not: false, start: Box::new(Expr::Literal(Literal::Numeric("20".to_string()))), end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), }; - assert!(!super::exprs_are_equivalent(&expr12, &expr14)); + assert!(!super::exprs_are_equivalent(&expr1, &expr3)); + } + + #[test] + fn test_normalize_numeric_string() { + use super::normalize_numeric_str; + assert_eq!(normalize_numeric_str("001"), Some("1".to_string())); + assert_eq!(normalize_numeric_str("1.00"), Some("1".to_string())); + assert_eq!(normalize_numeric_str("0.010"), Some("0.01".to_string())); + assert_eq!(normalize_numeric_str("1e3"), Some("1000".to_string())); + assert_eq!(normalize_numeric_str("1.23e2"), Some("123".to_string())); + assert_eq!(normalize_numeric_str("invalid"), None); + assert_eq!(normalize_numeric_str(""), None); } } From 66c44a021f07d3d0368d871bace6d21a81001397 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 16 Dec 2024 19:38:56 -0500 Subject: [PATCH 031/144] Move ast expr equality check out of vendored, remove allocs and add cases --- cli/main.rs | 2 +- core/translate/emitter.rs | 3 +- core/translate/mod.rs | 2 +- core/util.rs | 455 ++++++++++++++++++ 
vendored/sqlite3-parser/src/parser/ast/mod.rs | 404 ---------------- 5 files changed, 459 insertions(+), 407 deletions(-) diff --git a/cli/main.rs b/cli/main.rs index 3671d47c9..9977f6540 100644 --- a/cli/main.rs +++ b/cli/main.rs @@ -2,7 +2,7 @@ mod app; mod opcodes_dictionary; use rustyline::{error::ReadlineError, DefaultEditor}; -use std::sync::{atomic::Ordering, Arc}; +use std::sync::atomic::Ordering; #[allow(clippy::arc_with_non_send_sync)] fn main() -> anyhow::Result<()> { diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index f7658ccfa..38311b9d9 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -5,12 +5,13 @@ use std::cell::RefCell; use std::collections::HashMap; use std::rc::{Rc, Weak}; -use sqlite3_parser::ast::{self, exprs_are_equivalent}; +use sqlite3_parser::ast::{self}; use crate::schema::{Column, PseudoTable, Table}; use crate::storage::sqlite3_ondisk::DatabaseHeader; use crate::translate::plan::{IterationDirection, Search}; use crate::types::{OwnedRecord, OwnedValue}; +use crate::util::exprs_are_equivalent; use crate::vdbe::builder::ProgramBuilder; use crate::vdbe::{BranchOffset, Insn, Program}; use crate::{Connection, Result}; diff --git a/core/translate/mod.rs b/core/translate/mod.rs index e7ce74d4b..ef06e1467 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -392,7 +392,7 @@ fn update_pragma(name: &str, value: i64, header: Rc>, pa struct TableFormatter<'a> { body: &'a ast::CreateTableBody, } -impl<'a> Display for TableFormatter<'_> { +impl Display for TableFormatter<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.body.to_fmt(f) } diff --git a/core/util.rs b/core/util.rs index ed1e60010..985bfaa28 100644 --- a/core/util.rs +++ b/core/util.rs @@ -1,5 +1,7 @@ use std::{rc::Rc, sync::Arc}; +use sqlite3_parser::ast::{Expr, FunctionTail, Literal}; + use crate::{ schema::{self, Schema}, Result, RowResult, Rows, IO, @@ -55,3 +57,456 @@ pub fn parse_schema_rows(rows: 
Option, schema: &mut Schema, io: Arc bool { + match (num_str.parse::(), other.parse::()) { + (Ok(num), Ok(other)) => num == other, + _ => num_str == other, + } +} + +const QUOTE_PAIRS: &[(char, char)] = &[('"', '"'), ('[', ']'), ('`', '`')]; +pub fn check_ident_equivalency(ident1: &str, ident2: &str) -> bool { + fn strip_quotes(identifier: &str) -> &str { + for &(start, end) in QUOTE_PAIRS { + if identifier.starts_with(start) && identifier.ends_with(end) { + return &identifier[1..identifier.len() - 1]; + } + } + identifier + } + strip_quotes(ident1).eq_ignore_ascii_case(strip_quotes(ident2)) +} + +pub fn check_literal_equivalency(lhs: &Literal, rhs: &Literal) -> bool { + match (lhs, rhs) { + (Literal::Numeric(n1), Literal::Numeric(n2)) => cmp_numeric_strings(n1, n2), + (Literal::String(s1), Literal::String(s2)) => check_ident_equivalency(s1, s2), + (Literal::Blob(b1), Literal::Blob(b2)) => b1 == b2, + (Literal::Keyword(k1), Literal::Keyword(k2)) => check_ident_equivalency(k1, k2), + (Literal::Null, Literal::Null) => true, + (Literal::CurrentDate, Literal::CurrentDate) => true, + (Literal::CurrentTime, Literal::CurrentTime) => true, + (Literal::CurrentTimestamp, Literal::CurrentTimestamp) => true, + _ => false, + } +} + +/// This function is used to determine whether two expressions are logically +/// equivalent in the context of queries, even if their representations +/// differ. 
e.g.: `SUM(x)` and `sum(x)`, `x + y` and `y + x` +/// +/// *Note*: doesn't attempt to evaluate/compute "constexpr" results +pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { + match (expr1, expr2) { + ( + Expr::Between { + lhs: lhs1, + not: not1, + start: start1, + end: end1, + }, + Expr::Between { + lhs: lhs2, + not: not2, + start: start2, + end: end2, + }, + ) => { + not1 == not2 + && exprs_are_equivalent(lhs1, lhs2) + && exprs_are_equivalent(start1, start2) + && exprs_are_equivalent(end1, end2) + } + (Expr::Binary(lhs1, op1, rhs1), Expr::Binary(lhs2, op2, rhs2)) => { + op1 == op2 + && ((exprs_are_equivalent(lhs1, lhs2) && exprs_are_equivalent(rhs1, rhs2)) + || (op1.is_commutative() + && exprs_are_equivalent(lhs1, rhs2) + && exprs_are_equivalent(rhs1, lhs2))) + } + ( + Expr::Case { + base: base1, + when_then_pairs: pairs1, + else_expr: else1, + }, + Expr::Case { + base: base2, + when_then_pairs: pairs2, + else_expr: else2, + }, + ) => { + base1 == base2 + && pairs1.len() == pairs2.len() + && pairs1.iter().zip(pairs2).all(|((w1, t1), (w2, t2))| { + exprs_are_equivalent(w1, w2) && exprs_are_equivalent(t1, t2) + }) + && else1 == else2 + } + ( + Expr::Cast { + expr: expr1, + type_name: type1, + }, + Expr::Cast { + expr: expr2, + type_name: type2, + }, + ) => { + exprs_are_equivalent(expr1, expr2) + && match (type1, type2) { + (Some(t1), Some(t2)) => t1.name.eq_ignore_ascii_case(&t2.name), + _ => false, + } + } + (Expr::Collate(expr1, collation1), Expr::Collate(expr2, collation2)) => { + exprs_are_equivalent(expr1, expr2) && collation1.eq_ignore_ascii_case(collation2) + } + ( + Expr::FunctionCall { + name: name1, + distinctness: distinct1, + args: args1, + order_by: order1, + filter_over: filter1, + }, + Expr::FunctionCall { + name: name2, + distinctness: distinct2, + args: args2, + order_by: order2, + filter_over: filter2, + }, + ) => { + name1.0.eq_ignore_ascii_case(&name2.0) + && distinct1 == distinct2 + && args1 == args2 + && order1 == order2 + && 
filter1 == filter2 + } + ( + Expr::FunctionCallStar { + name: name1, + filter_over: filter1, + }, + Expr::FunctionCallStar { + name: name2, + filter_over: filter2, + }, + ) => { + name1.0.eq_ignore_ascii_case(&name2.0) + && match (filter1, filter2) { + (None, None) => true, + ( + Some(FunctionTail { + filter_clause: fc1, + over_clause: oc1, + }), + Some(FunctionTail { + filter_clause: fc2, + over_clause: oc2, + }), + ) => match ((fc1, fc2), (oc1, oc2)) { + ((Some(fc1), Some(fc2)), (Some(oc1), Some(oc2))) => { + exprs_are_equivalent(fc1, fc2) && oc1 == oc2 + } + ((Some(fc1), Some(fc2)), _) => exprs_are_equivalent(fc1, fc2), + _ => false, + }, + _ => false, + } + } + (Expr::Literal(lit1), Expr::Literal(lit2)) => check_literal_equivalency(lit1, lit2), + (Expr::Id(id1), Expr::Id(id2)) => check_ident_equivalency(&id1.0, &id2.0), + (Expr::Unary(op1, expr1), Expr::Unary(op2, expr2)) => { + op1 == op2 && exprs_are_equivalent(expr1, expr2) + } + (Expr::Variable(var1), Expr::Variable(var2)) => var1 == var2, + (Expr::Parenthesized(exprs1), Expr::Parenthesized(exprs2)) => { + exprs1.len() == exprs2.len() + && exprs1 + .iter() + .zip(exprs2) + .all(|(e1, e2)| exprs_are_equivalent(e1, e2)) + } + (Expr::Parenthesized(exprs1), exprs2) | (exprs2, Expr::Parenthesized(exprs1)) => { + exprs1.len() == 1 && exprs_are_equivalent(&exprs1[0], exprs2) + } + ( + Expr::InList { + lhs: lhs1, + not: not1, + rhs: rhs1, + }, + Expr::InList { + lhs: lhs2, + not: not2, + rhs: rhs2, + }, + ) => { + *not1 == *not2 + && exprs_are_equivalent(lhs1, lhs2) + && rhs1 + .as_ref() + .zip(rhs2.as_ref()) + .map(|(list1, list2)| { + list1.len() == list2.len() + && list1 + .iter() + .zip(list2) + .all(|(e1, e2)| exprs_are_equivalent(e1, e2)) + }) + .unwrap_or(false) + } + // fall back to naive equality check + _ => expr1 == expr2, + } +} + +#[cfg(test)] +pub mod tests { + use sqlite3_parser::ast::{self, Expr, Id, Literal, Operator::*, Type}; + #[test] + fn test_basic_addition_exprs_are_equivalent() { + let expr1 
= Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("826".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("389".to_string()))), + ); + let expr2 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("389".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("826".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr1, &expr2)); + } + + #[test] + fn test_addition_expressions_equivalent_normalized() { + let expr1 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("123.0".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("243".to_string()))), + ); + let expr2 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("243.0".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("123".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr1, &expr2)); + } + + #[test] + fn test_subtraction_expressions_not_equivalent() { + let expr3 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("364".to_string()))), + Subtract, + Box::new(Expr::Literal(Literal::Numeric("22.0".to_string()))), + ); + let expr4 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("22.0".to_string()))), + Subtract, + Box::new(Expr::Literal(Literal::Numeric("364".to_string()))), + ); + assert!(!super::exprs_are_equivalent(&expr3, &expr4)); + } + + #[test] + fn test_subtraction_expressions_normalized() { + let expr3 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("66.0".to_string()))), + Subtract, + Box::new(Expr::Literal(Literal::Numeric("22".to_string()))), + ); + let expr4 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("66".to_string()))), + Subtract, + Box::new(Expr::Literal(Literal::Numeric("22.0".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr3, &expr4)); + } + + #[test] + fn test_expressions_equivalent_case_insensitive_functioncalls() { + let func1 = Expr::FunctionCall { + name: Id("SUM".to_string()), + distinctness: None, + args: 
Some(vec![Expr::Id(Id("x".to_string()))]), + order_by: None, + filter_over: None, + }; + let func2 = Expr::FunctionCall { + name: Id("sum".to_string()), + distinctness: None, + args: Some(vec![Expr::Id(Id("x".to_string()))]), + order_by: None, + filter_over: None, + }; + assert!(super::exprs_are_equivalent(&func1, &func2)); + + let func3 = Expr::FunctionCall { + name: Id("SUM".to_string()), + distinctness: Some(ast::Distinctness::Distinct), + args: Some(vec![Expr::Id(Id("x".to_string()))]), + order_by: None, + filter_over: None, + }; + assert!(!super::exprs_are_equivalent(&func1, &func3)); + } + + #[test] + fn test_expressions_equivalent_identical_fn_with_distinct() { + let sum = Expr::FunctionCall { + name: Id("SUM".to_string()), + distinctness: None, + args: Some(vec![Expr::Id(Id("x".to_string()))]), + order_by: None, + filter_over: None, + }; + let sum_distinct = Expr::FunctionCall { + name: Id("SUM".to_string()), + distinctness: Some(ast::Distinctness::Distinct), + args: Some(vec![Expr::Id(Id("x".to_string()))]), + order_by: None, + filter_over: None, + }; + assert!(!super::exprs_are_equivalent(&sum, &sum_distinct)); + } + + #[test] + fn test_expressions_equivalent_multiplicaiton() { + let expr1 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("42.0".to_string()))), + Multiply, + Box::new(Expr::Literal(Literal::Numeric("38".to_string()))), + ); + let expr2 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("38.0".to_string()))), + Multiply, + Box::new(Expr::Literal(Literal::Numeric("42".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr1, &expr2)); + } + + #[test] + fn test_expressions_both_parenthesized_equivalent() { + let expr1 = Expr::Parenthesized(vec![Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("683".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("799.0".to_string()))), + )]); + let expr2 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("799".to_string()))), + Add, + 
Box::new(Expr::Literal(Literal::Numeric("683".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr1, &expr2)); + } + #[test] + fn test_expressions_parenthesized_equivalent() { + let expr7 = Expr::Parenthesized(vec![Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("6".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("7".to_string()))), + )]); + let expr8 = Expr::Binary( + Box::new(Expr::Literal(Literal::Numeric("6".to_string()))), + Add, + Box::new(Expr::Literal(Literal::Numeric("7".to_string()))), + ); + assert!(super::exprs_are_equivalent(&expr7, &expr8)); + } + + #[test] + fn test_like_expressions_equivalent() { + let expr1 = Expr::Like { + lhs: Box::new(Expr::Id(Id("name".to_string()))), + not: false, + op: ast::LikeOperator::Like, + rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), + escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), + }; + let expr2 = Expr::Like { + lhs: Box::new(Expr::Id(Id("name".to_string()))), + not: false, + op: ast::LikeOperator::Like, + rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), + escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), + }; + assert!(super::exprs_are_equivalent(&expr1, &expr2)); + } + + #[test] + fn test_expressions_equivalent_like_escaped() { + let expr1 = Expr::Like { + lhs: Box::new(Expr::Id(Id("name".to_string()))), + not: false, + op: ast::LikeOperator::Like, + rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), + escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), + }; + let expr2 = Expr::Like { + lhs: Box::new(Expr::Id(Id("name".to_string()))), + not: false, + op: ast::LikeOperator::Like, + rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), + escape: Some(Box::new(Expr::Literal(Literal::String("#".to_string())))), + }; + assert!(!super::exprs_are_equivalent(&expr1, &expr2)); + } + #[test] + fn test_expressions_equivalent_between() { + let 
expr1 = Expr::Between { + lhs: Box::new(Expr::Id(Id("age".to_string()))), + not: false, + start: Box::new(Expr::Literal(Literal::Numeric("18".to_string()))), + end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), + }; + let expr2 = Expr::Between { + lhs: Box::new(Expr::Id(Id("age".to_string()))), + not: false, + start: Box::new(Expr::Literal(Literal::Numeric("18".to_string()))), + end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), + }; + assert!(super::exprs_are_equivalent(&expr1, &expr2)); + + // differing BETWEEN bounds + let expr3 = Expr::Between { + lhs: Box::new(Expr::Id(Id("age".to_string()))), + not: false, + start: Box::new(Expr::Literal(Literal::Numeric("20".to_string()))), + end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), + }; + assert!(!super::exprs_are_equivalent(&expr1, &expr3)); + } + #[test] + fn test_cast_exprs_equivalent() { + let cast1 = Expr::Cast { + expr: Box::new(Expr::Literal(Literal::Numeric("123".to_string()))), + type_name: Some(Type { + name: "INTEGER".to_string(), + size: None, + }), + }; + + let cast2 = Expr::Cast { + expr: Box::new(Expr::Literal(Literal::Numeric("123".to_string()))), + type_name: Some(Type { + name: "integer".to_string(), + size: None, + }), + }; + assert!(super::exprs_are_equivalent(&cast1, &cast2)); + } + + #[test] + fn test_ident_equivalency() { + assert!(super::check_ident_equivalency("\"foo\"", "foo")); + assert!(super::check_ident_equivalency("[foo]", "foo")); + assert!(super::check_ident_equivalency("`FOO`", "foo")); + assert!(super::check_ident_equivalency("\"foo\"", "`FOO`")); + assert!(!super::check_ident_equivalency("\"foo\"", "[bar]")); + assert!(!super::check_ident_equivalency("foo", "\"bar\"")); + } +} diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index d32e2d8a6..4ff8746f3 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -498,137 
+498,6 @@ impl Expr { } } -/// This function is used to determine whether two expressions are logically -/// equivalent in the context of queries, even if their representations -/// differ. e.g.: `SUM(x)` and `sum(x)`, `x + y` and `y + x` -pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { - use Expr::*; - match (expr1, expr2) { - ( - Between { - lhs: lhs1, - not: not1, - start: start1, - end: end1, - }, - Between { - lhs: lhs2, - not: not2, - start: start2, - end: end2, - }, - ) => { - *not1 == *not2 - && exprs_are_equivalent(lhs1, lhs2) - && exprs_are_equivalent(start1, start2) - && exprs_are_equivalent(end1, end2) - } - (Binary(lhs1, op1, rhs1), Binary(lhs2, op2, rhs2)) => { - op1 == op2 - && ((exprs_are_equivalent(lhs1, lhs2) && exprs_are_equivalent(rhs1, rhs2)) - || (op1.is_commutative() - && exprs_are_equivalent(lhs1, rhs2) - && exprs_are_equivalent(rhs1, lhs2))) - } - ( - Case { - base: base1, - when_then_pairs: pairs1, - else_expr: else1, - }, - Case { - base: base2, - when_then_pairs: pairs2, - else_expr: else2, - }, - ) => { - base1 == base2 - && pairs1.len() == pairs2.len() - && pairs1.iter().zip(pairs2).all(|((w1, t1), (w2, t2))| { - exprs_are_equivalent(w1, w2) && exprs_are_equivalent(t1, t2) - }) - && else1 == else2 - } - ( - Cast { - expr: expr1, - type_name: type1, - }, - Cast { - expr: expr2, - type_name: type2, - }, - ) => exprs_are_equivalent(expr1, expr2) && type1 == type2, - (Collate(expr1, collation1), Collate(expr2, collation2)) => { - exprs_are_equivalent(expr1, expr2) && collation1.eq_ignore_ascii_case(collation2) - } - ( - FunctionCall { - name: name1, - distinctness: distinct1, - args: args1, - order_by: order1, - filter_over: filter1, - }, - FunctionCall { - name: name2, - distinctness: distinct2, - args: args2, - order_by: order2, - filter_over: filter2, - }, - ) => { - name1.0.eq_ignore_ascii_case(&name2.0) - && distinct1 == distinct2 - && args1 == args2 - && order1 == order2 - && filter1 == filter2 - } - (Literal(lit1), 
Literal(lit2)) => lit1.is_equivalent(lit2), - (Id(id1), Id(id2)) => id1.0.eq_ignore_ascii_case(&id2.0), - (Unary(op1, expr1), Unary(op2, expr2)) => op1 == op2 && exprs_are_equivalent(expr1, expr2), - (Variable(var1), Variable(var2)) => var1 == var2, - (Parenthesized(exprs1), Parenthesized(exprs2)) => { - exprs1.len() == exprs2.len() - && exprs1 - .iter() - .zip(exprs2) - .all(|(e1, e2)| exprs_are_equivalent(e1, e2)) - } - (Parenthesized(exprs1), exprs2) | (exprs2, Parenthesized(exprs1)) => { - exprs1.len() == 1 && exprs_are_equivalent(&exprs1[0], exprs2) - } - ( - InList { - lhs: lhs1, - not: not1, - rhs: rhs1, - }, - InList { - lhs: lhs2, - not: not2, - rhs: rhs2, - }, - ) => { - *not1 == *not2 - && exprs_are_equivalent(lhs1, lhs2) - && rhs1 - .as_ref() - .zip(rhs2.as_ref()) - .map(|(list1, list2)| { - list1.len() == list2.len() - && list1 - .iter() - .zip(list2) - .all(|(e1, e2)| exprs_are_equivalent(e1, e2)) - }) - .unwrap_or(false) - } - // fall back to naive equality check - _ => expr1 == expr2, - } -} - /// SQL literal #[derive(Clone, Debug, PartialEq, Eq)] pub enum Literal { @@ -652,20 +521,6 @@ pub enum Literal { CurrentTimestamp, } -/// normalization for comparison of numeric literals -fn normalize_numeric_str(num_str: &str) -> Option { - if let Ok(value) = num_str.parse::() { - let canonical = if value.fract() == 0.0 { - format!("{}", value as i64) - } else { - format!("{}", value) - }; - Some(canonical) - } else { - None - } -} - impl Literal { /// Constructor pub fn from_ctime_kw(token: Token) -> Self { @@ -679,27 +534,6 @@ impl Literal { unreachable!() } } - - /// checks if two literal values are equivalent - fn is_equivalent(&self, other: &Self) -> bool { - match (self, other) { - (Self::Numeric(n1), Self::Numeric(n2)) => { - match (normalize_numeric_str(n1), normalize_numeric_str(n2)) { - (Some(canonical), Some(canonical2)) => canonical == canonical2, - _ => false, - } - } - // TODO: check for quoted == unquoted strings? 
- (Self::String(s1), Self::String(s2)) => s1.eq_ignore_ascii_case(s2), - (Self::Blob(b1), Self::Blob(b2)) => b1 == b2, - (Self::Keyword(k1), Self::Keyword(k2)) => k1.eq_ignore_ascii_case(k2), - (Self::Null, Self::Null) => true, - (Self::CurrentDate, Self::CurrentDate) => true, - (Self::CurrentTime, Self::CurrentTime) => true, - (Self::CurrentTimestamp, Self::CurrentTimestamp) => true, - _ => false, - } - } } /// Textual comparison operator in an expression @@ -2052,242 +1886,4 @@ mod test { fn name(s: &'static str) -> Name { Name(s.to_owned()) } - - #[test] - fn test_basic_addition_exprs_are_equivalent() { - use super::{Expr, Literal, Operator::*}; - let expr1 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("826".to_string()))), - Add, - Box::new(Expr::Literal(Literal::Numeric("389".to_string()))), - ); - let expr2 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("389".to_string()))), - Add, - Box::new(Expr::Literal(Literal::Numeric("826".to_string()))), - ); - assert!(super::exprs_are_equivalent(&expr1, &expr2)); - } - - #[test] - fn test_addition_expressions_equivalent_normalized() { - use super::{Expr, Literal, Operator::*}; - let expr1 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("123.0".to_string()))), - Add, - Box::new(Expr::Literal(Literal::Numeric("243".to_string()))), - ); - let expr2 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("243.0".to_string()))), - Add, - Box::new(Expr::Literal(Literal::Numeric("123".to_string()))), - ); - assert!(super::exprs_are_equivalent(&expr1, &expr2)); - } - - #[test] - fn test_subtraction_expressions_not_equivalent() { - use super::{Expr, Literal, Operator::*}; - let expr3 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("364".to_string()))), - Subtract, - Box::new(Expr::Literal(Literal::Numeric("22.0".to_string()))), - ); - let expr4 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("22.0".to_string()))), - Subtract, - 
Box::new(Expr::Literal(Literal::Numeric("364".to_string()))), - ); - assert!(!super::exprs_are_equivalent(&expr3, &expr4)); - } - - #[test] - fn test_subtraction_expressions_normalized() { - use super::{Expr, Literal, Operator::*}; - let expr3 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("66.0".to_string()))), - Subtract, - Box::new(Expr::Literal(Literal::Numeric("22".to_string()))), - ); - let expr4 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("66".to_string()))), - Subtract, - Box::new(Expr::Literal(Literal::Numeric("22.0".to_string()))), - ); - assert!(super::exprs_are_equivalent(&expr3, &expr4)); - } - - #[test] - fn test_expressions_equivalent_case_insensitive_functioncalls() { - use super::{Expr, Id}; - let func1 = Expr::FunctionCall { - name: Id("SUM".to_string()), - distinctness: None, - args: Some(vec![Expr::Id(Id("x".to_string()))]), - order_by: None, - filter_over: None, - }; - let func2 = Expr::FunctionCall { - name: Id("sum".to_string()), - distinctness: None, - args: Some(vec![Expr::Id(Id("x".to_string()))]), - order_by: None, - filter_over: None, - }; - assert!(super::exprs_are_equivalent(&func1, &func2)); - - let func3 = Expr::FunctionCall { - name: Id("SUM".to_string()), - distinctness: Some(super::Distinctness::Distinct), - args: Some(vec![Expr::Id(Id("x".to_string()))]), - order_by: None, - filter_over: None, - }; - assert!(!super::exprs_are_equivalent(&func1, &func3)); - } - - #[test] - fn test_expressions_equivalent_identical_fn_with_distinct() { - use super::{Expr, Id}; - let sum = Expr::FunctionCall { - name: Id("SUM".to_string()), - distinctness: None, - args: Some(vec![Expr::Id(Id("x".to_string()))]), - order_by: None, - filter_over: None, - }; - let sum_distinct = Expr::FunctionCall { - name: Id("SUM".to_string()), - distinctness: Some(super::Distinctness::Distinct), - args: Some(vec![Expr::Id(Id("x".to_string()))]), - order_by: None, - filter_over: None, - }; - assert!(!super::exprs_are_equivalent(&sum, 
&sum_distinct)); - } - - #[test] - fn test_expressions_equivalent_multiplicaiton() { - use super::{Expr, Literal, Operator::*}; - let expr1 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("42.0".to_string()))), - Multiply, - Box::new(Expr::Literal(Literal::Numeric("38".to_string()))), - ); - let expr2 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("38.0".to_string()))), - Multiply, - Box::new(Expr::Literal(Literal::Numeric("42".to_string()))), - ); - assert!(super::exprs_are_equivalent(&expr1, &expr2)); - } - - #[test] - fn test_expressions_both_parenthesized_equivalent() { - use super::{Expr, Literal, Operator::*}; - let expr1 = Expr::Parenthesized(vec![Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("683".to_string()))), - Add, - Box::new(Expr::Literal(Literal::Numeric("799.0".to_string()))), - )]); - let expr2 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("799".to_string()))), - Add, - Box::new(Expr::Literal(Literal::Numeric("683".to_string()))), - ); - assert!(super::exprs_are_equivalent(&expr1, &expr2)); - } - #[test] - fn test_expressions_parenthesized_equivalent() { - use super::{Expr, Literal, Operator::*}; - let expr7 = Expr::Parenthesized(vec![Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("6".to_string()))), - Add, - Box::new(Expr::Literal(Literal::Numeric("7".to_string()))), - )]); - let expr8 = Expr::Binary( - Box::new(Expr::Literal(Literal::Numeric("6".to_string()))), - Add, - Box::new(Expr::Literal(Literal::Numeric("7".to_string()))), - ); - assert!(super::exprs_are_equivalent(&expr7, &expr8)); - } - - #[test] - fn test_like_expressions_equivalent() { - use super::{Expr, Id, Literal}; - let expr1 = Expr::Like { - lhs: Box::new(Expr::Id(Id("name".to_string()))), - not: false, - op: super::LikeOperator::Like, - rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), - escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), - }; - let expr2 = Expr::Like { - lhs: 
Box::new(Expr::Id(Id("name".to_string()))), - not: false, - op: super::LikeOperator::Like, - rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), - escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), - }; - assert!(super::exprs_are_equivalent(&expr1, &expr2)); - } - - #[test] - fn test_expressions_equivalent_like_escaped() { - use super::{Expr, Id, Literal}; - let expr1 = Expr::Like { - lhs: Box::new(Expr::Id(Id("name".to_string()))), - not: false, - op: super::LikeOperator::Like, - rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), - escape: Some(Box::new(Expr::Literal(Literal::String("\\".to_string())))), - }; - let expr2 = Expr::Like { - lhs: Box::new(Expr::Id(Id("name".to_string()))), - not: false, - op: super::LikeOperator::Like, - rhs: Box::new(Expr::Literal(Literal::String("%john%".to_string()))), - escape: Some(Box::new(Expr::Literal(Literal::String("#".to_string())))), - }; - assert!(!super::exprs_are_equivalent(&expr1, &expr2)); - } - #[test] - fn test_expressions_equivalent_between() { - use super::{Expr, Id, Literal}; - let expr1 = Expr::Between { - lhs: Box::new(Expr::Id(Id("age".to_string()))), - not: false, - start: Box::new(Expr::Literal(Literal::Numeric("18".to_string()))), - end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), - }; - let expr2 = Expr::Between { - lhs: Box::new(Expr::Id(Id("age".to_string()))), - not: false, - start: Box::new(Expr::Literal(Literal::Numeric("18".to_string()))), - end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), - }; - assert!(super::exprs_are_equivalent(&expr1, &expr2)); - - // differing BETWEEN bounds - let expr3 = Expr::Between { - lhs: Box::new(Expr::Id(Id("age".to_string()))), - not: false, - start: Box::new(Expr::Literal(Literal::Numeric("20".to_string()))), - end: Box::new(Expr::Literal(Literal::Numeric("65".to_string()))), - }; - assert!(!super::exprs_are_equivalent(&expr1, &expr3)); - } - - #[test] - fn 
test_normalize_numeric_string() { - use super::normalize_numeric_str; - assert_eq!(normalize_numeric_str("001"), Some("1".to_string())); - assert_eq!(normalize_numeric_str("1.00"), Some("1".to_string())); - assert_eq!(normalize_numeric_str("0.010"), Some("0.01".to_string())); - assert_eq!(normalize_numeric_str("1e3"), Some("1000".to_string())); - assert_eq!(normalize_numeric_str("1.23e2"), Some("123".to_string())); - assert_eq!(normalize_numeric_str("invalid"), None); - assert_eq!(normalize_numeric_str(""), None); - } } From 1833dcb618bffb79a0f90c9ea557e52145ca5277 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 16 Dec 2024 20:14:04 -0500 Subject: [PATCH 032/144] Shrink shell help msg and replace hardcoded path for shell tests --- Makefile | 2 +- cli/app.rs | 9 +-------- core/util.rs | 10 ++++++++++ testing/shelltests.py | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index ee2f15e0d..096c0a679 100644 --- a/Makefile +++ b/Makefile @@ -61,7 +61,7 @@ test: limbo test-compat test-sqlite3 test-shell .PHONY: test test-shell: limbo - ./testing/shelltests.py + SQLITE_EXEC=$(SQLITE_EXEC) ./testing/shelltests.py .PHONY: test-shell test-compat: diff --git a/cli/app.rs b/cli/app.rs index c9f388af6..cd3e21eb9 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -646,7 +646,6 @@ fn get_io(db: &str) -> anyhow::Result> { const HELP_MSG: &str = r#" Limbo SQL Shell Help ============== - Welcome to the Limbo SQL Shell! You can execute any standard SQL command here. In addition to standard SQL commands, the following special commands are available: @@ -689,12 +688,6 @@ Usage Examples: 8. Show the current values of settings: .show -9. Set the value 'NULL' to be displayed for null values instead of empty string: - .nullvalue "NULL" - Note: ------ - All SQL commands must end with a semicolon (;). -- Special commands do not require a semicolon. 
- -"#; +- Special commands do not require a semicolon."#; diff --git a/core/util.rs b/core/util.rs index 985bfaa28..4b8a7f43b 100644 --- a/core/util.rs +++ b/core/util.rs @@ -217,6 +217,8 @@ pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { _ => false, } } + (Expr::NotNull(expr1), Expr::NotNull(expr2)) => exprs_are_equivalent(expr1, expr2), + (Expr::IsNull(expr1), Expr::IsNull(expr2)) => exprs_are_equivalent(expr1, expr2), (Expr::Literal(lit1), Expr::Literal(lit2)) => check_literal_equivalency(lit1, lit2), (Expr::Id(id1), Expr::Id(id2)) => check_ident_equivalency(&id1.0, &id2.0), (Expr::Unary(op1, expr1), Expr::Unary(op2, expr2)) => { @@ -233,6 +235,14 @@ pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { (Expr::Parenthesized(exprs1), exprs2) | (exprs2, Expr::Parenthesized(exprs1)) => { exprs1.len() == 1 && exprs_are_equivalent(&exprs1[0], exprs2) } + (Expr::Qualified(tn1, cn1), Expr::Qualified(tn2, cn2)) => { + check_ident_equivalency(&tn1.0, &tn2.0) && check_ident_equivalency(&cn1.0, &cn2.0) + } + (Expr::DoublyQualified(sn1, tn1, cn1), Expr::DoublyQualified(sn2, tn2, cn2)) => { + check_ident_equivalency(&sn1.0, &sn2.0) + && check_ident_equivalency(&tn1.0, &tn2.0) + && check_ident_equivalency(&cn1.0, &cn2.0) + } ( Expr::InList { lhs: lhs1, diff --git a/testing/shelltests.py b/testing/shelltests.py index 22c9ed122..f36972e25 100755 --- a/testing/shelltests.py +++ b/testing/shelltests.py @@ -3,7 +3,7 @@ import subprocess # Configuration -sqlite_exec = "./target/debug/limbo" +sqlite_exec = os.getenv("SQLITE_EXEC", "./target/debug/limbo") cwd = os.getcwd() # Initial setup commands From 3252ab542bc0231483bff3a596f9b7fcc889e273 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=B6kmen=20G=C3=B6rgen?= Date: Tue, 17 Dec 2024 01:33:54 +0000 Subject: [PATCH 033/144] fix project name in txt files. 
--- bindings/python/requirements-dev.txt | 12 ++++++------ bindings/python/requirements.txt | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bindings/python/requirements-dev.txt b/bindings/python/requirements-dev.txt index c3a985985..bfb954746 100644 --- a/bindings/python/requirements-dev.txt +++ b/bindings/python/requirements-dev.txt @@ -1,11 +1,11 @@ coverage==7.6.1 # via - # limbo (pyproject.toml) + # pylimbo (pyproject.toml) # pytest-cov iniconfig==2.0.0 # via pytest mypy==1.11.0 - # via limbo (pyproject.toml) + # via pylimbo (pyproject.toml) mypy-extensions==1.0.0 # via mypy packaging==24.2 @@ -14,13 +14,13 @@ pluggy==1.5.0 # via pytest pytest==8.3.1 # via - # limbo (pyproject.toml) + # pylimbo (pyproject.toml) # pytest-cov pytest-cov==5.0.0 - # via limbo (pyproject.toml) + # via pylimbo (pyproject.toml) ruff==0.5.4 - # via limbo (pyproject.toml) + # via pylimbo (pyproject.toml) typing-extensions==4.12.2 # via - # limbo (pyproject.toml) # mypy + # pylimbo (pyproject.toml) diff --git a/bindings/python/requirements.txt b/bindings/python/requirements.txt index d803b3772..c1348444d 100644 --- a/bindings/python/requirements.txt +++ b/bindings/python/requirements.txt @@ -1,2 +1,2 @@ typing-extensions==4.12.2 - # via limbo (pyproject.toml) + # via pylimbo (pyproject.toml) From 969ab244c259adc6990781f1426e0a941c237a42 Mon Sep 17 00:00:00 2001 From: Konstantinos Artopoulos Date: Wed, 18 Dec 2024 00:00:29 +0200 Subject: [PATCH 034/144] feat(cli): added .tables command --- cli/app.rs | 85 ++++++++++++++++++++++++++++++++++++--- testing/cmdlineshell.test | 9 +++++ 2 files changed, 89 insertions(+), 5 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index 496a56b2a..c1a5fcd43 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -76,6 +76,8 @@ pub enum Command { NullValue, /// Toggle 'echo' mode to repeat commands before execution Echo, + /// Display tables + Tables, } impl Command { @@ -86,6 +88,7 @@ impl Command { | Self::Help | Self::Opcodes | Self::ShowInfo 
+ | Self::Tables | Self::SetOutput => 0, Self::Open | Self::OutputMode | Self::Cwd | Self::Echo | Self::NullValue => 1, } + 1) // argv0 @@ -104,6 +107,7 @@ impl Command { Self::ShowInfo => ".show", Self::NullValue => ".nullvalue ", Self::Echo => ".echo on|off", + Self::Tables => ".tables", } } } @@ -116,6 +120,7 @@ impl FromStr for Command { ".open" => Ok(Self::Open), ".help" => Ok(Self::Help), ".schema" => Ok(Self::Schema), + ".tables" => Ok(Self::Tables), ".opcodes" => Ok(Self::Opcodes), ".mode" => Ok(Self::OutputMode), ".output" => Ok(Self::SetOutput), @@ -421,6 +426,12 @@ impl Limbo { let _ = self.writeln(e.to_string()); } } + Command::Tables => { + let pattern = args.get(1).copied(); + if let Err(e) = self.display_tables(pattern) { + let _ = self.writeln(e.to_string()); + } + } Command::Opcodes => { if args.len() > 1 { for op in &OPCODE_DESCRIPTIONS { @@ -621,6 +632,63 @@ impl Limbo { Ok(()) } + + fn display_tables(&mut self, pattern: Option<&str>) -> anyhow::Result<()> { + let sql = match pattern { + Some(pattern) => format!( + "SELECT name FROM sqlite_schema WHERE type='table' AND name NOT LIKE 'sqlite_%' AND name LIKE '{}' ORDER BY 1", + pattern + ), + None => String::from( + "SELECT name FROM sqlite_schema WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY 1" + ), + }; + + match self.conn.query(&sql) { + Ok(Some(ref mut rows)) => { + let mut tables = String::new(); + loop { + match rows.next_row()? 
{ + RowResult::Row(row) => { + if let Some(Value::Text(table)) = row.values.first() { + tables.push_str(table); + tables.push(' '); + } + } + RowResult::IO => { + self.io.run_once()?; + } + RowResult::Done => break, + } + } + + if tables.len() > 0 { + let _ = self.writeln(tables.trim_end()); + } else { + if let Some(pattern) = pattern { + let _ = self.write_fmt(format_args!( + "Error: Tables with pattern '{}' not found.", + pattern + )); + } else { + let _ = self.writeln("No tables found in the database."); + } + } + } + Ok(None) => { + let _ = self.writeln("No results returned from the query."); + } + Err(err) => { + if err.to_string().contains("no such table: sqlite_schema") { + return Err(anyhow::anyhow!("Unable to access database schema. The database may be using an older SQLite version or may not be properly initialized.")); + } else { + return Err(anyhow::anyhow!("Error querying schema: {}", err)); + } + } + } + + Ok(()) + } } fn get_writer(output: &str) -> Box { @@ -656,6 +724,7 @@ Special Commands: .open Open and connect to a database file. .output Change the output mode. Available modes are 'raw' and 'pretty'. .schema Show the schema of the specified table. +.tables List names of tables matching LIKE pattern TABLE .opcodes Display all the opcodes defined by the virtual machine .cd Change the current working directory. .nullvalue Set the value to be displayed for null values. @@ -673,21 +742,27 @@ Usage Examples: 3. To view the schema of a table named 'employees': .schema employees -4. To list all available SQL opcodes: +4. To list all tables: + .tables + +5. To list all available SQL opcodes: .opcodes -5. To change the current output mode to 'pretty': +6. To change the current output mode to 'pretty': .mode pretty -6. Send output to STDOUT if no file is specified: +7. Send output to STDOUT if no file is specified: .output -7. To change the current working directory to '/tmp': +8. To change the current working directory to '/tmp': .cd /tmp -8. 
Show the current values of settings: +9. Show the current values of settings: .show +10. Set the value 'NULL' to be displayed for null values instead of empty string: + .nullvalue "NULL" + Note: - All SQL commands must end with a semicolon (;). - Special commands do not require a semicolon."#; diff --git a/testing/cmdlineshell.test b/testing/cmdlineshell.test index bb4183bcd..1112e0aaa 100755 --- a/testing/cmdlineshell.test +++ b/testing/cmdlineshell.test @@ -95,3 +95,12 @@ do_execsql_test_on_specific_db testing/testing.db schema-2 { # name TEXT, # price REAL # );"} + +# FIXME sqlite uses multicolumn output mode for display resulting in different spacing +# do_execsql_test_on_specific_db testing/testing.db schema-1 { +# .tables +# } {"products users"} + +do_execsql_test_on_specific_db testing/testing.db schema-1 { + .tables us% +} {"users"} From ba676b6eadb8ea6b9e95650e8f1b23c8010d83bd Mon Sep 17 00:00:00 2001 From: Konstantinos Artopoulos Date: Wed, 18 Dec 2024 00:13:42 +0200 Subject: [PATCH 035/144] fix: remove old help menu item that sneaked in on rebase --- cli/app.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index c1a5fcd43..a6bc003a9 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -760,9 +760,6 @@ Usage Examples: 9. Show the current values of settings: .show -10. Set the value 'NULL' to be displayed for null values instead of empty string: - .nullvalue "NULL" - Note: - All SQL commands must end with a semicolon (;). 
- Special commands do not require a semicolon."#; From 7d4d803a13a3f473d3c1fa509c4226a11baefc3e Mon Sep 17 00:00:00 2001 From: alpaylan Date: Tue, 17 Dec 2024 18:24:39 -0500 Subject: [PATCH 036/144] implement interaction plans --- simulator/generation.rs | 32 +++- simulator/generation/plan.rs | 339 ++++++++++++++++++++++++++++++++++ simulator/generation/table.rs | 2 +- simulator/main.rs | 117 +++++------- simulator/model/table.rs | 2 +- 5 files changed, 414 insertions(+), 78 deletions(-) create mode 100644 simulator/generation/plan.rs diff --git a/simulator/generation.rs b/simulator/generation.rs index ece3a2c3b..26f42f6d6 100644 --- a/simulator/generation.rs +++ b/simulator/generation.rs @@ -2,6 +2,7 @@ use anarchist_readable_name_generator_lib::readable_name_custom; use rand::Rng; pub mod query; +pub mod plan; pub mod table; pub trait Arbitrary { @@ -12,10 +13,39 @@ pub trait ArbitraryFrom { fn arbitrary_from(rng: &mut R, t: &T) -> Self; } +pub(crate) fn frequency<'a, T, R: rand::Rng>(choices: Vec<(usize, Box T + 'a>)>, rng: &mut R) -> T { + let total = choices.iter().map(|(weight, _)| weight).sum::(); + let mut choice = rng.gen_range(0..total); + + for (weight, f) in choices { + if choice < weight { + return f(rng); + } + choice -= weight; + } + + unreachable!() +} + +pub(crate) fn one_of(choices: Vec T>>, rng: &mut R) -> T { + let index = rng.gen_range(0..choices.len()); + choices[index](rng) +} + +pub(crate) fn pick<'a, T, R: rand::Rng>(choices: &'a Vec, rng: &mut R) -> &'a T { + let index = rng.gen_range(0..choices.len()); + &choices[index] +} + +pub(crate) fn pick_index(choices: usize, rng: &mut R) -> usize { + rng.gen_range(0..choices) +} + fn gen_random_text(rng: &mut T) -> String { let big_text = rng.gen_ratio(1, 1000); if big_text { - let max_size: u64 = 2 * 1024 * 1024 * 1024; + // let max_size: u64 = 2 * 1024 * 1024 * 1024; + let max_size: u64 = 2 * 1024; // todo: change this back to 2 * 1024 * 1024 * 1024 let size = rng.gen_range(1024..max_size); let 
mut name = String::new(); for i in 0..size { diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs new file mode 100644 index 000000000..6e5e6d951 --- /dev/null +++ b/simulator/generation/plan.rs @@ -0,0 +1,339 @@ +use std::{f32::consts::E, fmt::Display, os::macos::raw::stat, rc::Rc}; + +use limbo_core::{Connection, Result, RowResult}; +use rand::SeedableRng; +use rand_chacha::ChaCha8Rng; + +use crate::{ + model::{ + query::{Create, Insert, Predicate, Query, Select}, + table::Value, + }, + SimulatorEnv, SimulatorOpts, +}; + +use crate::generation::{frequency, Arbitrary, ArbitraryFrom}; + +use super::{pick, pick_index}; + +pub(crate) type ResultSet = Vec>; + +pub(crate) struct InteractionPlan { + pub(crate) plan: Vec, + pub(crate) stack: Vec, +} + +impl Display for InteractionPlan { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for interaction in &self.plan { + match interaction { + Interaction::Query(query) => write!(f, "{};\n", query)?, + Interaction::Assertion(assertion) => write!(f, "-- ASSERT: {};\n", assertion.message)?, + } + } + + Ok(()) + } +} + +#[derive(Debug)] +pub(crate) struct InteractionStats { + pub(crate) read_count: usize, + pub(crate) write_count: usize, + pub(crate) delete_count: usize, +} + +impl Display for InteractionStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Read: {}, Write: {}, Delete: {}", + self.read_count, self.write_count, self.delete_count + ) + } +} + +pub(crate) enum Interaction { + Query(Query), + Assertion(Assertion), +} + +impl Display for Interaction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Interaction::Query(query) => write!(f, "{}", query), + Interaction::Assertion(assertion) => write!(f, "ASSERT: {}", assertion.message), + } + } +} + +pub(crate) struct Assertion { + pub(crate) func: Box) -> bool>, + pub(crate) message: String, +} + +pub(crate) struct Interactions(Vec); + +impl 
Interactions { + pub(crate) fn shadow(&self, env: &mut SimulatorEnv) { + for interaction in &self.0 { + match interaction { + Interaction::Query(query) => match query { + Query::Create(create) => { + env.tables.push(create.table.clone()); + } + Query::Insert(insert) => { + let table = env + .tables + .iter_mut() + .find(|t| t.name == insert.table) + .unwrap(); + table.rows.push(insert.values.clone()); + } + Query::Delete(_) => todo!(), + Query::Select(_) => {} + }, + Interaction::Assertion(_) => {} + } + } + } +} + +impl InteractionPlan { + pub(crate) fn new() -> Self { + InteractionPlan { + plan: Vec::new(), + stack: Vec::new(), + } + } + + pub(crate) fn push(&mut self, interaction: Interaction) { + self.plan.push(interaction); + } + + pub(crate) fn stats(&self) -> InteractionStats { + let mut read = 0; + let mut write = 0; + let mut delete = 0; + + for interaction in &self.plan { + match interaction { + Interaction::Query(query) => match query { + Query::Select(_) => read += 1, + Query::Insert(_) => write += 1, + Query::Delete(_) => delete += 1, + Query::Create(_) => {} + }, + Interaction::Assertion(_) => {} + } + } + + InteractionStats { + read_count: read, + write_count: write, + delete_count: delete, + } + } +} + +impl ArbitraryFrom for InteractionPlan { + fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { + let mut plan = InteractionPlan::new(); + + let mut env = SimulatorEnv { + opts: env.opts.clone(), + tables: vec![], + connections: vec![], + io: env.io.clone(), + db: env.db.clone(), + rng: ChaCha8Rng::seed_from_u64(rng.next_u64()), + }; + + let num_interactions = rng.gen_range(0..env.opts.max_interactions); + + // First create at least one table + let create_query = Create::arbitrary(rng); + env.tables.push(create_query.table.clone()); + plan.push(Interaction::Query(Query::Create(create_query))); + + while plan.plan.len() < num_interactions { + log::debug!( + "Generating interaction {}/{}", + plan.plan.len(), + num_interactions + ); + let 
interactions = Interactions::arbitrary_from(rng, &(&env, plan.stats())); + interactions.shadow(&mut env); + + plan.plan.extend(interactions.0.into_iter()); + } + + log::info!("Generated plan with {} interactions", plan.plan.len()); + plan + } +} + +impl Interaction { + pub(crate) fn execute_query(&self, conn: &mut Rc) -> Result { + match self { + Interaction::Query(query) => { + let query_str = query.to_string(); + let rows = conn.query(&query_str); + if rows.is_err() { + let err = rows.err(); + log::error!( + "Error running query '{}': {:?}", + &query_str[0..query_str.len().min(4096)], + err + ); + return Err(err.unwrap()); + } + let rows = rows.unwrap(); + assert!(rows.is_some()); + let mut rows = rows.unwrap(); + let mut out = Vec::new(); + while let Ok(row) = rows.next_row() { + match row { + RowResult::Row(row) => { + let mut r = Vec::new(); + for el in &row.values { + let v = match el { + limbo_core::Value::Null => Value::Null, + limbo_core::Value::Integer(i) => Value::Integer(*i), + limbo_core::Value::Float(f) => Value::Float(*f), + limbo_core::Value::Text(t) => Value::Text(t.to_string()), + limbo_core::Value::Blob(b) => Value::Blob(b.to_vec()), + }; + r.push(v); + } + + out.push(r); + } + RowResult::IO => {} + RowResult::Done => { + break; + } + } + } + + Ok(out) + } + Interaction::Assertion(_) => { + unreachable!("unexpected: this function should only be called on queries") + } + } + } + + pub(crate) fn execute_assertion(&self, stack: &Vec) -> Result<()> { + match self { + Interaction::Query(_) => { + unreachable!("unexpected: this function should only be called on assertions") + } + Interaction::Assertion(assertion) => { + if !assertion.func.as_ref()(stack) { + return Err(limbo_core::LimboError::InternalError( + assertion.message.clone(), + )); + } + Ok(()) + } + } + } +} + +fn property_insert_select(rng: &mut R, env: &SimulatorEnv) -> Interactions { + // Get a random table + let table = pick(&env.tables, rng); + // Pick a random column + let column_index 
= pick_index(table.columns.len(), rng); + let column = &table.columns[column_index].clone(); + // Generate a random value of the column type + let value = Value::arbitrary_from(rng, &column.column_type); + // Create a whole new row + let mut row = Vec::new(); + for (i, column) in table.columns.iter().enumerate() { + if i == column_index { + row.push(value.clone()); + } else { + let value = Value::arbitrary_from(rng, &column.column_type); + row.push(value); + } + } + // Insert the row + let insert_query = Interaction::Query(Query::Insert(Insert { + table: table.name.clone(), + values: row.clone(), + })); + + // Select the row + let select_query = Interaction::Query(Query::Select(Select { + table: table.name.clone(), + predicate: Predicate::Eq(column.name.clone(), value.clone()), + })); + + // Check that the row is there + let assertion = Interaction::Assertion(Assertion { + message: format!( + "row [{:?}] not found in table {} after inserting ({} = {})", + row.iter().map(|v| v.to_string()).collect::>(), + table.name, + column.name, + value, + ), + func: Box::new(move |stack: &Vec| { + let rows = stack.last().unwrap(); + rows.iter().any(|r| r == &row) + }), + }); + + Interactions(vec![insert_query, select_query, assertion]) +} + +fn create_table(rng: &mut R, env: &SimulatorEnv) -> Interactions { + let create_query = Interaction::Query(Query::Create(Create::arbitrary(rng))); + Interactions(vec![create_query]) +} + +fn random_read(rng: &mut R, env: &SimulatorEnv) -> Interactions { + let select_query = Interaction::Query(Query::Select(Select::arbitrary_from(rng, &env.tables))); + Interactions(vec![select_query]) +} + +fn random_write(rng: &mut R, env: &SimulatorEnv) -> Interactions { + let table = pick(&env.tables, rng); + let insert_query = Interaction::Query(Query::Insert(Insert::arbitrary_from(rng, table))); + Interactions(vec![insert_query]) +} + +impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { + fn arbitrary_from( + rng: &mut R, + (env, 
stats): &(&SimulatorEnv, InteractionStats), + ) -> Self { + let remaining_read = + ((((env.opts.max_interactions * env.opts.read_percent) as f64) / 100.0) as usize) + .saturating_sub(stats.read_count); + let remaining_write = ((((env.opts.max_interactions * env.opts.write_percent) as f64) + / 100.0) as usize) + .saturating_sub(stats.write_count); + + frequency( + vec![ + ( + usize::min(remaining_read, remaining_write), + Box::new(|rng: &mut R| property_insert_select(rng, env)), + ), + ( + remaining_read, + Box::new(|rng: &mut R| random_read(rng, env)), + ), + ( + remaining_write, + Box::new(|rng: &mut R| random_write(rng, env)), + ), + (1, Box::new(|rng: &mut R| create_table(rng, env))), + ], + rng, + ) + } +} diff --git a/simulator/generation/table.rs b/simulator/generation/table.rs index 8d5d70e98..685171207 100644 --- a/simulator/generation/table.rs +++ b/simulator/generation/table.rs @@ -13,7 +13,7 @@ impl Arbitrary for Name { impl Arbitrary for Table { fn arbitrary(rng: &mut R) -> Self { let name = Name::arbitrary(rng).0; - let columns = (1..rng.gen_range(1..128)) + let columns = (1..=rng.gen_range(1..10)) .map(|_| Column::arbitrary(rng)) .collect(); Table { diff --git a/simulator/main.rs b/simulator/main.rs index 11c171b25..c7ddc9bce 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,4 +1,5 @@ -use generation::{Arbitrary, ArbitraryFrom}; +use generation::plan::{Interaction, ResultSet}; +use generation::{pick, pick_index, Arbitrary, ArbitraryFrom}; use limbo_core::{Connection, Database, File, OpenFlags, PlatformIO, Result, RowResult, IO}; use model::query::{Insert, Predicate, Query, Select}; use model::table::{Column, Name, Table, Value}; @@ -6,6 +7,7 @@ use properties::{property_insert_select, property_select_all}; use rand::prelude::*; use rand_chacha::ChaCha8Rng; use std::cell::RefCell; +use std::io::Write; use std::rc::Rc; use std::sync::Arc; use tempfile::TempDir; @@ -29,13 +31,7 @@ enum SimConnection { Disconnected, } -#[derive(Debug, Copy, 
Clone)] -enum SimulatorMode { - Random, - Workload, -} - -#[derive(Debug)] +#[derive(Debug, Clone)] struct SimulatorOpts { ticks: usize, max_connections: usize, @@ -45,7 +41,7 @@ struct SimulatorOpts { read_percent: usize, write_percent: usize, delete_percent: usize, - mode: SimulatorMode, + max_interactions: usize, page_size: usize, } @@ -77,8 +73,8 @@ fn main() { read_percent, write_percent, delete_percent, - mode: SimulatorMode::Workload, page_size: 4096, // TODO: randomize this too + max_interactions: rng.gen_range(0..10000), }; let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap()); @@ -104,10 +100,20 @@ fn main() { println!("Initial opts {:?}", env.opts); - for _ in 0..env.opts.ticks { - let connection_index = env.rng.gen_range(0..env.opts.max_connections); + log::info!("Generating database interaction plan..."); + let mut plan = generation::plan::InteractionPlan::arbitrary_from(&mut env.rng.clone(), &env); + + log::info!("{}", plan.stats()); + + for interaction in &plan.plan { + let connection_index = pick_index(env.connections.len(), &mut env.rng); let mut connection = env.connections[connection_index].clone(); + if matches!(connection, SimConnection::Disconnected) { + connection = SimConnection::Connected(env.db.connect()); + env.connections[connection_index] = connection.clone(); + } + match &mut connection { SimConnection::Connected(conn) => { let disconnect = env.rng.gen_ratio(1, 100); @@ -116,10 +122,20 @@ fn main() { let _ = conn.close(); env.connections[connection_index] = SimConnection::Disconnected; } else { - match process_connection(&mut env, conn) { - Ok(_) => {} + match process_connection(conn, interaction, &mut plan.stack) { + Ok(_) => { + log::info!("connection {} processed", connection_index); + } Err(err) => { log::error!("error {}", err); + log::debug!("db is at {:?}", path); + // save the interaction plan + let mut path = TempDir::new().unwrap().into_path(); + path.push("simulator.plan"); + let mut f = 
std::fs::File::create(path.clone()).unwrap(); + f.write(plan.to_string().as_bytes()).unwrap(); + log::debug!("plan saved at {:?}", path); + log::debug!("seed was {}", seed); break; } } @@ -130,73 +146,24 @@ fn main() { env.connections[connection_index] = SimConnection::Connected(env.db.connect()); } } + + } + env.io.print_stats(); } -fn process_connection(env: &mut SimulatorEnv, conn: &mut Rc) -> Result<()> { - if env.tables.is_empty() { - maybe_add_table(env, conn)?; - } - - match env.opts.mode { - SimulatorMode::Random => { - match env.rng.gen_range(0..2) { - // Randomly insert a value and check that the select result contains it. - 0 => property_insert_select(env, conn), - // Check that the current state of the in-memory table is the same as the one in the - // database. - 1 => property_select_all(env, conn), - // Perform a random query, update the in-memory table with the result. - 2 => { - let table_index = env.rng.gen_range(0..env.tables.len()); - let query = Query::arbitrary_from(&mut env.rng, &env.tables[table_index]); - let rows = get_all_rows(env, conn, query.to_string().as_str())?; - env.tables[table_index].rows = rows; - } - _ => unreachable!(), - } +fn process_connection(conn: &mut Rc, interaction: &Interaction, stack: &mut Vec) -> Result<()> { + match interaction { + generation::plan::Interaction::Query(_) => { + log::debug!("{}", interaction); + let results = interaction.execute_query(conn)?; + log::debug!("{:?}", results); + stack.push(results); } - SimulatorMode::Workload => { - let picked = env.rng.gen_range(0..100); - - if env.rng.gen_ratio(1, 100) { - maybe_add_table(env, conn)?; - } - - if picked < env.opts.read_percent { - let query = Select::arbitrary_from(&mut env.rng, &env.tables); - let _ = get_all_rows(env, conn, Query::Select(query).to_string().as_str())?; - } else if picked < env.opts.read_percent + env.opts.write_percent { - let table_index = env.rng.gen_range(0..env.tables.len()); - let column_index = 
env.rng.gen_range(0..env.tables[table_index].columns.len()); - let column = &env.tables[table_index].columns[column_index].clone(); - let mut rng = env.rng.clone(); - let value = Value::arbitrary_from(&mut rng, &column.column_type); - let mut row = Vec::new(); - for (i, column) in env.tables[table_index].columns.iter().enumerate() { - if i == column_index { - row.push(value.clone()); - } else { - let value = Value::arbitrary_from(&mut rng, &column.column_type); - row.push(value); - } - } - let query = Query::Insert(Insert { - table: env.tables[table_index].name.clone(), - values: row.clone(), - }); - let _ = get_all_rows(env, conn, query.to_string().as_str())?; - env.tables[table_index].rows.push(row.clone()); - } else { - let table_index = env.rng.gen_range(0..env.tables.len()); - let query = Query::Select(Select { - table: env.tables[table_index].name.clone(), - predicate: Predicate::And(Vec::new()), - }); - let _ = get_all_rows(env, conn, query.to_string().as_str())?; - } + generation::plan::Interaction::Assertion(_) => { + interaction.execute_assertion(stack)?; } } diff --git a/simulator/model/table.rs b/simulator/model/table.rs index bc018e132..93c3d6d74 100644 --- a/simulator/model/table.rs +++ b/simulator/model/table.rs @@ -10,7 +10,7 @@ impl Deref for Name { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct Table { pub(crate) rows: Vec>, pub(crate) name: String, From 66e7a4edecade4eb6f5970256ed5debeaf71af49 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Tue, 17 Dec 2024 18:30:55 -0500 Subject: [PATCH 037/144] fix formatting --- simulator/generation.rs | 7 +++++-- simulator/generation/plan.rs | 4 +++- simulator/main.rs | 9 +++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/simulator/generation.rs b/simulator/generation.rs index 26f42f6d6..15b5845ec 100644 --- a/simulator/generation.rs +++ b/simulator/generation.rs @@ -1,8 +1,8 @@ use anarchist_readable_name_generator_lib::readable_name_custom; use rand::Rng; -pub mod 
query; pub mod plan; +pub mod query; pub mod table; pub trait Arbitrary { @@ -13,7 +13,10 @@ pub trait ArbitraryFrom { fn arbitrary_from(rng: &mut R, t: &T) -> Self; } -pub(crate) fn frequency<'a, T, R: rand::Rng>(choices: Vec<(usize, Box T + 'a>)>, rng: &mut R) -> T { +pub(crate) fn frequency<'a, T, R: rand::Rng>( + choices: Vec<(usize, Box T + 'a>)>, + rng: &mut R, +) -> T { let total = choices.iter().map(|(weight, _)| weight).sum::(); let mut choice = rng.gen_range(0..total); diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 6e5e6d951..d7c309b93 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -28,7 +28,9 @@ impl Display for InteractionPlan { for interaction in &self.plan { match interaction { Interaction::Query(query) => write!(f, "{};\n", query)?, - Interaction::Assertion(assertion) => write!(f, "-- ASSERT: {};\n", assertion.message)?, + Interaction::Assertion(assertion) => { + write!(f, "-- ASSERT: {};\n", assertion.message)? 
+ } } } diff --git a/simulator/main.rs b/simulator/main.rs index c7ddc9bce..0f745e7e8 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -146,15 +146,16 @@ fn main() { env.connections[connection_index] = SimConnection::Connected(env.db.connect()); } } - - } - env.io.print_stats(); } -fn process_connection(conn: &mut Rc, interaction: &Interaction, stack: &mut Vec) -> Result<()> { +fn process_connection( + conn: &mut Rc, + interaction: &Interaction, + stack: &mut Vec, +) -> Result<()> { match interaction { generation::plan::Interaction::Query(_) => { log::debug!("{}", interaction); From fb2908b3e9456c65872af5fb57e901a01c1b8a78 Mon Sep 17 00:00:00 2001 From: Konstantinos Artopoulos Date: Wed, 18 Dec 2024 09:10:37 +0200 Subject: [PATCH 038/144] refactor(testing): move .table tests to shelltests.py --- testing/cmdlineshell.test | 4 ---- testing/shelltests.py | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/testing/cmdlineshell.test b/testing/cmdlineshell.test index 1112e0aaa..73c6377ec 100755 --- a/testing/cmdlineshell.test +++ b/testing/cmdlineshell.test @@ -100,7 +100,3 @@ do_execsql_test_on_specific_db testing/testing.db schema-2 { # do_execsql_test_on_specific_db testing/testing.db schema-1 { # .tables # } {"products users"} - -do_execsql_test_on_specific_db testing/testing.db schema-1 { - .tables us% -} {"users"} diff --git a/testing/shelltests.py b/testing/shelltests.py index f36972e25..6b1dab121 100755 --- a/testing/shelltests.py +++ b/testing/shelltests.py @@ -182,6 +182,11 @@ def write_to_pipe(line): CWD: {cwd}/testing Echo: off""", ) + +do_execshell_test(pipe, "test-show-tables", ".tables", "products users") + +do_execshell_test(pipe, "test-show-tables-with-pattern", ".tables us%", "users") + # test we can set the null value write_to_pipe(".open :memory:") From 89f5167315f99720fe009faa45251ad802b8ccb2 Mon Sep 17 00:00:00 2001 From: Kacper Madej Date: Wed, 18 Dec 2024 15:53:52 +0100 Subject: [PATCH 039/144] Fix CI build --- 
.github/workflows/rust.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 915c938ab..0c1d8e4f9 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -27,6 +27,10 @@ jobs: steps: - uses: actions/checkout@v3 - uses: Swatinem/rust-cache@v2 + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" - name: Build run: cargo build --verbose - name: Test @@ -53,6 +57,8 @@ jobs: test-limbo: runs-on: ubuntu-latest steps: + - name: Install sqlite + run: sudo apt update && sudo apt install -y sqlite3 libsqlite3-dev - name: Install cargo-c env: LINK: https://github.com/lu-zero/cargo-c/releases/download/v0.10.7 @@ -68,13 +74,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Install - run: | - set -euo pipefail - mkdir download && cd download - # apt contains an old version of sqlite3 that does not support e.g. string_agg(), so we download 3.46.0 manually - wget https://www.sqlite.org/2024/sqlite-tools-linux-x64-3460000.zip - unzip sqlite-tools-linux-x64-3460000.zip - + - name: Install sqlite + run: sudo apt update && sudo apt install -y sqlite3 libsqlite3-dev - name: Test - run: SQLITE_EXEC="$(pwd)/download/sqlite3" make test-compat + run: SQLITE_EXEC="sqlite3" make test-compat From a6d1a7cb56ca134271144865c1f87cb4c41c966e Mon Sep 17 00:00:00 2001 From: Raul Ferrando Date: Mon, 16 Dec 2024 18:01:10 +0100 Subject: [PATCH 040/144] pragma: inital approach to handle pragma statements This change refactors how PRAGMA statements are handled, introducing a more organized and extensible structure to simplify adding new PRAGMA properties in the future. Previously, only the `cache_size` PRAGMA was supported. With this update, support for the `journal_mode` PRAGMA has been added. 
--- core/translate/mod.rs | 144 +++++++++++------- testing/pragma.test | 4 + vendored/sqlite3-parser/src/parser/ast/mod.rs | 23 ++- 3 files changed, 118 insertions(+), 53 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index ef06e1467..593886cf9 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -18,6 +18,7 @@ pub(crate) mod select; use std::cell::RefCell; use std::fmt::Display; use std::rc::{Rc, Weak}; +use std::str::FromStr; use crate::schema::Schema; use crate::storage::pager::Pager; @@ -26,8 +27,8 @@ use crate::vdbe::{builder::ProgramBuilder, Insn, Program}; use crate::{bail_parse_error, Connection, Result}; use insert::translate_insert; use select::translate_select; -use sqlite3_parser::ast; use sqlite3_parser::ast::fmt::ToTokens; +use sqlite3_parser::ast::{self, PragmaName}; /// Translate SQL statement into bytecode program. pub fn translate( @@ -305,38 +306,17 @@ fn translate_pragma( let mut write = false; match body { None => { - let pragma_result = program.alloc_register(); - - program.emit_insn(Insn::Integer { - value: database_header.borrow().default_cache_size.into(), - dest: pragma_result, - }); - - let pragma_result_end = program.next_free_register(); - program.emit_insn(Insn::ResultRow { - start_reg: pragma_result, - count: pragma_result_end - pragma_result, - }); + let pragma_name = &name.name.0; + query_pragma(pragma_name, database_header.clone(), &mut program); } Some(ast::PragmaBody::Equals(value)) => { - let value_to_update = match value { - ast::Expr::Literal(ast::Literal::Numeric(numeric_value)) => { - numeric_value.parse::().unwrap() - } - ast::Expr::Unary(ast::UnaryOperator::Negative, expr) => match *expr { - ast::Expr::Literal(ast::Literal::Numeric(numeric_value)) => { - -numeric_value.parse::().unwrap() - } - _ => 0, - }, - _ => 0, - }; write = true; update_pragma( &name.name.0, - value_to_update, + value, database_header.clone(), pager, + &mut program, ); } Some(ast::PragmaBody::Call(_)) => { @@ 
-357,36 +337,96 @@ fn translate_pragma( Ok(program.build(database_header, connection)) } -fn update_pragma(name: &str, value: i64, header: Rc>, pager: Rc) { - match name { - "cache_size" => { - let mut cache_size_unformatted = value; - let mut cache_size = if cache_size_unformatted < 0 { - let kb = cache_size_unformatted.abs() * 1024; - kb / 512 // assume 512 page size for now - } else { - value - } as usize; - if cache_size < MIN_PAGE_CACHE_SIZE { - // update both in memory and stored disk value - cache_size = MIN_PAGE_CACHE_SIZE; - cache_size_unformatted = MIN_PAGE_CACHE_SIZE as i64; - } - - // update in-memory header - header.borrow_mut().default_cache_size = cache_size_unformatted - .try_into() - .unwrap_or_else(|_| panic!("invalid value, too big for a i32 {}", value)); +fn update_pragma( + name: &str, + value: ast::Expr, + header: Rc>, + pager: Rc, + program: &mut ProgramBuilder, +) { + let pragma = PragmaName::from_str(name).expect("provided pragma not valid"); + match pragma { + PragmaName::CacheSize => { + let cache_size = match value { + ast::Expr::Literal(ast::Literal::Numeric(numeric_value)) => { + numeric_value.parse::().unwrap() + } + ast::Expr::Unary(ast::UnaryOperator::Negative, expr) => match *expr { + ast::Expr::Literal(ast::Literal::Numeric(numeric_value)) => { + -numeric_value.parse::().unwrap() + } + _ => 0, + }, + _ => 0, + }; - // update in disk - let header_copy = header.borrow().clone(); - pager.write_database_header(&header_copy); + update_cache_size(cache_size, header, pager); + } + PragmaName::JournalMode => { + query_pragma("journal_mode", header, program); + } + _ => todo!("pragma `{name}`"), + } +} - // update cache size - pager.change_page_cache_size(cache_size); +fn query_pragma( + name: &str, + database_header: Rc>, + program: &mut ProgramBuilder, +) { + let pragma = PragmaName::from_str(name).expect("provided pragma not valid"); + let register = program.alloc_register(); + match pragma { + PragmaName::CacheSize => { + 
program.emit_insn(Insn::Integer { + value: database_header.borrow().default_cache_size.into(), + dest: register, + }); } - _ => todo!(), + PragmaName::JournalMode => { + program.emit_insn(Insn::String8 { + value: "wal".into(), + dest: register, + }); + } + _ => { + todo!("pragma `{name}`"); + } + } + + let next_register = program.next_free_register(); + program.emit_insn(Insn::ResultRow { + start_reg: register, + count: next_register - register, + }); +} + +fn update_cache_size(value: i64, header: Rc>, pager: Rc) { + let mut cache_size_unformatted: i64 = value; + let mut cache_size = if cache_size_unformatted < 0 { + let kb = cache_size_unformatted.abs() * 1024; + kb / 512 // assume 512 page size for now + } else { + value + } as usize; + + if cache_size < MIN_PAGE_CACHE_SIZE { + // update both in memory and stored disk value + cache_size = MIN_PAGE_CACHE_SIZE; + cache_size_unformatted = MIN_PAGE_CACHE_SIZE as i64; } + + // update in-memory header + header.borrow_mut().default_cache_size = cache_size_unformatted + .try_into() + .unwrap_or_else(|_| panic!("invalid value, too big for a i32 {}", value)); + + // update in disk + let header_copy = header.borrow().clone(); + pager.write_database_header(&header_copy); + + // update cache size + pager.change_page_cache_size(cache_size); } struct TableFormatter<'a> { diff --git a/testing/pragma.test b/testing/pragma.test index 74b7ad339..ce6b2996c 100755 --- a/testing/pragma.test +++ b/testing/pragma.test @@ -6,3 +6,7 @@ source $testdir/tester.tcl do_execsql_test pragma-cache-size { PRAGMA cache_size } {-2000} + +do_execsql_test pragma-update-journal-mode-wal { + PRAGMA journal_mode=WAL +} {wal} diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index 4ff8746f3..7ee4d58f3 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -1572,11 +1572,32 @@ pub enum PragmaBody { /// function call Call(PragmaValue), } - 
/// `PRAGMA` value // https://sqlite.org/syntax/pragma-value.html pub type PragmaValue = Expr; // TODO +/// `PRAGMA` value +// https://sqlite.org/pragma.html +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum PragmaName { + /// `cache_size` pragma + CacheSize, + /// `journal_mode` pragma + JournalMode, +} + +impl FromStr for PragmaName { + type Err = (); + + fn from_str(input: &str) -> Result { + match input { + "cache_size" => Ok(PragmaName::CacheSize), + "journal_mode" => Ok(PragmaName::JournalMode), + _ => Err(()), + } + } +} + /// `CREATE TRIGGER` time #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum TriggerTime { From d74012bb59542894a06fc8396013fc4d10e7d3b4 Mon Sep 17 00:00:00 2001 From: Raul Ferrando Date: Wed, 18 Dec 2024 17:10:13 +0100 Subject: [PATCH 041/144] fix pragma parsing potential errors --- core/translate/mod.rs | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 593886cf9..2e5d86141 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -307,7 +307,7 @@ fn translate_pragma( match body { None => { let pragma_name = &name.name.0; - query_pragma(pragma_name, database_header.clone(), &mut program); + query_pragma(pragma_name, database_header.clone(), &mut program)?; } Some(ast::PragmaBody::Equals(value)) => { write = true; @@ -317,7 +317,7 @@ fn translate_pragma( database_header.clone(), pager, &mut program, - ); + )?; } Some(ast::PragmaBody::Call(_)) => { todo!() @@ -343,8 +343,11 @@ fn update_pragma( header: Rc>, pager: Rc, program: &mut ProgramBuilder, -) { - let pragma = PragmaName::from_str(name).expect("provided pragma not valid"); +) -> Result<()> { + let pragma = match PragmaName::from_str(name) { + Ok(pragma) => pragma, + Err(()) => bail_parse_error!("Not a valid pragma name"), + }; match pragma { PragmaName::CacheSize => { let cache_size = match value { @@ -355,15 +358,16 @@ fn update_pragma( 
ast::Expr::Literal(ast::Literal::Numeric(numeric_value)) => { -numeric_value.parse::().unwrap() } - _ => 0, + _ => bail_parse_error!("Not a valid value"), }, - _ => 0, + _ => bail_parse_error!("Not a valid value"), }; - update_cache_size(cache_size, header, pager); + Ok(()) } PragmaName::JournalMode => { - query_pragma("journal_mode", header, program); + query_pragma("journal_mode", header, program)?; + Ok(()) } _ => todo!("pragma `{name}`"), } @@ -373,8 +377,11 @@ fn query_pragma( name: &str, database_header: Rc>, program: &mut ProgramBuilder, -) { - let pragma = PragmaName::from_str(name).expect("provided pragma not valid"); +) -> Result<()> { + let pragma = match PragmaName::from_str(name) { + Ok(pragma) => pragma, + Err(()) => bail_parse_error!("Not a valid pragma name"), + }; let register = program.alloc_register(); match pragma { PragmaName::CacheSize => { @@ -394,11 +401,11 @@ fn query_pragma( } } - let next_register = program.next_free_register(); program.emit_insn(Insn::ResultRow { start_reg: register, - count: next_register - register, + count: 1, }); + Ok(()) } fn update_cache_size(value: i64, header: Rc>, pager: Rc) { From 9e01c22a5e35636eddbd885b3e1c421a72b13950 Mon Sep 17 00:00:00 2001 From: Kacper Madej Date: Tue, 17 Dec 2024 22:35:05 +0100 Subject: [PATCH 042/144] Handle quoting identifiers properly --- core/translate/planner.rs | 32 +++++++++++++++++++------------- core/util.rs | 28 ++++++++++++++++++++++------ testing/join.test | 6 ++++++ testing/select.test | 8 ++++++++ 4 files changed, 55 insertions(+), 19 deletions(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 30a125e9e..14757e00a 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -97,12 +97,13 @@ fn bind_column_references( return Ok(()); } let mut match_result = None; + let normalized_id = normalize_ident(id.0.as_str()); for (tbl_idx, table) in referenced_tables.iter().enumerate() { let col_idx = table .table .columns .iter() - .position(|c| 
c.name.eq_ignore_ascii_case(&id.0)); + .position(|c| c.name.eq_ignore_ascii_case(&normalized_id)); if col_idx.is_some() { if match_result.is_some() { crate::bail_parse_error!("Column {} is ambiguous", id.0); @@ -124,20 +125,23 @@ fn bind_column_references( Ok(()) } ast::Expr::Qualified(tbl, id) => { - let matching_tbl_idx = referenced_tables - .iter() - .position(|t| t.table_identifier.eq_ignore_ascii_case(&tbl.0)); + let normalized_table_name = normalize_ident(tbl.0.as_str()); + let matching_tbl_idx = referenced_tables.iter().position(|t| { + t.table_identifier + .eq_ignore_ascii_case(&normalized_table_name) + }); if matching_tbl_idx.is_none() { - crate::bail_parse_error!("Table {} not found", tbl.0); + crate::bail_parse_error!("Table {} not found", normalized_table_name); } let tbl_idx = matching_tbl_idx.unwrap(); + let normalized_id = normalize_ident(id.0.as_str()); let col_idx = referenced_tables[tbl_idx] .table .columns .iter() - .position(|c| c.name.eq_ignore_ascii_case(&id.0)); + .position(|c| c.name.eq_ignore_ascii_case(&normalized_id)); if col_idx.is_none() { - crate::bail_parse_error!("Column {} not found", id.0); + crate::bail_parse_error!("Column {} not found", normalized_id); } let col = referenced_tables[tbl_idx] .table @@ -504,8 +508,9 @@ fn parse_from( let first_table = match *from.select.unwrap() { ast::SelectTable::Table(qualified_name, maybe_alias, _) => { - let Some(table) = schema.get_table(&qualified_name.name.0) else { - crate::bail_parse_error!("Table {} not found", qualified_name.name.0); + let normalized_qualified_name = normalize_ident(qualified_name.name.0.as_str()); + let Some(table) = schema.get_table(&normalized_qualified_name) else { + crate::bail_parse_error!("Table {} not found", normalized_qualified_name); }; let alias = maybe_alias .map(|a| match a { @@ -516,7 +521,7 @@ fn parse_from( BTreeTableReference { table: table.clone(), - table_identifier: alias.unwrap_or(qualified_name.name.0), + table_identifier: 
alias.unwrap_or(normalized_qualified_name), table_index: 0, } } @@ -570,8 +575,9 @@ fn parse_join( let table = match table { ast::SelectTable::Table(qualified_name, maybe_alias, _) => { - let Some(table) = schema.get_table(&qualified_name.name.0) else { - crate::bail_parse_error!("Table {} not found", qualified_name.name.0); + let normalized_name = normalize_ident(qualified_name.name.0.as_str()); + let Some(table) = schema.get_table(&normalized_name) else { + crate::bail_parse_error!("Table {} not found", normalized_name); }; let alias = maybe_alias .map(|a| match a { @@ -581,7 +587,7 @@ fn parse_join( .map(|a| a.0); BTreeTableReference { table: table.clone(), - table_identifier: alias.unwrap_or(qualified_name.name.0), + table_identifier: alias.unwrap_or(normalized_name), table_index, } } diff --git a/core/util.rs b/core/util.rs index 4b8a7f43b..24cbdb656 100644 --- a/core/util.rs +++ b/core/util.rs @@ -7,12 +7,19 @@ use crate::{ Result, RowResult, Rows, IO, }; -pub fn normalize_ident(ident: &str) -> String { - (if ident.starts_with('"') && ident.ends_with('"') { - &ident[1..ident.len() - 1] +// https://sqlite.org/lang_keywords.html +const QUOTE_PAIRS: &[(char, char)] = &[('"', '"'), ('[', ']'), ('`', '`')]; + +pub fn normalize_ident(identifier: &str) -> String { + let quote_pair = QUOTE_PAIRS + .iter() + .find(|&(start, end)| identifier.starts_with(*start) && identifier.ends_with(*end)); + + if let Some(&(start, end)) = quote_pair { + &identifier[1..identifier.len() - 1] } else { - ident - }) + identifier + } .to_lowercase() } @@ -65,7 +72,6 @@ fn cmp_numeric_strings(num_str: &str, other: &str) -> bool { } } -const QUOTE_PAIRS: &[(char, char)] = &[('"', '"'), ('[', ']'), ('`', '`')]; pub fn check_ident_equivalency(ident1: &str, ident2: &str) -> bool { fn strip_quotes(identifier: &str) -> &str { for &(start, end) in QUOTE_PAIRS { @@ -276,7 +282,17 @@ pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { #[cfg(test)] pub mod tests { + use super::*; use 
sqlite3_parser::ast::{self, Expr, Id, Literal, Operator::*, Type}; + + #[test] + fn test_normalize_ident() { + assert_eq!(normalize_ident("foo"), "foo"); + assert_eq!(normalize_ident("`foo`"), "foo"); + assert_eq!(normalize_ident("[foo]"), "foo"); + assert_eq!(normalize_ident("\"foo\""), "foo"); + } + #[test] fn test_basic_addition_exprs_are_equivalent() { let expr1 = Expr::Binary( diff --git a/testing/join.test b/testing/join.test index 63283582e..7ebcdd6c5 100755 --- a/testing/join.test +++ b/testing/join.test @@ -240,6 +240,12 @@ do_execsql_test join-using-multiple { Cindy|Salazar|cap Tommy|Perry|shirt"} +do_execsql_test join-using-multiple-with-quoting { + select u.first_name, u.last_name, p.name from users u join users u2 using(id) join [products] p using(`id`) limit 3; +} {"Jamie|Foster|hat +Cindy|Salazar|cap +Tommy|Perry|shirt"} + # NATURAL JOIN desugars to JOIN USING (common_column1, common_column2...) do_execsql_test join-using { select * from users natural join products limit 3; diff --git a/testing/select.test b/testing/select.test index f4c3b9232..ff730b44b 100755 --- a/testing/select.test +++ b/testing/select.test @@ -51,6 +51,14 @@ do_execsql_test table-star-2 { select p.*, u.first_name from users u join products p on u.id = p.id limit 1; } {1|hat|79.0|Jamie} +do_execsql_test select_with_quoting { + select `users`.id from [users] where users.[id] = 5; +} {5} + +do_execsql_test select_with_quoting_2 { + select "users".`id` from users where `users`.[id] = 5; +} {5} + do_execsql_test seekrowid { select * from users u where u.id = 5; } {"5|Edward|Miller|christiankramer@example.com|725-281-1033|08522 English Plain|Lake Keith|ID|23283|15"} From 69e3dd28f77e59927da4313e517b2b428ede480d Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 18 Dec 2024 20:44:33 +0200 Subject: [PATCH 043/144] Limbo 0.0.10 --- CHANGELOG.md | 2 +- Cargo.lock | 14 +++++++------- Cargo.toml | 2 +- bindings/wasm/package.json | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index ad084b4e7..555a013ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## Unreleased +## 0.0.10 - 2024-12-18 ### Added diff --git a/Cargo.lock b/Cargo.lock index 6df496fd3..6fd0e8319 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -386,7 +386,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core_tester" -version = "0.0.9" +version = "0.0.10" dependencies = [ "anyhow", "clap", @@ -1100,7 +1100,7 @@ dependencies = [ [[package]] name = "limbo" -version = "0.0.9" +version = "0.0.10" dependencies = [ "anyhow", "clap", @@ -1114,7 +1114,7 @@ dependencies = [ [[package]] name = "limbo-wasm" -version = "0.0.9" +version = "0.0.10" dependencies = [ "console_error_panic_hook", "js-sys", @@ -1124,7 +1124,7 @@ dependencies = [ [[package]] name = "limbo_core" -version = "0.0.9" +version = "0.0.10" dependencies = [ "bumpalo", "cfg_block", @@ -1160,7 +1160,7 @@ dependencies = [ [[package]] name = "limbo_sim" -version = "0.0.9" +version = "0.0.10" dependencies = [ "anarchist-readable-name-generator-lib", "env_logger 0.10.2", @@ -1173,7 +1173,7 @@ dependencies = [ [[package]] name = "limbo_sqlite3" -version = "0.0.9" +version = "0.0.10" dependencies = [ "env_logger 0.11.5", "limbo_core", @@ -1616,7 +1616,7 @@ dependencies = [ [[package]] name = "py-limbo" -version = "0.0.9" +version = "0.0.10" dependencies = [ "anyhow", "limbo_core", diff --git a/Cargo.toml b/Cargo.toml index a9dccf37a..f09011a3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ members = [ exclude = ["perf/latency/limbo"] [workspace.package] -version = "0.0.9" +version = "0.0.10" authors = ["the Limbo authors"] edition = "2021" license = "MIT" diff --git a/bindings/wasm/package.json b/bindings/wasm/package.json index 18b398647..b9784ce19 100644 --- a/bindings/wasm/package.json +++ b/bindings/wasm/package.json @@ -3,7 +3,7 @@ "collaborators": [ "the Limbo authors" ], - "version": "0.0.9", 
+ "version": "0.0.10", "license": "MIT", "repository": { "type": "git", From 682f014817e774b3943754e0dbdcb5bfea5beffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=B6kmen=20G=C3=B6rgen?= Date: Wed, 18 Dec 2024 21:14:58 +0000 Subject: [PATCH 044/144] add maturin as dev dependency. --- bindings/python/pyproject.toml | 9 ++++++++- bindings/python/requirements-dev.txt | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index afd6b7122..3dd269b0e 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -30,7 +30,14 @@ dependencies = ['typing-extensions >=4.6.0,!=4.7.0'] dynamic = ['readme', 'version'] [project.optional-dependencies] -dev = ["mypy==1.11.0", "pytest==8.3.1", "pytest-cov==5.0.0", "ruff==0.5.4", "coverage==7.6.1"] +dev = [ + "mypy==1.11.0", + "pytest==8.3.1", + "pytest-cov==5.0.0", + "ruff==0.5.4", + "coverage==7.6.1", + "maturin==1.7.8", +] [project.urls] Homepage = "https://github.com/penberg/limbo" diff --git a/bindings/python/requirements-dev.txt b/bindings/python/requirements-dev.txt index bfb954746..5ec1981cc 100644 --- a/bindings/python/requirements-dev.txt +++ b/bindings/python/requirements-dev.txt @@ -4,6 +4,8 @@ coverage==7.6.1 # pytest-cov iniconfig==2.0.0 # via pytest +maturin==1.7.8 + # via pylimbo (pyproject.toml) mypy==1.11.0 # via pylimbo (pyproject.toml) mypy-extensions==1.0.0 From 39b5dbed5538be7b2c1d9e4cdee82fe04f8b0509 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Wed, 18 Dec 2024 17:09:44 -0500 Subject: [PATCH 045/144] change gen.range based queries into frequency and one_of calls --- simulator/generation.rs | 2 +- simulator/generation/query.rs | 132 ++++++++++++++++++++-------------- simulator/generation/table.rs | 30 ++++---- simulator/main.rs | 11 ++- simulator/model/query.rs | 15 +++- simulator/model/table.rs | 30 ++------ 6 files changed, 122 insertions(+), 98 deletions(-) diff --git a/simulator/generation.rs 
b/simulator/generation.rs index 15b5845ec..d1006a953 100644 --- a/simulator/generation.rs +++ b/simulator/generation.rs @@ -30,7 +30,7 @@ pub(crate) fn frequency<'a, T, R: rand::Rng>( unreachable!() } -pub(crate) fn one_of(choices: Vec T>>, rng: &mut R) -> T { +pub(crate) fn one_of<'a, T, R: rand::Rng>(choices: Vec T + 'a>>, rng: &mut R) -> T { let index = rng.gen_range(0..choices.len()); choices[index](rng) } diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index 748409972..9589944ab 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -1,10 +1,12 @@ use crate::generation::table::{GTValue, LTValue}; -use crate::generation::{Arbitrary, ArbitraryFrom}; +use crate::generation::{one_of, Arbitrary, ArbitraryFrom}; use crate::model::query::{Create, Delete, Insert, Predicate, Query, Select}; use crate::model::table::{Table, Value}; use rand::Rng; +use super::{frequency, pick}; + impl Arbitrary for Create { fn arbitrary(rng: &mut R) -> Self { Create { @@ -15,20 +17,20 @@ impl Arbitrary for Create { impl ArbitraryFrom> for Select { fn arbitrary_from(rng: &mut R, tables: &Vec
) -> Self { - let table = rng.gen_range(0..tables.len()); + let table = pick(tables, rng); Select { - table: tables[table].name.clone(), - predicate: Predicate::arbitrary_from(rng, &tables[table]), + table: table.name.clone(), + predicate: Predicate::arbitrary_from(rng, table), } } } impl ArbitraryFrom> for Select { fn arbitrary_from(rng: &mut R, tables: &Vec<&Table>) -> Self { - let table = rng.gen_range(0..tables.len()); + let table = pick(tables, rng); Select { - table: tables[table].name.clone(), - predicate: Predicate::arbitrary_from(rng, tables[table]), + table: table.name.clone(), + predicate: Predicate::arbitrary_from(rng, *table), } } } @@ -58,15 +60,24 @@ impl ArbitraryFrom
for Delete { impl ArbitraryFrom
for Query { fn arbitrary_from(rng: &mut R, table: &Table) -> Self { - match rng.gen_range(0..=200) { - 0 => Query::Create(Create::arbitrary(rng)), - 1..=100 => Query::Select(Select::arbitrary_from(rng, &vec![table])), - 101..=200 => Query::Insert(Insert::arbitrary_from(rng, table)), - // todo: This branch is currently never taken, as DELETE is not yet implemented. - // Change this when DELETE is implemented. - 201..=300 => Query::Delete(Delete::arbitrary_from(rng, table)), - _ => unreachable!(), - } + frequency( + vec![ + (1, Box::new(|rng| Query::Create(Create::arbitrary(rng)))), + ( + 100, + Box::new(|rng| Query::Select(Select::arbitrary_from(rng, &vec![table]))), + ), + ( + 100, + Box::new(|rng| Query::Insert(Insert::arbitrary_from(rng, table))), + ), + ( + 0, + Box::new(|rng| Query::Delete(Delete::arbitrary_from(rng, table))), + ), + ], + rng, + ) } } @@ -84,35 +95,53 @@ impl ArbitraryFrom<(&Table, bool)> for SimplePredicate { .map(|r| &r[column_index]) .collect::>(); // Pick an operator - let operator = match rng.gen_range(0..3) { - 0 => { - if *predicate_value { - Predicate::Eq( - column.name.clone(), - Value::arbitrary_from(rng, &column_values), - ) - } else { - Predicate::Eq( - column.name.clone(), - Value::arbitrary_from(rng, &column.column_type), - ) - } - } - 1 => Predicate::Gt( - column.name.clone(), - match predicate_value { - true => GTValue::arbitrary_from(rng, &column_values).0, - false => LTValue::arbitrary_from(rng, &column_values).0, - }, + let operator = match predicate_value { + true => one_of( + vec![ + Box::new(|rng| { + Predicate::Eq( + column.name.clone(), + Value::arbitrary_from(rng, &column_values), + ) + }), + Box::new(|rng| { + Predicate::Gt( + column.name.clone(), + GTValue::arbitrary_from(rng, &column_values).0, + ) + }), + Box::new(|rng| { + Predicate::Lt( + column.name.clone(), + LTValue::arbitrary_from(rng, &column_values).0, + ) + }), + ], + rng, ), - 2 => Predicate::Lt( - column.name.clone(), - match predicate_value { - true => 
LTValue::arbitrary_from(rng, &column_values).0, - false => GTValue::arbitrary_from(rng, &column_values).0, - }, + false => one_of( + vec![ + Box::new(|rng| { + Predicate::Neq( + column.name.clone(), + Value::arbitrary_from(rng, &column.column_type), + ) + }), + Box::new(|rng| { + Predicate::Gt( + column.name.clone(), + LTValue::arbitrary_from(rng, &column_values).0, + ) + }), + Box::new(|rng| { + Predicate::Lt( + column.name.clone(), + GTValue::arbitrary_from(rng, &column_values).0, + ) + }), + ], + rng, ), - _ => unreachable!(), }; SimplePredicate(operator) @@ -191,17 +220,10 @@ impl ArbitraryFrom
for Predicate { impl ArbitraryFrom<(&str, &Value)> for Predicate { fn arbitrary_from(rng: &mut R, (column_name, value): &(&str, &Value)) -> Self { - match rng.gen_range(0..3) { - 0 => Predicate::Eq(column_name.to_string(), (*value).clone()), - 1 => Predicate::Gt( - column_name.to_string(), - LTValue::arbitrary_from(rng, *value).0, - ), - 2 => Predicate::Lt( - column_name.to_string(), - LTValue::arbitrary_from(rng, *value).0, - ), - _ => unreachable!(), - } + one_of(vec![ + Box::new(|rng| Predicate::Eq(column_name.to_string(), (*value).clone())), + Box::new(|rng| Predicate::Gt(column_name.to_string(), GTValue::arbitrary_from(rng, *value).0)), + Box::new(|rng| Predicate::Lt(column_name.to_string(), LTValue::arbitrary_from(rng, *value).0)), + ], rng) } } diff --git a/simulator/generation/table.rs b/simulator/generation/table.rs index 685171207..46b6b0df6 100644 --- a/simulator/generation/table.rs +++ b/simulator/generation/table.rs @@ -1,6 +1,6 @@ use rand::Rng; -use crate::generation::{gen_random_text, readable_name_custom, Arbitrary, ArbitraryFrom}; +use crate::generation::{pick_index, gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom}; use crate::model::table::{Column, ColumnType, Name, Table, Value}; impl Arbitrary for Name { @@ -13,7 +13,7 @@ impl Arbitrary for Name { impl Arbitrary for Table { fn arbitrary(rng: &mut R) -> Self { let name = Name::arbitrary(rng).0; - let columns = (1..=rng.gen_range(1..10)) + let columns = (1..=rng.gen_range(1..5)) .map(|_| Column::arbitrary(rng)) .collect(); Table { @@ -39,13 +39,16 @@ impl Arbitrary for Column { impl Arbitrary for ColumnType { fn arbitrary(rng: &mut R) -> Self { - match rng.gen_range(0..4) { - 0 => ColumnType::Integer, - 1 => ColumnType::Float, - 2 => ColumnType::Text, - 3 => ColumnType::Blob, - _ => unreachable!(), - } + pick( + &vec![ + ColumnType::Integer, + ColumnType::Float, + ColumnType::Text, + ColumnType::Blob, + ], + rng, + ) + .to_owned() } } @@ -55,8 +58,7 @@ impl ArbitraryFrom> 
for Value { return Value::Null; } - let index = rng.gen_range(0..values.len()); - values[index].clone() + pick(values, rng).to_owned().clone() } } @@ -78,8 +80,8 @@ impl ArbitraryFrom> for LTValue { if values.is_empty() { return LTValue(Value::Null); } - - let index = rng.gen_range(0..values.len()); + + let index = pick_index(values.len(), rng); LTValue::arbitrary_from(rng, values[index]) } } @@ -139,7 +141,7 @@ impl ArbitraryFrom> for GTValue { return GTValue(Value::Null); } - let index = rng.gen_range(0..values.len()); + let index = pick_index(values.len(), rng); GTValue::arbitrary_from(rng, values[index]) } } diff --git a/simulator/main.rs b/simulator/main.rs index 0f745e7e8..00b415ce9 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,7 +1,7 @@ use generation::plan::{Interaction, ResultSet}; use generation::{pick, pick_index, Arbitrary, ArbitraryFrom}; use limbo_core::{Connection, Database, File, OpenFlags, PlatformIO, Result, RowResult, IO}; -use model::query::{Insert, Predicate, Query, Select}; +use model::query::{Create, Insert, Predicate, Query, Select}; use model::table::{Column, Name, Table, Value}; use properties::{property_insert_select, property_select_all}; use rand::prelude::*; @@ -189,7 +189,12 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc) -> Result< .map(|_| Column::arbitrary(&mut env.rng)) .collect(), }; - let rows = get_all_rows(env, conn, table.to_create_str().as_str())?; + let query = Query::Create(Create { table: table.clone() }); + let rows = get_all_rows( + env, + conn, + query.to_string().as_str(), + )?; log::debug!("{:?}", rows); let rows = get_all_rows( env, @@ -207,7 +212,7 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc) -> Result< _ => unreachable!(), }; assert!( - *as_text != table.to_create_str(), + *as_text != query.to_string(), "table was not inserted correctly" ); env.tables.push(table); diff --git a/simulator/model/query.rs b/simulator/model/query.rs index 20058aead..ce227a252 100644 --- 
a/simulator/model/query.rs +++ b/simulator/model/query.rs @@ -7,6 +7,7 @@ pub(crate) enum Predicate { And(Vec), // p1 AND p2 AND p3... AND pn Or(Vec), // p1 OR p2 OR p3... OR pn Eq(String, Value), // column = Value + Neq(String, Value), // column != Value Gt(String, Value), // column > Value Lt(String, Value), // column < Value } @@ -44,6 +45,7 @@ impl Display for Predicate { } } Predicate::Eq(name, value) => write!(f, "{} = {}", name, value), + Predicate::Neq(name, value) => write!(f, "{} != {}", name, value), Predicate::Gt(name, value) => write!(f, "{} > {}", name, value), Predicate::Lt(name, value) => write!(f, "{} < {}", name, value), } @@ -85,7 +87,18 @@ pub(crate) struct Delete { impl Display for Query { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Query::Create(Create { table }) => write!(f, "{}", table.to_create_str()), + Query::Create(Create { table }) => { + write!(f, "CREATE TABLE {} (", table.name)?; + + for (i, column) in table.columns.iter().enumerate() { + if i != 0 { + write!(f, ",")?; + } + write!(f, "{} {}", column.name, column.column_type)?; + } + + write!(f, ")") + }, Query::Select(Select { table, predicate: guard, diff --git a/simulator/model/table.rs b/simulator/model/table.rs index 93c3d6d74..ccc18f738 100644 --- a/simulator/model/table.rs +++ b/simulator/model/table.rs @@ -17,24 +17,6 @@ pub(crate) struct Table { pub(crate) columns: Vec, } -impl Table { - pub fn to_create_str(&self) -> String { - let mut out = String::new(); - - out.push_str(format!("CREATE TABLE {} (", self.name).as_str()); - - assert!(!self.columns.is_empty()); - for column in &self.columns { - out.push_str(format!("{} {},", column.name, column.column_type.as_str()).as_str()); - } - // remove last comma - out.pop(); - - out.push_str(");"); - out - } -} - #[derive(Debug, Clone)] pub(crate) struct Column { pub(crate) name: String, @@ -51,13 +33,13 @@ pub(crate) enum ColumnType { Blob, } -impl ColumnType { - pub fn as_str(&self) -> &str 
{ +impl Display for ColumnType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ColumnType::Integer => "INTEGER", - ColumnType::Float => "FLOAT", - ColumnType::Text => "TEXT", - ColumnType::Blob => "BLOB", + ColumnType::Integer => write!(f, "INTEGER"), + ColumnType::Float => write!(f, "REAL"), + ColumnType::Text => write!(f, "TEXT"), + ColumnType::Blob => write!(f, "BLOB"), } } } From cb20ca7e40be25a5be5bf01b209929af0bb3fc64 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Wed, 18 Dec 2024 17:10:18 -0500 Subject: [PATCH 046/144] fix formatting --- simulator/generation.rs | 5 ++++- simulator/generation/query.rs | 23 ++++++++++++++++++----- simulator/generation/table.rs | 6 ++++-- simulator/main.rs | 10 ++++------ simulator/model/query.rs | 2 +- 5 files changed, 31 insertions(+), 15 deletions(-) diff --git a/simulator/generation.rs b/simulator/generation.rs index d1006a953..07a93492b 100644 --- a/simulator/generation.rs +++ b/simulator/generation.rs @@ -30,7 +30,10 @@ pub(crate) fn frequency<'a, T, R: rand::Rng>( unreachable!() } -pub(crate) fn one_of<'a, T, R: rand::Rng>(choices: Vec T + 'a>>, rng: &mut R) -> T { +pub(crate) fn one_of<'a, T, R: rand::Rng>( + choices: Vec T + 'a>>, + rng: &mut R, +) -> T { let index = rng.gen_range(0..choices.len()); choices[index](rng) } diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index 9589944ab..ca6926650 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -220,10 +220,23 @@ impl ArbitraryFrom
for Predicate { impl ArbitraryFrom<(&str, &Value)> for Predicate { fn arbitrary_from(rng: &mut R, (column_name, value): &(&str, &Value)) -> Self { - one_of(vec![ - Box::new(|rng| Predicate::Eq(column_name.to_string(), (*value).clone())), - Box::new(|rng| Predicate::Gt(column_name.to_string(), GTValue::arbitrary_from(rng, *value).0)), - Box::new(|rng| Predicate::Lt(column_name.to_string(), LTValue::arbitrary_from(rng, *value).0)), - ], rng) + one_of( + vec![ + Box::new(|rng| Predicate::Eq(column_name.to_string(), (*value).clone())), + Box::new(|rng| { + Predicate::Gt( + column_name.to_string(), + GTValue::arbitrary_from(rng, *value).0, + ) + }), + Box::new(|rng| { + Predicate::Lt( + column_name.to_string(), + LTValue::arbitrary_from(rng, *value).0, + ) + }), + ], + rng, + ) } } diff --git a/simulator/generation/table.rs b/simulator/generation/table.rs index 46b6b0df6..9af2d7d8e 100644 --- a/simulator/generation/table.rs +++ b/simulator/generation/table.rs @@ -1,6 +1,8 @@ use rand::Rng; -use crate::generation::{pick_index, gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom}; +use crate::generation::{ + gen_random_text, pick, pick_index, readable_name_custom, Arbitrary, ArbitraryFrom, +}; use crate::model::table::{Column, ColumnType, Name, Table, Value}; impl Arbitrary for Name { @@ -80,7 +82,7 @@ impl ArbitraryFrom> for LTValue { if values.is_empty() { return LTValue(Value::Null); } - + let index = pick_index(values.len(), rng); LTValue::arbitrary_from(rng, values[index]) } diff --git a/simulator/main.rs b/simulator/main.rs index 00b415ce9..67d9b92f9 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -189,12 +189,10 @@ fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc) -> Result< .map(|_| Column::arbitrary(&mut env.rng)) .collect(), }; - let query = Query::Create(Create { table: table.clone() }); - let rows = get_all_rows( - env, - conn, - query.to_string().as_str(), - )?; + let query = Query::Create(Create { + table: table.clone(), + 
}); + let rows = get_all_rows(env, conn, query.to_string().as_str())?; log::debug!("{:?}", rows); let rows = get_all_rows( env, diff --git a/simulator/model/query.rs b/simulator/model/query.rs index ce227a252..eeec68d08 100644 --- a/simulator/model/query.rs +++ b/simulator/model/query.rs @@ -98,7 +98,7 @@ impl Display for Query { } write!(f, ")") - }, + } Query::Select(Select { table, predicate: guard, From ab306e95503b859e7b2a19f0ab82340b5763dc19 Mon Sep 17 00:00:00 2001 From: mag1c1an1 Date: Thu, 19 Dec 2024 17:33:06 +0800 Subject: [PATCH 047/144] Fix issue #499 (add crate-type in libmo_sqlite3) Signed-off-by: mag1c1an1 --- sqlite3/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlite3/Cargo.toml b/sqlite3/Cargo.toml index bb0bebbfc..bd54ad781 100644 --- a/sqlite3/Cargo.toml +++ b/sqlite3/Cargo.toml @@ -17,6 +17,7 @@ dist = true [lib] doc = false +crate-type = ["lib", "cdylib", "staticlib"] [dependencies] env_logger = { version = "0.11.3", default-features = false } From e93ac38e551f8527c084b177e802ed5ba301f2f2 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 19 Dec 2024 11:41:07 +0200 Subject: [PATCH 048/144] Add statement interruption support This adds an interrupt() method to Statement that allows apps to interrupt a running statement. Please note that this is different from `sqlite3_interrupt()` which interrupts all ongoing operations in a database. Although we want to support that too, per statement interrupt is much more useful to apps. 
--- bindings/python/src/lib.rs | 6 ++++++ bindings/wasm/lib.rs | 5 ++++- cli/app.rs | 4 ++++ core/benches/benchmark.rs | 9 +++++++++ core/lib.rs | 6 ++++++ core/util.rs | 1 + core/vdbe/mod.rs | 14 ++++++++++++++ simulator/main.rs | 3 +++ sqlite3/src/lib.rs | 2 ++ test/src/lib.rs | 7 +++++++ 10 files changed, 56 insertions(+), 1 deletion(-) diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index 89ce26603..aca225304 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -134,6 +134,9 @@ impl Cursor { PyErr::new::(format!("IO error: {:?}", e)) })?; } + limbo_core::RowResult::Interrupt => { + return Ok(None); + } limbo_core::RowResult::Done => { return Ok(None); } @@ -165,6 +168,9 @@ impl Cursor { PyErr::new::(format!("IO error: {:?}", e)) })?; } + limbo_core::RowResult::Interrupt => { + return Ok(results); + } limbo_core::RowResult::Done => { return Ok(results); } diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index c456cb617..f4c02a8e0 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -83,7 +83,9 @@ impl Statement { } JsValue::from(row_array) } - Ok(limbo_core::RowResult::IO) | Ok(limbo_core::RowResult::Done) => JsValue::UNDEFINED, + Ok(limbo_core::RowResult::IO) + | Ok(limbo_core::RowResult::Done) + | Ok(limbo_core::RowResult::Interrupt) => JsValue::UNDEFINED, Err(e) => panic!("Error: {:?}", e), } } @@ -101,6 +103,7 @@ impl Statement { array.push(&row_array); } Ok(limbo_core::RowResult::IO) => {} + Ok(limbo_core::RowResult::Interrupt) => break, Ok(limbo_core::RowResult::Done) => break, Err(e) => panic!("Error: {:?}", e), } diff --git a/cli/app.rs b/cli/app.rs index a6bc003a9..34ab20481 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -521,6 +521,7 @@ impl Limbo { Ok(RowResult::IO) => { self.io.run_once()?; } + Ok(RowResult::Interrupt) => break, Ok(RowResult::Done) => { break; } @@ -557,6 +558,7 @@ impl Limbo { Ok(RowResult::IO) => { self.io.run_once()?; } + Ok(RowResult::Interrupt) => break, 
Ok(RowResult::Done) => break, Err(err) => { let _ = self.write_fmt(format_args!("{}", err)); @@ -606,6 +608,7 @@ impl Limbo { RowResult::IO => { self.io.run_once()?; } + RowResult::Interrupt => break, RowResult::Done => break, } } @@ -658,6 +661,7 @@ impl Limbo { RowResult::IO => { self.io.run_once()?; } + RowResult::Interrupt => break, RowResult::Done => break, } } diff --git a/core/benches/benchmark.rs b/core/benches/benchmark.rs index cc2093a59..ab1b0aa9a 100644 --- a/core/benches/benchmark.rs +++ b/core/benches/benchmark.rs @@ -46,6 +46,9 @@ fn limbo_bench(criterion: &mut Criterion) { limbo_core::RowResult::IO => { io.run_once().unwrap(); } + limbo_core::RowResult::Interrupt => { + unreachable!(); + } limbo_core::RowResult::Done => { unreachable!(); } @@ -68,6 +71,9 @@ fn limbo_bench(criterion: &mut Criterion) { limbo_core::RowResult::IO => { io.run_once().unwrap(); } + limbo_core::RowResult::Interrupt => { + unreachable!(); + } limbo_core::RowResult::Done => { unreachable!(); } @@ -91,6 +97,9 @@ fn limbo_bench(criterion: &mut Criterion) { limbo_core::RowResult::IO => { io.run_once().unwrap(); } + limbo_core::RowResult::Interrupt => { + unreachable!(); + } limbo_core::RowResult::Done => { unreachable!(); } diff --git a/core/lib.rs b/core/lib.rs index e80406c72..1f5668d76 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -367,12 +367,17 @@ impl Statement { } } + pub fn interrupt(&mut self) { + self.state.interrupt(); + } + pub fn step(&mut self) -> Result> { let result = self.program.step(&mut self.state, self.pager.clone())?; match result { vdbe::StepResult::Row(row) => Ok(RowResult::Row(Row { values: row.values })), vdbe::StepResult::IO => Ok(RowResult::IO), vdbe::StepResult::Done => Ok(RowResult::Done), + vdbe::StepResult::Interrupt => Ok(RowResult::Interrupt), } } @@ -388,6 +393,7 @@ pub enum RowResult<'a> { Row(Row<'a>), IO, Done, + Interrupt, } pub struct Row<'a> { diff --git a/core/util.rs b/core/util.rs index 4b8a7f43b..66dd94789 100644 --- a/core/util.rs 
+++ b/core/util.rs @@ -51,6 +51,7 @@ pub fn parse_schema_rows(rows: Option, schema: &mut Schema, io: Arc break, RowResult::Done => break, } } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 069782c9a..6d45eeaec 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -554,6 +554,7 @@ pub enum StepResult<'a> { Done, IO, Row(Record<'a>), + Interrupt, } /// If there is I/O, the instruction is restarted. @@ -589,6 +590,7 @@ pub struct ProgramState { deferred_seek: Option<(CursorID, CursorID)>, ended_coroutine: bool, // flag to notify yield coroutine finished regex_cache: RegexCache, + interrupted: bool, } impl ProgramState { @@ -604,6 +606,7 @@ impl ProgramState { deferred_seek: None, ended_coroutine: false, regex_cache: RegexCache::new(), + interrupted: false, } } @@ -614,6 +617,14 @@ impl ProgramState { pub fn column(&self, i: usize) -> Option { Some(format!("{:?}", self.registers[i])) } + + pub fn interrupt(&mut self) { + self.interrupted = true; + } + + pub fn is_interrupted(&self) -> bool { + self.interrupted + } } #[derive(Debug)] @@ -652,6 +663,9 @@ impl Program { pager: Rc, ) -> Result> { loop { + if state.is_interrupted() { + return Ok(StepResult::Interrupt); + } let insn = &self.insns[state.pc as usize]; trace_insn(self, state.pc as InsnReference, insn); let mut cursors = state.cursors.borrow_mut(); diff --git a/simulator/main.rs b/simulator/main.rs index b7af9854e..fc485132c 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -355,6 +355,9 @@ fn get_all_rows( break 'rows_loop; } } + RowResult::Interrupt => { + break; + } RowResult::Done => { break; } diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index 6a86a04fe..bfe990eb9 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -19,6 +19,7 @@ pub const SQLITE_ERROR: ffi::c_int = 1; pub const SQLITE_ABORT: ffi::c_int = 4; pub const SQLITE_BUSY: ffi::c_int = 5; pub const SQLITE_NOMEM: ffi::c_int = 7; +pub const SQLITE_INTERRUPT: ffi::c_int = 9; pub const SQLITE_NOTFOUND: ffi::c_int = 14; 
pub const SQLITE_MISUSE: ffi::c_int = 21; pub const SQLITE_ROW: ffi::c_int = 100; @@ -235,6 +236,7 @@ pub unsafe extern "C" fn sqlite3_step(stmt: *mut sqlite3_stmt) -> std::ffi::c_in match result { limbo_core::RowResult::IO => SQLITE_BUSY, limbo_core::RowResult::Done => SQLITE_DONE, + limbo_core::RowResult::Interrupt => SQLITE_INTERRUPT, limbo_core::RowResult::Row(row) => { stmt.row.replace(Some(row)); SQLITE_ROW diff --git a/test/src/lib.rs b/test/src/lib.rs index bde88a1f0..53cec37a4 100644 --- a/test/src/lib.rs +++ b/test/src/lib.rs @@ -93,6 +93,7 @@ mod tests { RowResult::IO => { tmp_db.io.run_once()?; } + RowResult::Interrupt => break, RowResult::Done => break, } }, @@ -160,6 +161,7 @@ mod tests { RowResult::IO => { tmp_db.io.run_once()?; } + RowResult::Interrupt => break, RowResult::Done => break, } }, @@ -233,6 +235,7 @@ mod tests { RowResult::IO => { tmp_db.io.run_once()?; } + RowResult::Interrupt => break, RowResult::Done => break, } }, @@ -295,6 +298,7 @@ mod tests { RowResult::IO => { tmp_db.io.run_once()?; } + RowResult::Interrupt => break, RowResult::Done => break, } }, @@ -355,6 +359,7 @@ mod tests { RowResult::IO => { tmp_db.io.run_once()?; } + RowResult::Interrupt => break, RowResult::Done => break, } } @@ -446,6 +451,7 @@ mod tests { RowResult::IO => { tmp_db.io.run_once()?; } + RowResult::Interrupt => break, RowResult::Done => break, } } @@ -479,6 +485,7 @@ mod tests { RowResult::IO => { tmp_db.io.run_once()?; } + RowResult::Interrupt => break, RowResult::Done => break, } }, From 99d1b0e5a334506e33250b9fc63e5b82ea465ba7 Mon Sep 17 00:00:00 2001 From: Ziyak Jehangir <53836911+ziyak97@users.noreply.github.com> Date: Thu, 19 Dec 2024 19:36:04 +0530 Subject: [PATCH 049/144] cleanup: replace &(*x) with x.as_ref() for smart pointer derefs --- bindings/wasm/lib.rs | 2 +- core/io/darwin.rs | 2 +- core/io/generic.rs | 2 +- core/io/linux.rs | 2 +- core/io/windows.rs | 2 +- core/storage/database.rs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff 
--git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index f4c02a8e0..ec2762b91 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -267,7 +267,7 @@ impl DatabaseStorage { impl limbo_core::DatabaseStorage for DatabaseStorage { fn read_page(&self, page_idx: usize, c: Rc) -> Result<()> { - let r = match &(*c) { + let r = match c.as_ref() { limbo_core::Completion::Read(r) => r, _ => unreachable!(), }; diff --git a/core/io/darwin.rs b/core/io/darwin.rs index bdab24af7..c052b572f 100644 --- a/core/io/darwin.rs +++ b/core/io/darwin.rs @@ -190,7 +190,7 @@ impl File for DarwinFile { fn pread(&self, pos: usize, c: Rc) -> Result<()> { let file = self.file.borrow(); let result = { - let r = match &(*c) { + let r = match c.as_ref() { Completion::Read(r) => r, _ => unreachable!(), }; diff --git a/core/io/generic.rs b/core/io/generic.rs index c8c5c45b8..0c35eaf52 100644 --- a/core/io/generic.rs +++ b/core/io/generic.rs @@ -55,7 +55,7 @@ impl File for GenericFile { let mut file = self.file.borrow_mut(); file.seek(std::io::SeekFrom::Start(pos as u64))?; { - let r = match &(*c) { + let r = match c.as_ref() { Completion::Read(r) => r, _ => unreachable!(), }; diff --git a/core/io/linux.rs b/core/io/linux.rs index fde8a9616..e765cc8ac 100644 --- a/core/io/linux.rs +++ b/core/io/linux.rs @@ -241,7 +241,7 @@ impl File for LinuxFile { } fn pread(&self, pos: usize, c: Rc) -> Result<()> { - let r = match &(*c) { + let r = match c.as_ref() { Completion::Read(r) => r, _ => unreachable!(), }; diff --git a/core/io/windows.rs b/core/io/windows.rs index db7f9da31..8bf37a2ff 100644 --- a/core/io/windows.rs +++ b/core/io/windows.rs @@ -57,7 +57,7 @@ impl File for WindowsFile { let mut file = self.file.borrow_mut(); file.seek(std::io::SeekFrom::Start(pos as u64))?; { - let r = match &(*c) { + let r = match c.as_ref() { Completion::Read(r) => r, _ => unreachable!(), }; diff --git a/core/storage/database.rs b/core/storage/database.rs index 75d835734..80a4e55c4 100644 --- 
a/core/storage/database.rs +++ b/core/storage/database.rs @@ -25,7 +25,7 @@ pub struct FileStorage { #[cfg(feature = "fs")] impl DatabaseStorage for FileStorage { fn read_page(&self, page_idx: usize, c: Rc) -> Result<()> { - let r = match &(*c) { + let r = match c.as_ref() { Completion::Read(r) => r, _ => unreachable!(), }; From 3a0e56bca58617d3309c8f556890e9c6da0db4ca Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Thu, 19 Dec 2024 23:13:51 -0300 Subject: [PATCH 050/144] Implement basic sqlite3_get_table() API --- Cargo.lock | 5 +- sqlite3/Cargo.toml | 1 + sqlite3/src/lib.rs | 138 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 140 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6fd0e8319..ead336051 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1063,9 +1063,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.168" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libmimalloc-sys" @@ -1176,6 +1176,7 @@ name = "limbo_sqlite3" version = "0.0.10" dependencies = [ "env_logger 0.11.5", + "libc", "limbo_core", "log", ] diff --git a/sqlite3/Cargo.toml b/sqlite3/Cargo.toml index bd54ad781..444698d66 100644 --- a/sqlite3/Cargo.toml +++ b/sqlite3/Cargo.toml @@ -21,6 +21,7 @@ crate-type = ["lib", "cdylib", "staticlib"] [dependencies] env_logger = { version = "0.11.3", default-features = false } +libc = "0.2.169" limbo_core = { path = "../core" } log = "0.4.22" diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index bfe990eb9..aaf514dda 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -3,7 +3,7 @@ use log::trace; use std::cell::RefCell; -use std::ffi; +use std::ffi::{self, CStr, CString}; use std::rc::Rc; use std::sync::Arc; @@ -247,11 +247,20 
@@ pub unsafe extern "C" fn sqlite3_step(stmt: *mut sqlite3_stmt) -> std::ffi::c_in } } +type exec_callback = Option< + unsafe extern "C" fn( + context: *mut std::ffi::c_void, + n_column: std::ffi::c_int, + argv: *mut *mut std::ffi::c_char, + colv: *mut *mut std::ffi::c_char, + ) -> ffi::c_int, +>; + #[no_mangle] pub unsafe extern "C" fn sqlite3_exec( db: *mut sqlite3, sql: *const ffi::c_char, - _callback: Option ffi::c_int>, + _callback: exec_callback, _context: *mut std::ffi::c_void, _err: *mut *mut std::ffi::c_char, ) -> ffi::c_int { @@ -642,6 +651,131 @@ pub unsafe extern "C" fn sqlite3_column_text( } } +pub struct TabResult { + az_result: Vec<*mut std::ffi::c_char>, + n_row: usize, + n_column: usize, + z_err_msg: Option, + rc: std::ffi::c_int, +} + +impl TabResult { + fn new(initial_capacity: usize) -> Self { + Self { + az_result: Vec::with_capacity(initial_capacity), + n_row: 0, + n_column: 0, + z_err_msg: None, + rc: SQLITE_OK, + } + } + + fn free(&mut self) { + for &ptr in &self.az_result { + if !ptr.is_null() { + unsafe { + sqlite3_free(ptr as *mut _); + } + } + } + self.az_result.clear(); + } +} + +#[no_mangle] +unsafe extern "C" fn sqlite_get_table_cb( + context: *mut std::ffi::c_void, + n_column: std::ffi::c_int, + argv: *mut *mut std::ffi::c_char, + colv: *mut *mut std::ffi::c_char, +) -> std::ffi::c_int { + let res = &mut *(context as *mut TabResult); + + if res.n_row == 0 { + res.n_column = n_column as usize; + for i in 0..n_column { + let col_name = *colv.add(i as usize); + let col_name_cstring = if !col_name.is_null() { + CStr::from_ptr(col_name).to_owned() + } else { + CString::new("NULL").unwrap() + }; + res.az_result.push(col_name_cstring.into_raw()); + } + } else if res.n_column != n_column as usize { + res.z_err_msg = Some( + CString::new("sqlite3_get_table() called with two or more incompatible queries") + .unwrap(), + ); + res.rc = SQLITE_ERROR; + return SQLITE_ERROR; + } + + for i in 0..n_column { + let value = *argv.add(i as usize); + let 
value_cstring = if !value.is_null() { + let len = libc::strlen(value); + let mut buf = Vec::with_capacity(len + 1); + libc::strncpy(buf.as_mut_ptr() as *mut std::ffi::c_char, value, len); + buf.set_len(len + 1); + CString::from_vec_with_nul(buf).unwrap() + } else { + CString::new("NULL").unwrap() + }; + res.az_result.push(value_cstring.into_raw()); + } + + res.n_row += 1; + SQLITE_OK +} + +#[no_mangle] +pub unsafe extern "C" fn sqlite3_get_table( + db: *mut sqlite3, + sql: *const std::ffi::c_char, + paz_result: *mut *mut *mut std::ffi::c_char, + pn_row: *mut std::ffi::c_int, + pn_column: *mut std::ffi::c_int, + pz_err_msg: *mut *mut std::ffi::c_char, +) -> std::ffi::c_int { + if db.is_null() || sql.is_null() || paz_result.is_null() { + return SQLITE_ERROR; + } + + let mut res = TabResult::new(20); + + let rc = sqlite3_exec( + db, + sql, + Some(sqlite_get_table_cb), + &mut res as *mut _ as *mut _, + pz_err_msg, + ); + + if rc != SQLITE_OK { + res.free(); + if let Some(err_msg) = res.z_err_msg { + if !pz_err_msg.is_null() { + *pz_err_msg = err_msg.into_raw(); + } + } + return rc; + } + + let total_results = res.az_result.len(); + if res.az_result.capacity() > total_results { + res.az_result.shrink_to_fit(); + } + + *paz_result = res.az_result.as_mut_ptr(); + *pn_row = res.n_row as std::ffi::c_int; + *pn_column = res.n_column as std::ffi::c_int; + + std::mem::forget(res); + + SQLITE_OK +} + #[no_mangle] pub unsafe extern "C" fn sqlite3_result_null(_context: *mut std::ffi::c_void) { stub!(); From b3555680236489623df435200ce0f64770cda3ce Mon Sep 17 00:00:00 2001 From: alpaylan Date: Thu, 19 Dec 2024 23:40:04 -0500 Subject: [PATCH 051/144] use ticks as the main simulator driver, handle disconnects correctly, add multi-connection setup --- simulator/generation/plan.rs | 62 ++++++++++++++++++- simulator/main.rs | 114 ++++++++++++++++++++++------------- 2 files changed, 132 insertions(+), 44 deletions(-) diff --git a/simulator/generation/plan.rs 
b/simulator/generation/plan.rs index d7c309b93..b1a233f9e 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -9,7 +9,7 @@ use crate::{ query::{Create, Insert, Predicate, Query, Select}, table::Value, }, - SimulatorEnv, SimulatorOpts, + SimConnection, SimulatorEnv, SimulatorOpts, }; use crate::generation::{frequency, Arbitrary, ArbitraryFrom}; @@ -21,6 +21,7 @@ pub(crate) type ResultSet = Vec>; pub(crate) struct InteractionPlan { pub(crate) plan: Vec, pub(crate) stack: Vec, + pub(crate) interaction_pointer: usize, } impl Display for InteractionPlan { @@ -31,6 +32,7 @@ impl Display for InteractionPlan { Interaction::Assertion(assertion) => { write!(f, "-- ASSERT: {};\n", assertion.message)? } + Interaction::Fault(fault) => write!(f, "-- FAULT: {};\n", fault)?, } } @@ -58,6 +60,7 @@ impl Display for InteractionStats { pub(crate) enum Interaction { Query(Query), Assertion(Assertion), + Fault(Fault), } impl Display for Interaction { @@ -65,6 +68,7 @@ impl Display for Interaction { match self { Interaction::Query(query) => write!(f, "{}", query), Interaction::Assertion(assertion) => write!(f, "ASSERT: {}", assertion.message), + Interaction::Fault(fault) => write!(f, "FAULT: {}", fault), } } } @@ -74,6 +78,18 @@ pub(crate) struct Assertion { pub(crate) message: String, } +pub(crate) enum Fault { + Disconnect, +} + +impl Display for Fault { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Fault::Disconnect => write!(f, "DISCONNECT"), + } + } +} + pub(crate) struct Interactions(Vec); impl Interactions { @@ -96,6 +112,7 @@ impl Interactions { Query::Select(_) => {} }, Interaction::Assertion(_) => {} + Interaction::Fault(_) => {} } } } @@ -106,6 +123,7 @@ impl InteractionPlan { InteractionPlan { plan: Vec::new(), stack: Vec::new(), + interaction_pointer: 0, } } @@ -127,6 +145,7 @@ impl InteractionPlan { Query::Create(_) => {} }, Interaction::Assertion(_) => {} + Interaction::Fault(_) => {} } } @@ -223,6 
+242,9 @@ impl Interaction { Interaction::Assertion(_) => { unreachable!("unexpected: this function should only be called on queries") } + Interaction::Fault(fault) => { + unreachable!("unexpected: this function should only be called on queries") + } } } @@ -239,6 +261,38 @@ impl Interaction { } Ok(()) } + Interaction::Fault(_) => { + unreachable!("unexpected: this function should only be called on assertions") + } + } + } + + pub(crate) fn execute_fault(&self, env: &mut SimulatorEnv, conn_index: usize) -> Result<()> { + match self { + Interaction::Query(_) => { + unreachable!("unexpected: this function should only be called on faults") + } + Interaction::Assertion(_) => { + unreachable!("unexpected: this function should only be called on faults") + } + Interaction::Fault(fault) => { + match fault { + Fault::Disconnect => { + match env.connections[conn_index] { + SimConnection::Connected(ref mut conn) => { + conn.close()?; + } + SimConnection::Disconnected => { + return Err(limbo_core::LimboError::InternalError( + "Tried to disconnect a disconnected connection".to_string(), + )); + } + } + env.connections[conn_index] = SimConnection::Disconnected; + } + } + Ok(()) + } } } } @@ -307,6 +361,11 @@ fn random_write(rng: &mut R, env: &SimulatorEnv) -> Interactions { Interactions(vec![insert_query]) } +fn random_fault(rng: &mut R, env: &SimulatorEnv) -> Interactions { + let fault = Interaction::Fault(Fault::Disconnect); + Interactions(vec![fault]) +} + impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { fn arbitrary_from( rng: &mut R, @@ -334,6 +393,7 @@ impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { Box::new(|rng: &mut R| random_write(rng, env)), ), (1, Box::new(|rng: &mut R| create_table(rng, env))), + (1, Box::new(|rng: &mut R| random_fault(rng, env))), ], rng, ) diff --git a/simulator/main.rs b/simulator/main.rs index 67d9b92f9..dfdc974d3 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,4 +1,4 @@ -use 
generation::plan::{Interaction, ResultSet}; +use generation::plan::{Interaction, InteractionPlan, ResultSet}; use generation::{pick, pick_index, Arbitrary, ArbitraryFrom}; use limbo_core::{Connection, Database, File, OpenFlags, PlatformIO, Result, RowResult, IO}; use model::query::{Create, Insert, Predicate, Query, Select}; @@ -66,7 +66,7 @@ fn main() { }; let opts = SimulatorOpts { - ticks: rng.gen_range(0..4096), + ticks: rng.gen_range(0..1024), max_connections: 1, // TODO: for now let's use one connection as we didn't implement // correct transactions procesing max_tables: rng.gen_range(0..128), @@ -74,7 +74,7 @@ fn main() { write_percent, delete_percent, page_size: 4096, // TODO: randomize this too - max_interactions: rng.gen_range(0..10000), + max_interactions: rng.gen_range(0..1024), }; let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap()); @@ -101,63 +101,87 @@ fn main() { println!("Initial opts {:?}", env.opts); log::info!("Generating database interaction plan..."); - let mut plan = generation::plan::InteractionPlan::arbitrary_from(&mut env.rng.clone(), &env); + let mut plans = (1..=env.opts.max_connections) + .map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &env)) + .collect::>(); - log::info!("{}", plan.stats()); + log::info!("{}", plans[0].stats()); - for interaction in &plan.plan { + log::info!("Executing database interaction plan..."); + let result = execute_plans(&mut env, &mut plans); + + if result.is_err() { + log::error!("error executing plans: {:?}", result.err()); + } + log::info!("db is at {:?}", path); + let mut path = TempDir::new().unwrap().into_path(); + path.push("simulator.plan"); + let mut f = std::fs::File::create(path.clone()).unwrap(); + f.write(plans[0].to_string().as_bytes()).unwrap(); + log::info!("plan saved at {:?}", path); + log::info!("seed was {}", seed); + + env.io.print_stats(); +} + +fn execute_plans(env: &mut SimulatorEnv, plans: &mut Vec) -> Result<()> { + // todo: add history here by recording 
which interaction was executed at which tick + for _tick in 0..env.opts.ticks { + // Pick the connection to interact with let connection_index = pick_index(env.connections.len(), &mut env.rng); - let mut connection = env.connections[connection_index].clone(); + // Execute the interaction for the selected connection + execute_plan(env, connection_index, plans)?; + } - if matches!(connection, SimConnection::Disconnected) { - connection = SimConnection::Connected(env.db.connect()); - env.connections[connection_index] = connection.clone(); - } + Ok(()) +} - match &mut connection { - SimConnection::Connected(conn) => { - let disconnect = env.rng.gen_ratio(1, 100); - if disconnect { - log::info!("disconnecting {}", connection_index); - let _ = conn.close(); - env.connections[connection_index] = SimConnection::Disconnected; - } else { - match process_connection(conn, interaction, &mut plan.stack) { - Ok(_) => { - log::info!("connection {} processed", connection_index); - } - Err(err) => { - log::error!("error {}", err); - log::debug!("db is at {:?}", path); - // save the interaction plan - let mut path = TempDir::new().unwrap().into_path(); - path.push("simulator.plan"); - let mut f = std::fs::File::create(path.clone()).unwrap(); - f.write(plan.to_string().as_bytes()).unwrap(); - log::debug!("plan saved at {:?}", path); - log::debug!("seed was {}", seed); - break; - } - } - } +fn execute_plan( + env: &mut SimulatorEnv, + connection_index: usize, + plans: &mut Vec, +) -> Result<()> { + let connection = &env.connections[connection_index]; + let plan = &mut plans[connection_index]; + + if plan.interaction_pointer >= plan.plan.len() { + return Ok(()); + } + + let interaction = &plan.plan[plan.interaction_pointer]; + + if let SimConnection::Disconnected = connection { + log::info!("connecting {}", connection_index); + env.connections[connection_index] = SimConnection::Connected(env.db.connect()); + } else { + match execute_interaction(env, connection_index, interaction, &mut 
plan.stack) { + Ok(_) => { + log::debug!("connection {} processed", connection_index); + plan.interaction_pointer += 1; } - SimConnection::Disconnected => { - log::info!("disconnecting {}", connection_index); - env.connections[connection_index] = SimConnection::Connected(env.db.connect()); + Err(err) => { + log::error!("error {}", err); + return Err(err); } } } - env.io.print_stats(); + Ok(()) } -fn process_connection( - conn: &mut Rc, +fn execute_interaction( + env: &mut SimulatorEnv, + connection_index: usize, interaction: &Interaction, stack: &mut Vec, ) -> Result<()> { match interaction { generation::plan::Interaction::Query(_) => { + let conn = match &mut env.connections[connection_index] { + SimConnection::Connected(conn) => conn, + SimConnection::Disconnected => unreachable!(), + }; + log::debug!("{}", interaction); let results = interaction.execute_query(conn)?; log::debug!("{:?}", results); @@ -165,6 +189,10 @@ fn process_connection( } generation::plan::Interaction::Assertion(_) => { interaction.execute_assertion(stack)?; + stack.clear(); + } + Interaction::Fault(_) => { + interaction.execute_fault(env, connection_index)?; } } From 03ae12306086a13e3d889e311519ae08242bfc85 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 20 Dec 2024 09:14:54 +0200 Subject: [PATCH 052/144] Add PERF.md with Mobibench instructions --- PERF.md | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 PERF.md diff --git a/PERF.md b/PERF.md new file mode 100644 index 000000000..65509f7b8 --- /dev/null +++ b/PERF.md @@ -0,0 +1,58 @@ +# Performance Testing + +## Mobibench + +1. Clone Mobibench source repository: + +```console +git clone git@github.com:ESOS-Lab/Mobibench.git +``` + +2. 
Patch Mobibench: + +```patch +diff --git a/shell/Makefile b/shell/Makefile +index 6b65351..262ab5f 100644 +--- a/shell/Makefile ++++ b/shell/Makefile +@@ -4,8 +4,7 @@ + + EXENAME = mobibench + +-SRCS = mobibench.c \ +- sqlite3.c ++SRCS = mobibench.c + + INSTALL = install + +@@ -14,6 +13,8 @@ bindir = $(prefix)/bin + + CFLAGS = -lpthread -ldl + ++LIBS = /target/release/liblimbo_sqlite3.a -lm ++ + #CFLAGS += -DDEBUG_SCRIPT + + #for sqltie3 +@@ -37,7 +38,7 @@ CFLAGS += -DNDEBUG=1 \ + --static + + all : +- $(CROSS)gcc -o $(EXENAME) $(SRCS) $(CFLAGS) ++ $(CROSS)gcc -o $(EXENAME) $(SRCS) $(CFLAGS) $(LIBS) + + clean : + @rm -rf mobibench +``` + +3. Build Mobibench: + +```console +cd shell && make +``` + +4. Run Mobibench: + +```console +./mobibench -p -n 1000 -d 0 +``` From 56710b0187b25ba559e7fc1bf05518b16bfa3b57 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 20 Dec 2024 09:18:18 +0200 Subject: [PATCH 053/144] sqlite3: Implement sqlite3_free_table() --- sqlite3/src/lib.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index a576e9b11..5bacf16cd 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -781,6 +781,14 @@ pub unsafe extern "C" fn sqlite3_get_table( SQLITE_OK } +#[no_mangle] +pub unsafe extern "C" fn sqlite3_free_table( + paz_result: *mut *mut *mut std::ffi::c_char, +) { + let res = &mut *(paz_result as *mut TabResult); + res.free(); +} + #[no_mangle] pub unsafe extern "C" fn sqlite3_result_null(_context: *mut std::ffi::c_void) { stub!(); From 07ae003db7dec0a2f5fdc68d9828d730e4db2936 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 20 Dec 2024 09:22:44 +0200 Subject: [PATCH 054/144] Update PERF.md --- PERF.md | 41 +++-------------------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) diff --git a/PERF.md b/PERF.md index 65509f7b8..def290cae 100644 --- a/PERF.md +++ b/PERF.md @@ -2,48 +2,13 @@ ## Mobibench -1. Clone Mobibench source repository: +1. 
Clone the source repository of Mobibench fork for Limbo: ```console -git clone git@github.com:ESOS-Lab/Mobibench.git +git clone git@github.com:penberg/Mobibench.git ``` -2. Patch Mobibench: - -```patch -diff --git a/shell/Makefile b/shell/Makefile -index 6b65351..262ab5f 100644 ---- a/shell/Makefile -+++ b/shell/Makefile -@@ -4,8 +4,7 @@ - - EXENAME = mobibench - --SRCS = mobibench.c \ -- sqlite3.c -+SRCS = mobibench.c - - INSTALL = install - -@@ -14,6 +13,8 @@ bindir = $(prefix)/bin - - CFLAGS = -lpthread -ldl - -+LIBS = /target/release/liblimbo_sqlite3.a -lm -+ - #CFLAGS += -DDEBUG_SCRIPT - - #for sqltie3 -@@ -37,7 +38,7 @@ CFLAGS += -DNDEBUG=1 \ - --static - - all : -- $(CROSS)gcc -o $(EXENAME) $(SRCS) $(CFLAGS) -+ $(CROSS)gcc -o $(EXENAME) $(SRCS) $(CFLAGS) $(LIBS) - - clean : - @rm -rf mobibench -``` +2. Change `LIBS` in `shell/Makefile` to point to your Limbo source repository. 3. Build Mobibench: From 8387e7903c6f869711bba4bf39637273d9124040 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 20 Dec 2024 09:29:46 +0200 Subject: [PATCH 055/144] sqlite: Fix source formatting --- sqlite3/src/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index 5bacf16cd..da057df79 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -782,9 +782,7 @@ pub unsafe extern "C" fn sqlite3_get_table( } #[no_mangle] -pub unsafe extern "C" fn sqlite3_free_table( - paz_result: *mut *mut *mut std::ffi::c_char, -) { +pub unsafe extern "C" fn sqlite3_free_table(paz_result: *mut *mut *mut std::ffi::c_char) { let res = &mut *(paz_result as *mut TabResult); res.free(); } From 19ae42dfa313d9810384ba596ea25a0620dcfa75 Mon Sep 17 00:00:00 2001 From: Kacper Madej Date: Wed, 18 Dec 2024 02:11:25 +0100 Subject: [PATCH 056/144] Implement json_array --- core/function.rs | 4 + core/json/mod.rs | 125 ++++++-- core/storage/sqlite3_ondisk.rs | 4 +- core/translate/expr.rs | 56 +++- core/types.rs | 63 +++- core/vdbe/datetime.rs | 
289 +++++++++--------- core/vdbe/explain.rs | 153 +++++----- core/vdbe/mod.rs | 514 ++++++++++++++++++--------------- testing/json.test | 12 + 9 files changed, 717 insertions(+), 503 deletions(-) mode change 100644 => 100755 testing/json.test diff --git a/core/function.rs b/core/function.rs index c4885925f..86c88a1e4 100644 --- a/core/function.rs +++ b/core/function.rs @@ -5,6 +5,7 @@ use std::fmt::Display; #[derive(Debug, Clone, PartialEq)] pub enum JsonFunc { Json, + JsonArray, } #[cfg(feature = "json")] @@ -15,6 +16,7 @@ impl Display for JsonFunc { "{}", match self { JsonFunc::Json => "json".to_string(), + JsonFunc::JsonArray => "json_array".to_string(), } ) } @@ -328,6 +330,8 @@ impl Func { "replace" => Ok(Func::Scalar(ScalarFunc::Replace)), #[cfg(feature = "json")] "json" => Ok(Func::Json(JsonFunc::Json)), + #[cfg(feature = "json")] + "json_array" => Ok(Func::Json(JsonFunc::JsonArray)), "unixepoch" => Ok(Func::Scalar(ScalarFunc::UnixEpoch)), "hex" => Ok(Func::Scalar(ScalarFunc::Hex)), "unhex" => Ok(Func::Scalar(ScalarFunc::Unhex)), diff --git a/core/json/mod.rs b/core/json/mod.rs index 06169abd8..f0bffa8f1 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -6,7 +6,7 @@ use std::rc::Rc; pub use crate::json::de::from_str; pub use crate::json::ser::to_string; -use crate::types::OwnedValue; +use crate::types::{LimboText, OwnedValue, TextSubtype}; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; @@ -24,18 +24,24 @@ pub enum Val { pub fn get_json(json_value: &OwnedValue) -> crate::Result { match json_value { - OwnedValue::Text(ref t) => match crate::json::from_str::(t) { - Ok(json) => { - let json = crate::json::to_string(&json).unwrap(); - Ok(OwnedValue::Text(Rc::new(json))) + OwnedValue::Text(ref t) => { + if t.subtype == TextSubtype::Json { + return Ok(json_value.to_owned()); } - Err(_) => { - crate::bail_parse_error!("malformed JSON") + + match crate::json::from_str::(&t.value) { + Ok(json) => { + let json = 
crate::json::to_string(&json).unwrap(); + Ok(OwnedValue::Text(LimboText::json(Rc::new(json)))) + } + Err(_) => { + crate::bail_parse_error!("malformed JSON") + } } - }, + } OwnedValue::Blob(b) => { if let Ok(json) = jsonb::from_slice(b) { - Ok(OwnedValue::Text(Rc::new(json.to_string()))) + Ok(OwnedValue::Text(LimboText::json(Rc::new(json.to_string())))) } else { crate::bail_parse_error!("malformed JSON"); } @@ -44,6 +50,37 @@ pub fn get_json(json_value: &OwnedValue) -> crate::Result { } } +pub fn json_array(values: Vec) -> crate::Result { + let mut s = String::new(); + s.push('['); + + for (idx, value) in values.iter().enumerate() { + match value { + OwnedValue::Blob(_) => crate::bail_parse_error!("JSON cannot hold BLOB values"), + OwnedValue::Text(t) => { + if t.subtype == TextSubtype::Json { + s.push_str(&t.value); + } else { + match crate::json::to_string(&*t.value) { + Ok(json) => s.push_str(&json), + Err(_) => crate::bail_parse_error!("malformed JSON"), + } + } + } + OwnedValue::Integer(i) => s.push_str(&i.to_string()), + OwnedValue::Float(f) => s.push_str(&f.to_string()), + _ => unreachable!(), + } + + if idx < values.len() - 1 { + s.push(','); + } + } + + s.push(']'); + Ok(OwnedValue::build_text(Rc::new(s))) +} + #[cfg(test)] mod tests { use super::*; @@ -51,10 +88,10 @@ mod tests { #[test] fn test_get_json_valid_json5() { - let input = OwnedValue::Text(Rc::new("{ key: 'value' }".to_string())); + let input = OwnedValue::build_text(Rc::new("{ key: 'value' }".to_string())); let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { - assert!(result_str.contains("\"key\":\"value\"")); + assert!(result_str.value.contains("\"key\":\"value\"")); } else { panic!("Expected OwnedValue::Text"); } @@ -62,10 +99,10 @@ mod tests { #[test] fn test_get_json_valid_json5_double_single_quotes() { - let input = OwnedValue::Text(Rc::new("{ key: ''value'' }".to_string())); + let input = OwnedValue::build_text(Rc::new("{ key: ''value'' 
}".to_string())); let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { - assert!(result_str.contains("\"key\":\"value\"")); + assert!(result_str.value.contains("\"key\":\"value\"")); } else { panic!("Expected OwnedValue::Text"); } @@ -73,10 +110,10 @@ mod tests { #[test] fn test_get_json_valid_json5_infinity() { - let input = OwnedValue::Text(Rc::new("{ \"key\": Infinity }".to_string())); + let input = OwnedValue::build_text(Rc::new("{ \"key\": Infinity }".to_string())); let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { - assert!(result_str.contains("{\"key\":9e999}")); + assert!(result_str.value.contains("{\"key\":9e999}")); } else { panic!("Expected OwnedValue::Text"); } @@ -84,10 +121,10 @@ mod tests { #[test] fn test_get_json_valid_json5_negative_infinity() { - let input = OwnedValue::Text(Rc::new("{ \"key\": -Infinity }".to_string())); + let input = OwnedValue::build_text(Rc::new("{ \"key\": -Infinity }".to_string())); let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { - assert!(result_str.contains("{\"key\":-9e999}")); + assert!(result_str.value.contains("{\"key\":-9e999}")); } else { panic!("Expected OwnedValue::Text"); } @@ -95,10 +132,10 @@ mod tests { #[test] fn test_get_json_valid_json5_nan() { - let input = OwnedValue::Text(Rc::new("{ \"key\": NaN }".to_string())); + let input = OwnedValue::build_text(Rc::new("{ \"key\": NaN }".to_string())); let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { - assert!(result_str.contains("{\"key\":null}")); + assert!(result_str.value.contains("{\"key\":null}")); } else { panic!("Expected OwnedValue::Text"); } @@ -106,7 +143,7 @@ mod tests { #[test] fn test_get_json_invalid_json5() { - let input = OwnedValue::Text(Rc::new("{ key: value }".to_string())); + let input = OwnedValue::build_text(Rc::new("{ key: value }".to_string())); let result = get_json(&input); match result { Ok(_) 
=> panic!("Expected error for malformed JSON"), @@ -116,10 +153,10 @@ mod tests { #[test] fn test_get_json_valid_jsonb() { - let input = OwnedValue::Text(Rc::new("{\"key\":\"value\"}".to_string())); + let input = OwnedValue::build_text(Rc::new("{\"key\":\"value\"}".to_string())); let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { - assert!(result_str.contains("\"key\":\"value\"")); + assert!(result_str.value.contains("\"key\":\"value\"")); } else { panic!("Expected OwnedValue::Text"); } @@ -127,7 +164,7 @@ mod tests { #[test] fn test_get_json_invalid_jsonb() { - let input = OwnedValue::Text(Rc::new("{key:\"value\"".to_string())); + let input = OwnedValue::build_text(Rc::new("{key:\"value\"".to_string())); let result = get_json(&input); match result { Ok(_) => panic!("Expected error for malformed JSON"), @@ -141,7 +178,7 @@ mod tests { let input = OwnedValue::Blob(Rc::new(binary_json)); let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { - assert!(result_str.contains("\"asd\":\"adf\"")); + assert!(result_str.value.contains("\"asd\":\"adf\"")); } else { panic!("Expected OwnedValue::Text"); } @@ -168,4 +205,44 @@ mod tests { panic!("Expected OwnedValue::Null"); } } + + #[test] + fn test_json_array_simple() { + let text = OwnedValue::build_text(Rc::new("value1".to_string())); + let json = OwnedValue::Text(LimboText::json(Rc::new("\"value2\"".to_string()))); + let input = vec![text, json, OwnedValue::Integer(1), OwnedValue::Float(1.1)]; + + let result = json_array(input).unwrap(); + if let OwnedValue::Text(res) = result { + assert_eq!(res.value.as_str(), "[\"value1\",\"value2\",1,1.1]"); + } else { + panic!("Expected OwnedValue::Text"); + } + } + + #[test] + fn test_json_array_empty() { + let input = vec![]; + + let result = json_array(input).unwrap(); + if let OwnedValue::Text(res) = result { + assert_eq!(res.value.as_str(), "[]"); + } else { + panic!("Expected OwnedValue::Text"); + } + } + + 
#[test] + fn test_json_array_blob_invalid() { + let blob = OwnedValue::Blob(Rc::new("1".as_bytes().to_vec())); + + let input = vec![blob]; + + let result = json_array(input); + + match result { + Ok(_) => panic!("Expected error for blob input"), + Err(e) => assert!(e.to_string().contains("JSON cannot hold BLOB values")), + } + } } diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index a1f58d38a..a1a8aec0c 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -934,7 +934,7 @@ pub fn read_value(buf: &[u8], serial_type: &SerialType) -> Result<(OwnedValue, u } let bytes = buf[0..n].to_vec(); let value = unsafe { String::from_utf8_unchecked(bytes) }; - Ok((OwnedValue::Text(value.into()), n)) + Ok((OwnedValue::build_text(value.into()), n)) } } } @@ -1271,7 +1271,7 @@ mod tests { #[case(&[], SerialType::ConstInt0, OwnedValue::Integer(0))] #[case(&[], SerialType::ConstInt1, OwnedValue::Integer(1))] #[case(&[1, 2, 3], SerialType::Blob(3), OwnedValue::Blob(vec![1, 2, 3].into()))] - #[case(&[65, 66, 67], SerialType::String(3), OwnedValue::Text("ABC".to_string().into()))] + #[case(&[65, 66, 67], SerialType::String(3), OwnedValue::build_text("ABC".to_string().into()))] fn test_read_value( #[case] buf: &[u8], #[case] serial_type: SerialType, diff --git a/core/translate/expr.rs b/core/translate/expr.rs index b1db3ac92..679e86819 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -898,6 +898,22 @@ pub fn translate_expr( }); Ok(target_register) } + JsonFunc::JsonArray => { + allocate_registers( + program, + args, + referenced_tables, + precomputed_exprs_to_registers, + )?; + + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: target_register + 1, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } }, Func::Scalar(srf) => { match srf { @@ -905,18 +921,12 @@ pub fn translate_expr( unreachable!("this is always ast::Expr::Cast") } ScalarFunc::Char => { - let args = 
args.clone().unwrap_or_else(Vec::new); - - for arg in args.iter() { - let reg = program.alloc_register(); - translate_expr( - program, - referenced_tables, - arg, - reg, - precomputed_exprs_to_registers, - )?; - } + allocate_registers( + program, + args, + referenced_tables, + precomputed_exprs_to_registers, + )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1942,6 +1952,28 @@ pub fn translate_expr( } } +fn allocate_registers( + program: &mut ProgramBuilder, + args: &Option>, + referenced_tables: Option<&[BTreeTableReference]>, + precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, +) -> Result<()> { + let args = args.clone().unwrap_or_else(Vec::new); + + for arg in args.iter() { + let reg = program.alloc_register(); + translate_expr( + program, + referenced_tables, + arg, + reg, + precomputed_exprs_to_registers, + )?; + } + + Ok(()) +} + fn wrap_eval_jump_expr( program: &mut ProgramBuilder, insn: Insn, diff --git a/core/types.rs b/core/types.rs index 7024ef257..5f1b55d7b 100644 --- a/core/types.rs +++ b/core/types.rs @@ -27,24 +27,59 @@ impl<'a> Display for Value<'a> { } } +#[derive(Debug, Clone, PartialEq)] +pub enum TextSubtype { + Text, + Json, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct LimboText { + pub value: Rc, + pub subtype: TextSubtype, +} + +impl LimboText { + pub fn new(value: Rc) -> Self { + Self { + value, + subtype: TextSubtype::Text, + } + } + + pub fn json(value: Rc) -> Self { + Self { + value, + subtype: TextSubtype::Json, + } + } +} + #[derive(Debug, Clone, PartialEq)] pub enum OwnedValue { Null, Integer(i64), Float(f64), - Text(Rc), + Text(LimboText), Blob(Rc>), Agg(Box), // TODO(pere): make this without Box. Currently this might cause cache miss but let's leave it for future analysis Record(OwnedRecord), } +impl OwnedValue { + // A helper function that makes building a text OwnedValue easier. 
+ pub fn build_text(text: Rc) -> Self { + OwnedValue::Text(LimboText::new(text)) + } +} + impl Display for OwnedValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { OwnedValue::Null => write!(f, "NULL"), OwnedValue::Integer(i) => write!(f, "{}", i), OwnedValue::Float(fl) => write!(f, "{:?}", fl), - OwnedValue::Text(s) => write!(f, "{}", s), + OwnedValue::Text(s) => write!(f, "{}", s.value), OwnedValue::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), OwnedValue::Agg(a) => match a.as_ref() { AggContext::Avg(acc, _count) => write!(f, "{}", acc), @@ -111,7 +146,7 @@ impl PartialOrd for OwnedValue { ) => Some(std::cmp::Ordering::Greater), (OwnedValue::Text(text_left), OwnedValue::Text(text_right)) => { - text_left.partial_cmp(text_right) + text_left.value.partial_cmp(&text_right.value) } // Text vs Blob (OwnedValue::Text(_), OwnedValue::Blob(_)) => Some(std::cmp::Ordering::Less), @@ -171,21 +206,27 @@ impl std::ops::Add for OwnedValue { OwnedValue::Float(float_left + float_right) } (OwnedValue::Text(string_left), OwnedValue::Text(string_right)) => { - OwnedValue::Text(Rc::new(string_left.to_string() + &string_right.to_string())) + OwnedValue::build_text(Rc::new( + string_left.value.to_string() + &string_right.value.to_string(), + )) } (OwnedValue::Text(string_left), OwnedValue::Integer(int_right)) => { - OwnedValue::Text(Rc::new(string_left.to_string() + &int_right.to_string())) + OwnedValue::build_text(Rc::new( + string_left.value.to_string() + &int_right.to_string(), + )) } (OwnedValue::Integer(int_left), OwnedValue::Text(string_right)) => { - OwnedValue::Text(Rc::new(int_left.to_string() + &string_right.to_string())) + OwnedValue::build_text(Rc::new( + int_left.to_string() + &string_right.value.to_string(), + )) } (OwnedValue::Text(string_left), OwnedValue::Float(float_right)) => { let string_right = OwnedValue::Float(float_right).to_string(); - OwnedValue::Text(Rc::new(string_left.to_string() + &string_right)) + 
OwnedValue::build_text(Rc::new(string_left.value.to_string() + &string_right)) } (OwnedValue::Float(float_left), OwnedValue::Text(string_right)) => { let string_left = OwnedValue::Float(float_left).to_string(); - OwnedValue::Text(Rc::new(string_left + &string_right.to_string())) + OwnedValue::build_text(Rc::new(string_left + &string_right.value.to_string())) } (lhs, OwnedValue::Null) => lhs, (OwnedValue::Null, rhs) => rhs, @@ -269,7 +310,7 @@ pub fn to_value(value: &OwnedValue) -> Value<'_> { OwnedValue::Null => Value::Null, OwnedValue::Integer(i) => Value::Integer(*i), OwnedValue::Float(f) => Value::Float(*f), - OwnedValue::Text(s) => Value::Text(s), + OwnedValue::Text(s) => Value::Text(&s.value), OwnedValue::Blob(b) => Value::Blob(b), OwnedValue::Agg(a) => match a.as_ref() { AggContext::Avg(acc, _count) => match acc { @@ -359,7 +400,7 @@ impl OwnedRecord { OwnedValue::Null => 0, OwnedValue::Integer(_) => 6, // for now let's only do i64 OwnedValue::Float(_) => 7, - OwnedValue::Text(t) => (t.len() * 2 + 13) as u64, + OwnedValue::Text(t) => (t.value.len() * 2 + 13) as u64, OwnedValue::Blob(b) => (b.len() * 2 + 12) as u64, // not serializable values OwnedValue::Agg(_) => unreachable!(), @@ -380,7 +421,7 @@ impl OwnedRecord { OwnedValue::Null => {} OwnedValue::Integer(i) => buf.extend_from_slice(&i.to_be_bytes()), OwnedValue::Float(f) => buf.extend_from_slice(&f.to_be_bytes()), - OwnedValue::Text(t) => buf.extend_from_slice(t.as_bytes()), + OwnedValue::Text(t) => buf.extend_from_slice(t.value.as_bytes()), OwnedValue::Blob(b) => buf.extend_from_slice(b), // non serializable OwnedValue::Agg(_) => unreachable!(), diff --git a/core/vdbe/datetime.rs b/core/vdbe/datetime.rs index 86ad57677..7b9e49fd6 100644 --- a/core/vdbe/datetime.rs +++ b/core/vdbe/datetime.rs @@ -10,53 +10,53 @@ use crate::Result; /// Implementation of the date() SQL function. 
pub fn exec_date(values: &[OwnedValue]) -> OwnedValue { let maybe_dt = match values.first() { - None => parse_naive_date_time(&OwnedValue::Text(Rc::new("now".to_string()))), + None => parse_naive_date_time(&OwnedValue::build_text(Rc::new("now".to_string()))), Some(value) => parse_naive_date_time(value), }; // early return, no need to look at modifiers if result invalid if maybe_dt.is_none() { - return OwnedValue::Text(Rc::new(String::new())); + return OwnedValue::build_text(Rc::new(String::new())); } // apply modifiers if result is valid let mut dt = maybe_dt.unwrap(); for modifier in values.iter().skip(1) { if let OwnedValue::Text(modifier_str) = modifier { - if apply_modifier(&mut dt, modifier_str).is_err() { - return OwnedValue::Text(Rc::new(String::new())); + if apply_modifier(&mut dt, &modifier_str.value).is_err() { + return OwnedValue::build_text(Rc::new(String::new())); } } else { - return OwnedValue::Text(Rc::new(String::new())); + return OwnedValue::build_text(Rc::new(String::new())); } } - OwnedValue::Text(Rc::new(get_date_from_naive_datetime(dt))) + OwnedValue::build_text(Rc::new(get_date_from_naive_datetime(dt))) } /// Implementation of the time() SQL function. 
pub fn exec_time(time_value: &[OwnedValue]) -> OwnedValue { let maybe_dt = match time_value.first() { - None => parse_naive_date_time(&OwnedValue::Text(Rc::new("now".to_string()))), + None => parse_naive_date_time(&OwnedValue::build_text(Rc::new("now".to_string()))), Some(value) => parse_naive_date_time(value), }; // early return, no need to look at modifiers if result invalid if maybe_dt.is_none() { - return OwnedValue::Text(Rc::new(String::new())); + return OwnedValue::build_text(Rc::new(String::new())); } // apply modifiers if result is valid let mut dt = maybe_dt.unwrap(); for modifier in time_value.iter().skip(1) { if let OwnedValue::Text(modifier_str) = modifier { - if apply_modifier(&mut dt, modifier_str).is_err() { - return OwnedValue::Text(Rc::new(String::new())); + if apply_modifier(&mut dt, &modifier_str.value).is_err() { + return OwnedValue::build_text(Rc::new(String::new())); } } else { - return OwnedValue::Text(Rc::new(String::new())); + return OwnedValue::build_text(Rc::new(String::new())); } } - OwnedValue::Text(Rc::new(get_time_from_naive_datetime(dt))) + OwnedValue::build_text(Rc::new(get_time_from_naive_datetime(dt))) } fn apply_modifier(dt: &mut NaiveDateTime, modifier: &str) -> Result<()> { @@ -125,7 +125,7 @@ fn get_unixepoch_from_naive_datetime(value: NaiveDateTime) -> String { fn parse_naive_date_time(time_value: &OwnedValue) -> Option { match time_value { - OwnedValue::Text(s) => get_date_time_from_time_value_string(s), + OwnedValue::Text(s) => get_date_time_from_time_value_string(&s.value), OwnedValue::Integer(i) => get_date_time_from_time_value_integer(*i), OwnedValue::Float(f) => get_date_time_from_time_value_float(*f), _ => None, @@ -410,197 +410,200 @@ mod tests { let test_cases = vec![ // Format 1: YYYY-MM-DD (no timezone applicable) ( - OwnedValue::Text(Rc::new("2024-07-21".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21".to_string())), test_date_str, ), // Format 2: YYYY-MM-DD HH:MM ( - 
OwnedValue::Text(Rc::new("2024-07-21 22:30".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30+02:00".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30-05:00".to_string())), next_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 01:30+05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 01:30+05:00".to_string())), prev_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30Z".to_string())), test_date_str, ), // Format 3: YYYY-MM-DD HH:MM:SS ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45+02:00".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45-05:00".to_string())), next_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 01:30:45+05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 01:30:45+05:00".to_string())), prev_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45Z".to_string())), test_date_str, ), // Format 4: YYYY-MM-DD HH:MM:SS.SSS ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45.123".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45.123".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45.123+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45.123+02:00".to_string())), test_date_str, ), ( - 
OwnedValue::Text(Rc::new("2024-07-21 22:30:45.123-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45.123-05:00".to_string())), next_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 01:30:45.123+05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 01:30:45.123+05:00".to_string())), prev_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45.123Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45.123Z".to_string())), test_date_str, ), // Format 5: YYYY-MM-DDTHH:MM ( - OwnedValue::Text(Rc::new("2024-07-21T22:30".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30+02:00".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30-05:00".to_string())), next_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T01:30+05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T01:30+05:00".to_string())), prev_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30Z".to_string())), test_date_str, ), // Format 6: YYYY-MM-DDTHH:MM:SS ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45+02:00".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45-05:00".to_string())), next_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T01:30:45+05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T01:30:45+05:00".to_string())), prev_date_str, 
), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45Z".to_string())), test_date_str, ), // Format 7: YYYY-MM-DDTHH:MM:SS.SSS ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45.123".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45.123".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45.123+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45.123+02:00".to_string())), test_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45.123-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45.123-05:00".to_string())), next_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T01:30:45.123+05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T01:30:45.123+05:00".to_string())), prev_date_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45.123Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45.123Z".to_string())), test_date_str, ), // Format 8: HH:MM - (OwnedValue::Text(Rc::new("22:30".to_string())), "2000-01-01"), ( - OwnedValue::Text(Rc::new("22:30+02:00".to_string())), + OwnedValue::build_text(Rc::new("22:30".to_string())), "2000-01-01", ), ( - OwnedValue::Text(Rc::new("22:30-05:00".to_string())), + OwnedValue::build_text(Rc::new("22:30+02:00".to_string())), + "2000-01-01", + ), + ( + OwnedValue::build_text(Rc::new("22:30-05:00".to_string())), "2000-01-02", ), ( - OwnedValue::Text(Rc::new("01:30+05:00".to_string())), + OwnedValue::build_text(Rc::new("01:30+05:00".to_string())), "1999-12-31", ), ( - OwnedValue::Text(Rc::new("22:30Z".to_string())), + OwnedValue::build_text(Rc::new("22:30Z".to_string())), "2000-01-01", ), // Format 9: HH:MM:SS ( - OwnedValue::Text(Rc::new("22:30:45".to_string())), + OwnedValue::build_text(Rc::new("22:30:45".to_string())), "2000-01-01", ), ( - OwnedValue::Text(Rc::new("22:30:45+02:00".to_string())), + 
OwnedValue::build_text(Rc::new("22:30:45+02:00".to_string())), "2000-01-01", ), ( - OwnedValue::Text(Rc::new("22:30:45-05:00".to_string())), + OwnedValue::build_text(Rc::new("22:30:45-05:00".to_string())), "2000-01-02", ), ( - OwnedValue::Text(Rc::new("01:30:45+05:00".to_string())), + OwnedValue::build_text(Rc::new("01:30:45+05:00".to_string())), "1999-12-31", ), ( - OwnedValue::Text(Rc::new("22:30:45Z".to_string())), + OwnedValue::build_text(Rc::new("22:30:45Z".to_string())), "2000-01-01", ), // Format 10: HH:MM:SS.SSS ( - OwnedValue::Text(Rc::new("22:30:45.123".to_string())), + OwnedValue::build_text(Rc::new("22:30:45.123".to_string())), "2000-01-01", ), ( - OwnedValue::Text(Rc::new("22:30:45.123+02:00".to_string())), + OwnedValue::build_text(Rc::new("22:30:45.123+02:00".to_string())), "2000-01-01", ), ( - OwnedValue::Text(Rc::new("22:30:45.123-05:00".to_string())), + OwnedValue::build_text(Rc::new("22:30:45.123-05:00".to_string())), "2000-01-02", ), ( - OwnedValue::Text(Rc::new("01:30:45.123+05:00".to_string())), + OwnedValue::build_text(Rc::new("01:30:45.123+05:00".to_string())), "1999-12-31", ), ( - OwnedValue::Text(Rc::new("22:30:45.123Z".to_string())), + OwnedValue::build_text(Rc::new("22:30:45.123Z".to_string())), "2000-01-01", ), // Test Format 11: 'now' - (OwnedValue::Text(Rc::new("now".to_string())), &now), + (OwnedValue::build_text(Rc::new("now".to_string())), &now), // Format 12: DDDDDDDDDD (Julian date as float or integer) (OwnedValue::Float(2460512.5), test_date_str), (OwnedValue::Integer(2460513), test_date_str), @@ -610,7 +613,7 @@ mod tests { let result = exec_date(&[input.clone()]); assert_eq!( result, - OwnedValue::Text(Rc::new(expected.to_string())), + OwnedValue::build_text(Rc::new(expected.to_string())), "Failed for input: {:?}", input ); @@ -620,37 +623,37 @@ mod tests { #[test] fn test_invalid_get_date_from_time_value() { let invalid_cases = vec![ - OwnedValue::Text(Rc::new("2024-07-21 25:00".to_string())), // Invalid hour - 
OwnedValue::Text(Rc::new("2024-07-21 24:00:00".to_string())), // Invalid hour - OwnedValue::Text(Rc::new("2024-07-21 23:60:00".to_string())), // Invalid minute - OwnedValue::Text(Rc::new("2024-07-21 22:58:60".to_string())), // Invalid second - OwnedValue::Text(Rc::new("2024-07-32".to_string())), // Invalid day - OwnedValue::Text(Rc::new("2024-13-01".to_string())), // Invalid month - OwnedValue::Text(Rc::new("invalid_date".to_string())), // Completely invalid string - OwnedValue::Text(Rc::new("".to_string())), // Empty string - OwnedValue::Integer(i64::MAX), // Large Julian day - OwnedValue::Integer(-1), // Negative Julian day - OwnedValue::Float(f64::MAX), // Large float + OwnedValue::build_text(Rc::new("2024-07-21 25:00".to_string())), // Invalid hour + OwnedValue::build_text(Rc::new("2024-07-21 24:00:00".to_string())), // Invalid hour + OwnedValue::build_text(Rc::new("2024-07-21 23:60:00".to_string())), // Invalid minute + OwnedValue::build_text(Rc::new("2024-07-21 22:58:60".to_string())), // Invalid second + OwnedValue::build_text(Rc::new("2024-07-32".to_string())), // Invalid day + OwnedValue::build_text(Rc::new("2024-13-01".to_string())), // Invalid month + OwnedValue::build_text(Rc::new("invalid_date".to_string())), // Completely invalid string + OwnedValue::build_text(Rc::new("".to_string())), // Empty string + OwnedValue::Integer(i64::MAX), // Large Julian day + OwnedValue::Integer(-1), // Negative Julian day + OwnedValue::Float(f64::MAX), // Large float OwnedValue::Float(-1.0), // Negative Julian day as float OwnedValue::Float(f64::NAN), // NaN OwnedValue::Float(f64::INFINITY), // Infinity OwnedValue::Null, // Null value OwnedValue::Blob(vec![1, 2, 3].into()), // Blob (unsupported type) // Invalid timezone tests - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+24:00".to_string())), // Invalid timezone offset (too large) - OwnedValue::Text(Rc::new("2024-07-21T12:00:00-24:00".to_string())), // Invalid timezone offset (too small) - 
OwnedValue::Text(Rc::new("2024-07-21T12:00:00+00:60".to_string())), // Invalid timezone minutes - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+00:00:00".to_string())), // Invalid timezone format (extra seconds) - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+".to_string())), // Incomplete timezone - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+Z".to_string())), // Invalid timezone format - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+00:00Z".to_string())), // Mixing offset and Z - OwnedValue::Text(Rc::new("2024-07-21T12:00:00UTC".to_string())), // Named timezone (not supported) + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+24:00".to_string())), // Invalid timezone offset (too large) + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00-24:00".to_string())), // Invalid timezone offset (too small) + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+00:60".to_string())), // Invalid timezone minutes + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+00:00:00".to_string())), // Invalid timezone format (extra seconds) + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+".to_string())), // Incomplete timezone + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+Z".to_string())), // Invalid timezone format + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+00:00Z".to_string())), // Mixing offset and Z + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00UTC".to_string())), // Named timezone (not supported) ]; for case in invalid_cases.iter() { let result = exec_date(&[case.clone()]); match result { - OwnedValue::Text(ref result_str) if result_str.is_empty() => (), + OwnedValue::Text(ref result_str) if result_str.value.is_empty() => (), _ => panic!( "Expected empty string for input: {:?}, but got: {:?}", case, result @@ -670,158 +673,164 @@ mod tests { let test_cases = vec![ // Format 1: YYYY-MM-DD (no timezone applicable) ( - OwnedValue::Text(Rc::new("2024-07-21".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21".to_string())), "00:00:00", 
), // Format 2: YYYY-MM-DD HH:MM ( - OwnedValue::Text(Rc::new("2024-07-21 22:30".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30".to_string())), "22:30:00", ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30+02:00".to_string())), "20:30:00", ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30-05:00".to_string())), "03:30:00", ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30Z".to_string())), "22:30:00", ), // Format 3: YYYY-MM-DD HH:MM:SS ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45".to_string())), test_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45+02:00".to_string())), prev_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45-05:00".to_string())), next_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45Z".to_string())), test_time_str, ), // Format 4: YYYY-MM-DD HH:MM:SS.SSS ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45.123".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45.123".to_string())), test_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45.123+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45.123+02:00".to_string())), prev_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45.123-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 22:30:45.123-05:00".to_string())), next_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21 22:30:45.123Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21 
22:30:45.123Z".to_string())), test_time_str, ), // Format 5: YYYY-MM-DDTHH:MM ( - OwnedValue::Text(Rc::new("2024-07-21T22:30".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30".to_string())), "22:30:00", ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30+02:00".to_string())), "20:30:00", ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30-05:00".to_string())), "03:30:00", ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30Z".to_string())), "22:30:00", ), // Format 6: YYYY-MM-DDTHH:MM:SS ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45".to_string())), test_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45+02:00".to_string())), prev_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45-05:00".to_string())), next_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45Z".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45Z".to_string())), test_time_str, ), // Format 7: YYYY-MM-DDTHH:MM:SS.SSS ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45.123".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45.123".to_string())), test_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45.123+02:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45.123+02:00".to_string())), prev_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45.123-05:00".to_string())), + OwnedValue::build_text(Rc::new("2024-07-21T22:30:45.123-05:00".to_string())), next_time_str, ), ( - OwnedValue::Text(Rc::new("2024-07-21T22:30:45.123Z".to_string())), + 
OwnedValue::build_text(Rc::new("2024-07-21T22:30:45.123Z".to_string())), test_time_str, ), // Format 8: HH:MM - (OwnedValue::Text(Rc::new("22:30".to_string())), "22:30:00"), ( - OwnedValue::Text(Rc::new("22:30+02:00".to_string())), + OwnedValue::build_text(Rc::new("22:30".to_string())), + "22:30:00", + ), + ( + OwnedValue::build_text(Rc::new("22:30+02:00".to_string())), "20:30:00", ), ( - OwnedValue::Text(Rc::new("22:30-05:00".to_string())), + OwnedValue::build_text(Rc::new("22:30-05:00".to_string())), "03:30:00", ), - (OwnedValue::Text(Rc::new("22:30Z".to_string())), "22:30:00"), + ( + OwnedValue::build_text(Rc::new("22:30Z".to_string())), + "22:30:00", + ), // Format 9: HH:MM:SS ( - OwnedValue::Text(Rc::new("22:30:45".to_string())), + OwnedValue::build_text(Rc::new("22:30:45".to_string())), test_time_str, ), ( - OwnedValue::Text(Rc::new("22:30:45+02:00".to_string())), + OwnedValue::build_text(Rc::new("22:30:45+02:00".to_string())), prev_time_str, ), ( - OwnedValue::Text(Rc::new("22:30:45-05:00".to_string())), + OwnedValue::build_text(Rc::new("22:30:45-05:00".to_string())), next_time_str, ), ( - OwnedValue::Text(Rc::new("22:30:45Z".to_string())), + OwnedValue::build_text(Rc::new("22:30:45Z".to_string())), test_time_str, ), // Format 10: HH:MM:SS.SSS ( - OwnedValue::Text(Rc::new("22:30:45.123".to_string())), + OwnedValue::build_text(Rc::new("22:30:45.123".to_string())), test_time_str, ), ( - OwnedValue::Text(Rc::new("22:30:45.123+02:00".to_string())), + OwnedValue::build_text(Rc::new("22:30:45.123+02:00".to_string())), prev_time_str, ), ( - OwnedValue::Text(Rc::new("22:30:45.123-05:00".to_string())), + OwnedValue::build_text(Rc::new("22:30:45.123-05:00".to_string())), next_time_str, ), ( - OwnedValue::Text(Rc::new("22:30:45.123Z".to_string())), + OwnedValue::build_text(Rc::new("22:30:45.123Z".to_string())), test_time_str, ), // Test Format 11: 'now' - (OwnedValue::Text(Rc::new("now".to_string())), &now), + (OwnedValue::build_text(Rc::new("now".to_string())), &now), 
// Format 12: DDDDDDDDDD (Julian date as float or integer) (OwnedValue::Float(2460082.1), "14:24:00"), (OwnedValue::Integer(2460082), "12:00:00"), @@ -830,7 +839,7 @@ mod tests { for (input, expected) in test_cases { let result = exec_time(&[input]); if let OwnedValue::Text(result_str) = result { - assert_eq!(result_str.as_str(), expected); + assert_eq!(result_str.value.as_str(), expected); } else { panic!("Expected OwnedValue::Text, but got: {:?}", result); } @@ -840,37 +849,37 @@ mod tests { #[test] fn test_invalid_get_time_from_datetime_value() { let invalid_cases = vec![ - OwnedValue::Text(Rc::new("2024-07-21 25:00".to_string())), // Invalid hour - OwnedValue::Text(Rc::new("2024-07-21 24:00:00".to_string())), // Invalid hour - OwnedValue::Text(Rc::new("2024-07-21 23:60:00".to_string())), // Invalid minute - OwnedValue::Text(Rc::new("2024-07-21 22:58:60".to_string())), // Invalid second - OwnedValue::Text(Rc::new("2024-07-32".to_string())), // Invalid day - OwnedValue::Text(Rc::new("2024-13-01".to_string())), // Invalid month - OwnedValue::Text(Rc::new("invalid_date".to_string())), // Completely invalid string - OwnedValue::Text(Rc::new("".to_string())), // Empty string - OwnedValue::Integer(i64::MAX), // Large Julian day - OwnedValue::Integer(-1), // Negative Julian day - OwnedValue::Float(f64::MAX), // Large float + OwnedValue::build_text(Rc::new("2024-07-21 25:00".to_string())), // Invalid hour + OwnedValue::build_text(Rc::new("2024-07-21 24:00:00".to_string())), // Invalid hour + OwnedValue::build_text(Rc::new("2024-07-21 23:60:00".to_string())), // Invalid minute + OwnedValue::build_text(Rc::new("2024-07-21 22:58:60".to_string())), // Invalid second + OwnedValue::build_text(Rc::new("2024-07-32".to_string())), // Invalid day + OwnedValue::build_text(Rc::new("2024-13-01".to_string())), // Invalid month + OwnedValue::build_text(Rc::new("invalid_date".to_string())), // Completely invalid string + OwnedValue::build_text(Rc::new("".to_string())), // Empty string 
+ OwnedValue::Integer(i64::MAX), // Large Julian day + OwnedValue::Integer(-1), // Negative Julian day + OwnedValue::Float(f64::MAX), // Large float OwnedValue::Float(-1.0), // Negative Julian day as float OwnedValue::Float(f64::NAN), // NaN OwnedValue::Float(f64::INFINITY), // Infinity OwnedValue::Null, // Null value OwnedValue::Blob(vec![1, 2, 3].into()), // Blob (unsupported type) // Invalid timezone tests - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+24:00".to_string())), // Invalid timezone offset (too large) - OwnedValue::Text(Rc::new("2024-07-21T12:00:00-24:00".to_string())), // Invalid timezone offset (too small) - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+00:60".to_string())), // Invalid timezone minutes - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+00:00:00".to_string())), // Invalid timezone format (extra seconds) - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+".to_string())), // Incomplete timezone - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+Z".to_string())), // Invalid timezone format - OwnedValue::Text(Rc::new("2024-07-21T12:00:00+00:00Z".to_string())), // Mixing offset and Z - OwnedValue::Text(Rc::new("2024-07-21T12:00:00UTC".to_string())), // Named timezone (not supported) + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+24:00".to_string())), // Invalid timezone offset (too large) + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00-24:00".to_string())), // Invalid timezone offset (too small) + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+00:60".to_string())), // Invalid timezone minutes + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+00:00:00".to_string())), // Invalid timezone format (extra seconds) + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+".to_string())), // Incomplete timezone + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+Z".to_string())), // Invalid timezone format + OwnedValue::build_text(Rc::new("2024-07-21T12:00:00+00:00Z".to_string())), // Mixing offset and Z + 
OwnedValue::build_text(Rc::new("2024-07-21T12:00:00UTC".to_string())), // Named timezone (not supported) ]; for case in invalid_cases { let result = exec_time(&[case.clone()]); match result { - OwnedValue::Text(ref result_str) if result_str.is_empty() => (), + OwnedValue::Text(ref result_str) if result_str.value.is_empty() => (), _ => panic!( "Expected empty string for input: {:?}, but got: {:?}", case, result diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index c88ac8b91..ce03a53fd 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1,4 +1,5 @@ use super::{Insn, InsnReference, OwnedValue, Program}; +use crate::types::LimboText; use std::rc::Rc; pub fn insn_to_str( @@ -15,7 +16,7 @@ pub fn insn_to_str( 0, *target_pc as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("Start at {}", target_pc), ), @@ -24,7 +25,7 @@ pub fn insn_to_str( *lhs as i32, *rhs as i32, *dest as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]=r[{}]+r[{}]", dest, lhs, rhs), ), @@ -33,7 +34,7 @@ pub fn insn_to_str( *lhs as i32, *rhs as i32, *dest as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]=r[{}]-r[{}]", dest, lhs, rhs), ), @@ -42,7 +43,7 @@ pub fn insn_to_str( *lhs as i32, *rhs as i32, *dest as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]=r[{}]*r[{}]", dest, lhs, rhs), ), @@ -51,7 +52,7 @@ pub fn insn_to_str( *lhs as i32, *rhs as i32, *dest as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]=r[{}]/r[{}]", dest, lhs, rhs), ), @@ -60,7 +61,7 @@ pub fn insn_to_str( *lhs as i32, *rhs as i32, *dest as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, 
format!("r[{}]=r[{}]&r[{}]", dest, lhs, rhs), ), @@ -69,7 +70,7 @@ pub fn insn_to_str( *lhs as i32, *rhs as i32, *dest as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]=r[{}]|r[{}]", dest, lhs, rhs), ), @@ -78,7 +79,7 @@ pub fn insn_to_str( *reg as i32, *dest as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]=~r[{}]", dest, reg), ), @@ -87,7 +88,7 @@ pub fn insn_to_str( 0, *dest as i32, dest_end.map_or(0, |end| end as i32), - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, dest_end.map_or(format!("r[{}]=NULL", dest), |end| { format!("r[{}..{}]=NULL", dest, end) @@ -98,7 +99,7 @@ pub fn insn_to_str( *cursor_id as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("Set cursor {} to a (pseudo) NULL row", cursor_id), ), @@ -107,7 +108,7 @@ pub fn insn_to_str( *reg as i32, *target_pc as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]!=NULL -> goto {}", reg, target_pc), ), @@ -120,7 +121,7 @@ pub fn insn_to_str( *start_reg_a as i32, *start_reg_b as i32, *count as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!( "r[{}..{}]==r[{}..{}]", @@ -139,7 +140,7 @@ pub fn insn_to_str( *target_pc_lt as i32, *target_pc_eq as i32, *target_pc_gt as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -152,7 +153,7 @@ pub fn insn_to_str( *source_reg as i32, *dest_reg as i32, *count as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!( "r[{}..{}]=r[{}..{}]", @@ -171,7 +172,7 @@ pub fn insn_to_str( *reg as i32, *target_pc as i32, 0, - 
OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!( "r[{}]>0 -> r[{}]-={}, goto {}", @@ -187,7 +188,7 @@ pub fn insn_to_str( *lhs as i32, *rhs as i32, *target_pc as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("if r[{}]==r[{}] goto {}", lhs, rhs, target_pc), ), @@ -200,7 +201,7 @@ pub fn insn_to_str( *lhs as i32, *rhs as i32, *target_pc as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("if r[{}]!=r[{}] goto {}", lhs, rhs, target_pc), ), @@ -213,7 +214,7 @@ pub fn insn_to_str( *lhs as i32, *rhs as i32, *target_pc as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("if r[{}]r[{}] goto {}", lhs, rhs, target_pc), ), @@ -252,7 +253,7 @@ pub fn insn_to_str( *lhs as i32, *rhs as i32, *target_pc as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("if r[{}]>=r[{}] goto {}", lhs, rhs, target_pc), ), @@ -265,7 +266,7 @@ pub fn insn_to_str( *reg as i32, *target_pc as i32, *null_reg as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("if r[{}] goto {}", reg, target_pc), ), @@ -278,7 +279,7 @@ pub fn insn_to_str( *reg as i32, *target_pc as i32, *null_reg as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("if !r[{}] goto {}", reg, target_pc), ), @@ -290,7 +291,7 @@ pub fn insn_to_str( *cursor_id as i32, *root_page as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!( "table={}, root={}", @@ -306,7 +307,7 @@ pub fn insn_to_str( 0, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -319,7 +320,7 @@ pub fn 
insn_to_str( *cursor_id as i32, *content_reg as i32, *num_fields as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("{} columns in r[{}]", num_fields, content_reg), ), @@ -328,7 +329,7 @@ pub fn insn_to_str( *cursor_id as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -340,7 +341,7 @@ pub fn insn_to_str( *cursor_id as i32, *pc_if_empty as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!( "Rewind table {}", @@ -361,7 +362,7 @@ pub fn insn_to_str( *cursor_id as i32, *column as i32, *dest as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!( "r[{}]={}.{}", @@ -385,7 +386,7 @@ pub fn insn_to_str( *start_reg as i32, *count as i32, *dest_reg as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!( "r[{}]=mkrec(r[{}..{}])", @@ -399,7 +400,7 @@ pub fn insn_to_str( *start_reg as i32, *count as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, if *count == 1 { format!("output=r[{}]", start_reg) @@ -412,7 +413,7 @@ pub fn insn_to_str( *cursor_id as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -424,7 +425,7 @@ pub fn insn_to_str( *cursor_id as i32, *pc_if_next as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -436,7 +437,7 @@ pub fn insn_to_str( *err_code as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -445,7 +446,7 @@ pub fn insn_to_str( 0, *write as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + 
OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -454,7 +455,7 @@ pub fn insn_to_str( 0, *target_pc as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -466,7 +467,7 @@ pub fn insn_to_str( *return_reg as i32, *target_pc as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -475,7 +476,7 @@ pub fn insn_to_str( *return_reg as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -484,7 +485,7 @@ pub fn insn_to_str( *value as i32, *dest as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]={}", dest, value), ), @@ -502,7 +503,7 @@ pub fn insn_to_str( *register as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -511,7 +512,7 @@ pub fn insn_to_str( 0, *dest as i32, 0, - OwnedValue::Text(Rc::new(value.clone())), + OwnedValue::build_text(Rc::new(value.clone())), 0, format!("r[{}]='{}'", dest, value), ), @@ -534,7 +535,7 @@ pub fn insn_to_str( *cursor_id as i32, *dest as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!( "r[{}]={}.rowid", @@ -554,7 +555,7 @@ pub fn insn_to_str( *cursor_id as i32, *src_reg as i32, *target_pc as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!( "if (r[{}]!={}.rowid) goto {}", @@ -574,7 +575,7 @@ pub fn insn_to_str( *index_cursor_id as i32, *table_cursor_id as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -589,7 +590,7 @@ pub fn insn_to_str( *cursor_id as i32, *target_pc as i32, *start_reg as i32, - 
OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -604,7 +605,7 @@ pub fn insn_to_str( *cursor_id as i32, *target_pc as i32, *start_reg as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -618,7 +619,7 @@ pub fn insn_to_str( *cursor_id as i32, *target_pc as i32, *start_reg as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -632,7 +633,7 @@ pub fn insn_to_str( *cursor_id as i32, *target_pc as i32, *start_reg as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -641,7 +642,7 @@ pub fn insn_to_str( *reg as i32, *target_pc as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("if (--r[{}]==0) goto {}", reg, target_pc), ), @@ -655,7 +656,7 @@ pub fn insn_to_str( 0, *col as i32, *acc_reg as i32, - OwnedValue::Text(Rc::new(func.to_string().into())), + OwnedValue::build_text(Rc::new(func.to_string().into())), 0, format!("accum=r[{}] step(r[{}])", *acc_reg, *col), ), @@ -664,7 +665,7 @@ pub fn insn_to_str( 0, *register as i32, 0, - OwnedValue::Text(Rc::new(func.to_string().into())), + OwnedValue::build_text(Rc::new(func.to_string().into())), 0, format!("accum=r[{}]", *register), ), @@ -693,7 +694,7 @@ pub fn insn_to_str( *cursor_id as i32, *columns as i32, 0, - OwnedValue::Text(Rc::new(format!( + OwnedValue::build_text(Rc::new(format!( "k({},{})", order.values.len(), to_print.join(",") @@ -711,7 +712,7 @@ pub fn insn_to_str( *cursor_id as i32, *dest_reg as i32, *pseudo_cursor as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]=data", dest_reg), ), @@ -735,7 +736,7 @@ pub fn insn_to_str( *cursor_id as i32, *pc_if_empty as i32, 0, - 
OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -747,7 +748,7 @@ pub fn insn_to_str( *cursor_id as i32, *pc_if_next as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -761,7 +762,7 @@ pub fn insn_to_str( *constant_mask, *start_reg as i32, *dest as i32, - OwnedValue::Text(Rc::new(func.func.to_string())), + OwnedValue::build_text(Rc::new(func.func.to_string())), 0, if func.arg_count == 0 { format!("r[{}]=func()", dest) @@ -785,7 +786,7 @@ pub fn insn_to_str( *yield_reg as i32, *jump_on_definition as i32, *start_offset as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -794,7 +795,7 @@ pub fn insn_to_str( *yield_reg as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -806,7 +807,7 @@ pub fn insn_to_str( *yield_reg as i32, *end_offset as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -820,7 +821,7 @@ pub fn insn_to_str( *cursor as i32, *record_reg as i32, *key_reg as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), *flag as u16, "".to_string(), ), @@ -829,7 +830,7 @@ pub fn insn_to_str( *cursor_id as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -842,7 +843,7 @@ pub fn insn_to_str( *cursor as i32, *rowid_reg as i32, *prev_largest_reg as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -851,7 +852,7 @@ pub fn insn_to_str( *reg as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -860,7 
+861,7 @@ pub fn insn_to_str( *reg as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -873,7 +874,7 @@ pub fn insn_to_str( *cursor as i32, *target_pc as i32, *rowid_reg as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -885,7 +886,7 @@ pub fn insn_to_str( *cursor_id as i32, *root_page as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -894,7 +895,7 @@ pub fn insn_to_str( 0, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -907,7 +908,7 @@ pub fn insn_to_str( *src_reg as i32, *dst_reg as i32, *amount as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]=r[{}]", dst_reg, src_reg), ), @@ -916,7 +917,7 @@ pub fn insn_to_str( *db as i32, *root as i32, *flags as i32, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("r[{}]=root iDb={} flags={}", root, db, flags), ), @@ -925,7 +926,7 @@ pub fn insn_to_str( *cursor_id as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -934,7 +935,7 @@ pub fn insn_to_str( 0, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -943,7 +944,7 @@ pub fn insn_to_str( *src as i32, *target_pc as i32, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, format!("if (r[{}]==NULL) goto {}", src, target_pc), ), @@ -952,7 +953,7 @@ pub fn insn_to_str( *db as i32, 0, 0, - OwnedValue::Text(Rc::new(where_clause.clone())), + OwnedValue::build_text(Rc::new(where_clause.clone())), 0, where_clause.clone(), ), @@ 
-961,7 +962,7 @@ pub fn insn_to_str( 0, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -970,7 +971,7 @@ pub fn insn_to_str( 0, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -979,7 +980,7 @@ pub fn insn_to_str( 0, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 069782c9a..9d07d94b0 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -34,7 +34,7 @@ use crate::types::{ }; use crate::util::parse_schema_rows; #[cfg(feature = "json")] -use crate::{function::JsonFunc, json::get_json}; +use crate::{function::JsonFunc, json::get_json, json::json_array}; use crate::{Connection, Result, TransactionState}; use crate::{Rows, DATABASE_VERSION}; @@ -1727,7 +1727,7 @@ impl Program { state.pc += 1; } Insn::String8 { value, dest } => { - state.registers[*dest] = OwnedValue::Text(Rc::new(value.into())); + state.registers[*dest] = OwnedValue::build_text(Rc::new(value.into())); state.pc += 1; } Insn::Blob { value, dest } => { @@ -1994,9 +1994,11 @@ impl Program { } } } - AggFunc::GroupConcat | AggFunc::StringAgg => OwnedValue::Agg(Box::new( - AggContext::GroupConcat(OwnedValue::Text(Rc::new("".to_string()))), - )), + AggFunc::GroupConcat | AggFunc::StringAgg => { + OwnedValue::Agg(Box::new(AggContext::GroupConcat( + OwnedValue::build_text(Rc::new("".to_string())), + ))) + } }; } match func { @@ -2067,7 +2069,7 @@ impl Program { Some(OwnedValue::Text(ref mut current_max)), OwnedValue::Text(value), ) => { - if value > *current_max { + if value.value > current_max.value { *current_max = value; } } @@ -2108,10 +2110,10 @@ impl Program { } ( Some(OwnedValue::Text(ref mut current_min)), - OwnedValue::Text(value), + OwnedValue::Text(text), ) => { - if value < *current_min { - *current_min = value; + if 
text.value < current_min.value { + *current_min = text; } } _ => { @@ -2263,6 +2265,18 @@ impl Program { Err(e) => return Err(e), } } + #[cfg(feature = "json")] + crate::function::Func::Json(JsonFunc::JsonArray) => { + let reg_values = + state.registers[*start_reg..*start_reg + arg_count].to_vec(); + + let json_array = json_array(reg_values); + + match json_array { + Ok(json) => state.registers[*dest] = json, + Err(e) => return Err(e), + } + } crate::function::Func::Scalar(scalar_func) => match scalar_func { ScalarFunc::Cast => { assert!(arg_count == 2); @@ -2273,7 +2287,7 @@ impl Program { else { unreachable!("Cast with non-text type"); }; - let result = exec_cast(®_value_argument, ®_value_type); + let result = exec_cast(®_value_argument, ®_value_type.value); state.registers[*dest] = result; } ScalarFunc::Char => { @@ -2304,7 +2318,12 @@ impl Program { } else { None }; - OwnedValue::Integer(exec_glob(cache, pattern, text) as i64) + OwnedValue::Integer(exec_glob( + cache, + &pattern.value, + &text.value, + ) + as i64) } _ => { unreachable!("Like on non-text registers"); @@ -2338,7 +2357,12 @@ impl Program { } else { None }; - OwnedValue::Integer(exec_like(cache, pattern, text) as i64) + OwnedValue::Integer(exec_like( + cache, + &pattern.value, + &text.value, + ) + as i64) } _ => { unreachable!("Like on non-text registers"); @@ -2457,16 +2481,18 @@ impl Program { } ScalarFunc::UnixEpoch => { if *start_reg == 0 { - let unixepoch: String = exec_unixepoch(&OwnedValue::Text( - Rc::new("now".to_string()), - ))?; - state.registers[*dest] = OwnedValue::Text(Rc::new(unixepoch)); + let unixepoch: String = exec_unixepoch( + &OwnedValue::build_text(Rc::new("now".to_string())), + )?; + state.registers[*dest] = + OwnedValue::build_text(Rc::new(unixepoch)); } else { let datetime_value = &state.registers[*start_reg]; let unixepoch = exec_unixepoch(datetime_value); match unixepoch { Ok(time) => { - state.registers[*dest] = OwnedValue::Text(Rc::new(time)) + state.registers[*dest] = 
+ OwnedValue::build_text(Rc::new(time)) } Err(e) => { return Err(LimboError::ParseError(format!( @@ -2481,7 +2507,7 @@ impl Program { let version_integer: i64 = DATABASE_VERSION.get().unwrap().parse()?; let version = execute_sqlite_version(version_integer); - state.registers[*dest] = OwnedValue::Text(Rc::new(version)); + state.registers[*dest] = OwnedValue::build_text(Rc::new(version)); } ScalarFunc::Replace => { assert!(arg_count == 3); @@ -2817,7 +2843,7 @@ fn get_indent_count(indent_count: usize, curr_insn: &Insn, prev_insn: Option<&In fn exec_lower(reg: &OwnedValue) -> Option { match reg { - OwnedValue::Text(t) => Some(OwnedValue::Text(Rc::new(t.to_lowercase()))), + OwnedValue::Text(t) => Some(OwnedValue::build_text(Rc::new(t.value.to_lowercase()))), t => Some(t.to_owned()), } } @@ -2846,7 +2872,7 @@ fn exec_octet_length(reg: &OwnedValue) -> OwnedValue { fn exec_upper(reg: &OwnedValue) -> Option { match reg { - OwnedValue::Text(t) => Some(OwnedValue::Text(Rc::new(t.to_uppercase()))), + OwnedValue::Text(t) => Some(OwnedValue::build_text(Rc::new(t.value.to_uppercase()))), t => Some(t.to_owned()), } } @@ -2855,7 +2881,7 @@ fn exec_concat(registers: &[OwnedValue]) -> OwnedValue { let mut result = String::new(); for reg in registers { match reg { - OwnedValue::Text(text) => result.push_str(text), + OwnedValue::Text(text) => result.push_str(&text.value), OwnedValue::Integer(i) => result.push_str(&i.to_string()), OwnedValue::Float(f) => result.push_str(&f.to_string()), OwnedValue::Agg(aggctx) => result.push_str(&aggctx.final_value().to_string()), @@ -2864,7 +2890,7 @@ fn exec_concat(registers: &[OwnedValue]) -> OwnedValue { OwnedValue::Record(_) => unreachable!(), } } - OwnedValue::Text(Rc::new(result)) + OwnedValue::build_text(Rc::new(result)) } fn exec_concat_ws(registers: &[OwnedValue]) -> OwnedValue { @@ -2873,7 +2899,7 @@ fn exec_concat_ws(registers: &[OwnedValue]) -> OwnedValue { } let separator = match ®isters[0] { - OwnedValue::Text(text) => text.clone(), + 
OwnedValue::Text(text) => text.value.clone(), OwnedValue::Integer(i) => Rc::new(i.to_string()), OwnedValue::Float(f) => Rc::new(f.to_string()), _ => return OwnedValue::Null, @@ -2885,14 +2911,14 @@ fn exec_concat_ws(registers: &[OwnedValue]) -> OwnedValue { result.push_str(&separator); } match reg { - OwnedValue::Text(text) => result.push_str(text), + OwnedValue::Text(text) => result.push_str(&text.value), OwnedValue::Integer(i) => result.push_str(&i.to_string()), OwnedValue::Float(f) => result.push_str(&f.to_string()), _ => continue, } } - OwnedValue::Text(Rc::new(result)) + OwnedValue::build_text(Rc::new(result)) } fn exec_sign(reg: &OwnedValue) -> Option { @@ -2900,9 +2926,9 @@ fn exec_sign(reg: &OwnedValue) -> Option { OwnedValue::Integer(i) => *i as f64, OwnedValue::Float(f) => *f, OwnedValue::Text(s) => { - if let Ok(i) = s.parse::() { + if let Ok(i) = s.value.parse::() { i as f64 - } else if let Ok(f) = s.parse::() { + } else if let Ok(f) = s.value.parse::() { f } else { return Some(OwnedValue::Null); @@ -2937,25 +2963,26 @@ fn exec_sign(reg: &OwnedValue) -> Option { /// Generates the Soundex code for a given word pub fn exec_soundex(reg: &OwnedValue) -> OwnedValue { let s = match reg { - OwnedValue::Null => return OwnedValue::Text(Rc::new("?000".to_string())), + OwnedValue::Null => return OwnedValue::build_text(Rc::new("?000".to_string())), OwnedValue::Text(s) => { // return ?000 if non ASCII alphabet character is found - if !s.chars().all(|c| c.is_ascii_alphabetic()) { - return OwnedValue::Text(Rc::new("?000".to_string())); + if !s.value.chars().all(|c| c.is_ascii_alphabetic()) { + return OwnedValue::build_text(Rc::new("?000".to_string())); } s.clone() } - _ => return OwnedValue::Text(Rc::new("?000".to_string())), // For unsupported types, return NULL + _ => return OwnedValue::build_text(Rc::new("?000".to_string())), // For unsupported types, return NULL }; // Remove numbers and spaces let word: String = s + .value .chars() .filter(|c| !c.is_digit(10)) 
.collect::() .replace(" ", ""); if word.is_empty() { - return OwnedValue::Text(Rc::new("0000".to_string())); + return OwnedValue::build_text(Rc::new("0000".to_string())); } let soundex_code = |c| match c { @@ -3021,7 +3048,7 @@ pub fn exec_soundex(reg: &OwnedValue) -> OwnedValue { // Retain the first 4 characters and convert to uppercase result.truncate(4); - OwnedValue::Text(Rc::new(result.to_uppercase())) + OwnedValue::build_text(Rc::new(result.to_uppercase())) } fn exec_abs(reg: &OwnedValue) -> Option { @@ -3056,7 +3083,7 @@ fn exec_randomblob(reg: &OwnedValue) -> OwnedValue { let length = match reg { OwnedValue::Integer(i) => *i, OwnedValue::Float(f) => *f as i64, - OwnedValue::Text(t) => t.parse().unwrap_or(1), + OwnedValue::Text(t) => t.value.parse().unwrap_or(1), _ => 1, } .max(1) as usize; @@ -3068,13 +3095,13 @@ fn exec_randomblob(reg: &OwnedValue) -> OwnedValue { fn exec_quote(value: &OwnedValue) -> OwnedValue { match value { - OwnedValue::Null => OwnedValue::Text(OwnedValue::Null.to_string().into()), + OwnedValue::Null => OwnedValue::build_text(OwnedValue::Null.to_string().into()), OwnedValue::Integer(_) | OwnedValue::Float(_) => value.to_owned(), OwnedValue::Blob(_) => todo!(), OwnedValue::Text(s) => { - let mut quoted = String::with_capacity(s.len() + 2); + let mut quoted = String::with_capacity(s.value.len() + 2); quoted.push('\''); - for c in s.chars() { + for c in s.value.chars() { if c == '\0' { break; } else { @@ -3082,7 +3109,7 @@ fn exec_quote(value: &OwnedValue) -> OwnedValue { } } quoted.push('\''); - OwnedValue::Text(Rc::new(quoted)) + OwnedValue::build_text(Rc::new(quoted)) } _ => OwnedValue::Null, // For unsupported types, return NULL } @@ -3099,7 +3126,7 @@ fn exec_char(values: Vec) -> OwnedValue { } }) .collect(); - OwnedValue::Text(Rc::new(result)) + OwnedValue::build_text(Rc::new(result)) } fn construct_like_regex(pattern: &str) -> Regex { @@ -3183,31 +3210,33 @@ fn exec_substring( (str_value, start_value, length_value) { let start = 
*start as usize; - if start > str.len() { - return OwnedValue::Text(Rc::new("".to_string())); + let str_len = str.value.len(); + + if start > str_len { + return OwnedValue::build_text(Rc::new("".to_string())); } let start_idx = start - 1; - let str_len = str.len(); let end = if *length != -1 { start_idx + *length as usize } else { str_len }; - let substring = &str[start_idx..end.min(str_len)]; + let substring = &str.value[start_idx..end.min(str_len)]; - OwnedValue::Text(Rc::new(substring.to_string())) + OwnedValue::build_text(Rc::new(substring.to_string())) } else if let (OwnedValue::Text(str), OwnedValue::Integer(start)) = (str_value, start_value) { let start = *start as usize; - if start > str.len() { - return OwnedValue::Text(Rc::new("".to_string())); + let str_len = str.value.len(); + + if start > str_len { + return OwnedValue::build_text(Rc::new("".to_string())); } let start_idx = start - 1; - let str_len = str.len(); - let substring = &str[start_idx..str_len]; + let substring = &str.value[start_idx..str_len]; - OwnedValue::Text(Rc::new(substring.to_string())) + OwnedValue::build_text(Rc::new(substring.to_string())) } else { OwnedValue::Null } @@ -3228,7 +3257,7 @@ fn exec_instr(reg: &OwnedValue, pattern: &OwnedValue) -> OwnedValue { let reg_str; let reg = match reg { - OwnedValue::Text(s) => s.as_str(), + OwnedValue::Text(s) => s.value.as_str(), _ => { reg_str = reg.to_string(); reg_str.as_str() @@ -3237,7 +3266,7 @@ fn exec_instr(reg: &OwnedValue, pattern: &OwnedValue) -> OwnedValue { let pattern_str; let pattern = match pattern { - OwnedValue::Text(s) => s.as_str(), + OwnedValue::Text(s) => s.value.as_str(), _ => { pattern_str = pattern.to_string(); pattern_str.as_str() @@ -3252,11 +3281,11 @@ fn exec_instr(reg: &OwnedValue, pattern: &OwnedValue) -> OwnedValue { fn exec_typeof(reg: &OwnedValue) -> OwnedValue { match reg { - OwnedValue::Null => OwnedValue::Text(Rc::new("null".to_string())), - OwnedValue::Integer(_) => 
OwnedValue::Text(Rc::new("integer".to_string())), - OwnedValue::Float(_) => OwnedValue::Text(Rc::new("real".to_string())), - OwnedValue::Text(_) => OwnedValue::Text(Rc::new("text".to_string())), - OwnedValue::Blob(_) => OwnedValue::Text(Rc::new("blob".to_string())), + OwnedValue::Null => OwnedValue::build_text(Rc::new("null".to_string())), + OwnedValue::Integer(_) => OwnedValue::build_text(Rc::new("integer".to_string())), + OwnedValue::Float(_) => OwnedValue::build_text(Rc::new("real".to_string())), + OwnedValue::Text(_) => OwnedValue::build_text(Rc::new("text".to_string())), + OwnedValue::Blob(_) => OwnedValue::build_text(Rc::new("blob".to_string())), OwnedValue::Agg(ctx) => exec_typeof(ctx.final_value()), OwnedValue::Record(_) => unimplemented!(), } @@ -3269,7 +3298,7 @@ fn exec_hex(reg: &OwnedValue) -> OwnedValue { | OwnedValue::Float(_) | OwnedValue::Blob(_) => { let text = reg.to_string(); - OwnedValue::Text(Rc::new(hex::encode_upper(text))) + OwnedValue::build_text(Rc::new(hex::encode_upper(text))) } _ => OwnedValue::Null, } @@ -3321,7 +3350,7 @@ fn exec_unicode(reg: &OwnedValue) -> OwnedValue { fn _to_float(reg: &OwnedValue) -> f64 { match reg { - OwnedValue::Text(x) => x.parse().unwrap_or(0.0), + OwnedValue::Text(x) => x.value.parse().unwrap_or(0.0), OwnedValue::Integer(x) => *x as f64, OwnedValue::Float(x) => *x, _ => 0.0, @@ -3330,7 +3359,7 @@ fn _to_float(reg: &OwnedValue) -> f64 { fn exec_round(reg: &OwnedValue, precision: Option) -> OwnedValue { let precision = match precision { - Some(OwnedValue::Text(x)) => x.parse().unwrap_or(0.0), + Some(OwnedValue::Text(x)) => x.value.parse().unwrap_or(0.0), Some(OwnedValue::Integer(x)) => x as f64, Some(OwnedValue::Float(x)) => x, Some(OwnedValue::Null) => return OwnedValue::Null, @@ -3353,13 +3382,13 @@ fn exec_trim(reg: &OwnedValue, pattern: Option) -> OwnedValue { (reg, Some(pattern)) => match reg { OwnedValue::Text(_) | OwnedValue::Integer(_) | OwnedValue::Float(_) => { let pattern_chars: Vec = 
pattern.to_string().chars().collect(); - OwnedValue::Text(Rc::new( + OwnedValue::build_text(Rc::new( reg.to_string().trim_matches(&pattern_chars[..]).to_string(), )) } _ => reg.to_owned(), }, - (OwnedValue::Text(t), None) => OwnedValue::Text(Rc::new(t.trim().to_string())), + (OwnedValue::Text(t), None) => OwnedValue::build_text(Rc::new(t.value.trim().to_string())), (reg, _) => reg.to_owned(), } } @@ -3370,7 +3399,7 @@ fn exec_ltrim(reg: &OwnedValue, pattern: Option) -> OwnedValue { (reg, Some(pattern)) => match reg { OwnedValue::Text(_) | OwnedValue::Integer(_) | OwnedValue::Float(_) => { let pattern_chars: Vec = pattern.to_string().chars().collect(); - OwnedValue::Text(Rc::new( + OwnedValue::build_text(Rc::new( reg.to_string() .trim_start_matches(&pattern_chars[..]) .to_string(), @@ -3378,7 +3407,9 @@ fn exec_ltrim(reg: &OwnedValue, pattern: Option) -> OwnedValue { } _ => reg.to_owned(), }, - (OwnedValue::Text(t), None) => OwnedValue::Text(Rc::new(t.trim_start().to_string())), + (OwnedValue::Text(t), None) => { + OwnedValue::build_text(Rc::new(t.value.trim_start().to_string())) + } (reg, _) => reg.to_owned(), } } @@ -3389,7 +3420,7 @@ fn exec_rtrim(reg: &OwnedValue, pattern: Option) -> OwnedValue { (reg, Some(pattern)) => match reg { OwnedValue::Text(_) | OwnedValue::Integer(_) | OwnedValue::Float(_) => { let pattern_chars: Vec = pattern.to_string().chars().collect(); - OwnedValue::Text(Rc::new( + OwnedValue::build_text(Rc::new( reg.to_string() .trim_end_matches(&pattern_chars[..]) .to_string(), @@ -3397,7 +3428,9 @@ fn exec_rtrim(reg: &OwnedValue, pattern: Option) -> OwnedValue { } _ => reg.to_owned(), }, - (OwnedValue::Text(t), None) => OwnedValue::Text(Rc::new(t.trim_end().to_string())), + (OwnedValue::Text(t), None) => { + OwnedValue::build_text(Rc::new(t.value.trim_end().to_string())) + } (reg, _) => reg.to_owned(), } } @@ -3406,7 +3439,7 @@ fn exec_zeroblob(req: &OwnedValue) -> OwnedValue { let length: i64 = match req { OwnedValue::Integer(i) => *i, 
OwnedValue::Float(f) => *f as i64, - OwnedValue::Text(s) => s.parse().unwrap_or(0), + OwnedValue::Text(s) => s.value.parse().unwrap_or(0), _ => 0, }; OwnedValue::Blob(Rc::new(vec![0; length.max(0) as usize])) @@ -3444,7 +3477,7 @@ fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue { Affinity::Text => { // Convert everything to text representation // TODO: handle encoding and whatever sqlite3_snprintf does - OwnedValue::Text(Rc::new(value.to_string())) + OwnedValue::build_text(Rc::new(value.to_string())) } Affinity::Real => match value { OwnedValue::Blob(b) => { @@ -3452,7 +3485,7 @@ fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue { let text = String::from_utf8_lossy(b); cast_text_to_real(&text) } - OwnedValue::Text(t) => cast_text_to_real(t), + OwnedValue::Text(t) => cast_text_to_real(&t.value), OwnedValue::Integer(i) => OwnedValue::Float(*i as f64), OwnedValue::Float(f) => OwnedValue::Float(*f), _ => OwnedValue::Float(0.0), @@ -3463,7 +3496,7 @@ fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue { let text = String::from_utf8_lossy(b); cast_text_to_integer(&text) } - OwnedValue::Text(t) => cast_text_to_integer(t), + OwnedValue::Text(t) => cast_text_to_integer(&t.value), OwnedValue::Integer(i) => OwnedValue::Integer(*i), // A cast of a REAL value into an INTEGER results in the integer between the REAL value and zero // that is closest to the REAL value. 
If a REAL is greater than the greatest possible signed integer (+9223372036854775807) @@ -3486,7 +3519,7 @@ fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue { let text = String::from_utf8_lossy(b); cast_text_to_numeric(&text) } - OwnedValue::Text(t) => cast_text_to_numeric(t), + OwnedValue::Text(t) => cast_text_to_numeric(&t.value), OwnedValue::Integer(i) => OwnedValue::Integer(*i), OwnedValue::Float(f) => OwnedValue::Float(*f), _ => value.clone(), // TODO probably wrong @@ -3514,12 +3547,14 @@ fn exec_replace(source: &OwnedValue, pattern: &OwnedValue, replacement: &OwnedVa // If any of the casts failed, panic as text casting is not expected to fail. match (&source, &pattern, &replacement) { (OwnedValue::Text(source), OwnedValue::Text(pattern), OwnedValue::Text(replacement)) => { - if pattern.is_empty() { - return OwnedValue::Text(source.clone()); + if pattern.value.is_empty() { + return OwnedValue::build_text(source.value.clone()); } - let result = source.replace(pattern.as_str(), replacement); - OwnedValue::Text(Rc::new(result)) + let result = source + .value + .replace(pattern.value.as_str(), &replacement.value); + OwnedValue::build_text(Rc::new(result)) } _ => unreachable!("text cast should never fail"), } @@ -3653,7 +3688,7 @@ fn to_f64(reg: &OwnedValue) -> Option { match reg { OwnedValue::Integer(i) => Some(*i as f64), OwnedValue::Float(f) => Some(*f), - OwnedValue::Text(t) => t.parse::().ok(), + OwnedValue::Text(t) => t.value.parse::().ok(), OwnedValue::Agg(ctx) => to_f64(ctx.final_value()), _ => None, } @@ -3928,7 +3963,7 @@ mod tests { #[test] fn test_length() { - let input_str = OwnedValue::Text(Rc::new(String::from("bob"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bob"))); let expected_len = OwnedValue::Integer(3); assert_eq!(exec_length(&input_str), expected_len); @@ -3947,58 +3982,58 @@ mod tests { #[test] fn test_quote() { - let input = OwnedValue::Text(Rc::new(String::from("abc\0edf"))); - let expected = 
OwnedValue::Text(Rc::new(String::from("'abc'"))); + let input = OwnedValue::build_text(Rc::new(String::from("abc\0edf"))); + let expected = OwnedValue::build_text(Rc::new(String::from("'abc'"))); assert_eq!(exec_quote(&input), expected); let input = OwnedValue::Integer(123); let expected = OwnedValue::Integer(123); assert_eq!(exec_quote(&input), expected); - let input = OwnedValue::Text(Rc::new(String::from("hello''world"))); - let expected = OwnedValue::Text(Rc::new(String::from("'hello''world'"))); + let input = OwnedValue::build_text(Rc::new(String::from("hello''world"))); + let expected = OwnedValue::build_text(Rc::new(String::from("'hello''world'"))); assert_eq!(exec_quote(&input), expected); } #[test] fn test_typeof() { let input = OwnedValue::Null; - let expected: OwnedValue = OwnedValue::Text(Rc::new("null".to_string())); + let expected: OwnedValue = OwnedValue::build_text(Rc::new("null".to_string())); assert_eq!(exec_typeof(&input), expected); let input = OwnedValue::Integer(123); - let expected: OwnedValue = OwnedValue::Text(Rc::new("integer".to_string())); + let expected: OwnedValue = OwnedValue::build_text(Rc::new("integer".to_string())); assert_eq!(exec_typeof(&input), expected); let input = OwnedValue::Float(123.456); - let expected: OwnedValue = OwnedValue::Text(Rc::new("real".to_string())); + let expected: OwnedValue = OwnedValue::build_text(Rc::new("real".to_string())); assert_eq!(exec_typeof(&input), expected); - let input = OwnedValue::Text(Rc::new("hello".to_string())); - let expected: OwnedValue = OwnedValue::Text(Rc::new("text".to_string())); + let input = OwnedValue::build_text(Rc::new("hello".to_string())); + let expected: OwnedValue = OwnedValue::build_text(Rc::new("text".to_string())); assert_eq!(exec_typeof(&input), expected); let input = OwnedValue::Blob(Rc::new("limbo".as_bytes().to_vec())); - let expected: OwnedValue = OwnedValue::Text(Rc::new("blob".to_string())); + let expected: OwnedValue = 
OwnedValue::build_text(Rc::new("blob".to_string())); assert_eq!(exec_typeof(&input), expected); let input = OwnedValue::Agg(Box::new(AggContext::Sum(OwnedValue::Integer(123)))); - let expected = OwnedValue::Text(Rc::new("integer".to_string())); + let expected = OwnedValue::build_text(Rc::new("integer".to_string())); assert_eq!(exec_typeof(&input), expected); } #[test] fn test_unicode() { assert_eq!( - exec_unicode(&OwnedValue::Text(Rc::new("a".to_string()))), + exec_unicode(&OwnedValue::build_text(Rc::new("a".to_string()))), OwnedValue::Integer(97) ); assert_eq!( - exec_unicode(&OwnedValue::Text(Rc::new("😊".to_string()))), + exec_unicode(&OwnedValue::build_text(Rc::new("😊".to_string()))), OwnedValue::Integer(128522) ); assert_eq!( - exec_unicode(&OwnedValue::Text(Rc::new("".to_string()))), + exec_unicode(&OwnedValue::build_text(Rc::new("".to_string()))), OwnedValue::Null ); assert_eq!( @@ -4030,16 +4065,16 @@ mod tests { assert_eq!(exec_min(input_int_vec.clone()), OwnedValue::Integer(-1)); assert_eq!(exec_max(input_int_vec.clone()), OwnedValue::Integer(10)); - let str1 = OwnedValue::Text(Rc::new(String::from("A"))); - let str2 = OwnedValue::Text(Rc::new(String::from("z"))); + let str1 = OwnedValue::build_text(Rc::new(String::from("A"))); + let str2 = OwnedValue::build_text(Rc::new(String::from("z"))); let input_str_vec = vec![&str2, &str1]; assert_eq!( exec_min(input_str_vec.clone()), - OwnedValue::Text(Rc::new(String::from("A"))) + OwnedValue::build_text(Rc::new(String::from("A"))) ); assert_eq!( exec_max(input_str_vec.clone()), - OwnedValue::Text(Rc::new(String::from("z"))) + OwnedValue::build_text(Rc::new(String::from("z"))) ); let input_null_vec = vec![&OwnedValue::Null, &OwnedValue::Null]; @@ -4050,102 +4085,102 @@ mod tests { assert_eq!(exec_min(input_mixed_vec.clone()), OwnedValue::Integer(10)); assert_eq!( exec_max(input_mixed_vec.clone()), - OwnedValue::Text(Rc::new(String::from("A"))) + OwnedValue::build_text(Rc::new(String::from("A"))) ); } #[test] fn 
test_trim() { - let input_str = OwnedValue::Text(Rc::new(String::from(" Bob and Alice "))); - let expected_str = OwnedValue::Text(Rc::new(String::from("Bob and Alice"))); + let input_str = OwnedValue::build_text(Rc::new(String::from(" Bob and Alice "))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("Bob and Alice"))); assert_eq!(exec_trim(&input_str, None), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from(" Bob and Alice "))); - let pattern_str = OwnedValue::Text(Rc::new(String::from("Bob and"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("Alice"))); + let input_str = OwnedValue::build_text(Rc::new(String::from(" Bob and Alice "))); + let pattern_str = OwnedValue::build_text(Rc::new(String::from("Bob and"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("Alice"))); assert_eq!(exec_trim(&input_str, Some(pattern_str)), expected_str); } #[test] fn test_ltrim() { - let input_str = OwnedValue::Text(Rc::new(String::from(" Bob and Alice "))); - let expected_str = OwnedValue::Text(Rc::new(String::from("Bob and Alice "))); + let input_str = OwnedValue::build_text(Rc::new(String::from(" Bob and Alice "))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("Bob and Alice "))); assert_eq!(exec_ltrim(&input_str, None), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from(" Bob and Alice "))); - let pattern_str = OwnedValue::Text(Rc::new(String::from("Bob and"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("Alice "))); + let input_str = OwnedValue::build_text(Rc::new(String::from(" Bob and Alice "))); + let pattern_str = OwnedValue::build_text(Rc::new(String::from("Bob and"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("Alice "))); assert_eq!(exec_ltrim(&input_str, Some(pattern_str)), expected_str); } #[test] fn test_rtrim() { - let input_str = OwnedValue::Text(Rc::new(String::from(" Bob and Alice "))); - let expected_str = 
OwnedValue::Text(Rc::new(String::from(" Bob and Alice"))); + let input_str = OwnedValue::build_text(Rc::new(String::from(" Bob and Alice "))); + let expected_str = OwnedValue::build_text(Rc::new(String::from(" Bob and Alice"))); assert_eq!(exec_rtrim(&input_str, None), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from(" Bob and Alice "))); - let pattern_str = OwnedValue::Text(Rc::new(String::from("Bob and"))); - let expected_str = OwnedValue::Text(Rc::new(String::from(" Bob and Alice"))); + let input_str = OwnedValue::build_text(Rc::new(String::from(" Bob and Alice "))); + let pattern_str = OwnedValue::build_text(Rc::new(String::from("Bob and"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from(" Bob and Alice"))); assert_eq!(exec_rtrim(&input_str, Some(pattern_str)), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from(" Bob and Alice "))); - let pattern_str = OwnedValue::Text(Rc::new(String::from("and Alice"))); - let expected_str = OwnedValue::Text(Rc::new(String::from(" Bob"))); + let input_str = OwnedValue::build_text(Rc::new(String::from(" Bob and Alice "))); + let pattern_str = OwnedValue::build_text(Rc::new(String::from("and Alice"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from(" Bob"))); assert_eq!(exec_rtrim(&input_str, Some(pattern_str)), expected_str); } #[test] fn test_soundex() { - let input_str = OwnedValue::Text(Rc::new(String::from("Pfister"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("P236"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("Pfister"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("P236"))); assert_eq!(exec_soundex(&input_str), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from("husobee"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("H210"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("husobee"))); + let expected_str = 
OwnedValue::build_text(Rc::new(String::from("H210"))); assert_eq!(exec_soundex(&input_str), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from("Tymczak"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("T522"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("Tymczak"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("T522"))); assert_eq!(exec_soundex(&input_str), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from("Ashcraft"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("A261"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("Ashcraft"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("A261"))); assert_eq!(exec_soundex(&input_str), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from("Robert"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("R163"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("Robert"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("R163"))); assert_eq!(exec_soundex(&input_str), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from("Rupert"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("R163"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("Rupert"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("R163"))); assert_eq!(exec_soundex(&input_str), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from("Rubin"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("R150"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("Rubin"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("R150"))); assert_eq!(exec_soundex(&input_str), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from("Kant"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("K530"))); + let input_str = 
OwnedValue::build_text(Rc::new(String::from("Kant"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("K530"))); assert_eq!(exec_soundex(&input_str), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from("Knuth"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("K530"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("Knuth"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("K530"))); assert_eq!(exec_soundex(&input_str), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from("x"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("X000"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("x"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("X000"))); assert_eq!(exec_soundex(&input_str), expected_str); - let input_str = OwnedValue::Text(Rc::new(String::from("闪电五连鞭"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("?000"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("闪电五连鞭"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("?000"))); assert_eq!(exec_soundex(&input_str), expected_str); } #[test] fn test_upper_case() { - let input_str = OwnedValue::Text(Rc::new(String::from("Limbo"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("LIMBO"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("Limbo"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("LIMBO"))); assert_eq!(exec_upper(&input_str).unwrap(), expected_str); let input_int = OwnedValue::Integer(10); @@ -4155,8 +4190,8 @@ mod tests { #[test] fn test_lower_case() { - let input_str = OwnedValue::Text(Rc::new(String::from("Limbo"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("limbo"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("Limbo"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("limbo"))); 
assert_eq!(exec_lower(&input_str).unwrap(), expected_str); let input_int = OwnedValue::Integer(10); @@ -4166,38 +4201,38 @@ mod tests { #[test] fn test_hex() { - let input_str = OwnedValue::Text(Rc::new("limbo".to_string())); - let expected_val = OwnedValue::Text(Rc::new(String::from("6C696D626F"))); + let input_str = OwnedValue::build_text(Rc::new("limbo".to_string())); + let expected_val = OwnedValue::build_text(Rc::new(String::from("6C696D626F"))); assert_eq!(exec_hex(&input_str), expected_val); let input_int = OwnedValue::Integer(100); - let expected_val = OwnedValue::Text(Rc::new(String::from("313030"))); + let expected_val = OwnedValue::build_text(Rc::new(String::from("313030"))); assert_eq!(exec_hex(&input_int), expected_val); let input_float = OwnedValue::Float(12.34); - let expected_val = OwnedValue::Text(Rc::new(String::from("31322E3334"))); + let expected_val = OwnedValue::build_text(Rc::new(String::from("31322E3334"))); assert_eq!(exec_hex(&input_float), expected_val); } #[test] fn test_unhex() { - let input = OwnedValue::Text(Rc::new(String::from("6F"))); + let input = OwnedValue::build_text(Rc::new(String::from("6F"))); let expected = OwnedValue::Blob(Rc::new(vec![0x6f])); assert_eq!(exec_unhex(&input, None), expected); - let input = OwnedValue::Text(Rc::new(String::from("6f"))); + let input = OwnedValue::build_text(Rc::new(String::from("6f"))); let expected = OwnedValue::Blob(Rc::new(vec![0x6f])); assert_eq!(exec_unhex(&input, None), expected); - let input = OwnedValue::Text(Rc::new(String::from("611"))); + let input = OwnedValue::build_text(Rc::new(String::from("611"))); let expected = OwnedValue::Null; assert_eq!(exec_unhex(&input, None), expected); - let input = OwnedValue::Text(Rc::new(String::from(""))); + let input = OwnedValue::build_text(Rc::new(String::from(""))); let expected = OwnedValue::Blob(Rc::new(vec![])); assert_eq!(exec_unhex(&input, None), expected); - let input = OwnedValue::Text(Rc::new(String::from("61x"))); + let input = 
OwnedValue::build_text(Rc::new(String::from("61x"))); let expected = OwnedValue::Null; assert_eq!(exec_unhex(&input, None), expected); @@ -4219,7 +4254,7 @@ mod tests { assert_eq!(exec_abs(&float_negative_reg).unwrap(), float_positive_reg); assert_eq!( - exec_abs(&OwnedValue::Text(Rc::new(String::from("a")))).unwrap(), + exec_abs(&OwnedValue::build_text(Rc::new(String::from("a")))).unwrap(), OwnedValue::Float(0.0) ); assert_eq!(exec_abs(&OwnedValue::Null).unwrap(), OwnedValue::Null); @@ -4229,16 +4264,19 @@ mod tests { fn test_char() { assert_eq!( exec_char(vec![OwnedValue::Integer(108), OwnedValue::Integer(105)]), - OwnedValue::Text(Rc::new("li".to_string())) + OwnedValue::build_text(Rc::new("li".to_string())) + ); + assert_eq!( + exec_char(vec![]), + OwnedValue::build_text(Rc::new("".to_string())) ); - assert_eq!(exec_char(vec![]), OwnedValue::Text(Rc::new("".to_string()))); assert_eq!( exec_char(vec![OwnedValue::Null]), - OwnedValue::Text(Rc::new("".to_string())) + OwnedValue::build_text(Rc::new("".to_string())) ); assert_eq!( - exec_char(vec![OwnedValue::Text(Rc::new("a".to_string()))]), - OwnedValue::Text(Rc::new("".to_string())) + exec_char(vec![OwnedValue::build_text(Rc::new("a".to_string()))]), + OwnedValue::build_text(Rc::new("".to_string())) ); } @@ -4303,19 +4341,19 @@ mod tests { expected_len: 1, }, TestCase { - input: OwnedValue::Text(Rc::new(String::from(""))), + input: OwnedValue::build_text(Rc::new(String::from(""))), expected_len: 1, }, TestCase { - input: OwnedValue::Text(Rc::new(String::from("5"))), + input: OwnedValue::build_text(Rc::new(String::from("5"))), expected_len: 5, }, TestCase { - input: OwnedValue::Text(Rc::new(String::from("0"))), + input: OwnedValue::build_text(Rc::new(String::from("0"))), expected_len: 1, }, TestCase { - input: OwnedValue::Text(Rc::new(String::from("-1"))), + input: OwnedValue::build_text(Rc::new(String::from("-1"))), expected_len: 1, }, TestCase { @@ -4355,11 +4393,11 @@ mod tests { 
assert_eq!(exec_round(&input_val, Some(precision_val)), expected_val); let input_val = OwnedValue::Float(123.456); - let precision_val = OwnedValue::Text(Rc::new(String::from("1"))); + let precision_val = OwnedValue::build_text(Rc::new(String::from("1"))); let expected_val = OwnedValue::Float(123.5); assert_eq!(exec_round(&input_val, Some(precision_val)), expected_val); - let input_val = OwnedValue::Text(Rc::new(String::from("123.456"))); + let input_val = OwnedValue::build_text(Rc::new(String::from("123.456"))); let precision_val = OwnedValue::Integer(2); let expected_val = OwnedValue::Float(123.46); assert_eq!(exec_round(&input_val, Some(precision_val)), expected_val); @@ -4418,8 +4456,8 @@ mod tests { ); assert_eq!( exec_nullif( - &OwnedValue::Text(Rc::new("limbo".to_string())), - &OwnedValue::Text(Rc::new("limbo".to_string())) + &OwnedValue::build_text(Rc::new("limbo".to_string())), + &OwnedValue::build_text(Rc::new("limbo".to_string())) ), OwnedValue::Null ); @@ -4434,55 +4472,55 @@ mod tests { ); assert_eq!( exec_nullif( - &OwnedValue::Text(Rc::new("limbo".to_string())), - &OwnedValue::Text(Rc::new("limb".to_string())) + &OwnedValue::build_text(Rc::new("limbo".to_string())), + &OwnedValue::build_text(Rc::new("limb".to_string())) ), - OwnedValue::Text(Rc::new("limbo".to_string())) + OwnedValue::build_text(Rc::new("limbo".to_string())) ); } #[test] fn test_substring() { - let str_value = OwnedValue::Text(Rc::new("limbo".to_string())); + let str_value = OwnedValue::build_text(Rc::new("limbo".to_string())); let start_value = OwnedValue::Integer(1); let length_value = OwnedValue::Integer(3); - let expected_val = OwnedValue::Text(Rc::new(String::from("lim"))); + let expected_val = OwnedValue::build_text(Rc::new(String::from("lim"))); assert_eq!( exec_substring(&str_value, &start_value, &length_value), expected_val ); - let str_value = OwnedValue::Text(Rc::new("limbo".to_string())); + let str_value = OwnedValue::build_text(Rc::new("limbo".to_string())); let 
start_value = OwnedValue::Integer(1); let length_value = OwnedValue::Integer(10); - let expected_val = OwnedValue::Text(Rc::new(String::from("limbo"))); + let expected_val = OwnedValue::build_text(Rc::new(String::from("limbo"))); assert_eq!( exec_substring(&str_value, &start_value, &length_value), expected_val ); - let str_value = OwnedValue::Text(Rc::new("limbo".to_string())); + let str_value = OwnedValue::build_text(Rc::new("limbo".to_string())); let start_value = OwnedValue::Integer(10); let length_value = OwnedValue::Integer(3); - let expected_val = OwnedValue::Text(Rc::new(String::from(""))); + let expected_val = OwnedValue::build_text(Rc::new(String::from(""))); assert_eq!( exec_substring(&str_value, &start_value, &length_value), expected_val ); - let str_value = OwnedValue::Text(Rc::new("limbo".to_string())); + let str_value = OwnedValue::build_text(Rc::new("limbo".to_string())); let start_value = OwnedValue::Integer(3); let length_value = OwnedValue::Null; - let expected_val = OwnedValue::Text(Rc::new(String::from("mbo"))); + let expected_val = OwnedValue::build_text(Rc::new(String::from("mbo"))); assert_eq!( exec_substring(&str_value, &start_value, &length_value), expected_val ); - let str_value = OwnedValue::Text(Rc::new("limbo".to_string())); + let str_value = OwnedValue::build_text(Rc::new("limbo".to_string())); let start_value = OwnedValue::Integer(10); let length_value = OwnedValue::Null; - let expected_val = OwnedValue::Text(Rc::new(String::from(""))); + let expected_val = OwnedValue::build_text(Rc::new(String::from(""))); assert_eq!( exec_substring(&str_value, &start_value, &length_value), expected_val @@ -4491,43 +4529,43 @@ mod tests { #[test] fn test_exec_instr() { - let input = OwnedValue::Text(Rc::new(String::from("limbo"))); - let pattern = OwnedValue::Text(Rc::new(String::from("im"))); + let input = OwnedValue::build_text(Rc::new(String::from("limbo"))); + let pattern = OwnedValue::build_text(Rc::new(String::from("im"))); let expected = 
OwnedValue::Integer(2); assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Text(Rc::new(String::from("limbo"))); - let pattern = OwnedValue::Text(Rc::new(String::from("limbo"))); + let input = OwnedValue::build_text(Rc::new(String::from("limbo"))); + let pattern = OwnedValue::build_text(Rc::new(String::from("limbo"))); let expected = OwnedValue::Integer(1); assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Text(Rc::new(String::from("limbo"))); - let pattern = OwnedValue::Text(Rc::new(String::from("o"))); + let input = OwnedValue::build_text(Rc::new(String::from("limbo"))); + let pattern = OwnedValue::build_text(Rc::new(String::from("o"))); let expected = OwnedValue::Integer(5); assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Text(Rc::new(String::from("liiiiimbo"))); - let pattern = OwnedValue::Text(Rc::new(String::from("ii"))); + let input = OwnedValue::build_text(Rc::new(String::from("liiiiimbo"))); + let pattern = OwnedValue::build_text(Rc::new(String::from("ii"))); let expected = OwnedValue::Integer(2); assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Text(Rc::new(String::from("limbo"))); - let pattern = OwnedValue::Text(Rc::new(String::from("limboX"))); + let input = OwnedValue::build_text(Rc::new(String::from("limbo"))); + let pattern = OwnedValue::build_text(Rc::new(String::from("limboX"))); let expected = OwnedValue::Integer(0); assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Text(Rc::new(String::from("limbo"))); - let pattern = OwnedValue::Text(Rc::new(String::from(""))); + let input = OwnedValue::build_text(Rc::new(String::from("limbo"))); + let pattern = OwnedValue::build_text(Rc::new(String::from(""))); let expected = OwnedValue::Integer(1); assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Text(Rc::new(String::from(""))); - let pattern = 
OwnedValue::Text(Rc::new(String::from("limbo"))); + let input = OwnedValue::build_text(Rc::new(String::from(""))); + let pattern = OwnedValue::build_text(Rc::new(String::from("limbo"))); let expected = OwnedValue::Integer(0); assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Text(Rc::new(String::from(""))); - let pattern = OwnedValue::Text(Rc::new(String::from(""))); + let input = OwnedValue::build_text(Rc::new(String::from(""))); + let pattern = OwnedValue::build_text(Rc::new(String::from(""))); let expected = OwnedValue::Integer(1); assert_eq!(exec_instr(&input, &pattern), expected); @@ -4536,13 +4574,13 @@ mod tests { let expected = OwnedValue::Null; assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Text(Rc::new(String::from("limbo"))); + let input = OwnedValue::build_text(Rc::new(String::from("limbo"))); let pattern = OwnedValue::Null; let expected = OwnedValue::Null; assert_eq!(exec_instr(&input, &pattern), expected); let input = OwnedValue::Null; - let pattern = OwnedValue::Text(Rc::new(String::from("limbo"))); + let pattern = OwnedValue::build_text(Rc::new(String::from("limbo"))); let expected = OwnedValue::Null; assert_eq!(exec_instr(&input, &pattern), expected); @@ -4567,7 +4605,7 @@ mod tests { assert_eq!(exec_instr(&input, &pattern), expected); let input = OwnedValue::Float(12.34); - let pattern = OwnedValue::Text(Rc::new(String::from("."))); + let pattern = OwnedValue::build_text(Rc::new(String::from("."))); let expected = OwnedValue::Integer(3); assert_eq!(exec_instr(&input, &pattern), expected); @@ -4582,11 +4620,11 @@ mod tests { assert_eq!(exec_instr(&input, &pattern), expected); let input = OwnedValue::Blob(Rc::new(vec![0x61, 0x62, 0x63, 0x64, 0x65])); - let pattern = OwnedValue::Text(Rc::new(String::from("cd"))); + let pattern = OwnedValue::build_text(Rc::new(String::from("cd"))); let expected = OwnedValue::Integer(3); assert_eq!(exec_instr(&input, &pattern), expected); - let input = 
OwnedValue::Text(Rc::new(String::from("abcde"))); + let input = OwnedValue::build_text(Rc::new(String::from("abcde"))); let pattern = OwnedValue::Blob(Rc::new(vec![0x63, 0x64])); let expected = OwnedValue::Integer(3); assert_eq!(exec_instr(&input, &pattern), expected); @@ -4622,19 +4660,19 @@ mod tests { let expected = Some(OwnedValue::Integer(-1)); assert_eq!(exec_sign(&input), expected); - let input = OwnedValue::Text(Rc::new("abc".to_string())); + let input = OwnedValue::build_text(Rc::new("abc".to_string())); let expected = Some(OwnedValue::Null); assert_eq!(exec_sign(&input), expected); - let input = OwnedValue::Text(Rc::new("42".to_string())); + let input = OwnedValue::build_text(Rc::new("42".to_string())); let expected = Some(OwnedValue::Integer(1)); assert_eq!(exec_sign(&input), expected); - let input = OwnedValue::Text(Rc::new("-42".to_string())); + let input = OwnedValue::build_text(Rc::new("-42".to_string())); let expected = Some(OwnedValue::Integer(-1)); assert_eq!(exec_sign(&input), expected); - let input = OwnedValue::Text(Rc::new("0".to_string())); + let input = OwnedValue::build_text(Rc::new("0".to_string())); let expected = Some(OwnedValue::Integer(0)); assert_eq!(exec_sign(&input), expected); @@ -4677,15 +4715,15 @@ mod tests { let expected = OwnedValue::Blob(Rc::new(vec![])); assert_eq!(exec_zeroblob(&input), expected); - let input = OwnedValue::Text(Rc::new("5".to_string())); + let input = OwnedValue::build_text(Rc::new("5".to_string())); let expected = OwnedValue::Blob(Rc::new(vec![0; 5])); assert_eq!(exec_zeroblob(&input), expected); - let input = OwnedValue::Text(Rc::new("-5".to_string())); + let input = OwnedValue::build_text(Rc::new("-5".to_string())); let expected = OwnedValue::Blob(Rc::new(vec![])); assert_eq!(exec_zeroblob(&input), expected); - let input = OwnedValue::Text(Rc::new("text".to_string())); + let input = OwnedValue::build_text(Rc::new("text".to_string())); let expected = OwnedValue::Blob(Rc::new(vec![])); 
assert_eq!(exec_zeroblob(&input), expected); @@ -4707,101 +4745,101 @@ mod tests { #[test] fn test_replace() { - let input_str = OwnedValue::Text(Rc::new(String::from("bob"))); - let pattern_str = OwnedValue::Text(Rc::new(String::from("b"))); - let replace_str = OwnedValue::Text(Rc::new(String::from("a"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("aoa"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bob"))); + let pattern_str = OwnedValue::build_text(Rc::new(String::from("b"))); + let replace_str = OwnedValue::build_text(Rc::new(String::from("a"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("aoa"))); assert_eq!( exec_replace(&input_str, &pattern_str, &replace_str), expected_str ); - let input_str = OwnedValue::Text(Rc::new(String::from("bob"))); - let pattern_str = OwnedValue::Text(Rc::new(String::from("b"))); - let replace_str = OwnedValue::Text(Rc::new(String::from(""))); - let expected_str = OwnedValue::Text(Rc::new(String::from("o"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bob"))); + let pattern_str = OwnedValue::build_text(Rc::new(String::from("b"))); + let replace_str = OwnedValue::build_text(Rc::new(String::from(""))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("o"))); assert_eq!( exec_replace(&input_str, &pattern_str, &replace_str), expected_str ); - let input_str = OwnedValue::Text(Rc::new(String::from("bob"))); - let pattern_str = OwnedValue::Text(Rc::new(String::from("b"))); - let replace_str = OwnedValue::Text(Rc::new(String::from("abc"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("abcoabc"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bob"))); + let pattern_str = OwnedValue::build_text(Rc::new(String::from("b"))); + let replace_str = OwnedValue::build_text(Rc::new(String::from("abc"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("abcoabc"))); assert_eq!( exec_replace(&input_str, 
&pattern_str, &replace_str), expected_str ); - let input_str = OwnedValue::Text(Rc::new(String::from("bob"))); - let pattern_str = OwnedValue::Text(Rc::new(String::from("a"))); - let replace_str = OwnedValue::Text(Rc::new(String::from("b"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("bob"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bob"))); + let pattern_str = OwnedValue::build_text(Rc::new(String::from("a"))); + let replace_str = OwnedValue::build_text(Rc::new(String::from("b"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("bob"))); assert_eq!( exec_replace(&input_str, &pattern_str, &replace_str), expected_str ); - let input_str = OwnedValue::Text(Rc::new(String::from("bob"))); - let pattern_str = OwnedValue::Text(Rc::new(String::from(""))); - let replace_str = OwnedValue::Text(Rc::new(String::from("a"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("bob"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bob"))); + let pattern_str = OwnedValue::build_text(Rc::new(String::from(""))); + let replace_str = OwnedValue::build_text(Rc::new(String::from("a"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("bob"))); assert_eq!( exec_replace(&input_str, &pattern_str, &replace_str), expected_str ); - let input_str = OwnedValue::Text(Rc::new(String::from("bob"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bob"))); let pattern_str = OwnedValue::Null; - let replace_str = OwnedValue::Text(Rc::new(String::from("a"))); + let replace_str = OwnedValue::build_text(Rc::new(String::from("a"))); let expected_str = OwnedValue::Null; assert_eq!( exec_replace(&input_str, &pattern_str, &replace_str), expected_str ); - let input_str = OwnedValue::Text(Rc::new(String::from("bo5"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bo5"))); let pattern_str = OwnedValue::Integer(5); - let replace_str = OwnedValue::Text(Rc::new(String::from("a"))); 
- let expected_str = OwnedValue::Text(Rc::new(String::from("boa"))); + let replace_str = OwnedValue::build_text(Rc::new(String::from("a"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("boa"))); assert_eq!( exec_replace(&input_str, &pattern_str, &replace_str), expected_str ); - let input_str = OwnedValue::Text(Rc::new(String::from("bo5.0"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bo5.0"))); let pattern_str = OwnedValue::Float(5.0); - let replace_str = OwnedValue::Text(Rc::new(String::from("a"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("boa"))); + let replace_str = OwnedValue::build_text(Rc::new(String::from("a"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("boa"))); assert_eq!( exec_replace(&input_str, &pattern_str, &replace_str), expected_str ); - let input_str = OwnedValue::Text(Rc::new(String::from("bo5"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bo5"))); let pattern_str = OwnedValue::Float(5.0); - let replace_str = OwnedValue::Text(Rc::new(String::from("a"))); - let expected_str = OwnedValue::Text(Rc::new(String::from("bo5"))); + let replace_str = OwnedValue::build_text(Rc::new(String::from("a"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("bo5"))); assert_eq!( exec_replace(&input_str, &pattern_str, &replace_str), expected_str ); - let input_str = OwnedValue::Text(Rc::new(String::from("bo5.0"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("bo5.0"))); let pattern_str = OwnedValue::Float(5.0); let replace_str = OwnedValue::Float(6.0); - let expected_str = OwnedValue::Text(Rc::new(String::from("bo6.0"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("bo6.0"))); assert_eq!( exec_replace(&input_str, &pattern_str, &replace_str), expected_str ); // todo: change this test to use (0.1 + 0.2) instead of 0.3 when decimals are implemented. 
- let input_str = OwnedValue::Text(Rc::new(String::from("tes3"))); + let input_str = OwnedValue::build_text(Rc::new(String::from("tes3"))); let pattern_str = OwnedValue::Integer(3); let replace_str = OwnedValue::Agg(Box::new(AggContext::Sum(OwnedValue::Float(0.3)))); - let expected_str = OwnedValue::Text(Rc::new(String::from("tes0.3"))); + let expected_str = OwnedValue::build_text(Rc::new(String::from("tes0.3"))); assert_eq!( exec_replace(&input_str, &pattern_str, &replace_str), expected_str diff --git a/testing/json.test b/testing/json.test old mode 100644 new mode 100755 index 6f566fa05..3b839f3c9 --- a/testing/json.test +++ b/testing/json.test @@ -55,3 +55,15 @@ do_execsql_test json5-multi-comment { SELECT json(' /* abc */ { /*def*/ aaa /* xyz */ : // to the end of line 123 /* xyz */ , /* 123 */ }') } {{{"aaa":123}}} + +do_execsql_test json_array_str { + SELECT json_array('a') +} {{["a"]}} + +do_execsql_test json_array_not_json { + SELECT json_array('{"a":1}'); +} {{["{\"a\":1}"]}} + +do_execsql_test json_array_json { + SELECT json_array(json('{"a":1}')); +} {{[{"a":1}]}} From cdb24d3de175496b8d44182ea768f9d503b1a2a3 Mon Sep 17 00:00:00 2001 From: Kacper Madej Date: Wed, 18 Dec 2024 15:35:10 +0100 Subject: [PATCH 057/144] Handle issues with nested arguments --- core/error.rs | 7 +++++++ core/json/mod.rs | 35 ++++++++++++++++++++++++++++------- core/translate/expr.rs | 26 ++++++++++++++++---------- core/vdbe/mod.rs | 5 +++-- testing/json.test | 20 ++++++++++++++++++-- testing/scalar-functions.test | 4 ++++ 6 files changed, 76 insertions(+), 21 deletions(-) diff --git a/core/error.rs b/core/error.rs index 2e688867f..3ab2a7f54 100644 --- a/core/error.rs +++ b/core/error.rs @@ -54,5 +54,12 @@ macro_rules! bail_corrupt_error { }; } +#[macro_export] +macro_rules! 
bail_constraint_error { + ($($arg:tt)*) => { + return Err($crate::error::LimboError::Constraint(format!($($arg)*))) + }; +} + pub const SQLITE_CONSTRAINT: usize = 19; pub const SQLITE_CONSTRAINT_PRIMARYKEY: usize = SQLITE_CONSTRAINT | (6 << 8); diff --git a/core/json/mod.rs b/core/json/mod.rs index f0bffa8f1..b1394b2bd 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -50,13 +50,13 @@ pub fn get_json(json_value: &OwnedValue) -> crate::Result { } } -pub fn json_array(values: Vec) -> crate::Result { +pub fn json_array(values: Vec<&OwnedValue>) -> crate::Result { let mut s = String::new(); s.push('['); for (idx, value) in values.iter().enumerate() { match value { - OwnedValue::Blob(_) => crate::bail_parse_error!("JSON cannot hold BLOB values"), + OwnedValue::Blob(_) => crate::bail_constraint_error!("JSON cannot hold BLOB values"), OwnedValue::Text(t) => { if t.subtype == TextSubtype::Json { s.push_str(&t.value); @@ -67,8 +67,15 @@ pub fn json_array(values: Vec) -> crate::Result { } } } - OwnedValue::Integer(i) => s.push_str(&i.to_string()), - OwnedValue::Float(f) => s.push_str(&f.to_string()), + OwnedValue::Integer(i) => match crate::json::to_string(&i) { + Ok(json) => s.push_str(&json), + Err(_) => crate::bail_parse_error!("malformed JSON"), + }, + OwnedValue::Float(f) => match crate::json::to_string(&f) { + Ok(json) => s.push_str(&json), + Err(_) => crate::bail_parse_error!("malformed JSON"), + }, + OwnedValue::Null => s.push_str("null"), _ => unreachable!(), } @@ -78,7 +85,7 @@ pub fn json_array(values: Vec) -> crate::Result { } s.push(']'); - Ok(OwnedValue::build_text(Rc::new(s))) + Ok(OwnedValue::Text(LimboText::json(Rc::new(s)))) } #[cfg(test)] @@ -92,6 +99,7 @@ mod tests { let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { assert!(result_str.value.contains("\"key\":\"value\"")); + assert_eq!(result_str.subtype, TextSubtype::Json); } else { panic!("Expected OwnedValue::Text"); } @@ -103,6 +111,7 @@ mod tests { let result 
= get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { assert!(result_str.value.contains("\"key\":\"value\"")); + assert_eq!(result_str.subtype, TextSubtype::Json); } else { panic!("Expected OwnedValue::Text"); } @@ -114,6 +123,7 @@ mod tests { let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { assert!(result_str.value.contains("{\"key\":9e999}")); + assert_eq!(result_str.subtype, TextSubtype::Json); } else { panic!("Expected OwnedValue::Text"); } @@ -125,6 +135,7 @@ mod tests { let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { assert!(result_str.value.contains("{\"key\":-9e999}")); + assert_eq!(result_str.subtype, TextSubtype::Json); } else { panic!("Expected OwnedValue::Text"); } @@ -136,6 +147,7 @@ mod tests { let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { assert!(result_str.value.contains("{\"key\":null}")); + assert_eq!(result_str.subtype, TextSubtype::Json); } else { panic!("Expected OwnedValue::Text"); } @@ -157,6 +169,7 @@ mod tests { let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { assert!(result_str.value.contains("\"key\":\"value\"")); + assert_eq!(result_str.subtype, TextSubtype::Json); } else { panic!("Expected OwnedValue::Text"); } @@ -179,6 +192,7 @@ mod tests { let result = get_json(&input).unwrap(); if let OwnedValue::Text(result_str) = result { assert!(result_str.value.contains("\"asd\":\"adf\"")); + assert_eq!(result_str.subtype, TextSubtype::Json); } else { panic!("Expected OwnedValue::Text"); } @@ -210,11 +224,17 @@ mod tests { fn test_json_array_simple() { let text = OwnedValue::build_text(Rc::new("value1".to_string())); let json = OwnedValue::Text(LimboText::json(Rc::new("\"value2\"".to_string()))); - let input = vec![text, json, OwnedValue::Integer(1), OwnedValue::Float(1.1)]; + let input = vec![ + &text, + &json, + &OwnedValue::Integer(1), + &OwnedValue::Float(1.1), + ]; 
let result = json_array(input).unwrap(); if let OwnedValue::Text(res) = result { assert_eq!(res.value.as_str(), "[\"value1\",\"value2\",1,1.1]"); + assert_eq!(res.subtype, TextSubtype::Json); } else { panic!("Expected OwnedValue::Text"); } @@ -227,6 +247,7 @@ mod tests { let result = json_array(input).unwrap(); if let OwnedValue::Text(res) = result { assert_eq!(res.value.as_str(), "[]"); + assert_eq!(res.subtype, TextSubtype::Json); } else { panic!("Expected OwnedValue::Text"); } @@ -236,7 +257,7 @@ mod tests { fn test_json_array_blob_invalid() { let blob = OwnedValue::Blob(Rc::new("1".as_bytes().to_vec())); - let input = vec![blob]; + let input = vec![&blob]; let result = json_array(input); diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 679e86819..363d96a99 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -899,7 +899,7 @@ pub fn translate_expr( Ok(target_register) } JsonFunc::JsonArray => { - allocate_registers( + let start_reg = translate_variable_sized_function_parameter_list( program, args, referenced_tables, @@ -908,7 +908,7 @@ pub fn translate_expr( program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg, dest: target_register, func: func_ctx, }); @@ -921,7 +921,7 @@ pub fn translate_expr( unreachable!("this is always ast::Expr::Cast") } ScalarFunc::Char => { - allocate_registers( + let start_reg = translate_variable_sized_function_parameter_list( program, args, referenced_tables, @@ -930,7 +930,7 @@ pub fn translate_expr( program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg, dest: target_register, func: func_ctx, }); @@ -1952,26 +1952,32 @@ pub fn translate_expr( } } -fn allocate_registers( +// Returns the starting register for the function. 
+// TODO: Use this function for all functions with variable number of parameters in `translate_expr` +fn translate_variable_sized_function_parameter_list( program: &mut ProgramBuilder, args: &Option>, referenced_tables: Option<&[BTreeTableReference]>, precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, -) -> Result<()> { - let args = args.clone().unwrap_or_else(Vec::new); +) -> Result { + let args = args.as_deref().unwrap_or_default(); + + let reg = program.alloc_registers(args.len()); + let mut current_reg = reg; for arg in args.iter() { - let reg = program.alloc_register(); translate_expr( program, referenced_tables, arg, - reg, + current_reg, precomputed_exprs_to_registers, )?; + + current_reg += 1; } - Ok(()) + Ok(reg) } fn wrap_eval_jump_expr( diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 9d07d94b0..84ef0e778 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2267,8 +2267,9 @@ impl Program { } #[cfg(feature = "json")] crate::function::Func::Json(JsonFunc::JsonArray) => { - let reg_values = - state.registers[*start_reg..*start_reg + arg_count].to_vec(); + let reg_values = state.registers[*start_reg..*start_reg + arg_count] + .iter() + .collect(); let json_array = json_array(reg_values); diff --git a/testing/json.test b/testing/json.test index 3b839f3c9..a62040555 100755 --- a/testing/json.test +++ b/testing/json.test @@ -60,10 +60,26 @@ do_execsql_test json_array_str { SELECT json_array('a') } {{["a"]}} +do_execsql_test json_array_numbers { + SELECT json_array(1, 1.5) +} {{[1,1.5]}} + +do_execsql_test json_array_numbers_2 { + SELECT json_array(1., +2., -2.) 
+} {{[1.0,2.0,-2.0]}} + +do_execsql_test json_array_null { + SELECT json_array(null) +} {{[null]}} + do_execsql_test json_array_not_json { - SELECT json_array('{"a":1}'); + SELECT json_array('{"a":1}') } {{["{\"a\":1}"]}} do_execsql_test json_array_json { - SELECT json_array(json('{"a":1}')); + SELECT json_array(json('{"a":1}')) } {{[{"a":1}]}} + +do_execsql_test json_array_nested { + SELECT json_array(json_array(1,2,3), json('[1,2,3]'), '[1,2,3]') +} {{[[1,2,3],[1,2,3],"[1,2,3]"]}} diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index 5b7151c58..e7f1c1b10 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -39,6 +39,10 @@ do_execsql_test char { select char(108, 105) } {li} +do_execsql_test char-nested { + select char(106 + 2, 105) +} {li} + do_execsql_test char-empty { select char() } {} From e2fc03e8226baeef4c8457bb60f741f5368ac895 Mon Sep 17 00:00:00 2001 From: Kacper Madej Date: Wed, 18 Dec 2024 19:20:42 +0100 Subject: [PATCH 058/144] Update COMPAT.md --- COMPAT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/COMPAT.md b/COMPAT.md index f02c25b66..213841cb1 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -228,7 +228,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). |------------------------------------|---------|---------| | json(json) | Partial | | | jsonb(json) | | | -| json_array(value1,value2,...) | | | +| json_array(value1,value2,...) | Yes | | | jsonb_array(value1,value2,...) 
| | | | json_array_length(json) | | | | json_array_length(json,path) | | | From d5d71859955522fe61802a6334570495bede8153 Mon Sep 17 00:00:00 2001 From: KaguraMilet Date: Fri, 20 Dec 2024 22:49:44 +0800 Subject: [PATCH 059/144] add between expr tests --- testing/where.test | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/testing/where.test b/testing/where.test index 28ce70f8e..264bdfdd8 100755 --- a/testing/where.test +++ b/testing/where.test @@ -317,3 +317,20 @@ do_execsql_test where-age-index-seek-regression-test { do_execsql_test where-age-index-seek-regression-test-2 { select count(1) from users where age > 0; } {10000} + +do_execsql_test where-simple-between { + SELECT * FROM products WHERE price BETWEEN 70 AND 100; +} {1|hat|79.0 +2|cap|82.0 +5|sweatshirt|74.0 +6|shorts|70.0 +7|jeans|78.0 +8|sneakers|82.0 +11|accessories|81.0} + +do_execsql_test between-price-range-with-names { + SELECT * FROM products + WHERE (price BETWEEN 70 AND 100) + AND (name = 'sweatshirt' OR name = 'sneakers'); +} {5|sweatshirt|74.0 +8|sneakers|82.0} From f912771ae6721275c6db4c269411c07c3653d335 Mon Sep 17 00:00:00 2001 From: amuldotexe Date: Fri, 20 Dec 2024 20:32:03 +0530 Subject: [PATCH 060/144] gracefully handling errors for issue https://github.com/tursodatabase/limbo/issues/494 , changes made 5 places where todo macros were replaced with relevant errors --- bindings/python/src/lib.rs | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index aca225304..b3b618dd3 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -104,7 +104,9 @@ impl Cursor { // TODO: use stmt_is_dml to set rowcount if stmt_is_dml { - todo!() + return Err(PyErr::new::( + "DML statements (INSERT/UPDATE/DELETE) are not fully supported in this version" + ).into()); } Ok(Cursor { @@ -181,18 +183,24 @@ impl Cursor { } } - pub fn close(&self) -> Result<()> { - todo!() + 
pub fn close(&self) -> PyResult<()> { + Err(PyErr::new::( + "close() is not supported in this version" + )) } #[pyo3(signature = (sql, parameters=None))] - pub fn executemany(&self, sql: &str, parameters: Option>) { - todo!() + pub fn executemany(&self, sql: &str, parameters: Option>) -> PyResult<()> { + Err(PyErr::new::( + "executemany() is not supported in this version" + )) } #[pyo3(signature = (size=None))] - pub fn fetchmany(&self, size: Option) { - todo!() + pub fn fetchmany(&self, size: Option) -> PyResult>> { + Err(PyErr::new::( + "fetchmany() is not supported in this version" + )) } } @@ -228,12 +236,16 @@ impl Connection { drop(self.conn.clone()); } - pub fn commit(&self) { - todo!() + pub fn commit(&self) -> PyResult<()> { + Err(PyErr::new::( + "Transactions are not supported in this version" + )) } - pub fn rollback(&self) { - todo!() + pub fn rollback(&self) -> PyResult<()> { + Err(PyErr::new::( + "Transactions are not supported in this version" + )) } } From b7b22f303f4a68aa85b6507d86eaf4f807c99dec Mon Sep 17 00:00:00 2001 From: amuldotexe Date: Fri, 20 Dec 2024 20:36:35 +0530 Subject: [PATCH 061/144] ran cargofmt --- bindings/python/src/lib.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index b3b618dd3..595400a21 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -105,8 +105,9 @@ impl Cursor { // TODO: use stmt_is_dml to set rowcount if stmt_is_dml { return Err(PyErr::new::( - "DML statements (INSERT/UPDATE/DELETE) are not fully supported in this version" - ).into()); + "DML statements (INSERT/UPDATE/DELETE) are not fully supported in this version", + ) + .into()); } Ok(Cursor { @@ -185,21 +186,21 @@ impl Cursor { pub fn close(&self) -> PyResult<()> { Err(PyErr::new::( - "close() is not supported in this version" + "close() is not supported in this version", )) } #[pyo3(signature = (sql, parameters=None))] pub fn executemany(&self, 
sql: &str, parameters: Option>) -> PyResult<()> { Err(PyErr::new::( - "executemany() is not supported in this version" + "executemany() is not supported in this version", )) } #[pyo3(signature = (size=None))] pub fn fetchmany(&self, size: Option) -> PyResult>> { Err(PyErr::new::( - "fetchmany() is not supported in this version" + "fetchmany() is not supported in this version", )) } } @@ -238,13 +239,13 @@ impl Connection { pub fn commit(&self) -> PyResult<()> { Err(PyErr::new::( - "Transactions are not supported in this version" + "Transactions are not supported in this version", )) } pub fn rollback(&self) -> PyResult<()> { Err(PyErr::new::( - "Transactions are not supported in this version" + "Transactions are not supported in this version", )) } } From ef39f11a9fcde3c5ddc6629e39a4782aa08a8250 Mon Sep 17 00:00:00 2001 From: KaguraMilet Date: Fri, 20 Dec 2024 23:11:17 +0800 Subject: [PATCH 062/144] fix(optimizer): process `Parenthesized` expression --- core/translate/optimizer.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 962e2ea80..443448860 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -881,6 +881,9 @@ fn convert_between_expr(expr: ast::Expr) -> ast::Expr { ) } } + ast::Expr::Parenthesized(mut exprs) => { + ast::Expr::Parenthesized(exprs.drain(..).map(convert_between_expr).collect()) + } // Process other expressions recursively ast::Expr::Binary(lhs, op, rhs) => ast::Expr::Binary( Box::new(convert_between_expr(*lhs)), From d2723b777bc01b222d5bafebf10f1ea314f7c076 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Fri, 20 Dec 2024 12:17:59 -0500 Subject: [PATCH 063/144] update table create probability, print interactions as info logs --- simulator/generation/plan.rs | 5 ++++- simulator/main.rs | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index b1a233f9e..d83911642 100644 --- 
a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -392,7 +392,10 @@ impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { remaining_write, Box::new(|rng: &mut R| random_write(rng, env)), ), - (1, Box::new(|rng: &mut R| create_table(rng, env))), + ( + remaining_write / 10, + Box::new(|rng: &mut R| create_table(rng, env)), + ), (1, Box::new(|rng: &mut R| random_fault(rng, env))), ], rng, diff --git a/simulator/main.rs b/simulator/main.rs index 8dc477290..085711391 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -66,7 +66,7 @@ fn main() { }; let opts = SimulatorOpts { - ticks: rng.gen_range(0..1024), + ticks: rng.gen_range(0..10240), max_connections: 1, // TODO: for now let's use one connection as we didn't implement // correct transactions procesing max_tables: rng.gen_range(0..128), @@ -74,7 +74,7 @@ fn main() { write_percent, delete_percent, page_size: 4096, // TODO: randomize this too - max_interactions: rng.gen_range(0..1024), + max_interactions: rng.gen_range(0..10240), }; let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap()); @@ -175,6 +175,7 @@ fn execute_interaction( interaction: &Interaction, stack: &mut Vec, ) -> Result<()> { + log::info!("executing: {}", interaction); match interaction { generation::plan::Interaction::Query(_) => { let conn = match &mut env.connections[connection_index] { From 8f8b97d54b7d18a756247dcbbd5cb4701e289120 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Fri, 20 Dec 2024 12:27:54 -0500 Subject: [PATCH 064/144] add the missing rowresult variant --- simulator/generation/plan.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index d83911642..fd194de66 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -1,4 +1,4 @@ -use std::{f32::consts::E, fmt::Display, os::macos::raw::stat, rc::Rc}; +use std::{fmt::Display, rc::Rc}; use limbo_core::{Connection, Result, RowResult}; 
use rand::SeedableRng; @@ -231,6 +231,7 @@ impl Interaction { out.push(r); } RowResult::IO => {} + RowResult::Interrupt => {} RowResult::Done => { break; } From 1e72fee34311f7d7f0227ed572b04835e0328fec Mon Sep 17 00:00:00 2001 From: vignesh-j-shetty Date: Sat, 21 Dec 2024 12:23:04 +0530 Subject: [PATCH 065/144] Implemented proc_macro_derive extract description from rust docs and generate get_description function --- Cargo.lock | 5 ++ Cargo.toml | 2 +- core/Cargo.toml | 1 + core/vdbe/mod.rs | 5 +- description_derive/Cargo.toml | 5 ++ description_derive/src/lib.rs | 137 ++++++++++++++++++++++++++++++++++ 6 files changed, 151 insertions(+), 4 deletions(-) create mode 100644 description_derive/Cargo.toml create mode 100644 description_derive/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index ead336051..ff669bf42 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -536,6 +536,10 @@ dependencies = [ "uuid", ] +[[package]] +name = "description_derive" +version = "0.0.0" + [[package]] name = "digest" version = "0.10.7" @@ -1130,6 +1134,7 @@ dependencies = [ "cfg_block", "chrono", "criterion", + "description_derive", "fallible-iterator 0.3.0", "getrandom", "hex", diff --git a/Cargo.toml b/Cargo.toml index f09011a3a..7db7e5803 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ members = [ "sqlite3", "core", "simulator", - "test", + "test", "description_derive", ] exclude = ["perf/latency/limbo"] diff --git a/core/Cargo.toml b/core/Cargo.toml index 9c9ed5521..d93906b3f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -53,6 +53,7 @@ pest = { version = "2.0", optional = true } pest_derive = { version = "2.0", optional = true } rand = "0.8.5" bumpalo = { version = "3.16.0", features = ["collections", "boxed"] } +description_derive = { path = "../description_derive" } [target.'cfg(not(target_family = "windows"))'.dev-dependencies] pprof = { version = "0.14.0", features = ["criterion", "flamegraph"] } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 
cbb046d88..1d1a0cb4d 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -48,9 +48,8 @@ use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; use std::fmt::Display; use std::rc::{Rc, Weak}; - pub type BranchOffset = i64; - +use description_derive::Description; pub type CursorID = usize; pub type PageIdx = usize; @@ -74,7 +73,7 @@ impl Display for Func { } } -#[derive(Debug)] +#[derive(Description, Debug)] pub enum Insn { // Initialize the program state and jump to the given PC. Init { diff --git a/description_derive/Cargo.toml b/description_derive/Cargo.toml new file mode 100644 index 000000000..09ffe3de3 --- /dev/null +++ b/description_derive/Cargo.toml @@ -0,0 +1,5 @@ +[package] +name = "description_derive" + +[lib] +proc-macro = true diff --git a/description_derive/src/lib.rs b/description_derive/src/lib.rs new file mode 100644 index 000000000..605c3b956 --- /dev/null +++ b/description_derive/src/lib.rs @@ -0,0 +1,137 @@ +extern crate proc_macro; +use proc_macro::{token_stream::IntoIter, Group, TokenStream, TokenTree}; +use std::collections::HashMap; + +/// A procedural macro that derives a `Description` trait for enums. +/// This macro extracts documentation comments (specified with `/// Description...`) for enum variants +/// and generates an implementation for `get_description`, which returns the associated description. 
+#[proc_macro_derive(Description, attributes(desc))] +pub fn derive_description_from_doc(item: TokenStream) -> TokenStream { + // Convert the TokenStream into an iterator of TokenTree + let mut tokens = item.into_iter(); + + let mut enum_name = String::new(); + + // Vector to store enum variants and their associated payloads (if any) + let mut enum_variants: Vec<(String, Option)> = Vec::<(String, Option)>::new(); + + // HashMap to store descriptions associated with each enum variant + let mut variant_description_map: HashMap = HashMap::new(); + + // Parses the token stream to extract the enum name and its variants + while let Some(token) = tokens.next() { + match token { + TokenTree::Ident(ident) if ident.to_string() == "enum" => { + // Get the enum name + if let Some(TokenTree::Ident(name)) = tokens.next() { + enum_name = name.to_string(); + } + } + TokenTree::Group(group) => { + let mut group_tokens_iter: IntoIter = group.stream().into_iter(); + + let mut last_seen_desc: Option = None; + while let Some(token) = group_tokens_iter.next() { + match token { + TokenTree::Punct(punct) => { + if punct.to_string() == "#" { + last_seen_desc = process_description(&mut group_tokens_iter); + } + } + TokenTree::Ident(ident) => { + // Capture the enum variant name and associate it with its description + let ident_str = ident.to_string(); + if let Some(desc) = &last_seen_desc { + variant_description_map.insert(ident_str.clone(), desc.clone()); + } + enum_variants.push((ident_str, None)); + last_seen_desc = None; + } + TokenTree::Group(group) => { + // Capture payload information for the current enum variant + if let Some(last_variant) = enum_variants.last_mut() { + last_variant.1 = Some(process_payload(group)); + } + } + _ => {} + } + } + } + _ => {} + } + } + generate_get_description(enum_name, &variant_description_map, enum_variants) +} + +/// Processes a Rust docs to extract the description string. 
+fn process_description(token_iter: &mut IntoIter) -> Option { + if let Some(doc_token_tree) = token_iter.next() { + if let TokenTree::Group(doc_group) = doc_token_tree { + let mut doc_group_iter = doc_group.stream().into_iter(); + // Skip the `desc` and `(` tokens to reach the actual description + doc_group_iter.next(); + doc_group_iter.next(); + if let Some(TokenTree::Literal(description)) = doc_group_iter.next() { + return Some(description.to_string()); + } + } + } + None +} + +/// Processes the payload of an enum variant to extract variable names (ignoring types). +fn process_payload(payload_group: Group) -> String { + let mut payload_group_iter = payload_group.stream().into_iter(); + let mut variable_name_list = String::from(""); + let mut is_variable_name = true; + while let Some(token) = payload_group_iter.next() { + match token { + TokenTree::Ident(ident) => { + if is_variable_name { + variable_name_list.push_str(&format!("{},", ident.to_string())); + } + is_variable_name = false; + } + TokenTree::Punct(punct) => { + if punct.to_string() == "," { + is_variable_name = true; + } + } + _ => {} + } + } + format!("{{ {} }}", variable_name_list).to_string() +} +/// Generates the `get_description` implementation for the processed enum. 
+fn generate_get_description( + enum_name: String, + variant_description_map: &HashMap, + enum_variants: Vec<(String, Option)>, +) -> TokenStream { + let mut all_enum_arms = String::from(""); + for (variant, payload) in enum_variants { + let payload = payload.unwrap_or("".to_string()); + let desc; + if let Some(description) = variant_description_map.get(&variant) { + desc = format!("Some({})", description); + } else { + desc = "None".to_string(); + } + all_enum_arms.push_str(&format!( + "{}::{} {} => {},\n", + enum_name, variant, payload, desc + )); + } + + let enum_impl = format!( + "impl {} {{ + pub fn get_description(&self) -> Option<&str> {{ + match self {{ + {} + }} + }} + }}", + enum_name, all_enum_arms + ); + enum_impl.parse().unwrap() +} From a43a1d204cf9700113f21d94b3b5790c6fdac25d Mon Sep 17 00:00:00 2001 From: vignesh-j-shetty Date: Sat, 21 Dec 2024 13:19:04 +0530 Subject: [PATCH 066/144] renamed macro crate --- Cargo.lock | 10 +++++----- Cargo.toml | 2 +- core/Cargo.toml | 2 +- core/vdbe/mod.rs | 2 +- {description_derive => macros}/Cargo.toml | 2 +- {description_derive => macros}/src/lib.rs | 0 6 files changed, 9 insertions(+), 9 deletions(-) rename {description_derive => macros}/Cargo.toml (55%) rename {description_derive => macros}/src/lib.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index ff669bf42..53b97bfc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -536,10 +536,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "description_derive" -version = "0.0.0" - [[package]] name = "digest" version = "0.10.7" @@ -1134,7 +1130,6 @@ dependencies = [ "cfg_block", "chrono", "criterion", - "description_derive", "fallible-iterator 0.3.0", "getrandom", "hex", @@ -1144,6 +1139,7 @@ dependencies = [ "julian_day_converter", "libc", "log", + "macros", "mimalloc", "mockall", "nix 0.29.0", @@ -1208,6 +1204,10 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "macros" +version = "0.0.0" + [[package]] name = "memchr" version = "2.7.4" diff --git a/Cargo.toml b/Cargo.toml index 7db7e5803..e81017729 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ members = [ "sqlite3", "core", "simulator", - "test", "description_derive", + "test", "macros", ] exclude = ["perf/latency/limbo"] diff --git a/core/Cargo.toml b/core/Cargo.toml index d93906b3f..58783855a 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -53,7 +53,7 @@ pest = { version = "2.0", optional = true } pest_derive = { version = "2.0", optional = true } rand = "0.8.5" bumpalo = { version = "3.16.0", features = ["collections", "boxed"] } -description_derive = { path = "../description_derive" } +macros = { path = "../macros" } [target.'cfg(not(target_family = "windows"))'.dev-dependencies] pprof = { version = "0.14.0", features = ["criterion", "flamegraph"] } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 1d1a0cb4d..362f60042 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -49,7 +49,7 @@ use std::collections::{BTreeMap, HashMap}; use std::fmt::Display; use std::rc::{Rc, Weak}; pub type BranchOffset = i64; -use description_derive::Description; +use macros::Description; pub type CursorID = usize; pub type PageIdx = usize; diff --git a/description_derive/Cargo.toml b/macros/Cargo.toml similarity index 55% rename from description_derive/Cargo.toml rename to macros/Cargo.toml index 09ffe3de3..e3516da14 100644 --- a/description_derive/Cargo.toml +++ b/macros/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "description_derive" +name = "macros" [lib] proc-macro = true diff --git a/description_derive/src/lib.rs b/macros/src/lib.rs similarity index 100% rename from description_derive/src/lib.rs rename to macros/src/lib.rs From 13f229020ed4414d12306915602a8c7b9a156052 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 21 Dec 2024 09:50:23 +0200 Subject: [PATCH 067/144] 
simulator: Use "mod.rs" for module top-level files Let's use "mod.rs" for the module top-level file as we do in the rest of the Limbo codebase for consistency. --- simulator/{generation.rs => generation/mod.rs} | 0 simulator/{model.rs => model/mod.rs} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename simulator/{generation.rs => generation/mod.rs} (100%) rename simulator/{model.rs => model/mod.rs} (100%) diff --git a/simulator/generation.rs b/simulator/generation/mod.rs similarity index 100% rename from simulator/generation.rs rename to simulator/generation/mod.rs diff --git a/simulator/model.rs b/simulator/model/mod.rs similarity index 100% rename from simulator/model.rs rename to simulator/model/mod.rs From fcab0ae299d6baaaf1120a7a518172aae19d1e36 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 19 Dec 2024 11:45:43 -0500 Subject: [PATCH 068/144] Add uuid support for v4 and v7 --- Cargo.lock | 4 ++ core/Cargo.toml | 4 +- core/function.rs | 18 +++++++++ core/translate/expr.rs | 35 ++++++++++++++++- core/vdbe/mod.rs | 88 +++++++++++++++++++++++++++++++++++++++++- 5 files changed, 145 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 53b97bfc1..0c84456f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1157,6 +1157,7 @@ dependencies = [ "sqlite3-parser", "tempfile", "thiserror 1.0.69", + "uuid", ] [[package]] @@ -2277,6 +2278,9 @@ name = "uuid" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +dependencies = [ + "getrandom", +] [[package]] name = "vcpkg" diff --git a/core/Cargo.toml b/core/Cargo.toml index 58783855a..9051508df 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -14,8 +14,9 @@ name = "limbo_core" path = "lib.rs" [features] -default = ["fs", "json"] +default = ["fs", "json", "uuid"] fs = [] +uuid = ["dep:uuid"] json = [ "dep:jsonb", "dep:pest", @@ -54,6 +55,7 @@ pest_derive = { version = "2.0", optional = 
true } rand = "0.8.5" bumpalo = { version = "3.16.0", features = ["collections", "boxed"] } macros = { path = "../macros" } +uuid = { version = "1.11.0", features = ["v4", "v7"], optional = true } [target.'cfg(not(target_family = "windows"))'.dev-dependencies] pprof = { version = "0.14.0", features = ["criterion", "flamegraph"] } diff --git a/core/function.rs b/core/function.rs index 86c88a1e4..a158c1dce 100644 --- a/core/function.rs +++ b/core/function.rs @@ -91,6 +91,12 @@ pub enum ScalarFunc { ZeroBlob, LastInsertRowid, Replace, + Uuid4, + Uuid4Str, + UuidStr, + UuidBlob, + Uuid7, + Uuid7Str, } impl Display for ScalarFunc { @@ -136,6 +142,12 @@ impl Display for ScalarFunc { ScalarFunc::ZeroBlob => "zeroblob".to_string(), ScalarFunc::LastInsertRowid => "last_insert_rowid".to_string(), ScalarFunc::Replace => "replace".to_string(), + ScalarFunc::Uuid4 => "uuid4".to_string(), + ScalarFunc::UuidStr => "uuid_str".to_string(), + ScalarFunc::UuidBlob => "uuid_blob".to_string(), + ScalarFunc::Uuid7 => "uuid7".to_string(), + ScalarFunc::Uuid7Str => "uuid7_str".to_string(), + ScalarFunc::Uuid4Str => "uuid4_str".to_string(), }; write!(f, "{}", str) } @@ -325,6 +337,12 @@ impl Func { "typeof" => Ok(Func::Scalar(ScalarFunc::Typeof)), "last_insert_rowid" => Ok(Func::Scalar(ScalarFunc::LastInsertRowid)), "unicode" => Ok(Func::Scalar(ScalarFunc::Unicode)), + "uuid4" => Ok(Func::Scalar(ScalarFunc::Uuid4)), + "uuid7" => Ok(Func::Scalar(ScalarFunc::Uuid7)), + "uuid4_str" => Ok(Func::Scalar(ScalarFunc::Uuid4Str)), + "uuid7_str" => Ok(Func::Scalar(ScalarFunc::Uuid7Str)), + "uuid_str" => Ok(Func::Scalar(ScalarFunc::UuidStr)), + "uuid_blob" => Ok(Func::Scalar(ScalarFunc::UuidBlob)), "quote" => Ok(Func::Scalar(ScalarFunc::Quote)), "sqlite_version" => Ok(Func::Scalar(ScalarFunc::SqliteVersion)), "replace" => Ok(Func::Scalar(ScalarFunc::Replace)), diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 363d96a99..9bdd297ac 100644 --- a/core/translate/expr.rs +++ 
b/core/translate/expr.rs @@ -1194,7 +1194,9 @@ pub fn translate_expr( | ScalarFunc::RandomBlob | ScalarFunc::Sign | ScalarFunc::Soundex - | ScalarFunc::ZeroBlob => { + | ScalarFunc::ZeroBlob + | ScalarFunc::UuidStr + | ScalarFunc::UuidBlob => { let args = if let Some(args) = args { if args.len() != 1 { crate::bail_parse_error!( @@ -1226,7 +1228,36 @@ pub fn translate_expr( }); Ok(target_register) } - ScalarFunc::Random => { + ScalarFunc::Uuid7 | ScalarFunc::Uuid7Str => { + if let Some(args) = args { + // can take optional time arg + if args.len() > 1 { + crate::bail_parse_error!( + "{} function with more than 1 argument", + srf.to_string() + ); + } + if let Some(arg) = args.first() { + let regs = program.alloc_register(); + translate_expr( + program, + referenced_tables, + arg, + regs, + precomputed_exprs_to_registers, + )?; + } + } + let regs = program.alloc_register(); + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: regs, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } + ScalarFunc::Random | ScalarFunc::Uuid4 | ScalarFunc::Uuid4Str => { if args.is_some() { crate::bail_parse_error!( "{} function with arguments", diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 362f60042..b8fc85119 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -48,6 +48,8 @@ use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; use std::fmt::Display; use std::rc::{Rc, Weak}; + +use uuid::{ContextV7, Timestamp, Uuid}; pub type BranchOffset = i64; use macros::Description; pub type CursorID = usize; @@ -2395,7 +2397,9 @@ impl Program { | ScalarFunc::RandomBlob | ScalarFunc::Sign | ScalarFunc::Soundex - | ScalarFunc::ZeroBlob => { + | ScalarFunc::ZeroBlob + | ScalarFunc::UuidStr + | ScalarFunc::UuidBlob => { let reg_value = state.registers[*start_reg].borrow_mut(); let result = match scalar_func { ScalarFunc::Sign => exec_sign(reg_value), @@ -2410,6 +2414,8 @@ impl Program { ScalarFunc::RandomBlob => 
Some(exec_randomblob(reg_value)), ScalarFunc::ZeroBlob => Some(exec_zeroblob(reg_value)), ScalarFunc::Soundex => Some(exec_soundex(reg_value)), + ScalarFunc::UuidStr => Some(exec_uuidstr(reg_value)?), + ScalarFunc::UuidBlob => Some(exec_uuidblob(reg_value)?), _ => unreachable!(), }; state.registers[*dest] = result.unwrap_or(OwnedValue::Null); @@ -2428,6 +2434,9 @@ impl Program { ScalarFunc::Random => { state.registers[*dest] = exec_random(); } + ScalarFunc::Uuid4 | ScalarFunc::Uuid4Str => { + state.registers[*dest] = exec_uuid(scalar_func); + } ScalarFunc::Trim => { let reg_value = state.registers[*start_reg].clone(); let pattern_value = state.registers.get(*start_reg + 1).cloned(); @@ -3093,6 +3102,83 @@ fn exec_random() -> OwnedValue { OwnedValue::Integer(random_number) } +enum UuidType { + V4Blob, + V4Str, + V7Blob, + V7Str, +} + +fn exec_uuid(var: &ScalarFunc, time: Option<&OwnedValue>) -> OwnedValue { + match var { + ScalarFunc::Uuid4Str => OwnedValue::Text(Rc::new(Uuid::new_v4().to_string())), + ScalarFunc::Uuid4 => OwnedValue::Blob(Rc::new(Uuid::new_v4().into_bytes().to_vec())), + ScalarFunc::Uuid7 | ScalarFunc::Uuid7Str => match time { + Some(OwnedValue::Integer(i)) => { + let ctx = ContextV7::new(); + if *i < 0 { + // not valid unix timestamp + return OwnedValue::Null; + } + let uuid = Uuid::new_v7(Timestamp::from_unix(ctx, *i as u64, 0)); + match var { + ScalarFunc::Uuid7Str => OwnedValue::Text(Rc::new(uuid.to_string())), + ScalarFunc::Uuid7 => OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec())), + _ => unreachable!(), + } + } + Some(OwnedValue::Text(t)) => { + let uuid = Uuid::new_v7(); + match var { + ScalarFunc::Uuid7Str => OwnedValue::Text(Rc::new(uuid.to_string())), + ScalarFunc::Uuid7 => OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec())), + _ => unreachable!(), + } + } + _ => match var { + ScalarFunc::Uuid7Str => OwnedValue::Text(Rc::new(Uuid::now_v7().to_string())), + ScalarFunc::Uuid7 => 
OwnedValue::Blob(Rc::new(Uuid::now_v7().as_bytes().to_vec())), + _ => unreachable!(), + }, + }, + _ => unreachable!(), + } +} + +fn exec_uuidstr(reg: &OwnedValue) -> Result { + match reg { + OwnedValue::Blob(blob) => { + let uuid = Uuid::from_slice(blob).map_err(|e| LimboError::ParseError(e.to_string()))?; + Ok(OwnedValue::Text(Rc::new(uuid.to_string()))) + } + OwnedValue::Text(val) => { + let uuid = Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string()))?; + Ok(OwnedValue::Text(Rc::new(uuid.to_string()))) + } + OwnedValue::Null => Ok(OwnedValue::Null), + _ => Err(LimboError::ParseError( + "Invalid argument type for UUID function".to_string(), + )), + } +} + +fn exec_uuidblob(reg: &OwnedValue) -> Result { + match reg { + OwnedValue::Text(val) => { + let uuid = Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string()))?; + Ok(OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec()))) + } + OwnedValue::Blob(blob) => { + let uuid = Uuid::from_slice(blob).map_err(|e| LimboError::ParseError(e.to_string()))?; + Ok(OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec()))) + } + OwnedValue::Null => Ok(OwnedValue::Null), + _ => Err(LimboError::ParseError( + "Invalid argument type for UUID function".to_string(), + )), + } +} + fn exec_randomblob(reg: &OwnedValue) -> OwnedValue { let length = match reg { OwnedValue::Integer(i) => *i, From b207f7ded516471164c062616f3d898e914fdec8 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 19 Dec 2024 13:13:31 -0500 Subject: [PATCH 069/144] Give uuidv7 optional unix time arg --- core/vdbe/mod.rs | 65 +++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index b8fc85119..517b839ed 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2420,6 +2420,24 @@ impl Program { }; state.registers[*dest] = result.unwrap_or(OwnedValue::Null); } + ScalarFunc::Uuid7Str | ScalarFunc::Uuid7 => match arg_count { + 0 => { + 
state.registers[*dest] = + exec_uuid(scalar_func, None).unwrap_or(OwnedValue::Null); + } + 1 => { + let reg_value = state.registers[*start_reg].borrow_mut(); + state.registers[*dest] = + exec_uuid(scalar_func, Some(reg_value)) + .unwrap_or(OwnedValue::Null); + } + _ => { + return Err(LimboError::ParseError(format!( + "Invalid number of arguments for Uuid7 function: {}", + arg_count + ))); + } + }, ScalarFunc::Hex => { let reg_value = state.registers[*start_reg].borrow_mut(); let result = exec_hex(reg_value); @@ -2435,7 +2453,7 @@ impl Program { state.registers[*dest] = exec_random(); } ScalarFunc::Uuid4 | ScalarFunc::Uuid4Str => { - state.registers[*dest] = exec_uuid(scalar_func); + state.registers[*dest] = exec_uuid(scalar_func, None)?; } ScalarFunc::Trim => { let reg_value = state.registers[*start_reg].clone(); @@ -3109,38 +3127,29 @@ enum UuidType { V7Str, } -fn exec_uuid(var: &ScalarFunc, time: Option<&OwnedValue>) -> OwnedValue { +fn exec_uuid(var: &ScalarFunc, time: Option<&OwnedValue>) -> Result { match var { - ScalarFunc::Uuid4Str => OwnedValue::Text(Rc::new(Uuid::new_v4().to_string())), - ScalarFunc::Uuid4 => OwnedValue::Blob(Rc::new(Uuid::new_v4().into_bytes().to_vec())), - ScalarFunc::Uuid7 | ScalarFunc::Uuid7Str => match time { - Some(OwnedValue::Integer(i)) => { + ScalarFunc::Uuid4Str => Ok(OwnedValue::Text(Rc::new(Uuid::new_v4().to_string()))), + ScalarFunc::Uuid4 => Ok(OwnedValue::Blob(Rc::new( + Uuid::new_v4().into_bytes().to_vec(), + ))), + ScalarFunc::Uuid7 | ScalarFunc::Uuid7Str => { + let uuid = if let Some(OwnedValue::Integer(ref i)) = time { let ctx = ContextV7::new(); if *i < 0 { - // not valid unix timestamp - return OwnedValue::Null; - } - let uuid = Uuid::new_v7(Timestamp::from_unix(ctx, *i as u64, 0)); - match var { - ScalarFunc::Uuid7Str => OwnedValue::Text(Rc::new(uuid.to_string())), - ScalarFunc::Uuid7 => OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec())), - _ => unreachable!(), + // not valid unix timestamp, error or null? 
+ return Ok(OwnedValue::Null); } - } - Some(OwnedValue::Text(t)) => { - let uuid = Uuid::new_v7(); - match var { - ScalarFunc::Uuid7Str => OwnedValue::Text(Rc::new(uuid.to_string())), - ScalarFunc::Uuid7 => OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec())), - _ => unreachable!(), - } - } - _ => match var { - ScalarFunc::Uuid7Str => OwnedValue::Text(Rc::new(Uuid::now_v7().to_string())), - ScalarFunc::Uuid7 => OwnedValue::Blob(Rc::new(Uuid::now_v7().as_bytes().to_vec())), + Uuid::new_v7(Timestamp::from_unix(ctx, *i as u64, 0)) + } else { + Uuid::now_v7() + }; + return match var { + ScalarFunc::Uuid7Str => Ok(OwnedValue::Text(Rc::new(uuid.to_string()))), + ScalarFunc::Uuid7 => Ok(OwnedValue::Blob(Rc::new(uuid.into_bytes().to_vec()))), _ => unreachable!(), - }, - }, + }; + } _ => unreachable!(), } } From c1561ecbb0d705bdeb913fbe3577b4ba3a9a1b2d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 19 Dec 2024 20:21:33 -0500 Subject: [PATCH 070/144] Tests for uuid funcitons, add compat docs --- COMPAT.md | 13 ++ core/function.rs | 10 +- core/translate/expr.rs | 45 +++---- core/vdbe/mod.rs | 280 ++++++++++++++++++++++++++++++++++++----- 4 files changed, 289 insertions(+), 59 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 213841cb1..38d475680 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -160,6 +160,19 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | upper(X) | Yes | | | zeroblob(N) | Yes | | + +|-------------------------------------------------| +| LibSql / sqlean Scalar | | | +| ---------------------------- | ------ | ------- | +| uuid4() | Yes | uuid version 4 **uuid's are `blob` by default** | +| uuid4_str() | Yes | uuid v4 string alias `gen_random_uuid()` for PG compatibility| +| uuid7(X?) 
| Yes | uuid version 7, Optional arg for seconds since epoch| +| uuid7_timestamp_ms(X) | Yes | Convert a uuid v7 to milliseconds since epoch| +| uuid_str(X) | Yes | Convert a valid uuid to string| +| uuid_blob(X) | Yes | Convert a valid uuid to blob| + + + ### Mathematical functions | Function | Status | Comment | diff --git a/core/function.rs b/core/function.rs index a158c1dce..b17d388bb 100644 --- a/core/function.rs +++ b/core/function.rs @@ -96,7 +96,7 @@ pub enum ScalarFunc { UuidStr, UuidBlob, Uuid7, - Uuid7Str, + Uuid7TS, } impl Display for ScalarFunc { @@ -146,8 +146,8 @@ impl Display for ScalarFunc { ScalarFunc::UuidStr => "uuid_str".to_string(), ScalarFunc::UuidBlob => "uuid_blob".to_string(), ScalarFunc::Uuid7 => "uuid7".to_string(), - ScalarFunc::Uuid7Str => "uuid7_str".to_string(), ScalarFunc::Uuid4Str => "uuid4_str".to_string(), + ScalarFunc::Uuid7TS => "uuid7_timestamp_ms".to_string(), }; write!(f, "{}", str) } @@ -337,12 +337,14 @@ impl Func { "typeof" => Ok(Func::Scalar(ScalarFunc::Typeof)), "last_insert_rowid" => Ok(Func::Scalar(ScalarFunc::LastInsertRowid)), "unicode" => Ok(Func::Scalar(ScalarFunc::Unicode)), + "uuid4_str" => Ok(Func::Scalar(ScalarFunc::Uuid4Str)), "uuid4" => Ok(Func::Scalar(ScalarFunc::Uuid4)), "uuid7" => Ok(Func::Scalar(ScalarFunc::Uuid7)), - "uuid4_str" => Ok(Func::Scalar(ScalarFunc::Uuid4Str)), - "uuid7_str" => Ok(Func::Scalar(ScalarFunc::Uuid7Str)), "uuid_str" => Ok(Func::Scalar(ScalarFunc::UuidStr)), "uuid_blob" => Ok(Func::Scalar(ScalarFunc::UuidBlob)), + "uuid7_timestamp_ms" => Ok(Func::Scalar(ScalarFunc::Uuid7TS)), + // postgres_compatability + "gen_random_uuid" => Ok(Func::Scalar(ScalarFunc::Uuid4Str)), "quote" => Ok(Func::Scalar(ScalarFunc::Quote)), "sqlite_version" => Ok(Func::Scalar(ScalarFunc::SqliteVersion)), "replace" => Ok(Func::Scalar(ScalarFunc::Replace)), diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 9bdd297ac..25c986739 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ 
-1196,7 +1196,8 @@ pub fn translate_expr( | ScalarFunc::Soundex | ScalarFunc::ZeroBlob | ScalarFunc::UuidStr - | ScalarFunc::UuidBlob => { + | ScalarFunc::UuidBlob + | ScalarFunc::Uuid7TS => { let args = if let Some(args) = args { if args.len() != 1 { crate::bail_parse_error!( @@ -1228,30 +1229,30 @@ pub fn translate_expr( }); Ok(target_register) } - ScalarFunc::Uuid7 | ScalarFunc::Uuid7Str => { - if let Some(args) = args { - // can take optional time arg - if args.len() > 1 { - crate::bail_parse_error!( - "{} function with more than 1 argument", - srf.to_string() - ); - } - if let Some(arg) = args.first() { - let regs = program.alloc_register(); - translate_expr( - program, - referenced_tables, - arg, - regs, - precomputed_exprs_to_registers, - )?; - } + ScalarFunc::Uuid7 => { + let args = match args { + Some(args) if args.len() > 3 => crate::bail_parse_error!( + "{} function with more than 2 arguments", + srf.to_string() + ), + Some(args) => args, + None => &vec![], + }; + let mut start_reg = None; + for arg in args.iter() { + let reg = program.alloc_register(); + start_reg = Some(start_reg.unwrap_or(reg)); + translate_expr( + program, + referenced_tables, + arg, + reg, + precomputed_exprs_to_registers, + )?; } - let regs = program.alloc_register(); program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: regs, + start_reg: start_reg.unwrap_or(target_register), dest: target_register, func: func_ctx, }); diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 517b839ed..68cfa264f 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -43,7 +43,7 @@ use datetime::{exec_date, exec_time, exec_unixepoch}; use rand::distributions::{Distribution, Uniform}; use rand::{thread_rng, Rng}; use regex::Regex; -use std::borrow::BorrowMut; +use std::borrow::{Borrow, BorrowMut}; use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; use std::fmt::Display; @@ -2399,7 +2399,8 @@ impl Program { | ScalarFunc::Soundex | ScalarFunc::ZeroBlob | ScalarFunc::UuidStr - 
| ScalarFunc::UuidBlob => { + | ScalarFunc::UuidBlob + | ScalarFunc::Uuid7TS => { let reg_value = state.registers[*start_reg].borrow_mut(); let result = match scalar_func { ScalarFunc::Sign => exec_sign(reg_value), @@ -2416,27 +2417,23 @@ impl Program { ScalarFunc::Soundex => Some(exec_soundex(reg_value)), ScalarFunc::UuidStr => Some(exec_uuidstr(reg_value)?), ScalarFunc::UuidBlob => Some(exec_uuidblob(reg_value)?), + ScalarFunc::Uuid7TS => Some(exec_ts_from_uuid7(reg_value)), _ => unreachable!(), }; state.registers[*dest] = result.unwrap_or(OwnedValue::Null); } - ScalarFunc::Uuid7Str | ScalarFunc::Uuid7 => match arg_count { + ScalarFunc::Uuid7 => match arg_count { 0 => { state.registers[*dest] = exec_uuid(scalar_func, None).unwrap_or(OwnedValue::Null); } 1 => { - let reg_value = state.registers[*start_reg].borrow_mut(); + let reg_value = state.registers[*start_reg].borrow(); state.registers[*dest] = exec_uuid(scalar_func, Some(reg_value)) .unwrap_or(OwnedValue::Null); } - _ => { - return Err(LimboError::ParseError(format!( - "Invalid number of arguments for Uuid7 function: {}", - arg_count - ))); - } + _ => unreachable!(), }, ScalarFunc::Hex => { let reg_value = state.registers[*start_reg].borrow_mut(); @@ -3120,35 +3117,25 @@ fn exec_random() -> OwnedValue { OwnedValue::Integer(random_number) } -enum UuidType { - V4Blob, - V4Str, - V7Blob, - V7Str, -} - -fn exec_uuid(var: &ScalarFunc, time: Option<&OwnedValue>) -> Result { +fn exec_uuid(var: &ScalarFunc, sec: Option<&OwnedValue>) -> Result { match var { - ScalarFunc::Uuid4Str => Ok(OwnedValue::Text(Rc::new(Uuid::new_v4().to_string()))), ScalarFunc::Uuid4 => Ok(OwnedValue::Blob(Rc::new( Uuid::new_v4().into_bytes().to_vec(), ))), - ScalarFunc::Uuid7 | ScalarFunc::Uuid7Str => { - let uuid = if let Some(OwnedValue::Integer(ref i)) = time { - let ctx = ContextV7::new(); - if *i < 0 { - // not valid unix timestamp, error or null? 
- return Ok(OwnedValue::Null); + ScalarFunc::Uuid4Str => Ok(OwnedValue::Text(Rc::new(Uuid::new_v4().to_string()))), + ScalarFunc::Uuid7 => { + let uuid = match sec { + Some(OwnedValue::Integer(ref seconds)) => { + let ctx = ContextV7::new(); + if *seconds < 0 { + // not valid unix timestamp, error or null? + return Ok(OwnedValue::Null); + } + Uuid::new_v7(Timestamp::from_unix(ctx, *seconds as u64, 0)) } - Uuid::new_v7(Timestamp::from_unix(ctx, *i as u64, 0)) - } else { - Uuid::now_v7() - }; - return match var { - ScalarFunc::Uuid7Str => Ok(OwnedValue::Text(Rc::new(uuid.to_string()))), - ScalarFunc::Uuid7 => Ok(OwnedValue::Blob(Rc::new(uuid.into_bytes().to_vec()))), - _ => unreachable!(), + _ => Uuid::now_v7(), }; + Ok(OwnedValue::Blob(Rc::new(uuid.into_bytes().to_vec()))) } _ => unreachable!(), } @@ -3188,6 +3175,35 @@ fn exec_uuidblob(reg: &OwnedValue) -> Result { } } +fn exec_ts_from_uuid7(reg: &OwnedValue) -> OwnedValue { + let uuid = match reg { + OwnedValue::Blob(blob) => { + Uuid::from_slice(blob).map_err(|e| LimboError::ParseError(e.to_string())) + } + OwnedValue::Text(val) => { + Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string())) + } + _ => Err(LimboError::ParseError( + "Invalid argument type for UUID function".to_string(), + )), + }; + match uuid { + Ok(uuid) => OwnedValue::Integer(uuid_to_unix(uuid.as_bytes()) as i64), + // display error? 
sqlean seems to set value to null + Err(_) => OwnedValue::Null, + } +} + +#[inline(always)] +fn uuid_to_unix(uuid: &[u8; 16]) -> u64 { + ((uuid[0] as u64) << 40) + | ((uuid[1] as u64) << 32) + | ((uuid[2] as u64) << 24) + | ((uuid[3] as u64) << 16) + | ((uuid[4] as u64) << 8) + | (uuid[5] as u64) +} + fn exec_randomblob(reg: &OwnedValue) -> OwnedValue { let length = match reg { OwnedValue::Integer(i) => *i, @@ -4954,4 +4970,202 @@ mod tests { expected_str ); } + + #[test] + fn test_exec_uuid_v4blob() { + use super::{exec_uuid, ScalarFunc}; + use uuid::Uuid; + let func = ScalarFunc::Uuid4; + let owned_val = exec_uuid(&func, None); + match owned_val { + Ok(OwnedValue::Blob(blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(&blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 4); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v4str() { + use super::{exec_uuid, ScalarFunc}; + use uuid::Uuid; + let func = ScalarFunc::Uuid4Str; + let owned_val = exec_uuid(&func, None); + match owned_val { + Ok(OwnedValue::Text(v4str)) => { + assert_eq!(v4str.len(), 36); + let uuid = Uuid::parse_str(&v4str); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 4); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v7_now() { + use super::{exec_uuid, ScalarFunc}; + use uuid::Uuid; + let func = ScalarFunc::Uuid7; + let owned_val = exec_uuid(&func, None); + match owned_val { + Ok(OwnedValue::Blob(blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(&blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v7_with_input() { + use super::{exec_uuid, ScalarFunc}; + use uuid::Uuid; + let func = ScalarFunc::Uuid7; + let owned_val = exec_uuid(&func, 
Some(&OwnedValue::Integer(946702800))); + match owned_val { + Ok(OwnedValue::Blob(blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(&blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v7_now_to_timestamp() { + use super::{exec_ts_from_uuid7, exec_uuid, ScalarFunc}; + use uuid::Uuid; + let func = ScalarFunc::Uuid7; + let owned_val = exec_uuid(&func, None); + match owned_val { + Ok(OwnedValue::Blob(ref blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + let result = exec_ts_from_uuid7(&owned_val.expect("uuid7")); + if let OwnedValue::Integer(ref ts) = result { + let unixnow = (std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() + * 1000) as i64; + assert!(*ts >= unixnow - 1000); + } + } + + #[test] + fn test_exec_uuid_v7_to_timestamp() { + use super::{exec_ts_from_uuid7, exec_uuid, ScalarFunc}; + use uuid::Uuid; + let func = ScalarFunc::Uuid7; + let owned_val = exec_uuid(&func, Some(&OwnedValue::Integer(946702800))); + match owned_val { + Ok(OwnedValue::Blob(ref blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + let result = exec_ts_from_uuid7(&owned_val.expect("uuid7")); + assert_eq!(result, OwnedValue::Integer(946702800 * 1000)); + if let OwnedValue::Integer(ts) = result { + let time = chrono::DateTime::from_timestamp(ts / 1000, 0); + assert_eq!( + time.unwrap(), + "2000-01-01T05:00:00Z" + .parse::>() + .unwrap() + ); + } + } + + #[test] + fn test_exec_uuid_v4_str_to_blob() { + use super::{exec_uuid, exec_uuidblob, ScalarFunc}; + use 
uuid::Uuid; + let owned_val = exec_uuidblob( + &exec_uuid(&ScalarFunc::Uuid4Str, None).expect("uuid v4 string to generate"), + ); + match owned_val { + Ok(OwnedValue::Blob(blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(&blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 4); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v7_str_to_blob() { + use super::{exec_uuid, exec_uuidblob, exec_uuidstr, ScalarFunc}; + use uuid::Uuid; + // convert a v7 blob to a string then back to a blob + let owned_val = exec_uuidblob( + &exec_uuidstr(&exec_uuid(&ScalarFunc::Uuid7, None).expect("uuid v7 blob to generate")) + .expect("uuid v7 string to generate"), + ); + match owned_val { + Ok(OwnedValue::Blob(blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(&blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v4_blob_to_str() { + use super::{exec_uuid, exec_uuidstr, ScalarFunc}; + use uuid::Uuid; + // convert a v4 blob to a string + let owned_val = + exec_uuidstr(&exec_uuid(&ScalarFunc::Uuid4, None).expect("uuid v7 blob to generate")); + match owned_val { + Ok(OwnedValue::Text(v4str)) => { + assert_eq!(v4str.len(), 36); + let uuid = Uuid::parse_str(&v4str); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 4); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v7_blob_to_str() { + use super::{exec_uuid, exec_uuidstr, ScalarFunc}; + use uuid::Uuid; + // convert a v7 blob to a string + let owned_val = exec_uuidstr( + &exec_uuid(&ScalarFunc::Uuid7, Some(&OwnedValue::Integer(123456789))) + .expect("uuid v7 blob to generate"), + ); + match owned_val { + Ok(OwnedValue::Text(v7str)) => { + assert_eq!(v7str.len(), 36); + let uuid = Uuid::parse_str(&v7str); + 
assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } } From f96f2896097356fcfaba80dc82d7da13fc72c82b Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 19 Dec 2024 20:25:52 -0500 Subject: [PATCH 071/144] Remove unnecessary nanos arg from uuid7, add insn const --- COMPAT.md | 22 +++++++++++++--------- core/translate/expr.rs | 11 +++++++---- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 38d475680..a7baaca83 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -161,15 +161,6 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | zeroblob(N) | Yes | | -|-------------------------------------------------| -| LibSql / sqlean Scalar | | | -| ---------------------------- | ------ | ------- | -| uuid4() | Yes | uuid version 4 **uuid's are `blob` by default** | -| uuid4_str() | Yes | uuid v4 string alias `gen_random_uuid()` for PG compatibility| -| uuid7(X?) | Yes | uuid version 7, Optional arg for seconds since epoch| -| uuid7_timestamp_ms(X) | Yes | Convert a uuid v7 to milliseconds since epoch| -| uuid_str(X) | Yes | Convert a valid uuid to string| -| uuid_blob(X) | Yes | Convert a valid uuid to blob| @@ -462,3 +453,16 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | Variable | No | | VerifyCookie | No | | Yield | Yes | + + + + +| LibSql Compatibility / Extensions| | | +| ---------------------------- | ------ | ------- | +| **UUID** | | UUID's in limbo are `blobs` by default| +| uuid4() | Yes | uuid version 4 | +| uuid4_str() | Yes | uuid v4 string alias `gen_random_uuid()` for PG compatibility| +| uuid7(X?) 
| Yes | uuid version 7, Optional arg for seconds since epoch| +| uuid7_timestamp_ms(X) | Yes | Convert a uuid v7 to milliseconds since epoch| +| uuid_str(X) | Yes | Convert a valid uuid to string| +| uuid_blob(X) | Yes | Convert a valid uuid to blob| diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 25c986739..ea81d9ead 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1231,17 +1231,17 @@ pub fn translate_expr( } ScalarFunc::Uuid7 => { let args = match args { - Some(args) if args.len() > 3 => crate::bail_parse_error!( - "{} function with more than 2 arguments", + Some(args) if args.len() > 1 => crate::bail_parse_error!( + "{} function with more than 1 argument", srf.to_string() ), Some(args) => args, None => &vec![], }; let mut start_reg = None; - for arg in args.iter() { + if let Some(arg) = args.first() { let reg = program.alloc_register(); - start_reg = Some(start_reg.unwrap_or(reg)); + start_reg = Some(reg); translate_expr( program, referenced_tables, @@ -1249,6 +1249,9 @@ pub fn translate_expr( reg, precomputed_exprs_to_registers, )?; + if let ast::Expr::Literal(_) = arg { + program.mark_last_insn_constant() + } } program.emit_insn(Insn::Function { constant_mask: 0, From 2fcae80902bfe9af004548311e690702b7b21c6f Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 20 Dec 2024 15:17:50 -0500 Subject: [PATCH 072/144] Create ext directory for outside funcs, add uuid to ext dir --- core/Cargo.toml | 1 + core/ext/mod.rs | 30 ++++ core/ext/uuid.rs | 334 +++++++++++++++++++++++++++++++++++++++ core/function.rs | 31 +--- core/lib.rs | 1 + core/translate/expr.rs | 127 ++++++++++----- core/vdbe/mod.rs | 347 +++++------------------------------------ 7 files changed, 501 insertions(+), 370 deletions(-) create mode 100644 core/ext/mod.rs create mode 100644 core/ext/uuid.rs diff --git a/core/Cargo.toml b/core/Cargo.toml index 9051508df..25a0c6c90 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -22,6 +22,7 @@ json = [ "dep:pest", 
"dep:pest_derive", ] +uuid = ["dep:uuid"] [target.'cfg(target_os = "linux")'.dependencies] io-uring = "0.6.1" diff --git a/core/ext/mod.rs b/core/ext/mod.rs new file mode 100644 index 000000000..fea543869 --- /dev/null +++ b/core/ext/mod.rs @@ -0,0 +1,30 @@ +#[cfg(feature = "uuid")] +mod uuid; +#[cfg(feature = "uuid")] +pub use uuid::{exec_ts_from_uuid7, exec_uuid, exec_uuidblob, exec_uuidstr, UuidFunc}; + +#[derive(Debug, Clone, PartialEq)] +pub enum ExtFunc { + #[cfg(feature = "uuid")] + Uuid(UuidFunc), +} + +impl std::fmt::Display for ExtFunc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + #[cfg(feature = "uuid")] + ExtFunc::Uuid(uuidfn) => write!(f, "{}", uuidfn), + _ => write!(f, "unknown"), + } + } +} + +impl ExtFunc { + pub fn resolve_function(name: &str, num_args: usize) -> Result { + match name { + #[cfg(feature = "uuid")] + name => UuidFunc::resolve_function(name, num_args), + _ => Err(()), + } + } +} diff --git a/core/ext/uuid.rs b/core/ext/uuid.rs new file mode 100644 index 000000000..aa717c13d --- /dev/null +++ b/core/ext/uuid.rs @@ -0,0 +1,334 @@ +use super::ExtFunc; +use crate::{types::OwnedValue, LimboError}; +use std::rc::Rc; +use uuid::{ContextV7, Timestamp, Uuid}; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum UuidFunc { + Uuid4Str, + Uuid4, + Uuid7, + Uuid7TS, + UuidStr, + UuidBlob, +} + +impl UuidFunc { + pub fn resolve_function(name: &str, num_args: usize) -> Result { + match name { + "uuid4_str" => Ok(ExtFunc::Uuid(UuidFunc::Uuid4Str)), + "uuid4" => Ok(ExtFunc::Uuid(UuidFunc::Uuid4)), + "uuid7" if num_args < 2 => Ok(ExtFunc::Uuid(UuidFunc::Uuid7)), + "uuid_str" if num_args == 1 => Ok(ExtFunc::Uuid(UuidFunc::UuidStr)), + "uuid_blob" if num_args == 1 => Ok(ExtFunc::Uuid(UuidFunc::UuidBlob)), + "uuid7_timestamp_ms" if num_args == 1 => Ok(ExtFunc::Uuid(UuidFunc::Uuid7TS)), + // postgres_compatability + "gen_random_uuid" => Ok(ExtFunc::Uuid(UuidFunc::Uuid4Str)), + _ => Err(()), + } + } +} + +impl 
std::fmt::Display for UuidFunc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + UuidFunc::Uuid4Str => write!(f, "uuid4_str"), + UuidFunc::Uuid4 => write!(f, "uuid4"), + UuidFunc::Uuid7 => write!(f, "uuid7"), + UuidFunc::Uuid7TS => write!(f, "uuid7_timestamp_ms"), + UuidFunc::UuidStr => write!(f, "uuid_str"), + UuidFunc::UuidBlob => write!(f, "uuid_blob"), + } + } +} + +pub fn exec_uuid(var: &UuidFunc, sec: Option<&OwnedValue>) -> crate::Result { + match var { + UuidFunc::Uuid4 => Ok(OwnedValue::Blob(Rc::new( + Uuid::new_v4().into_bytes().to_vec(), + ))), + UuidFunc::Uuid4Str => Ok(OwnedValue::Text(Rc::new(Uuid::new_v4().to_string()))), + UuidFunc::Uuid7 => { + let uuid = match sec { + Some(OwnedValue::Integer(ref seconds)) => { + let ctx = ContextV7::new(); + if *seconds < 0 { + // not valid unix timestamp, error or null? + return Ok(OwnedValue::Null); + } + Uuid::new_v7(Timestamp::from_unix(ctx, *seconds as u64, 0)) + } + _ => Uuid::now_v7(), + }; + Ok(OwnedValue::Blob(Rc::new(uuid.into_bytes().to_vec()))) + } + _ => unreachable!(), + } +} + +pub fn exec_uuidstr(reg: &OwnedValue) -> crate::Result { + match reg { + OwnedValue::Blob(blob) => { + let uuid = Uuid::from_slice(blob).map_err(|e| LimboError::ParseError(e.to_string()))?; + Ok(OwnedValue::Text(Rc::new(uuid.to_string()))) + } + OwnedValue::Text(val) => { + let uuid = Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string()))?; + Ok(OwnedValue::Text(Rc::new(uuid.to_string()))) + } + OwnedValue::Null => Ok(OwnedValue::Null), + _ => Err(LimboError::ParseError( + "Invalid argument type for UUID function".to_string(), + )), + } +} + +pub fn exec_uuidblob(reg: &OwnedValue) -> crate::Result { + match reg { + OwnedValue::Text(val) => { + let uuid = Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string()))?; + Ok(OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec()))) + } + OwnedValue::Blob(blob) => { + let uuid = Uuid::from_slice(blob).map_err(|e| 
LimboError::ParseError(e.to_string()))?; + Ok(OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec()))) + } + OwnedValue::Null => Ok(OwnedValue::Null), + _ => Err(LimboError::ParseError( + "Invalid argument type for UUID function".to_string(), + )), + } +} + +pub fn exec_ts_from_uuid7(reg: &OwnedValue) -> OwnedValue { + let uuid = match reg { + OwnedValue::Blob(blob) => { + Uuid::from_slice(blob).map_err(|e| LimboError::ParseError(e.to_string())) + } + OwnedValue::Text(val) => { + Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string())) + } + _ => Err(LimboError::ParseError( + "Invalid argument type for UUID function".to_string(), + )), + }; + match uuid { + Ok(uuid) => OwnedValue::Integer(uuid_to_unix(uuid.as_bytes()) as i64), + // display error? sqlean seems to set value to null + Err(_) => OwnedValue::Null, + } +} + +#[inline(always)] +fn uuid_to_unix(uuid: &[u8; 16]) -> u64 { + ((uuid[0] as u64) << 40) + | ((uuid[1] as u64) << 32) + | ((uuid[2] as u64) << 24) + | ((uuid[3] as u64) << 16) + | ((uuid[4] as u64) << 8) + | (uuid[5] as u64) +} + +#[cfg(test)] +#[cfg(feature = "uuid")] +pub mod test { + use super::UuidFunc; + use crate::types::OwnedValue; + #[test] + fn test_exec_uuid_v4blob() { + use super::exec_uuid; + use uuid::Uuid; + let func = UuidFunc::Uuid4; + let owned_val = exec_uuid(&func, None); + match owned_val { + Ok(OwnedValue::Blob(blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(&blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 4); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v4str() { + use super::{exec_uuid, UuidFunc}; + use uuid::Uuid; + let func = UuidFunc::Uuid4Str; + let owned_val = exec_uuid(&func, None); + match owned_val { + Ok(OwnedValue::Text(v4str)) => { + assert_eq!(v4str.len(), 36); + let uuid = Uuid::parse_str(&v4str); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 4); + } + _ => 
panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v7_now() { + use super::{exec_uuid, UuidFunc}; + use uuid::Uuid; + let func = UuidFunc::Uuid7; + let owned_val = exec_uuid(&func, None); + match owned_val { + Ok(OwnedValue::Blob(blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(&blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v7_with_input() { + use super::{exec_uuid, UuidFunc}; + use uuid::Uuid; + let func = UuidFunc::Uuid7; + let owned_val = exec_uuid(&func, Some(&OwnedValue::Integer(946702800))); + match owned_val { + Ok(OwnedValue::Blob(blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(&blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v7_now_to_timestamp() { + use super::{exec_ts_from_uuid7, exec_uuid, UuidFunc}; + use uuid::Uuid; + let func = UuidFunc::Uuid7; + let owned_val = exec_uuid(&func, None); + match owned_val { + Ok(OwnedValue::Blob(ref blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + let result = exec_ts_from_uuid7(&owned_val.expect("uuid7")); + if let OwnedValue::Integer(ref ts) = result { + let unixnow = (std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() + * 1000) as i64; + assert!(*ts >= unixnow - 1000); + } + } + + #[test] + fn test_exec_uuid_v7_to_timestamp() { + use super::{exec_ts_from_uuid7, exec_uuid, UuidFunc}; + use uuid::Uuid; + let func = UuidFunc::Uuid7; + let owned_val = exec_uuid(&func, Some(&OwnedValue::Integer(946702800))); + match owned_val { + Ok(OwnedValue::Blob(ref 
blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + let result = exec_ts_from_uuid7(&owned_val.expect("uuid7")); + assert_eq!(result, OwnedValue::Integer(946702800 * 1000)); + if let OwnedValue::Integer(ts) = result { + let time = chrono::DateTime::from_timestamp(ts / 1000, 0); + assert_eq!( + time.unwrap(), + "2000-01-01T05:00:00Z" + .parse::>() + .unwrap() + ); + } + } + + #[test] + fn test_exec_uuid_v4_str_to_blob() { + use super::{exec_uuid, exec_uuidblob, UuidFunc}; + use uuid::Uuid; + let owned_val = exec_uuidblob( + &exec_uuid(&UuidFunc::Uuid4Str, None).expect("uuid v4 string to generate"), + ); + match owned_val { + Ok(OwnedValue::Blob(blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(&blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 4); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v7_str_to_blob() { + use super::{exec_uuid, exec_uuidblob, exec_uuidstr, UuidFunc}; + use uuid::Uuid; + // convert a v7 blob to a string then back to a blob + let owned_val = exec_uuidblob( + &exec_uuidstr(&exec_uuid(&UuidFunc::Uuid7, None).expect("uuid v7 blob to generate")) + .expect("uuid v7 string to generate"), + ); + match owned_val { + Ok(OwnedValue::Blob(blob)) => { + assert_eq!(blob.len(), 16); + let uuid = Uuid::from_slice(&blob); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v4_blob_to_str() { + use super::{exec_uuid, exec_uuidstr, UuidFunc}; + use uuid::Uuid; + // convert a v4 blob to a string + let owned_val = + exec_uuidstr(&exec_uuid(&UuidFunc::Uuid4, None).expect("uuid v7 blob to generate")); + match owned_val { + Ok(OwnedValue::Text(v4str)) => { + 
assert_eq!(v4str.len(), 36); + let uuid = Uuid::parse_str(&v4str); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 4); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } + + #[test] + fn test_exec_uuid_v7_blob_to_str() { + use super::{exec_uuid, exec_uuidstr}; + use uuid::Uuid; + // convert a v7 blob to a string + let owned_val = exec_uuidstr( + &exec_uuid(&UuidFunc::Uuid7, Some(&OwnedValue::Integer(123456789))) + .expect("uuid v7 blob to generate"), + ); + match owned_val { + Ok(OwnedValue::Text(v7str)) => { + assert_eq!(v7str.len(), 36); + let uuid = Uuid::parse_str(&v7str); + assert!(uuid.is_ok()); + assert_eq!(uuid.unwrap().get_version_num(), 7); + } + _ => panic!("exec_uuid did not return a Blob variant"), + } + } +} diff --git a/core/function.rs b/core/function.rs index b17d388bb..7d97432ca 100644 --- a/core/function.rs +++ b/core/function.rs @@ -1,6 +1,6 @@ +use crate::ext::ExtFunc; use std::fmt; use std::fmt::Display; - #[cfg(feature = "json")] #[derive(Debug, Clone, PartialEq)] pub enum JsonFunc { @@ -91,12 +91,6 @@ pub enum ScalarFunc { ZeroBlob, LastInsertRowid, Replace, - Uuid4, - Uuid4Str, - UuidStr, - UuidBlob, - Uuid7, - Uuid7TS, } impl Display for ScalarFunc { @@ -142,12 +136,6 @@ impl Display for ScalarFunc { ScalarFunc::ZeroBlob => "zeroblob".to_string(), ScalarFunc::LastInsertRowid => "last_insert_rowid".to_string(), ScalarFunc::Replace => "replace".to_string(), - ScalarFunc::Uuid4 => "uuid4".to_string(), - ScalarFunc::UuidStr => "uuid_str".to_string(), - ScalarFunc::UuidBlob => "uuid_blob".to_string(), - ScalarFunc::Uuid7 => "uuid7".to_string(), - ScalarFunc::Uuid4Str => "uuid4_str".to_string(), - ScalarFunc::Uuid7TS => "uuid7_timestamp_ms".to_string(), }; write!(f, "{}", str) } @@ -268,13 +256,14 @@ impl Display for MathFunc { } } -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq)] pub enum Func { Agg(AggFunc), Scalar(ScalarFunc), Math(MathFunc), #[cfg(feature = "json")] Json(JsonFunc), + 
Extention(ExtFunc), } impl Display for Func { @@ -285,6 +274,7 @@ impl Display for Func { Func::Math(math_func) => write!(f, "{}", math_func), #[cfg(feature = "json")] Func::Json(json_func) => write!(f, "{}", json_func), + Func::Extention(ext_func) => write!(f, "{}", ext_func), } } } @@ -337,14 +327,6 @@ impl Func { "typeof" => Ok(Func::Scalar(ScalarFunc::Typeof)), "last_insert_rowid" => Ok(Func::Scalar(ScalarFunc::LastInsertRowid)), "unicode" => Ok(Func::Scalar(ScalarFunc::Unicode)), - "uuid4_str" => Ok(Func::Scalar(ScalarFunc::Uuid4Str)), - "uuid4" => Ok(Func::Scalar(ScalarFunc::Uuid4)), - "uuid7" => Ok(Func::Scalar(ScalarFunc::Uuid7)), - "uuid_str" => Ok(Func::Scalar(ScalarFunc::UuidStr)), - "uuid_blob" => Ok(Func::Scalar(ScalarFunc::UuidBlob)), - "uuid7_timestamp_ms" => Ok(Func::Scalar(ScalarFunc::Uuid7TS)), - // postgres_compatability - "gen_random_uuid" => Ok(Func::Scalar(ScalarFunc::Uuid4Str)), "quote" => Ok(Func::Scalar(ScalarFunc::Quote)), "sqlite_version" => Ok(Func::Scalar(ScalarFunc::SqliteVersion)), "replace" => Ok(Func::Scalar(ScalarFunc::Replace)), @@ -386,7 +368,10 @@ impl Func { "tan" => Ok(Func::Math(MathFunc::Tan)), "tanh" => Ok(Func::Math(MathFunc::Tanh)), "trunc" => Ok(Func::Math(MathFunc::Trunc)), - _ => Err(()), + _ => match ExtFunc::resolve_function(name, arg_count) { + Ok(ext_func) => Ok(Func::Extention(ext_func)), + Err(_) => Err(()), + }, } } } diff --git a/core/lib.rs b/core/lib.rs index 1f5668d76..79e06abfb 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -1,4 +1,5 @@ mod error; +mod ext; mod function; mod io; #[cfg(feature = "json")] diff --git a/core/translate/expr.rs b/core/translate/expr.rs index ea81d9ead..523be6e51 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,5 +1,7 @@ use sqlite3_parser::ast::{self, UnaryOperator}; +#[cfg(feature = "uuid")] +use crate::ext::{ExtFunc, UuidFunc}; #[cfg(feature = "json")] use crate::function::JsonFunc; use crate::function::{AggFunc, Func, FuncCtx, MathFuncArity, ScalarFunc}; 
@@ -1194,10 +1196,7 @@ pub fn translate_expr( | ScalarFunc::RandomBlob | ScalarFunc::Sign | ScalarFunc::Soundex - | ScalarFunc::ZeroBlob - | ScalarFunc::UuidStr - | ScalarFunc::UuidBlob - | ScalarFunc::Uuid7TS => { + | ScalarFunc::ZeroBlob => { let args = if let Some(args) = args { if args.len() != 1 { crate::bail_parse_error!( @@ -1229,39 +1228,7 @@ pub fn translate_expr( }); Ok(target_register) } - ScalarFunc::Uuid7 => { - let args = match args { - Some(args) if args.len() > 1 => crate::bail_parse_error!( - "{} function with more than 1 argument", - srf.to_string() - ), - Some(args) => args, - None => &vec![], - }; - let mut start_reg = None; - if let Some(arg) = args.first() { - let reg = program.alloc_register(); - start_reg = Some(reg); - translate_expr( - program, - referenced_tables, - arg, - reg, - precomputed_exprs_to_registers, - )?; - if let ast::Expr::Literal(_) = arg { - program.mark_last_insn_constant() - } - } - program.emit_insn(Insn::Function { - constant_mask: 0, - start_reg: start_reg.unwrap_or(target_register), - dest: target_register, - func: func_ctx, - }); - Ok(target_register) - } - ScalarFunc::Random | ScalarFunc::Uuid4 | ScalarFunc::Uuid4Str => { + ScalarFunc::Random => { if args.is_some() { crate::bail_parse_error!( "{} function with arguments", @@ -1648,6 +1615,92 @@ pub fn translate_expr( } } } + Func::Extention(ext_func) => match ext_func { + #[cfg(feature = "uuid")] + ExtFunc::Uuid(ref uuid_fn) => match uuid_fn { + UuidFunc::UuidStr | UuidFunc::UuidBlob | UuidFunc::Uuid7TS => { + let args = if let Some(args) = args { + if args.len() != 1 { + crate::bail_parse_error!( + "{} function with not exactly 1 argument", + ext_func.to_string() + ); + } + args + } else { + crate::bail_parse_error!( + "{} function with no arguments", + ext_func.to_string() + ); + }; + + let regs = program.alloc_register(); + translate_expr( + program, + referenced_tables, + &args[0], + regs, + precomputed_exprs_to_registers, + )?; + 
program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: regs, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } + UuidFunc::Uuid4 | UuidFunc::Uuid4Str => { + if args.is_some() { + crate::bail_parse_error!( + "{} function with arguments", + ext_func.to_string() + ); + } + let regs = program.alloc_register(); + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: regs, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } + UuidFunc::Uuid7 => { + let args = match args { + Some(args) if args.len() > 1 => crate::bail_parse_error!( + "{} function with more than 1 argument", + ext_func.to_string() + ), + Some(args) => args, + None => &vec![], + }; + let mut start_reg = None; + if let Some(arg) = args.first() { + let reg = program.alloc_register(); + start_reg = Some(reg); + translate_expr( + program, + referenced_tables, + arg, + reg, + precomputed_exprs_to_registers, + )?; + if let ast::Expr::Literal(_) = arg { + program.mark_last_insn_constant() + } + } + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: start_reg.unwrap_or(target_register), + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } + }, + _ => unreachable!("{ext_func} not implemented yet"), + }, Func::Math(math_func) => match math_func.arity() { MathFuncArity::Nullary => { if args.is_some() { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 68cfa264f..b5eb9d60f 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -24,7 +24,9 @@ pub mod sorter; mod datetime; use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY}; -use crate::function::{AggFunc, FuncCtx, MathFunc, MathFuncArity, ScalarFunc}; +#[cfg(feature = "uuid")] +use crate::ext::{exec_ts_from_uuid7, exec_uuid, exec_uuidblob, exec_uuidstr, ExtFunc, UuidFunc}; +use crate::function::{AggFunc, Func, FuncCtx, MathFunc, MathFuncArity, ScalarFunc}; use crate::pseudo::PseudoCursor; use crate::schema::Table; use 
crate::storage::sqlite3_ondisk::DatabaseHeader; @@ -46,10 +48,10 @@ use regex::Regex; use std::borrow::{Borrow, BorrowMut}; use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; -use std::fmt::Display; use std::rc::{Rc, Weak}; use uuid::{ContextV7, Timestamp, Uuid}; + pub type BranchOffset = i64; use macros::Description; pub type CursorID = usize; @@ -2397,10 +2399,7 @@ impl Program { | ScalarFunc::RandomBlob | ScalarFunc::Sign | ScalarFunc::Soundex - | ScalarFunc::ZeroBlob - | ScalarFunc::UuidStr - | ScalarFunc::UuidBlob - | ScalarFunc::Uuid7TS => { + | ScalarFunc::ZeroBlob => { let reg_value = state.registers[*start_reg].borrow_mut(); let result = match scalar_func { ScalarFunc::Sign => exec_sign(reg_value), @@ -2415,26 +2414,10 @@ impl Program { ScalarFunc::RandomBlob => Some(exec_randomblob(reg_value)), ScalarFunc::ZeroBlob => Some(exec_zeroblob(reg_value)), ScalarFunc::Soundex => Some(exec_soundex(reg_value)), - ScalarFunc::UuidStr => Some(exec_uuidstr(reg_value)?), - ScalarFunc::UuidBlob => Some(exec_uuidblob(reg_value)?), - ScalarFunc::Uuid7TS => Some(exec_ts_from_uuid7(reg_value)), _ => unreachable!(), }; state.registers[*dest] = result.unwrap_or(OwnedValue::Null); } - ScalarFunc::Uuid7 => match arg_count { - 0 => { - state.registers[*dest] = - exec_uuid(scalar_func, None).unwrap_or(OwnedValue::Null); - } - 1 => { - let reg_value = state.registers[*start_reg].borrow(); - state.registers[*dest] = - exec_uuid(scalar_func, Some(reg_value)) - .unwrap_or(OwnedValue::Null); - } - _ => unreachable!(), - }, ScalarFunc::Hex => { let reg_value = state.registers[*start_reg].borrow_mut(); let result = exec_hex(reg_value); @@ -2449,9 +2432,6 @@ impl Program { ScalarFunc::Random => { state.registers[*dest] = exec_random(); } - ScalarFunc::Uuid4 | ScalarFunc::Uuid4Str => { - state.registers[*dest] = exec_uuid(scalar_func, None)?; - } ScalarFunc::Trim => { let reg_value = state.registers[*start_reg].clone(); let pattern_value = state.registers.get(*start_reg + 
1).cloned(); @@ -2555,6 +2535,38 @@ impl Program { state.registers[*dest] = exec_replace(source, pattern, replacement); } }, + Func::Extention(extfn) => match extfn { + #[cfg(feature = "uuid")] + ExtFunc::Uuid(uuidfn) => match uuidfn { + UuidFunc::Uuid4 | UuidFunc::Uuid4Str => { + state.registers[*dest] = exec_uuid(uuidfn, None)? + } + UuidFunc::Uuid7 => match arg_count { + 0 => { + state.registers[*dest] = + exec_uuid(uuidfn, None).unwrap_or(OwnedValue::Null); + } + 1 => { + let reg_value = state.registers[*start_reg].borrow(); + state.registers[*dest] = exec_uuid(uuidfn, Some(reg_value)) + .unwrap_or(OwnedValue::Null); + } + _ => unreachable!(), + }, + _ => { + // remaining accept 1 arg + let reg_value = state.registers[*start_reg].borrow(); + state.registers[*dest] = match uuidfn { + UuidFunc::Uuid7TS => Some(exec_ts_from_uuid7(reg_value)), + UuidFunc::UuidStr => exec_uuidstr(reg_value).ok(), + UuidFunc::UuidBlob => exec_uuidblob(reg_value).ok(), + _ => unreachable!(), + } + .unwrap_or(OwnedValue::Null); + } + }, + _ => unreachable!(), // when more extension types are added + }, crate::function::Func::Math(math_func) => match math_func.arity() { MathFuncArity::Nullary => match math_func { MathFunc::Pi => { @@ -3117,93 +3129,6 @@ fn exec_random() -> OwnedValue { OwnedValue::Integer(random_number) } -fn exec_uuid(var: &ScalarFunc, sec: Option<&OwnedValue>) -> Result { - match var { - ScalarFunc::Uuid4 => Ok(OwnedValue::Blob(Rc::new( - Uuid::new_v4().into_bytes().to_vec(), - ))), - ScalarFunc::Uuid4Str => Ok(OwnedValue::Text(Rc::new(Uuid::new_v4().to_string()))), - ScalarFunc::Uuid7 => { - let uuid = match sec { - Some(OwnedValue::Integer(ref seconds)) => { - let ctx = ContextV7::new(); - if *seconds < 0 { - // not valid unix timestamp, error or null? 
- return Ok(OwnedValue::Null); - } - Uuid::new_v7(Timestamp::from_unix(ctx, *seconds as u64, 0)) - } - _ => Uuid::now_v7(), - }; - Ok(OwnedValue::Blob(Rc::new(uuid.into_bytes().to_vec()))) - } - _ => unreachable!(), - } -} - -fn exec_uuidstr(reg: &OwnedValue) -> Result { - match reg { - OwnedValue::Blob(blob) => { - let uuid = Uuid::from_slice(blob).map_err(|e| LimboError::ParseError(e.to_string()))?; - Ok(OwnedValue::Text(Rc::new(uuid.to_string()))) - } - OwnedValue::Text(val) => { - let uuid = Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string()))?; - Ok(OwnedValue::Text(Rc::new(uuid.to_string()))) - } - OwnedValue::Null => Ok(OwnedValue::Null), - _ => Err(LimboError::ParseError( - "Invalid argument type for UUID function".to_string(), - )), - } -} - -fn exec_uuidblob(reg: &OwnedValue) -> Result { - match reg { - OwnedValue::Text(val) => { - let uuid = Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string()))?; - Ok(OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec()))) - } - OwnedValue::Blob(blob) => { - let uuid = Uuid::from_slice(blob).map_err(|e| LimboError::ParseError(e.to_string()))?; - Ok(OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec()))) - } - OwnedValue::Null => Ok(OwnedValue::Null), - _ => Err(LimboError::ParseError( - "Invalid argument type for UUID function".to_string(), - )), - } -} - -fn exec_ts_from_uuid7(reg: &OwnedValue) -> OwnedValue { - let uuid = match reg { - OwnedValue::Blob(blob) => { - Uuid::from_slice(blob).map_err(|e| LimboError::ParseError(e.to_string())) - } - OwnedValue::Text(val) => { - Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string())) - } - _ => Err(LimboError::ParseError( - "Invalid argument type for UUID function".to_string(), - )), - }; - match uuid { - Ok(uuid) => OwnedValue::Integer(uuid_to_unix(uuid.as_bytes()) as i64), - // display error? 
sqlean seems to set value to null - Err(_) => OwnedValue::Null, - } -} - -#[inline(always)] -fn uuid_to_unix(uuid: &[u8; 16]) -> u64 { - ((uuid[0] as u64) << 40) - | ((uuid[1] as u64) << 32) - | ((uuid[2] as u64) << 24) - | ((uuid[3] as u64) << 16) - | ((uuid[4] as u64) << 8) - | (uuid[5] as u64) -} - fn exec_randomblob(reg: &OwnedValue) -> OwnedValue { let length = match reg { OwnedValue::Integer(i) => *i, @@ -4970,202 +4895,4 @@ mod tests { expected_str ); } - - #[test] - fn test_exec_uuid_v4blob() { - use super::{exec_uuid, ScalarFunc}; - use uuid::Uuid; - let func = ScalarFunc::Uuid4; - let owned_val = exec_uuid(&func, None); - match owned_val { - Ok(OwnedValue::Blob(blob)) => { - assert_eq!(blob.len(), 16); - let uuid = Uuid::from_slice(&blob); - assert!(uuid.is_ok()); - assert_eq!(uuid.unwrap().get_version_num(), 4); - } - _ => panic!("exec_uuid did not return a Blob variant"), - } - } - - #[test] - fn test_exec_uuid_v4str() { - use super::{exec_uuid, ScalarFunc}; - use uuid::Uuid; - let func = ScalarFunc::Uuid4Str; - let owned_val = exec_uuid(&func, None); - match owned_val { - Ok(OwnedValue::Text(v4str)) => { - assert_eq!(v4str.len(), 36); - let uuid = Uuid::parse_str(&v4str); - assert!(uuid.is_ok()); - assert_eq!(uuid.unwrap().get_version_num(), 4); - } - _ => panic!("exec_uuid did not return a Blob variant"), - } - } - - #[test] - fn test_exec_uuid_v7_now() { - use super::{exec_uuid, ScalarFunc}; - use uuid::Uuid; - let func = ScalarFunc::Uuid7; - let owned_val = exec_uuid(&func, None); - match owned_val { - Ok(OwnedValue::Blob(blob)) => { - assert_eq!(blob.len(), 16); - let uuid = Uuid::from_slice(&blob); - assert!(uuid.is_ok()); - assert_eq!(uuid.unwrap().get_version_num(), 7); - } - _ => panic!("exec_uuid did not return a Blob variant"), - } - } - - #[test] - fn test_exec_uuid_v7_with_input() { - use super::{exec_uuid, ScalarFunc}; - use uuid::Uuid; - let func = ScalarFunc::Uuid7; - let owned_val = exec_uuid(&func, 
Some(&OwnedValue::Integer(946702800))); - match owned_val { - Ok(OwnedValue::Blob(blob)) => { - assert_eq!(blob.len(), 16); - let uuid = Uuid::from_slice(&blob); - assert!(uuid.is_ok()); - assert_eq!(uuid.unwrap().get_version_num(), 7); - } - _ => panic!("exec_uuid did not return a Blob variant"), - } - } - - #[test] - fn test_exec_uuid_v7_now_to_timestamp() { - use super::{exec_ts_from_uuid7, exec_uuid, ScalarFunc}; - use uuid::Uuid; - let func = ScalarFunc::Uuid7; - let owned_val = exec_uuid(&func, None); - match owned_val { - Ok(OwnedValue::Blob(ref blob)) => { - assert_eq!(blob.len(), 16); - let uuid = Uuid::from_slice(blob); - assert!(uuid.is_ok()); - assert_eq!(uuid.unwrap().get_version_num(), 7); - } - _ => panic!("exec_uuid did not return a Blob variant"), - } - let result = exec_ts_from_uuid7(&owned_val.expect("uuid7")); - if let OwnedValue::Integer(ref ts) = result { - let unixnow = (std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_secs() - * 1000) as i64; - assert!(*ts >= unixnow - 1000); - } - } - - #[test] - fn test_exec_uuid_v7_to_timestamp() { - use super::{exec_ts_from_uuid7, exec_uuid, ScalarFunc}; - use uuid::Uuid; - let func = ScalarFunc::Uuid7; - let owned_val = exec_uuid(&func, Some(&OwnedValue::Integer(946702800))); - match owned_val { - Ok(OwnedValue::Blob(ref blob)) => { - assert_eq!(blob.len(), 16); - let uuid = Uuid::from_slice(blob); - assert!(uuid.is_ok()); - assert_eq!(uuid.unwrap().get_version_num(), 7); - } - _ => panic!("exec_uuid did not return a Blob variant"), - } - let result = exec_ts_from_uuid7(&owned_val.expect("uuid7")); - assert_eq!(result, OwnedValue::Integer(946702800 * 1000)); - if let OwnedValue::Integer(ts) = result { - let time = chrono::DateTime::from_timestamp(ts / 1000, 0); - assert_eq!( - time.unwrap(), - "2000-01-01T05:00:00Z" - .parse::>() - .unwrap() - ); - } - } - - #[test] - fn test_exec_uuid_v4_str_to_blob() { - use super::{exec_uuid, exec_uuidblob, ScalarFunc}; - use 
uuid::Uuid; - let owned_val = exec_uuidblob( - &exec_uuid(&ScalarFunc::Uuid4Str, None).expect("uuid v4 string to generate"), - ); - match owned_val { - Ok(OwnedValue::Blob(blob)) => { - assert_eq!(blob.len(), 16); - let uuid = Uuid::from_slice(&blob); - assert!(uuid.is_ok()); - assert_eq!(uuid.unwrap().get_version_num(), 4); - } - _ => panic!("exec_uuid did not return a Blob variant"), - } - } - - #[test] - fn test_exec_uuid_v7_str_to_blob() { - use super::{exec_uuid, exec_uuidblob, exec_uuidstr, ScalarFunc}; - use uuid::Uuid; - // convert a v7 blob to a string then back to a blob - let owned_val = exec_uuidblob( - &exec_uuidstr(&exec_uuid(&ScalarFunc::Uuid7, None).expect("uuid v7 blob to generate")) - .expect("uuid v7 string to generate"), - ); - match owned_val { - Ok(OwnedValue::Blob(blob)) => { - assert_eq!(blob.len(), 16); - let uuid = Uuid::from_slice(&blob); - assert!(uuid.is_ok()); - assert_eq!(uuid.unwrap().get_version_num(), 7); - } - _ => panic!("exec_uuid did not return a Blob variant"), - } - } - - #[test] - fn test_exec_uuid_v4_blob_to_str() { - use super::{exec_uuid, exec_uuidstr, ScalarFunc}; - use uuid::Uuid; - // convert a v4 blob to a string - let owned_val = - exec_uuidstr(&exec_uuid(&ScalarFunc::Uuid4, None).expect("uuid v7 blob to generate")); - match owned_val { - Ok(OwnedValue::Text(v4str)) => { - assert_eq!(v4str.len(), 36); - let uuid = Uuid::parse_str(&v4str); - assert!(uuid.is_ok()); - assert_eq!(uuid.unwrap().get_version_num(), 4); - } - _ => panic!("exec_uuid did not return a Blob variant"), - } - } - - #[test] - fn test_exec_uuid_v7_blob_to_str() { - use super::{exec_uuid, exec_uuidstr, ScalarFunc}; - use uuid::Uuid; - // convert a v7 blob to a string - let owned_val = exec_uuidstr( - &exec_uuid(&ScalarFunc::Uuid7, Some(&OwnedValue::Integer(123456789))) - .expect("uuid v7 blob to generate"), - ); - match owned_val { - Ok(OwnedValue::Text(v7str)) => { - assert_eq!(v7str.len(), 36); - let uuid = Uuid::parse_str(&v7str); - 
assert!(uuid.is_ok()); - assert_eq!(uuid.unwrap().get_version_num(), 7); - } - _ => panic!("exec_uuid did not return a Blob variant"), - } - } } From c06c4115f1e5ec6ed59f1f0daddf296847b7fb71 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 20 Dec 2024 16:03:16 -0500 Subject: [PATCH 073/144] Adapt OwnedValues in uuid ext to new LimboText --- core/Cargo.toml | 1 - core/ext/mod.rs | 4 ++-- core/ext/uuid.rs | 53 ++++++++++++++++++++++++------------------ core/function.rs | 8 +++---- core/translate/expr.rs | 2 +- core/vdbe/mod.rs | 27 +++------------------ 6 files changed, 40 insertions(+), 55 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 25a0c6c90..4ef87b469 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -16,7 +16,6 @@ path = "lib.rs" [features] default = ["fs", "json", "uuid"] fs = [] -uuid = ["dep:uuid"] json = [ "dep:jsonb", "dep:pest", diff --git a/core/ext/mod.rs b/core/ext/mod.rs index fea543869..312ebfcea 100644 --- a/core/ext/mod.rs +++ b/core/ext/mod.rs @@ -20,11 +20,11 @@ impl std::fmt::Display for ExtFunc { } impl ExtFunc { - pub fn resolve_function(name: &str, num_args: usize) -> Result { + pub fn resolve_function(name: &str, num_args: usize) -> Option { match name { #[cfg(feature = "uuid")] name => UuidFunc::resolve_function(name, num_args), - _ => Err(()), + _ => None, } } } diff --git a/core/ext/uuid.rs b/core/ext/uuid.rs index aa717c13d..00ce23d9b 100644 --- a/core/ext/uuid.rs +++ b/core/ext/uuid.rs @@ -1,5 +1,8 @@ use super::ExtFunc; -use crate::{types::OwnedValue, LimboError}; +use crate::{ + types::{LimboText, OwnedValue}, + LimboError, +}; use std::rc::Rc; use uuid::{ContextV7, Timestamp, Uuid}; @@ -14,17 +17,17 @@ pub enum UuidFunc { } impl UuidFunc { - pub fn resolve_function(name: &str, num_args: usize) -> Result { + pub fn resolve_function(name: &str, num_args: usize) -> Option { match name { - "uuid4_str" => Ok(ExtFunc::Uuid(UuidFunc::Uuid4Str)), - "uuid4" => Ok(ExtFunc::Uuid(UuidFunc::Uuid4)), - "uuid7" if 
num_args < 2 => Ok(ExtFunc::Uuid(UuidFunc::Uuid7)), - "uuid_str" if num_args == 1 => Ok(ExtFunc::Uuid(UuidFunc::UuidStr)), - "uuid_blob" if num_args == 1 => Ok(ExtFunc::Uuid(UuidFunc::UuidBlob)), - "uuid7_timestamp_ms" if num_args == 1 => Ok(ExtFunc::Uuid(UuidFunc::Uuid7TS)), + "uuid4_str" => Some(ExtFunc::Uuid(UuidFunc::Uuid4Str)), + "uuid4" => Some(ExtFunc::Uuid(UuidFunc::Uuid4)), + "uuid7" if num_args < 2 => Some(ExtFunc::Uuid(UuidFunc::Uuid7)), + "uuid_str" if num_args == 1 => Some(ExtFunc::Uuid(UuidFunc::UuidStr)), + "uuid_blob" if num_args == 1 => Some(ExtFunc::Uuid(UuidFunc::UuidBlob)), + "uuid7_timestamp_ms" if num_args == 1 => Some(ExtFunc::Uuid(UuidFunc::Uuid7TS)), // postgres_compatability - "gen_random_uuid" => Ok(ExtFunc::Uuid(UuidFunc::Uuid4Str)), - _ => Err(()), + "gen_random_uuid" => Some(ExtFunc::Uuid(UuidFunc::Uuid4Str)), + _ => None, } } } @@ -47,7 +50,9 @@ pub fn exec_uuid(var: &UuidFunc, sec: Option<&OwnedValue>) -> crate::Result Ok(OwnedValue::Blob(Rc::new( Uuid::new_v4().into_bytes().to_vec(), ))), - UuidFunc::Uuid4Str => Ok(OwnedValue::Text(Rc::new(Uuid::new_v4().to_string()))), + UuidFunc::Uuid4Str => Ok(OwnedValue::Text(LimboText::new(Rc::new( + Uuid::new_v4().to_string(), + )))), UuidFunc::Uuid7 => { let uuid = match sec { Some(OwnedValue::Integer(ref seconds)) => { @@ -70,11 +75,12 @@ pub fn exec_uuidstr(reg: &OwnedValue) -> crate::Result { match reg { OwnedValue::Blob(blob) => { let uuid = Uuid::from_slice(blob).map_err(|e| LimboError::ParseError(e.to_string()))?; - Ok(OwnedValue::Text(Rc::new(uuid.to_string()))) + Ok(OwnedValue::Text(LimboText::new(Rc::new(uuid.to_string())))) } - OwnedValue::Text(val) => { - let uuid = Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string()))?; - Ok(OwnedValue::Text(Rc::new(uuid.to_string()))) + OwnedValue::Text(ref val) => { + let uuid = + Uuid::parse_str(&val.value).map_err(|e| LimboError::ParseError(e.to_string()))?; + Ok(OwnedValue::Text(LimboText::new(Rc::new(uuid.to_string())))) } 
OwnedValue::Null => Ok(OwnedValue::Null), _ => Err(LimboError::ParseError( @@ -86,7 +92,8 @@ pub fn exec_uuidstr(reg: &OwnedValue) -> crate::Result { pub fn exec_uuidblob(reg: &OwnedValue) -> crate::Result { match reg { OwnedValue::Text(val) => { - let uuid = Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string()))?; + let uuid = + Uuid::parse_str(&val.value).map_err(|e| LimboError::ParseError(e.to_string()))?; Ok(OwnedValue::Blob(Rc::new(uuid.as_bytes().to_vec()))) } OwnedValue::Blob(blob) => { @@ -106,7 +113,7 @@ pub fn exec_ts_from_uuid7(reg: &OwnedValue) -> OwnedValue { Uuid::from_slice(blob).map_err(|e| LimboError::ParseError(e.to_string())) } OwnedValue::Text(val) => { - Uuid::parse_str(val).map_err(|e| LimboError::ParseError(e.to_string())) + Uuid::parse_str(&val.value).map_err(|e| LimboError::ParseError(e.to_string())) } _ => Err(LimboError::ParseError( "Invalid argument type for UUID function".to_string(), @@ -159,8 +166,8 @@ pub mod test { let owned_val = exec_uuid(&func, None); match owned_val { Ok(OwnedValue::Text(v4str)) => { - assert_eq!(v4str.len(), 36); - let uuid = Uuid::parse_str(&v4str); + assert_eq!(v4str.value.len(), 36); + let uuid = Uuid::parse_str(&v4str.value); assert!(uuid.is_ok()); assert_eq!(uuid.unwrap().get_version_num(), 4); } @@ -303,8 +310,8 @@ pub mod test { exec_uuidstr(&exec_uuid(&UuidFunc::Uuid4, None).expect("uuid v7 blob to generate")); match owned_val { Ok(OwnedValue::Text(v4str)) => { - assert_eq!(v4str.len(), 36); - let uuid = Uuid::parse_str(&v4str); + assert_eq!(v4str.value.len(), 36); + let uuid = Uuid::parse_str(&v4str.value); assert!(uuid.is_ok()); assert_eq!(uuid.unwrap().get_version_num(), 4); } @@ -323,8 +330,8 @@ pub mod test { ); match owned_val { Ok(OwnedValue::Text(v7str)) => { - assert_eq!(v7str.len(), 36); - let uuid = Uuid::parse_str(&v7str); + assert_eq!(v7str.value.len(), 36); + let uuid = Uuid::parse_str(&v7str.value); assert!(uuid.is_ok()); assert_eq!(uuid.unwrap().get_version_num(), 7); } 
diff --git a/core/function.rs b/core/function.rs index 7d97432ca..8681a4fdf 100644 --- a/core/function.rs +++ b/core/function.rs @@ -263,7 +263,7 @@ pub enum Func { Math(MathFunc), #[cfg(feature = "json")] Json(JsonFunc), - Extention(ExtFunc), + Extension(ExtFunc), } impl Display for Func { @@ -274,7 +274,7 @@ impl Display for Func { Func::Math(math_func) => write!(f, "{}", math_func), #[cfg(feature = "json")] Func::Json(json_func) => write!(f, "{}", json_func), - Func::Extention(ext_func) => write!(f, "{}", ext_func), + Func::Extension(ext_func) => write!(f, "{}", ext_func), } } } @@ -369,8 +369,8 @@ impl Func { "tanh" => Ok(Func::Math(MathFunc::Tanh)), "trunc" => Ok(Func::Math(MathFunc::Trunc)), _ => match ExtFunc::resolve_function(name, arg_count) { - Ok(ext_func) => Ok(Func::Extention(ext_func)), - Err(_) => Err(()), + Some(ext_func) => Ok(Func::Extension(ext_func)), + None => Err(()), }, } } diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 523be6e51..512e8e394 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1615,7 +1615,7 @@ pub fn translate_expr( } } } - Func::Extention(ext_func) => match ext_func { + Func::Extension(ext_func) => match ext_func { #[cfg(feature = "uuid")] ExtFunc::Uuid(ref uuid_fn) => match uuid_fn { UuidFunc::UuidStr | UuidFunc::UuidBlob | UuidFunc::Uuid7TS => { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index b5eb9d60f..520100463 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -26,7 +26,7 @@ mod datetime; use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY}; #[cfg(feature = "uuid")] use crate::ext::{exec_ts_from_uuid7, exec_uuid, exec_uuidblob, exec_uuidstr, ExtFunc, UuidFunc}; -use crate::function::{AggFunc, Func, FuncCtx, MathFunc, MathFuncArity, ScalarFunc}; +use crate::function::{AggFunc, FuncCtx, MathFunc, MathFuncArity, ScalarFunc}; use crate::pseudo::PseudoCursor; use crate::schema::Table; use crate::storage::sqlite3_ondisk::DatabaseHeader; @@ -39,6 +39,7 @@ use 
crate::util::parse_schema_rows; use crate::{function::JsonFunc, json::get_json, json::json_array}; use crate::{Connection, Result, TransactionState}; use crate::{Rows, DATABASE_VERSION}; +use macros::Description; use datetime::{exec_date, exec_time, exec_unixepoch}; @@ -50,33 +51,11 @@ use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; use std::rc::{Rc, Weak}; -use uuid::{ContextV7, Timestamp, Uuid}; - pub type BranchOffset = i64; -use macros::Description; pub type CursorID = usize; pub type PageIdx = usize; -#[allow(dead_code)] -#[derive(Debug)] -pub enum Func { - Scalar(ScalarFunc), - #[cfg(feature = "json")] - Json(JsonFunc), -} - -impl Display for Func { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let str = match self { - Func::Scalar(scalar_func) => scalar_func.to_string(), - #[cfg(feature = "json")] - Func::Json(json_func) => json_func.to_string(), - }; - write!(f, "{}", str) - } -} - #[derive(Description, Debug)] pub enum Insn { // Initialize the program state and jump to the given PC. 
@@ -2535,7 +2514,7 @@ impl Program { state.registers[*dest] = exec_replace(source, pattern, replacement); } }, - Func::Extention(extfn) => match extfn { + crate::function::Func::Extension(extfn) => match extfn { #[cfg(feature = "uuid")] ExtFunc::Uuid(uuidfn) => match uuidfn { UuidFunc::Uuid4 | UuidFunc::Uuid4Str => { From bea49549ed5bcea11e3dafdc25cb130318d0fae8 Mon Sep 17 00:00:00 2001 From: Dezhi Wu Date: Sun, 22 Dec 2024 10:01:42 +0800 Subject: [PATCH 074/144] feat(core/io): Add support for file creation in `open_file` function `cargo test` is always failing on FreeBSD, the following is one of the errors: ``` ---- tests::test_simple_overflow_page stdout ---- thread 'tests::test_simple_overflow_page' panicked at test/src/lib.rs:32:84: called `Result::unwrap()` on an `Err` value: IOError(Os { code: 2, kind: NotFound, message: "No such file or directory" }) ``` After some digging, I found that the `open_file` function in `core/io/generic.rs` does not respect the `OpenFlags::Create` flag. This commit adds support for file creation in the `open_file` function. `cargo test` now passes on FreeBSD. 
--- core/io/generic.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/io/generic.rs b/core/io/generic.rs index 0c35eaf52..17f51d792 100644 --- a/core/io/generic.rs +++ b/core/io/generic.rs @@ -15,7 +15,11 @@ impl GenericIO { impl IO for GenericIO { fn open_file(&self, path: &str, flags: OpenFlags, _direct: bool) -> Result> { trace!("open_file(path = {})", path); - let file = std::fs::File::open(path)?; + let file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .create(matches!(flags, OpenFlags::Create)) + .open(path)?; Ok(Rc::new(GenericFile { file: RefCell::new(file), })) From 9a8b94ef9358929d91a7507295b4442eb42e7ce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 13:10:50 +0900 Subject: [PATCH 075/144] First successful implementation of delete planning --- core/pseudo.rs | 4 + core/storage/btree.rs | 6 + core/translate/delete.rs | 25 +++++ core/translate/emitter.rs | 216 ++++++++++++++++++++++++++++++++++++ core/translate/mod.rs | 21 +++- core/translate/optimizer.rs | 9 ++ core/translate/planner.rs | 54 ++++++++- core/types.rs | 1 + core/vdbe/explain.rs | 18 +++ core/vdbe/mod.rs | 24 +++- core/vdbe/sorter.rs | 4 + 11 files changed, 378 insertions(+), 4 deletions(-) create mode 100644 core/translate/delete.rs diff --git a/core/pseudo.rs b/core/pseudo.rs index a87647d2b..45f47856e 100644 --- a/core/pseudo.rs +++ b/core/pseudo.rs @@ -79,6 +79,10 @@ impl Cursor for PseudoCursor { Ok(CursorResult::Ok(())) } + fn delete(&mut self) -> Result> { + unimplemented!() + } + fn get_null_flag(&self) -> bool { false } diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 19ba4c7fa..d500e7727 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1753,6 +1753,12 @@ impl Cursor for BTreeCursor { Ok(CursorResult::Ok(())) } + fn delete(&mut self) -> Result> { + println!("ROWID: {:?}", self.rowid.borrow()); + return Ok(CursorResult::Ok(())); + unimplemented!() + } + fn 
set_null_flag(&mut self, flag: bool) { self.null_flag = flag; } diff --git a/core/translate/delete.rs b/core/translate/delete.rs new file mode 100644 index 000000000..dd34d957b --- /dev/null +++ b/core/translate/delete.rs @@ -0,0 +1,25 @@ +use crate::translate::emitter::emit_program_for_delete; +use crate::translate::optimizer::optimize_delete_plan; +use crate::translate::planner::prepare_delete_plan; +use crate::{ + schema::Schema, + storage::sqlite3_ondisk::DatabaseHeader, + vdbe::Program, +}; +use crate::{Connection, Result}; +use sqlite3_parser::ast::{Expr, QualifiedName, ResultColumn}; +use std::rc::Weak; +use std::{cell::RefCell, rc::Rc}; + +pub fn translate_delete( + schema: &Schema, + tbl_name: &QualifiedName, + where_clause: Option, + _returning: &Option>, + database_header: Rc>, + connection: Weak, +) -> Result { + let delete_plan = prepare_delete_plan(schema, tbl_name, where_clause)?; + let optimized_plan = optimize_delete_plan(delete_plan)?; + emit_program_for_delete(database_header, optimized_plan, connection) +} diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 38311b9d9..7a32336be 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -272,6 +272,51 @@ pub fn emit_program( Ok(program.build(database_header, connection)) } +pub fn emit_program_for_delete( + database_header: Rc>, + mut plan: Plan, + connection: Weak, +) -> Result { + let (mut program, mut metadata, init_label, start_offset) = prologue()?; + + // No rows will be read from source table loops if there is a constant false condition eg. 
WHERE 0 + let skip_loops_label = if plan.contains_constant_false_condition { + let skip_loops_label = program.allocate_label(); + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: skip_loops_label, + }, + skip_loops_label, + ); + Some(skip_loops_label) + } else { + None + }; + + // Initialize cursors and other resources needed for query execution + init_source_for_delete(&mut program, &plan.source, &mut metadata)?; + + // Set up main query execution loop + open_loop( + &mut program, + &mut plan.source, + &plan.referenced_tables, + &mut metadata, + )?; + + // Close the loop and handle deletion + close_loop_for_delete(&mut program, &plan.source, &mut metadata)?; + + if let Some(skip_loops_label) = skip_loops_label { + program.resolve_label(skip_loops_label, program.offset()); + } + + // Finalize program + epilogue(&mut program, &mut metadata, init_label, start_offset)?; + + Ok(program.build(database_header, connection)) +} + /// Initialize resources needed for ORDER BY processing fn init_order_by( program: &mut ProgramBuilder, @@ -466,6 +511,74 @@ fn init_source( } } +fn init_source_for_delete( + program: &mut ProgramBuilder, + source: &SourceOperator, + metadata: &mut Metadata, +) -> Result<()> { + match source { + SourceOperator::Join { .. } => { + unreachable!() + } + SourceOperator::Scan { + id, + table_reference, + .. + } => { + let cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::BTree(table_reference.table.clone())), + ); + let root_page = table_reference.table.root_page; + let next_row_label = program.allocate_label(); + metadata.next_row_labels.insert(*id, next_row_label); + program.emit_insn(Insn::OpenWriteAsync{ + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + Ok(()) + } + SourceOperator::Search { + id, + table_reference, + search, + .. 
+ } => { + let table_cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::BTree(table_reference.table.clone())), + ); + + let next_row_label = program.allocate_label(); + + metadata.next_row_labels.insert(*id, next_row_label); + + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + if let Search::IndexSearch { index, .. } = search { + let index_cursor_id = program + .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + + Ok(()) + } + SourceOperator::Nothing => { + Ok(()) + } + } +} + /// Set up the main query execution loop /// For example in the case of a nested table scan, this means emitting the RewindAsync instruction /// for all tables involved, outermost first. @@ -1121,6 +1234,109 @@ fn close_loop( } } +fn close_loop_for_delete( + program: &mut ProgramBuilder, + source: &SourceOperator, + metadata: &mut Metadata, +) -> Result<()> { + match source { + SourceOperator::Scan { + id, + table_reference, + iter_dir, + .. 
+ } => { + let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); + + // Emit the instructions to delete the row + let key_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id, + dest: key_reg, + }); + program.emit_insn(Insn::DeleteAsync { cursor_id }); + program.emit_insn(Insn::DeleteAwait { cursor_id }); + + program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); + + // Emit the NextAsync or PrevAsync instruction to continue the loop + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::PrevAsync { cursor_id }); + } else { + program.emit_insn(Insn::NextAsync { cursor_id }); + } + let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); + + // Emit the NextAwait or PrevAwait instruction with label dependency + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn_with_label_dependency( + Insn::PrevAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } + Ok(()) + } + SourceOperator::Search { + id, + table_reference, + search, + .. + } => { + let cursor_id = match search { + Search::RowidEq { .. } | Search::RowidSearch { .. } => { + program.resolve_cursor_id(&table_reference.table_identifier) + } + Search::IndexSearch { index, .. 
} => program.resolve_cursor_id(&index.name), + }; + + // Emit the instructions to delete the row + let key_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id, + dest: key_reg, + }); + program.emit_insn(Insn::DeleteAsync { cursor_id }); + program.emit_insn(Insn::DeleteAwait { cursor_id }); + + // resolve labels after calling Delete opcodes + program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); + + // Emit the NextAsync instruction to continue the loop + if !matches!(search, Search::RowidEq { .. }) { + program.emit_insn(Insn::NextAsync { cursor_id }); + let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } + + Ok(()) + } + _ => Ok(()), + } +} + /// Emits the bytecode for processing a GROUP BY clause. /// This is called when the main query execution loop has finished processing, /// and we now have data in the GROUP BY sorter. diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 2e5d86141..5ea44f05a 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -14,6 +14,7 @@ pub(crate) mod optimizer; pub(crate) mod plan; pub(crate) mod planner; pub(crate) mod select; +pub(crate) mod delete; use std::cell::RefCell; use std::fmt::Display; @@ -29,6 +30,7 @@ use insert::translate_insert; use select::translate_select; use sqlite3_parser::ast::fmt::ToTokens; use sqlite3_parser::ast::{self, PragmaName}; +use crate::translate::delete::translate_delete; /// Translate SQL statement into bytecode program. pub fn translate( @@ -68,7 +70,24 @@ pub fn translate( ast::Stmt::CreateVirtualTable { .. } => { bail_parse_error!("CREATE VIRTUAL TABLE not supported yet") } - ast::Stmt::Delete { .. 
} => bail_parse_error!("DELETE not supported yet"), + ast::Stmt::Delete { + with, + tbl_name, + indexed, + where_clause, + returning, + order_by, + limit + } => { + translate_delete( + schema, + &tbl_name, + where_clause, + &returning, + database_header, + connection + ) + } ast::Stmt::Detach(_) => bail_parse_error!("DETACH not supported yet"), ast::Stmt::DropIndex { .. } => bail_parse_error!("DROP INDEX not supported yet"), ast::Stmt::DropTable { .. } => bail_parse_error!("DROP TABLE not supported yet"), diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 4763f8b1e..6ca17f217 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -41,6 +41,15 @@ pub fn optimize_plan(mut select_plan: Plan) -> Result { Ok(select_plan) } +pub fn optimize_delete_plan(mut delete_plan: Plan) -> Result { + use_indexes( + &mut delete_plan.source, + &delete_plan.referenced_tables, + &delete_plan.available_indexes, + )?; + Ok(delete_plan) +} + fn _operator_is_already_ordered_by( operator: &mut SourceOperator, key: &mut ast::Expr, diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 14757e00a..373fa498f 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,11 +1,10 @@ use super::{ - optimizer::Optimizable, plan::{ Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, }, }; use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; -use sqlite3_parser::ast::{self, FromClause, JoinType, ResultColumn}; +use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, QualifiedName, ResultColumn}; pub struct OperatorIdCounter { id: usize, @@ -738,6 +737,57 @@ fn parse_join( )) } +pub fn prepare_delete_plan( + schema: &Schema, + tbl_name: &QualifiedName, + where_clause: Option, +) -> Result { + let table_name = tbl_name.name.0.clone(); + + let table = if let Some(table) = schema.get_table(&table_name) { + table + } else { + crate::bail_parse_error!("Table {} 
not found", table_name); + }; + + let table_ref = BTreeTableReference { + table: table.clone(), + table_identifier: table_name.clone(), + table_index: 0 + }; + + // Parse and resolve the where_clause + let mut resolved_where_clause = None; + if let Some(where_expr) = where_clause { + let mut predicates = vec![]; + break_predicate_at_and_boundaries(where_expr, &mut predicates); + for expr in predicates.iter_mut() { + bind_column_references(expr, &[table_ref.clone()])?; + } + resolved_where_clause = Some(predicates); + } + + let plan = Plan { + source: SourceOperator::Scan { + id: 0, + table_reference: table_ref.clone(), + predicates: resolved_where_clause.clone(), + iter_dir: None + }, + result_columns: vec![], + where_clause: resolved_where_clause, + group_by: None, + order_by: None, + aggregates: vec![], + limit: None, + referenced_tables: vec![table_ref], + available_indexes: vec![], + contains_constant_false_condition: false + }; + + Ok(plan) +} + fn break_predicate_at_and_boundaries(predicate: ast::Expr, out_predicates: &mut Vec) { match predicate { ast::Expr::Binary(left, ast::Operator::And, right) => { diff --git a/core/types.rs b/core/types.rs index 5f1b55d7b..c545fa514 100644 --- a/core/types.rs +++ b/core/types.rs @@ -484,6 +484,7 @@ pub trait Cursor { record: &OwnedRecord, moved_before: bool, /* Tells inserter that it doesn't need to traverse in order to find leaf page */ ) -> Result>; // + fn delete(&mut self) -> Result>; fn exists(&mut self, key: &OwnedValue) -> Result>; fn set_null_flag(&mut self, flag: bool); fn get_null_flag(&self) -> bool; diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index ce03a53fd..94d89f94c 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -834,6 +834,24 @@ pub fn insn_to_str( 0, "".to_string(), ), + Insn::DeleteAsync { cursor_id } => ( + "DeleteAsync", + *cursor_id as i32, + 0, + 0, + OwnedValue::Text(Rc::new("".to_string())), + 0, + "".to_string(), + ), + Insn::DeleteAwait { cursor_id } => ( + 
"DeleteAwait", + *cursor_id as i32, + 0, + 0, + OwnedValue::Text(Rc::new("".to_string())), + 0, + "".to_string(), + ), Insn::NewRowid { cursor, rowid_reg, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 362f60042..3db275f63 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -484,6 +484,14 @@ pub enum Insn { cursor_id: usize, }, + DeleteAsync { + cursor_id: CursorID, + }, + + DeleteAwait { + cursor_id: CursorID + }, + NewRowid { cursor: CursorID, // P1 rowid_reg: usize, // P2 Destination register to store the new rowid @@ -2648,7 +2656,17 @@ impl Program { } } state.pc += 1; - } + }, + Insn::DeleteAsync { cursor_id } => { + let cursor = cursors.get_mut(cursor_id).unwrap(); + return_if_io!(cursor.delete()); + state.pc += 1; + }, + Insn::DeleteAwait { cursor_id } => { + let cursor = cursors.get_mut(cursor_id).unwrap(); + cursor.wait_for_completion()?; + state.pc += 1; + }, Insn::NewRowid { cursor, rowid_reg, .. } => { @@ -3879,6 +3897,10 @@ mod tests { unimplemented!() } + fn delete(&mut self, key: &OwnedValue) -> Result> { + unimplemented!() + } + fn wait_for_completion(&mut self) -> Result<()> { unimplemented!() } diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 0365007a4..e3962b096 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -96,6 +96,10 @@ impl Cursor for Sorter { Ok(CursorResult::Ok(())) } + fn delete(&mut self) -> Result> { + unimplemented!() + } + fn set_null_flag(&mut self, _flag: bool) { todo!(); } From a42b185ecec078f4845206c48eaa2a4dbfe99e5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 14:22:10 +0900 Subject: [PATCH 076/144] Nit --- core/storage/btree.rs | 5 ++--- core/vdbe/explain.rs | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index d500e7727..b2761a5aa 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1754,9 +1754,8 @@ impl Cursor for BTreeCursor { } fn delete(&mut self) -> 
Result> { - println!("ROWID: {:?}", self.rowid.borrow()); - return Ok(CursorResult::Ok(())); - unimplemented!() + debug!("rowid: {:?}", self.rowid.borrow()); + Ok(CursorResult::Ok(())) } fn set_null_flag(&mut self, flag: bool) { diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 94d89f94c..a5e4f91bd 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -839,7 +839,7 @@ pub fn insn_to_str( *cursor_id as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -848,7 +848,7 @@ pub fn insn_to_str( *cursor_id as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), From 57c7a56e35aa855370710732726bfc5012e1d4f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 14:27:21 +0900 Subject: [PATCH 077/144] Apply fmt, clippy --- core/translate/delete.rs | 6 +----- core/translate/emitter.rs | 6 ++---- core/translate/mod.rs | 24 +++++++++++------------- core/translate/planner.rs | 12 +++++------- core/vdbe/mod.rs | 10 +++++----- 5 files changed, 24 insertions(+), 34 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index dd34d957b..d3d5fd346 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -1,11 +1,7 @@ use crate::translate::emitter::emit_program_for_delete; use crate::translate::optimizer::optimize_delete_plan; use crate::translate::planner::prepare_delete_plan; -use crate::{ - schema::Schema, - storage::sqlite3_ondisk::DatabaseHeader, - vdbe::Program, -}; +use crate::{schema::Schema, storage::sqlite3_ondisk::DatabaseHeader, vdbe::Program}; use crate::{Connection, Result}; use sqlite3_parser::ast::{Expr, QualifiedName, ResultColumn}; use std::rc::Weak; diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 7a32336be..8779dbc25 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ 
-532,7 +532,7 @@ fn init_source_for_delete( let root_page = table_reference.table.root_page; let next_row_label = program.allocate_label(); metadata.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenWriteAsync{ + program.emit_insn(Insn::OpenWriteAsync { cursor_id, root_page, }); @@ -573,9 +573,7 @@ fn init_source_for_delete( Ok(()) } - SourceOperator::Nothing => { - Ok(()) - } + SourceOperator::Nothing => Ok(()), } } diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 5ea44f05a..381c82df1 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -7,6 +7,7 @@ //! a SELECT statement will be translated into a sequence of instructions that //! will read rows from the database and filter them according to a WHERE clause. +pub(crate) mod delete; pub(crate) mod emitter; pub(crate) mod expr; pub(crate) mod insert; @@ -14,7 +15,6 @@ pub(crate) mod optimizer; pub(crate) mod plan; pub(crate) mod planner; pub(crate) mod select; -pub(crate) mod delete; use std::cell::RefCell; use std::fmt::Display; @@ -24,13 +24,13 @@ use std::str::FromStr; use crate::schema::Schema; use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::{DatabaseHeader, MIN_PAGE_CACHE_SIZE}; +use crate::translate::delete::translate_delete; use crate::vdbe::{builder::ProgramBuilder, Insn, Program}; use crate::{bail_parse_error, Connection, Result}; use insert::translate_insert; use select::translate_select; use sqlite3_parser::ast::fmt::ToTokens; use sqlite3_parser::ast::{self, PragmaName}; -use crate::translate::delete::translate_delete; /// Translate SQL statement into bytecode program. 
pub fn translate( @@ -77,17 +77,15 @@ pub fn translate( where_clause, returning, order_by, - limit - } => { - translate_delete( - schema, - &tbl_name, - where_clause, - &returning, - database_header, - connection - ) - } + limit, + } => translate_delete( + schema, + &tbl_name, + where_clause, + &returning, + database_header, + connection, + ), ast::Stmt::Detach(_) => bail_parse_error!("DETACH not supported yet"), ast::Stmt::DropIndex { .. } => bail_parse_error!("DROP INDEX not supported yet"), ast::Stmt::DropTable { .. } => bail_parse_error!("DROP TABLE not supported yet"), diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 373fa498f..e613dd14d 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,7 +1,5 @@ -use super::{ - plan::{ - Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, - }, +use super::plan::{ + Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, }; use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, QualifiedName, ResultColumn}; @@ -753,7 +751,7 @@ pub fn prepare_delete_plan( let table_ref = BTreeTableReference { table: table.clone(), table_identifier: table_name.clone(), - table_index: 0 + table_index: 0, }; // Parse and resolve the where_clause @@ -772,7 +770,7 @@ pub fn prepare_delete_plan( id: 0, table_reference: table_ref.clone(), predicates: resolved_where_clause.clone(), - iter_dir: None + iter_dir: None, }, result_columns: vec![], where_clause: resolved_where_clause, @@ -782,7 +780,7 @@ pub fn prepare_delete_plan( limit: None, referenced_tables: vec![table_ref], available_indexes: vec![], - contains_constant_false_condition: false + contains_constant_false_condition: false, }; Ok(plan) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 3db275f63..26f125675 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -489,7 +489,7 @@ pub 
enum Insn { }, DeleteAwait { - cursor_id: CursorID + cursor_id: CursorID, }, NewRowid { @@ -2656,17 +2656,17 @@ impl Program { } } state.pc += 1; - }, + } Insn::DeleteAsync { cursor_id } => { let cursor = cursors.get_mut(cursor_id).unwrap(); return_if_io!(cursor.delete()); state.pc += 1; - }, + } Insn::DeleteAwait { cursor_id } => { let cursor = cursors.get_mut(cursor_id).unwrap(); cursor.wait_for_completion()?; state.pc += 1; - }, + } Insn::NewRowid { cursor, rowid_reg, .. } => { @@ -3897,7 +3897,7 @@ mod tests { unimplemented!() } - fn delete(&mut self, key: &OwnedValue) -> Result> { + fn delete(&mut self) -> Result> { unimplemented!() } From 9bacf80f2e29bc83e1c6859b9e70f89bdfc0ae24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 14:41:12 +0900 Subject: [PATCH 078/144] Change to println! --- core/storage/btree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index b2761a5aa..3f6b87da3 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1754,7 +1754,7 @@ impl Cursor for BTreeCursor { } fn delete(&mut self) -> Result> { - debug!("rowid: {:?}", self.rowid.borrow()); + println!("rowid: {:?}", self.rowid.borrow()); Ok(CursorResult::Ok(())) } From 1d3ce528122fb4a81bdeaca6749958df1f517de6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 15:11:26 +0900 Subject: [PATCH 079/144] Refactor planner and optimizer to be DRY --- core/lib.rs | 4 +- core/translate/optimizer.rs | 65 ++++++++++++-------- core/translate/planner.rs | 119 ++++++++++++++++++------------------ core/translate/select.rs | 9 ++- 4 files changed, 105 insertions(+), 92 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 1f5668d76..e39a5fa7a 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -34,12 +34,12 @@ pub use storage::wal::WalFile; pub use storage::wal::WalFileShared; use util::parse_schema_rows; -use 
translate::optimizer::optimize_plan; use translate::planner::prepare_select_plan; pub use error::LimboError; pub type Result = std::result::Result; +use crate::translate::optimizer::optimize_select_plan; pub use io::OpenFlags; #[cfg(feature = "fs")] pub use io::PlatformIO; @@ -267,7 +267,7 @@ impl Connection { match stmt { ast::Stmt::Select(select) => { let plan = prepare_select_plan(&*self.schema.borrow(), select)?; - let plan = optimize_plan(plan)?; + let plan = optimize_select_plan(plan)?; println!("{}", plan); } _ => todo!(), diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 6ca17f217..9ccd8e7f7 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -9,12 +9,25 @@ use super::plan::{ Direction, IterationDirection, Plan, Search, SourceOperator, }; +pub fn optimize_select_plan(plan: Plan) -> Result { + optimize_plan(plan, true, true, true) +} + +pub fn optimize_delete_plan(plan: Plan) -> Result { + optimize_plan(plan, false, true, false) +} + /** * Make a few passes over the plan to optimize it. * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -pub fn optimize_plan(mut select_plan: Plan) -> Result { +fn optimize_plan( + mut select_plan: Plan, + optimize_push_predicates: bool, + optimize_use_indexes: bool, + optimize_eliminate_unnecessary_order_by: bool, +) -> Result { eliminate_between(&mut select_plan.source, &mut select_plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constants(&mut select_plan.source, &mut select_plan.where_clause)? 
@@ -22,32 +35,32 @@ pub fn optimize_plan(mut select_plan: Plan) -> Result { select_plan.contains_constant_false_condition = true; return Ok(select_plan); } - push_predicates( - &mut select_plan.source, - &mut select_plan.where_clause, - &select_plan.referenced_tables, - )?; - use_indexes( - &mut select_plan.source, - &select_plan.referenced_tables, - &select_plan.available_indexes, - )?; - eliminate_unnecessary_orderby( - &mut select_plan.source, - &mut select_plan.order_by, - &select_plan.referenced_tables, - &select_plan.available_indexes, - )?; - Ok(select_plan) -} -pub fn optimize_delete_plan(mut delete_plan: Plan) -> Result { - use_indexes( - &mut delete_plan.source, - &delete_plan.referenced_tables, - &delete_plan.available_indexes, - )?; - Ok(delete_plan) + if optimize_push_predicates { + push_predicates( + &mut select_plan.source, + &mut select_plan.where_clause, + &select_plan.referenced_tables, + )?; + } + + if optimize_use_indexes { + use_indexes( + &mut select_plan.source, + &select_plan.referenced_tables, + &select_plan.available_indexes, + )?; + } + + if optimize_eliminate_unnecessary_order_by { + eliminate_unnecessary_orderby( + &mut select_plan.source, + &mut select_plan.order_by, + &select_plan.referenced_tables, + &select_plan.available_indexes, + )?; + } + Ok(select_plan) } fn _operator_is_already_ordered_by( diff --git a/core/translate/planner.rs b/core/translate/planner.rs index e613dd14d..d32eba6c4 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -283,14 +283,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

, +) -> Result { + let table_name = tbl_name.name.0.clone(); + + let table = if let Some(table) = schema.get_table(&table_name) { + table + } else { + crate::bail_parse_error!("Table {} not found", table_name); + }; + + let table_ref = BTreeTableReference { + table: table.clone(), + table_identifier: table_name.clone(), + table_index: 0, + }; + + // Parse and resolve the where_clause + let resolved_where_clauses = parse_where(where_clause, &[table_ref.clone()])?; + + let plan = Plan { + source: SourceOperator::Scan { + id: 0, + table_reference: table_ref.clone(), + predicates: resolved_where_clauses.clone(), + iter_dir: None, + }, + result_columns: vec![], + where_clause: resolved_where_clauses, + group_by: None, + order_by: None, + aggregates: vec![], + limit: None, // TODO: add support for limit + referenced_tables: vec![table_ref], + available_indexes: vec![], + contains_constant_false_condition: false, + }; + + Ok(plan) +} + #[allow(clippy::type_complexity)] fn parse_from( schema: &Schema, @@ -552,6 +588,22 @@ fn parse_from( Ok((operator, tables)) } +fn parse_where( + where_clause: Option, + referenced_tables: &[BTreeTableReference], +) -> Result>> { + if let Some(where_expr) = where_clause { + let mut predicates = vec![]; + break_predicate_at_and_boundaries(where_expr, &mut predicates); + for expr in predicates.iter_mut() { + bind_column_references(expr, referenced_tables)?; + } + Ok(Some(predicates)) + } else { + Ok(None) + } +} + fn parse_join( schema: &Schema, join: ast::JoinedSelectTable, @@ -735,57 +787,6 @@ fn parse_join( )) } -pub fn prepare_delete_plan( - schema: &Schema, - tbl_name: &QualifiedName, - where_clause: Option, -) -> Result { - let table_name = tbl_name.name.0.clone(); - - let table = if let Some(table) = schema.get_table(&table_name) { - table - } else { - crate::bail_parse_error!("Table {} not found", table_name); - }; - - let table_ref = BTreeTableReference { - table: table.clone(), - table_identifier: table_name.clone(), - table_index: 
0, - }; - - // Parse and resolve the where_clause - let mut resolved_where_clause = None; - if let Some(where_expr) = where_clause { - let mut predicates = vec![]; - break_predicate_at_and_boundaries(where_expr, &mut predicates); - for expr in predicates.iter_mut() { - bind_column_references(expr, &[table_ref.clone()])?; - } - resolved_where_clause = Some(predicates); - } - - let plan = Plan { - source: SourceOperator::Scan { - id: 0, - table_reference: table_ref.clone(), - predicates: resolved_where_clause.clone(), - iter_dir: None, - }, - result_columns: vec![], - where_clause: resolved_where_clause, - group_by: None, - order_by: None, - aggregates: vec![], - limit: None, - referenced_tables: vec![table_ref], - available_indexes: vec![], - contains_constant_false_condition: false, - }; - - Ok(plan) -} - fn break_predicate_at_and_boundaries(predicate: ast::Expr, out_predicates: &mut Vec) { match predicate { ast::Expr::Binary(left, ast::Operator::And, right) => { diff --git a/core/translate/select.rs b/core/translate/select.rs index 6d846ded8..a0a0fa36c 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -1,15 +1,14 @@ use std::rc::Weak; use std::{cell::RefCell, rc::Rc}; +use super::emitter::emit_program; +use super::planner::prepare_select_plan; use crate::storage::sqlite3_ondisk::DatabaseHeader; +use crate::translate::optimizer::optimize_select_plan; use crate::Connection; use crate::{schema::Schema, vdbe::Program, Result}; use sqlite3_parser::ast; -use super::emitter::emit_program; -use super::optimizer::optimize_plan; -use super::planner::prepare_select_plan; - pub fn translate_select( schema: &Schema, select: ast::Select, @@ -17,6 +16,6 @@ pub fn translate_select( connection: Weak, ) -> Result { let select_plan = prepare_select_plan(schema, select)?; - let optimized_plan = optimize_plan(select_plan)?; + let optimized_plan = optimize_select_plan(select_plan)?; emit_program(database_header, optimized_plan, connection) } From 
e83819ef3094c882658c323fb5065f2d045484cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 16:00:35 +0900 Subject: [PATCH 080/144] Extract the appending delete related opcodes to `emit_delete_opcodes` --- core/translate/emitter.rs | 233 +++++++++++++++----------------------- 1 file changed, 91 insertions(+), 142 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 8779dbc25..49b85e17b 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -101,6 +101,15 @@ pub struct Metadata { pub result_columns_to_skip_in_orderby_sorter: Option>, } +/// Used to distinguish database operations +#[derive(Debug, Clone)] +pub enum OperationMode { + SELECT, + INSERT, + UPDATE, + DELETE, +} + /// Initialize the program with basic setup and return initial metadata and labels fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { let mut program = ProgramBuilder::new(); @@ -201,7 +210,12 @@ pub fn emit_program( if let Some(ref mut group_by) = plan.group_by { init_group_by(&mut program, group_by, &plan.aggregates, &mut metadata)?; } - init_source(&mut program, &plan.source, &mut metadata)?; + init_source( + &mut program, + &plan.source, + &mut metadata, + &OperationMode::SELECT, + )?; // Set up main query execution loop open_loop( @@ -294,7 +308,12 @@ pub fn emit_program_for_delete( }; // Initialize cursors and other resources needed for query execution - init_source_for_delete(&mut program, &plan.source, &mut metadata)?; + init_source( + &mut program, + &plan.source, + &mut metadata, + &OperationMode::DELETE, + )?; // Set up main query execution loop open_loop( @@ -304,8 +323,15 @@ pub fn emit_program_for_delete( &mut metadata, )?; + emit_delete_insns(&mut program, &plan.source)?; + // Close the loop and handle deletion - close_loop_for_delete(&mut program, &plan.source, &mut metadata)?; + close_loop( + &mut program, + &plan.source, + &mut metadata, + 
&plan.referenced_tables, + )?; if let Some(skip_loops_label) = skip_loops_label { program.resolve_label(skip_loops_label, program.offset()); @@ -430,6 +456,7 @@ fn init_source( program: &mut ProgramBuilder, source: &SourceOperator, metadata: &mut Metadata, + mode: &OperationMode, ) -> Result<()> { match source { SourceOperator::Join { @@ -447,10 +474,10 @@ fn init_source( }; metadata.left_joins.insert(*id, lj_metadata); } - init_source(program, left, metadata)?; - init_source(program, right, metadata)?; + init_source(program, left, metadata, mode)?; + init_source(program, right, metadata, mode)?; - return Ok(()); + Ok(()) } SourceOperator::Scan { id, @@ -464,80 +491,27 @@ fn init_source( let root_page = table_reference.table.root_page; let next_row_label = program.allocate_label(); metadata.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - return Ok(()); - } - SourceOperator::Search { - id, - table_reference, - search, - .. - } => { - let table_cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), - ); - - let next_row_label = program.allocate_label(); - metadata.next_row_labels.insert(*id, next_row_label); - - program.emit_insn(Insn::OpenReadAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - if let Search::IndexSearch { index, .. 
} = search { - let index_cursor_id = program - .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait {}); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } } - return Ok(()); - } - SourceOperator::Nothing => { - return Ok(()); - } - } -} - -fn init_source_for_delete( - program: &mut ProgramBuilder, - source: &SourceOperator, - metadata: &mut Metadata, -) -> Result<()> { - match source { - SourceOperator::Join { .. } => { - unreachable!() - } - SourceOperator::Scan { - id, - table_reference, - .. - } => { - let cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), - ); - let root_page = table_reference.table.root_page; - let next_row_label = program.allocate_label(); - metadata.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenWriteAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); - Ok(()) } SourceOperator::Search { @@ -555,20 +529,49 @@ fn init_source_for_delete( metadata.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenReadAwait {}); + } + OperationMode::DELETE => { + 
program.emit_insn(Insn::OpenWriteAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } + } if let Search::IndexSearch { index, .. } = search { let index_cursor_id = program .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); + + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } + } } Ok(()) @@ -1232,11 +1235,7 @@ fn close_loop( } } -fn close_loop_for_delete( - program: &mut ProgramBuilder, - source: &SourceOperator, - metadata: &mut Metadata, -) -> Result<()> { +fn emit_delete_insns(program: &mut ProgramBuilder, source: &SourceOperator) -> Result<()> { match source { SourceOperator::Scan { id, @@ -1255,40 +1254,6 @@ fn close_loop_for_delete( program.emit_insn(Insn::DeleteAsync { cursor_id }); program.emit_insn(Insn::DeleteAwait { cursor_id }); - program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); - - // Emit the NextAsync or PrevAsync instruction to continue the loop - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::PrevAsync { cursor_id }); - } else { - program.emit_insn(Insn::NextAsync { cursor_id }); - } - let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); - - // Emit the NextAwait or PrevAwait instruction with label dependency - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == 
IterationDirection::Backwards) - { - program.emit_insn_with_label_dependency( - Insn::PrevAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } else { - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } Ok(()) } SourceOperator::Search { @@ -1313,22 +1278,6 @@ fn close_loop_for_delete( program.emit_insn(Insn::DeleteAsync { cursor_id }); program.emit_insn(Insn::DeleteAwait { cursor_id }); - // resolve labels after calling Delete opcodes - program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); - - // Emit the NextAsync instruction to continue the loop - if !matches!(search, Search::RowidEq { .. }) { - program.emit_insn(Insn::NextAsync { cursor_id }); - let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } - Ok(()) } _ => Ok(()), From 6f235e6f6c03fe6ee2e3e80b266d6280d4cbdce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 21:06:54 +0900 Subject: [PATCH 081/144] Fix comment --- core/translate/emitter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 49b85e17b..3f3bdf1b2 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -325,7 +325,7 @@ pub fn emit_program_for_delete( emit_delete_insns(&mut program, &plan.source)?; - // Close the loop and handle deletion + // Clean up and close the main execution loop close_loop( &mut program, &plan.source, From 82c127b7a3491af8841b2ab2546fbf0fcdf2bda1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Mon, 23 Dec 2024 04:47:05 +0900 Subject: [PATCH 082/144] Remove bool args in optimize_plan --- core/translate/optimizer.rs | 80 ++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 40 
deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 9ccd8e7f7..9e93b1d71 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -9,58 +9,58 @@ use super::plan::{ Direction, IterationDirection, Plan, Search, SourceOperator, }; -pub fn optimize_select_plan(plan: Plan) -> Result { - optimize_plan(plan, true, true, true) -} - -pub fn optimize_delete_plan(plan: Plan) -> Result { - optimize_plan(plan, false, true, false) -} - /** * Make a few passes over the plan to optimize it. * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -fn optimize_plan( - mut select_plan: Plan, - optimize_push_predicates: bool, - optimize_use_indexes: bool, - optimize_eliminate_unnecessary_order_by: bool, -) -> Result { - eliminate_between(&mut select_plan.source, &mut select_plan.where_clause)?; +pub fn optimize_select_plan(mut plan: Plan) -> Result { + eliminate_between(&mut plan.source, &mut plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = - eliminate_constants(&mut select_plan.source, &mut select_plan.where_clause)? + eliminate_constants(&mut plan.source, &mut plan.where_clause)? 
{ - select_plan.contains_constant_false_condition = true; - return Ok(select_plan); + plan.contains_constant_false_condition = true; + return Ok(plan); } - if optimize_push_predicates { - push_predicates( - &mut select_plan.source, - &mut select_plan.where_clause, - &select_plan.referenced_tables, - )?; - } + push_predicates( + &mut plan.source, + &mut plan.where_clause, + &plan.referenced_tables, + )?; + + use_indexes( + &mut plan.source, + &plan.referenced_tables, + &plan.available_indexes, + )?; + + eliminate_unnecessary_orderby( + &mut plan.source, + &mut plan.order_by, + &plan.referenced_tables, + &plan.available_indexes, + )?; + + Ok(plan) +} - if optimize_use_indexes { - use_indexes( - &mut select_plan.source, - &select_plan.referenced_tables, - &select_plan.available_indexes, - )?; +pub fn optimize_delete_plan(mut plan: Plan) -> Result { + eliminate_between(&mut plan.source, &mut plan.where_clause)?; + if let ConstantConditionEliminationResult::ImpossibleCondition = + eliminate_constants(&mut plan.source, &mut plan.where_clause)? + { + plan.contains_constant_false_condition = true; + return Ok(plan); } - if optimize_eliminate_unnecessary_order_by { - eliminate_unnecessary_orderby( - &mut select_plan.source, - &mut select_plan.order_by, - &select_plan.referenced_tables, - &select_plan.available_indexes, - )?; - } - Ok(select_plan) + use_indexes( + &mut plan.source, + &plan.referenced_tables, + &plan.available_indexes, + )?; + + Ok(plan) } fn _operator_is_already_ordered_by( From f8d4edc8d7495aebde190b0955c3f35429a35916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Mon, 23 Dec 2024 04:54:40 +0900 Subject: [PATCH 083/144] Use schema.get_table(...) 
instead of referencing directly --- core/translate/planner.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index d32eba6c4..0c417fa66 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -489,17 +489,15 @@ pub fn prepare_delete_plan( tbl_name: &QualifiedName, where_clause: Option, ) -> Result { - let table_name = tbl_name.name.0.clone(); - - let table = if let Some(table) = schema.get_table(&table_name) { - table - } else { - crate::bail_parse_error!("Table {} not found", table_name); + // let table_name = tbl_name.name.0.clone(); + let table = match schema.get_table(tbl_name.name.0.as_str()) { + Some(table) => table, + None => crate::bail_corrupt_error!("Parse error: no such table: {}", tbl_name), }; let table_ref = BTreeTableReference { table: table.clone(), - table_identifier: table_name.clone(), + table_identifier: table.name.clone(), table_index: 0, }; From 5cdcb8d78ce08226168474bd32d856d1e6147585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Mon, 23 Dec 2024 05:45:23 +0900 Subject: [PATCH 084/144] Split `Plan` into `Select` and `Delete` --- core/lib.rs | 4 ++-- core/translate/delete.rs | 8 ++++---- core/translate/emitter.rs | 21 ++++++++++++++----- core/translate/optimizer.rs | 13 +++++++++--- core/translate/plan.rs | 40 ++++++++++++++++++++++++++++++++----- core/translate/planner.rs | 13 ++++++------ core/translate/select.rs | 4 ++-- 7 files changed, 75 insertions(+), 28 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index e39a5fa7a..f49d8bd3b 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -39,7 +39,7 @@ use translate::planner::prepare_select_plan; pub use error::LimboError; pub type Result = std::result::Result; -use crate::translate::optimizer::optimize_select_plan; +use crate::translate::optimizer::optimize_plan; pub use io::OpenFlags; #[cfg(feature = "fs")] pub use io::PlatformIO; @@ -267,7 +267,7 @@ impl 
Connection { match stmt { ast::Stmt::Select(select) => { let plan = prepare_select_plan(&*self.schema.borrow(), select)?; - let plan = optimize_select_plan(plan)?; + let plan = optimize_plan(plan)?; println!("{}", plan); } _ => todo!(), diff --git a/core/translate/delete.rs b/core/translate/delete.rs index d3d5fd346..b0ecbdc69 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -1,5 +1,5 @@ -use crate::translate::emitter::emit_program_for_delete; -use crate::translate::optimizer::optimize_delete_plan; +use crate::translate::emitter::emit_program; +use crate::translate::optimizer::optimize_plan; use crate::translate::planner::prepare_delete_plan; use crate::{schema::Schema, storage::sqlite3_ondisk::DatabaseHeader, vdbe::Program}; use crate::{Connection, Result}; @@ -16,6 +16,6 @@ pub fn translate_delete( connection: Weak, ) -> Result { let delete_plan = prepare_delete_plan(schema, tbl_name, where_clause)?; - let optimized_plan = optimize_delete_plan(delete_plan)?; - emit_program_for_delete(database_header, optimized_plan, connection) + let optimized_plan = optimize_plan(delete_plan)?; + emit_program(database_header, optimized_plan, connection) } diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 3f3bdf1b2..5e5f0d601 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -9,7 +9,7 @@ use sqlite3_parser::ast::{self}; use crate::schema::{Column, PseudoTable, Table}; use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::translate::plan::{IterationDirection, Search}; +use crate::translate::plan::{DeletePlan, IterationDirection, Plan, Search}; use crate::types::{OwnedRecord, OwnedValue}; use crate::util::exprs_are_equivalent; use crate::vdbe::builder::ProgramBuilder; @@ -20,7 +20,7 @@ use super::expr::{ translate_aggregation, translate_aggregation_groupby, translate_condition_expr, translate_expr, ConditionMetadata, }; -use super::plan::{Aggregate, BTreeTableReference, Direction, GroupBy, Plan}; +use 
super::plan::{Aggregate, BTreeTableReference, Direction, GroupBy, SelectPlan}; use super::plan::{ResultSetColumn, SourceOperator}; // Metadata for handling LEFT JOIN operations @@ -175,6 +175,17 @@ pub fn emit_program( database_header: Rc>, mut plan: Plan, connection: Weak, +) -> Result { + match plan { + Plan::Select(plan) => emit_program_for_select(database_header, plan, connection), + Plan::Delete(plan) => emit_program_for_delete(database_header, plan, connection), + } +} + +fn emit_program_for_select( + database_header: Rc>, + mut plan: SelectPlan, + connection: Weak, ) -> Result { let (mut program, mut metadata, init_label, start_offset) = prologue()?; @@ -286,9 +297,9 @@ pub fn emit_program( Ok(program.build(database_header, connection)) } -pub fn emit_program_for_delete( +fn emit_program_for_delete( database_header: Rc>, - mut plan: Plan, + mut plan: DeletePlan, connection: Weak, ) -> Result { let (mut program, mut metadata, init_label, start_offset) = prologue()?; @@ -925,7 +936,7 @@ pub enum InnerLoopEmitTarget<'a> { /// At this point the cursors for all tables have been opened and rewound. fn inner_loop_emit( program: &mut ProgramBuilder, - plan: &mut Plan, + plan: &mut SelectPlan, metadata: &mut Metadata, ) -> Result<()> { // if we have a group by, we emit a record into the group by sorter. 
diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 9e93b1d71..f86c20c26 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -6,15 +6,22 @@ use crate::{schema::Index, Result}; use super::plan::{ get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, BTreeTableReference, - Direction, IterationDirection, Plan, Search, SourceOperator, + DeletePlan, Direction, IterationDirection, Plan, Search, SelectPlan, SourceOperator, }; +pub fn optimize_plan(mut plan: Plan) -> Result { + match plan { + Plan::Select(plan) => optimize_select_plan(plan).map(Plan::Select), + Plan::Delete(plan) => optimize_delete_plan(plan).map(Plan::Delete), + } +} + /** * Make a few passes over the plan to optimize it. * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -pub fn optimize_select_plan(mut plan: Plan) -> Result { +fn optimize_select_plan(mut plan: SelectPlan) -> Result { eliminate_between(&mut plan.source, &mut plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constants(&mut plan.source, &mut plan.where_clause)? @@ -45,7 +52,7 @@ pub fn optimize_select_plan(mut plan: Plan) -> Result { Ok(plan) } -pub fn optimize_delete_plan(mut plan: Plan) -> Result { +fn optimize_delete_plan(mut plan: DeletePlan) -> Result { eliminate_between(&mut plan.source, &mut plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constants(&mut plan.source, &mut plan.where_clause)? 
diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 8e0fa326e..4cd30d08b 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,11 +1,12 @@ use core::fmt; +use sqlite3_parser::ast; +use std::ptr::write; use std::{ fmt::{Display, Formatter}, rc::Rc, }; -use sqlite3_parser::ast; - +use crate::translate::plan::Plan::{Delete, Select}; use crate::{ function::AggFunc, schema::{BTreeTable, Column, Index}, @@ -27,7 +28,13 @@ pub struct GroupBy { } #[derive(Debug)] -pub struct Plan { +pub enum Plan { + Select(SelectPlan), + Delete(DeletePlan), +} + +#[derive(Debug)] +pub struct SelectPlan { /// A tree of sources (tables). pub source: SourceOperator, /// the columns inside SELECT ... FROM @@ -50,9 +57,32 @@ pub struct Plan { pub contains_constant_false_condition: bool, } +#[derive(Debug)] +pub struct DeletePlan { + /// A tree of sources (tables). + pub source: SourceOperator, + /// the columns inside SELECT ... FROM + pub result_columns: Vec, + /// where clause split into a vec at 'AND' boundaries. 
+ pub where_clause: Option>, + /// order by clause + pub order_by: Option>, + /// limit clause + pub limit: Option, + /// all the tables referenced in the query + pub referenced_tables: Vec, + /// all the indexes available + pub available_indexes: Vec>, + /// query contains a constant condition that is always false + pub contains_constant_false_condition: bool, +} + impl Display for Plan { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.source) + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Select(select_plan) => write!(f, "{}", select_plan.source), + Delete(delete_plan) => write!(f, "{}", delete_plan.source), + } } } diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 0c417fa66..f9b941571 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,5 +1,6 @@ use super::plan::{ - Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, + Aggregate, BTreeTableReference, DeletePlan, Direction, GroupBy, Plan, ResultSetColumn, + SelectPlan, SourceOperator, }; use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, QualifiedName, ResultColumn}; @@ -269,7 +270,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

todo!(), } @@ -504,7 +505,7 @@ pub fn prepare_delete_plan( // Parse and resolve the where_clause let resolved_where_clauses = parse_where(where_clause, &[table_ref.clone()])?; - let plan = Plan { + let plan = DeletePlan { source: SourceOperator::Scan { id: 0, table_reference: table_ref.clone(), @@ -513,16 +514,14 @@ pub fn prepare_delete_plan( }, result_columns: vec![], where_clause: resolved_where_clauses, - group_by: None, order_by: None, - aggregates: vec![], limit: None, // TODO: add support for limit referenced_tables: vec![table_ref], available_indexes: vec![], contains_constant_false_condition: false, }; - Ok(plan) + Ok(Plan::Delete(plan)) } #[allow(clippy::type_complexity)] diff --git a/core/translate/select.rs b/core/translate/select.rs index a0a0fa36c..b79560fda 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -4,7 +4,7 @@ use std::{cell::RefCell, rc::Rc}; use super::emitter::emit_program; use super::planner::prepare_select_plan; use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::translate::optimizer::optimize_select_plan; +use crate::translate::optimizer::optimize_plan; use crate::Connection; use crate::{schema::Schema, vdbe::Program, Result}; use sqlite3_parser::ast; @@ -16,6 +16,6 @@ pub fn translate_select( connection: Weak, ) -> Result { let select_plan = prepare_select_plan(schema, select)?; - let optimized_plan = optimize_select_plan(select_plan)?; + let optimized_plan = optimize_plan(select_plan)?; emit_program(database_header, optimized_plan, connection) } From b57a95752cc438bac7ec3c4aca02b69bc5101540 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 22 Dec 2024 22:53:05 +0200 Subject: [PATCH 085/144] core/btree: improve documentation --- core/storage/btree.rs | 330 +++++++++++++++++++++------------ core/storage/pager.rs | 2 +- core/storage/sqlite3_ondisk.rs | 208 ++++++++++++++++----- core/translate/mod.rs | 4 +- 4 files changed, 377 insertions(+), 167 deletions(-) diff --git a/core/storage/btree.rs 
b/core/storage/btree.rs index 19ba4c7fa..08ec9c855 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -20,22 +20,27 @@ use super::sqlite3_ondisk::{ /* These are offsets of fields in the header of a b-tree page. */ -const BTREE_HEADER_OFFSET_TYPE: usize = 0; /* type of btree page -> u8 */ -const BTREE_HEADER_OFFSET_FREEBLOCK: usize = 1; /* pointer to first freeblock -> u16 */ -const BTREE_HEADER_OFFSET_CELL_COUNT: usize = 3; /* number of cells in the page -> u16 */ -const BTREE_HEADER_OFFSET_CELL_CONTENT: usize = 5; /* pointer to first byte of cell allocated content from top -> u16 */ -const BTREE_HEADER_OFFSET_FRAGMENTED: usize = 7; /* number of fragmented bytes -> u8 */ -const BTREE_HEADER_OFFSET_RIGHTMOST: usize = 8; /* if internalnode, pointer right most pointer (saved separately from cells) -> u32 */ -/* -** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than -** this will be declared corrupt. This value is calculated based on a -** maximum database size of 2^31 pages a minimum fanout of 2 for a -** root-node and 3 for all other internal nodes. -** -** If a tree that appears to be taller than this is encountered, it is -** assumed that the database is corrupt. -*/ +/// type of btree page -> u8 +const PAGE_HEADER_OFFSET_PAGE_TYPE: usize = 0; +/// pointer to first freeblock -> u16 +const PAGE_HEADER_OFFSET_FREEBLOCK: usize = 1; +/// number of cells in the page -> u16 +const PAGE_HEADER_OFFSET_CELL_COUNT: usize = 3; +/// pointer to first byte of cell allocated content from top -> u16 +const PAGE_HEADER_OFFSET_CELL_CONTENT_AREA: usize = 5; +/// number of fragmented bytes -> u8 +const PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT: usize = 7; +/// if internalnode, pointer right most pointer (saved separately from cells) -> u32 +const PAGE_HEADER_OFFSET_RIGHTMOST_PTR: usize = 8; + +/// Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than +/// this will be declared corrupt. 
This value is calculated based on a +/// maximum database size of 2^31 pages a minimum fanout of 2 for a +/// root-node and 3 for all other internal nodes. +/// +/// If a tree that appears to be taller than this is encountered, it is +/// assumed that the database is corrupt. pub const BTCURSOR_MAX_DEPTH: usize = 20; /// Evaluate a Result>, if IO return IO. @@ -57,6 +62,8 @@ macro_rules! return_if_locked { }}; } +/// State machine of a write operation. +/// May involve balancing due to overflow. #[derive(Debug)] enum WriteState { Start, @@ -67,11 +74,16 @@ enum WriteState { } struct WriteInfo { + /// State of the write operation state machine. state: WriteState, + /// Pages allocated during the write operation due to balancing. new_pages: RefCell>, + /// Scratch space used during balancing. scratch_cells: RefCell>, + /// Bookkeeping of the rightmost pointer so the PAGE_HEADER_OFFSET_RIGHTMOST_PTR can be updated. rightmost_pointer: RefCell>, - page_copy: RefCell>, // this holds the copy a of a page needed for buffer references + /// Copy of the current page needed for buffer references. + page_copy: RefCell>, } pub struct BTreeCursor { @@ -142,6 +154,8 @@ impl BTreeCursor { } } + /// Check if the table is empty. + /// This is done by checking if the root page has no cells. fn is_empty_table(&mut self) -> Result> { let page = self.pager.read_page(self.root_page)?; return_if_locked!(page); @@ -150,16 +164,18 @@ impl BTreeCursor { Ok(CursorResult::Ok(cell_count == 0)) } + /// Move the cursor to the previous record and return it. + /// Used in backwards iteration. 
fn get_prev_record(&mut self) -> Result, Option)>> { loop { let page = self.stack.top(); - let cell_idx = self.stack.current_index(); + let cell_idx = self.stack.current_cell_index(); - // moved to current page begin + // moved to beginning of current page // todo: find a better way to flag moved to end or begin of page - if self.stack.curr_idx_out_of_begin() { + if self.stack.current_cell_index_less_than_min() { loop { - if self.stack.current_index() > 0 { + if self.stack.current_cell_index() > 0 { self.stack.retreat(); break; } @@ -198,8 +214,8 @@ impl BTreeCursor { let cell = contents.cell_get( cell_idx, self.pager.clone(), - self.max_local(contents.page_type()), - self.min_local(contents.page_type()), + self.payload_overflow_threshold_max(contents.page_type()), + self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), )?; @@ -228,13 +244,15 @@ impl BTreeCursor { } } + /// Move the cursor to the next record and return it. + /// Used in forwards iteration, which is the default. fn get_next_record( &mut self, predicate: Option<(SeekKey<'_>, SeekOp)>, ) -> Result, Option)>> { loop { let mem_page_rc = self.stack.top(); - let cell_idx = self.stack.current_index() as usize; + let cell_idx = self.stack.current_cell_index() as usize; debug!("current id={} cell={}", mem_page_rc.get().id, cell_idx); return_if_locked!(mem_page_rc); @@ -286,8 +304,8 @@ impl BTreeCursor { let cell = contents.cell_get( cell_idx, self.pager.clone(), - self.max_local(contents.page_type()), - self.min_local(contents.page_type()), + self.payload_overflow_threshold_max(contents.page_type()), + self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), )?; match &cell { @@ -386,6 +404,9 @@ impl BTreeCursor { } } + /// Move the cursor to the record that matches the seek key and seek operation. + /// This may be used to seek to a specific record in a point query (e.g. SELECT * FROM table WHERE col = 10) + /// or e.g. 
find the first record greater than the seek key in a range query (e.g. SELECT * FROM table WHERE col > 10). fn seek( &mut self, key: SeekKey<'_>, @@ -403,8 +424,8 @@ impl BTreeCursor { let cell = contents.cell_get( cell_idx, self.pager.clone(), - self.max_local(contents.page_type()), - self.min_local(contents.page_type()), + self.payload_overflow_threshold_max(contents.page_type()), + self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), )?; match &cell { @@ -476,12 +497,14 @@ impl BTreeCursor { Ok(CursorResult::Ok((None, None))) } + /// Move the cursor to the root page of the btree. fn move_to_root(&mut self) { let mem_page = self.pager.read_page(self.root_page).unwrap(); self.stack.clear(); self.stack.push(mem_page); } + /// Move the cursor to the rightmost record in the btree. fn move_to_rightmost(&mut self) -> Result> { self.move_to_root(); @@ -553,8 +576,8 @@ impl BTreeCursor { match &contents.cell_get( cell_idx, self.pager.clone(), - self.max_local(contents.page_type()), - self.min_local(contents.page_type()), + self.payload_overflow_threshold_max(contents.page_type()), + self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), )? { BTreeCell::TableInteriorCell(TableInteriorCell { @@ -634,6 +657,8 @@ impl BTreeCursor { } } + /// Insert a record into the btree. + /// If the insert operation overflows the page, it will be split and the btree will be balanced. fn insert_into_page( &mut self, key: &OwnedValue, @@ -700,7 +725,11 @@ impl BTreeCursor { } } - /* insert to position and shift other pointers */ + /// Insert a record into a cell. + /// If the cell overflows, an overflow cell is created. + /// insert_into_cell() is called from insert_into_page(), + /// and the overflow cell count is used to determine if the page overflows, + /// i.e. whether we need to balance the btree after the insert. 
fn insert_into_cell(&self, page: &mut PageContent, payload: &[u8], cell_idx: usize) { let free = self.compute_free_space(page, RefCell::borrow(&self.database_header)); let enough_space = payload.len() + 2 <= free as usize; @@ -734,41 +763,51 @@ impl BTreeCursor { page.write_u16(pointer_area_pc_by_idx - page.offset, pc); // update first byte of content area - page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, pc); + page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, pc); // update cell count let new_n_cells = (page.cell_count() + 1) as u16; - page.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, new_n_cells); + page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, new_n_cells); } + /// Free the range of bytes that a cell occupies. + /// This function also updates the freeblock list in the page. + /// Freeblocks are used to keep track of free space in the page, + /// and are organized as a linked list. fn free_cell_range(&self, page: &mut PageContent, offset: u16, len: u16) { + // if the freeblock list is empty, we set this block as the first freeblock in the page header. 
if page.first_freeblock() == 0 { - // insert into empty list - page.write_u16(offset as usize, 0); - page.write_u16(offset as usize + 2, len); - page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, offset); + page.write_u16(offset as usize, 0); // next freeblock = null + page.write_u16(offset as usize + 2, len); // size of this freeblock + page.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, offset); // first freeblock in page = this block return; } let first_block = page.first_freeblock(); + // if the freeblock list is not empty, and the offset is less than the first freeblock, + // we insert this block at the head of the list if offset < first_block { - // insert into head of list - page.write_u16(offset as usize, first_block); - page.write_u16(offset as usize + 2, len); - page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, offset); + page.write_u16(offset as usize, first_block); // next freeblock = previous first freeblock + page.write_u16(offset as usize + 2, len); // size of this freeblock + page.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, offset); // first freeblock in page = this block return; } + // if we clear space that is at the start of the cell content area, + // we need to update the cell content area pointer forward to account for the removed space + // FIXME: is offset ever < cell_content_area? if offset <= page.cell_content_area() { - // extend boundary of content area - page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, page.first_freeblock()); - page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, offset + len); + page.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, page.first_freeblock()); // why is this here? + page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, offset + len); return; } + // if the freeblock list is not empty, and the offset is greater than the first freeblock, + // then we need to do some more calculation to figure out where to insert the freeblock + // in the freeblock linked list. 
let maxpc = { let db_header = self.database_header.borrow(); - let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize; + let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize; usable_space as u16 }; @@ -799,17 +838,23 @@ impl BTreeCursor { } } + /// Drop a cell from a page. + /// This is done by freeing the range of bytes that the cell occupies. fn drop_cell(&self, page: &mut PageContent, cell_idx: usize) { let (cell_start, cell_len) = page.cell_get_raw_region( cell_idx, - self.max_local(page.page_type()), - self.min_local(page.page_type()), + self.payload_overflow_threshold_max(page.page_type()), + self.payload_overflow_threshold_min(page.page_type()), self.usable_space(), ); self.free_cell_range(page, cell_start as u16, cell_len as u16); - page.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1); + page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1); } + /// Balance a leaf page. + /// Balancing is done when a page overflows. + /// see e.g. https://en.wikipedia.org/wiki/B-tree + /// /// This is a naive algorithm that doesn't try to distribute cells evenly by content. /// It will try to split the page in half by keys not by content. /// Sqlite tries to have a page at least 40% full. 
@@ -852,8 +897,8 @@ impl BTreeCursor { for cell_idx in 0..page_copy.cell_count() { let (start, len) = page_copy.cell_get_raw_region( cell_idx, - self.max_local(page_copy.page_type()), - self.min_local(page_copy.page_type()), + self.payload_overflow_threshold_max(page_copy.page_type()), + self.payload_overflow_threshold_min(page_copy.page_type()), self.usable_space(), ); let buf = page_copy.as_ptr(); @@ -930,14 +975,14 @@ impl BTreeCursor { assert_eq!(parent_contents.overflow_cells.len(), 0); // Right page pointer is u32 in right most pointer, and in cell is u32 too, so we can use a *u32 to hold where we want to change this value - let mut right_pointer = BTREE_HEADER_OFFSET_RIGHTMOST; + let mut right_pointer = PAGE_HEADER_OFFSET_RIGHTMOST_PTR; for cell_idx in 0..parent_contents.cell_count() { let cell = parent_contents .cell_get( cell_idx, self.pager.clone(), - self.max_local(page_type.clone()), - self.min_local(page_type.clone()), + self.payload_overflow_threshold_max(page_type.clone()), + self.payload_overflow_threshold_min(page_type.clone()), self.usable_space(), ) .unwrap(); @@ -950,8 +995,8 @@ impl BTreeCursor { if found { let (start, _len) = parent_contents.cell_get_raw_region( cell_idx, - self.max_local(page_type.clone()), - self.min_local(page_type.clone()), + self.payload_overflow_threshold_max(page_type.clone()), + self.payload_overflow_threshold_min(page_type.clone()), self.usable_space(), ); right_pointer = start; @@ -967,17 +1012,20 @@ impl BTreeCursor { assert!(page.is_dirty()); let contents = page.get().contents.as_mut().unwrap(); - contents.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0); - contents.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0); + contents.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, 0); + contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0); let db_header = RefCell::borrow(&self.database_header); let cell_content_area_start = - db_header.page_size - db_header.unused_space as u16; - contents.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, 
cell_content_area_start); + db_header.page_size - db_header.reserved_space as u16; + contents.write_u16( + PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, + cell_content_area_start, + ); - contents.write_u8(BTREE_HEADER_OFFSET_FRAGMENTED, 0); + contents.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0); if !contents.is_leaf() { - contents.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, 0); + contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, 0); } } @@ -1035,8 +1083,8 @@ impl BTreeCursor { .cell_get( contents.cell_count() - 1, self.pager.clone(), - self.max_local(contents.page_type()), - self.min_local(contents.page_type()), + self.payload_overflow_threshold_max(contents.page_type()), + self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), ) .unwrap(); @@ -1045,13 +1093,13 @@ impl BTreeCursor { _ => unreachable!(), }; self.drop_cell(contents, contents.cell_count() - 1); - contents.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, last_cell_pointer); + contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, last_cell_pointer); } // last page right most pointer points to previous right most pointer before splitting let last_page = new_pages.last().unwrap(); let last_page_contents = last_page.get().contents.as_mut().unwrap(); last_page_contents.write_u32( - BTREE_HEADER_OFFSET_RIGHTMOST, + PAGE_HEADER_OFFSET_RIGHTMOST_PTR, self.write_info.rightmost_pointer.borrow().unwrap(), ); } @@ -1069,8 +1117,8 @@ impl BTreeCursor { &contents.page_type(), 0, self.pager.clone(), - self.max_local(contents.page_type()), - self.min_local(contents.page_type()), + self.payload_overflow_threshold_max(contents.page_type()), + self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), ) .unwrap(); @@ -1119,6 +1167,9 @@ impl BTreeCursor { } } + /// Balance the root page. + /// This is done when the root page overflows, and we need to create a new root page. + /// See e.g. 
https://en.wikipedia.org/wiki/B-tree fn balance_root(&mut self) { /* todo: balance deeper, create child and copy contents of root there. Then split root */ /* if we are in root page then we just need to create a new root and push key there */ @@ -1145,8 +1196,8 @@ impl BTreeCursor { } // point new root right child to previous root new_root_page_contents - .write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, new_root_page_id as u32); - new_root_page_contents.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0); + .write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, new_root_page_id as u32); + new_root_page_contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0); } /* swap splitted page buffer with new root buffer so we don't have to update page idx */ @@ -1195,12 +1246,16 @@ impl BTreeCursor { } } + /// Allocate a new page to the btree via the pager. + /// This marks the page as dirty and writes the page header. fn allocate_page(&self, page_type: PageType, offset: usize) -> PageRef { let page = self.pager.allocate_page().unwrap(); btree_init_page(&page, page_type, &self.database_header.borrow(), offset); page } + /// Allocate a new overflow page. + /// This is done when a cell overflows and new space is needed. fn allocate_overflow_page(&self) -> PageRef { let page = self.pager.allocate_page().unwrap(); @@ -1212,9 +1267,7 @@ impl BTreeCursor { page } - /* - Allocate space for a cell on a page. - */ + /// Allocate space for a cell on a page. 
fn allocate_cell_space(&self, page_ref: &PageContent, amount: u16) -> u16 { let amount = amount as usize; @@ -1236,24 +1289,25 @@ impl BTreeCursor { if gap + 2 + amount > top { // defragment self.defragment_page(page_ref, RefCell::borrow(&self.database_header)); - top = page_ref.read_u16(BTREE_HEADER_OFFSET_CELL_CONTENT) as usize; + top = page_ref.read_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA) as usize; } let db_header = RefCell::borrow(&self.database_header); top -= amount; - page_ref.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, top as u16); + page_ref.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, top as u16); - let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize; + let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize; assert!(top + amount <= usable_space); top as u16 } + /// Defragment a page. This means packing all the cells to the end of the page. fn defragment_page(&self, page: &PageContent, db_header: Ref) { log::debug!("defragment_page"); let cloned_page = page.clone(); // TODO(pere): usable space should include offset probably - let usable_space = (db_header.page_size - db_header.unused_space as u16) as u64; + let usable_space = (db_header.page_size - db_header.reserved_space as u16) as u64; let mut cbrk = usable_space; // TODO: implement fast algorithm @@ -1330,23 +1384,23 @@ impl BTreeCursor { let write_buf = page.as_ptr(); // set new first byte of cell content - page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, cbrk as u16); + page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, cbrk as u16); // set free block to 0, unused spaced can be retrieved from gap between cell pointer end and content start - page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0); + page.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, 0); // set unused space to 0 let first_cell = cloned_page.cell_content_area() as u64; assert!(first_cell <= cbrk); write_buf[first_cell as usize..cbrk as usize].fill(0); } - // Free blocks can be zero, 
meaning the "real free space" that can be used to allocate is expected to be between first cell byte - // and end of cell pointer area. + /// Free blocks can be zero, meaning the "real free space" that can be used to allocate is expected to be between first cell byte + /// and end of cell pointer area. #[allow(unused_assignments)] fn compute_free_space(&self, page: &PageContent, db_header: Ref) -> u16 { // TODO(pere): maybe free space is not calculated correctly with offset let buf = page.as_ptr(); - let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize; + let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize; let mut first_byte_in_cell_content = page.cell_content_area(); if first_byte_in_cell_content == 0 { first_byte_in_cell_content = u16::MAX; @@ -1399,6 +1453,8 @@ impl BTreeCursor { nfree as u16 } + /// Fill in the cell payload with the record. + /// If the record is too large to fit in the cell, it will spill onto overflow pages. 
fn fill_cell_payload( &self, page_type: PageType, @@ -1423,13 +1479,13 @@ impl BTreeCursor { write_varint_to_vec(record_buf.len() as u64, cell_payload); } - let max_local = self.max_local(page_type.clone()); + let payload_overflow_threshold_max = self.payload_overflow_threshold_max(page_type.clone()); log::debug!( - "fill_cell_payload(record_size={}, max_local={})", + "fill_cell_payload(record_size={}, payload_overflow_threshold_max={})", record_buf.len(), - max_local + payload_overflow_threshold_max ); - if record_buf.len() <= max_local { + if record_buf.len() <= payload_overflow_threshold_max { // enough allowed space to fit inside a btree page cell_payload.extend_from_slice(record_buf.as_slice()); cell_payload.resize(cell_payload.len() + 4, 0); @@ -1437,11 +1493,12 @@ impl BTreeCursor { } log::debug!("fill_cell_payload(overflow)"); - let min_local = self.min_local(page_type); - let mut space_left = min_local + (record_buf.len() - min_local) % (self.usable_space() - 4); + let payload_overflow_threshold_min = self.payload_overflow_threshold_min(page_type); + let mut space_left = payload_overflow_threshold_min + + (record_buf.len() - payload_overflow_threshold_min) % (self.usable_space() - 4); - if space_left > max_local { - space_left = min_local; + if space_left > payload_overflow_threshold_max { + space_left = payload_overflow_threshold_min; } // cell_size must be equal to first value of space_left as this will be the bytes copied to non-overflow page. @@ -1487,31 +1544,54 @@ impl BTreeCursor { assert_eq!(cell_size, cell_payload.len()); } - fn max_local(&self, page_type: PageType) -> usize { - let usable_space = self.usable_space(); + /// Returns the maximum payload size (X) that can be stored directly on a b-tree page without spilling to overflow pages. 
+ /// + /// For table leaf pages: X = usable_size - 35 + /// For index pages: X = ((usable_size - 12) * 64/255) - 23 + /// + /// The usable size is the total page size less the reserved space at the end of each page. + /// These thresholds are designed to: + /// - Give a minimum fanout of 4 for index b-trees + /// - Ensure enough payload is on the b-tree page that the record header can usually be accessed + /// without consulting an overflow page + fn payload_overflow_threshold_max(&self, page_type: PageType) -> usize { + let usable_size = self.usable_space(); match page_type { - PageType::IndexInterior | PageType::TableInterior => { - (usable_space - 12) * 64 / 255 - 23 + PageType::IndexInterior | PageType::IndexLeaf => { + ((usable_size - 12) * 64 / 255) - 23 // Index page formula + } + PageType::TableInterior | PageType::TableLeaf => { + usable_size - 35 // Table leaf page formula } - PageType::IndexLeaf | PageType::TableLeaf => usable_space - 35, } } - fn min_local(&self, page_type: PageType) -> usize { - let usable_space = self.usable_space(); - match page_type { - PageType::IndexInterior | PageType::TableInterior => { - (usable_space - 12) * 32 / 255 - 23 - } - PageType::IndexLeaf | PageType::TableLeaf => (usable_space - 12) * 32 / 255 - 23, - } + /// Returns the minimum payload size (M) that must be stored on the b-tree page before spilling to overflow pages is allowed. + /// + /// For all page types: M = ((usable_size - 12) * 32/255) - 23 + /// + /// When payload size P exceeds payload_overflow_threshold_max(): + /// - If K = M + ((P-M) % (usable_size-4)) <= payload_overflow_threshold_max(): store K bytes on page + /// - Otherwise: store M bytes on page + /// + /// The remaining bytes are stored on overflow pages in both cases. 
+ fn payload_overflow_threshold_min(&self, _page_type: PageType) -> usize { + let usable_size = self.usable_space(); + // Same formula for all page types + ((usable_size - 12) * 32 / 255) - 23 } + /// The "usable size" of a database page is the page size specified by the 2-byte integer at offset 16 + /// in the header, minus the "reserved" space size recorded in the 1-byte integer at offset 20 in the header. + /// The usable size of a page might be an odd number. However, the usable size is not allowed to be less than 480. + /// In other words, if the page size is 512, then the reserved space size cannot exceed 32. fn usable_space(&self) -> usize { let db_header = RefCell::borrow(&self.database_header); - (db_header.page_size - db_header.unused_space as u16) as usize + (db_header.page_size - db_header.reserved_space as u16) as usize } + /// Find the index of the cell in the page that contains the given rowid. + /// BTree tables only. fn find_cell(&self, page: &PageContent, int_key: u64) -> usize { let mut cell_idx = 0; let cell_count = page.cell_count(); @@ -1520,8 +1600,8 @@ impl BTreeCursor { .cell_get( cell_idx, self.pager.clone(), - self.max_local(page.page_type()), - self.min_local(page.page_type()), + self.payload_overflow_threshold_max(page.page_type()), + self.payload_overflow_threshold_min(page.page_type()), self.usable_space(), ) .unwrap() @@ -1545,6 +1625,8 @@ impl BTreeCursor { } impl PageStack { + /// Push a new page onto the stack. + /// This effectively means traversing to a child page. fn push(&self, page: PageRef) { debug!( "pagestack::push(current={}, new_page_id={})", @@ -1561,6 +1643,8 @@ impl PageStack { self.cell_indices.borrow_mut()[current as usize] = 0; } + /// Pop a page off the stack. + /// This effectively means traversing back up to a parent page. 
fn pop(&self) { let current = *self.current_page.borrow(); debug!("pagestack::pop(current={})", current); @@ -1569,6 +1653,8 @@ impl PageStack { *self.current_page.borrow_mut() -= 1; } + /// Get the top page on the stack. + /// This is the page that is currently being traversed. fn top(&self) -> PageRef { let current = *self.current_page.borrow(); let page = self.stack.borrow()[current as usize] @@ -1583,6 +1669,7 @@ impl PageStack { page } + /// Get the parent page of the current page. fn parent(&self) -> PageRef { let current = *self.current_page.borrow(); self.stack.borrow()[current as usize - 1] @@ -1597,13 +1684,15 @@ impl PageStack { } /// Cell index of the current page - fn current_index(&self) -> i32 { + fn current_cell_index(&self) -> i32 { let current = self.current(); self.cell_indices.borrow()[current] } - fn curr_idx_out_of_begin(&self) -> bool { - let cell_idx = self.current_index(); + /// Check if the current cell index is less than 0. + /// This means we have been iterating backwards and have reached the start of the page. + fn current_cell_index_less_than_min(&self) -> bool { + let cell_idx = self.current_cell_index(); cell_idx < 0 } @@ -1639,7 +1728,7 @@ fn find_free_cell(page_ref: &PageContent, db_header: Ref, amount let buf = page_ref.as_ptr(); - let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize; + let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize; let maxpc = usable_space - amount; let mut found = false; while pc <= maxpc { @@ -1785,8 +1874,8 @@ impl Cursor for BTreeCursor { let equals = match &contents.cell_get( cell_idx, self.pager.clone(), - self.max_local(contents.page_type()), - self.min_local(contents.page_type()), + self.payload_overflow_threshold_max(contents.page_type()), + self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), )? 
{ BTreeCell::TableLeafCell(l) => l._rowid == int_key, @@ -1823,15 +1912,18 @@ pub fn btree_init_page( let contents = contents.contents.as_mut().unwrap(); contents.offset = offset; let id = page_type as u8; - contents.write_u8(BTREE_HEADER_OFFSET_TYPE, id); - contents.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0); - contents.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0); - - let cell_content_area_start = db_header.page_size - db_header.unused_space as u16; - contents.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, cell_content_area_start); - - contents.write_u8(BTREE_HEADER_OFFSET_FRAGMENTED, 0); - contents.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, 0); + contents.write_u8(PAGE_HEADER_OFFSET_PAGE_TYPE, id); + contents.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, 0); + contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0); + + let cell_content_area_start = db_header.page_size - db_header.reserved_space as u16; + contents.write_u16( + PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, + cell_content_area_start, + ); + + contents.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0); + contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, 0); } fn to_static_buf(buf: &[u8]) -> &'static [u8] { diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 105a8a75a..cd934d42a 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -482,7 +482,7 @@ impl Pager { pub fn usable_size(&self) -> usize { let db_header = self.db_header.borrow(); - (db_header.page_size - db_header.unused_space as u16) as usize + (db_header.page_size - db_header.reserved_space as u16) as usize } } diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index a1a8aec0c..8af1c7eba 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -64,30 +64,84 @@ const DEFAULT_CACHE_SIZE: i32 = -2000; // Minimum number of pages that cache can hold. pub const MIN_PAGE_CACHE_SIZE: usize = 10; +/// The database header. 
+/// The first 100 bytes of the database file comprise the database file header. +/// The database file header is divided into fields as shown by the table below. +/// All multibyte fields in the database file header are stored with the most significant byte first (big-endian). #[derive(Debug, Clone)] pub struct DatabaseHeader { + /// The header string: "SQLite format 3\0" magic: [u8; 16], + + /// The database page size in bytes. Must be a power of two between 512 and 32768 inclusive, + /// or the value 1 representing a page size of 65536. pub page_size: u16, + + /// File format write version. 1 for legacy; 2 for WAL. write_version: u8, + + /// File format read version. 1 for legacy; 2 for WAL. read_version: u8, - pub unused_space: u8, + + /// Bytes of unused "reserved" space at the end of each page. Usually 0. + /// SQLite has the ability to set aside a small number of extra bytes at the end of every page for use by extensions. + /// These extra bytes are used, for example, by the SQLite Encryption Extension to store a nonce and/or + /// cryptographic checksum associated with each page. + pub reserved_space: u8, + + /// Maximum embedded payload fraction. Must be 64. max_embed_frac: u8, + + /// Minimum embedded payload fraction. Must be 32. min_embed_frac: u8, + + /// Leaf payload fraction. Must be 32. min_leaf_frac: u8, + + /// File change counter, incremented when database is modified. change_counter: u32, + + /// Size of the database file in pages. The "in-header database size". pub database_size: u32, + + /// Page number of the first freelist trunk page. freelist_trunk_page: u32, + + /// Total number of freelist pages. freelist_pages: u32, + + /// The schema cookie. Incremented when the database schema changes. schema_cookie: u32, + + /// The schema format number. Supported formats are 1, 2, 3, and 4. schema_format: u32, - pub default_cache_size: i32, - vacuum: u32, + + /// Default page cache size. 
+ pub default_page_cache_size: i32, + + /// The page number of the largest root b-tree page when in auto-vacuum or + /// incremental-vacuum modes, or zero otherwise. + vacuum_mode_largest_root_page: u32, + + /// The database text encoding. 1=UTF-8, 2=UTF-16le, 3=UTF-16be. text_encoding: u32, + + /// The "user version" as read and set by the user_version pragma. user_version: u32, - incremental_vacuum: u32, + + /// True (non-zero) for incremental-vacuum mode. False (zero) otherwise. + incremental_vacuum_enabled: u32, + + /// The "Application ID" set by PRAGMA application_id. application_id: u32, - reserved: [u8; 20], + + /// Reserved for expansion. Must be zero. + reserved_for_expansion: [u8; 20], + + /// The version-valid-for number. version_valid_for: u32, + + /// SQLITE_VERSION_NUMBER pub version_number: u32, } @@ -98,28 +152,62 @@ pub const WAL_FRAME_HEADER_SIZE: usize = 24; pub const WAL_MAGIC_LE: u32 = 0x377f0682; pub const WAL_MAGIC_BE: u32 = 0x377f0683; +/// The Write-Ahead Log (WAL) header. +/// The first 32 bytes of a WAL file comprise the WAL header. +/// The WAL header is divided into the following fields stored in big-endian order. #[derive(Debug, Default, Clone)] #[repr(C)] // This helps with encoding because rust does not respect the order in structs, so in // this case we want to keep the order pub struct WalHeader { + /// Magic number. 0x377f0682 or 0x377f0683 + /// If the LSB is 0, checksums are native byte order, else checksums are serialized pub magic: u32, + + /// WAL format version. Currently 3007000 pub file_format: u32, + + /// Database page size in bytes. Power of two between 512 and 32768 inclusive pub page_size: u32, + + /// Checkpoint sequence number. 
Increases with each checkpoint pub checkpoint_seq: u32, + + /// Random value used for the first salt in checksum calculations pub salt_1: u32, + + /// Random value used for the second salt in checksum calculations pub salt_2: u32, + + /// First checksum value in the wal-header pub checksum_1: u32, + + /// Second checksum value in the wal-header pub checksum_2: u32, } +/// Immediately following the wal-header are zero or more frames. +/// Each frame consists of a 24-byte frame-header followed by bytes of page data. +/// The frame-header is six big-endian 32-bit unsigned integer values, as follows: #[allow(dead_code)] #[derive(Debug, Default)] pub struct WalFrameHeader { + /// Page number page_number: u32, + + /// For commit records, the size of the database file in pages after the commit. + /// For all other records, zero. db_size: u32, + + /// Salt-1 copied from the WAL header salt_1: u32, + + /// Salt-2 copied from the WAL header salt_2: u32, + + /// Checksum-1: Cumulative checksum up through and including this page checksum_1: u32, + + /// Checksum-2: Second half of the cumulative checksum checksum_2: u32, } @@ -130,7 +218,7 @@ impl Default for DatabaseHeader { page_size: 4096, write_version: 2, read_version: 2, - unused_space: 0, + reserved_space: 0, max_embed_frac: 64, min_embed_frac: 32, min_leaf_frac: 32, @@ -140,13 +228,13 @@ impl Default for DatabaseHeader { freelist_pages: 0, schema_cookie: 0, schema_format: 4, // latest format, new sqlite3 databases use this format - default_cache_size: 500, // pages - vacuum: 0, + default_page_cache_size: 500, // pages + vacuum_mode_largest_root_page: 0, text_encoding: 1, // utf-8 user_version: 1, - incremental_vacuum: 0, + incremental_vacuum_enabled: 0, application_id: 0, - reserved: [0; 20], + reserved_for_expansion: [0; 20], version_valid_for: 3047000, version_number: 3047000, } @@ -180,7 +268,7 @@ fn finish_read_database_header( header.page_size = u16::from_be_bytes([buf[16], buf[17]]); header.write_version = 
buf[18]; header.read_version = buf[19]; - header.unused_space = buf[20]; + header.reserved_space = buf[20]; header.max_embed_frac = buf[21]; header.min_embed_frac = buf[22]; header.min_leaf_frac = buf[23]; @@ -190,16 +278,16 @@ fn finish_read_database_header( header.freelist_pages = u32::from_be_bytes([buf[36], buf[37], buf[38], buf[39]]); header.schema_cookie = u32::from_be_bytes([buf[40], buf[41], buf[42], buf[43]]); header.schema_format = u32::from_be_bytes([buf[44], buf[45], buf[46], buf[47]]); - header.default_cache_size = i32::from_be_bytes([buf[48], buf[49], buf[50], buf[51]]); - if header.default_cache_size == 0 { - header.default_cache_size = DEFAULT_CACHE_SIZE; + header.default_page_cache_size = i32::from_be_bytes([buf[48], buf[49], buf[50], buf[51]]); + if header.default_page_cache_size == 0 { + header.default_page_cache_size = DEFAULT_CACHE_SIZE; } - header.vacuum = u32::from_be_bytes([buf[52], buf[53], buf[54], buf[55]]); + header.vacuum_mode_largest_root_page = u32::from_be_bytes([buf[52], buf[53], buf[54], buf[55]]); header.text_encoding = u32::from_be_bytes([buf[56], buf[57], buf[58], buf[59]]); header.user_version = u32::from_be_bytes([buf[60], buf[61], buf[62], buf[63]]); - header.incremental_vacuum = u32::from_be_bytes([buf[64], buf[65], buf[66], buf[67]]); + header.incremental_vacuum_enabled = u32::from_be_bytes([buf[64], buf[65], buf[66], buf[67]]); header.application_id = u32::from_be_bytes([buf[68], buf[69], buf[70], buf[71]]); - header.reserved.copy_from_slice(&buf[72..92]); + header.reserved_for_expansion.copy_from_slice(&buf[72..92]); header.version_valid_for = u32::from_be_bytes([buf[92], buf[93], buf[94], buf[95]]); header.version_number = u32::from_be_bytes([buf[96], buf[97], buf[98], buf[99]]); Ok(()) @@ -258,7 +346,7 @@ fn write_header_to_buf(buf: &mut [u8], header: &DatabaseHeader) { buf[16..18].copy_from_slice(&header.page_size.to_be_bytes()); buf[18] = header.write_version; buf[19] = header.read_version; - buf[20] = 
header.unused_space; + buf[20] = header.reserved_space; buf[21] = header.max_embed_frac; buf[22] = header.min_embed_frac; buf[23] = header.min_leaf_frac; @@ -268,15 +356,15 @@ fn write_header_to_buf(buf: &mut [u8], header: &DatabaseHeader) { buf[36..40].copy_from_slice(&header.freelist_pages.to_be_bytes()); buf[40..44].copy_from_slice(&header.schema_cookie.to_be_bytes()); buf[44..48].copy_from_slice(&header.schema_format.to_be_bytes()); - buf[48..52].copy_from_slice(&header.default_cache_size.to_be_bytes()); + buf[48..52].copy_from_slice(&header.default_page_cache_size.to_be_bytes()); - buf[52..56].copy_from_slice(&header.vacuum.to_be_bytes()); + buf[52..56].copy_from_slice(&header.vacuum_mode_largest_root_page.to_be_bytes()); buf[56..60].copy_from_slice(&header.text_encoding.to_be_bytes()); buf[60..64].copy_from_slice(&header.user_version.to_be_bytes()); - buf[64..68].copy_from_slice(&header.incremental_vacuum.to_be_bytes()); + buf[64..68].copy_from_slice(&header.incremental_vacuum_enabled.to_be_bytes()); buf[68..72].copy_from_slice(&header.application_id.to_be_bytes()); - buf[72..92].copy_from_slice(&header.reserved); + buf[72..92].copy_from_slice(&header.reserved_for_expansion); buf[92..96].copy_from_slice(&header.version_valid_for.to_be_bytes()); buf[96..100].copy_from_slice(&header.version_number.to_be_bytes()); } @@ -395,6 +483,7 @@ impl PageContent { self.read_u16(3) as usize } + /// The start of the cell content area. 
pub fn cell_content_area(&self) -> u16 { self.read_u16(5) } @@ -416,8 +505,8 @@ impl PageContent { &self, idx: usize, pager: Rc, - max_local: usize, - min_local: usize, + payload_overflow_threshold_max: usize, + payload_overflow_threshold_min: usize, usable_size: usize, ) -> Result { log::debug!("cell_get(idx={})", idx); @@ -439,8 +528,8 @@ impl PageContent { &self.page_type(), cell_pointer, pager, - max_local, - min_local, + payload_overflow_threshold_max, + payload_overflow_threshold_min, usable_size, ) } @@ -460,8 +549,8 @@ impl PageContent { pub fn cell_get_raw_region( &self, idx: usize, - max_local: usize, - min_local: usize, + payload_overflow_threshold_max: usize, + payload_overflow_threshold_min: usize, usable_size: usize, ) -> (usize, usize) { let buf = self.as_ptr(); @@ -479,8 +568,12 @@ impl PageContent { let len = match self.page_type() { PageType::IndexInterior => { let (len_payload, n_payload) = read_varint(&buf[cell_pointer + 4..]).unwrap(); - let (overflows, to_read) = - payload_overflows(len_payload as usize, max_local, min_local, usable_size); + let (overflows, to_read) = payload_overflows( + len_payload as usize, + payload_overflow_threshold_max, + payload_overflow_threshold_min, + usable_size, + ); if overflows { 4 + to_read + n_payload + 4 } else { @@ -493,8 +586,12 @@ impl PageContent { } PageType::IndexLeaf => { let (len_payload, n_payload) = read_varint(&buf[cell_pointer..]).unwrap(); - let (overflows, to_read) = - payload_overflows(len_payload as usize, max_local, min_local, usable_size); + let (overflows, to_read) = payload_overflows( + len_payload as usize, + payload_overflow_threshold_max, + payload_overflow_threshold_min, + usable_size, + ); if overflows { to_read + n_payload + 4 } else { @@ -504,8 +601,12 @@ impl PageContent { PageType::TableLeaf => { let (len_payload, n_payload) = read_varint(&buf[cell_pointer..]).unwrap(); let (_, n_rowid) = read_varint(&buf[cell_pointer + n_payload..]).unwrap(); - let (overflows, to_read) = - 
payload_overflows(len_payload as usize, max_local, min_local, usable_size); + let (overflows, to_read) = payload_overflows( + len_payload as usize, + payload_overflow_threshold_max, + payload_overflow_threshold_min, + usable_size, + ); if overflows { to_read + n_payload + n_rowid } else { @@ -1170,28 +1271,45 @@ pub fn begin_write_wal_header(io: &Rc, header: &WalHeader) -> Result<( Ok(()) } -/* - Checks if payload will overflow a cell based on max local and - it will return the min size that will be stored in that case, - including overflow pointer -*/ +/// Checks if payload will overflow a cell based on the maximum allowed size. +/// It will return the min size that will be stored in that case, +/// including overflow pointer pub fn payload_overflows( payload_size: usize, - max_local: usize, - min_local: usize, + payload_overflow_threshold_max: usize, + payload_overflow_threshold_min: usize, usable_size: usize, ) -> (bool, usize) { - if payload_size <= max_local { + if payload_size <= payload_overflow_threshold_max { return (false, 0); } - let mut space_left = min_local + (payload_size - min_local) % (usable_size - 4); - if space_left > max_local { - space_left = min_local; + let mut space_left = payload_overflow_threshold_min + + (payload_size - payload_overflow_threshold_min) % (usable_size - 4); + if space_left > payload_overflow_threshold_max { + space_left = payload_overflow_threshold_min; } (true, space_left + 4) } +/// The checksum is computed by interpreting the input as an even number of unsigned 32-bit integers: x(0) through x(N). +/// The 32-bit integers are big-endian if the magic number in the first 4 bytes of the WAL header is 0x377f0683 +/// and the integers are little-endian if the magic number is 0x377f0682. +/// The checksum values are always stored in the frame header in a big-endian format regardless of which byte order is used to compute the checksum. 
+ +/// The checksum algorithm only works for content which is a multiple of 8 bytes in length. +/// In other words, if the inputs are x(0) through x(N) then N must be odd. +/// The checksum algorithm is as follows: +/// +/// s0 = s1 = 0 +/// for i from 0 to n-1 step 2: +/// s0 += x(i) + s1; +/// s1 += x(i+1) + s0; +/// endfor +/// +/// The outputs s0 and s1 are both weighted checksums using Fibonacci weights in reverse order. +/// (The largest Fibonacci weight occurs on the first element of the sequence being summed.) +/// The s1 value spans all 32-bit integer terms of the sequence whereas s0 omits the final term. pub fn checksum_wal( buf: &[u8], _wal_header: &WalHeader, diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 2e5d86141..db69f1578 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -386,7 +386,7 @@ fn query_pragma( match pragma { PragmaName::CacheSize => { program.emit_insn(Insn::Integer { - value: database_header.borrow().default_cache_size.into(), + value: database_header.borrow().default_page_cache_size.into(), dest: register, }); } @@ -424,7 +424,7 @@ fn update_cache_size(value: i64, header: Rc>, pager: Rc< } // update in-memory header - header.borrow_mut().default_cache_size = cache_size_unformatted + header.borrow_mut().default_page_cache_size = cache_size_unformatted .try_into() .unwrap_or_else(|_| panic!("invalid value, too big for a i32 {}", value)); From 9f08b621ec9c0277a42e4c2a269b6bd16300b57f Mon Sep 17 00:00:00 2001 From: alpaylan Date: Sun, 22 Dec 2024 17:06:46 -0500 Subject: [PATCH 086/144] add clap CLI for configuring the simulator --- Cargo.lock | 1 + simulator/Cargo.toml | 1 + simulator/main.rs | 80 ++++++++++++++++++++++++++++++++------------ 3 files changed, 61 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 53b97bfc1..e654e1bf2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1164,6 +1164,7 @@ name = "limbo_sim" version = "0.0.10" dependencies = [ 
"anarchist-readable-name-generator-lib", + "clap", "env_logger 0.10.2", "limbo_core", "log", diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml index 51351aee2..31a54f1e6 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -22,3 +22,4 @@ log = "0.4.20" tempfile = "3.0.7" env_logger = "0.10.1" anarchist-readable-name-generator-lib = "0.1.2" +clap = { version = "4.5", features = ["derive"] } diff --git a/simulator/main.rs b/simulator/main.rs index 085711391..154fa76e2 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,3 +1,4 @@ +use clap::{command, Parser}; use generation::plan::{Interaction, InteractionPlan, ResultSet}; use generation::{pick, pick_index, Arbitrary, ArbitraryFrom}; use limbo_core::{Connection, Database, File, OpenFlags, PlatformIO, Result, RowResult, IO}; @@ -8,8 +9,11 @@ use rand::prelude::*; use rand_chacha::ChaCha8Rng; use std::cell::RefCell; use std::io::Write; +use std::panic::UnwindSafe; +use std::path::Path; use std::rc::Rc; use std::sync::Arc; +use std::time::Duration; use tempfile::TempDir; mod generation; @@ -25,6 +29,8 @@ struct SimulatorEnv { rng: ChaCha8Rng, } +impl UnwindSafe for SimulatorEnv {} + #[derive(Clone)] enum SimConnection { Connected(Rc), @@ -45,14 +51,48 @@ struct SimulatorOpts { page_size: usize, } +#[derive(Parser)] +#[command(name = "limbo-simulator")] +#[command(author, version, about, long_about = None)] +pub struct SimulatorCLI { + #[clap(short, long, help = "set seed for reproducible runs", default_value = None)] + pub seed: Option, + #[clap(short, long, help = "set custom output directory for produced files", default_value = None)] + pub output_dir: Option, + #[clap( + short, + long, + help = "enable doublechecking, run the simulator with the plan twice and check output equality" + )] + pub doublecheck: bool, + #[clap(short, long, help = "change the maximum size of the randomly generated sequence of interactions", default_value_t = 1024)] + pub maximum_size: usize, +} + 
#[allow(clippy::arc_with_non_send_sync)] fn main() { let _ = env_logger::try_init(); - let seed = match std::env::var("SEED") { - Ok(seed) => seed.parse::().unwrap(), - Err(_) => rand::thread_rng().next_u64(), + + let opts = SimulatorCLI::parse(); + + let seed = match opts.seed { + Some(seed) => seed, + None => rand::thread_rng().next_u64(), }; - println!("Seed: {}", seed); + + let output_dir = match opts.output_dir { + Some(dir) => Path::new(&dir).to_path_buf(), + None => TempDir::new().unwrap().into_path(), + }; + + let db_path = output_dir.join("simulator.db"); + let plan_path = output_dir.join("simulator.plan"); + + // Print the seed, the locations of the database and the plan file + log::info!("database path: {:?}", db_path); + log::info!("simulator plan path: {:?}", plan_path); + log::info!("seed: {}", seed); + let mut rng = ChaCha8Rng::seed_from_u64(seed); let (read_percent, write_percent, delete_percent) = { @@ -66,7 +106,7 @@ fn main() { }; let opts = SimulatorOpts { - ticks: rng.gen_range(0..10240), + ticks: rng.gen_range(0..opts.maximum_size), max_connections: 1, // TODO: for now let's use one connection as we didn't implement // correct transactions procesing max_tables: rng.gen_range(0..128), @@ -74,21 +114,19 @@ fn main() { write_percent, delete_percent, page_size: 4096, // TODO: randomize this too - max_interactions: rng.gen_range(0..10240), + max_interactions: rng.gen_range(0..opts.maximum_size), }; let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap()); - let mut path = TempDir::new().unwrap().into_path(); - path.push("simulator.db"); - println!("path to db '{:?}'", path); - let db = match Database::open_file(io.clone(), path.as_path().to_str().unwrap()) { + let db = match Database::open_file(io.clone(), db_path.to_str().unwrap()) { Ok(db) => db, Err(e) => { - panic!("error opening simulator test file {:?}: {:?}", path, e); + panic!("error opening simulator test file {:?}: {:?}", db_path, e); } }; let connections = 
vec![SimConnection::Disconnected; opts.max_connections]; + let mut env = SimulatorEnv { opts, tables: Vec::new(), @@ -98,30 +136,30 @@ fn main() { db, }; - println!("Initial opts {:?}", env.opts); - log::info!("Generating database interaction plan..."); let mut plans = (1..=env.opts.max_connections) .map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &env)) .collect::>(); + let mut f = std::fs::File::create(plan_path.clone()).unwrap(); + // todo: create a detailed plan file with all the plans. for now, we only use 1 connection, so it's safe to use the first plan. + f.write(plans[0].to_string().as_bytes()).unwrap(); + log::info!("{}", plans[0].stats()); log::info!("Executing database interaction plan..."); - let result = execute_plans(&mut env, &mut plans); + let result = execute_plans(&mut env, &mut plans); if result.is_err() { log::error!("error executing plans: {:?}", result.err()); } - log::info!("db is at {:?}", path); - let mut path = TempDir::new().unwrap().into_path(); - path.push("simulator.plan"); - let mut f = std::fs::File::create(path.clone()).unwrap(); - f.write(plans[0].to_string().as_bytes()).unwrap(); - log::info!("plan saved at {:?}", path); - log::info!("seed was {}", seed); env.io.print_stats(); + + // Print the seed, the locations of the database and the plan file at the end again for easily accessing them. 
+ log::info!("database path: {:?}", db_path); + log::info!("simulator plan path: {:?}", plan_path); + log::info!("seed: {}", seed); } fn execute_plans(env: &mut SimulatorEnv, plans: &mut Vec) -> Result<()> { From 833c75080bcba100ca2007868ea944e76d1ec65e Mon Sep 17 00:00:00 2001 From: alpaylan Date: Sun, 22 Dec 2024 17:16:50 -0500 Subject: [PATCH 087/144] break up the simulator primitives into their own files in the simulator submodule --- simulator/main.rs | 238 ++---------------------------------- simulator/simulator/cli.rs | 24 ++++ simulator/simulator/env.rs | 38 ++++++ simulator/simulator/file.rs | 93 ++++++++++++++ simulator/simulator/io.rs | 90 ++++++++++++++ simulator/simulator/mod.rs | 4 + 6 files changed, 257 insertions(+), 230 deletions(-) create mode 100644 simulator/simulator/cli.rs create mode 100644 simulator/simulator/env.rs create mode 100644 simulator/simulator/file.rs create mode 100644 simulator/simulator/io.rs create mode 100644 simulator/simulator/mod.rs diff --git a/simulator/main.rs b/simulator/main.rs index 154fa76e2..6a09e4f4a 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,73 +1,24 @@ -use clap::{command, Parser}; +use clap::Parser; use generation::plan::{Interaction, InteractionPlan, ResultSet}; -use generation::{pick, pick_index, Arbitrary, ArbitraryFrom}; -use limbo_core::{Connection, Database, File, OpenFlags, PlatformIO, Result, RowResult, IO}; -use model::query::{Create, Insert, Predicate, Query, Select}; +use generation::{pick_index, Arbitrary, ArbitraryFrom}; +use limbo_core::{Connection, Database, Result, RowResult, IO}; +use model::query::{Create, Query}; use model::table::{Column, Name, Table, Value}; -use properties::{property_insert_select, property_select_all}; use rand::prelude::*; use rand_chacha::ChaCha8Rng; -use std::cell::RefCell; +use simulator::cli::SimulatorCLI; +use simulator::env::{SimConnection, SimulatorEnv, SimulatorOpts}; +use simulator::io::SimulatorIO; use std::io::Write; -use 
std::panic::UnwindSafe; use std::path::Path; use std::rc::Rc; use std::sync::Arc; -use std::time::Duration; use tempfile::TempDir; mod generation; mod model; mod properties; - -struct SimulatorEnv { - opts: SimulatorOpts, - tables: Vec

, - connections: Vec, - io: Arc, - db: Arc, - rng: ChaCha8Rng, -} - -impl UnwindSafe for SimulatorEnv {} - -#[derive(Clone)] -enum SimConnection { - Connected(Rc), - Disconnected, -} - -#[derive(Debug, Clone)] -struct SimulatorOpts { - ticks: usize, - max_connections: usize, - max_tables: usize, - // this next options are the distribution of workload where read_percent + write_percent + - // delete_percent == 100% - read_percent: usize, - write_percent: usize, - delete_percent: usize, - max_interactions: usize, - page_size: usize, -} - -#[derive(Parser)] -#[command(name = "limbo-simulator")] -#[command(author, version, about, long_about = None)] -pub struct SimulatorCLI { - #[clap(short, long, help = "set seed for reproducible runs", default_value = None)] - pub seed: Option, - #[clap(short, long, help = "set custom output directory for produced files", default_value = None)] - pub output_dir: Option, - #[clap( - short, - long, - help = "enable doublechecking, run the simulator with the plan twice and check output equality" - )] - pub doublecheck: bool, - #[clap(short, long, help = "change the maximum size of the randomly generated sequence of interactions", default_value_t = 1024)] - pub maximum_size: usize, -} +mod simulator; #[allow(clippy::arc_with_non_send_sync)] fn main() { @@ -340,176 +291,3 @@ fn get_all_rows( } Ok(out) } - -struct SimulatorIO { - inner: Box, - fault: RefCell, - files: RefCell>>, - rng: RefCell, - nr_run_once_faults: RefCell, - page_size: usize, -} - -impl SimulatorIO { - fn new(seed: u64, page_size: usize) -> Result { - let inner = Box::new(PlatformIO::new()?); - let fault = RefCell::new(false); - let files = RefCell::new(Vec::new()); - let rng = RefCell::new(ChaCha8Rng::seed_from_u64(seed)); - let nr_run_once_faults = RefCell::new(0); - Ok(Self { - inner, - fault, - files, - rng, - nr_run_once_faults, - page_size, - }) - } - - fn inject_fault(&self, fault: bool) { - self.fault.replace(fault); - for file in self.files.borrow().iter() { - 
file.inject_fault(fault); - } - } - - fn print_stats(&self) { - println!("run_once faults: {}", self.nr_run_once_faults.borrow()); - for file in self.files.borrow().iter() { - file.print_stats(); - } - } -} - -impl IO for SimulatorIO { - fn open_file( - &self, - path: &str, - flags: OpenFlags, - _direct: bool, - ) -> Result> { - let inner = self.inner.open_file(path, flags, false)?; - let file = Rc::new(SimulatorFile { - inner, - fault: RefCell::new(false), - nr_pread_faults: RefCell::new(0), - nr_pwrite_faults: RefCell::new(0), - reads: RefCell::new(0), - writes: RefCell::new(0), - syncs: RefCell::new(0), - page_size: self.page_size, - }); - self.files.borrow_mut().push(file.clone()); - Ok(file) - } - - fn run_once(&self) -> Result<()> { - if *self.fault.borrow() { - *self.nr_run_once_faults.borrow_mut() += 1; - return Err(limbo_core::LimboError::InternalError( - "Injected fault".into(), - )); - } - self.inner.run_once().unwrap(); - Ok(()) - } - - fn generate_random_number(&self) -> i64 { - self.rng.borrow_mut().next_u64() as i64 - } - - fn get_current_time(&self) -> String { - "2024-01-01 00:00:00".to_string() - } -} - -struct SimulatorFile { - inner: Rc, - fault: RefCell, - nr_pread_faults: RefCell, - nr_pwrite_faults: RefCell, - writes: RefCell, - reads: RefCell, - syncs: RefCell, - page_size: usize, -} - -impl SimulatorFile { - fn inject_fault(&self, fault: bool) { - self.fault.replace(fault); - } - - fn print_stats(&self) { - println!( - "pread faults: {}, pwrite faults: {}, reads: {}, writes: {}, syncs: {}", - *self.nr_pread_faults.borrow(), - *self.nr_pwrite_faults.borrow(), - *self.reads.borrow(), - *self.writes.borrow(), - *self.syncs.borrow(), - ); - } -} - -impl limbo_core::File for SimulatorFile { - fn lock_file(&self, exclusive: bool) -> Result<()> { - if *self.fault.borrow() { - return Err(limbo_core::LimboError::InternalError( - "Injected fault".into(), - )); - } - self.inner.lock_file(exclusive) - } - - fn unlock_file(&self) -> Result<()> { - if 
*self.fault.borrow() { - return Err(limbo_core::LimboError::InternalError( - "Injected fault".into(), - )); - } - self.inner.unlock_file() - } - - fn pread(&self, pos: usize, c: Rc) -> Result<()> { - if *self.fault.borrow() { - *self.nr_pread_faults.borrow_mut() += 1; - return Err(limbo_core::LimboError::InternalError( - "Injected fault".into(), - )); - } - *self.reads.borrow_mut() += 1; - self.inner.pread(pos, c) - } - - fn pwrite( - &self, - pos: usize, - buffer: Rc>, - c: Rc, - ) -> Result<()> { - if *self.fault.borrow() { - *self.nr_pwrite_faults.borrow_mut() += 1; - return Err(limbo_core::LimboError::InternalError( - "Injected fault".into(), - )); - } - *self.writes.borrow_mut() += 1; - self.inner.pwrite(pos, buffer, c) - } - - fn sync(&self, c: Rc) -> Result<()> { - *self.syncs.borrow_mut() += 1; - self.inner.sync(c) - } - - fn size(&self) -> Result { - self.inner.size() - } -} - -impl Drop for SimulatorFile { - fn drop(&mut self) { - self.inner.unlock_file().expect("Failed to unlock file"); - } -} diff --git a/simulator/simulator/cli.rs b/simulator/simulator/cli.rs new file mode 100644 index 000000000..f977937bb --- /dev/null +++ b/simulator/simulator/cli.rs @@ -0,0 +1,24 @@ +use clap::{command, Parser}; + +#[derive(Parser)] +#[command(name = "limbo-simulator")] +#[command(author, version, about, long_about = None)] +pub struct SimulatorCLI { + #[clap(short, long, help = "set seed for reproducible runs", default_value = None)] + pub seed: Option, + #[clap(short, long, help = "set custom output directory for produced files", default_value = None)] + pub output_dir: Option, + #[clap( + short, + long, + help = "enable doublechecking, run the simulator with the plan twice and check output equality" + )] + pub doublecheck: bool, + #[clap( + short, + long, + help = "change the maximum size of the randomly generated sequence of interactions", + default_value_t = 1024 + )] + pub maximum_size: usize, +} diff --git a/simulator/simulator/env.rs 
b/simulator/simulator/env.rs new file mode 100644 index 000000000..0107ac501 --- /dev/null +++ b/simulator/simulator/env.rs @@ -0,0 +1,38 @@ +use std::rc::Rc; +use std::sync::Arc; + +use limbo_core::{Connection, Database}; +use rand_chacha::ChaCha8Rng; + +use crate::model::table::Table; + +use crate::simulator::io::SimulatorIO; + +pub(crate) struct SimulatorEnv { + pub(crate) opts: SimulatorOpts, + pub(crate) tables: Vec
, + pub(crate) connections: Vec, + pub(crate) io: Arc, + pub(crate) db: Arc, + pub(crate) rng: ChaCha8Rng, +} + +#[derive(Clone)] +pub(crate) enum SimConnection { + Connected(Rc), + Disconnected, +} + +#[derive(Debug, Clone)] +pub(crate) struct SimulatorOpts { + pub(crate) ticks: usize, + pub(crate) max_connections: usize, + pub(crate) max_tables: usize, + // this next options are the distribution of workload where read_percent + write_percent + + // delete_percent == 100% + pub(crate) read_percent: usize, + pub(crate) write_percent: usize, + pub(crate) delete_percent: usize, + pub(crate) max_interactions: usize, + pub(crate) page_size: usize, +} diff --git a/simulator/simulator/file.rs b/simulator/simulator/file.rs new file mode 100644 index 000000000..7f3fe9072 --- /dev/null +++ b/simulator/simulator/file.rs @@ -0,0 +1,93 @@ +use std::{cell::RefCell, rc::Rc}; + +use limbo_core::{File, Result}; + +pub(crate) struct SimulatorFile { + pub(crate) inner: Rc, + pub(crate) fault: RefCell, + pub(crate) nr_pread_faults: RefCell, + pub(crate) nr_pwrite_faults: RefCell, + pub(crate) writes: RefCell, + pub(crate) reads: RefCell, + pub(crate) syncs: RefCell, + pub(crate) page_size: usize, +} + +impl SimulatorFile { + pub(crate) fn inject_fault(&self, fault: bool) { + self.fault.replace(fault); + } + + pub(crate) fn print_stats(&self) { + println!( + "pread faults: {}, pwrite faults: {}, reads: {}, writes: {}, syncs: {}", + *self.nr_pread_faults.borrow(), + *self.nr_pwrite_faults.borrow(), + *self.reads.borrow(), + *self.writes.borrow(), + *self.syncs.borrow(), + ); + } +} + +impl limbo_core::File for SimulatorFile { + fn lock_file(&self, exclusive: bool) -> Result<()> { + if *self.fault.borrow() { + return Err(limbo_core::LimboError::InternalError( + "Injected fault".into(), + )); + } + self.inner.lock_file(exclusive) + } + + fn unlock_file(&self) -> Result<()> { + if *self.fault.borrow() { + return Err(limbo_core::LimboError::InternalError( + "Injected fault".into(), + )); + 
} + self.inner.unlock_file() + } + + fn pread(&self, pos: usize, c: Rc) -> Result<()> { + if *self.fault.borrow() { + *self.nr_pread_faults.borrow_mut() += 1; + return Err(limbo_core::LimboError::InternalError( + "Injected fault".into(), + )); + } + *self.reads.borrow_mut() += 1; + self.inner.pread(pos, c) + } + + fn pwrite( + &self, + pos: usize, + buffer: Rc>, + c: Rc, + ) -> Result<()> { + if *self.fault.borrow() { + *self.nr_pwrite_faults.borrow_mut() += 1; + return Err(limbo_core::LimboError::InternalError( + "Injected fault".into(), + )); + } + *self.writes.borrow_mut() += 1; + self.inner.pwrite(pos, buffer, c) + } + + fn sync(&self, c: Rc) -> Result<()> { + *self.syncs.borrow_mut() += 1; + self.inner.sync(c) + } + + fn size(&self) -> Result { + self.inner.size() + } +} + +impl Drop for SimulatorFile { + fn drop(&mut self) { + self.inner.unlock_file().expect("Failed to unlock file"); + } +} diff --git a/simulator/simulator/io.rs b/simulator/simulator/io.rs new file mode 100644 index 000000000..e6b96218e --- /dev/null +++ b/simulator/simulator/io.rs @@ -0,0 +1,90 @@ +use std::{cell::RefCell, rc::Rc}; + +use limbo_core::{OpenFlags, PlatformIO, Result, IO}; +use rand::{RngCore, SeedableRng}; +use rand_chacha::ChaCha8Rng; + +use crate::simulator::file::SimulatorFile; + +pub(crate) struct SimulatorIO { + pub(crate) inner: Box, + pub(crate) fault: RefCell, + pub(crate) files: RefCell>>, + pub(crate) rng: RefCell, + pub(crate) nr_run_once_faults: RefCell, + pub(crate) page_size: usize, +} + +impl SimulatorIO { + pub(crate) fn new(seed: u64, page_size: usize) -> Result { + let inner = Box::new(PlatformIO::new()?); + let fault = RefCell::new(false); + let files = RefCell::new(Vec::new()); + let rng = RefCell::new(ChaCha8Rng::seed_from_u64(seed)); + let nr_run_once_faults = RefCell::new(0); + Ok(Self { + inner, + fault, + files, + rng, + nr_run_once_faults, + page_size, + }) + } + + pub(crate) fn inject_fault(&self, fault: bool) { + self.fault.replace(fault); + for 
file in self.files.borrow().iter() { + file.inject_fault(fault); + } + } + + pub(crate) fn print_stats(&self) { + println!("run_once faults: {}", self.nr_run_once_faults.borrow()); + for file in self.files.borrow().iter() { + file.print_stats(); + } + } +} + +impl IO for SimulatorIO { + fn open_file( + &self, + path: &str, + flags: OpenFlags, + _direct: bool, + ) -> Result> { + let inner = self.inner.open_file(path, flags, false)?; + let file = Rc::new(SimulatorFile { + inner, + fault: RefCell::new(false), + nr_pread_faults: RefCell::new(0), + nr_pwrite_faults: RefCell::new(0), + reads: RefCell::new(0), + writes: RefCell::new(0), + syncs: RefCell::new(0), + page_size: self.page_size, + }); + self.files.borrow_mut().push(file.clone()); + Ok(file) + } + + fn run_once(&self) -> Result<()> { + if *self.fault.borrow() { + *self.nr_run_once_faults.borrow_mut() += 1; + return Err(limbo_core::LimboError::InternalError( + "Injected fault".into(), + )); + } + self.inner.run_once().unwrap(); + Ok(()) + } + + fn generate_random_number(&self) -> i64 { + self.rng.borrow_mut().next_u64() as i64 + } + + fn get_current_time(&self) -> String { + "2024-01-01 00:00:00".to_string() + } +} diff --git a/simulator/simulator/mod.rs b/simulator/simulator/mod.rs new file mode 100644 index 000000000..d5f06c103 --- /dev/null +++ b/simulator/simulator/mod.rs @@ -0,0 +1,4 @@ +pub mod cli; +pub mod env; +pub mod file; +pub mod io; From 357ab551a5d0243d5c9252c971e3ac81fbace62b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Mon, 23 Dec 2024 07:42:12 +0900 Subject: [PATCH 088/144] nit --- core/translate/planner.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index f9b941571..70df2eee9 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -490,7 +490,6 @@ pub fn prepare_delete_plan( tbl_name: &QualifiedName, where_clause: Option, ) -> Result { - // let table_name = tbl_name.name.0.clone(); let 
table = match schema.get_table(tbl_name.name.0.as_str()) { Some(table) => table, None => crate::bail_corrupt_error!("Parse error: no such table: {}", tbl_name), From fbf42458b89f3f89c25e1f23c6255453ca1b8220 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 22 Dec 2024 21:43:29 -0500 Subject: [PATCH 089/144] Use custom expr equality check in translation and planner --- core/translate/expr.rs | 7 ++----- core/translate/planner.rs | 12 ++++++++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 363d96a99..94a61beaa 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -4,7 +4,7 @@ use sqlite3_parser::ast::{self, UnaryOperator}; use crate::function::JsonFunc; use crate::function::{AggFunc, Func, FuncCtx, MathFuncArity, ScalarFunc}; use crate::schema::Type; -use crate::util::normalize_ident; +use crate::util::{exprs_are_equivalent, normalize_ident}; use crate::vdbe::{builder::ProgramBuilder, BranchOffset, Insn}; use crate::Result; @@ -554,10 +554,7 @@ pub fn translate_expr( ) -> Result { if let Some(precomputed_exprs_to_registers) = precomputed_exprs_to_registers { for (precomputed_expr, reg) in precomputed_exprs_to_registers.iter() { - // TODO: implement a custom equality check for expressions - // there are lots of examples where this breaks, even simple ones like - // sum(x) != SUM(x) - if expr == *precomputed_expr { + if exprs_are_equivalent(expr, precomputed_expr) { program.emit_insn(Insn::Copy { src_reg: *reg, dst_reg: target_register, diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 14757e00a..0bdc447f3 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -4,7 +4,12 @@ use super::{ Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, }, }; -use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; +use crate::{ + function::Func, + schema::Schema, + util::{exprs_are_equivalent, 
normalize_ident}, + Result, +}; use sqlite3_parser::ast::{self, FromClause, JoinType, ResultColumn}; pub struct OperatorIdCounter { @@ -23,7 +28,10 @@ impl OperatorIdCounter { } fn resolve_aggregates(expr: &ast::Expr, aggs: &mut Vec) -> bool { - if aggs.iter().any(|a| a.original_expr == *expr) { + if aggs + .iter() + .any(|a| exprs_are_equivalent(&a.original_expr, expr)) + { return true; } match expr { From 4f07342fdcc107728f4d87c072edd370aba9c29e Mon Sep 17 00:00:00 2001 From: alpaylan Date: Sun, 22 Dec 2024 23:25:35 -0500 Subject: [PATCH 090/144] catch panics, add doublecheck --- simulator/main.rs | 116 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 104 insertions(+), 12 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 6a09e4f4a..51a75de96 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -9,6 +9,7 @@ use rand_chacha::ChaCha8Rng; use simulator::cli::SimulatorCLI; use simulator::env::{SimConnection, SimulatorEnv, SimulatorOpts}; use simulator::io::SimulatorIO; +use std::backtrace::Backtrace; use std::io::Write; use std::path::Path; use std::rc::Rc; @@ -24,15 +25,15 @@ mod simulator; fn main() { let _ = env_logger::try_init(); - let opts = SimulatorCLI::parse(); + let cli_opts = SimulatorCLI::parse(); - let seed = match opts.seed { + let seed = match cli_opts.seed { Some(seed) => seed, None => rand::thread_rng().next_u64(), }; - let output_dir = match opts.output_dir { - Some(dir) => Path::new(&dir).to_path_buf(), + let output_dir = match &cli_opts.output_dir { + Some(dir) => Path::new(dir).to_path_buf(), None => TempDir::new().unwrap().into_path(), }; @@ -44,6 +45,98 @@ fn main() { log::info!("simulator plan path: {:?}", plan_path); log::info!("seed: {}", seed); + std::panic::set_hook(Box::new(move |info| { + log::error!("panic occurred"); + + let payload = info.payload(); + if let Some(s) = payload.downcast_ref::<&str>() { + log::error!("{}", s); + } else if let Some(s) = payload.downcast_ref::() { + log::error!("{}", 
s); + } else { + log::error!("unknown panic payload"); + } + + let bt = Backtrace::force_capture(); + log::error!("captured backtrace:\n{}", bt); + })); + + let result = std::panic::catch_unwind(|| run_simulation(seed, &cli_opts, &db_path, &plan_path)); + + if cli_opts.doublecheck { + // Move the old database and plan file to a new location + let old_db_path = db_path.with_extension("_old.db"); + let old_plan_path = plan_path.with_extension("_old.plan"); + + std::fs::rename(&db_path, &old_db_path).unwrap(); + std::fs::rename(&plan_path, &old_plan_path).unwrap(); + + // Run the simulation again + let result2 = + std::panic::catch_unwind(|| run_simulation(seed, &cli_opts, &db_path, &plan_path)); + + match (result, result2) { + (Ok(Ok(_)), Err(_)) => { + log::error!("doublecheck failed! first run succeeded, but second run panicked."); + } + (Ok(Err(_)), Err(_)) => { + log::error!( + "doublecheck failed! first run failed assertion, but second run panicked." + ); + } + (Err(_), Ok(Ok(_))) => { + log::error!("doublecheck failed! first run panicked, but second run succeeded."); + } + (Err(_), Ok(Err(_))) => { + log::error!( + "doublecheck failed! first run panicked, but second run failed assertion." + ); + } + (Ok(Ok(_)), Ok(Err(_))) => { + log::error!( + "doublecheck failed! first run succeeded, but second run failed assertion." + ); + } + (Ok(Err(_)), Ok(Ok(_))) => { + log::error!( + "doublecheck failed! first run failed assertion, but second run succeeded." + ); + } + (Err(_), Err(_)) | (Ok(_), Ok(_)) => { + // Compare the two database files byte by byte + let old_db = std::fs::read(&old_db_path).unwrap(); + let new_db = std::fs::read(&db_path).unwrap(); + if old_db != new_db { + log::error!("doublecheck failed! database files are different."); + } else { + log::info!("doublecheck succeeded! 
database files are the same."); + } + } + } + + // Move the new database and plan file to a new location + let new_db_path = db_path.with_extension("_double.db"); + let new_plan_path = plan_path.with_extension("_double.plan"); + + std::fs::rename(&db_path, &new_db_path).unwrap(); + std::fs::rename(&plan_path, &new_plan_path).unwrap(); + + // Move the old database and plan file back + std::fs::rename(&old_db_path, &db_path).unwrap(); + std::fs::rename(&old_plan_path, &plan_path).unwrap(); + } + // Print the seed, the locations of the database and the plan file at the end again for easily accessing them. + log::info!("database path: {:?}", db_path); + log::info!("simulator plan path: {:?}", plan_path); + log::info!("seed: {}", seed); +} + +fn run_simulation( + seed: u64, + cli_opts: &SimulatorCLI, + db_path: &Path, + plan_path: &Path, +) -> Result<()> { let mut rng = ChaCha8Rng::seed_from_u64(seed); let (read_percent, write_percent, delete_percent) = { @@ -57,7 +150,7 @@ fn main() { }; let opts = SimulatorOpts { - ticks: rng.gen_range(0..opts.maximum_size), + ticks: rng.gen_range(0..cli_opts.maximum_size), max_connections: 1, // TODO: for now let's use one connection as we didn't implement // correct transactions procesing max_tables: rng.gen_range(0..128), @@ -65,7 +158,7 @@ fn main() { write_percent, delete_percent, page_size: 4096, // TODO: randomize this too - max_interactions: rng.gen_range(0..opts.maximum_size), + max_interactions: rng.gen_range(0..cli_opts.maximum_size), }; let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap()); @@ -92,7 +185,7 @@ fn main() { .map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &env)) .collect::>(); - let mut f = std::fs::File::create(plan_path.clone()).unwrap(); + let mut f = std::fs::File::create(plan_path).unwrap(); // todo: create a detailed plan file with all the plans. for now, we only use 1 connection, so it's safe to use the first plan. 
f.write(plans[0].to_string().as_bytes()).unwrap(); @@ -102,15 +195,14 @@ fn main() { let result = execute_plans(&mut env, &mut plans); if result.is_err() { - log::error!("error executing plans: {:?}", result.err()); + log::error!("error executing plans: {:?}", result.as_ref().err()); } env.io.print_stats(); - // Print the seed, the locations of the database and the plan file at the end again for easily accessing them. - log::info!("database path: {:?}", db_path); - log::info!("simulator plan path: {:?}", plan_path); - log::info!("seed: {}", seed); + log::info!("Simulation completed"); + + result } fn execute_plans(env: &mut SimulatorEnv, plans: &mut Vec) -> Result<()> { From c417fe788083f2dfa20970c9954facaabd9cf654 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 23 Dec 2024 21:12:40 +0200 Subject: [PATCH 091/144] add link to sqlite source about payload_overflows() --- core/storage/btree.rs | 1 + core/storage/sqlite3_ondisk.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 08ec9c855..05dacb53d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1494,6 +1494,7 @@ impl BTreeCursor { log::debug!("fill_cell_payload(overflow)"); let payload_overflow_threshold_min = self.payload_overflow_threshold_min(page_type); + // see e.g. https://github.com/sqlite/sqlite/blob/9591d3fe93936533c8c3b0dc4d025ac999539e11/src/dbstat.c#L371 let mut space_left = payload_overflow_threshold_min + (record_buf.len() - payload_overflow_threshold_min) % (self.usable_space() - 4); diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 8af1c7eba..f6a7fbe2e 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -1274,6 +1274,7 @@ pub fn begin_write_wal_header(io: &Rc, header: &WalHeader) -> Result<( /// Checks if payload will overflow a cell based on the maximum allowed size. 
/// It will return the min size that will be stored in that case, /// including overflow pointer +/// see e.g. https://github.com/sqlite/sqlite/blob/9591d3fe93936533c8c3b0dc4d025ac999539e11/src/dbstat.c#L371 pub fn payload_overflows( payload_size: usize, payload_overflow_threshold_max: usize, From 40a0bef0dc7a093b5a6781f4c29d93c11ba5779e Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 23 Dec 2024 21:15:44 +0200 Subject: [PATCH 092/144] better fixme comments --- core/storage/btree.rs | 7 +++++-- core/storage/sqlite3_ondisk.rs | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 05dacb53d..3f6822872 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -795,9 +795,12 @@ impl BTreeCursor { // if we clear space that is at the start of the cell content area, // we need to update the cell content area pointer forward to account for the removed space - // FIXME: is offset ever < cell_content_area? + // FIXME: is offset ever < cell_content_area? cell content area grows leftwards and the pointer + // is to the start of the last allocated cell. should we assert!(offset >= page.cell_content_area()) + // and change this to if offset == page.cell_content_area()? if offset <= page.cell_content_area() { - page.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, page.first_freeblock()); // why is this here? + // FIXME: remove the line directly below this, it does not change anything. + page.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, page.first_freeblock()); page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, offset + len); return; } diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index f6a7fbe2e..e92371e05 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -484,6 +484,8 @@ impl PageContent { } /// The start of the cell content area. 
+ /// SQLite strives to place cells as far toward the end of the b-tree page as it can, + /// in order to leave space for future growth of the cell pointer array. pub fn cell_content_area(&self) -> u16 { self.read_u16(5) } From 6a287ae1a98d99a54acca98de4cf70d22eb57dbe Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 23 Dec 2024 21:31:42 +0200 Subject: [PATCH 093/144] add comment about cell_content_area 0 value meaning u16::MAX --- core/storage/btree.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 3f6822872..413a72744 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1405,6 +1405,15 @@ impl BTreeCursor { let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize; let mut first_byte_in_cell_content = page.cell_content_area(); + // A zero value for the cell content area pointer is interpreted as 65536. + // See https://www.sqlite.org/fileformat.html + // The max page size for a sqlite database is 64kiB i.e. 65536 bytes. + // 65536 is u16::MAX + 1, and since cell content grows from right to left, this means + // the cell content area pointer is at the end of the page, + // i.e. + // 1. the page size is 64kiB + // 2. there are no cells on the page + // 3. 
there is no reserved space at the end of the page if first_byte_in_cell_content == 0 { first_byte_in_cell_content = u16::MAX; } From 9ea4c95ee162812244bb6cd39e992320af341ad4 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 23 Dec 2024 22:07:20 +0200 Subject: [PATCH 094/144] even more comments --- core/storage/btree.rs | 30 ++++++++++++++++++++++-------- core/storage/sqlite3_ondisk.rs | 9 +++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 413a72744..bdd27932b 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -24,12 +24,22 @@ use super::sqlite3_ondisk::{ /// type of btree page -> u8 const PAGE_HEADER_OFFSET_PAGE_TYPE: usize = 0; /// pointer to first freeblock -> u16 -const PAGE_HEADER_OFFSET_FREEBLOCK: usize = 1; +/// The second field of the b-tree page header is the offset of the first freeblock, or zero if there are no freeblocks on the page. +/// A freeblock is a structure used to identify unallocated space within a b-tree page. +/// Freeblocks are organized as a chain. +/// +/// To be clear, freeblocks do not mean the regular unallocated free space to the left of the cell content area pointer, but instead +/// blocks of at least 4 bytes WITHIN the cell content area that are not in use due to e.g. deletions. +const PAGE_HEADER_OFFSET_FIRST_FREEBLOCK: usize = 1; /// number of cells in the page -> u16 const PAGE_HEADER_OFFSET_CELL_COUNT: usize = 3; /// pointer to first byte of cell allocated content from top -> u16 +/// SQLite strives to place cells as far toward the end of the b-tree page as it can, +/// in order to leave space for future growth of the cell pointer array. +/// = the cell content area pointer moves leftward as cells are added to the page const PAGE_HEADER_OFFSET_CELL_CONTENT_AREA: usize = 5; /// number of fragmented bytes -> u8 +/// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area. 
const PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT: usize = 7; /// if internalnode, pointer right most pointer (saved separately from cells) -> u32 const PAGE_HEADER_OFFSET_RIGHTMOST_PTR: usize = 8; @@ -779,7 +789,7 @@ impl BTreeCursor { if page.first_freeblock() == 0 { page.write_u16(offset as usize, 0); // next freeblock = null page.write_u16(offset as usize + 2, len); // size of this freeblock - page.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, offset); // first freeblock in page = this block + page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, offset); // first freeblock in page = this block return; } let first_block = page.first_freeblock(); @@ -789,7 +799,7 @@ impl BTreeCursor { if offset < first_block { page.write_u16(offset as usize, first_block); // next freeblock = previous first freeblock page.write_u16(offset as usize + 2, len); // size of this freeblock - page.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, offset); // first freeblock in page = this block + page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, offset); // first freeblock in page = this block return; } @@ -800,7 +810,7 @@ impl BTreeCursor { // and change this to if offset == page.cell_content_area()? if offset <= page.cell_content_area() { // FIXME: remove the line directly below this, it does not change anything. 
- page.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, page.first_freeblock()); + page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, page.first_freeblock()); page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, offset + len); return; } @@ -1015,7 +1025,7 @@ impl BTreeCursor { assert!(page.is_dirty()); let contents = page.get().contents.as_mut().unwrap(); - contents.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, 0); + contents.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0); contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0); let db_header = RefCell::borrow(&self.database_header); @@ -1389,7 +1399,7 @@ impl BTreeCursor { // set new first byte of cell content page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, cbrk as u16); // set free block to 0, unused spaced can be retrieved from gap between cell pointer end and content start - page.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, 0); + page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0); // set unused space to 0 let first_cell = cloned_page.cell_content_area() as u64; assert!(first_cell <= cbrk); @@ -1426,12 +1436,16 @@ impl BTreeCursor { let child_pointer_size = if page.is_leaf() { 0 } else { 4 }; let first_cell = (page.offset + 8 + child_pointer_size + (2 * ncell)) as u16; + // The amount of free space is the sum of: + // 1. 0..first_byte_in_cell_content (everything to the left of the cell content area pointer is unused free space) + // 2. fragmented_free_bytes. let mut nfree = fragmented_free_bytes as usize + first_byte_in_cell_content as usize; let mut pc = free_block_pointer as usize; if pc > 0 { if pc < first_byte_in_cell_content as usize { - // corrupt + // Freeblocks exist in the cell content area e.g. after deletions + // They should never exist in the unused area of the page. 
todo!("corrupted page"); } @@ -1926,7 +1940,7 @@ pub fn btree_init_page( contents.offset = offset; let id = page_type as u8; contents.write_u8(PAGE_HEADER_OFFSET_PAGE_TYPE, id); - contents.write_u16(PAGE_HEADER_OFFSET_FREEBLOCK, 0); + contents.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0); contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0); let cell_content_area_start = db_header.page_size - db_header.reserved_space as u16; diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index e92371e05..ca7c62f6a 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -475,6 +475,12 @@ impl PageContent { buf[self.offset + pos..self.offset + pos + 4].copy_from_slice(&value.to_be_bytes()); } + /// The second field of the b-tree page header is the offset of the first freeblock, or zero if there are no freeblocks on the page. + /// A freeblock is a structure used to identify unallocated space within a b-tree page. + /// Freeblocks are organized as a chain. + /// + /// To be clear, freeblocks do not mean the regular unallocated free space to the left of the cell content area pointer, but instead + /// blocks of at least 4 bytes WITHIN the cell content area that are not in use due to e.g. deletions. pub fn first_freeblock(&self) -> u16 { self.read_u16(1) } @@ -486,10 +492,13 @@ impl PageContent { /// The start of the cell content area. /// SQLite strives to place cells as far toward the end of the b-tree page as it can, /// in order to leave space for future growth of the cell pointer array. + /// = the cell content area pointer moves leftward as cells are added to the page pub fn cell_content_area(&self) -> u16 { self.read_u16(5) } + /// The total number of bytes in all fragments is stored in the fifth field of the b-tree page header. + /// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area. 
pub fn num_frag_free_bytes(&self) -> u8 { self.read_u8(7) } From 668a0ecae8315454f6904bb3a11a1041e92e7243 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 23 Dec 2024 22:18:22 +0200 Subject: [PATCH 095/144] comment about page header size difference between page types --- core/storage/sqlite3_ondisk.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index ca7c62f6a..2cb10efaf 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -524,6 +524,8 @@ impl PageContent { let buf = self.as_ptr(); let ncells = self.cell_count(); + // the page header is 12 bytes for interior pages, 8 bytes for leaf pages + // this is because the 4 last bytes in the interior page's header are used for the rightmost pointer. let cell_start = match self.page_type() { PageType::IndexInterior => 12, PageType::TableInterior => 12, @@ -545,7 +547,6 @@ impl PageContent { ) } - /// When using this fu pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) { let cell_start = match self.page_type() { PageType::IndexInterior => 12, From 81526089a4580086df886762a524f865653de534 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 23 Dec 2024 22:26:49 +0200 Subject: [PATCH 096/144] add comment about cell_get_raw_pointer_region() --- core/storage/sqlite3_ondisk.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 2cb10efaf..2d5eb31d2 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -546,7 +546,12 @@ impl PageContent { usable_size, ) } - + /// The cell pointer array of a b-tree page immediately follows the b-tree page header. + /// Let K be the number of cells on the btree. + /// The cell pointer array consists of K 2-byte integer offsets to the cell contents. 
+ /// The cell pointers are arranged in key order with: + /// - left-most cell (the cell with the smallest key) first and + /// - the right-most cell (the cell with the largest key) last. pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) { let cell_start = match self.page_type() { PageType::IndexInterior => 12, From 17440393f50d8890cbdb91b3ca48a8184191252b Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 23 Dec 2024 22:30:05 +0200 Subject: [PATCH 097/144] rename cell_start to cell_pointer_array_start --- core/storage/sqlite3_ondisk.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 2d5eb31d2..4a8eff020 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -572,14 +572,14 @@ impl PageContent { ) -> (usize, usize) { let buf = self.as_ptr(); let ncells = self.cell_count(); - let cell_start = match self.page_type() { + let cell_pointer_array_start = match self.page_type() { PageType::IndexInterior => 12, PageType::TableInterior => 12, PageType::IndexLeaf => 8, PageType::TableLeaf => 8, }; assert!(idx < ncells, "cell_get: idx out of bounds"); - let cell_pointer = cell_start + (idx * 2); + let cell_pointer = cell_pointer_array_start + (idx * 2); // pointers are 2 bytes each let cell_pointer = self.read_u16(cell_pointer) as usize; let start = cell_pointer; let len = match self.page_type() { From c727ed7e8a304538dd14f0ccc7880b26daa0b6a6 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 23 Dec 2024 22:31:35 +0200 Subject: [PATCH 098/144] rename cell_start to cell_pointer_array_start, part 2: electric boogaloo --- core/storage/sqlite3_ondisk.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 4a8eff020..15d5b2c6c 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -526,14 +526,14 @@ impl PageContent { let ncells = 
self.cell_count(); // the page header is 12 bytes for interior pages, 8 bytes for leaf pages // this is because the 4 last bytes in the interior page's header are used for the rightmost pointer. - let cell_start = match self.page_type() { + let cell_pointer_array_start = match self.page_type() { PageType::IndexInterior => 12, PageType::TableInterior => 12, PageType::IndexLeaf => 8, PageType::TableLeaf => 8, }; assert!(idx < ncells, "cell_get: idx out of bounds"); - let cell_pointer = cell_start + (idx * 2); + let cell_pointer = cell_pointer_array_start + (idx * 2); let cell_pointer = self.read_u16(cell_pointer) as usize; read_btree_cell( From 2186b3973b84ca05494db3ad1e6680f106c09709 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Mon, 23 Dec 2024 16:16:39 -0500 Subject: [PATCH 099/144] change the name of the simulator submodule into runner --- simulator/main.rs | 8 ++++---- simulator/{simulator => runner}/cli.rs | 0 simulator/{simulator => runner}/env.rs | 2 +- simulator/{simulator => runner}/file.rs | 0 simulator/{simulator => runner}/io.rs | 2 +- simulator/{simulator => runner}/mod.rs | 0 6 files changed, 6 insertions(+), 6 deletions(-) rename simulator/{simulator => runner}/cli.rs (100%) rename simulator/{simulator => runner}/env.rs (96%) rename simulator/{simulator => runner}/file.rs (100%) rename simulator/{simulator => runner}/io.rs (98%) rename simulator/{simulator => runner}/mod.rs (100%) diff --git a/simulator/main.rs b/simulator/main.rs index 51a75de96..2b5eee3f6 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -6,9 +6,9 @@ use model::query::{Create, Query}; use model::table::{Column, Name, Table, Value}; use rand::prelude::*; use rand_chacha::ChaCha8Rng; -use simulator::cli::SimulatorCLI; -use simulator::env::{SimConnection, SimulatorEnv, SimulatorOpts}; -use simulator::io::SimulatorIO; +use runner::cli::SimulatorCLI; +use runner::env::{SimConnection, SimulatorEnv, SimulatorOpts}; +use runner::io::SimulatorIO; use std::backtrace::Backtrace; use 
std::io::Write; use std::path::Path; @@ -19,7 +19,7 @@ use tempfile::TempDir; mod generation; mod model; mod properties; -mod simulator; +mod runner; #[allow(clippy::arc_with_non_send_sync)] fn main() { diff --git a/simulator/simulator/cli.rs b/simulator/runner/cli.rs similarity index 100% rename from simulator/simulator/cli.rs rename to simulator/runner/cli.rs diff --git a/simulator/simulator/env.rs b/simulator/runner/env.rs similarity index 96% rename from simulator/simulator/env.rs rename to simulator/runner/env.rs index 0107ac501..0624b94b4 100644 --- a/simulator/simulator/env.rs +++ b/simulator/runner/env.rs @@ -6,7 +6,7 @@ use rand_chacha::ChaCha8Rng; use crate::model::table::Table; -use crate::simulator::io::SimulatorIO; +use crate::runner::io::SimulatorIO; pub(crate) struct SimulatorEnv { pub(crate) opts: SimulatorOpts, diff --git a/simulator/simulator/file.rs b/simulator/runner/file.rs similarity index 100% rename from simulator/simulator/file.rs rename to simulator/runner/file.rs diff --git a/simulator/simulator/io.rs b/simulator/runner/io.rs similarity index 98% rename from simulator/simulator/io.rs rename to simulator/runner/io.rs index e6b96218e..c039764b0 100644 --- a/simulator/simulator/io.rs +++ b/simulator/runner/io.rs @@ -4,7 +4,7 @@ use limbo_core::{OpenFlags, PlatformIO, Result, IO}; use rand::{RngCore, SeedableRng}; use rand_chacha::ChaCha8Rng; -use crate::simulator::file::SimulatorFile; +use crate::runner::file::SimulatorFile; pub(crate) struct SimulatorIO { pub(crate) inner: Box, diff --git a/simulator/simulator/mod.rs b/simulator/runner/mod.rs similarity index 100% rename from simulator/simulator/mod.rs rename to simulator/runner/mod.rs From 906975e1ca7010fdf70391eaa15b1cb224de30d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Tue, 24 Dec 2024 12:25:04 +0900 Subject: [PATCH 100/144] Add limit support --- core/translate/delete.rs | 6 +-- core/translate/emitter.rs | 87 +++++++++++++++++++++------------------ 
core/translate/mod.rs | 2 +- core/translate/planner.rs | 35 +++++++++------- core/vdbe/builder.rs | 11 +++++ 5 files changed, 82 insertions(+), 59 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index b0ecbdc69..135c9d76d 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -3,7 +3,7 @@ use crate::translate::optimizer::optimize_plan; use crate::translate::planner::prepare_delete_plan; use crate::{schema::Schema, storage::sqlite3_ondisk::DatabaseHeader, vdbe::Program}; use crate::{Connection, Result}; -use sqlite3_parser::ast::{Expr, QualifiedName, ResultColumn}; +use sqlite3_parser::ast::{Expr, Limit, QualifiedName}; use std::rc::Weak; use std::{cell::RefCell, rc::Rc}; @@ -11,11 +11,11 @@ pub fn translate_delete( schema: &Schema, tbl_name: &QualifiedName, where_clause: Option, - _returning: &Option>, + limit: Option, database_header: Rc>, connection: Weak, ) -> Result { - let delete_plan = prepare_delete_plan(schema, tbl_name, where_clause)?; + let delete_plan = prepare_delete_plan(schema, tbl_name, where_clause, limit)?; let optimized_plan = optimize_plan(delete_plan)?; emit_program(database_header, optimized_plan, connection) } diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 5e5f0d601..a032aa09e 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -334,7 +334,7 @@ fn emit_program_for_delete( &mut metadata, )?; - emit_delete_insns(&mut program, &plan.source)?; + emit_delete_insns(&mut program, &plan.source, &plan.limit, &metadata)?; // Clean up and close the main execution loop close_loop( @@ -1246,53 +1246,58 @@ fn close_loop( } } -fn emit_delete_insns(program: &mut ProgramBuilder, source: &SourceOperator) -> Result<()> { - match source { +fn emit_delete_insns( + program: &mut ProgramBuilder, + source: &SourceOperator, + limit: &Option, + metadata: &Metadata, +) -> Result<()> { + let cursor_id = match source { SourceOperator::Scan { - id, - table_reference, - iter_dir, 
- .. - } => { - let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); - - // Emit the instructions to delete the row - let key_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id, - dest: key_reg, - }); - program.emit_insn(Insn::DeleteAsync { cursor_id }); - program.emit_insn(Insn::DeleteAwait { cursor_id }); - - Ok(()) - } + table_reference, .. + } => program.resolve_cursor_id(&table_reference.table_identifier), SourceOperator::Search { - id, table_reference, search, .. - } => { - let cursor_id = match search { - Search::RowidEq { .. } | Search::RowidSearch { .. } => { - program.resolve_cursor_id(&table_reference.table_identifier) - } - Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), - }; - - // Emit the instructions to delete the row - let key_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id, - dest: key_reg, - }); - program.emit_insn(Insn::DeleteAsync { cursor_id }); - program.emit_insn(Insn::DeleteAwait { cursor_id }); + } => match search { + Search::RowidEq { .. } | Search::RowidSearch { .. } => { + program.resolve_cursor_id(&table_reference.table_identifier) + } + Search::IndexSearch { index, .. 
} => program.resolve_cursor_id(&index.name), + }, + _ => return Ok(()), + }; - Ok(()) - } - _ => Ok(()), + // Emit the instructions to delete the row + let key_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id, + dest: key_reg, + }); + program.emit_insn(Insn::DeleteAsync { cursor_id }); + program.emit_insn(Insn::DeleteAwait { cursor_id }); + if let Some(limit) = limit { + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: *limit as i64, + dest: limit_reg, + }); + program.mark_last_insn_constant(); + let jump_label_on_limit_reached = metadata + .termination_label_stack + .last() + .expect("termination_label_stack should not be empty."); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: *jump_label_on_limit_reached, + }, + *jump_label_on_limit_reached, + ) } + + Ok(()) } /// Emits the bytecode for processing a GROUP BY clause. diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 381c82df1..8d082c1f3 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -82,7 +82,7 @@ pub fn translate( schema, &tbl_name, where_clause, - &returning, + limit, database_header, connection, ), diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 70df2eee9..8a20a4029 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -2,8 +2,10 @@ use super::plan::{ Aggregate, BTreeTableReference, DeletePlan, Direction, GroupBy, Plan, ResultSetColumn, SelectPlan, SourceOperator, }; -use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; -use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, QualifiedName, ResultColumn}; +use crate::{bail_parse_error, function::Func, schema::Schema, util::normalize_ident, Result}; +use sqlite3_parser::ast::{ + self, Expr, FromClause, JoinType, Limit, QualifiedName, ResultColumn, SortedColumn, +}; pub struct OperatorIdCounter { id: usize, @@ -468,15 +470,7 @@ pub fn 
prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ - let l = n.parse()?; - Some(l) - } - _ => todo!(), - } - } + plan.limit = select.limit.and_then(|limit| parse_limit(limit)); // Return the unoptimized query plan Ok(Plan::Select(plan)) @@ -489,6 +483,7 @@ pub fn prepare_delete_plan( schema: &Schema, tbl_name: &QualifiedName, where_clause: Option, + limit: Option, ) -> Result { let table = match schema.get_table(tbl_name.name.0.as_str()) { Some(table) => table, @@ -500,10 +495,14 @@ pub fn prepare_delete_plan( table_identifier: table.name.clone(), table_index: 0, }; + let referenced_tables = vec![table_ref.clone()]; - // Parse and resolve the where_clause + // Parse the WHERE clause let resolved_where_clauses = parse_where(where_clause, &[table_ref.clone()])?; + // Parse the LIMIT clause + let resolved_limit = limit.and_then(|limit| parse_limit(limit)); + let plan = DeletePlan { source: SourceOperator::Scan { id: 0, @@ -514,8 +513,8 @@ pub fn prepare_delete_plan( result_columns: vec![], where_clause: resolved_where_clauses, order_by: None, - limit: None, // TODO: add support for limit - referenced_tables: vec![table_ref], + limit: resolved_limit, + referenced_tables, available_indexes: vec![], contains_constant_false_condition: false, }; @@ -783,6 +782,14 @@ fn parse_join( )) } +fn parse_limit(limit: Limit) -> Option { + if let Expr::Literal(ast::Literal::Numeric(n)) = limit.expr { + n.parse().ok() + } else { + None + } +} + fn break_predicate_at_and_boundaries(predicate: ast::Expr, out_predicates: &mut Vec) { match predicate { ast::Expr::Binary(left, ast::Operator::And, right) => { diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 8dd1cd4de..e3c6dc322 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -144,6 +144,17 @@ impl ProgramBuilder { .push((label, insn_reference)); } + /// Resolve unresolved labels to a specific offset in the instruction list. + /// + /// This function updates all instructions that reference the given label + /// to point to the specified offset. 
It ensures that the label and offset + /// are valid and updates the target program counter (PC) of each instruction + /// that references the label. + /// + /// # Arguments + /// + /// * `label` - The label to resolve. + /// * `to_offset` - The offset to which the labeled instructions should be resolved to. pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { assert!(label < 0); assert!(to_offset >= 0); From 91cca0d5b7b337249a555cfdfa78fc7a4c5ba878 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 24 Dec 2024 10:28:53 +0200 Subject: [PATCH 101/144] use more descriptive names in BTreeCursor::insert_into_cell() --- core/storage/btree.rs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index bdd27932b..d62173342 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -742,7 +742,8 @@ impl BTreeCursor { /// i.e. whether we need to balance the btree after the insert. 
fn insert_into_cell(&self, page: &mut PageContent, payload: &[u8], cell_idx: usize) { let free = self.compute_free_space(page, RefCell::borrow(&self.database_header)); - let enough_space = payload.len() + 2 <= free as usize; + const CELL_POINTER_SIZE_BYTES: usize = 2; + let enough_space = payload.len() + CELL_POINTER_SIZE_BYTES <= free as usize; if !enough_space { // add to overflow cell page.overflow_cells.push(OverflowCell { @@ -753,27 +754,30 @@ impl BTreeCursor { } // TODO: insert into cell payload in internal page - let pc = self.allocate_cell_space(page, payload.len() as u16); + let new_cell_data_pointer = self.allocate_cell_space(page, payload.len() as u16); let buf = page.as_ptr(); // copy data - buf[pc as usize..pc as usize + payload.len()].copy_from_slice(payload); + buf[new_cell_data_pointer as usize..new_cell_data_pointer as usize + payload.len()] + .copy_from_slice(payload); // memmove(pIns+2, pIns, 2*(pPage->nCell - i)); - let (pointer_area_pc_by_idx, _) = page.cell_get_raw_pointer_region(); - let pointer_area_pc_by_idx = pointer_area_pc_by_idx + (2 * cell_idx); + let (cell_pointer_array_start, _) = page.cell_get_raw_pointer_region(); + let cell_pointer_cur_idx = cell_pointer_array_start + (CELL_POINTER_SIZE_BYTES * cell_idx); - // move previous pointers forward and insert new pointer there - let n_cells_forward = 2 * (page.cell_count() - cell_idx); - if n_cells_forward > 0 { + // move existing pointers forward by CELL_POINTER_SIZE_BYTES... 
+ let n_cells_forward = page.cell_count() - cell_idx; + let n_bytes_forward = CELL_POINTER_SIZE_BYTES * n_cells_forward; + if n_bytes_forward > 0 { buf.copy_within( - pointer_area_pc_by_idx..pointer_area_pc_by_idx + n_cells_forward, - pointer_area_pc_by_idx + 2, + cell_pointer_cur_idx..cell_pointer_cur_idx + n_bytes_forward, + cell_pointer_cur_idx + CELL_POINTER_SIZE_BYTES, ); } - page.write_u16(pointer_area_pc_by_idx - page.offset, pc); + // ...and insert new cell pointer at the current index + page.write_u16(cell_pointer_cur_idx - page.offset, new_cell_data_pointer); - // update first byte of content area - page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, pc); + // update first byte of content area (cell data always appended to the left, so cell content area pointer moves to point to the new cell data) + page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, new_cell_data_pointer); // update cell count let new_n_cells = (page.cell_count() + 1) as u16; From c6b7ddf77adee9e57fcfa99e34b295914ad3e438 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 24 Dec 2024 10:30:27 +0200 Subject: [PATCH 102/144] Improve comments in BTreeCursor::compute_free_space() --- core/storage/btree.rs | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index d62173342..5a0a51e33 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1441,10 +1441,14 @@ impl BTreeCursor { let first_cell = (page.offset + 8 + child_pointer_size + (2 * ncell)) as u16; // The amount of free space is the sum of: - // 1. 0..first_byte_in_cell_content (everything to the left of the cell content area pointer is unused free space) - // 2. fragmented_free_bytes. + // #1. space to the left of the cell content area pointer + // #2. fragmented_free_bytes (isolated 1-3 byte chunks of free space within the cell content area) + // #3. 
freeblocks (linked list of blocks of at least 4 bytes within the cell content area that are not in use due to e.g. deletions) + + // #1 and #2 are known from the page header let mut nfree = fragmented_free_bytes as usize + first_byte_in_cell_content as usize; + // #3 is computed by iterating over the freeblocks linked list let mut pc = free_block_pointer as usize; if pc > 0 { if pc < first_byte_in_cell_content as usize { @@ -1457,28 +1461,33 @@ impl BTreeCursor { let mut size = 0; loop { // TODO: check corruption icellast - next = u16::from_be_bytes(buf[pc..pc + 2].try_into().unwrap()) as usize; - size = u16::from_be_bytes(buf[pc + 2..pc + 4].try_into().unwrap()) as usize; + next = u16::from_be_bytes(buf[pc..pc + 2].try_into().unwrap()) as usize; // first 2 bytes in freeblock = next freeblock pointer + size = u16::from_be_bytes(buf[pc + 2..pc + 4].try_into().unwrap()) as usize; // next 2 bytes in freeblock = size of current freeblock nfree += size; + // Freeblocks are in order from left to right on the page, + // so next pointer should > current pointer + its size, or 0 if no next block exists. 
if next <= pc + size + 3 { break; } pc = next; } - if next > 0 { - todo!("corrupted page ascending order"); - } + // Next should always be 0 (NULL) at this point since we have reached the end of the freeblocks linked list + assert!( + next == 0, + "corrupted page: freeblocks list not in ascending order" + ); - if pc + size > usable_space { - todo!("corrupted page last freeblock extends last page end"); - } + assert!( + pc + size <= usable_space, + "corrupted page: last freeblock extends last page end" + ); } // if( nFree>usableSize || nFree Date: Tue, 24 Dec 2024 18:50:16 +0200 Subject: [PATCH 103/144] refactor compute_free_space() --- core/storage/btree.rs | 63 +++++++++++++++++++--------------- core/storage/sqlite3_ondisk.rs | 50 +++++++++++++++++---------- 2 files changed, 67 insertions(+), 46 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 5a0a51e33..1ff53e7d2 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1415,10 +1415,12 @@ impl BTreeCursor { #[allow(unused_assignments)] fn compute_free_space(&self, page: &PageContent, db_header: Ref) -> u16 { // TODO(pere): maybe free space is not calculated correctly with offset - let buf = page.as_ptr(); + // Usable space, not the same as free space, simply means: + // space that is not reserved for extensions by sqlite. Usually reserved_space is 0. let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize; - let mut first_byte_in_cell_content = page.cell_content_area(); + + let mut cell_content_area_start = page.cell_content_area(); // A zero value for the cell content area pointer is interpreted as 65536. // See https://www.sqlite.org/fileformat.html // The max page size for a sqlite database is 64kiB i.e. 65536 bytes. @@ -1428,30 +1430,23 @@ impl BTreeCursor { // 1. the page size is 64kiB // 2. there are no cells on the page // 3. 
there is no reserved space at the end of the page - if first_byte_in_cell_content == 0 { - first_byte_in_cell_content = u16::MAX; + if cell_content_area_start == 0 { + cell_content_area_start = u16::MAX; } - let fragmented_free_bytes = page.num_frag_free_bytes(); - let free_block_pointer = page.first_freeblock(); - let ncell = page.cell_count(); - - // 8 + 4 == header end - let child_pointer_size = if page.is_leaf() { 0 } else { 4 }; - let first_cell = (page.offset + 8 + child_pointer_size + (2 * ncell)) as u16; - // The amount of free space is the sum of: - // #1. space to the left of the cell content area pointer - // #2. fragmented_free_bytes (isolated 1-3 byte chunks of free space within the cell content area) + // #1. the size of the unallocated region + // #2. fragments (isolated 1-3 byte chunks of free space within the cell content area) // #3. freeblocks (linked list of blocks of at least 4 bytes within the cell content area that are not in use due to e.g. deletions) - // #1 and #2 are known from the page header - let mut nfree = fragmented_free_bytes as usize + first_byte_in_cell_content as usize; + let mut free_space_bytes = + page.unallocated_region_size() as usize + page.num_frag_free_bytes() as usize; // #3 is computed by iterating over the freeblocks linked list - let mut pc = free_block_pointer as usize; - if pc > 0 { - if pc < first_byte_in_cell_content as usize { + let mut cur_freeblock_ptr = page.first_freeblock() as usize; + let page_buf = page.as_ptr(); + if cur_freeblock_ptr > 0 { + if cur_freeblock_ptr < cell_content_area_start as usize { // Freeblocks exist in the cell content area e.g. after deletions // They should never exist in the unused area of the page. 
todo!("corrupted page"); @@ -1461,15 +1456,23 @@ impl BTreeCursor { let mut size = 0; loop { // TODO: check corruption icellast - next = u16::from_be_bytes(buf[pc..pc + 2].try_into().unwrap()) as usize; // first 2 bytes in freeblock = next freeblock pointer - size = u16::from_be_bytes(buf[pc + 2..pc + 4].try_into().unwrap()) as usize; // next 2 bytes in freeblock = size of current freeblock - nfree += size; + next = u16::from_be_bytes( + page_buf[cur_freeblock_ptr..cur_freeblock_ptr + 2] + .try_into() + .unwrap(), + ) as usize; // first 2 bytes in freeblock = next freeblock pointer + size = u16::from_be_bytes( + page_buf[cur_freeblock_ptr + 2..cur_freeblock_ptr + 4] + .try_into() + .unwrap(), + ) as usize; // next 2 bytes in freeblock = size of current freeblock + free_space_bytes += size; // Freeblocks are in order from left to right on the page, // so next pointer should > current pointer + its size, or 0 if no next block exists. - if next <= pc + size + 3 { + if next <= cur_freeblock_ptr + size + 3 { break; } - pc = next; + cur_freeblock_ptr = next; } // Next should always be 0 (NULL) at this point since we have reached the end of the freeblocks linked list @@ -1479,17 +1482,21 @@ impl BTreeCursor { ); assert!( - pc + size <= usable_space, + cur_freeblock_ptr + size <= usable_space, "corrupted page: last freeblock extends last page end" ); } + assert!( + free_space_bytes <= usable_space, + "corrupted page: free space is greater than usable space" + ); + // if( nFree>usableSize || nFree usize { self.read_u16(3) as usize } + /// The size of the cell pointer array in bytes. + /// 2 bytes per cell pointer + pub fn cell_pointer_array_size(&self) -> usize { + const CELL_POINTER_SIZE_BYTES: usize = 2; + self.cell_count() * CELL_POINTER_SIZE_BYTES + } + + /// The start of the unallocated region. + /// Effectively: the offset after the page header + the cell pointer array. 
+ pub fn unallocated_region_start(&self) -> usize { + self.offset + self.header_size() + self.cell_pointer_array_size() + } + + pub fn unallocated_region_size(&self) -> usize { + self.cell_content_area() as usize - self.unallocated_region_start() + } + /// The start of the cell content area. /// SQLite strives to place cells as far toward the end of the b-tree page as it can, /// in order to leave space for future growth of the cell pointer array. @@ -497,6 +515,17 @@ impl PageContent { self.read_u16(5) } + /// The size of the page header in bytes. + /// 8 bytes for leaf pages, 12 bytes for interior pages (due to storing rightmost child pointer) + pub fn header_size(&self) -> usize { + match self.page_type() { + PageType::IndexInterior => 12, + PageType::TableInterior => 12, + PageType::IndexLeaf => 8, + PageType::TableLeaf => 8, + } + } + /// The total number of bytes in all fragments is stored in the fifth field of the b-tree page header. /// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area. pub fn num_frag_free_bytes(&self) -> u8 { @@ -526,12 +555,7 @@ impl PageContent { let ncells = self.cell_count(); // the page header is 12 bytes for interior pages, 8 bytes for leaf pages // this is because the 4 last bytes in the interior page's header are used for the rightmost pointer. - let cell_pointer_array_start = match self.page_type() { - PageType::IndexInterior => 12, - PageType::TableInterior => 12, - PageType::IndexLeaf => 8, - PageType::TableLeaf => 8, - }; + let cell_pointer_array_start = self.header_size(); assert!(idx < ncells, "cell_get: idx out of bounds"); let cell_pointer = cell_pointer_array_start + (idx * 2); let cell_pointer = self.read_u16(cell_pointer) as usize; @@ -553,12 +577,7 @@ impl PageContent { /// - left-most cell (the cell with the smallest key) first and /// - the right-most cell (the cell with the largest key) last. 
pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) { - let cell_start = match self.page_type() { - PageType::IndexInterior => 12, - PageType::TableInterior => 12, - PageType::IndexLeaf => 8, - PageType::TableLeaf => 8, - }; + let cell_start = self.header_size(); (self.offset + cell_start, self.cell_count() * 2) } @@ -572,12 +591,7 @@ impl PageContent { ) -> (usize, usize) { let buf = self.as_ptr(); let ncells = self.cell_count(); - let cell_pointer_array_start = match self.page_type() { - PageType::IndexInterior => 12, - PageType::TableInterior => 12, - PageType::IndexLeaf => 8, - PageType::TableLeaf => 8, - }; + let cell_pointer_array_start = self.header_size(); assert!(idx < ncells, "cell_get: idx out of bounds"); let cell_pointer = cell_pointer_array_start + (idx * 2); // pointers are 2 bytes each let cell_pointer = self.read_u16(cell_pointer) as usize; From aed14117c9433c13d1c4e48837cd2f25f4295b5b Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 24 Dec 2024 18:04:30 +0100 Subject: [PATCH 104/144] core: transaction support --- bindings/python/src/lib.rs | 10 ++ bindings/wasm/lib.rs | 4 +- cli/app.rs | 16 +++ core/lib.rs | 8 +- core/result.rs | 6 + core/storage/pager.rs | 11 +- core/storage/wal.rs | 268 +++++++++++++++++++++++++++++++---- core/util.rs | 1 + core/vdbe/mod.rs | 49 ++++--- simulator/generation/plan.rs | 1 + simulator/main.rs | 3 + sqlite3/src/lib.rs | 3 +- test/src/lib.rs | 9 ++ 13 files changed, 334 insertions(+), 55 deletions(-) create mode 100644 core/result.rs diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index 595400a21..c31520a82 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -143,6 +143,11 @@ impl Cursor { limbo_core::RowResult::Done => { return Ok(None); } + limbo_core::RowResult::Busy => { + return Err( + PyErr::new::("Busy error".to_string()).into() + ); + } } } } else { @@ -177,6 +182,11 @@ impl Cursor { limbo_core::RowResult::Done => { return Ok(results); } + 
limbo_core::RowResult::Busy => { + return Err( + PyErr::new::("Busy error".to_string()).into() + ); + } } } } else { diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index ec2762b91..a2ae5b266 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -85,7 +85,8 @@ impl Statement { } Ok(limbo_core::RowResult::IO) | Ok(limbo_core::RowResult::Done) - | Ok(limbo_core::RowResult::Interrupt) => JsValue::UNDEFINED, + | Ok(limbo_core::RowResult::Interrupt) + | Ok(limbo_core::RowResult::Busy) => JsValue::UNDEFINED, Err(e) => panic!("Error: {:?}", e), } } @@ -105,6 +106,7 @@ impl Statement { Ok(limbo_core::RowResult::IO) => {} Ok(limbo_core::RowResult::Interrupt) => break, Ok(limbo_core::RowResult::Done) => break, + Ok(limbo_core::RowResult::Busy) => break, Err(e) => panic!("Error: {:?}", e), } } diff --git a/cli/app.rs b/cli/app.rs index 34ab20481..cbce1ca5c 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -525,6 +525,10 @@ impl Limbo { Ok(RowResult::Done) => { break; } + Ok(RowResult::Busy) => { + self.writeln("database is busy"); + break; + } Err(err) => { let _ = self.writeln(err.to_string()); break; @@ -560,6 +564,10 @@ impl Limbo { } Ok(RowResult::Interrupt) => break, Ok(RowResult::Done) => break, + Ok(RowResult::Busy) => { + self.writeln("database is busy"); + break; + } Err(err) => { let _ = self.write_fmt(format_args!("{}", err)); break; @@ -610,6 +618,10 @@ impl Limbo { } RowResult::Interrupt => break, RowResult::Done => break, + RowResult::Busy => { + self.writeln("database is busy"); + break; + } } } if !found { @@ -663,6 +675,10 @@ impl Limbo { } RowResult::Interrupt => break, RowResult::Done => break, + RowResult::Busy => { + self.writeln("database is busy"); + break; + } } } diff --git a/core/lib.rs b/core/lib.rs index 79e06abfb..255c47217 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -5,6 +5,7 @@ mod io; #[cfg(feature = "json")] mod json; mod pseudo; +mod result; mod schema; mod storage; mod translate; @@ -66,7 +67,6 @@ pub struct Database { pager: 
Rc, schema: Rc>, header: Rc>, - transaction_state: RefCell, // Shared structures of a Database are the parts that are common to multiple threads that might // create DB connections. shared_page_cache: Arc>, @@ -123,6 +123,7 @@ impl Database { pager: pager.clone(), schema: bootstrap_schema.clone(), header: db_header.clone(), + transaction_state: RefCell::new(TransactionState::None), db: Weak::new(), last_insert_rowid: Cell::new(0), }); @@ -135,7 +136,6 @@ impl Database { pager, schema, header, - transaction_state: RefCell::new(TransactionState::None), shared_page_cache, shared_wal, })) @@ -148,6 +148,7 @@ impl Database { header: self.header.clone(), last_insert_rowid: Cell::new(0), db: Arc::downgrade(self), + transaction_state: RefCell::new(TransactionState::None), }) } } @@ -206,6 +207,7 @@ pub struct Connection { schema: Rc>, header: Rc>, db: Weak, // backpointer to the database holding this connection + transaction_state: RefCell, last_insert_rowid: Cell, } @@ -379,6 +381,7 @@ impl Statement { vdbe::StepResult::IO => Ok(RowResult::IO), vdbe::StepResult::Done => Ok(RowResult::Done), vdbe::StepResult::Interrupt => Ok(RowResult::Interrupt), + vdbe::StepResult::Busy => Ok(RowResult::Busy), } } @@ -395,6 +398,7 @@ pub enum RowResult<'a> { IO, Done, Interrupt, + Busy, } pub struct Row<'a> { diff --git a/core/result.rs b/core/result.rs new file mode 100644 index 000000000..3056528ce --- /dev/null +++ b/core/result.rs @@ -0,0 +1,6 @@ +/// Common results that different functions can return in limbo. 
+pub enum LimboResult { + /// Couldn't acquire a lock + Busy, + Ok, +} diff --git a/core/storage/pager.rs b/core/storage/pager.rs index cd934d42a..0e0d0304c 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -1,3 +1,4 @@ +use crate::result::LimboResult; use crate::storage::buffer_pool::BufferPool; use crate::storage::database::DatabaseStorage; use crate::storage::sqlite3_ondisk::{self, DatabaseHeader, PageContent}; @@ -196,14 +197,12 @@ impl Pager { }) } - pub fn begin_read_tx(&self) -> Result<()> { - self.wal.borrow_mut().begin_read_tx()?; - Ok(()) + pub fn begin_read_tx(&self) -> Result { + self.wal.borrow_mut().begin_read_tx() } - pub fn begin_write_tx(&self) -> Result<()> { - self.wal.borrow_mut().begin_write_tx()?; - Ok(()) + pub fn begin_write_tx(&self) -> Result { + self.wal.borrow_mut().begin_write_tx() } pub fn end_tx(&self) -> Result { diff --git a/core/storage/wal.rs b/core/storage/wal.rs index 13323dac3..ef8a718ce 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -1,10 +1,12 @@ use std::collections::{HashMap, HashSet}; +use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::RwLock; use std::{cell::RefCell, rc::Rc, sync::Arc}; use log::{debug, trace}; use crate::io::{File, SyncCompletion, IO}; +use crate::result::LimboResult; use crate::storage::sqlite3_ondisk::{ begin_read_wal_frame, begin_write_wal_frame, WAL_FRAME_HEADER_SIZE, WAL_HEADER_SIZE, }; @@ -18,19 +20,110 @@ use super::page_cache::PageCacheKey; use super::pager::{PageRef, Pager}; use super::sqlite3_ondisk::{self, begin_write_btree_page, WalHeader}; +pub const READMARK_NOT_USED: u32 = 0xffffffff; + +pub const NO_LOCK: u32 = 0; +pub const SHARED_LOCK: u32 = 1; +pub const WRITE_LOCK: u32 = 2; + +#[derive(Debug)] +struct LimboRwLock { + lock: AtomicU32, + nreads: AtomicU32, + value: AtomicU32, +} + +impl LimboRwLock { + /// Shared lock. 
Returns true if it was successful, false if it couldn't lock it + pub fn read(&mut self) -> bool { + let lock = self.lock.load(Ordering::SeqCst); + match lock { + NO_LOCK => { + let res = self.lock.compare_exchange( + lock, + SHARED_LOCK, + Ordering::SeqCst, + Ordering::SeqCst, + ); + let ok = res.is_ok(); + if ok { + dbg!("adding"); + self.nreads.fetch_add(1, Ordering::SeqCst); + } + ok + } + SHARED_LOCK => { + self.nreads.fetch_add(1, Ordering::SeqCst); + true + } + WRITE_LOCK => false, + _ => unreachable!(), + } + } + + /// Locks exlusively. Returns true if it was successful, false if it couldn't lock it + pub fn write(&mut self) -> bool { + let lock = self.lock.load(Ordering::SeqCst); + match lock { + NO_LOCK => { + let res = self.lock.compare_exchange( + lock, + WRITE_LOCK, + Ordering::SeqCst, + Ordering::SeqCst, + ); + res.is_ok() + } + SHARED_LOCK => { + // no op + false + } + WRITE_LOCK => true, + _ => unreachable!(), + } + } + + /// Unlock the current held lock. + pub fn unlock(&mut self) { + let lock = self.lock.load(Ordering::SeqCst); + match lock { + NO_LOCK => {} + SHARED_LOCK => { + let prev = self.nreads.fetch_sub(1, Ordering::SeqCst); + if prev == 1 { + let res = self.lock.compare_exchange( + lock, + NO_LOCK, + Ordering::SeqCst, + Ordering::SeqCst, + ); + assert!(res.is_ok()); + } + } + WRITE_LOCK => { + let res = + self.lock + .compare_exchange(lock, NO_LOCK, Ordering::SeqCst, Ordering::SeqCst); + assert!(res.is_ok()); + } + _ => unreachable!(), + } + } +} + /// Write-ahead log (WAL). pub trait Wal { /// Begin a read transaction. - fn begin_read_tx(&mut self) -> Result<()>; + fn begin_read_tx(&mut self) -> Result; /// Begin a write transaction. - fn begin_write_tx(&mut self) -> Result<()>; + fn begin_write_tx(&mut self) -> Result; /// End a read transaction. - fn end_read_tx(&self) -> Result<()>; + fn end_read_tx(&self) -> Result; /// End a write transaction. 
- fn end_write_tx(&self) -> Result<()>; + fn end_write_tx(&self) -> Result; /// Find the latest frame containing a page. fn find_frame(&self, page_id: u64) -> Result>; @@ -108,10 +201,16 @@ pub struct WalFile { ongoing_checkpoint: OngoingCheckpoint, checkpoint_threshold: usize, // min and max frames for this connection + /// This is the index to the read_lock in WalFileShared that we are holding. This lock contains + /// the max frame for this connection. + max_frame_read_lock_index: usize, + /// Max frame allowed to lookup range=(minframe..max_frame) max_frame: u64, + /// Start of range to look for frames range=(minframe..max_frame) min_frame: u64, } +// TODO(pere): lock only important parts + pin WalFileShared /// WalFileShared is the part of a WAL that will be shared between threads. A wal has information /// that needs to be communicated between threads so this struct does the job. pub struct WalFileShared { @@ -130,20 +229,94 @@ pub struct WalFileShared { pages_in_frames: Vec, last_checksum: (u32, u32), // Check of last frame in WAL, this is a cumulative checksum over all frames in the WAL file: Rc, + /// read_locks is a list of read locks that can coexist with the max_frame nubmer stored in + /// value. There is a limited amount because and unbounded amount of connections could be + /// fatal. Therefore, for now we copy how SQLite behaves with limited amounts of read max + /// frames that is equal to 5 + read_locks: [LimboRwLock; 5], + /// There is only one write allowed in WAL mode. This lock takes care of ensuring there is only + /// one used. + write_lock: LimboRwLock, } impl Wal for WalFile { /// Begin a read transaction. 
- fn begin_read_tx(&mut self) -> Result<()> { - let shared = self.shared.read().unwrap(); + fn begin_read_tx(&mut self) -> Result { + let mut shared = self.shared.write().unwrap(); + let max_frame_in_wal = shared.max_frame; self.min_frame = shared.nbackfills + 1; - self.max_frame = shared.max_frame; - Ok(()) + + let mut max_read_mark = 0; + let mut max_read_mark_index = -1; + // Find the largest mark we can find, ignore frames that are impossible to be in range and + // that are not set + for (index, lock) in shared.read_locks.iter().enumerate() { + let this_mark = lock.value.load(Ordering::SeqCst); + if this_mark > max_read_mark && this_mark <= max_frame_in_wal as u32 { + max_read_mark = this_mark; + max_read_mark_index = index as i64; + } + } + + // If we didn't find any mark, then let's add a new one + if max_read_mark_index == -1 { + for (index, lock) in shared.read_locks.iter_mut().enumerate() { + let busy = !lock.write(); + if !busy { + // If this was busy then it must mean >1 threads tried to set this read lock + lock.value.store(max_frame_in_wal as u32, Ordering::SeqCst); + max_read_mark = max_frame_in_wal as u32; + max_read_mark_index = index as i64; + lock.unlock(); + break; + } + } + } + + if max_read_mark_index == -1 { + return Ok(LimboResult::Busy); + } + + let lock = &mut shared.read_locks[max_read_mark_index as usize]; + let busy = !lock.read(); + if busy { + return Ok(LimboResult::Busy); + } + self.max_frame_read_lock_index = max_read_mark_index as usize; + self.max_frame = max_read_mark as u64; + self.min_frame = shared.nbackfills + 1; + log::trace!( + "begin_read_tx(min_frame={}, max_frame={}, lock={})", + self.min_frame, + self.max_frame, + self.max_frame_read_lock_index + ); + Ok(LimboResult::Ok) } /// End a read transaction. 
- fn end_read_tx(&self) -> Result<()> { - Ok(()) + fn end_read_tx(&self) -> Result { + let mut shared = self.shared.write().unwrap(); + let read_lock = &mut shared.read_locks[self.max_frame_read_lock_index]; + read_lock.unlock(); + Ok(LimboResult::Ok) + } + + /// Begin a write transaction + fn begin_write_tx(&mut self) -> Result { + let mut shared = self.shared.write().unwrap(); + let busy = !shared.write_lock.write(); + if busy { + return Ok(LimboResult::Busy); + } + Ok(LimboResult::Ok) + } + + /// End a write transaction + fn end_write_tx(&self) -> Result { + let mut shared = self.shared.write().unwrap(); + shared.write_lock.unlock(); + Ok(LimboResult::Ok) } /// Find the latest frame containing a page. @@ -186,7 +359,11 @@ impl Wal for WalFile { ) -> Result<()> { let page_id = page.get().id; let mut shared = self.shared.write().unwrap(); - let frame_id = shared.max_frame; + let frame_id = if shared.max_frame == 0 { + 1 + } else { + shared.max_frame + }; let offset = self.frame_offset(frame_id); trace!( "append_frame(frame={}, offset={}, page_id={})", @@ -221,16 +398,6 @@ impl Wal for WalFile { Ok(()) } - /// Begin a write transaction - fn begin_write_tx(&mut self) -> Result<()> { - Ok(()) - } - - /// End a write transaction - fn end_write_tx(&self) -> Result<()> { - Ok(()) - } - fn should_checkpoint(&self) -> bool { let shared = self.shared.read().unwrap(); let frame_id = shared.max_frame as usize; @@ -249,9 +416,29 @@ impl Wal for WalFile { CheckpointState::Start => { // TODO(pere): check what frames are safe to checkpoint between many readers! 
self.ongoing_checkpoint.min_frame = self.min_frame; - self.ongoing_checkpoint.max_frame = self.max_frame; + let mut shared = self.shared.write().unwrap(); + let max_frame_in_wal = shared.max_frame as u32; + let mut max_safe_frame = shared.max_frame; + for read_lock in shared.read_locks.iter_mut() { + let this_mark = read_lock.value.load(Ordering::SeqCst); + if this_mark < max_safe_frame as u32 { + let busy = !read_lock.write(); + if !busy { + read_lock.value.store(max_frame_in_wal, Ordering::SeqCst); + read_lock.unlock(); + } else { + max_safe_frame = this_mark as u64; + } + } + } + self.ongoing_checkpoint.max_frame = max_safe_frame; self.ongoing_checkpoint.current_page = 0; self.ongoing_checkpoint.state = CheckpointState::ReadFrame; + log::trace!( + "checkpoint_start(min_frame={}, max_frame={})", + self.ongoing_checkpoint.max_frame, + self.ongoing_checkpoint.min_frame + ); } CheckpointState::ReadFrame => { let shared = self.shared.read().unwrap(); @@ -412,10 +599,11 @@ impl WalFile { syncing: Rc::new(RefCell::new(false)), checkpoint_threshold: 1000, page_size, - max_frame: 0, - min_frame: 0, buffer_pool, sync_state: RefCell::new(SyncState::NotSyncing), + max_frame: 0, + min_frame: 0, + max_frame_read_lock_index: 0, } } @@ -488,6 +676,38 @@ impl WalFileShared { last_checksum: checksum, file, pages_in_frames: Vec::new(), + read_locks: [ + LimboRwLock { + lock: AtomicU32::new(NO_LOCK), + nreads: AtomicU32::new(0), + value: AtomicU32::new(READMARK_NOT_USED), + }, + LimboRwLock { + lock: AtomicU32::new(NO_LOCK), + nreads: AtomicU32::new(0), + value: AtomicU32::new(READMARK_NOT_USED), + }, + LimboRwLock { + lock: AtomicU32::new(NO_LOCK), + nreads: AtomicU32::new(0), + value: AtomicU32::new(READMARK_NOT_USED), + }, + LimboRwLock { + lock: AtomicU32::new(NO_LOCK), + nreads: AtomicU32::new(0), + value: AtomicU32::new(READMARK_NOT_USED), + }, + LimboRwLock { + lock: AtomicU32::new(NO_LOCK), + nreads: AtomicU32::new(0), + value: AtomicU32::new(READMARK_NOT_USED), + }, + ], + 
write_lock: LimboRwLock { + lock: AtomicU32::new(NO_LOCK), + nreads: AtomicU32::new(0), + value: AtomicU32::new(READMARK_NOT_USED), + }, }; Ok(Arc::new(RwLock::new(shared))) } diff --git a/core/util.rs b/core/util.rs index 6320f6612..a57186890 100644 --- a/core/util.rs +++ b/core/util.rs @@ -60,6 +60,7 @@ pub fn parse_schema_rows(rows: Option, schema: &mut Schema, io: Arc break, RowResult::Done => break, + RowResult::Busy => break, } } } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 520100463..debabb067 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -28,6 +28,7 @@ use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY}; use crate::ext::{exec_ts_from_uuid7, exec_uuid, exec_uuidblob, exec_uuidstr, ExtFunc, UuidFunc}; use crate::function::{AggFunc, FuncCtx, MathFunc, MathFuncArity, ScalarFunc}; use crate::pseudo::PseudoCursor; +use crate::result::LimboResult; use crate::schema::Table; use crate::storage::sqlite3_ondisk::DatabaseHeader; use crate::storage::{btree::BTreeCursor, pager::Pager}; @@ -537,6 +538,7 @@ pub enum StepResult<'a> { IO, Row(Record<'a>), Interrupt, + Busy, } /// If there is I/O, the instruction is restarted. @@ -1657,29 +1659,34 @@ impl Program { } Insn::Transaction { write } => { let connection = self.connection.upgrade().unwrap(); - if let Some(db) = connection.db.upgrade() { - // TODO(pere): are backpointers good ?? this looks ugly af - // upgrade transaction if needed - let new_transaction_state = - match (db.transaction_state.borrow().clone(), write) { - (crate::TransactionState::Write, true) => TransactionState::Write, - (crate::TransactionState::Write, false) => TransactionState::Write, - (crate::TransactionState::Read, true) => TransactionState::Write, - (crate::TransactionState::Read, false) => TransactionState::Read, - (crate::TransactionState::None, true) => TransactionState::Read, - (crate::TransactionState::None, false) => TransactionState::Read, - }; - // TODO(Pere): - // 1. lock wal - // 2. lock shared - // 3. 
lock write db if write - db.transaction_state.replace(new_transaction_state.clone()); - if matches!(new_transaction_state, TransactionState::Write) { - pager.begin_read_tx()?; - } else { - pager.begin_write_tx()?; + let current_state = connection.transaction_state.borrow().clone(); + let (new_transaction_state, updated) = match (&current_state, write) { + (crate::TransactionState::Write, true) => (TransactionState::Write, false), + (crate::TransactionState::Write, false) => (TransactionState::Write, false), + (crate::TransactionState::Read, true) => (TransactionState::Write, true), + (crate::TransactionState::Read, false) => (TransactionState::Read, false), + (crate::TransactionState::None, true) => (TransactionState::Write, true), + (crate::TransactionState::None, false) => (TransactionState::Read, true), + }; + + if updated && matches!(current_state, TransactionState::None) { + if let LimboResult::Busy = pager.begin_read_tx()? { + log::trace!("begin_read_tx busy"); + return Ok(StepResult::Busy); } } + + if updated && matches!(new_transaction_state, TransactionState::Write) { + if let LimboResult::Busy = pager.begin_write_tx()? { + log::trace!("begin_write_tx busy"); + return Ok(StepResult::Busy); + } + } + if updated { + connection + .transaction_state + .replace(new_transaction_state.clone()); + } state.pc += 1; } Insn::Goto { target_pc } => { diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index fd194de66..61b115f01 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -235,6 +235,7 @@ impl Interaction { RowResult::Done => { break; } + RowResult::Busy => {} } } diff --git a/simulator/main.rs b/simulator/main.rs index 085711391..7959c655e 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -298,6 +298,9 @@ fn get_all_rows( RowResult::Done => { break; } + RowResult::Busy => { + // for now let's retry?
+ } } } Ok(out) diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index da057df79..b183648b2 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -246,6 +246,7 @@ pub unsafe extern "C" fn sqlite3_step(stmt: *mut sqlite3_stmt) -> std::ffi::c_in stmt.row.replace(Some(row)); SQLITE_ROW } + limbo_core::RowResult::Busy => SQLITE_BUSY, } } else { SQLITE_ERROR @@ -1032,7 +1033,7 @@ fn sqlite3_errstr_impl(rc: i32) -> *const std::ffi::c_char { "datatype mismatch", // SQLITE_MISMATCH "bad parameter or other API misuse", // SQLITE_MISUSE #[cfg(feature = "lfs")] - "", // SQLITE_NOLFS + "", // SQLITE_NOLFS #[cfg(not(feature = "lfs"))] "large file support is disabled", // SQLITE_NOLFS "authorization denied", // SQLITE_AUTH diff --git a/test/src/lib.rs b/test/src/lib.rs index 53cec37a4..8bd6feea2 100644 --- a/test/src/lib.rs +++ b/test/src/lib.rs @@ -95,6 +95,9 @@ mod tests { } RowResult::Interrupt => break, RowResult::Done => break, + RowResult::Busy => { + panic!("Database is busy"); + } } }, Ok(None) => {} @@ -163,6 +166,7 @@ mod tests { } RowResult::Interrupt => break, RowResult::Done => break, + RowResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -237,6 +241,7 @@ mod tests { } RowResult::Interrupt => break, RowResult::Done => break, + RowResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -300,6 +305,7 @@ mod tests { } RowResult::Interrupt => break, RowResult::Done => break, + RowResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -361,6 +367,7 @@ mod tests { } RowResult::Interrupt => break, RowResult::Done => break, + RowResult::Busy => panic!("Database is busy"), } } } @@ -453,6 +460,7 @@ mod tests { } RowResult::Interrupt => break, RowResult::Done => break, + RowResult::Busy => panic!("Database is busy"), } } } @@ -487,6 +495,7 @@ mod tests { } RowResult::Interrupt => break, RowResult::Done => break, + RowResult::Busy => panic!("Database is busy"), } }, Ok(None) => {} From 3bce2823528e70089a897cd3dceff7781e178e13 Mon Sep 17 00:00:00 2001 From: Pere 
Diaz Bou Date: Tue, 24 Dec 2024 18:18:17 +0100 Subject: [PATCH 105/144] respect max_frame on checkpoint --- core/storage/wal.rs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/core/storage/wal.rs b/core/storage/wal.rs index ef8a718ce..5aaab2f75 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -47,7 +47,6 @@ impl LimboRwLock { ); let ok = res.is_ok(); if ok { - dbg!("adding"); self.nreads.fetch_add(1, Ordering::SeqCst); } ok @@ -459,8 +458,9 @@ impl Wal for WalFile { .expect("page must be in frame cache if it's in list"); for frame in frames.iter().rev() { - // TODO: do proper selection of frames to checkpoint - if *frame >= self.ongoing_checkpoint.min_frame { + if *frame >= self.ongoing_checkpoint.min_frame + && *frame <= self.ongoing_checkpoint.max_frame + { log::debug!( "checkpoint page(state={:?}, page={}, frame={})", state, @@ -515,10 +515,18 @@ impl Wal for WalFile { return Ok(CheckpointStatus::IO); } let mut shared = self.shared.write().unwrap(); - shared.frame_cache.clear(); - shared.pages_in_frames.clear(); - shared.max_frame = 0; - shared.nbackfills = 0; + let everything_backfilled = + shared.max_frame == self.ongoing_checkpoint.max_frame; + if everything_backfilled { + // Here we know that we backfilled everything, therefore we can safely + // reset the wal. 
+ shared.frame_cache.clear(); + shared.pages_in_frames.clear(); + shared.max_frame = 0; + shared.nbackfills = 0; + } else { + shared.nbackfills = self.ongoing_checkpoint.max_frame; + } self.ongoing_checkpoint.state = CheckpointState::Start; return Ok(CheckpointStatus::Done); } From 42ea9041e1f3ce30ca95a843426d1fa64b0f9015 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 24 Dec 2024 19:21:44 +0200 Subject: [PATCH 106/144] rename cell_get_raw_pointer_region() and refactor a bit --- core/storage/btree.rs | 11 ++++------- core/storage/sqlite3_ondisk.rs | 9 +++++---- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 1ff53e7d2..387351225 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -761,7 +761,7 @@ impl BTreeCursor { buf[new_cell_data_pointer as usize..new_cell_data_pointer as usize + payload.len()] .copy_from_slice(payload); // memmove(pIns+2, pIns, 2*(pPage->nCell - i)); - let (cell_pointer_array_start, _) = page.cell_get_raw_pointer_region(); + let (cell_pointer_array_start, _) = page.cell_pointer_array_offset_and_size(); let cell_pointer_cur_idx = cell_pointer_array_start + (CELL_POINTER_SIZE_BYTES * cell_idx); // move existing pointers forward by CELL_POINTER_SIZE_BYTES... 
@@ -1232,7 +1232,7 @@ impl BTreeCursor { if is_page_1 { // Remove header from child and set offset to 0 let contents = child.get().contents.as_mut().unwrap(); - let (cell_pointer_offset, _) = contents.cell_get_raw_pointer_region(); + let (cell_pointer_offset, _) = contents.cell_pointer_array_offset_and_size(); // change cell pointers for cell_idx in 0..contents.cell_count() { let cell_pointer_offset = cell_pointer_offset + (2 * cell_idx) - offset; @@ -1288,7 +1288,7 @@ impl BTreeCursor { fn allocate_cell_space(&self, page_ref: &PageContent, amount: u16) -> u16 { let amount = amount as usize; - let (cell_offset, _) = page_ref.cell_get_raw_pointer_region(); + let (cell_offset, _) = page_ref.cell_pointer_array_offset_and_size(); let gap = cell_offset + 2 * page_ref.cell_count(); let mut top = page_ref.cell_content_area() as usize; @@ -1330,10 +1330,7 @@ impl BTreeCursor { // TODO: implement fast algorithm let last_cell = usable_space - 4; - let first_cell = { - let (start, end) = cloned_page.cell_get_raw_pointer_region(); - start + end - }; + let first_cell = cloned_page.unallocated_region_start() as u64; if cloned_page.cell_count() > 0 { let page_type = page.page_type(); diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index eb57ff041..0403bee87 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -500,7 +500,8 @@ impl PageContent { /// The start of the unallocated region. /// Effectively: the offset after the page header + the cell pointer array. 
pub fn unallocated_region_start(&self) -> usize { - self.offset + self.header_size() + self.cell_pointer_array_size() + let (cell_ptr_array_start, cell_ptr_array_size) = self.cell_pointer_array_offset_and_size(); + cell_ptr_array_start + cell_ptr_array_size } pub fn unallocated_region_size(&self) -> usize { @@ -576,9 +577,9 @@ impl PageContent { /// The cell pointers are arranged in key order with: /// - left-most cell (the cell with the smallest key) first and /// - the right-most cell (the cell with the largest key) last. - pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) { - let cell_start = self.header_size(); - (self.offset + cell_start, self.cell_count() * 2) + pub fn cell_pointer_array_offset_and_size(&self) -> (usize, usize) { + let header_size = self.header_size(); + (self.offset + header_size, self.cell_pointer_array_size()) } /* Get region of a cell's payload */ From a2921bd32c3c4acc2af5a0f7bd77f1849e7b36da Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 24 Dec 2024 18:30:58 +0100 Subject: [PATCH 107/144] core: add checkpoint mode passive --- core/storage/pager.rs | 20 ++++++++++++-------- core/storage/wal.rs | 13 +++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 0e0d0304c..503cd2900 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -4,7 +4,7 @@ use crate::storage::database::DatabaseStorage; use crate::storage::sqlite3_ondisk::{self, DatabaseHeader, PageContent}; use crate::storage::wal::Wal; use crate::{Buffer, Result}; -use log::{debug, trace}; +use log::trace; use std::cell::{RefCell, UnsafeCell}; use std::collections::HashSet; use std::rc::Rc; @@ -12,7 +12,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, RwLock}; use super::page_cache::{DumbLruPageCache, PageCacheKey}; -use super::wal::CheckpointStatus; +use super::wal::{CheckpointMode, CheckpointStatus}; pub struct PageInner { pub flags: AtomicUsize, @@ -377,7 
+377,11 @@ impl Pager { match state { CheckpointState::Checkpoint => { let in_flight = self.checkpoint_inflight.clone(); - match self.wal.borrow_mut().checkpoint(self, in_flight)? { + match self.wal.borrow_mut().checkpoint( + self, + in_flight, + CheckpointMode::Passive, + )? { CheckpointStatus::IO => return Ok(CheckpointStatus::IO), CheckpointStatus::Done => { self.checkpoint_state.replace(CheckpointState::SyncDbFile); @@ -413,11 +417,11 @@ impl Pager { // WARN: used for testing purposes pub fn clear_page_cache(&self) { loop { - match self - .wal - .borrow_mut() - .checkpoint(self, Rc::new(RefCell::new(0))) - { + match self.wal.borrow_mut().checkpoint( + self, + Rc::new(RefCell::new(0)), + CheckpointMode::Passive, + ) { Ok(CheckpointStatus::IO) => { self.io.run_once(); } diff --git a/core/storage/wal.rs b/core/storage/wal.rs index 5aaab2f75..3cdad9263 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -26,6 +26,13 @@ pub const NO_LOCK: u32 = 0; pub const SHARED_LOCK: u32 = 1; pub const WRITE_LOCK: u32 = 2; +pub enum CheckpointMode { + Passive, + Full, + Restart, + Truncate, +} + #[derive(Debug)] struct LimboRwLock { lock: AtomicU32, @@ -143,6 +150,7 @@ pub trait Wal { &mut self, pager: &Pager, write_counter: Rc>, + mode: CheckpointMode, ) -> Result; fn sync(&mut self) -> Result; fn get_max_frame(&self) -> u64; @@ -407,7 +415,12 @@ impl Wal for WalFile { &mut self, pager: &Pager, write_counter: Rc>, + mode: CheckpointMode, ) -> Result { + assert!( + matches!(mode, CheckpointMode::Passive), + "only passive mode supported for now" + ); 'checkpoint_loop: loop { let state = self.ongoing_checkpoint.state; log::debug!("checkpoint(state={:?})", state); From 5cd84a407f3e6eaf6efa8a91e4ea912404a8b083 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 24 Dec 2024 18:42:58 +0100 Subject: [PATCH 108/144] fmt --- sqlite3/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index b183648b2..6bd5b23d6 
100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -1033,7 +1033,7 @@ fn sqlite3_errstr_impl(rc: i32) -> *const std::ffi::c_char { "datatype mismatch", // SQLITE_MISMATCH "bad parameter or other API misuse", // SQLITE_MISUSE #[cfg(feature = "lfs")] - "", // SQLITE_NOLFS + "", // SQLITE_NOLFS #[cfg(not(feature = "lfs"))] "large file support is disabled", // SQLITE_NOLFS "authorization denied", // SQLITE_AUTH From 93e3b49f08d56223e411610ec34943e78e2a1118 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 25 Dec 2024 00:25:23 +0100 Subject: [PATCH 109/144] bench --- core/benches/benchmark.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/core/benches/benchmark.rs b/core/benches/benchmark.rs index ab1b0aa9a..0fe17d991 100644 --- a/core/benches/benchmark.rs +++ b/core/benches/benchmark.rs @@ -52,6 +52,9 @@ fn limbo_bench(criterion: &mut Criterion) { limbo_core::RowResult::Done => { unreachable!(); } + limbo_core::RowResult::Busy => { + unreachable!(); + } } stmt.reset(); }); @@ -77,6 +80,9 @@ fn limbo_bench(criterion: &mut Criterion) { limbo_core::RowResult::Done => { unreachable!(); } + limbo_core::RowResult::Busy => { + unreachable!() + } } stmt.reset(); }); @@ -103,6 +109,9 @@ fn limbo_bench(criterion: &mut Criterion) { limbo_core::RowResult::Done => { unreachable!(); } + limbo_core::RowResult::Busy => { + unreachable!() + } } stmt.reset(); }); From 28ae691bf7ec72c24b13d96025554863f3587321 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Wed, 25 Dec 2024 03:04:57 -0500 Subject: [PATCH 110/144] switch the seed, database path, and plan path prints to println instead of log::info --- simulator/main.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 8a7317706..fb3892c4d 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -126,9 +126,9 @@ fn main() { std::fs::rename(&old_plan_path, &plan_path).unwrap(); } // Print the seed, the locations of the database and the plan file at 
the end again for easily accessing them. - log::info!("database path: {:?}", db_path); - log::info!("simulator plan path: {:?}", plan_path); - log::info!("seed: {}", seed); + println!("database path: {:?}", db_path); + println!("simulator plan path: {:?}", plan_path); + println!("seed: {}", seed); } fn run_simulation( From 652283efc1a550f6cf1ac1510864a055ca392936 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 25 Dec 2024 10:26:42 +0200 Subject: [PATCH 111/144] simulator: Kill dead code ...the old maybe_add_table() codepath as it is not used. --- simulator/main.rs | 103 +--------------------------------------- simulator/properties.rs | 78 ------------------------------ 2 files changed, 2 insertions(+), 179 deletions(-) delete mode 100644 simulator/properties.rs diff --git a/simulator/main.rs b/simulator/main.rs index fb3892c4d..2d72c81b8 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,9 +1,8 @@ use clap::Parser; use generation::plan::{Interaction, InteractionPlan, ResultSet}; -use generation::{pick_index, Arbitrary, ArbitraryFrom}; +use generation::{pick_index, ArbitraryFrom}; use limbo_core::{Connection, Database, Result, RowResult, IO}; -use model::query::{Create, Query}; -use model::table::{Column, Name, Table, Value}; +use model::table::Value; use rand::prelude::*; use rand_chacha::ChaCha8Rng; use runner::cli::SimulatorCLI; @@ -18,7 +17,6 @@ use tempfile::TempDir; mod generation; mod model; -mod properties; mod runner; #[allow(clippy::arc_with_non_send_sync)] @@ -289,100 +287,3 @@ fn compare_equal_rows(a: &[Vec], b: &[Vec]) { } } } - -fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc) -> Result<()> { - if env.tables.len() < env.opts.max_tables { - let table = Table { - rows: Vec::new(), - name: Name::arbitrary(&mut env.rng).0, - columns: (1..env.rng.gen_range(1..128)) - .map(|_| Column::arbitrary(&mut env.rng)) - .collect(), - }; - let query = Query::Create(Create { - table: table.clone(), - }); - let rows = get_all_rows(env, conn, 
query.to_string().as_str())?; - log::debug!("{:?}", rows); - let rows = get_all_rows( - env, - conn, - format!( - "SELECT sql FROM sqlite_schema WHERE type IN ('table', 'index') AND name = '{}';", - table.name - ) - .as_str(), - )?; - log::debug!("{:?}", rows); - assert!(rows.len() == 1); - let as_text = match &rows[0][0] { - Value::Text(t) => t, - _ => unreachable!(), - }; - assert!( - *as_text != query.to_string(), - "table was not inserted correctly" - ); - env.tables.push(table); - } - Ok(()) -} - -fn get_all_rows( - env: &mut SimulatorEnv, - conn: &mut Rc, - query: &str, -) -> Result>> { - log::info!("running query '{}'", &query[0..query.len().min(4096)]); - let mut out = Vec::new(); - let rows = conn.query(query); - if rows.is_err() { - let err = rows.err(); - log::error!( - "Error running query '{}': {:?}", - &query[0..query.len().min(4096)], - err - ); - return Err(err.unwrap()); - } - let rows = rows.unwrap(); - assert!(rows.is_some()); - let mut rows = rows.unwrap(); - 'rows_loop: loop { - env.io.inject_fault(env.rng.gen_ratio(1, 10000)); - match rows.next_row()? { - RowResult::Row(row) => { - let mut r = Vec::new(); - for el in &row.values { - let v = match el { - limbo_core::Value::Null => Value::Null, - limbo_core::Value::Integer(i) => Value::Integer(*i), - limbo_core::Value::Float(f) => Value::Float(*f), - limbo_core::Value::Text(t) => Value::Text(t.to_string()), - limbo_core::Value::Blob(b) => Value::Blob(b.to_vec()), - }; - r.push(v); - } - - out.push(r); - } - RowResult::IO => { - env.io.inject_fault(env.rng.gen_ratio(1, 10000)); - if env.io.run_once().is_err() { - log::info!("query inject fault"); - break 'rows_loop; - } - } - RowResult::Interrupt => { - break; - } - RowResult::Done => { - break; - } - RowResult::Busy => { - // for now let's retry? 
- } - } - } - Ok(out) -} diff --git a/simulator/properties.rs b/simulator/properties.rs deleted file mode 100644 index a6536d1d8..000000000 --- a/simulator/properties.rs +++ /dev/null @@ -1,78 +0,0 @@ -use std::rc::Rc; - -use limbo_core::Connection; -use rand::Rng; - -use crate::{ - compare_equal_rows, - generation::ArbitraryFrom, - get_all_rows, - model::{ - query::{Insert, Predicate, Query, Select}, - table::Value, - }, - SimulatorEnv, -}; - -pub fn property_insert_select(env: &mut SimulatorEnv, conn: &mut Rc) { - // Get a random table - let table = env.rng.gen_range(0..env.tables.len()); - - // Pick a random column - let column_index = env.rng.gen_range(0..env.tables[table].columns.len()); - let column = &env.tables[table].columns[column_index].clone(); - - let mut rng = env.rng.clone(); - - // Generate a random value of the column type - let value = Value::arbitrary_from(&mut rng, &column.column_type); - - // Create a whole new row - let mut row = Vec::new(); - for (i, column) in env.tables[table].columns.iter().enumerate() { - if i == column_index { - row.push(value.clone()); - } else { - let value = Value::arbitrary_from(&mut rng, &column.column_type); - row.push(value); - } - } - - // Insert the row - let query = Query::Insert(Insert { - table: env.tables[table].name.clone(), - values: row.clone(), - }); - let _ = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); - // Shadow operation on the table - env.tables[table].rows.push(row.clone()); - - // Create a query that selects the row - let query = Query::Select(Select { - table: env.tables[table].name.clone(), - predicate: Predicate::Eq(column.name.clone(), value), - }); - - // Get all rows - let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); - - // Check that the row is there - assert!(rows.iter().any(|r| r == &row)); -} - -pub fn property_select_all(env: &mut SimulatorEnv, conn: &mut Rc) { - // Get a random table - let table = env.rng.gen_range(0..env.tables.len()); - - // 
Create a query that selects all rows - let query = Query::Select(Select { - table: env.tables[table].name.clone(), - predicate: Predicate::And(Vec::new()), - }); - - // Get all rows - let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap(); - - // Make sure the rows are the same - compare_equal_rows(&rows, &env.tables[table].rows); -} From 37e1f35df815c053d7d70eb676f0f254e26294c5 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 25 Dec 2024 11:54:16 +0200 Subject: [PATCH 112/144] Fix Cargo.toml in macros crate --- Cargo.lock | 10 +++++----- core/Cargo.toml | 2 +- core/vdbe/mod.rs | 2 +- macros/Cargo.toml | 10 +++++++++- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6053a3e93..0f034d89d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1138,8 +1138,8 @@ dependencies = [ "jsonb", "julian_day_converter", "libc", + "limbo_macros", "log", - "macros", "mimalloc", "mockall", "nix 0.29.0", @@ -1160,6 +1160,10 @@ dependencies = [ "uuid", ] +[[package]] +name = "limbo_macros" +version = "0.0.10" + [[package]] name = "limbo_sim" version = "0.0.10" @@ -1206,10 +1210,6 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" -[[package]] -name = "macros" -version = "0.0.0" - [[package]] name = "memchr" version = "2.7.4" diff --git a/core/Cargo.toml b/core/Cargo.toml index 4ef87b469..4d731eb2b 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -54,7 +54,7 @@ pest = { version = "2.0", optional = true } pest_derive = { version = "2.0", optional = true } rand = "0.8.5" bumpalo = { version = "3.16.0", features = ["collections", "boxed"] } -macros = { path = "../macros" } +limbo_macros = { path = "../macros" } uuid = { version = "1.11.0", features = ["v4", "v7"], optional = true } [target.'cfg(not(target_family = "windows"))'.dev-dependencies] diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 
debabb067..427770155 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -40,7 +40,7 @@ use crate::util::parse_schema_rows; use crate::{function::JsonFunc, json::get_json, json::json_array}; use crate::{Connection, Result, TransactionState}; use crate::{Rows, DATABASE_VERSION}; -use macros::Description; +use limbo_macros::Description; use datetime::{exec_date, exec_time, exec_unixepoch}; diff --git a/macros/Cargo.toml b/macros/Cargo.toml index e3516da14..78a3805c6 100644 --- a/macros/Cargo.toml +++ b/macros/Cargo.toml @@ -1,5 +1,13 @@ +# Copyright 2024 the Limbo authors. All rights reserved. MIT license. + [package] -name = "macros" +name = "limbo_macros" +version.workspace = true +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +description = "The Limbo database library" [lib] proc-macro = true From e49ba4f982c4bac4d852fa0e8c047a36ac721e5c Mon Sep 17 00:00:00 2001 From: alpaylan Date: Wed, 25 Dec 2024 09:55:28 -0500 Subject: [PATCH 113/144] fix empty range error when 0 interactions are produced by creating at least 1 interaction --- simulator/main.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index fb3892c4d..49f738c56 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -149,8 +149,12 @@ fn run_simulation( (read_percent, write_percent, delete_percent) }; + if cli_opts.maximum_size < 1 { + panic!("maximum size must be at least 1"); + } + let opts = SimulatorOpts { - ticks: rng.gen_range(0..cli_opts.maximum_size), + ticks: rng.gen_range(1..=cli_opts.maximum_size), max_connections: 1, // TODO: for now let's use one connection as we didn't implement // correct transactions procesing max_tables: rng.gen_range(0..128), @@ -158,7 +162,7 @@ fn run_simulation( write_percent, delete_percent, page_size: 4096, // TODO: randomize this too - max_interactions: rng.gen_range(0..cli_opts.maximum_size), + max_interactions: 
rng.gen_range(1..=cli_opts.maximum_size), }; let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap()); From 51541dd8dc93ba2085988e572403c62220a13d78 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 21 Dec 2024 14:46:38 +0200 Subject: [PATCH 114/144] fix issues with insert --- core/translate/insert.rs | 315 ++++++++++++++++++++++++++++----------- 1 file changed, 232 insertions(+), 83 deletions(-) diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 614cde8b2..89665a726 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -2,10 +2,11 @@ use std::rc::Weak; use std::{cell::RefCell, ops::Deref, rc::Rc}; use sqlite3_parser::ast::{ - DistinctNames, InsertBody, QualifiedName, ResolveType, ResultColumn, With, + DistinctNames, Expr, InsertBody, QualifiedName, ResolveType, ResultColumn, With, }; use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY; +use crate::util::normalize_ident; use crate::{ schema::{Schema, Table}, storage::sqlite3_ondisk::DatabaseHeader, @@ -14,13 +15,117 @@ use crate::{ }; use crate::{Connection, Result}; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// Helper enum to indicate how a column is being inserted. +/// For example: +/// CREATE TABLE t (a, b, c, d); +/// INSERT INTO t (c, b) VALUES (1, 2); +/// +/// resolve_columns_for_insert() returns [ +/// ColumnToInsert::AutomaticNull, +/// ColumnToInsert::UserProvided { index_in_value_tuple: 1 }, +/// ColumnToInsert::UserProvided { index_in_value_tuple: 0 }, +/// ColumnToInsert::AutomaticNull, +/// ] +enum ColumnToInsert { + /// The column is provided by the user. + UserProvided { index_in_value_tuple: usize }, + /// The column is automatically set to NULL since it was not provided by the user. + AutomaticNull, +} + +/// Resolves how each column in a table should be populated during an INSERT. +/// For each column, determines whether it will: +/// 1. Use a user-provided value from the VALUES clause, or +/// 2. 
Be automatically set to NULL +/// +/// Two cases are handled: +/// 1. No column list specified in INSERT statement: +/// - Values are assigned to columns in table definition order +/// - If fewer values than columns, remaining columns are NULL +/// 2. Column list specified in INSERT statement: +/// - For specified columns, a ColumnToInsert::UserProvided entry is created. +/// - Any columns not listed are set to NULL, i.e. a ColumnToInsert::AutomaticNull entry is created. +/// +/// Returns a Vec with an entry for each column in the table, +/// indicating how that column should be populated. +fn resolve_columns_for_insert( + table: Rc

, + columns: &Option, + values: &[Vec], +) -> Result> { + assert!(table.has_rowid()); + if values.is_empty() { + crate::bail_parse_error!("no values to insert"); + } + + let num_cols_in_table = table.columns().len(); + + if columns.is_none() { + let num_cols = values[0].len(); + // ensure value tuples dont have more columns than the table + if num_cols > num_cols_in_table { + crate::bail_parse_error!( + "table {} has {} columns but {} values were supplied", + table.get_name(), + num_cols_in_table, + num_cols + ); + } + // ensure each value tuple has the same number of columns + for value in values.iter().skip(1) { + if value.len() != num_cols { + crate::bail_parse_error!("all VALUES must have the same number of terms"); + } + } + let columns: Vec = (0..num_cols_in_table) + .map(|i| { + if i < num_cols { + ColumnToInsert::UserProvided { + index_in_value_tuple: i, + } + } else { + ColumnToInsert::AutomaticNull + } + }) + .collect(); + return Ok(columns); + } + + // resolve the given columns to actual table column names and ensure they exist + let columns = columns.as_ref().unwrap(); + let mut resolved_columns: Vec = (0..num_cols_in_table) + .map(|i| ColumnToInsert::AutomaticNull) + .collect(); + for (index_in_value_tuple, column) in columns.iter().enumerate() { + let column_name = normalize_ident(column.0.as_str()); + let column_idx = table + .columns() + .iter() + .position(|c| c.name.eq_ignore_ascii_case(&column_name)); + if let Some(i) = column_idx { + resolved_columns[i] = ColumnToInsert::UserProvided { + index_in_value_tuple, + }; + } else { + crate::bail_parse_error!( + "table {} has no column named {}", + table.get_name(), + column_name + ); + } + } + + Ok(resolved_columns) +} + #[allow(clippy::too_many_arguments)] pub fn translate_insert( schema: &Schema, with: &Option, or_conflict: &Option, tbl_name: &QualifiedName, - _columns: &Option, + columns: &Option, body: &InsertBody, _returning: &Option>, database_header: Rc>, @@ -46,6 +151,10 @@ pub fn 
translate_insert( None => crate::bail_corrupt_error!("Parse error: no such table: {}", table_name), }; let table = Rc::new(Table::BTree(table)); + if !table.has_rowid() { + crate::bail_parse_error!("INSERT into WITHOUT ROWID table is not supported"); + } + let cursor_id = program.alloc_cursor_id( Some(table_name.0.clone()), Some(table.clone().deref().clone()), @@ -55,18 +164,44 @@ pub fn translate_insert( Table::Index(index) => index.root_page, Table::Pseudo(_) => todo!(), }; + let values = match body { + InsertBody::Select(select, None) => match &select.body.select { + sqlite3_parser::ast::OneSelect::Values(values) => values, + _ => todo!(), + }, + _ => todo!(), + }; - let mut num_cols = table.columns().len(); - if table.has_rowid() { - num_cols += 1; - } - // column_registers_start[0] == rowid if has rowid - let column_registers_start = program.alloc_registers(num_cols); + let columns = resolve_columns_for_insert(table.clone(), columns, values)?; + // Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias) + let rowid_alias_index = table.columns().iter().position(|c| c.is_rowid_alias); + let has_user_provided_rowid = { + assert!(columns.len() == table.columns().len()); + if let Some(index) = rowid_alias_index { + matches!(columns[index], ColumnToInsert::UserProvided { .. }) + } else { + false + } + }; + + // allocate a register for each column in the table. if not provided by user, they will simply be set as null. + // allocate an extra register for rowid regardless of whether user provided a rowid alias column. 
+ let num_cols = table.columns().len(); + let rowid_reg = program.alloc_registers(num_cols + 1); + let column_registers_start = rowid_reg + 1; + let rowid_alias_reg = { + if has_user_provided_rowid { + Some(column_registers_start + rowid_alias_index.unwrap()) + } else { + None + } + }; - // Coroutine for values let yield_reg = program.alloc_register(); let jump_on_definition_label = program.allocate_label(); { + // Coroutine for values + // TODO/efficiency: only use coroutine when there are multiple values to insert program.emit_insn_with_label_dependency( Insn::InitCoroutine { yield_reg, @@ -75,40 +210,41 @@ pub fn translate_insert( }, jump_on_definition_label, ); - match body { - InsertBody::Select(select, None) => match &select.body.select { - sqlite3_parser::ast::OneSelect::Select { - distinctness: _, - columns: _, - from: _, - where_clause: _, - group_by: _, - window_clause: _, - } => todo!(), - sqlite3_parser::ast::OneSelect::Values(values) => { - for value in values { - for (col, expr) in value.iter().enumerate() { - let mut col = col; - if table.has_rowid() { - col += 1; - } - translate_expr( - &mut program, - None, - expr, - column_registers_start + col, - None, - )?; - } - program.emit_insn(Insn::Yield { - yield_reg, - end_offset: 0, + + for value in values { + // Process each value according to resolved columns + for (i, column) in columns.iter().enumerate() { + match column { + ColumnToInsert::UserProvided { + index_in_value_tuple, + } => { + translate_expr( + &mut program, + None, + value.get(*index_in_value_tuple).expect( + format!( + "values tuple has no value for column {}", + table.column_index_to_name(i).unwrap() + ) + .as_str(), + ), + column_registers_start + i, + None, + )?; + } + ColumnToInsert::AutomaticNull => { + program.emit_insn(Insn::Null { + dest: column_registers_start + i, + dest_end: None, }); + program.mark_last_insn_constant(); } } - }, - InsertBody::DefaultValues => todo!("default values not yet supported"), - _ => todo!(), + } + 
program.emit_insn(Insn::Yield { + yield_reg, + end_offset: 0, + }); } program.emit_insn(Insn::EndCoroutine { yield_reg }); } @@ -121,6 +257,8 @@ pub fn translate_insert( program.emit_insn(Insn::OpenWriteAwait {}); // Main loop + // FIXME: rollback is not implemented. E.g. if you insert 2 rows and one fails to unique constraint violation, + // the other row will still be inserted. let record_register = program.alloc_register(); let halt_label = program.allocate_label(); let loop_start_offset = program.offset(); @@ -132,68 +270,79 @@ pub fn translate_insert( halt_label, ); - if table.has_rowid() { - let row_id_reg = column_registers_start; - if let Some(rowid_alias_column) = table.get_rowid_alias_column() { - let key_reg = column_registers_start + 1 + rowid_alias_column.0; - // copy key to rowid - program.emit_insn(Insn::Copy { - src_reg: key_reg, - dst_reg: row_id_reg, - amount: 0, - }); - program.emit_insn(Insn::SoftNull { reg: key_reg }); - } - - let notnull_label = program.allocate_label(); + let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label())); + if let Some(reg) = rowid_alias_reg { + program.emit_insn(Insn::Copy { + src_reg: reg, + dst_reg: rowid_reg, + amount: 0, // TODO: rename 'amount' to something else; amount==0 means 1 + }); + // for the row record, the rowid alias column is always set to NULL + program.emit_insn(Insn::SoftNull { reg }); + // the user provided rowid value might itself be NULL. If it is, we create a new rowid on the next instruction. 
program.emit_insn_with_label_dependency( Insn::NotNull { - reg: row_id_reg, - target_pc: notnull_label, + reg: rowid_reg, + target_pc: check_rowid_is_integer_label.unwrap(), }, - notnull_label, + check_rowid_is_integer_label.unwrap(), ); - program.emit_insn(Insn::NewRowid { - cursor: cursor_id, - rowid_reg: row_id_reg, - prev_largest_reg: 0, - }); + } - program.resolve_label(notnull_label, program.offset()); - program.emit_insn(Insn::MustBeInt { reg: row_id_reg }); + // Create new rowid if a) not provided by user or b) provided by user but is NULL + program.emit_insn(Insn::NewRowid { + cursor: cursor_id, + rowid_reg: rowid_reg, + prev_largest_reg: 0, + }); + + if let Some(must_be_int_label) = check_rowid_is_integer_label { + program.resolve_label(must_be_int_label, program.offset()); + // If the user provided a rowid, it must be an integer. + program.emit_insn(Insn::MustBeInt { reg: rowid_reg }); + } + + // Check uniqueness constraint for rowid if it was provided by user. + // When the DB allocates it there are no need for separate uniqueness checks. 
+ if has_user_provided_rowid { let make_record_label = program.allocate_label(); program.emit_insn_with_label_dependency( Insn::NotExists { cursor: cursor_id, - rowid_reg: row_id_reg, + rowid_reg: rowid_reg, target_pc: make_record_label, }, make_record_label, ); - // TODO: rollback + let rowid_column_name = if let Some(index) = rowid_alias_index { + table.column_index_to_name(index).unwrap() + } else { + "rowid" + }; + program.emit_insn(Insn::Halt { err_code: SQLITE_CONSTRAINT_PRIMARYKEY, - description: format!( - "{}.{}", - table.get_name(), - table.column_index_to_name(0).unwrap() - ), + description: format!("{}.{}", table.get_name(), rowid_column_name), }); + program.resolve_label(make_record_label, program.offset()); - program.emit_insn(Insn::MakeRecord { - start_reg: column_registers_start + 1, - count: num_cols - 1, - dest_reg: record_register, - }); - program.emit_insn(Insn::InsertAsync { - cursor: cursor_id, - key_reg: column_registers_start, - record_reg: record_register, - flag: 0, - }); - program.emit_insn(Insn::InsertAwait { cursor_id }); } + // Create and insert the record + program.emit_insn(Insn::MakeRecord { + start_reg: column_registers_start, + count: num_cols, + dest_reg: record_register, + }); + + program.emit_insn(Insn::InsertAsync { + cursor: cursor_id, + key_reg: rowid_reg, + record_reg: record_register, + flag: 0, + }); + program.emit_insn(Insn::InsertAwait { cursor_id }); + program.emit_insn(Insn::Goto { target_pc: loop_start_offset, }); From fa5ca68eec2f327434520c215833c3aad68f68b6 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 21 Dec 2024 15:12:53 +0200 Subject: [PATCH 115/144] Add multi-row insert to simulator --- simulator/generation/plan.rs | 4 ++-- simulator/generation/query.rs | 13 +++++++++---- simulator/model/query.rs | 17 ++++++++++++----- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 61b115f01..82c75c4e3 100644 --- 
a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -106,7 +106,7 @@ impl Interactions { .iter_mut() .find(|t| t.name == insert.table) .unwrap(); - table.rows.push(insert.values.clone()); + table.rows.extend(insert.values.clone()); } Query::Delete(_) => todo!(), Query::Select(_) => {} @@ -320,7 +320,7 @@ fn property_insert_select(rng: &mut R, env: &SimulatorEnv) -> Inte // Insert the row let insert_query = Interaction::Query(Query::Insert(Insert { table: table.name.clone(), - values: row.clone(), + values: vec![row.clone()], })); // Select the row diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index ca6926650..0ff9d44e1 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -37,10 +37,15 @@ impl ArbitraryFrom> for Select { impl ArbitraryFrom
for Insert { fn arbitrary_from(rng: &mut R, table: &Table) -> Self { - let values = table - .columns - .iter() - .map(|c| Value::arbitrary_from(rng, &c.column_type)) + let num_rows = rng.gen_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| Value::arbitrary_from(rng, &c.column_type)) + .collect() + }) .collect(); Insert { table: table.name.clone(), diff --git a/simulator/model/query.rs b/simulator/model/query.rs index eeec68d08..7a12def8d 100644 --- a/simulator/model/query.rs +++ b/simulator/model/query.rs @@ -75,7 +75,7 @@ pub(crate) struct Select { #[derive(Clone, Debug, PartialEq)] pub(crate) struct Insert { pub(crate) table: String, - pub(crate) values: Vec, + pub(crate) values: Vec>, } #[derive(Clone, Debug, PartialEq)] @@ -104,14 +104,21 @@ impl Display for Query { predicate: guard, }) => write!(f, "SELECT * FROM {} WHERE {}", table, guard), Query::Insert(Insert { table, values }) => { - write!(f, "INSERT INTO {} VALUES (", table)?; - for (i, v) in values.iter().enumerate() { + write!(f, "INSERT INTO {} VALUES ", table)?; + for (i, row) in values.iter().enumerate() { if i != 0 { write!(f, ", ")?; } - write!(f, "{}", v)?; + write!(f, "(")?; + for (j, value) in row.iter().enumerate() { + if j != 0 { + write!(f, ", ")?; + } + write!(f, "{}", value)?; + } + write!(f, ")")?; } - write!(f, ")") + Ok(()) } Query::Delete(Delete { table, From c78a3e952a73854002a0b72b3a550ccc6064d73c Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 21 Dec 2024 19:33:16 +0200 Subject: [PATCH 116/144] clean up implementation --- core/translate/insert.rs | 173 +++++++++++++++++++-------------------- 1 file changed, 83 insertions(+), 90 deletions(-) diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 89665a726..aee6e0ec4 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -8,115 +8,109 @@ use sqlite3_parser::ast::{ use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY; use crate::util::normalize_ident; 
use crate::{ - schema::{Schema, Table}, + schema::{Column, Schema, Table}, storage::sqlite3_ondisk::DatabaseHeader, translate::expr::translate_expr, vdbe::{builder::ProgramBuilder, Insn, Program}, }; use crate::{Connection, Result}; -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -/// Helper enum to indicate how a column is being inserted. -/// For example: -/// CREATE TABLE t (a, b, c, d); -/// INSERT INTO t (c, b) VALUES (1, 2); -/// -/// resolve_columns_for_insert() returns [ -/// ColumnToInsert::AutomaticNull, -/// ColumnToInsert::UserProvided { index_in_value_tuple: 1 }, -/// ColumnToInsert::UserProvided { index_in_value_tuple: 0 }, -/// ColumnToInsert::AutomaticNull, -/// ] -enum ColumnToInsert { - /// The column is provided by the user. - UserProvided { index_in_value_tuple: usize }, - /// The column is automatically set to NULL since it was not provided by the user. - AutomaticNull, +#[derive(Debug)] +/// Represents how a column should be populated during an INSERT. +/// Contains both the column definition and optionally the index into the VALUES tuple. +struct ColumnMapping<'a> { + /// Reference to the column definition from the table schema + column: &'a Column, + /// If Some(i), use the i-th value from the VALUES tuple + /// If None, use NULL (column was not specified in INSERT statement) + value_index: Option, } /// Resolves how each column in a table should be populated during an INSERT. -/// For each column, determines whether it will: -/// 1. Use a user-provided value from the VALUES clause, or -/// 2. Be automatically set to NULL +/// Returns a Vec of ColumnMapping, one for each column in the table's schema. +/// +/// For each column, specifies: +/// 1. The column definition (type, constraints, etc) +/// 2. Where to get the value from: +/// - Some(i) -> use i-th value from the VALUES tuple +/// - None -> use NULL (column wasn't specified in INSERT) /// /// Two cases are handled: -/// 1. No column list specified in INSERT statement: +/// 1. 
No column list specified (INSERT INTO t VALUES ...): /// - Values are assigned to columns in table definition order -/// - If fewer values than columns, remaining columns are NULL -/// 2. Column list specified in INSERT statement: -/// - For specified columns, a ColumnToInsert::UserProvided entry is created. -/// - Any columns not listed are set to NULL, i.e. a ColumnToInsert::AutomaticNull entry is created. -/// -/// Returns a Vec with an entry for each column in the table, -/// indicating how that column should be populated. -fn resolve_columns_for_insert( - table: Rc
, +/// - If fewer values than columns, remaining columns map to None +/// 2. Column list specified (INSERT INTO t (col1, col3) VALUES ...): +/// - Named columns map to their corresponding value index +/// - Unspecified columns map to None +fn resolve_columns_for_insert<'a>( + table: &'a Table, columns: &Option, values: &[Vec], -) -> Result> { - assert!(table.has_rowid()); +) -> Result>> { if values.is_empty() { crate::bail_parse_error!("no values to insert"); } - let num_cols_in_table = table.columns().len(); + let table_columns = table.columns(); + // Case 1: No columns specified - map values to columns in order if columns.is_none() { - let num_cols = values[0].len(); - // ensure value tuples dont have more columns than the table - if num_cols > num_cols_in_table { + let num_values = values[0].len(); + if num_values > table_columns.len() { crate::bail_parse_error!( "table {} has {} columns but {} values were supplied", table.get_name(), - num_cols_in_table, - num_cols + table_columns.len(), + num_values ); } - // ensure each value tuple has the same number of columns + + // Verify all value tuples have same length for value in values.iter().skip(1) { - if value.len() != num_cols { + if value.len() != num_values { crate::bail_parse_error!("all VALUES must have the same number of terms"); } } - let columns: Vec = (0..num_cols_in_table) - .map(|i| { - if i < num_cols { - ColumnToInsert::UserProvided { - index_in_value_tuple: i, - } - } else { - ColumnToInsert::AutomaticNull - } + + // Map each column to either its corresponding value index or None + return Ok(table_columns + .iter() + .enumerate() + .map(|(i, col)| ColumnMapping { + column: col, + value_index: if i < num_values { Some(i) } else { None }, }) - .collect(); - return Ok(columns); + .collect()); } - // resolve the given columns to actual table column names and ensure they exist - let columns = columns.as_ref().unwrap(); - let mut resolved_columns: Vec = (0..num_cols_in_table) - .map(|i| 
ColumnToInsert::AutomaticNull) + // Case 2: Columns specified - map named columns to their values + let mut mappings: Vec<_> = table_columns + .iter() + .map(|col| ColumnMapping { + column: col, + value_index: None, + }) .collect(); - for (index_in_value_tuple, column) in columns.iter().enumerate() { - let column_name = normalize_ident(column.0.as_str()); - let column_idx = table - .columns() + + // Map each named column to its value index + for (value_index, column_name) in columns.as_ref().unwrap().iter().enumerate() { + let column_name = normalize_ident(column_name.0.as_str()); + let table_index = table_columns .iter() .position(|c| c.name.eq_ignore_ascii_case(&column_name)); - if let Some(i) = column_idx { - resolved_columns[i] = ColumnToInsert::UserProvided { - index_in_value_tuple, - }; - } else { + + if table_index.is_none() { crate::bail_parse_error!( "table {} has no column named {}", table.get_name(), column_name ); } + + mappings[table_index.unwrap()].value_index = Some(value_index); } - Ok(resolved_columns) + Ok(mappings) } #[allow(clippy::too_many_arguments)] @@ -172,13 +166,13 @@ pub fn translate_insert( _ => todo!(), }; - let columns = resolve_columns_for_insert(table.clone(), columns, values)?; + let column_mappings = resolve_columns_for_insert(&table, columns, values)?; // Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias) let rowid_alias_index = table.columns().iter().position(|c| c.is_rowid_alias); let has_user_provided_rowid = { - assert!(columns.len() == table.columns().len()); + assert!(column_mappings.len() == table.columns().len()); if let Some(index) = rowid_alias_index { - matches!(columns[index], ColumnToInsert::UserProvided { .. 
}) + column_mappings[index].value_index.is_some() } else { false } @@ -213,31 +207,30 @@ pub fn translate_insert( for value in values { // Process each value according to resolved columns - for (i, column) in columns.iter().enumerate() { - match column { - ColumnToInsert::UserProvided { - index_in_value_tuple, - } => { - translate_expr( - &mut program, - None, - value.get(*index_in_value_tuple).expect( - format!( - "values tuple has no value for column {}", - table.column_index_to_name(i).unwrap() - ) - .as_str(), - ), - column_registers_start + i, - None, - )?; - } - ColumnToInsert::AutomaticNull => { + for (i, mapping) in column_mappings.iter().enumerate() { + let target_reg = column_registers_start + i; + + if let Some(value_index) = mapping.value_index { + // Column has a value in the VALUES tuple + translate_expr( + &mut program, + None, + value.get(value_index).expect("value index out of bounds"), + target_reg, + None, + )?; + } else { + // Column was not specified - use NULL if it is nullable, otherwise error. + // Rowid alias columns can be NULL because we will autogenerate a rowid in that case. 
+ let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias; + if is_nullable { program.emit_insn(Insn::Null { - dest: column_registers_start + i, + dest: target_reg, dest_end: None, }); program.mark_last_insn_constant(); + } else { + crate::bail_parse_error!("column {} is not nullable", mapping.column.name); } } } From 050b8744eaabe0e7914624869ff7c2c508e8a5a0 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 21 Dec 2024 23:21:48 +0200 Subject: [PATCH 117/144] Dont use coroutine when inserting a single row --- core/translate/insert.rs | 190 ++++++++++++++++++++++++++------------- 1 file changed, 127 insertions(+), 63 deletions(-) diff --git a/core/translate/insert.rs b/core/translate/insert.rs index aee6e0ec4..bd9caae4c 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -113,6 +113,58 @@ fn resolve_columns_for_insert<'a>( Ok(mappings) } +/// Populates the column registers with values for a single row +fn populate_column_registers( + program: &mut ProgramBuilder, + value: &[Expr], + column_mappings: &[ColumnMapping], + column_registers_start: usize, + inserting_multiple_rows: bool, + rowid_reg: usize, +) -> Result<()> { + for (i, mapping) in column_mappings.iter().enumerate() { + let target_reg = column_registers_start + i; + + // Column has a value in the VALUES tuple + if let Some(value_index) = mapping.value_index { + // When inserting a single row, SQLite writes the value provided for the rowid alias column (INTEGER PRIMARY KEY) + // directly into the rowid register and writes a NULL into the rowid alias column. Not sure why this only happens + // in the single row case, but let's copy it. 
+ let write_directly_to_rowid_reg = + mapping.column.is_rowid_alias && !inserting_multiple_rows; + let reg = if write_directly_to_rowid_reg { + rowid_reg + } else { + target_reg + }; + translate_expr( + program, + None, + value.get(value_index).expect("value index out of bounds"), + reg, + None, + )?; + if write_directly_to_rowid_reg { + program.emit_insn(Insn::SoftNull { reg: target_reg }); + } + } else { + // Column was not specified - use NULL if it is nullable, otherwise error + // Rowid alias columns can be NULL because we will autogenerate a rowid in that case. + let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias; + if is_nullable { + program.emit_insn(Insn::Null { + dest: target_reg, + dest_end: None, + }); + program.mark_last_insn_constant(); + } else { + crate::bail_parse_error!("column {} is not nullable", mapping.column.name); + } + } + } + Ok(()) +} + #[allow(clippy::too_many_arguments)] pub fn translate_insert( schema: &Schema, @@ -191,11 +243,16 @@ pub fn translate_insert( } }; - let yield_reg = program.alloc_register(); - let jump_on_definition_label = program.allocate_label(); - { - // Coroutine for values - // TODO/efficiency: only use coroutine when there are multiple values to insert + let record_register = program.alloc_register(); + let halt_label = program.allocate_label(); + let mut loop_start_offset = 0; + + let inserting_multiple_rows = values.len() > 1; + + // Multiple rows - use coroutine for value population + if inserting_multiple_rows { + let yield_reg = program.alloc_register(); + let jump_on_definition_label = program.allocate_label(); program.emit_insn_with_label_dependency( Insn::InitCoroutine { yield_reg, @@ -206,72 +263,75 @@ pub fn translate_insert( ); for value in values { - // Process each value according to resolved columns - for (i, mapping) in column_mappings.iter().enumerate() { - let target_reg = column_registers_start + i; - - if let Some(value_index) = mapping.value_index { - // Column has a 
value in the VALUES tuple - translate_expr( - &mut program, - None, - value.get(value_index).expect("value index out of bounds"), - target_reg, - None, - )?; - } else { - // Column was not specified - use NULL if it is nullable, otherwise error. - // Rowid alias columns can be NULL because we will autogenerate a rowid in that case. - let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias; - if is_nullable { - program.emit_insn(Insn::Null { - dest: target_reg, - dest_end: None, - }); - program.mark_last_insn_constant(); - } else { - crate::bail_parse_error!("column {} is not nullable", mapping.column.name); - } - } - } + populate_column_registers( + &mut program, + value, + &column_mappings, + column_registers_start, + true, + rowid_reg, + )?; program.emit_insn(Insn::Yield { yield_reg, end_offset: 0, }); } program.emit_insn(Insn::EndCoroutine { yield_reg }); - } + program.resolve_label(jump_on_definition_label, program.offset()); - program.resolve_label(jump_on_definition_label, program.offset()); - program.emit_insn(Insn::OpenWriteAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); + program.emit_insn(Insn::OpenWriteAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); - // Main loop - // FIXME: rollback is not implemented. E.g. if you insert 2 rows and one fails to unique constraint violation, - // the other row will still be inserted. - let record_register = program.alloc_register(); - let halt_label = program.allocate_label(); - let loop_start_offset = program.offset(); - program.emit_insn_with_label_dependency( - Insn::Yield { - yield_reg, - end_offset: halt_label, - }, - halt_label, - ); + // Main loop + // FIXME: rollback is not implemented. E.g. if you insert 2 rows and one fails to unique constraint violation, + // the other row will still be inserted. 
+ loop_start_offset = program.offset(); + program.emit_insn_with_label_dependency( + Insn::Yield { + yield_reg, + end_offset: halt_label, + }, + halt_label, + ); + } else { + // Single row - populate registers directly + program.emit_insn(Insn::OpenWriteAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + populate_column_registers( + &mut program, + &values[0], + &column_mappings, + column_registers_start, + false, + rowid_reg, + )?; + } + // Common record insertion logic for both single and multiple rows let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label())); if let Some(reg) = rowid_alias_reg { - program.emit_insn(Insn::Copy { - src_reg: reg, - dst_reg: rowid_reg, - amount: 0, // TODO: rename 'amount' to something else; amount==0 means 1 - }); - // for the row record, the rowid alias column is always set to NULL - program.emit_insn(Insn::SoftNull { reg }); + // for the row record, the rowid alias column (INTEGER PRIMARY KEY) is always set to NULL + // and its value is copied to the rowid register. in the case where a single row is inserted, + // the value is written directly to the rowid register (see populate_column_registers()). + // again, not sure why this only happens in the single row case, but let's mimic sqlite. + // in the single row case we save a Copy instruction, but in the multiple rows case we do + // it here in the loop. + if inserting_multiple_rows { + program.emit_insn(Insn::Copy { + src_reg: reg, + dst_reg: rowid_reg, + amount: 0, // TODO: rename 'amount' to something else; amount==0 means 1 + }); + // for the row record, the rowid alias column is always set to NULL + program.emit_insn(Insn::SoftNull { reg }); + } // the user provided rowid value might itself be NULL. If it is, we create a new rowid on the next instruction. 
program.emit_insn_with_label_dependency( Insn::NotNull { @@ -336,15 +396,19 @@ pub fn translate_insert( }); program.emit_insn(Insn::InsertAwait { cursor_id }); - program.emit_insn(Insn::Goto { - target_pc: loop_start_offset, - }); + if inserting_multiple_rows { + // For multiple rows, loop back + program.emit_insn(Insn::Goto { + target_pc: loop_start_offset, + }); + } program.resolve_label(halt_label, program.offset()); program.emit_insn(Insn::Halt { err_code: 0, description: String::new(), }); + program.resolve_label(init_label, program.offset()); program.emit_insn(Insn::Transaction { write: true }); program.emit_constant_insns(); From c4e2a344ae2462008b20900ab002d2b719b0a871 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 21 Dec 2024 23:44:41 +0200 Subject: [PATCH 118/144] parse error instead of assert! for unsupported features --- core/translate/insert.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/translate/insert.rs b/core/translate/insert.rs index bd9caae4c..12c9ed016 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -169,7 +169,7 @@ fn populate_column_registers( pub fn translate_insert( schema: &Schema, with: &Option, - or_conflict: &Option, + on_conflict: &Option, tbl_name: &QualifiedName, columns: &Option, body: &InsertBody, @@ -177,8 +177,12 @@ pub fn translate_insert( database_header: Rc>, connection: Weak, ) -> Result { - assert!(with.is_none()); - assert!(or_conflict.is_none()); + if with.is_some() { + crate::bail_parse_error!("WITH clause is not supported"); + } + if on_conflict.is_some() { + crate::bail_parse_error!("ON CONFLICT clause is not supported"); + } let mut program = ProgramBuilder::new(); let init_label = program.allocate_label(); program.emit_insn_with_label_dependency( From 78da71c72a8e427ad1866d32cce523e34d223353 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Wed, 25 Dec 2024 22:08:11 +0200 Subject: [PATCH 119/144] encode integers with proper varint types --- 
core/types.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/core/types.rs b/core/types.rs index 5f1b55d7b..ec86efe93 100644 --- a/core/types.rs +++ b/core/types.rs @@ -394,11 +394,20 @@ impl OwnedRecord { pub fn serialize(&self, buf: &mut Vec) { let initial_i = buf.len(); + let mut serial_types = Vec::with_capacity(self.values.len()); + // First pass: calculate serial types and store them for value in &self.values { let serial_type = match value { OwnedValue::Null => 0, - OwnedValue::Integer(_) => 6, // for now let's only do i64 + OwnedValue::Integer(i) => match i { + i if *i >= -128 && *i <= 127 => 1, // 8-bit + i if *i >= -32768 && *i <= 32767 => 2, // 16-bit + i if *i >= -8388608 && *i <= 8388607 => 3, // 24-bit + i if *i >= -2147483648 && *i <= 2147483647 => 4, // 32-bit + i if *i >= -140737488355328 && *i <= 140737488355327 => 5, // 48-bit + _ => 6, // 64-bit + }, OwnedValue::Float(_) => 7, OwnedValue::Text(t) => (t.value.len() * 2 + 13) as u64, OwnedValue::Blob(b) => (b.len() * 2 + 12) as u64, @@ -411,15 +420,24 @@ impl OwnedRecord { let len = buf.len(); let n = write_varint(&mut buf[len - 9..], serial_type); buf.truncate(buf.len() - 9 + n); // Remove unused bytes + + serial_types.push(serial_type); } let mut header_size = buf.len() - initial_i; // write content - for value in &self.values { - // TODO: make integers and floats with smaller serial types + for (value, &serial_type) in self.values.iter().zip(serial_types.iter()) { match value { OwnedValue::Null => {} - OwnedValue::Integer(i) => buf.extend_from_slice(&i.to_be_bytes()), + OwnedValue::Integer(i) => match serial_type { + 1 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), + 2 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), + 3 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), + 4 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), + 5 => buf.extend_from_slice(&i.to_be_bytes()[2..]), + 6 => buf.extend_from_slice(&i.to_be_bytes()), + 
_ => unreachable!(), + }, OwnedValue::Float(f) => buf.extend_from_slice(&f.to_be_bytes()), OwnedValue::Text(t) => buf.extend_from_slice(t.value.as_bytes()), OwnedValue::Blob(b) => buf.extend_from_slice(b), From 6bf1ab7726fe06dd4879036314b8f470d907a070 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Wed, 25 Dec 2024 22:34:34 +0200 Subject: [PATCH 120/144] add consts for integer lo/hi values and serial types --- core/types.rs | 51 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/core/types.rs b/core/types.rs index ec86efe93..ad201572e 100644 --- a/core/types.rs +++ b/core/types.rs @@ -387,6 +387,27 @@ pub struct OwnedRecord { pub values: Vec, } +const I8_LOW: i64 = -128; +const I8_HIGH: i64 = 127; +const I16_LOW: i64 = -32768; +const I16_HIGH: i64 = 32767; +const I24_LOW: i64 = -8388608; +const I24_HIGH: i64 = 8388607; +const I32_LOW: i64 = -2147483648; +const I32_HIGH: i64 = 2147483647; +const I48_LOW: i64 = -140737488355328; +const I48_HIGH: i64 = 140737488355327; + +// https://www.sqlite.org/fileformat.html#record_format +const SERIAL_TYPE_INTEGER_ZERO: u64 = 0; +const SERIAL_TYPE_I8: u64 = 1; +const SERIAL_TYPE_I16: u64 = 2; +const SERIAL_TYPE_I24: u64 = 3; +const SERIAL_TYPE_I32: u64 = 4; +const SERIAL_TYPE_I48: u64 = 5; +const SERIAL_TYPE_I64: u64 = 6; +const SERIAL_TYPE_F64: u64 = 7; + impl OwnedRecord { pub fn new(values: Vec) -> Self { Self { values } @@ -399,16 +420,16 @@ impl OwnedRecord { // First pass: calculate serial types and store them for value in &self.values { let serial_type = match value { - OwnedValue::Null => 0, + OwnedValue::Null => SERIAL_TYPE_INTEGER_ZERO, OwnedValue::Integer(i) => match i { - i if *i >= -128 && *i <= 127 => 1, // 8-bit - i if *i >= -32768 && *i <= 32767 => 2, // 16-bit - i if *i >= -8388608 && *i <= 8388607 => 3, // 24-bit - i if *i >= -2147483648 && *i <= 2147483647 => 4, // 32-bit - i if *i >= -140737488355328 && *i <= 140737488355327 => 5, // 48-bit - _ 
=> 6, // 64-bit + i if *i >= I8_LOW && *i <= I8_HIGH => SERIAL_TYPE_I8, + i if *i >= I16_LOW && *i <= I16_HIGH => SERIAL_TYPE_I16, + i if *i >= I24_LOW && *i <= I24_HIGH => SERIAL_TYPE_I24, + i if *i >= I32_LOW && *i <= I32_HIGH => SERIAL_TYPE_I32, + i if *i >= I48_LOW && *i <= I48_HIGH => SERIAL_TYPE_I48, + _ => SERIAL_TYPE_I64, }, - OwnedValue::Float(_) => 7, + OwnedValue::Float(_) => SERIAL_TYPE_F64, OwnedValue::Text(t) => (t.value.len() * 2 + 13) as u64, OwnedValue::Blob(b) => (b.len() * 2 + 12) as u64, // not serializable values @@ -416,7 +437,7 @@ impl OwnedRecord { OwnedValue::Record(_) => unreachable!(), }; - buf.resize(buf.len() + 9, 0); // Ensure space for varint + buf.resize(buf.len() + 9, 0); // Ensure space for varint (1-9 bytes in length) let len = buf.len(); let n = write_varint(&mut buf[len - 9..], serial_type); buf.truncate(buf.len() - 9 + n); // Remove unused bytes @@ -430,12 +451,12 @@ impl OwnedRecord { match value { OwnedValue::Null => {} OwnedValue::Integer(i) => match serial_type { - 1 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), - 2 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), - 3 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), - 4 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), - 5 => buf.extend_from_slice(&i.to_be_bytes()[2..]), - 6 => buf.extend_from_slice(&i.to_be_bytes()), + SERIAL_TYPE_I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), + SERIAL_TYPE_I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), + SERIAL_TYPE_I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte + SERIAL_TYPE_I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), + SERIAL_TYPE_I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SERIAL_TYPE_I64 => buf.extend_from_slice(&i.to_be_bytes()), _ => unreachable!(), }, OwnedValue::Float(f) => buf.extend_from_slice(&f.to_be_bytes()), From 381335724a12deb1f3033bc2428cb46633a61c1f Mon Sep 17 00:00:00 
2001 From: jussisaurio Date: Wed, 25 Dec 2024 22:57:55 +0200 Subject: [PATCH 121/144] add tests for serialize() --- core/types.rs | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/core/types.rs b/core/types.rs index ad201572e..20f1943ab 100644 --- a/core/types.rs +++ b/core/types.rs @@ -528,3 +528,206 @@ pub trait Cursor { fn get_null_flag(&self) -> bool; fn btree_create(&mut self, flags: usize) -> u32; } + +#[cfg(test)] +mod tests { + use super::*; + use std::rc::Rc; + + #[test] + fn test_serialize_null() { + let record = OwnedRecord::new(vec![OwnedValue::Null]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for NULL + assert_eq!(header[1], SERIAL_TYPE_INTEGER_ZERO as u8); + // Check that the buffer is empty after the header + assert_eq!(buf.len(), header_length); + } + + #[test] + fn test_serialize_integers() { + let record = OwnedRecord::new(vec![ + OwnedValue::Integer(42), // Should use SERIAL_TYPE_I8 + OwnedValue::Integer(1000), // Should use SERIAL_TYPE_I16 + OwnedValue::Integer(1_000_000), // Should use SERIAL_TYPE_I24 + OwnedValue::Integer(1_000_000_000), // Should use SERIAL_TYPE_I32 + OwnedValue::Integer(1_000_000_000_000), // Should use SERIAL_TYPE_I48 + OwnedValue::Integer(i64::MAX), // Should use SERIAL_TYPE_I64 + ]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert!(header[0] == header_length as u8); // Header should be larger than number of values + + // Check that correct serial types were chosen + assert_eq!(header[1], SERIAL_TYPE_I8 as u8); + assert_eq!(header[2], SERIAL_TYPE_I16 as u8); + assert_eq!(header[3], SERIAL_TYPE_I24 as u8); 
+ assert_eq!(header[4], SERIAL_TYPE_I32 as u8); + assert_eq!(header[5], SERIAL_TYPE_I48 as u8); + assert_eq!(header[6], SERIAL_TYPE_I64 as u8); + + // test that the bytes after the header can be interpreted as the correct values + let mut cur_offset = header_length; + let i8_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let i16_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let i24_bytes = &buf[cur_offset..cur_offset + size_of::() - 1]; + cur_offset += size_of::() - 1; // i24 + let i32_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let i48_bytes = &buf[cur_offset..cur_offset + size_of::() - 2]; + cur_offset += size_of::() - 2; // i48 + let i64_bytes = &buf[cur_offset..cur_offset + size_of::()]; + + let val_int8 = i8::from_be_bytes(i8_bytes.try_into().unwrap()); + let val_int16 = i16::from_be_bytes(i16_bytes.try_into().unwrap()); + + let mut leading_0 = vec![0]; + leading_0.extend(i24_bytes); + let val_int24 = i32::from_be_bytes(leading_0.try_into().unwrap()); + + let val_int32 = i32::from_be_bytes(i32_bytes.try_into().unwrap()); + + let mut leading_00 = vec![0, 0]; + leading_00.extend(i48_bytes); + let val_int48 = i64::from_be_bytes(leading_00.try_into().unwrap()); + + let val_int64 = i64::from_be_bytes(i64_bytes.try_into().unwrap()); + + assert_eq!(val_int8, 42); + assert_eq!(val_int16, 1000); + assert_eq!(val_int24, 1_000_000); + assert_eq!(val_int32, 1_000_000_000); + assert_eq!(val_int48, 1_000_000_000_000); + assert_eq!(val_int64, i64::MAX); + + // assert correct size of buffer: header + values (bytes per value depends on serial type) + assert_eq!( + buf.len(), + header_length + + size_of::() + + size_of::() + + (size_of::() - 1) // i24 + + size_of::() + + (size_of::() - 2) // i48 + + size_of::() + ); + } + + #[test] + fn test_serialize_float() { + let record = OwnedRecord::new(vec![OwnedValue::Float(3.14159)]); + let mut buf = Vec::new(); + 
record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for FLOAT + assert_eq!(header[1], SERIAL_TYPE_F64 as u8); + // Check that the bytes after the header can be interpreted as the float + let float_bytes = &buf[header_length..header_length + size_of::()]; + let float = f64::from_be_bytes(float_bytes.try_into().unwrap()); + assert_eq!(float, 3.14159); + // Check that buffer length is correct + assert_eq!(buf.len(), header_length + size_of::()); + } + + #[test] + fn test_serialize_text() { + let text = Rc::new("hello".to_string()); + let record = OwnedRecord::new(vec![OwnedValue::Text(LimboText::new(text.clone()))]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for TEXT, which is (len * 2 + 13) + assert_eq!(header[1], (5 * 2 + 13) as u8); + // Check the actual text bytes + assert_eq!(&buf[2..7], b"hello"); + // Check that buffer length is correct + assert_eq!(buf.len(), header_length + text.len()); + } + + #[test] + fn test_serialize_blob() { + let blob = Rc::new(vec![1, 2, 3, 4, 5]); + let record = OwnedRecord::new(vec![OwnedValue::Blob(blob.clone())]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for BLOB, which is (len * 2 + 12) + assert_eq!(header[1], (5 * 2 + 12) as u8); + // Check the actual blob bytes + assert_eq!(&buf[2..7], &[1, 2, 3, 4, 5]); + // Check that buffer length is correct + assert_eq!(buf.len(), header_length + blob.len()); + 
} + + #[test] + fn test_serialize_mixed_types() { + let text = Rc::new("test".to_string()); + let record = OwnedRecord::new(vec![ + OwnedValue::Null, + OwnedValue::Integer(42), + OwnedValue::Float(3.14), + OwnedValue::Text(LimboText::new(text.clone())), + ]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for NULL + assert_eq!(header[1], SERIAL_TYPE_INTEGER_ZERO as u8); + // Third byte should be serial type for I8 + assert_eq!(header[2], SERIAL_TYPE_I8 as u8); + // Fourth byte should be serial type for F64 + assert_eq!(header[3], SERIAL_TYPE_F64 as u8); + // Fifth byte should be serial type for TEXT, which is (len * 2 + 13) + assert_eq!(header[4], (4 * 2 + 13) as u8); + + // Check that the bytes after the header can be interpreted as the correct values + let mut cur_offset = header_length; + let i8_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let f64_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let text_bytes = &buf[cur_offset..cur_offset + text.len()]; + + let val_int8 = i8::from_be_bytes(i8_bytes.try_into().unwrap()); + let val_float = f64::from_be_bytes(f64_bytes.try_into().unwrap()); + let val_text = String::from_utf8(text_bytes.to_vec()).unwrap(); + + assert_eq!(val_int8, 42); + assert_eq!(val_float, 3.14); + assert_eq!(val_text, "test"); + + // Check that buffer length is correct + assert_eq!( + buf.len(), + header_length + size_of::() + size_of::() + text.len() + ); + } +} From 80933a32e9bc46d65b117cb1c051e526fdb3b705 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Wed, 25 Dec 2024 23:09:23 +0200 Subject: [PATCH 122/144] remove space allocated for overflow pointer in non-overflow cases --- core/storage/btree.rs | 1 - 1 file changed, 1 deletion(-) diff --git 
a/core/storage/btree.rs b/core/storage/btree.rs index bdd27932b..e72cbfa6d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1514,7 +1514,6 @@ impl BTreeCursor { if record_buf.len() <= payload_overflow_threshold_max { // enough allowed space to fit inside a btree page cell_payload.extend_from_slice(record_buf.as_slice()); - cell_payload.resize(cell_payload.len() + 4, 0); return; } log::debug!("fill_cell_payload(overflow)"); From c7448d29176adc599cca43fab4a4115751502674 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Thu, 26 Dec 2024 10:49:43 +0200 Subject: [PATCH 123/144] no allocation for serial types --- core/types.rs | 139 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 54 deletions(-) diff --git a/core/types.rs b/core/types.rs index 20f1943ab..b0e6e679d 100644 --- a/core/types.rs +++ b/core/types.rs @@ -398,15 +398,63 @@ const I32_HIGH: i64 = 2147483647; const I48_LOW: i64 = -140737488355328; const I48_HIGH: i64 = 140737488355327; -// https://www.sqlite.org/fileformat.html#record_format -const SERIAL_TYPE_INTEGER_ZERO: u64 = 0; -const SERIAL_TYPE_I8: u64 = 1; -const SERIAL_TYPE_I16: u64 = 2; -const SERIAL_TYPE_I24: u64 = 3; -const SERIAL_TYPE_I32: u64 = 4; -const SERIAL_TYPE_I48: u64 = 5; -const SERIAL_TYPE_I64: u64 = 6; -const SERIAL_TYPE_F64: u64 = 7; +/// Sqlite Serial Types +/// https://www.sqlite.org/fileformat.html#record_format +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum SerialType { + Null, + I8, + I16, + I24, + I32, + I48, + I64, + F64, + Text { content_size: usize }, + Blob { content_size: usize }, +} + +impl From<&OwnedValue> for SerialType { + fn from(value: &OwnedValue) -> Self { + match value { + OwnedValue::Null => SerialType::Null, + OwnedValue::Integer(i) => match i { + i if *i >= I8_LOW && *i <= I8_HIGH => SerialType::I8, + i if *i >= I16_LOW && *i <= I16_HIGH => SerialType::I16, + i if *i >= I24_LOW && *i <= I24_HIGH => SerialType::I24, + i if *i >= I32_LOW && *i 
<= I32_HIGH => SerialType::I32, + i if *i >= I48_LOW && *i <= I48_HIGH => SerialType::I48, + _ => SerialType::I64, + }, + OwnedValue::Float(_) => SerialType::F64, + OwnedValue::Text(t) => SerialType::Text { + content_size: t.value.len(), + }, + OwnedValue::Blob(b) => SerialType::Blob { + content_size: b.len(), + }, + OwnedValue::Agg(_) => unreachable!(), + OwnedValue::Record(_) => unreachable!(), + } + } +} + +impl From for u64 { + fn from(serial_type: SerialType) -> Self { + match serial_type { + SerialType::Null => 0, + SerialType::I8 => 1, + SerialType::I16 => 2, + SerialType::I24 => 3, + SerialType::I32 => 4, + SerialType::I48 => 5, + SerialType::I64 => 6, + SerialType::F64 => 7, + SerialType::Text { content_size } => (content_size * 2 + 13) as u64, + SerialType::Blob { content_size } => (content_size * 2 + 12) as u64, + } + } +} impl OwnedRecord { pub fn new(values: Vec) -> Self { @@ -415,50 +463,33 @@ impl OwnedRecord { pub fn serialize(&self, buf: &mut Vec) { let initial_i = buf.len(); - let mut serial_types = Vec::with_capacity(self.values.len()); - // First pass: calculate serial types and store them + // write serial types for value in &self.values { - let serial_type = match value { - OwnedValue::Null => SERIAL_TYPE_INTEGER_ZERO, - OwnedValue::Integer(i) => match i { - i if *i >= I8_LOW && *i <= I8_HIGH => SERIAL_TYPE_I8, - i if *i >= I16_LOW && *i <= I16_HIGH => SERIAL_TYPE_I16, - i if *i >= I24_LOW && *i <= I24_HIGH => SERIAL_TYPE_I24, - i if *i >= I32_LOW && *i <= I32_HIGH => SERIAL_TYPE_I32, - i if *i >= I48_LOW && *i <= I48_HIGH => SERIAL_TYPE_I48, - _ => SERIAL_TYPE_I64, - }, - OwnedValue::Float(_) => SERIAL_TYPE_F64, - OwnedValue::Text(t) => (t.value.len() * 2 + 13) as u64, - OwnedValue::Blob(b) => (b.len() * 2 + 12) as u64, - // not serializable values - OwnedValue::Agg(_) => unreachable!(), - OwnedValue::Record(_) => unreachable!(), - }; - + let serial_type = SerialType::from(value); buf.resize(buf.len() + 9, 0); // Ensure space for varint (1-9 
bytes in length) let len = buf.len(); - let n = write_varint(&mut buf[len - 9..], serial_type); + let n = write_varint(&mut buf[len - 9..], serial_type.into()); buf.truncate(buf.len() - 9 + n); // Remove unused bytes - - serial_types.push(serial_type); } let mut header_size = buf.len() - initial_i; // write content - for (value, &serial_type) in self.values.iter().zip(serial_types.iter()) { + for value in &self.values { match value { OwnedValue::Null => {} - OwnedValue::Integer(i) => match serial_type { - SERIAL_TYPE_I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), - SERIAL_TYPE_I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), - SERIAL_TYPE_I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte - SERIAL_TYPE_I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), - SERIAL_TYPE_I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes - SERIAL_TYPE_I64 => buf.extend_from_slice(&i.to_be_bytes()), - _ => unreachable!(), - }, + OwnedValue::Integer(i) => { + let serial_type = SerialType::from(value); + match serial_type { + SerialType::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), + SerialType::I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), + SerialType::I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte + SerialType::I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), + SerialType::I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SerialType::I64 => buf.extend_from_slice(&i.to_be_bytes()), + _ => unreachable!(), + } + } OwnedValue::Float(f) => buf.extend_from_slice(&f.to_be_bytes()), OwnedValue::Text(t) => buf.extend_from_slice(t.value.as_bytes()), OwnedValue::Blob(b) => buf.extend_from_slice(b), @@ -545,7 +576,7 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for NULL - 
assert_eq!(header[1], SERIAL_TYPE_INTEGER_ZERO as u8); + assert_eq!(header[1] as u64, u64::from(SerialType::Null)); // Check that the buffer is empty after the header assert_eq!(buf.len(), header_length); } @@ -569,12 +600,12 @@ mod tests { assert!(header[0] == header_length as u8); // Header should be larger than number of values // Check that correct serial types were chosen - assert_eq!(header[1], SERIAL_TYPE_I8 as u8); - assert_eq!(header[2], SERIAL_TYPE_I16 as u8); - assert_eq!(header[3], SERIAL_TYPE_I24 as u8); - assert_eq!(header[4], SERIAL_TYPE_I32 as u8); - assert_eq!(header[5], SERIAL_TYPE_I48 as u8); - assert_eq!(header[6], SERIAL_TYPE_I64 as u8); + assert_eq!(header[1] as u64, u64::from(SerialType::I8)); + assert_eq!(header[2] as u64, u64::from(SerialType::I16)); + assert_eq!(header[3] as u64, u64::from(SerialType::I24)); + assert_eq!(header[4] as u64, u64::from(SerialType::I32)); + assert_eq!(header[5] as u64, u64::from(SerialType::I48)); + assert_eq!(header[6] as u64, u64::from(SerialType::I64)); // test that the bytes after the header can be interpreted as the correct values let mut cur_offset = header_length; @@ -636,7 +667,7 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for FLOAT - assert_eq!(header[1], SERIAL_TYPE_F64 as u8); + assert_eq!(header[1] as u64, u64::from(SerialType::F64)); // Check that the bytes after the header can be interpreted as the float let float_bytes = &buf[header_length..header_length + size_of::()]; let float = f64::from_be_bytes(float_bytes.try_into().unwrap()); @@ -700,13 +731,13 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for NULL - assert_eq!(header[1], SERIAL_TYPE_INTEGER_ZERO as u8); + assert_eq!(header[1] as u64, u64::from(SerialType::Null)); // Third byte should be serial type for I8 - assert_eq!(header[2], SERIAL_TYPE_I8 as u8); + 
assert_eq!(header[2] as u64, u64::from(SerialType::I8)); // Fourth byte should be serial type for F64 - assert_eq!(header[3], SERIAL_TYPE_F64 as u8); + assert_eq!(header[3] as u64, u64::from(SerialType::F64)); // Fifth byte should be serial type for TEXT, which is (len * 2 + 13) - assert_eq!(header[4], (4 * 2 + 13) as u8); + assert_eq!(header[4] as u64, (4 * 2 + 13) as u64); // Check that the bytes after the header can be interpreted as the correct values let mut cur_offset = header_length; From 4368e8767ba363768bfb7ab25fed1f2293e9e7a2 Mon Sep 17 00:00:00 2001 From: psvri Date: Thu, 26 Dec 2024 22:38:54 +0530 Subject: [PATCH 124/144] Fix like function giving wrong results --- core/Cargo.toml | 3 ++- core/vdbe/mod.rs | 48 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 4d731eb2b..1f2285f5f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -44,7 +44,8 @@ sieve-cache = "0.1.4" sqlite3-parser = { path = "../vendored/sqlite3-parser" } thiserror = "1.0.61" getrandom = { version = "0.2.15", features = ["js"] } -regex = "1.10.5" +regex = "1.11.1" +regex-syntax = { version = "0.8.5", default-features = false, features = ["unicode"] } chrono = "0.4.38" julian_day_converter = "0.3.2" jsonb = { version = "0.4.4", optional = true } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 427770155..f75df4a5f 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -46,7 +46,7 @@ use datetime::{exec_date, exec_time, exec_unixepoch}; use rand::distributions::{Distribution, Uniform}; use rand::{thread_rng, Rng}; -use regex::Regex; +use regex::{Regex, RegexBuilder}; use std::borrow::{Borrow, BorrowMut}; use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; @@ -3166,10 +3166,32 @@ fn exec_char(values: Vec) -> OwnedValue { } fn construct_like_regex(pattern: &str) -> Regex { - let mut regex_pattern = String::from("(?i)^"); - 
regex_pattern.push_str(&pattern.replace('%', ".*").replace('_', ".")); + + let mut regex_pattern = String::with_capacity(pattern.len() * 2); + + regex_pattern.push('^'); + + for c in pattern.chars() { + match c { + '\\' => regex_pattern.push_str("\\\\"), + '%' => regex_pattern.push_str(".*"), + '_' => regex_pattern.push('.'), + ch => { + if regex_syntax::is_meta_character(c) { + regex_pattern.push('\\'); + } + regex_pattern.push(ch); + } + } + } + regex_pattern.push('$'); - Regex::new(&regex_pattern).unwrap() + + RegexBuilder::new(&regex_pattern) + .case_insensitive(true) + .dot_matches_new_line(true) + .build() + .unwrap() } // Implements LIKE pattern matching. Caches the constructed regex if a cache is provided @@ -4316,12 +4338,18 @@ mod tests { ); } + #[test] + fn test_like_with_escape_or_regexmeta_chars() { + assert!(exec_like(None, r#"\%A"#, r#"\A"#)); + assert!(exec_like(None, "%a%a", "aaaa")); + } + #[test] fn test_like_no_cache() { assert!(exec_like(None, "a%", "aaaa")); assert!(exec_like(None, "%a%a", "aaaa")); - assert!(exec_like(None, "%a.a", "aaaa")); - assert!(exec_like(None, "a.a%", "aaaa")); + assert!(!exec_like(None, "%a.a", "aaaa")); + assert!(!exec_like(None, "a.a%", "aaaa")); assert!(!exec_like(None, "%a.ab", "aaaa")); } @@ -4330,15 +4358,15 @@ mod tests { let mut cache = HashMap::new(); assert!(exec_like(Some(&mut cache), "a%", "aaaa")); assert!(exec_like(Some(&mut cache), "%a%a", "aaaa")); - assert!(exec_like(Some(&mut cache), "%a.a", "aaaa")); - assert!(exec_like(Some(&mut cache), "a.a%", "aaaa")); + assert!(!exec_like(Some(&mut cache), "%a.a", "aaaa")); + assert!(!exec_like(Some(&mut cache), "a.a%", "aaaa")); assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa")); // again after values have been cached assert!(exec_like(Some(&mut cache), "a%", "aaaa")); assert!(exec_like(Some(&mut cache), "%a%a", "aaaa")); - assert!(exec_like(Some(&mut cache), "%a.a", "aaaa")); - assert!(exec_like(Some(&mut cache), "a.a%", "aaaa")); + assert!(!exec_like(Some(&mut 
cache), "%a.a", "aaaa")); + assert!(!exec_like(Some(&mut cache), "a.a%", "aaaa")); assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa")); } From 12e49da1d0b9f66c6353f2141f071d6e41c61f51 Mon Sep 17 00:00:00 2001 From: psvri Date: Thu, 26 Dec 2024 22:42:46 +0530 Subject: [PATCH 125/144] fmt fixes --- core/vdbe/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index f75df4a5f..e9df3a139 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -3166,7 +3166,6 @@ fn exec_char(values: Vec) -> OwnedValue { } fn construct_like_regex(pattern: &str) -> Regex { - let mut regex_pattern = String::with_capacity(pattern.len() * 2); regex_pattern.push('^'); From 28244b10d6bccb2d32bc5f4742735993798da11a Mon Sep 17 00:00:00 2001 From: Peter Sooley Date: Thu, 26 Dec 2024 14:36:44 -0800 Subject: [PATCH 126/144] implement json_array_length --- COMPAT.md | 4 +- core/function.rs | 4 + core/json/mod.rs | 164 ++++++++++++++++++++++++++++++++++++- core/json/path.rs | 181 +++++++++++++++++++++++++++++++++++++++++ core/translate/expr.rs | 45 ++++++++++ core/vdbe/mod.rs | 17 +++- testing/json.test | 32 ++++++++ 7 files changed, 443 insertions(+), 4 deletions(-) create mode 100644 core/json/path.rs diff --git a/COMPAT.md b/COMPAT.md index a7baaca83..71a00290b 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -234,8 +234,8 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | jsonb(json) | | | | json_array(value1,value2,...) | Yes | | | jsonb_array(value1,value2,...) | | | -| json_array_length(json) | | | -| json_array_length(json,path) | | | +| json_array_length(json) | Yes | | +| json_array_length(json,path) | Yes | | | json_error_position(json) | | | | json_extract(json,path,...) | | | | jsonb_extract(json,path,...) 
| | | diff --git a/core/function.rs b/core/function.rs index 8681a4fdf..0b19a5474 100644 --- a/core/function.rs +++ b/core/function.rs @@ -6,6 +6,7 @@ use std::fmt::Display; pub enum JsonFunc { Json, JsonArray, + JsonArrayLength, } #[cfg(feature = "json")] @@ -17,6 +18,7 @@ impl Display for JsonFunc { match self { JsonFunc::Json => "json".to_string(), JsonFunc::JsonArray => "json_array".to_string(), + JsonFunc::JsonArrayLength => "json_array_length".to_string(), } ) } @@ -334,6 +336,8 @@ impl Func { "json" => Ok(Func::Json(JsonFunc::Json)), #[cfg(feature = "json")] "json_array" => Ok(Func::Json(JsonFunc::JsonArray)), + #[cfg(feature = "json")] + "json_array_length" => Ok(Func::Json(JsonFunc::JsonArrayLength)), "unixepoch" => Ok(Func::Scalar(ScalarFunc::UnixEpoch)), "hex" => Ok(Func::Scalar(ScalarFunc::Hex)), "unhex" => Ok(Func::Scalar(ScalarFunc::Unhex)), diff --git a/core/json/mod.rs b/core/json/mod.rs index b1394b2bd..046f66237 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -1,5 +1,6 @@ mod de; mod error; +mod path; mod ser; use std::rc::Rc; @@ -8,9 +9,10 @@ pub use crate::json::de::from_str; pub use crate::json::ser::to_string; use crate::types::{LimboText, OwnedValue, TextSubtype}; use indexmap::IndexMap; +use path::get_json_val_by_path; use serde::{Deserialize, Serialize}; -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, PartialEq, Debug)] #[serde(untagged)] pub enum Val { Null, @@ -88,6 +90,49 @@ pub fn json_array(values: Vec<&OwnedValue>) -> crate::Result { Ok(OwnedValue::Text(LimboText::json(Rc::new(s)))) } +pub fn json_array_length( + json_value: &OwnedValue, + json_path: Option<&OwnedValue>, +) -> crate::Result { + let path = match json_path { + Some(OwnedValue::Text(t)) => Some(t.value.to_string()), + Some(OwnedValue::Integer(i)) => Some(i.to_string()), + Some(OwnedValue::Float(f)) => Some(f.to_string()), + _ => None::, + }; + + let top_val = match json_value { + OwnedValue::Text(ref t) => 
crate::json::from_str::(&t.value), + OwnedValue::Blob(b) => match jsonb::from_slice(b) { + Ok(j) => { + let json = j.to_string(); + crate::json::from_str(&json) + } + Err(_) => crate::bail_parse_error!("malformed JSON"), + }, + _ => return Ok(OwnedValue::Integer(0)), + }; + + let Ok(top_val) = top_val else { + crate::bail_parse_error!("malformed JSON") + }; + + let arr_val = if let Some(path) = path { + match get_json_val_by_path(&top_val, &path) { + Ok(Some(val)) => val, + Ok(None) => return Ok(OwnedValue::Null), + Err(e) => return Err(e), + } + } else { + &top_val + }; + + if let Val::Array(val) = &arr_val { + return Ok(OwnedValue::Integer(val.len() as i64)); + } + Ok(OwnedValue::Integer(0)) +} + #[cfg(test)] mod tests { use super::*; @@ -266,4 +311,121 @@ mod tests { Err(e) => assert!(e.to_string().contains("JSON cannot hold BLOB values")), } } + + #[test] + fn test_json_array_length() { + let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string())); + let result = json_array_length(&input, None).unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_empty() { + let input = OwnedValue::build_text(Rc::new("[]".to_string())); + let result = json_array_length(&input, None).unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 0); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_root() { + let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_not_array() { + let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string())); + let result = 
json_array_length(&input, None).unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 0); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_prop() { + let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$.one".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_index() { + let input = OwnedValue::build_text(Rc::new("[[1,2,3,4]]".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$[0]".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_index_not_array() { + let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$[2]".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 0); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_index_bad_prop() { + let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$.two".to_string()))), + ) + .unwrap(); + assert_eq!(OwnedValue::Null, result); + } + + #[test] + fn test_json_array_length_simple_json_subtype() { + let input = OwnedValue::build_text(Rc::new("[1,2,3]".to_string())); + let wrapped = get_json(&input).unwrap(); + let result = json_array_length(&wrapped, None).unwrap(); + + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 3); + } else { + panic!("Expected OwnedValue::Integer"); + } + } } 
diff --git a/core/json/path.rs b/core/json/path.rs new file mode 100644 index 000000000..e475f6647 --- /dev/null +++ b/core/json/path.rs @@ -0,0 +1,181 @@ +use super::Val; + +pub fn get_json_val_by_path<'v>(val: &'v Val, path: &str) -> crate::Result> { + match path.strip_prefix('$') { + Some(tail) => json_val_by_path(val, tail), + None => crate::bail_parse_error!("malformed path"), + } +} + +fn json_val_by_path<'v>(val: &'v Val, path: &str) -> crate::Result> { + if path.is_empty() { + return Ok(Some(val)); + } + + match val { + Val::Array(inner) => { + if inner.is_empty() { + return Ok(None); + } + let Some(tail) = path.strip_prefix('[') else { + return Ok(None); + }; + let (from_end, tail) = if let Some(updated_tail) = tail.strip_prefix("#-") { + (true, updated_tail) + } else { + (false, tail) + }; + + let Some((idx_str, tail)) = tail.split_once("]") else { + crate::bail_parse_error!("malformed path"); + }; + + if idx_str.is_empty() { + return Ok(None); + } + let Ok(idx) = idx_str.parse::() else { + crate::bail_parse_error!("malformed path"); + }; + let result = if from_end { + inner.get(inner.len() - 1 - idx) + } else { + inner.get(idx) + }; + + if let Some(result) = result { + return json_val_by_path(result, tail); + } + Ok(None) + } + Val::Object(inner) => { + let Some(tail) = path.strip_prefix('.') else { + return Ok(None); + }; + + let (property, tail) = if let Some(tail) = tail.strip_prefix('"') { + if let Some((property, tail)) = tail.split_once('"') { + (property, tail) + } else { + crate::bail_parse_error!("malformed path"); + } + } else if let Some(idx) = tail.find('.') { + (&tail[..idx], &tail[idx..]) + } else { + (tail, "") + }; + + if let Some(result) = inner.get(property) { + return json_val_by_path(result, tail); + } + Ok(None) + } + _ => Ok(None), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_path_root() { + assert_eq!( + get_json_val_by_path(&Val::Bool(true), "$",).unwrap(), + Some(&Val::Bool(true)) + ); + } + + #[test] 
+ fn test_path_index() { + assert_eq!( + get_json_val_by_path( + &Val::Array(vec![Val::Integer(33), Val::Integer(55), Val::Integer(66)]), + "$[2]", + ) + .unwrap(), + Some(&Val::Integer(66)) + ); + } + + #[test] + fn test_path_negative_index() { + assert_eq!( + get_json_val_by_path( + &Val::Array(vec![Val::Integer(33), Val::Integer(55), Val::Integer(66)]), + "$[#-2]", + ) + .unwrap(), + Some(&Val::Integer(33)) + ); + } + + #[test] + fn test_path_index_deep() { + assert_eq!( + get_json_val_by_path( + &Val::Array(vec![Val::Array(vec![ + Val::Integer(33), + Val::Integer(55), + Val::Integer(66) + ])]), + "$[0][1]", + ) + .unwrap(), + Some(&Val::Integer(55)) + ); + } + + #[test] + fn test_path_prop_simple() { + assert_eq!( + get_json_val_by_path( + &Val::Object( + [ + ("foo".into(), Val::Integer(55)), + ("bar".into(), Val::Integer(66)) + ] + .into() + ), + "$.bar", + ) + .unwrap(), + Some(&Val::Integer(66)) + ); + } + + #[test] + fn test_path_prop_nested() { + assert_eq!( + get_json_val_by_path( + &Val::Object( + [( + "foo".into(), + Val::Object([("bar".into(), Val::Integer(66))].into()) + )] + .into() + ), + "$.foo.bar", + ) + .unwrap(), + Some(&Val::Integer(66)) + ); + } + + #[test] + fn test_path_prop_quoted() { + assert_eq!( + get_json_val_by_path( + &Val::Object( + [ + ("foo.baz".into(), Val::Integer(55)), + ("bar".into(), Val::Integer(66)) + ] + .into() + ), + r#"$."foo.baz""#, + ) + .unwrap(), + Some(&Val::Integer(55)) + ); + } +} diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 734dbb98e..afdb6c7ad 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -913,6 +913,51 @@ pub fn translate_expr( }); Ok(target_register) } + JsonFunc::JsonArrayLength => { + let args = if let Some(args) = args { + if args.len() > 2 { + crate::bail_parse_error!( + "{} function with wrong number of arguments", + j.to_string() + ) + } + args + } else { + crate::bail_parse_error!( + "{} function with no arguments", + j.to_string() + ); + }; + + let json_reg 
= program.alloc_register(); + let path_reg = program.alloc_register(); + + translate_expr( + program, + referenced_tables, + &args[0], + json_reg, + precomputed_exprs_to_registers, + )?; + + if args.len() == 2 { + translate_expr( + program, + referenced_tables, + &args[1], + path_reg, + precomputed_exprs_to_registers, + )?; + } + + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: json_reg, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } }, Func::Scalar(srf) => { match srf { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 427770155..da5a32c3f 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -37,7 +37,7 @@ use crate::types::{ }; use crate::util::parse_schema_rows; #[cfg(feature = "json")] -use crate::{function::JsonFunc, json::get_json, json::json_array}; +use crate::{function::JsonFunc, json::get_json, json::json_array, json::json_array_length}; use crate::{Connection, Result, TransactionState}; use crate::{Rows, DATABASE_VERSION}; use limbo_macros::Description; @@ -2281,6 +2281,21 @@ impl Program { Err(e) => return Err(e), } } + #[cfg(feature = "json")] + crate::function::Func::Json(JsonFunc::JsonArrayLength) => { + let json_value = &state.registers[*start_reg]; + let path_value = if arg_count > 1 { + Some(&state.registers[*start_reg + 1]) + } else { + None + }; + let json_array_length = json_array_length(json_value, path_value); + + match json_array_length { + Ok(length) => state.registers[*dest] = length, + Err(e) => return Err(e), + } + } crate::function::Func::Scalar(scalar_func) => match scalar_func { ScalarFunc::Cast => { assert!(arg_count == 2); diff --git a/testing/json.test b/testing/json.test index a62040555..7c33fdc5a 100755 --- a/testing/json.test +++ b/testing/json.test @@ -83,3 +83,35 @@ do_execsql_test json_array_json { do_execsql_test json_array_nested { SELECT json_array(json_array(1,2,3), json('[1,2,3]'), '[1,2,3]') } {{[[1,2,3],[1,2,3],"[1,2,3]"]}} + +do_execsql_test json_array_length { 
+ SELECT json_array_length('[1,2,3,4]'); +} {{4}} + +do_execsql_test json_array_length_empty { + SELECT json_array_length('[]'); +} {{0}} + +do_execsql_test json_array_length_root { + SELECT json_array_length('[1,2,3,4]', '$'); +} {{4}} + +do_execsql_test json_array_length_not_array { + SELECT json_array_length('{"one":[1,2,3]}'); +} {{0}} + +do_execsql_test json_array_length_via_prop { + SELECT json_array_length('{"one":[1,2,3]}', '$.one'); +} {{3}} + +do_execsql_test json_array_length_via_index { + SELECT json_array_length('[[1,2,3,4]]', '$[0]'); +} {{4}} + +do_execsql_test json_array_length_via_index_not_array { + SELECT json_array_length('[1,2,3,4]', '$[2]'); +} {{0}} + +do_execsql_test json_array_length_via_bad_prop { + SELECT json_array_length('{"one":[1,2,3]}', '$.two'); +} {{}} \ No newline at end of file From f2ecebc3574e26a7500c3ecc2ba05cbf9a52f83b Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:20:26 +0200 Subject: [PATCH 127/144] Rename RowResult to StepResult The name "row result" is confusing because it really *is* a result from a step() call. The only difference is how a row is represented as we return from VDBE or from a statement. Therefore, rename RowResult to StepResult. --- bindings/python/src/lib.rs | 20 +++---- bindings/wasm/lib.rs | 20 +++---- cli/app.rs | 42 +++++++------- core/benches/benchmark.rs | 30 +++++----- core/lib.rs | 16 +++--- core/util.rs | 12 ++-- perf/latency/limbo/src/main.rs | 6 +- simulator/generation/plan.rs | 12 ++-- simulator/main.rs | 2 +- sqlite3/src/lib.rs | 10 ++-- test/src/lib.rs | 100 ++++++++++++++++----------------- 11 files changed, 135 insertions(+), 135 deletions(-) diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index c31520a82..1b3514032 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -128,22 +128,22 @@ impl Cursor { match smt_lock.step().map_err(|e| { PyErr::new::(format!("Step error: {:?}", e)) })? 
{ - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { let py_row = row_to_py(py, &row); return Ok(Some(py_row)); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { self.conn.io.run_once().map_err(|e| { PyErr::new::(format!("IO error: {:?}", e)) })?; } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { return Ok(None); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { return Ok(None); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { return Err( PyErr::new::("Busy error".to_string()).into() ); @@ -167,22 +167,22 @@ impl Cursor { match smt_lock.step().map_err(|e| { PyErr::new::(format!("Step error: {:?}", e)) })? { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { let py_row = row_to_py(py, &row); results.push(py_row); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { self.conn.io.run_once().map_err(|e| { PyErr::new::(format!("IO error: {:?}", e)) })?; } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { return Ok(results); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { return Ok(results); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { return Err( PyErr::new::("Busy error".to_string()).into() ); diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index a2ae5b266..a06321f16 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -75,7 +75,7 @@ impl Statement { pub fn get(&self) -> JsValue { match self.inner.borrow_mut().step() { - Ok(limbo_core::RowResult::Row(row)) => { + Ok(limbo_core::StepResult::Row(row)) => { let row_array = js_sys::Array::new(); for value in row.values { let value = to_js_value(value); @@ -83,10 +83,10 @@ impl Statement { } JsValue::from(row_array) } - Ok(limbo_core::RowResult::IO) - | Ok(limbo_core::RowResult::Done) - | Ok(limbo_core::RowResult::Interrupt) - | 
Ok(limbo_core::RowResult::Busy) => JsValue::UNDEFINED, + Ok(limbo_core::StepResult::IO) + | Ok(limbo_core::StepResult::Done) + | Ok(limbo_core::StepResult::Interrupt) + | Ok(limbo_core::StepResult::Busy) => JsValue::UNDEFINED, Err(e) => panic!("Error: {:?}", e), } } @@ -95,7 +95,7 @@ impl Statement { let array = js_sys::Array::new(); loop { match self.inner.borrow_mut().step() { - Ok(limbo_core::RowResult::Row(row)) => { + Ok(limbo_core::StepResult::Row(row)) => { let row_array = js_sys::Array::new(); for value in row.values { let value = to_js_value(value); @@ -103,10 +103,10 @@ impl Statement { } array.push(&row_array); } - Ok(limbo_core::RowResult::IO) => {} - Ok(limbo_core::RowResult::Interrupt) => break, - Ok(limbo_core::RowResult::Done) => break, - Ok(limbo_core::RowResult::Busy) => break, + Ok(limbo_core::StepResult::IO) => {} + Ok(limbo_core::StepResult::Interrupt) => break, + Ok(limbo_core::StepResult::Done) => break, + Ok(limbo_core::StepResult::Busy) => break, Err(e) => panic!("Error: {:?}", e), } } diff --git a/cli/app.rs b/cli/app.rs index cbce1ca5c..0593066c5 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -1,6 +1,6 @@ use crate::opcodes_dictionary::OPCODE_DESCRIPTIONS; use cli_table::{Cell, Table}; -use limbo_core::{Database, LimboError, RowResult, Value}; +use limbo_core::{Database, LimboError, StepResult, Value}; use clap::{Parser, ValueEnum}; use std::{ @@ -498,7 +498,7 @@ impl Limbo { } match rows.next_row() { - Ok(RowResult::Row(row)) => { + Ok(StepResult::Row(row)) => { for (i, value) in row.values.iter().enumerate() { if i > 0 { let _ = self.writer.write(b"|"); @@ -518,14 +518,14 @@ impl Limbo { } let _ = self.writeln(""); } - Ok(RowResult::IO) => { + Ok(StepResult::IO) => { self.io.run_once()?; } - Ok(RowResult::Interrupt) => break, - Ok(RowResult::Done) => { + Ok(StepResult::Interrupt) => break, + Ok(StepResult::Done) => { break; } - Ok(RowResult::Busy) => { + Ok(StepResult::Busy) => { self.writeln("database is busy"); break; } @@ -543,7 +543,7 
@@ impl Limbo { let mut table_rows: Vec> = vec![]; loop { match rows.next_row() { - Ok(RowResult::Row(row)) => { + Ok(StepResult::Row(row)) => { table_rows.push( row.values .iter() @@ -559,12 +559,12 @@ impl Limbo { .collect(), ); } - Ok(RowResult::IO) => { + Ok(StepResult::IO) => { self.io.run_once()?; } - Ok(RowResult::Interrupt) => break, - Ok(RowResult::Done) => break, - Ok(RowResult::Busy) => { + Ok(StepResult::Interrupt) => break, + Ok(StepResult::Done) => break, + Ok(StepResult::Busy) => { self.writeln("database is busy"); break; } @@ -607,18 +607,18 @@ impl Limbo { let mut found = false; loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Some(Value::Text(schema)) = row.values.first() { let _ = self.write_fmt(format_args!("{};", schema)); found = true; } } - RowResult::IO => { + StepResult::IO => { self.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => { + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { self.writeln("database is busy"); break; } @@ -664,18 +664,18 @@ impl Limbo { let mut tables = String::new(); loop { match rows.next_row()? 
{ - RowResult::Row(row) => { + StepResult::Row(row) => { if let Some(Value::Text(table)) = row.values.first() { tables.push_str(table); tables.push(' '); } } - RowResult::IO => { + StepResult::IO => { self.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => { + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { self.writeln("database is busy"); break; } diff --git a/core/benches/benchmark.rs b/core/benches/benchmark.rs index 0fe17d991..0dff08b5b 100644 --- a/core/benches/benchmark.rs +++ b/core/benches/benchmark.rs @@ -40,19 +40,19 @@ fn limbo_bench(criterion: &mut Criterion) { b.iter(|| { let mut rows = stmt.query().unwrap(); match rows.next_row().unwrap() { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { assert_eq!(row.get::(0).unwrap(), 1); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { io.run_once().unwrap(); } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { unreachable!(); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { unreachable!(); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { unreachable!(); } } @@ -68,19 +68,19 @@ fn limbo_bench(criterion: &mut Criterion) { b.iter(|| { let mut rows = stmt.query().unwrap(); match rows.next_row().unwrap() { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { assert_eq!(row.get::(0).unwrap(), 1); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { io.run_once().unwrap(); } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { unreachable!(); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { unreachable!(); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { unreachable!() } } @@ -97,19 +97,19 @@ fn limbo_bench(criterion: &mut Criterion) { b.iter(|| { let mut rows = stmt.query().unwrap(); match 
rows.next_row().unwrap() { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { assert_eq!(row.get::(0).unwrap(), 1); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { io.run_once().unwrap(); } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { unreachable!(); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { unreachable!(); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { unreachable!() } } diff --git a/core/lib.rs b/core/lib.rs index 255c47217..445786cdf 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -374,14 +374,14 @@ impl Statement { self.state.interrupt(); } - pub fn step(&mut self) -> Result> { + pub fn step(&mut self) -> Result> { let result = self.program.step(&mut self.state, self.pager.clone())?; match result { - vdbe::StepResult::Row(row) => Ok(RowResult::Row(Row { values: row.values })), - vdbe::StepResult::IO => Ok(RowResult::IO), - vdbe::StepResult::Done => Ok(RowResult::Done), - vdbe::StepResult::Interrupt => Ok(RowResult::Interrupt), - vdbe::StepResult::Busy => Ok(RowResult::Busy), + vdbe::StepResult::Row(row) => Ok(StepResult::Row(Row { values: row.values })), + vdbe::StepResult::IO => Ok(StepResult::IO), + vdbe::StepResult::Done => Ok(StepResult::Done), + vdbe::StepResult::Interrupt => Ok(StepResult::Interrupt), + vdbe::StepResult::Busy => Ok(StepResult::Busy), } } @@ -393,7 +393,7 @@ impl Statement { pub fn reset(&self) {} } -pub enum RowResult<'a> { +pub enum StepResult<'a> { Row(Row<'a>), IO, Done, @@ -421,7 +421,7 @@ impl Rows { Self { stmt } } - pub fn next_row(&mut self) -> Result> { + pub fn next_row(&mut self) -> Result> { self.stmt.step() } } diff --git a/core/util.rs b/core/util.rs index a57186890..dcf04ac81 100644 --- a/core/util.rs +++ b/core/util.rs @@ -4,7 +4,7 @@ use sqlite3_parser::ast::{Expr, FunctionTail, Literal}; use crate::{ schema::{self, Schema}, - Result, RowResult, Rows, IO, + Result, Rows, StepResult, 
IO, }; // https://sqlite.org/lang_keywords.html @@ -27,7 +27,7 @@ pub fn parse_schema_rows(rows: Option, schema: &mut Schema, io: Arc { + StepResult::Row(row) => { let ty = row.get::<&str>(0)?; if ty != "table" && ty != "index" { continue; @@ -53,14 +53,14 @@ pub fn parse_schema_rows(rows: Option, schema: &mut Schema, io: Arc continue, } } - RowResult::IO => { + StepResult::IO => { // TODO: How do we ensure that the I/O we submitted to // read the schema is actually complete? io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => break, + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => break, } } } diff --git a/perf/latency/limbo/src/main.rs b/perf/latency/limbo/src/main.rs index c790c6bc8..b51ffb406 100644 --- a/perf/latency/limbo/src/main.rs +++ b/perf/latency/limbo/src/main.rs @@ -38,11 +38,11 @@ fn main() { loop { let row = rows.next_row().unwrap(); match row { - limbo_core::RowResult::Row(_) => { + limbo_core::StepResult::Row(_) => { count += 1; } - limbo_core::RowResult::IO => yield, - limbo_core::RowResult::Done => break, + limbo_core::StepResult::IO => yield, + limbo_core::StepResult::Done => break, } } assert!(count == 100); diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 82c75c4e3..ea2392f4e 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -1,6 +1,6 @@ use std::{fmt::Display, rc::Rc}; -use limbo_core::{Connection, Result, RowResult}; +use limbo_core::{Connection, Result, StepResult}; use rand::SeedableRng; use rand_chacha::ChaCha8Rng; @@ -215,7 +215,7 @@ impl Interaction { let mut out = Vec::new(); while let Ok(row) = rows.next_row() { match row { - RowResult::Row(row) => { + StepResult::Row(row) => { let mut r = Vec::new(); for el in &row.values { let v = match el { @@ -230,12 +230,12 @@ impl Interaction { out.push(r); } - RowResult::IO => {} - RowResult::Interrupt => {} - RowResult::Done => { + StepResult::IO => 
{} + StepResult::Interrupt => {} + StepResult::Done => { break; } - RowResult::Busy => {} + StepResult::Busy => {} } } diff --git a/simulator/main.rs b/simulator/main.rs index 9f70abed2..b12018062 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,7 +1,7 @@ use clap::Parser; use generation::plan::{Interaction, InteractionPlan, ResultSet}; use generation::{pick_index, ArbitraryFrom}; -use limbo_core::{Connection, Database, Result, RowResult, IO}; +use limbo_core::{Connection, Database, Result, StepResult, IO}; use model::table::Value; use rand::prelude::*; use rand_chacha::ChaCha8Rng; diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index 6bd5b23d6..cd09ef62b 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -239,14 +239,14 @@ pub unsafe extern "C" fn sqlite3_step(stmt: *mut sqlite3_stmt) -> std::ffi::c_in let stmt = &mut *stmt; if let Ok(result) = stmt.stmt.step() { match result { - limbo_core::RowResult::IO => SQLITE_BUSY, - limbo_core::RowResult::Done => SQLITE_DONE, - limbo_core::RowResult::Interrupt => SQLITE_INTERRUPT, - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::IO => SQLITE_BUSY, + limbo_core::StepResult::Done => SQLITE_DONE, + limbo_core::StepResult::Interrupt => SQLITE_INTERRUPT, + limbo_core::StepResult::Row(row) => { stmt.row.replace(Some(row)); SQLITE_ROW } - limbo_core::RowResult::Busy => SQLITE_BUSY, + limbo_core::StepResult::Busy => SQLITE_BUSY, } } else { SQLITE_ERROR diff --git a/test/src/lib.rs b/test/src/lib.rs index 8bd6feea2..931c9b1bf 100644 --- a/test/src/lib.rs +++ b/test/src/lib.rs @@ -40,7 +40,7 @@ impl TempDatabase { #[cfg(test)] mod tests { use super::*; - use limbo_core::{CheckpointStatus, Connection, RowResult, Value}; + use limbo_core::{CheckpointStatus, Connection, StepResult, Value}; use log::debug; #[ignore] @@ -63,10 +63,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? 
{ - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -80,7 +80,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = row.values.first().expect("missing id"); let id = match first_value { Value::Integer(i) => *i as i32, @@ -90,12 +90,12 @@ mod tests { assert_eq!(current_read_index, id); current_read_index += 1; } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => { + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { panic!("Database is busy"); } } @@ -127,10 +127,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -146,7 +146,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let text = &row.values[1]; let id = match first_value { @@ -161,12 +161,12 @@ mod tests { assert_eq!(1, id); compare_string(&huge_text, text); } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => unreachable!(), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -200,10 +200,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? 
{ - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -219,7 +219,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let text = &row.values[1]; let id = match first_value { @@ -236,12 +236,12 @@ mod tests { compare_string(huge_text, text); current_index += 1; } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => unreachable!(), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -269,10 +269,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -290,7 +290,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let id = match first_value { Value::Integer(i) => *i as i32, @@ -300,12 +300,12 @@ mod tests { assert_eq!(current_index, id as usize); current_index += 1; } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => unreachable!(), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -329,10 +329,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? 
{ - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -353,7 +353,7 @@ mod tests { if let Some(ref mut rows) = conn.query(list_query).unwrap() { loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let count = match first_value { Value::Integer(i) => *i as i32, @@ -362,12 +362,12 @@ mod tests { log::debug!("counted {}", count); return Ok(count as usize); } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => panic!("Database is busy"), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => panic!("Database is busy"), } } } @@ -436,10 +436,10 @@ mod tests { if let Some(ref mut rows) = insert_query { loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } } @@ -450,17 +450,17 @@ mod tests { if let Some(ref mut rows) = select_query { loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Value::Integer(id) = row.values[0] { assert_eq!(id, 1, "First insert should have rowid 1"); } } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => panic!("Database is busy"), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => panic!("Database is busy"), } } } @@ -469,10 +469,10 @@ mod tests { match conn.query("INSERT INTO test_rowid (id, val) VALUES (5, 'test2')") { Ok(Some(ref mut rows)) => loop { match rows.next_row()? 
{ - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -485,17 +485,17 @@ mod tests { match conn.query("SELECT last_insert_rowid()") { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Value::Integer(id) = row.values[0] { last_id = id; } } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => panic!("Database is busy"), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => panic!("Database is busy"), } }, Ok(None) => {} From 75992a84d8093e4f8df9953a57266c7708c8e1b7 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:30:15 +0200 Subject: [PATCH 128/144] cli: Fix unused result warnings --- cli/app.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index 0593066c5..7e6155543 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -526,7 +526,7 @@ impl Limbo { break; } Ok(StepResult::Busy) => { - self.writeln("database is busy"); + let _ = self.writeln("database is busy"); break; } Err(err) => { @@ -565,7 +565,7 @@ impl Limbo { Ok(StepResult::Interrupt) => break, Ok(StepResult::Done) => break, Ok(StepResult::Busy) => { - self.writeln("database is busy"); + let _ = self.writeln("database is busy"); break; } Err(err) => { @@ -619,7 +619,7 @@ impl Limbo { StepResult::Interrupt => break, StepResult::Done => break, StepResult::Busy => { - self.writeln("database is busy"); + let _ = self.writeln("database is busy"); break; } } @@ -676,7 +676,7 @@ impl Limbo { StepResult::Interrupt => break, StepResult::Done => break, StepResult::Busy => { - self.writeln("database is busy"); + let _ = self.writeln("database is busy"); break; } } From 244326ee572183d383b7bf4c4607b74afbd3bed1 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 
27 Dec 2024 10:33:47 +0200 Subject: [PATCH 129/144] core: Remove unused imports --- core/storage/wal.rs | 3 +-- core/translate/planner.rs | 7 ++----- core/vdbe/explain.rs | 1 - 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/core/storage/wal.rs b/core/storage/wal.rs index 3cdad9263..8648efe1c 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::RwLock; use std::{cell::RefCell, rc::Rc, sync::Arc}; @@ -16,7 +16,6 @@ use crate::{Completion, Page}; use self::sqlite3_ondisk::{checksum_wal, PageContent, WAL_MAGIC_BE, WAL_MAGIC_LE}; use super::buffer_pool::BufferPool; -use super::page_cache::PageCacheKey; use super::pager::{PageRef, Pager}; use super::sqlite3_ondisk::{self, begin_write_btree_page, WalHeader}; diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 0bdc447f3..75e37d1a5 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,8 +1,5 @@ -use super::{ - optimizer::Optimizable, - plan::{ - Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, - }, +use super::plan::{ + Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, }; use crate::{ function::Func, diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index ce03a53fd..cdcbe70bb 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1,5 +1,4 @@ use super::{Insn, InsnReference, OwnedValue, Program}; -use crate::types::LimboText; use std::rc::Rc; pub fn insn_to_str( From 464508bb298640198c130e607291934ceb55e186 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:35:25 +0200 Subject: [PATCH 130/144] core/vdbe: Kill unused next_free_register() --- core/vdbe/builder.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 8dd1cd4de..3a687b441 100644 --- 
a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -59,10 +59,6 @@ impl ProgramBuilder { reg } - pub fn next_free_register(&self) -> usize { - self.next_free_register - } - pub fn alloc_cursor_id( &mut self, table_identifier: Option, From 9680471876c027fc8b220505faa39f0e5fc1d256 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:36:20 +0200 Subject: [PATCH 131/144] core: Remove unreachable pragma patterns --- core/translate/mod.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index db69f1578..0a459e3fb 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -369,7 +369,6 @@ fn update_pragma( query_pragma("journal_mode", header, program)?; Ok(()) } - _ => todo!("pragma `{name}`"), } } @@ -396,9 +395,6 @@ fn query_pragma( dest: register, }); } - _ => { - todo!("pragma `{name}`"); - } } program.emit_insn(Insn::ResultRow { From b2f96ddfbd66fdbc9861477813a9d0c490e8863e Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:39:24 +0200 Subject: [PATCH 132/144] core/translate: Remove unnecessary mut --- core/translate/planner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 75e37d1a5..7eb269b9a 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -264,7 +264,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ let col_count = columns.len(); From 9dea335a0ab8afd762ffaa3b44aa371d6af27328 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Fri, 27 Dec 2024 11:39:02 -0300 Subject: [PATCH 133/144] Add test function with regex --- testing/tester.tcl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/testing/tester.tcl b/testing/tester.tcl index 04a43c3eb..b8cbff17f 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -26,6 +26,23 @@ proc do_execsql_test {test_name sql_statements expected_outputs} { } } +proc do_execsql_test_regex {test_name sql_statements expected_regex} { + foreach db $::test_dbs { + puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + set actual_output [evaluate_sql $::sqlite_exec $db $combined_sql] + + # Validate the actual output against the regular expression + if {![regexp $expected_regex $actual_output]} { + puts "Test FAILED: '$sql_statements'" + puts "returned '$actual_output'" + puts "expected to match regex '$expected_regex'" + exit 1 + } + } +} + + proc do_execsql_test_on_specific_db {db_name test_name sql_statements expected_outputs} { puts [format "(%s) %s Running test: %s" $db_name [string repeat " " [expr {40 - [string length $db_name]}]] $test_name] set combined_sql [string trim $sql_statements] From 2d0c16c428d8de0da8d63df969dd15dd9878f3da Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Fri, 27 Dec 2024 11:39:33 -0300 Subject: [PATCH 134/144] Fix sqlite_version() out of bound --- core/translate/expr.rs | 2 +- testing/scalar-functions.test | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 734dbb98e..31463f8d6 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1555,7 +1555,7 @@ pub fn translate_expr( program.emit_insn(Insn::Copy { src_reg: output_register, dst_reg: target_register, - amount: 1, + amount: 0, }); 
Ok(target_register) } diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index e7f1c1b10..f04fa1765 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -809,6 +809,10 @@ do_execsql_test cast-small-float-to-numeric { SELECT typeof(CAST('1.23' AS NUMERIC)), CAST('1.23' AS NUMERIC); } {real|1.23} +do_execsql_test_regex sqlite-version-should-return-valid-output { + SELECT sqlite_version(); +} {\d+\.\d+\.\d+} + # TODO COMPAT: sqlite returns 9.22337203685478e+18, do we care...? # do_execsql_test cast-large-text-to-numeric { # SELECT typeof(CAST('9223372036854775808' AS NUMERIC)), CAST('9223372036854775808' AS NUMERIC); From 5470ea2344424f3b64ce5ef440a894cb1c678948 Mon Sep 17 00:00:00 2001 From: psvri Date: Fri, 27 Dec 2024 21:49:26 +0530 Subject: [PATCH 135/144] Add tests in like.test --- testing/like.test | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/testing/like.test b/testing/like.test index edd6ba5e5..a52b90b60 100755 --- a/testing/like.test +++ b/testing/like.test @@ -77,3 +77,15 @@ Robert|Roberts} do_execsql_test where-like-impossible { select * from products where 'foobar' like 'fooba'; } {} + +do_execsql_test like-with-backslash { + select like('\%A', '\A') +} {1} + +do_execsql_test like-with-dollar { + select like('A$%', 'A$') +} {1} + +do_execsql_test like-with-dot { + select like('%a.a', 'aaaa') +} {0} From 3bc554f27c1de2a302e16065464d5aca19cc070f Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 18:39:38 +0200 Subject: [PATCH 136/144] core: Remove unused import --- core/translate/plan.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 4cd30d08b..abfab41fb 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,6 +1,5 @@ use core::fmt; use sqlite3_parser::ast; -use std::ptr::write; use std::{ fmt::{Display, Formatter}, rc::Rc, From f08d62b446db6820885179247c29d0b8cb0e3bbd Mon Sep 17 00:00:00 2001 From: 
PThorpe92 Date: Fri, 27 Dec 2024 15:38:07 -0500 Subject: [PATCH 137/144] Add Remainder vdbe oppcode --- core/translate/expr.rs | 7 +++ core/vdbe/explain.rs | 9 ++++ core/vdbe/mod.rs | 103 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 275851201..10405f97c 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -693,6 +693,13 @@ pub fn translate_expr( dest: target_register, }); } + ast::Operator::Modulus => { + program.emit_insn(Insn::Remainder { + lhs: e1_reg, + rhs: e2_reg, + dest: target_register, + }); + } ast::Operator::BitwiseAnd => { program.emit_insn(Insn::BitAnd { lhs: e1_reg, diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index a2d722948..28a87b2d6 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -82,6 +82,15 @@ pub fn insn_to_str( 0, format!("r[{}]=~r[{}]", dest, reg), ), + Insn::Remainder { lhs, rhs, dest } => ( + "Modulus", + *lhs as i32, + *rhs as i32, + *dest as i32, + OwnedValue::build_text(Rc::new("".to_string())), + 0, + format!("r[{}]=r[{}]%r[{}]", dest, lhs, rhs), + ), Insn::Null { dest, dest_end } => ( "Null", 0, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 980aab282..01c0b3cd1 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -119,6 +119,12 @@ pub enum Insn { reg: usize, dest: usize, }, + // Divide lhs by rhs and place the remainder in dest register. + Remainder { + lhs: usize, + rhs: usize, + dest: usize, + }, // Jump to the instruction at address P1, P2, or P3 depending on whether in the most recent Compare instruction the P1 vector was less than, equal to, or greater than the P2 vector, respectively. 
Jump { target_pc_lt: BranchOffset, @@ -1224,6 +1230,103 @@ impl Program { } state.pc += 1; } + Insn::Remainder { lhs, rhs, dest } => { + let lhs = *lhs; + let rhs = *rhs; + let dest = *dest; + state.registers[dest] = match (&state.registers[lhs], &state.registers[rhs]) { + (OwnedValue::Null, _) + | (_, OwnedValue::Null) + | (_, OwnedValue::Integer(0)) + | (_, OwnedValue::Float(0.0)) => OwnedValue::Null, + (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { + OwnedValue::Integer(lhs % rhs) + } + (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => { + OwnedValue::Float(((*lhs as i64) % (*rhs as i64)) as f64) + } + (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => { + OwnedValue::Float(((*lhs as i64) % rhs) as f64) + } + (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => { + OwnedValue::Float((lhs % *rhs as i64) as f64) + } + (lhs, OwnedValue::Agg(agg_rhs)) => match lhs { + OwnedValue::Agg(agg_lhs) => { + let acc = agg_lhs.final_value(); + let acc2 = agg_rhs.final_value(); + match (acc, acc2) { + (_, OwnedValue::Integer(0)) + | (_, OwnedValue::Float(0.0)) + | (_, OwnedValue::Null) + | (OwnedValue::Null, _) => OwnedValue::Null, + (OwnedValue::Integer(l), OwnedValue::Integer(r)) => { + OwnedValue::Integer(l % r) + } + (OwnedValue::Float(lh_f), OwnedValue::Float(rh_f)) => { + OwnedValue::Float(((*lh_f as i64) % (*rh_f as i64)) as f64) + } + (OwnedValue::Integer(lh_i), OwnedValue::Float(rh_f)) => { + OwnedValue::Float((lh_i % (*rh_f as i64)) as f64) + } + _ => { + todo!("{:?} {:?}", acc, acc2); + } + } + } + OwnedValue::Integer(lh_i) => match agg_rhs.final_value() { + OwnedValue::Null => OwnedValue::Null, + OwnedValue::Float(rh_f) => { + OwnedValue::Float((lh_i % (*rh_f as i64)) as f64) + } + OwnedValue::Integer(rh_i) => OwnedValue::Integer(lh_i % rh_i), + _ => { + todo!("{:?}", agg_rhs); + } + }, + OwnedValue::Float(lh_f) => match agg_rhs.final_value() { + OwnedValue::Null => OwnedValue::Null, + OwnedValue::Float(rh_f) => { + OwnedValue::Float(((*lh_f as 
i64) % (*rh_f as i64)) as f64) + } + OwnedValue::Integer(rh_i) => { + OwnedValue::Float(((*lh_f as i64) % rh_i) as f64) + } + _ => { + todo!("{:?}", agg_rhs); + } + }, + _ => todo!("{:?}", rhs), + }, + (OwnedValue::Agg(aggctx), rhs) => match rhs { + OwnedValue::Integer(rh_i) => match aggctx.final_value() { + OwnedValue::Null => OwnedValue::Null, + OwnedValue::Float(lh_f) => { + OwnedValue::Float(((*lh_f as i64) % rh_i) as f64) + } + OwnedValue::Integer(lh_i) => OwnedValue::Integer(lh_i % rh_i), + _ => { + todo!("{:?}", aggctx); + } + }, + OwnedValue::Float(rh_f) => match aggctx.final_value() { + OwnedValue::Null => OwnedValue::Null, + OwnedValue::Float(lh_f) => { + OwnedValue::Float(((*lh_f as i64) % (*rh_f as i64)) as f64) + } + OwnedValue::Integer(lh_i) => { + OwnedValue::Float((lh_i % (*rh_f as i64)) as f64) + } + _ => { + todo!("{:?}", aggctx); + } + }, + _ => todo!("{:?}", rhs), + }, + _ => todo!("{:?} {:?}", state.registers[lhs], state.registers[rhs]), + }; + state.pc += 1; + } Insn::Null { dest, dest_end } => { if let Some(dest_end) = dest_end { for i in *dest..=*dest_end { From 82de59dd88bd2c8f4e9be7054bd8d99d42d2743c Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 27 Dec 2024 15:38:29 -0500 Subject: [PATCH 138/144] Add compatability tests for mod operator --- testing/math.test | 48 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/testing/math.test b/testing/math.test index 7b27495e3..9dc517755 100644 --- a/testing/math.test +++ b/testing/math.test @@ -1025,3 +1025,51 @@ do_execsql_test log-null-int { do_execsql_test log-int-null { SELECT log(5, null) } {} + +do_execsql_test mod-int-null { + SELECT 183 % null +} {} + +do_execsql_test mod-int-0 { + SELECT 183 % 0 +} {} + +do_execsql_test mod-int-int { + SELECT 183 % 10 +} { 3 } + +do_execsql_test mod-int-float { + SELECT 38 % 10.35 +} { 8.0 } + +do_execsql_test mod-float-int { + SELECT 38.43 % 13 +} { 12.0 } + +do_execsql_test mod-0-float { + SELECT 0 % 12.0 
+} { 0.0 } + +do_execsql_test mod-float-0 { + SELECT 23.14 % 0 +} {} + +do_execsql_test mod-float-float { + SELECT 23.14 % 12.0 +} { 11.0 } + +do_execsql_test mod-float-agg { + SELECT 23.14 % sum(id) from products +} { 23.0 } + +do_execsql_test mod-int-agg { + SELECT 17 % sum(id) from users +} { 17 } + +do_execsql_test mod-agg-int { + SELECT count(*) % 17 from users +} { 4 } + +do_execsql_test mod-agg-float { + SELECT count(*) % 2.43 from users +} { 0.0 } From ddf229c4329392d62dac22018a4a9be15022cc38 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 27 Dec 2024 15:38:44 -0500 Subject: [PATCH 139/144] Update COMPAT.md --- COMPAT.md | 2 +- core/vdbe/explain.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 71a00290b..1cdc475a8 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -395,7 +395,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | ReadCookie | No | | Real | Yes | | RealAffinity | Yes | -| Remainder | No | +| Remainder | Yes | | ResetCount | No | | ResultRow | Yes | | Return | Yes | diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 28a87b2d6..a17ababcf 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -83,7 +83,7 @@ pub fn insn_to_str( format!("r[{}]=~r[{}]", dest, reg), ), Insn::Remainder { lhs, rhs, dest } => ( - "Modulus", + "Remainder", *lhs as i32, *rhs as i32, *dest as i32, From 1922b8ea382a1992988e7faa70802d6dc0e5d935 Mon Sep 17 00:00:00 2001 From: psvri Date: Sat, 28 Dec 2024 13:55:12 +0530 Subject: [PATCH 140/144] Support like function with escape --- COMPAT.md | 4 +-- core/vdbe/likeop.rs | 87 +++++++++++++++++++++++++++++++++++++++++++++ core/vdbe/mod.rs | 22 ++++++++++++ testing/like.test | 45 +++++++++++++++++++++++ 4 files changed, 156 insertions(+), 2 deletions(-) create mode 100644 core/vdbe/likeop.rs diff --git a/COMPAT.md b/COMPAT.md index 71a00290b..575f2c1bb 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -116,8 +116,8 @@ Feature support of 
[sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | instr(X,Y) | Yes | | | last_insert_rowid() | Yes | | | length(X) | Yes | | -| like(X,Y) | No | | -| like(X,Y,Z) | No | | +| like(X,Y) | Yes | | +| like(X,Y,Z) | Yes | | | likelihood(X,Y) | No | | | likely(X) | No | | | load_extension(X) | No | | diff --git a/core/vdbe/likeop.rs b/core/vdbe/likeop.rs new file mode 100644 index 000000000..f4ef62f8d --- /dev/null +++ b/core/vdbe/likeop.rs @@ -0,0 +1,87 @@ +use regex::{Regex, RegexBuilder}; + +use crate::{types::OwnedValue, LimboError}; + +pub fn construct_like_escape_arg(escape_value: &OwnedValue) -> Result { + match escape_value { + OwnedValue::Text(text) => { + let mut escape_chars = text.value.chars(); + match (escape_chars.next(), escape_chars.next()) { + (Some(escape), None) => Ok(escape), + _ => { + return Result::Err(LimboError::Constraint( + "ESCAPE expression must be a single character".to_string(), + )) + } + } + } + _ => { + unreachable!("Like on non-text registers"); + } + } +} + +// Implements LIKE pattern matching with escape +pub fn exec_like_with_escape(pattern: &str, text: &str, escape: char) -> bool { + construct_like_regex_with_escape(pattern, escape).is_match(text) +} + +fn construct_like_regex_with_escape(pattern: &str, escape: char) -> Regex { + let mut regex_pattern = String::with_capacity(pattern.len() * 2); + + regex_pattern.push('^'); + + let mut chars = pattern.chars(); + + while let Some(ch) = chars.next() { + match ch { + esc_ch if esc_ch == escape => { + if let Some(escaped_char) = chars.next() { + if regex_syntax::is_meta_character(escaped_char) { + regex_pattern.push('\\'); + } + regex_pattern.push(escaped_char); + } + } + '%' => regex_pattern.push_str(".*"), + '_' => regex_pattern.push('.'), + c => { + if regex_syntax::is_meta_character(c) { + regex_pattern.push('\\'); + } + regex_pattern.push(c); + } + } + } + + regex_pattern.push('$'); + + RegexBuilder::new(®ex_pattern) + .case_insensitive(true) + 
.dot_matches_new_line(true) + .build() + .unwrap() +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_exec_like_with_escape() { + assert!(exec_like_with_escape("abcX%", "abc%", 'X')); + assert!(!exec_like_with_escape("abcX%", "abc5", 'X')); + assert!(!exec_like_with_escape("abcX%", "abc", 'X')); + assert!(!exec_like_with_escape("abcX%", "abcX%", 'X')); + assert!(!exec_like_with_escape("abcX%", "abc%%", 'X')); + assert!(exec_like_with_escape("abcX_", "abc_", 'X')); + assert!(!exec_like_with_escape("abcX_", "abc5", 'X')); + assert!(!exec_like_with_escape("abcX_", "abc", 'X')); + assert!(!exec_like_with_escape("abcX_", "abcX_", 'X')); + assert!(!exec_like_with_escape("abcX_", "abc__", 'X')); + assert!(exec_like_with_escape("abcXX", "abcX", 'X')); + assert!(!exec_like_with_escape("abcXX", "abc5", 'X')); + assert!(!exec_like_with_escape("abcXX", "abc", 'X')); + assert!(!exec_like_with_escape("abcXX", "abcXX", 'X')); + } +} diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 980aab282..8dfd67b37 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -19,6 +19,7 @@ pub mod builder; pub mod explain; +pub mod likeop; pub mod sorter; mod datetime; @@ -40,6 +41,7 @@ use crate::util::parse_schema_rows; use crate::{function::JsonFunc, json::get_json, json::json_array, json::json_array_length}; use crate::{Connection, Result, TransactionState}; use crate::{Rows, DATABASE_VERSION}; +use likeop::{construct_like_escape_arg, exec_like_with_escape}; use limbo_macros::Description; use datetime::{exec_date, exec_time, exec_unixepoch}; @@ -564,6 +566,7 @@ struct RegexCache { like: HashMap, glob: HashMap, } + impl RegexCache { fn new() -> Self { RegexCache { @@ -2377,7 +2380,25 @@ impl Program { ScalarFunc::Like => { let pattern = &state.registers[*start_reg]; let text = &state.registers[*start_reg + 1]; + let result = match (pattern, text) { + (OwnedValue::Text(pattern), OwnedValue::Text(text)) + if arg_count == 3 => + { + let escape = match 
construct_like_escape_arg( + &state.registers[*start_reg + 2], + ) { + Ok(x) => x, + Err(e) => return Result::Err(e), + }; + + OwnedValue::Integer(exec_like_with_escape( + &pattern.value, + &text.value, + escape, + ) + as i64) + } (OwnedValue::Text(pattern), OwnedValue::Text(text)) => { let cache = if *constant_mask > 0 { Some(&mut state.regex_cache.like) @@ -2395,6 +2416,7 @@ impl Program { unreachable!("Like on non-text registers"); } }; + state.registers[*dest] = result; } ScalarFunc::Abs diff --git a/testing/like.test b/testing/like.test index a52b90b60..8d4456406 100755 --- a/testing/like.test +++ b/testing/like.test @@ -89,3 +89,48 @@ do_execsql_test like-with-dollar { do_execsql_test like-with-dot { select like('%a.a', 'aaaa') } {0} + +do_execsql_test like-fn-esc-1 { + SELECT like('abcX%', 'abc%' , 'X') +} 1 +do_execsql_test like-fn-esc-2 { + SELECT like('abcX%', 'abc5' , 'X') +} 0 +do_execsql_test like-fn-esc-3 { + SELECT like('abcX%', 'abc', 'X') +} 0 +do_execsql_test like-fn-esc-4 { + SELECT like('abcX%', 'abcX%', 'X') +} 0 +do_execsql_test like-fn-esc-5 { + SELECT like('abcX%', 'abc%%', 'X') +} 0 + +do_execsql_test like-fn-esc-6 { + SELECT like('abcX_', 'abc_' , 'X') +} 1 +do_execsql_test like-fn-esc-7 { + SELECT like('abcX_', 'abc5' , 'X') +} 0 +do_execsql_test like-fn-esc-8 { + SELECT like('abcX_', 'abc' , 'X') +} 0 +do_execsql_test like-fn-esc-9 { + SELECT like('abcX_', 'abcX_', 'X') +} 0 +do_execsql_test like-fn-esc-10 { + SELECT like('abcX_', 'abc__', 'X') +} 0 + +do_execsql_test like-fn-esc-11 { + SELECT like('abcXX', 'abcX' , 'X') +} 1 +do_execsql_test like-fn-esc-12 { + SELECT like('abcXX', 'abc5' , 'X') +} 0 +do_execsql_test like-fn-esc-13 { + SELECT like('abcXX', 'abc' , 'X') +} 0 +do_execsql_test like-fn-esc-14 { + SELECT like('abcXX', 'abcXX', 'X') +} 0 From 8c9bd0deb9951b624379d9640bbf9951db92e8ec Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sun, 29 Dec 2024 08:58:26 +0200 Subject: [PATCH 141/144] chore: commit cargo lock changes from 
#553 --- Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.lock b/Cargo.lock index 0f034d89d..aacc8d495 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1149,6 +1149,7 @@ dependencies = [ "pprof", "rand", "regex", + "regex-syntax", "rstest", "rusqlite", "rustix", From 97647ff0568f2a160bf3f639440a592a18881fab Mon Sep 17 00:00:00 2001 From: adamnemecek Date: Thu, 26 Dec 2024 16:13:29 -0800 Subject: [PATCH 142/144] Clean up code to use Self Closes #556 --- bindings/wasm/lib.rs | 6 +- core/ext/mod.rs | 2 +- core/ext/uuid.rs | 26 +-- core/function.rs | 387 +++++++++++++++++----------------- core/io/mod.rs | 6 +- core/json/de.rs | 2 +- core/json/error.rs | 12 +- core/schema.rs | 62 +++--- core/storage/pager.rs | 4 +- core/translate/optimizer.rs | 20 +- core/types.rs | 188 ++++++++--------- core/vdbe/mod.rs | 2 +- simulator/generation/plan.rs | 26 +-- simulator/generation/query.rs | 26 +-- simulator/generation/table.rs | 53 +++-- simulator/model/query.rs | 20 +- simulator/model/table.rs | 18 +- 17 files changed, 420 insertions(+), 440 deletions(-) diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index a06321f16..060ad20f8 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -64,7 +64,7 @@ pub struct Statement { #[wasm_bindgen] impl Statement { fn new(inner: RefCell, raw: bool) -> Self { - Statement { inner, raw } + Self { inner, raw } } #[wasm_bindgen] @@ -150,7 +150,7 @@ pub struct File { #[allow(dead_code)] impl File { fn new(vfs: VFS, fd: i32) -> Self { - File { vfs, fd } + Self { vfs, fd } } } @@ -263,7 +263,7 @@ pub struct DatabaseStorage { impl DatabaseStorage { pub fn new(file: Rc) -> Self { - DatabaseStorage { file } + Self { file } } } diff --git a/core/ext/mod.rs b/core/ext/mod.rs index 312ebfcea..ab5a1ce11 100644 --- a/core/ext/mod.rs +++ b/core/ext/mod.rs @@ -13,7 +13,7 @@ impl std::fmt::Display for ExtFunc { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { #[cfg(feature = "uuid")] - 
ExtFunc::Uuid(uuidfn) => write!(f, "{}", uuidfn), + Self::Uuid(uuidfn) => write!(f, "{}", uuidfn), _ => write!(f, "unknown"), } } diff --git a/core/ext/uuid.rs b/core/ext/uuid.rs index 00ce23d9b..d3225e8e0 100644 --- a/core/ext/uuid.rs +++ b/core/ext/uuid.rs @@ -19,14 +19,14 @@ pub enum UuidFunc { impl UuidFunc { pub fn resolve_function(name: &str, num_args: usize) -> Option { match name { - "uuid4_str" => Some(ExtFunc::Uuid(UuidFunc::Uuid4Str)), - "uuid4" => Some(ExtFunc::Uuid(UuidFunc::Uuid4)), - "uuid7" if num_args < 2 => Some(ExtFunc::Uuid(UuidFunc::Uuid7)), - "uuid_str" if num_args == 1 => Some(ExtFunc::Uuid(UuidFunc::UuidStr)), - "uuid_blob" if num_args == 1 => Some(ExtFunc::Uuid(UuidFunc::UuidBlob)), - "uuid7_timestamp_ms" if num_args == 1 => Some(ExtFunc::Uuid(UuidFunc::Uuid7TS)), + "uuid4_str" => Some(ExtFunc::Uuid(Self::Uuid4Str)), + "uuid4" => Some(ExtFunc::Uuid(Self::Uuid4)), + "uuid7" if num_args < 2 => Some(ExtFunc::Uuid(Self::Uuid7)), + "uuid_str" if num_args == 1 => Some(ExtFunc::Uuid(Self::UuidStr)), + "uuid_blob" if num_args == 1 => Some(ExtFunc::Uuid(Self::UuidBlob)), + "uuid7_timestamp_ms" if num_args == 1 => Some(ExtFunc::Uuid(Self::Uuid7TS)), // postgres_compatability - "gen_random_uuid" => Some(ExtFunc::Uuid(UuidFunc::Uuid4Str)), + "gen_random_uuid" => Some(ExtFunc::Uuid(Self::Uuid4Str)), _ => None, } } @@ -35,12 +35,12 @@ impl UuidFunc { impl std::fmt::Display for UuidFunc { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - UuidFunc::Uuid4Str => write!(f, "uuid4_str"), - UuidFunc::Uuid4 => write!(f, "uuid4"), - UuidFunc::Uuid7 => write!(f, "uuid7"), - UuidFunc::Uuid7TS => write!(f, "uuid7_timestamp_ms"), - UuidFunc::UuidStr => write!(f, "uuid_str"), - UuidFunc::UuidBlob => write!(f, "uuid_blob"), + Self::Uuid4Str => write!(f, "uuid4_str"), + Self::Uuid4 => write!(f, "uuid4"), + Self::Uuid7 => write!(f, "uuid7"), + Self::Uuid7TS => write!(f, "uuid7_timestamp_ms"), + Self::UuidStr => write!(f, "uuid_str"), + 
Self::UuidBlob => write!(f, "uuid_blob"), } } } diff --git a/core/function.rs b/core/function.rs index 0b19a5474..a97c6e1b6 100644 --- a/core/function.rs +++ b/core/function.rs @@ -16,9 +16,9 @@ impl Display for JsonFunc { f, "{}", match self { - JsonFunc::Json => "json".to_string(), - JsonFunc::JsonArray => "json_array".to_string(), - JsonFunc::JsonArrayLength => "json_array_length".to_string(), + Self::Json => "json".to_string(), + Self::JsonArray => "json_array".to_string(), + Self::JsonArrayLength => "json_array_length".to_string(), } ) } @@ -39,14 +39,14 @@ pub enum AggFunc { impl AggFunc { pub fn to_string(&self) -> &str { match self { - AggFunc::Avg => "avg", - AggFunc::Count => "count", - AggFunc::GroupConcat => "group_concat", - AggFunc::Max => "max", - AggFunc::Min => "min", - AggFunc::StringAgg => "string_agg", - AggFunc::Sum => "sum", - AggFunc::Total => "total", + Self::Avg => "avg", + Self::Count => "count", + Self::GroupConcat => "group_concat", + Self::Max => "max", + Self::Min => "min", + Self::StringAgg => "string_agg", + Self::Sum => "sum", + Self::Total => "total", } } } @@ -98,46 +98,46 @@ pub enum ScalarFunc { impl Display for ScalarFunc { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let str = match self { - ScalarFunc::Cast => "cast".to_string(), - ScalarFunc::Char => "char".to_string(), - ScalarFunc::Coalesce => "coalesce".to_string(), - ScalarFunc::Concat => "concat".to_string(), - ScalarFunc::ConcatWs => "concat_ws".to_string(), - ScalarFunc::Glob => "glob".to_string(), - ScalarFunc::IfNull => "ifnull".to_string(), - ScalarFunc::Iif => "iif".to_string(), - ScalarFunc::Instr => "instr".to_string(), - ScalarFunc::Like => "like(2)".to_string(), - ScalarFunc::Abs => "abs".to_string(), - ScalarFunc::Upper => "upper".to_string(), - ScalarFunc::Lower => "lower".to_string(), - ScalarFunc::Random => "random".to_string(), - ScalarFunc::RandomBlob => "randomblob".to_string(), - ScalarFunc::Trim => "trim".to_string(), - 
ScalarFunc::LTrim => "ltrim".to_string(), - ScalarFunc::RTrim => "rtrim".to_string(), - ScalarFunc::Round => "round".to_string(), - ScalarFunc::Length => "length".to_string(), - ScalarFunc::OctetLength => "octet_length".to_string(), - ScalarFunc::Min => "min".to_string(), - ScalarFunc::Max => "max".to_string(), - ScalarFunc::Nullif => "nullif".to_string(), - ScalarFunc::Sign => "sign".to_string(), - ScalarFunc::Substr => "substr".to_string(), - ScalarFunc::Substring => "substring".to_string(), - ScalarFunc::Soundex => "soundex".to_string(), - ScalarFunc::Date => "date".to_string(), - ScalarFunc::Time => "time".to_string(), - ScalarFunc::Typeof => "typeof".to_string(), - ScalarFunc::Unicode => "unicode".to_string(), - ScalarFunc::Quote => "quote".to_string(), - ScalarFunc::SqliteVersion => "sqlite_version".to_string(), - ScalarFunc::UnixEpoch => "unixepoch".to_string(), - ScalarFunc::Hex => "hex".to_string(), - ScalarFunc::Unhex => "unhex".to_string(), - ScalarFunc::ZeroBlob => "zeroblob".to_string(), - ScalarFunc::LastInsertRowid => "last_insert_rowid".to_string(), - ScalarFunc::Replace => "replace".to_string(), + Self::Cast => "cast".to_string(), + Self::Char => "char".to_string(), + Self::Coalesce => "coalesce".to_string(), + Self::Concat => "concat".to_string(), + Self::ConcatWs => "concat_ws".to_string(), + Self::Glob => "glob".to_string(), + Self::IfNull => "ifnull".to_string(), + Self::Iif => "iif".to_string(), + Self::Instr => "instr".to_string(), + Self::Like => "like(2)".to_string(), + Self::Abs => "abs".to_string(), + Self::Upper => "upper".to_string(), + Self::Lower => "lower".to_string(), + Self::Random => "random".to_string(), + Self::RandomBlob => "randomblob".to_string(), + Self::Trim => "trim".to_string(), + Self::LTrim => "ltrim".to_string(), + Self::RTrim => "rtrim".to_string(), + Self::Round => "round".to_string(), + Self::Length => "length".to_string(), + Self::OctetLength => "octet_length".to_string(), + Self::Min => "min".to_string(), + 
Self::Max => "max".to_string(), + Self::Nullif => "nullif".to_string(), + Self::Sign => "sign".to_string(), + Self::Substr => "substr".to_string(), + Self::Substring => "substring".to_string(), + Self::Soundex => "soundex".to_string(), + Self::Date => "date".to_string(), + Self::Time => "time".to_string(), + Self::Typeof => "typeof".to_string(), + Self::Unicode => "unicode".to_string(), + Self::Quote => "quote".to_string(), + Self::SqliteVersion => "sqlite_version".to_string(), + Self::UnixEpoch => "unixepoch".to_string(), + Self::Hex => "hex".to_string(), + Self::Unhex => "unhex".to_string(), + Self::ZeroBlob => "zeroblob".to_string(), + Self::LastInsertRowid => "last_insert_rowid".to_string(), + Self::Replace => "replace".to_string(), }; write!(f, "{}", str) } @@ -186,37 +186,34 @@ pub enum MathFuncArity { impl MathFunc { pub fn arity(&self) -> MathFuncArity { match self { - MathFunc::Pi => MathFuncArity::Nullary, + Self::Pi => MathFuncArity::Nullary, + Self::Acos + | Self::Acosh + | Self::Asin + | Self::Asinh + | Self::Atan + | Self::Atanh + | Self::Ceil + | Self::Ceiling + | Self::Cos + | Self::Cosh + | Self::Degrees + | Self::Exp + | Self::Floor + | Self::Ln + | Self::Log10 + | Self::Log2 + | Self::Radians + | Self::Sin + | Self::Sinh + | Self::Sqrt + | Self::Tan + | Self::Tanh + | Self::Trunc => MathFuncArity::Unary, - MathFunc::Acos - | MathFunc::Acosh - | MathFunc::Asin - | MathFunc::Asinh - | MathFunc::Atan - | MathFunc::Atanh - | MathFunc::Ceil - | MathFunc::Ceiling - | MathFunc::Cos - | MathFunc::Cosh - | MathFunc::Degrees - | MathFunc::Exp - | MathFunc::Floor - | MathFunc::Ln - | MathFunc::Log10 - | MathFunc::Log2 - | MathFunc::Radians - | MathFunc::Sin - | MathFunc::Sinh - | MathFunc::Sqrt - | MathFunc::Tan - | MathFunc::Tanh - | MathFunc::Trunc => MathFuncArity::Unary, + Self::Atan2 | Self::Mod | Self::Pow | Self::Power => MathFuncArity::Binary, - MathFunc::Atan2 | MathFunc::Mod | MathFunc::Pow | MathFunc::Power => { - MathFuncArity::Binary - } - - 
MathFunc::Log => MathFuncArity::UnaryOrBinary, + Self::Log => MathFuncArity::UnaryOrBinary, } } } @@ -224,35 +221,35 @@ impl MathFunc { impl Display for MathFunc { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let str = match self { - MathFunc::Acos => "acos".to_string(), - MathFunc::Acosh => "acosh".to_string(), - MathFunc::Asin => "asin".to_string(), - MathFunc::Asinh => "asinh".to_string(), - MathFunc::Atan => "atan".to_string(), - MathFunc::Atan2 => "atan2".to_string(), - MathFunc::Atanh => "atanh".to_string(), - MathFunc::Ceil => "ceil".to_string(), - MathFunc::Ceiling => "ceiling".to_string(), - MathFunc::Cos => "cos".to_string(), - MathFunc::Cosh => "cosh".to_string(), - MathFunc::Degrees => "degrees".to_string(), - MathFunc::Exp => "exp".to_string(), - MathFunc::Floor => "floor".to_string(), - MathFunc::Ln => "ln".to_string(), - MathFunc::Log => "log".to_string(), - MathFunc::Log10 => "log10".to_string(), - MathFunc::Log2 => "log2".to_string(), - MathFunc::Mod => "mod".to_string(), - MathFunc::Pi => "pi".to_string(), - MathFunc::Pow => "pow".to_string(), - MathFunc::Power => "power".to_string(), - MathFunc::Radians => "radians".to_string(), - MathFunc::Sin => "sin".to_string(), - MathFunc::Sinh => "sinh".to_string(), - MathFunc::Sqrt => "sqrt".to_string(), - MathFunc::Tan => "tan".to_string(), - MathFunc::Tanh => "tanh".to_string(), - MathFunc::Trunc => "trunc".to_string(), + Self::Acos => "acos".to_string(), + Self::Acosh => "acosh".to_string(), + Self::Asin => "asin".to_string(), + Self::Asinh => "asinh".to_string(), + Self::Atan => "atan".to_string(), + Self::Atan2 => "atan2".to_string(), + Self::Atanh => "atanh".to_string(), + Self::Ceil => "ceil".to_string(), + Self::Ceiling => "ceiling".to_string(), + Self::Cos => "cos".to_string(), + Self::Cosh => "cosh".to_string(), + Self::Degrees => "degrees".to_string(), + Self::Exp => "exp".to_string(), + Self::Floor => "floor".to_string(), + Self::Ln => "ln".to_string(), + Self::Log => 
"log".to_string(), + Self::Log10 => "log10".to_string(), + Self::Log2 => "log2".to_string(), + Self::Mod => "mod".to_string(), + Self::Pi => "pi".to_string(), + Self::Pow => "pow".to_string(), + Self::Power => "power".to_string(), + Self::Radians => "radians".to_string(), + Self::Sin => "sin".to_string(), + Self::Sinh => "sinh".to_string(), + Self::Sqrt => "sqrt".to_string(), + Self::Tan => "tan".to_string(), + Self::Tanh => "tanh".to_string(), + Self::Trunc => "trunc".to_string(), }; write!(f, "{}", str) } @@ -271,12 +268,12 @@ pub enum Func { impl Display for Func { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Func::Agg(agg_func) => write!(f, "{}", agg_func.to_string()), - Func::Scalar(scalar_func) => write!(f, "{}", scalar_func), - Func::Math(math_func) => write!(f, "{}", math_func), + Self::Agg(agg_func) => write!(f, "{}", agg_func.to_string()), + Self::Scalar(scalar_func) => write!(f, "{}", scalar_func), + Self::Math(math_func) => write!(f, "{}", math_func), #[cfg(feature = "json")] - Func::Json(json_func) => write!(f, "{}", json_func), - Func::Extension(ext_func) => write!(f, "{}", ext_func), + Self::Json(json_func) => write!(f, "{}", json_func), + Self::Extension(ext_func) => write!(f, "{}", ext_func), } } } @@ -288,92 +285,92 @@ pub struct FuncCtx { } impl Func { - pub fn resolve_function(name: &str, arg_count: usize) -> Result { + pub fn resolve_function(name: &str, arg_count: usize) -> Result { match name { - "avg" => Ok(Func::Agg(AggFunc::Avg)), - "count" => Ok(Func::Agg(AggFunc::Count)), - "group_concat" => Ok(Func::Agg(AggFunc::GroupConcat)), - "max" if arg_count == 0 || arg_count == 1 => Ok(Func::Agg(AggFunc::Max)), - "max" if arg_count > 1 => Ok(Func::Scalar(ScalarFunc::Max)), - "min" if arg_count == 0 || arg_count == 1 => Ok(Func::Agg(AggFunc::Min)), - "min" if arg_count > 1 => Ok(Func::Scalar(ScalarFunc::Min)), - "nullif" if arg_count == 2 => Ok(Func::Scalar(ScalarFunc::Nullif)), - "string_agg" => 
Ok(Func::Agg(AggFunc::StringAgg)), - "sum" => Ok(Func::Agg(AggFunc::Sum)), - "total" => Ok(Func::Agg(AggFunc::Total)), - "char" => Ok(Func::Scalar(ScalarFunc::Char)), - "coalesce" => Ok(Func::Scalar(ScalarFunc::Coalesce)), - "concat" => Ok(Func::Scalar(ScalarFunc::Concat)), - "concat_ws" => Ok(Func::Scalar(ScalarFunc::ConcatWs)), - "glob" => Ok(Func::Scalar(ScalarFunc::Glob)), - "ifnull" => Ok(Func::Scalar(ScalarFunc::IfNull)), - "iif" => Ok(Func::Scalar(ScalarFunc::Iif)), - "instr" => Ok(Func::Scalar(ScalarFunc::Instr)), - "like" => Ok(Func::Scalar(ScalarFunc::Like)), - "abs" => Ok(Func::Scalar(ScalarFunc::Abs)), - "upper" => Ok(Func::Scalar(ScalarFunc::Upper)), - "lower" => Ok(Func::Scalar(ScalarFunc::Lower)), - "random" => Ok(Func::Scalar(ScalarFunc::Random)), - "randomblob" => Ok(Func::Scalar(ScalarFunc::RandomBlob)), - "trim" => Ok(Func::Scalar(ScalarFunc::Trim)), - "ltrim" => Ok(Func::Scalar(ScalarFunc::LTrim)), - "rtrim" => Ok(Func::Scalar(ScalarFunc::RTrim)), - "round" => Ok(Func::Scalar(ScalarFunc::Round)), - "length" => Ok(Func::Scalar(ScalarFunc::Length)), - "octet_length" => Ok(Func::Scalar(ScalarFunc::OctetLength)), - "sign" => Ok(Func::Scalar(ScalarFunc::Sign)), - "substr" => Ok(Func::Scalar(ScalarFunc::Substr)), - "substring" => Ok(Func::Scalar(ScalarFunc::Substring)), - "date" => Ok(Func::Scalar(ScalarFunc::Date)), - "time" => Ok(Func::Scalar(ScalarFunc::Time)), - "typeof" => Ok(Func::Scalar(ScalarFunc::Typeof)), - "last_insert_rowid" => Ok(Func::Scalar(ScalarFunc::LastInsertRowid)), - "unicode" => Ok(Func::Scalar(ScalarFunc::Unicode)), - "quote" => Ok(Func::Scalar(ScalarFunc::Quote)), - "sqlite_version" => Ok(Func::Scalar(ScalarFunc::SqliteVersion)), - "replace" => Ok(Func::Scalar(ScalarFunc::Replace)), + "avg" => Ok(Self::Agg(AggFunc::Avg)), + "count" => Ok(Self::Agg(AggFunc::Count)), + "group_concat" => Ok(Self::Agg(AggFunc::GroupConcat)), + "max" if arg_count == 0 || arg_count == 1 => Ok(Self::Agg(AggFunc::Max)), + "max" if arg_count > 1 => 
Ok(Self::Scalar(ScalarFunc::Max)), + "min" if arg_count == 0 || arg_count == 1 => Ok(Self::Agg(AggFunc::Min)), + "min" if arg_count > 1 => Ok(Self::Scalar(ScalarFunc::Min)), + "nullif" if arg_count == 2 => Ok(Self::Scalar(ScalarFunc::Nullif)), + "string_agg" => Ok(Self::Agg(AggFunc::StringAgg)), + "sum" => Ok(Self::Agg(AggFunc::Sum)), + "total" => Ok(Self::Agg(AggFunc::Total)), + "char" => Ok(Self::Scalar(ScalarFunc::Char)), + "coalesce" => Ok(Self::Scalar(ScalarFunc::Coalesce)), + "concat" => Ok(Self::Scalar(ScalarFunc::Concat)), + "concat_ws" => Ok(Self::Scalar(ScalarFunc::ConcatWs)), + "glob" => Ok(Self::Scalar(ScalarFunc::Glob)), + "ifnull" => Ok(Self::Scalar(ScalarFunc::IfNull)), + "iif" => Ok(Self::Scalar(ScalarFunc::Iif)), + "instr" => Ok(Self::Scalar(ScalarFunc::Instr)), + "like" => Ok(Self::Scalar(ScalarFunc::Like)), + "abs" => Ok(Self::Scalar(ScalarFunc::Abs)), + "upper" => Ok(Self::Scalar(ScalarFunc::Upper)), + "lower" => Ok(Self::Scalar(ScalarFunc::Lower)), + "random" => Ok(Self::Scalar(ScalarFunc::Random)), + "randomblob" => Ok(Self::Scalar(ScalarFunc::RandomBlob)), + "trim" => Ok(Self::Scalar(ScalarFunc::Trim)), + "ltrim" => Ok(Self::Scalar(ScalarFunc::LTrim)), + "rtrim" => Ok(Self::Scalar(ScalarFunc::RTrim)), + "round" => Ok(Self::Scalar(ScalarFunc::Round)), + "length" => Ok(Self::Scalar(ScalarFunc::Length)), + "octet_length" => Ok(Self::Scalar(ScalarFunc::OctetLength)), + "sign" => Ok(Self::Scalar(ScalarFunc::Sign)), + "substr" => Ok(Self::Scalar(ScalarFunc::Substr)), + "substring" => Ok(Self::Scalar(ScalarFunc::Substring)), + "date" => Ok(Self::Scalar(ScalarFunc::Date)), + "time" => Ok(Self::Scalar(ScalarFunc::Time)), + "typeof" => Ok(Self::Scalar(ScalarFunc::Typeof)), + "last_insert_rowid" => Ok(Self::Scalar(ScalarFunc::LastInsertRowid)), + "unicode" => Ok(Self::Scalar(ScalarFunc::Unicode)), + "quote" => Ok(Self::Scalar(ScalarFunc::Quote)), + "sqlite_version" => Ok(Self::Scalar(ScalarFunc::SqliteVersion)), + "replace" => 
Ok(Self::Scalar(ScalarFunc::Replace)), #[cfg(feature = "json")] - "json" => Ok(Func::Json(JsonFunc::Json)), + "json" => Ok(Self::Json(JsonFunc::Json)), #[cfg(feature = "json")] - "json_array" => Ok(Func::Json(JsonFunc::JsonArray)), + "json_array_length" => Ok(Self::Json(JsonFunc::JsonArrayLength)), #[cfg(feature = "json")] - "json_array_length" => Ok(Func::Json(JsonFunc::JsonArrayLength)), - "unixepoch" => Ok(Func::Scalar(ScalarFunc::UnixEpoch)), - "hex" => Ok(Func::Scalar(ScalarFunc::Hex)), - "unhex" => Ok(Func::Scalar(ScalarFunc::Unhex)), - "zeroblob" => Ok(Func::Scalar(ScalarFunc::ZeroBlob)), - "soundex" => Ok(Func::Scalar(ScalarFunc::Soundex)), - "acos" => Ok(Func::Math(MathFunc::Acos)), - "acosh" => Ok(Func::Math(MathFunc::Acosh)), - "asin" => Ok(Func::Math(MathFunc::Asin)), - "asinh" => Ok(Func::Math(MathFunc::Asinh)), - "atan" => Ok(Func::Math(MathFunc::Atan)), - "atan2" => Ok(Func::Math(MathFunc::Atan2)), - "atanh" => Ok(Func::Math(MathFunc::Atanh)), - "ceil" => Ok(Func::Math(MathFunc::Ceil)), - "ceiling" => Ok(Func::Math(MathFunc::Ceiling)), - "cos" => Ok(Func::Math(MathFunc::Cos)), - "cosh" => Ok(Func::Math(MathFunc::Cosh)), - "degrees" => Ok(Func::Math(MathFunc::Degrees)), - "exp" => Ok(Func::Math(MathFunc::Exp)), - "floor" => Ok(Func::Math(MathFunc::Floor)), - "ln" => Ok(Func::Math(MathFunc::Ln)), - "log" => Ok(Func::Math(MathFunc::Log)), - "log10" => Ok(Func::Math(MathFunc::Log10)), - "log2" => Ok(Func::Math(MathFunc::Log2)), - "mod" => Ok(Func::Math(MathFunc::Mod)), - "pi" => Ok(Func::Math(MathFunc::Pi)), - "pow" => Ok(Func::Math(MathFunc::Pow)), - "power" => Ok(Func::Math(MathFunc::Power)), - "radians" => Ok(Func::Math(MathFunc::Radians)), - "sin" => Ok(Func::Math(MathFunc::Sin)), - "sinh" => Ok(Func::Math(MathFunc::Sinh)), - "sqrt" => Ok(Func::Math(MathFunc::Sqrt)), - "tan" => Ok(Func::Math(MathFunc::Tan)), - "tanh" => Ok(Func::Math(MathFunc::Tanh)), - "trunc" => Ok(Func::Math(MathFunc::Trunc)), + "json_array" => Ok(Self::Json(JsonFunc::JsonArray)), 
+ "unixepoch" => Ok(Self::Scalar(ScalarFunc::UnixEpoch)), + "hex" => Ok(Self::Scalar(ScalarFunc::Hex)), + "unhex" => Ok(Self::Scalar(ScalarFunc::Unhex)), + "zeroblob" => Ok(Self::Scalar(ScalarFunc::ZeroBlob)), + "soundex" => Ok(Self::Scalar(ScalarFunc::Soundex)), + "acos" => Ok(Self::Math(MathFunc::Acos)), + "acosh" => Ok(Self::Math(MathFunc::Acosh)), + "asin" => Ok(Self::Math(MathFunc::Asin)), + "asinh" => Ok(Self::Math(MathFunc::Asinh)), + "atan" => Ok(Self::Math(MathFunc::Atan)), + "atan2" => Ok(Self::Math(MathFunc::Atan2)), + "atanh" => Ok(Self::Math(MathFunc::Atanh)), + "ceil" => Ok(Self::Math(MathFunc::Ceil)), + "ceiling" => Ok(Self::Math(MathFunc::Ceiling)), + "cos" => Ok(Self::Math(MathFunc::Cos)), + "cosh" => Ok(Self::Math(MathFunc::Cosh)), + "degrees" => Ok(Self::Math(MathFunc::Degrees)), + "exp" => Ok(Self::Math(MathFunc::Exp)), + "floor" => Ok(Self::Math(MathFunc::Floor)), + "ln" => Ok(Self::Math(MathFunc::Ln)), + "log" => Ok(Self::Math(MathFunc::Log)), + "log10" => Ok(Self::Math(MathFunc::Log10)), + "log2" => Ok(Self::Math(MathFunc::Log2)), + "mod" => Ok(Self::Math(MathFunc::Mod)), + "pi" => Ok(Self::Math(MathFunc::Pi)), + "pow" => Ok(Self::Math(MathFunc::Pow)), + "power" => Ok(Self::Math(MathFunc::Power)), + "radians" => Ok(Self::Math(MathFunc::Radians)), + "sin" => Ok(Self::Math(MathFunc::Sin)), + "sinh" => Ok(Self::Math(MathFunc::Sinh)), + "sqrt" => Ok(Self::Math(MathFunc::Sqrt)), + "tan" => Ok(Self::Math(MathFunc::Tan)), + "tanh" => Ok(Self::Math(MathFunc::Tanh)), + "trunc" => Ok(Self::Math(MathFunc::Trunc)), _ => match ExtFunc::resolve_function(name, arg_count) { - Some(ext_func) => Ok(Func::Extension(ext_func)), + Some(ext_func) => Ok(Self::Extension(ext_func)), None => Err(()), }, } diff --git a/core/io/mod.rs b/core/io/mod.rs index 0765cb748..3bed97b16 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -51,9 +51,9 @@ pub struct ReadCompletion { impl Completion { pub fn complete(&self, result: i32) { match self { - Completion::Read(r) => 
r.complete(), - Completion::Write(w) => w.complete(result), - Completion::Sync(s) => s.complete(result), // fix + Self::Read(r) => r.complete(), + Self::Write(w) => w.complete(result), + Self::Sync(s) => s.complete(result), // fix } } } diff --git a/core/json/de.rs b/core/json/de.rs index aac5cb86f..02bad4adb 100644 --- a/core/json/de.rs +++ b/core/json/de.rs @@ -167,7 +167,7 @@ impl<'de> Deserializer<'de> { } fn from_pair(pair: Pair<'de, Rule>) -> Self { - Deserializer { pair: Some(pair) } + Self { pair: Some(pair) } } } diff --git a/core/json/error.rs b/core/json/error.rs index aef775968..870601738 100644 --- a/core/json/error.rs +++ b/core/json/error.rs @@ -43,7 +43,7 @@ impl From> for Error { pest::error::LineColLocation::Pos((l, c)) => (l, c), pest::error::LineColLocation::Span((l, c), (_, _)) => (l, c), }; - Error::Message { + Self::Message { msg: err.to_string(), location: Some(Location { line, column }), } @@ -52,7 +52,7 @@ impl From> for Error { impl From for Error { fn from(err: std::io::Error) -> Self { - Error::Message { + Self::Message { msg: err.to_string(), location: None, } @@ -61,7 +61,7 @@ impl From for Error { impl From for Error { fn from(err: std::str::Utf8Error) -> Self { - Error::Message { + Self::Message { msg: err.to_string(), location: None, } @@ -70,7 +70,7 @@ impl From for Error { impl ser::Error for Error { fn custom(msg: T) -> Self { - Error::Message { + Self::Message { msg: msg.to_string(), location: None, } @@ -79,7 +79,7 @@ impl ser::Error for Error { impl de::Error for Error { fn custom(msg: T) -> Self { - Error::Message { + Self::Message { msg: msg.to_string(), location: None, } @@ -89,7 +89,7 @@ impl de::Error for Error { impl Display for Error { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Error::Message { ref msg, .. } => write!(formatter, "{}", msg), + Self::Message { ref msg, .. 
} => write!(formatter, "{}", msg), } } } diff --git a/core/schema.rs b/core/schema.rs index b038c2dbb..b9f9bdccf 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -57,39 +57,39 @@ impl Table { pub fn get_rowid_alias_column(&self) -> Option<(usize, &Column)> { match self { - Table::BTree(table) => table.get_rowid_alias_column(), - Table::Index(_) => None, - Table::Pseudo(_) => None, + Self::BTree(table) => table.get_rowid_alias_column(), + Self::Index(_) => None, + Self::Pseudo(_) => None, } } pub fn column_is_rowid_alias(&self, col: &Column) -> bool { match self { Table::BTree(table) => table.column_is_rowid_alias(col), - Table::Index(_) => false, - Table::Pseudo(_) => false, + Self::Index(_) => false, + Self::Pseudo(_) => false, } } pub fn get_name(&self) -> &str { match self { - Table::BTree(table) => &table.name, - Table::Index(index) => &index.name, - Table::Pseudo(_) => "", + Self::BTree(table) => &table.name, + Self::Index(index) => &index.name, + Self::Pseudo(_) => "", } } pub fn column_index_to_name(&self, index: usize) -> Option<&str> { match self { - Table::BTree(table) => match table.columns.get(index) { + Self::BTree(table) => match table.columns.get(index) { Some(column) => Some(&column.name), None => None, }, - Table::Index(i) => match i.columns.get(index) { + Self::Index(i) => match i.columns.get(index) { Some(column) => Some(&column.name), None => None, }, - Table::Pseudo(table) => match table.columns.get(index) { + Self::Pseudo(table) => match table.columns.get(index) { Some(_) => None, None => None, }, @@ -98,33 +98,33 @@ impl Table { pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> { match self { - Table::BTree(table) => table.get_column(name), - Table::Index(_) => unimplemented!(), - Table::Pseudo(table) => table.get_column(name), + Self::BTree(table) => table.get_column(name), + Self::Index(_) => unimplemented!(), + Self::Pseudo(table) => table.get_column(name), } } pub fn get_column_at(&self, index: usize) -> &Column { match 
self { - Table::BTree(table) => table.columns.get(index).unwrap(), - Table::Index(_) => unimplemented!(), - Table::Pseudo(table) => table.columns.get(index).unwrap(), + Self::BTree(table) => table.columns.get(index).unwrap(), + Self::Index(_) => unimplemented!(), + Self::Pseudo(table) => table.columns.get(index).unwrap(), } } pub fn columns(&self) -> &Vec { match self { - Table::BTree(table) => &table.columns, - Table::Index(_) => unimplemented!(), - Table::Pseudo(table) => &table.columns, + Self::BTree(table) => &table.columns, + Self::Index(_) => unimplemented!(), + Self::Pseudo(table) => &table.columns, } } pub fn has_rowid(&self) -> bool { match self { - Table::BTree(table) => table.has_rowid, - Table::Index(_) => unimplemented!(), - Table::Pseudo(_) => unimplemented!(), + Self::BTree(table) => table.has_rowid, + Self::Index(_) => unimplemented!(), + Self::Pseudo(_) => unimplemented!(), } } } @@ -132,8 +132,8 @@ impl Table { impl PartialEq for Table { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Table::BTree(a), Table::BTree(b)) => Rc::ptr_eq(a, b), - (Table::Pseudo(a), Table::Pseudo(b)) => Rc::ptr_eq(a, b), + (Self::BTree(a), Self::BTree(b)) => Rc::ptr_eq(a, b), + (Self::Pseudo(a), Self::Pseudo(b)) => Rc::ptr_eq(a, b), _ => false, } } @@ -386,12 +386,12 @@ pub enum Type { impl fmt::Display for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let s = match self { - Type::Null => "NULL", - Type::Text => "TEXT", - Type::Numeric => "NUMERIC", - Type::Integer => "INTEGER", - Type::Real => "REAL", - Type::Blob => "BLOB", + Self::Null => "NULL", + Self::Text => "TEXT", + Self::Numeric => "NUMERIC", + Self::Integer => "INTEGER", + Self::Real => "REAL", + Self::Blob => "BLOB", }; write!(f, "{}", s) } diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 503cd2900..034030d04 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -40,8 +40,8 @@ const PAGE_DIRTY: usize = 0b1000; const PAGE_LOADED: usize = 0b10000; 
impl Page { - pub fn new(id: usize) -> Page { - Page { + pub fn new(id: usize) -> Self { + Self { inner: UnsafeCell::new(PageInner { flags: AtomicUsize::new(0), contents: None, diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index f86c20c26..1ee68476b 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -603,7 +603,7 @@ pub trait Optimizable { impl Optimizable for ast::Expr { fn is_rowid_alias_of(&self, table_index: usize) -> bool { match self { - ast::Expr::Column { + Self::Column { table, is_rowid_alias, .. @@ -618,7 +618,7 @@ impl Optimizable for ast::Expr { available_indexes: &[Rc], ) -> Result> { match self { - ast::Expr::Column { table, column, .. } => { + Self::Column { table, column, .. } => { if *table != table_index { return Ok(None); } @@ -636,7 +636,7 @@ impl Optimizable for ast::Expr { } Ok(None) } - ast::Expr::Binary(lhs, op, rhs) => { + Self::Binary(lhs, op, rhs) => { let lhs_index = lhs.check_index_scan(table_index, referenced_tables, available_indexes)?; if lhs_index.is_some() { @@ -648,7 +648,7 @@ impl Optimizable for ast::Expr { // swap lhs and rhs let lhs_new = rhs.take_ownership(); let rhs_new = lhs.take_ownership(); - *self = ast::Expr::Binary(Box::new(lhs_new), *op, Box::new(rhs_new)); + *self = Self::Binary(Box::new(lhs_new), *op, Box::new(rhs_new)); return Ok(rhs_index); } Ok(None) @@ -658,7 +658,7 @@ impl Optimizable for ast::Expr { } fn check_constant(&self) -> Result> { match self { - ast::Expr::Id(id) => { + Self::Id(id) => { // true and false are special constants that are effectively aliases for 1 and 0 if id.0.eq_ignore_ascii_case("true") { return Ok(Some(ConstantPredicate::AlwaysTrue)); @@ -668,7 +668,7 @@ impl Optimizable for ast::Expr { } return Ok(None); } - ast::Expr::Literal(lit) => match lit { + Self::Literal(lit) => match lit { ast::Literal::Null => Ok(Some(ConstantPredicate::AlwaysFalse)), ast::Literal::Numeric(b) => { if let Ok(int_value) = b.parse::() { @@ -710,7 +710,7 @@ 
impl Optimizable for ast::Expr { } _ => Ok(None), }, - ast::Expr::Unary(op, expr) => { + Self::Unary(op, expr) => { if *op == ast::UnaryOperator::Not { let trivial = expr.check_constant()?; return Ok(trivial.map(|t| match t { @@ -726,7 +726,7 @@ impl Optimizable for ast::Expr { Ok(None) } - ast::Expr::InList { lhs: _, not, rhs } => { + Self::InList { lhs: _, not, rhs } => { if rhs.is_none() { return Ok(Some(if *not { ConstantPredicate::AlwaysTrue @@ -745,7 +745,7 @@ impl Optimizable for ast::Expr { Ok(None) } - ast::Expr::Binary(lhs, op, rhs) => { + Self::Binary(lhs, op, rhs) => { let lhs_trivial = lhs.check_constant()?; let rhs_trivial = rhs.check_constant()?; match op { @@ -949,6 +949,6 @@ impl TakeOwnership for ast::Expr { impl TakeOwnership for SourceOperator { fn take_ownership(&mut self) -> Self { - std::mem::replace(self, SourceOperator::Nothing) + std::mem::replace(self, Self::Nothing) } } diff --git a/core/types.rs b/core/types.rs index 8f5951780..3412afcfb 100644 --- a/core/types.rs +++ b/core/types.rs @@ -18,11 +18,11 @@ pub enum Value<'a> { impl<'a> Display for Value<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Value::Null => write!(f, "NULL"), - Value::Integer(i) => write!(f, "{}", i), - Value::Float(fl) => write!(f, "{}", fl), - Value::Text(s) => write!(f, "{}", s), - Value::Blob(b) => write!(f, "{:?}", b), + Self::Null => write!(f, "NULL"), + Self::Integer(i) => write!(f, "{}", i), + Self::Float(fl) => write!(f, "{}", fl), + Self::Text(s) => write!(f, "{}", s), + Self::Blob(b) => write!(f, "{:?}", b), } } } @@ -69,27 +69,27 @@ pub enum OwnedValue { impl OwnedValue { // A helper function that makes building a text OwnedValue easier. 
pub fn build_text(text: Rc) -> Self { - OwnedValue::Text(LimboText::new(text)) + Self::Text(LimboText::new(text)) } } impl Display for OwnedValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - OwnedValue::Null => write!(f, "NULL"), - OwnedValue::Integer(i) => write!(f, "{}", i), - OwnedValue::Float(fl) => write!(f, "{:?}", fl), - OwnedValue::Text(s) => write!(f, "{}", s.value), - OwnedValue::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), - OwnedValue::Agg(a) => match a.as_ref() { + Self::Null => write!(f, "NULL"), + Self::Integer(i) => write!(f, "{}", i), + Self::Float(fl) => write!(f, "{:?}", fl), + Self::Text(s) => write!(f, "{}", s.value), + Self::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)), + Self::Agg(a) => match a.as_ref() { AggContext::Avg(acc, _count) => write!(f, "{}", acc), AggContext::Sum(acc) => write!(f, "{}", acc), AggContext::Count(count) => write!(f, "{}", count), - AggContext::Max(max) => write!(f, "{}", max.as_ref().unwrap_or(&OwnedValue::Null)), - AggContext::Min(min) => write!(f, "{}", min.as_ref().unwrap_or(&OwnedValue::Null)), + AggContext::Max(max) => write!(f, "{}", max.as_ref().unwrap_or(&Self::Null)), + AggContext::Min(min) => write!(f, "{}", min.as_ref().unwrap_or(&Self::Null)), AggContext::GroupConcat(s) => write!(f, "{}", s), }, - OwnedValue::Record(r) => write!(f, "{:?}", r), + Self::Record(r) => write!(f, "{:?}", r), } } } @@ -109,12 +109,12 @@ const NULL: OwnedValue = OwnedValue::Null; impl AggContext { pub fn final_value(&self) -> &OwnedValue { match self { - AggContext::Avg(acc, _count) => acc, - AggContext::Sum(acc) => acc, - AggContext::Count(count) => count, - AggContext::Max(max) => max.as_ref().unwrap_or(&NULL), - AggContext::Min(min) => min.as_ref().unwrap_or(&NULL), - AggContext::GroupConcat(s) => s, + Self::Avg(acc, _count) => acc, + Self::Sum(acc) => acc, + Self::Count(count) => count, + Self::Max(max) => max.as_ref().unwrap_or(&NULL), + Self::Min(min) => 
min.as_ref().unwrap_or(&NULL), + Self::GroupConcat(s) => s, } } } @@ -123,44 +123,38 @@ impl AggContext { impl PartialOrd for OwnedValue { fn partial_cmp(&self, other: &Self) -> Option { match (self, other) { - (OwnedValue::Integer(int_left), OwnedValue::Integer(int_right)) => { - int_left.partial_cmp(int_right) - } - (OwnedValue::Integer(int_left), OwnedValue::Float(float_right)) => { + (Self::Integer(int_left), Self::Integer(int_right)) => int_left.partial_cmp(int_right), + (Self::Integer(int_left), Self::Float(float_right)) => { (*int_left as f64).partial_cmp(float_right) } - (OwnedValue::Float(float_left), OwnedValue::Integer(int_right)) => { + (Self::Float(float_left), Self::Integer(int_right)) => { float_left.partial_cmp(&(*int_right as f64)) } - (OwnedValue::Float(float_left), OwnedValue::Float(float_right)) => { + (Self::Float(float_left), Self::Float(float_right)) => { float_left.partial_cmp(float_right) } // Numeric vs Text/Blob - ( - OwnedValue::Integer(_) | OwnedValue::Float(_), - OwnedValue::Text(_) | OwnedValue::Blob(_), - ) => Some(std::cmp::Ordering::Less), - ( - OwnedValue::Text(_) | OwnedValue::Blob(_), - OwnedValue::Integer(_) | OwnedValue::Float(_), - ) => Some(std::cmp::Ordering::Greater), - - (OwnedValue::Text(text_left), OwnedValue::Text(text_right)) => { - text_left.value.partial_cmp(&text_right.value) + (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => { + Some(std::cmp::Ordering::Less) + } + (Self::Text(_) | Self::Blob(_), Self::Integer(_) | Self::Float(_)) => { + Some(std::cmp::Ordering::Greater) } - // Text vs Blob - (OwnedValue::Text(_), OwnedValue::Blob(_)) => Some(std::cmp::Ordering::Less), - (OwnedValue::Blob(_), OwnedValue::Text(_)) => Some(std::cmp::Ordering::Greater), - (OwnedValue::Blob(blob_left), OwnedValue::Blob(blob_right)) => { - blob_left.partial_cmp(blob_right) + (Self::Text(text_left), Self::Text(text_right)) => { + text_left.value.partial_cmp(&text_right.value) } - (OwnedValue::Null, OwnedValue::Null) 
=> Some(std::cmp::Ordering::Equal), - (OwnedValue::Null, _) => Some(std::cmp::Ordering::Less), - (_, OwnedValue::Null) => Some(std::cmp::Ordering::Greater), - (OwnedValue::Agg(a), OwnedValue::Agg(b)) => a.partial_cmp(b), - (OwnedValue::Agg(a), other) => a.final_value().partial_cmp(other), - (other, OwnedValue::Agg(b)) => other.partial_cmp(b.final_value()), + // Text vs Blob + (Self::Text(_), Self::Blob(_)) => Some(std::cmp::Ordering::Less), + (Self::Blob(_), Self::Text(_)) => Some(std::cmp::Ordering::Greater), + + (Self::Blob(blob_left), Self::Blob(blob_right)) => blob_left.partial_cmp(blob_right), + (Self::Null, Self::Null) => Some(std::cmp::Ordering::Equal), + (Self::Null, _) => Some(std::cmp::Ordering::Less), + (_, Self::Null) => Some(std::cmp::Ordering::Greater), + (Self::Agg(a), Self::Agg(b)) => a.partial_cmp(b), + (Self::Agg(a), other) => a.final_value().partial_cmp(other), + (other, Self::Agg(b)) => other.partial_cmp(b.final_value()), other => todo!("{:?}", other), } } @@ -169,12 +163,12 @@ impl PartialOrd for OwnedValue { impl std::cmp::PartialOrd for AggContext { fn partial_cmp(&self, other: &AggContext) -> Option { match (self, other) { - (AggContext::Avg(a, _), AggContext::Avg(b, _)) => a.partial_cmp(b), - (AggContext::Sum(a), AggContext::Sum(b)) => a.partial_cmp(b), - (AggContext::Count(a), AggContext::Count(b)) => a.partial_cmp(b), - (AggContext::Max(a), AggContext::Max(b)) => a.partial_cmp(b), - (AggContext::Min(a), AggContext::Min(b)) => a.partial_cmp(b), - (AggContext::GroupConcat(a), AggContext::GroupConcat(b)) => a.partial_cmp(b), + (Self::Avg(a, _), Self::Avg(b, _)) => a.partial_cmp(b), + (Self::Sum(a), Self::Sum(b)) => a.partial_cmp(b), + (Self::Count(a), Self::Count(b)) => a.partial_cmp(b), + (Self::Max(a), Self::Max(b)) => a.partial_cmp(b), + (Self::Min(a), Self::Min(b)) => a.partial_cmp(b), + (Self::GroupConcat(a), Self::GroupConcat(b)) => a.partial_cmp(b), _ => None, } } @@ -193,44 +187,38 @@ impl std::ops::Add for OwnedValue { fn add(self, 
rhs: Self) -> Self::Output { match (self, rhs) { - (OwnedValue::Integer(int_left), OwnedValue::Integer(int_right)) => { - OwnedValue::Integer(int_left + int_right) - } - (OwnedValue::Integer(int_left), OwnedValue::Float(float_right)) => { - OwnedValue::Float(int_left as f64 + float_right) - } - (OwnedValue::Float(float_left), OwnedValue::Integer(int_right)) => { - OwnedValue::Float(float_left + int_right as f64) - } - (OwnedValue::Float(float_left), OwnedValue::Float(float_right)) => { - OwnedValue::Float(float_left + float_right) + (Self::Integer(int_left), Self::Integer(int_right)) => { + Self::Integer(int_left + int_right) } - (OwnedValue::Text(string_left), OwnedValue::Text(string_right)) => { - OwnedValue::build_text(Rc::new( - string_left.value.to_string() + &string_right.value.to_string(), - )) + (Self::Integer(int_left), Self::Float(float_right)) => { + Self::Float(int_left as f64 + float_right) } - (OwnedValue::Text(string_left), OwnedValue::Integer(int_right)) => { - OwnedValue::build_text(Rc::new( - string_left.value.to_string() + &int_right.to_string(), - )) + (Self::Float(float_left), Self::Integer(int_right)) => { + Self::Float(float_left + int_right as f64) } - (OwnedValue::Integer(int_left), OwnedValue::Text(string_right)) => { - OwnedValue::build_text(Rc::new( - int_left.to_string() + &string_right.value.to_string(), - )) + (Self::Float(float_left), Self::Float(float_right)) => { + Self::Float(float_left + float_right) } - (OwnedValue::Text(string_left), OwnedValue::Float(float_right)) => { - let string_right = OwnedValue::Float(float_right).to_string(); - OwnedValue::build_text(Rc::new(string_left.value.to_string() + &string_right)) + (Self::Text(string_left), Self::Text(string_right)) => Self::build_text(Rc::new( + string_left.value.to_string() + &string_right.value.to_string(), + )), + (Self::Text(string_left), Self::Integer(int_right)) => Self::build_text(Rc::new( + string_left.value.to_string() + &int_right.to_string(), + )), + 
(Self::Integer(int_left), Self::Text(string_right)) => Self::build_text(Rc::new( + int_left.to_string() + &string_right.value.to_string(), + )), + (Self::Text(string_left), Self::Float(float_right)) => { + let string_right = Self::Float(float_right).to_string(); + Self::build_text(Rc::new(string_left.value.to_string() + &string_right)) } - (OwnedValue::Float(float_left), OwnedValue::Text(string_right)) => { - let string_left = OwnedValue::Float(float_left).to_string(); - OwnedValue::build_text(Rc::new(string_left + &string_right.value.to_string())) + (Self::Float(float_left), Self::Text(string_right)) => { + let string_left = Self::Float(float_left).to_string(); + Self::build_text(Rc::new(string_left + &string_right.value.to_string())) } - (lhs, OwnedValue::Null) => lhs, - (OwnedValue::Null, rhs) => rhs, - _ => OwnedValue::Float(0.0), + (lhs, Self::Null) => lhs, + (Self::Null, rhs) => rhs, + _ => Self::Float(0.0), } } } @@ -240,8 +228,8 @@ impl std::ops::Add for OwnedValue { fn add(self, rhs: f64) -> Self::Output { match self { - OwnedValue::Integer(int_left) => OwnedValue::Float(int_left as f64 + rhs), - OwnedValue::Float(float_left) => OwnedValue::Float(float_left + rhs), + Self::Integer(int_left) => Self::Float(int_left as f64 + rhs), + Self::Float(float_left) => Self::Float(float_left + rhs), _ => unreachable!(), } } @@ -252,8 +240,8 @@ impl std::ops::Add for OwnedValue { fn add(self, rhs: i64) -> Self::Output { match self { - OwnedValue::Integer(int_left) => OwnedValue::Integer(int_left + rhs), - OwnedValue::Float(float_left) => OwnedValue::Float(float_left + rhs as f64), + Self::Integer(int_left) => Self::Integer(int_left + rhs), + Self::Float(float_left) => Self::Float(float_left + rhs as f64), _ => unreachable!(), } } @@ -282,19 +270,19 @@ impl std::ops::Div for OwnedValue { fn div(self, rhs: OwnedValue) -> Self::Output { match (self, rhs) { - (OwnedValue::Integer(int_left), OwnedValue::Integer(int_right)) => { - OwnedValue::Integer(int_left / int_right) + 
(Self::Integer(int_left), Self::Integer(int_right)) => { + Self::Integer(int_left / int_right) } - (OwnedValue::Integer(int_left), OwnedValue::Float(float_right)) => { - OwnedValue::Float(int_left as f64 / float_right) + (Self::Integer(int_left), Self::Float(float_right)) => { + Self::Float(int_left as f64 / float_right) } - (OwnedValue::Float(float_left), OwnedValue::Integer(int_right)) => { - OwnedValue::Float(float_left / int_right as f64) + (Self::Float(float_left), Self::Integer(int_right)) => { + Self::Float(float_left / int_right as f64) } - (OwnedValue::Float(float_left), OwnedValue::Float(float_right)) => { - OwnedValue::Float(float_left / float_right) + (Self::Float(float_left), Self::Float(float_right)) => { + Self::Float(float_left / float_right) } - _ => OwnedValue::Float(0.0), + _ => Self::Float(0.0), } } } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 01c0b3cd1..db4a9b4fd 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -572,7 +572,7 @@ struct RegexCache { } impl RegexCache { fn new() -> Self { - RegexCache { + Self { like: HashMap::new(), glob: HashMap::new(), } diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index ea2392f4e..2c20645af 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -66,9 +66,9 @@ pub(crate) enum Interaction { impl Display for Interaction { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Interaction::Query(query) => write!(f, "{}", query), - Interaction::Assertion(assertion) => write!(f, "ASSERT: {}", assertion.message), - Interaction::Fault(fault) => write!(f, "FAULT: {}", fault), + Self::Query(query) => write!(f, "{}", query), + Self::Assertion(assertion) => write!(f, "ASSERT: {}", assertion.message), + Self::Fault(fault) => write!(f, "FAULT: {}", fault), } } } @@ -120,7 +120,7 @@ impl Interactions { impl InteractionPlan { pub(crate) fn new() -> Self { - InteractionPlan { + Self { plan: Vec::new(), stack: Vec::new(), 
interaction_pointer: 0, @@ -197,7 +197,7 @@ impl ArbitraryFrom for InteractionPlan { impl Interaction { pub(crate) fn execute_query(&self, conn: &mut Rc) -> Result { match self { - Interaction::Query(query) => { + Self::Query(query) => { let query_str = query.to_string(); let rows = conn.query(&query_str); if rows.is_err() { @@ -241,10 +241,10 @@ impl Interaction { Ok(out) } - Interaction::Assertion(_) => { + Self::Assertion(_) => { unreachable!("unexpected: this function should only be called on queries") } - Interaction::Fault(fault) => { + Self::Fault(fault) => { unreachable!("unexpected: this function should only be called on queries") } } @@ -252,10 +252,10 @@ impl Interaction { pub(crate) fn execute_assertion(&self, stack: &Vec) -> Result<()> { match self { - Interaction::Query(_) => { + Self::Query(_) => { unreachable!("unexpected: this function should only be called on assertions") } - Interaction::Assertion(assertion) => { + Self::Assertion(assertion) => { if !assertion.func.as_ref()(stack) { return Err(limbo_core::LimboError::InternalError( assertion.message.clone(), @@ -263,7 +263,7 @@ impl Interaction { } Ok(()) } - Interaction::Fault(_) => { + Self::Fault(_) => { unreachable!("unexpected: this function should only be called on assertions") } } @@ -271,13 +271,13 @@ impl Interaction { pub(crate) fn execute_fault(&self, env: &mut SimulatorEnv, conn_index: usize) -> Result<()> { match self { - Interaction::Query(_) => { + Self::Query(_) => { unreachable!("unexpected: this function should only be called on faults") } - Interaction::Assertion(_) => { + Self::Assertion(_) => { unreachable!("unexpected: this function should only be called on faults") } - Interaction::Fault(fault) => { + Self::Fault(fault) => { match fault { Fault::Disconnect => { match env.connections[conn_index] { diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index 0ff9d44e1..4625ecc47 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs 
@@ -18,7 +18,7 @@ impl Arbitrary for Create { impl ArbitraryFrom> for Select { fn arbitrary_from(rng: &mut R, tables: &Vec

) -> Self { let table = pick(tables, rng); - Select { + Self { table: table.name.clone(), predicate: Predicate::arbitrary_from(rng, table), } @@ -28,7 +28,7 @@ impl ArbitraryFrom> for Select { impl ArbitraryFrom> for Select { fn arbitrary_from(rng: &mut R, tables: &Vec<&Table>) -> Self { let table = pick(tables, rng); - Select { + Self { table: table.name.clone(), predicate: Predicate::arbitrary_from(rng, *table), } @@ -47,7 +47,7 @@ impl ArbitraryFrom
for Insert { .collect() }) .collect(); - Insert { + Self { table: table.name.clone(), values, } @@ -56,7 +56,7 @@ impl ArbitraryFrom
for Insert { impl ArbitraryFrom
for Delete { fn arbitrary_from(rng: &mut R, table: &Table) -> Self { - Delete { + Self { table: table.name.clone(), predicate: Predicate::arbitrary_from(rng, table), } @@ -67,18 +67,18 @@ impl ArbitraryFrom
for Query { fn arbitrary_from(rng: &mut R, table: &Table) -> Self { frequency( vec![ - (1, Box::new(|rng| Query::Create(Create::arbitrary(rng)))), + (1, Box::new(|rng| Self::Create(Create::arbitrary(rng)))), ( 100, - Box::new(|rng| Query::Select(Select::arbitrary_from(rng, &vec![table]))), + Box::new(|rng| Self::Select(Select::arbitrary_from(rng, &vec![table]))), ), ( 100, - Box::new(|rng| Query::Insert(Insert::arbitrary_from(rng, table))), + Box::new(|rng| Self::Insert(Insert::arbitrary_from(rng, table))), ), ( 0, - Box::new(|rng| Query::Delete(Delete::arbitrary_from(rng, table))), + Box::new(|rng| Self::Delete(Delete::arbitrary_from(rng, table))), ), ], rng, @@ -149,14 +149,14 @@ impl ArbitraryFrom<(&Table, bool)> for SimplePredicate { ), }; - SimplePredicate(operator) + Self(operator) } } impl ArbitraryFrom<(&Table, bool)> for CompoundPredicate { fn arbitrary_from(rng: &mut R, (table, predicate_value): &(&Table, bool)) -> Self { // Decide if you want to create an AND or an OR - CompoundPredicate(if rng.gen_bool(0.7) { + Self(if rng.gen_bool(0.7) { // An AND for true requires each of its children to be true // An AND for false requires at least one of its children to be false if *predicate_value { @@ -227,15 +227,15 @@ impl ArbitraryFrom<(&str, &Value)> for Predicate { fn arbitrary_from(rng: &mut R, (column_name, value): &(&str, &Value)) -> Self { one_of( vec![ - Box::new(|rng| Predicate::Eq(column_name.to_string(), (*value).clone())), + Box::new(|rng| Self::Eq(column_name.to_string(), (*value).clone())), Box::new(|rng| { - Predicate::Gt( + Self::Gt( column_name.to_string(), GTValue::arbitrary_from(rng, *value).0, ) }), Box::new(|rng| { - Predicate::Lt( + Self::Lt( column_name.to_string(), LTValue::arbitrary_from(rng, *value).0, ) diff --git a/simulator/generation/table.rs b/simulator/generation/table.rs index 9af2d7d8e..332aeb1f3 100644 --- a/simulator/generation/table.rs +++ b/simulator/generation/table.rs @@ -30,7 +30,7 @@ impl Arbitrary for Column { fn 
arbitrary(rng: &mut R) -> Self { let name = Name::arbitrary(rng).0; let column_type = ColumnType::arbitrary(rng); - Column { + Self { name, column_type, primary: false, @@ -42,12 +42,7 @@ impl Arbitrary for Column { impl Arbitrary for ColumnType { fn arbitrary(rng: &mut R) -> Self { pick( - &vec![ - ColumnType::Integer, - ColumnType::Float, - ColumnType::Text, - ColumnType::Blob, - ], + &vec![Self::Integer, Self::Float, Self::Text, Self::Blob], rng, ) .to_owned() @@ -55,9 +50,9 @@ impl Arbitrary for ColumnType { } impl ArbitraryFrom> for Value { - fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { + fn arbitrary_from(rng: &mut R, values: &Vec<&Self>) -> Self { if values.is_empty() { - return Value::Null; + return Self::Null; } pick(values, rng).to_owned().clone() @@ -67,10 +62,10 @@ impl ArbitraryFrom> for Value { impl ArbitraryFrom for Value { fn arbitrary_from(rng: &mut R, column_type: &ColumnType) -> Self { match column_type { - ColumnType::Integer => Value::Integer(rng.gen_range(i64::MIN..i64::MAX)), - ColumnType::Float => Value::Float(rng.gen_range(-1e10..1e10)), - ColumnType::Text => Value::Text(gen_random_text(rng)), - ColumnType::Blob => Value::Blob(gen_random_text(rng).as_bytes().to_vec()), + ColumnType::Integer => Self::Integer(rng.gen_range(i64::MIN..i64::MAX)), + ColumnType::Float => Self::Float(rng.gen_range(-1e10..1e10)), + ColumnType::Text => Self::Text(gen_random_text(rng)), + ColumnType::Blob => Self::Blob(gen_random_text(rng).as_bytes().to_vec()), } } } @@ -80,25 +75,25 @@ pub(crate) struct LTValue(pub(crate) Value); impl ArbitraryFrom> for LTValue { fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { if values.is_empty() { - return LTValue(Value::Null); + return Self(Value::Null); } let index = pick_index(values.len(), rng); - LTValue::arbitrary_from(rng, values[index]) + Self::arbitrary_from(rng, values[index]) } } impl ArbitraryFrom for LTValue { fn arbitrary_from(rng: &mut R, value: &Value) -> Self { match value { - 
Value::Integer(i) => LTValue(Value::Integer(rng.gen_range(i64::MIN..*i - 1))), - Value::Float(f) => LTValue(Value::Float(rng.gen_range(-1e10..*f - 1.0))), + Value::Integer(i) => Self(Value::Integer(rng.gen_range(i64::MIN..*i - 1))), + Value::Float(f) => Self(Value::Float(rng.gen_range(-1e10..*f - 1.0))), Value::Text(t) => { // Either shorten the string, or make at least one character smaller and mutate the rest let mut t = t.clone(); if rng.gen_bool(0.01) { t.pop(); - LTValue(Value::Text(t)) + Self(Value::Text(t)) } else { let mut t = t.chars().map(|c| c as u32).collect::>(); let index = rng.gen_range(0..t.len()); @@ -111,7 +106,7 @@ impl ArbitraryFrom for LTValue { .into_iter() .map(|c| char::from_u32(c).unwrap_or('z')) .collect::(); - LTValue(Value::Text(t)) + Self(Value::Text(t)) } } Value::Blob(b) => { @@ -119,7 +114,7 @@ impl ArbitraryFrom for LTValue { let mut b = b.clone(); if rng.gen_bool(0.01) { b.pop(); - LTValue(Value::Blob(b)) + Self(Value::Blob(b)) } else { let index = rng.gen_range(0..b.len()); b[index] -= 1; @@ -127,7 +122,7 @@ impl ArbitraryFrom for LTValue { for i in (index + 1)..b.len() { b[i] = rng.gen_range(0..=255); } - LTValue(Value::Blob(b)) + Self(Value::Blob(b)) } } _ => unreachable!(), @@ -140,25 +135,25 @@ pub(crate) struct GTValue(pub(crate) Value); impl ArbitraryFrom> for GTValue { fn arbitrary_from(rng: &mut R, values: &Vec<&Value>) -> Self { if values.is_empty() { - return GTValue(Value::Null); + return Self(Value::Null); } let index = pick_index(values.len(), rng); - GTValue::arbitrary_from(rng, values[index]) + Self::arbitrary_from(rng, values[index]) } } impl ArbitraryFrom for GTValue { fn arbitrary_from(rng: &mut R, value: &Value) -> Self { match value { - Value::Integer(i) => GTValue(Value::Integer(rng.gen_range(*i..i64::MAX))), - Value::Float(f) => GTValue(Value::Float(rng.gen_range(*f..1e10))), + Value::Integer(i) => Self(Value::Integer(rng.gen_range(*i..i64::MAX))), + Value::Float(f) => 
Self(Value::Float(rng.gen_range(*f..1e10))), Value::Text(t) => { // Either lengthen the string, or make at least one character smaller and mutate the rest let mut t = t.clone(); if rng.gen_bool(0.01) { t.push(rng.gen_range(0..=255) as u8 as char); - GTValue(Value::Text(t)) + Self(Value::Text(t)) } else { let mut t = t.chars().map(|c| c as u32).collect::>(); let index = rng.gen_range(0..t.len()); @@ -171,7 +166,7 @@ impl ArbitraryFrom for GTValue { .into_iter() .map(|c| char::from_u32(c).unwrap_or('a')) .collect::(); - GTValue(Value::Text(t)) + Self(Value::Text(t)) } } Value::Blob(b) => { @@ -179,7 +174,7 @@ impl ArbitraryFrom for GTValue { let mut b = b.clone(); if rng.gen_bool(0.01) { b.push(rng.gen_range(0..=255)); - GTValue(Value::Blob(b)) + Self(Value::Blob(b)) } else { let index = rng.gen_range(0..b.len()); b[index] += 1; @@ -187,7 +182,7 @@ impl ArbitraryFrom for GTValue { for i in (index + 1)..b.len() { b[i] = rng.gen_range(0..=255); } - GTValue(Value::Blob(b)) + Self(Value::Blob(b)) } } _ => unreachable!(), diff --git a/simulator/model/query.rs b/simulator/model/query.rs index 7a12def8d..080d7c577 100644 --- a/simulator/model/query.rs +++ b/simulator/model/query.rs @@ -15,7 +15,7 @@ pub(crate) enum Predicate { impl Display for Predicate { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Predicate::And(predicates) => { + Self::And(predicates) => { if predicates.is_empty() { // todo: Make this TRUE when the bug is fixed write!(f, "TRUE") @@ -30,7 +30,7 @@ impl Display for Predicate { write!(f, ")") } } - Predicate::Or(predicates) => { + Self::Or(predicates) => { if predicates.is_empty() { write!(f, "FALSE") } else { @@ -44,10 +44,10 @@ impl Display for Predicate { write!(f, ")") } } - Predicate::Eq(name, value) => write!(f, "{} = {}", name, value), - Predicate::Neq(name, value) => write!(f, "{} != {}", name, value), - Predicate::Gt(name, value) => write!(f, "{} > {}", name, value), - Predicate::Lt(name, value) => write!(f, 
"{} < {}", name, value), + Self::Eq(name, value) => write!(f, "{} = {}", name, value), + Self::Neq(name, value) => write!(f, "{} != {}", name, value), + Self::Gt(name, value) => write!(f, "{} > {}", name, value), + Self::Lt(name, value) => write!(f, "{} < {}", name, value), } } } @@ -87,7 +87,7 @@ pub(crate) struct Delete { impl Display for Query { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Query::Create(Create { table }) => { + Self::Create(Create { table }) => { write!(f, "CREATE TABLE {} (", table.name)?; for (i, column) in table.columns.iter().enumerate() { @@ -99,11 +99,11 @@ impl Display for Query { write!(f, ")") } - Query::Select(Select { + Self::Select(Select { table, predicate: guard, }) => write!(f, "SELECT * FROM {} WHERE {}", table, guard), - Query::Insert(Insert { table, values }) => { + Self::Insert(Insert { table, values }) => { write!(f, "INSERT INTO {} VALUES ", table)?; for (i, row) in values.iter().enumerate() { if i != 0 { @@ -120,7 +120,7 @@ impl Display for Query { } Ok(()) } - Query::Delete(Delete { + Self::Delete(Delete { table, predicate: guard, }) => write!(f, "DELETE FROM {} WHERE {}", table, guard), diff --git a/simulator/model/table.rs b/simulator/model/table.rs index ccc18f738..e1da6a342 100644 --- a/simulator/model/table.rs +++ b/simulator/model/table.rs @@ -36,10 +36,10 @@ pub(crate) enum ColumnType { impl Display for ColumnType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ColumnType::Integer => write!(f, "INTEGER"), - ColumnType::Float => write!(f, "REAL"), - ColumnType::Text => write!(f, "TEXT"), - ColumnType::Blob => write!(f, "BLOB"), + Self::Integer => write!(f, "INTEGER"), + Self::Float => write!(f, "REAL"), + Self::Text => write!(f, "TEXT"), + Self::Blob => write!(f, "BLOB"), } } } @@ -61,11 +61,11 @@ fn to_sqlite_blob(bytes: &[u8]) -> String { impl Display for Value { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match 
self { - Value::Null => write!(f, "NULL"), - Value::Integer(i) => write!(f, "{}", i), - Value::Float(fl) => write!(f, "{}", fl), - Value::Text(t) => write!(f, "'{}'", t), - Value::Blob(b) => write!(f, "{}", to_sqlite_blob(b)), + Self::Null => write!(f, "NULL"), + Self::Integer(i) => write!(f, "{}", i), + Self::Float(fl) => write!(f, "{}", fl), + Self::Text(t) => write!(f, "'{}'", t), + Self::Blob(b) => write!(f, "{}", to_sqlite_blob(b)), } } } From f6cd7075449c0142bb11b43c09ba176b8a2f740e Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 27 Dec 2024 18:17:06 -0500 Subject: [PATCH 143/144] Add clippy CI, fix or ignore warnings where appropriate --- .github/workflows/rust.yml | 6 + bindings/wasm/lib.rs | 2 +- cli/app.rs | 37 ++- cli/main.rs | 2 +- core/ext/mod.rs | 2 + core/lib.rs | 21 +- core/storage/mod.rs | 3 +- core/storage/pager.rs | 3 +- core/storage/sqlite3_ondisk.rs | 2 +- core/storage/wal.rs | 2 + core/translate/emitter.rs | 5 +- core/translate/expr.rs | 3 +- core/translate/insert.rs | 2 +- core/translate/mod.rs | 16 +- core/translate/optimizer.rs | 2 +- core/translate/plan.rs | 7 +- core/util.rs | 2 +- core/vdbe/insn.rs | 490 ++++++++++++++++++++++++++++++++ core/vdbe/mod.rs | 501 +-------------------------------- macros/src/lib.rs | 22 +- simulator/generation/plan.rs | 18 +- simulator/generation/query.rs | 2 +- simulator/main.rs | 11 +- simulator/model/table.rs | 10 +- simulator/runner/file.rs | 1 - simulator/runner/mod.rs | 1 + sqlite3/src/lib.rs | 8 +- 27 files changed, 596 insertions(+), 585 deletions(-) create mode 100644 core/vdbe/insn.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 0c1d8e4f9..835335ff2 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -39,6 +39,12 @@ jobs: run: cargo test --verbose timeout-minutes: 5 + clippy: + runs-on: ubuntu-latest + steps: + - name: Clippy + run: cargo clippy -- -A clippy::all -W clippy::correctness -W clippy::perf -W clippy::suspicious --deny=warnings + 
build-wasm: runs-on: ubuntu-latest steps: diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index 060ad20f8..1870cca87 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -5,7 +5,7 @@ use std::cell::RefCell; use std::rc::Rc; use std::sync::Arc; use wasm_bindgen::prelude::*; - +#[allow(dead_code)] #[wasm_bindgen] pub struct Database { db: Arc, diff --git a/cli/app.rs b/cli/app.rs index 7e6155543..56f5007ef 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -160,7 +160,7 @@ impl From<&Opts> for Settings { null_value: String::new(), output_mode: opts.output_mode, echo: false, - is_stdout: opts.output == "", + is_stdout: opts.output.is_empty(), output_filename: opts.output.clone(), db_file: opts .database @@ -192,7 +192,6 @@ impl std::fmt::Display for Settings { } impl Limbo { - #[allow(clippy::arc_with_non_send_sync)] pub fn new() -> anyhow::Result { let opts = Opts::parse(); let db_file = opts @@ -229,13 +228,13 @@ impl Limbo { app.writeln("Enter \".help\" for usage hints.")?; app.display_in_memory()?; } - return Ok(app); + Ok(app) } fn handle_first_input(&mut self, cmd: &str) { if cmd.trim().starts_with('.') { - self.handle_dot_command(&cmd); - } else if let Err(e) = self.query(&cmd) { + self.handle_dot_command(cmd); + } else if let Err(e) = self.query(cmd) { eprintln!("{}", e); } std::process::exit(0); @@ -293,7 +292,7 @@ impl Limbo { let db = Database::open_file(self.io.clone(), path)?; self.conn = db.connect(); self.opts.db_file = ":memory:".to_string(); - return Ok(()); + Ok(()) } path => { let io: Arc = Arc::new(limbo_core::PlatformIO::new()?); @@ -301,7 +300,7 @@ impl Limbo { let db = Database::open_file(self.io.clone(), path)?; self.conn = db.connect(); self.opts.db_file = path.to_string(); - return Ok(()); + Ok(()) } } } @@ -317,11 +316,9 @@ impl Limbo { self.opts.is_stdout = false; self.opts.output_mode = OutputMode::Raw; self.opts.output_filename = path.to_string(); - return Ok(()); - } - Err(e) => { - return Err(e.to_string()); + Ok(()) } + 
Err(e) => Err(e.to_string()), } } @@ -333,7 +330,7 @@ impl Limbo { fn set_mode(&mut self, mode: OutputMode) -> Result<(), String> { if mode == OutputMode::Pretty && !self.opts.is_stdout { - return Err("pretty output can only be written to a tty".to_string()); + Err("pretty output can only be written to a tty".to_string()) } else { self.opts.output_mode = mode; Ok(()) @@ -682,17 +679,15 @@ impl Limbo { } } - if tables.len() > 0 { + if !tables.is_empty() { let _ = self.writeln(tables.trim_end()); + } else if let Some(pattern) = pattern { + let _ = self.write_fmt(format_args!( + "Error: Tables with pattern '{}' not found.", + pattern + )); } else { - if let Some(pattern) = pattern { - let _ = self.write_fmt(format_args!( - "Error: Tables with pattern '{}' not found.", - pattern - )); - } else { - let _ = self.writeln("No tables found in the database."); - } + let _ = self.writeln("No tables found in the database."); } } Ok(None) => { diff --git a/cli/main.rs b/cli/main.rs index 9977f6540..8af57e2ca 100644 --- a/cli/main.rs +++ b/cli/main.rs @@ -1,10 +1,10 @@ +#![allow(clippy::arc_with_non_send_sync)] mod app; mod opcodes_dictionary; use rustyline::{error::ReadlineError, DefaultEditor}; use std::sync::atomic::Ordering; -#[allow(clippy::arc_with_non_send_sync)] fn main() -> anyhow::Result<()> { env_logger::init(); let mut app = app::Limbo::new()?; diff --git a/core/ext/mod.rs b/core/ext/mod.rs index ab5a1ce11..2140957d4 100644 --- a/core/ext/mod.rs +++ b/core/ext/mod.rs @@ -9,6 +9,7 @@ pub enum ExtFunc { Uuid(UuidFunc), } +#[allow(unreachable_patterns)] // TODO: remove when more extension funcs added impl std::fmt::Display for ExtFunc { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -19,6 +20,7 @@ impl std::fmt::Display for ExtFunc { } } +#[allow(unreachable_patterns)] impl ExtFunc { pub fn resolve_function(name: &str, num_args: usize) -> Option { match name { diff --git a/core/lib.rs b/core/lib.rs index 85fff4a59..8dc318add 100644 
--- a/core/lib.rs +++ b/core/lib.rs @@ -69,8 +69,8 @@ pub struct Database { header: Rc>, // Shared structures of a Database are the parts that are common to multiple threads that might // create DB connections. - shared_page_cache: Arc>, - shared_wal: Arc>, + _shared_page_cache: Arc>, + _shared_wal: Arc>, } impl Database { @@ -96,6 +96,7 @@ impl Database { Self::open(io, page_io, wal, wal_shared, buffer_pool) } + #[allow(clippy::arc_with_non_send_sync)] pub fn open( io: Arc, page_io: Rc, @@ -109,13 +110,13 @@ impl Database { let version = db_header.borrow().version_number; version.to_string() }); - let shared_page_cache = Arc::new(RwLock::new(DumbLruPageCache::new(10))); + let _shared_page_cache = Arc::new(RwLock::new(DumbLruPageCache::new(10))); let pager = Rc::new(Pager::finish_open( db_header.clone(), page_io, wal, io.clone(), - shared_page_cache.clone(), + _shared_page_cache.clone(), buffer_pool, )?); let bootstrap_schema = Rc::new(RefCell::new(Schema::new())); @@ -124,7 +125,7 @@ impl Database { schema: bootstrap_schema.clone(), header: db_header.clone(), transaction_state: RefCell::new(TransactionState::None), - db: Weak::new(), + _db: Weak::new(), last_insert_rowid: Cell::new(0), }); let mut schema = Schema::new(); @@ -136,8 +137,8 @@ impl Database { pager, schema, header, - shared_page_cache, - shared_wal, + _shared_page_cache, + _shared_wal: shared_wal, })) } @@ -147,7 +148,7 @@ impl Database { schema: self.schema.clone(), header: self.header.clone(), last_insert_rowid: Cell::new(0), - db: Arc::downgrade(self), + _db: Arc::downgrade(self), transaction_state: RefCell::new(TransactionState::None), }) } @@ -206,7 +207,7 @@ pub struct Connection { pager: Rc, schema: Rc>, header: Rc>, - db: Weak, // backpointer to the database holding this connection + _db: Weak, // backpointer to the database holding this connection transaction_state: RefCell, last_insert_rowid: Cell, } @@ -269,7 +270,7 @@ impl Connection { Cmd::ExplainQueryPlan(stmt) => { match stmt { 
ast::Stmt::Select(select) => { - let plan = prepare_select_plan(&*self.schema.borrow(), select)?; + let plan = prepare_select_plan(&self.schema.borrow(), select)?; let plan = optimize_plan(plan)?; println!("{}", plan); } diff --git a/core/storage/mod.rs b/core/storage/mod.rs index b3e9c9df6..498daa970 100644 --- a/core/storage/mod.rs +++ b/core/storage/mod.rs @@ -10,11 +10,12 @@ //! for reading and writing pages to the database file, either local or //! remote. The `Wal` struct is responsible for managing the write-ahead log //! for the database, also either local or remote. - pub(crate) mod btree; pub(crate) mod buffer_pool; pub(crate) mod database; pub(crate) mod page_cache; +#[allow(clippy::arc_with_non_send_sync)] pub(crate) mod pager; pub(crate) mod sqlite3_ondisk; +#[allow(clippy::arc_with_non_send_sync)] pub(crate) mod wal; diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 034030d04..6c37913e4 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -50,6 +50,7 @@ impl Page { } } + #[allow(clippy::mut_from_ref)] pub fn get(&self) -> &mut PageInner { unsafe { &mut *self.inner.get() } } @@ -423,7 +424,7 @@ impl Pager { CheckpointMode::Passive, ) { Ok(CheckpointStatus::IO) => { - self.io.run_once(); + let _ = self.io.run_once(); } Ok(CheckpointStatus::Done) => { break; diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 0403bee87..d063fce97 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -1329,7 +1329,7 @@ pub fn payload_overflows( /// The 32-bit integers are big-endian if the magic number in the first 4 bytes of the WAL header is 0x377f0683 /// and the integers are little-endian if the magic number is 0x377f0682. /// The checksum values are always stored in the frame header in a big-endian format regardless of which byte order is used to compute the checksum. - +/// /// The checksum algorithm only works for content which is a multiple of 8 bytes in length. 
/// In other words, if the inputs are x(0) through x(N) then N must be odd. /// The checksum algorithm is as follows: diff --git a/core/storage/wal.rs b/core/storage/wal.rs index 8648efe1c..6c5a5a110 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -195,6 +195,7 @@ struct OngoingCheckpoint { current_page: u64, } +#[allow(dead_code)] pub struct WalFile { io: Arc, buffer_pool: Rc, @@ -219,6 +220,7 @@ pub struct WalFile { // TODO(pere): lock only important parts + pin WalFileShared /// WalFileShared is the part of a WAL that will be shared between threads. A wal has information /// that needs to be communicated between threads so this struct does the job. +#[allow(dead_code)] pub struct WalFileShared { wal_header: Arc>, min_frame: u64, diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index a032aa09e..c9b66d98e 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -13,7 +13,7 @@ use crate::translate::plan::{DeletePlan, IterationDirection, Plan, Search}; use crate::types::{OwnedRecord, OwnedValue}; use crate::util::exprs_are_equivalent; use crate::vdbe::builder::ProgramBuilder; -use crate::vdbe::{BranchOffset, Insn, Program}; +use crate::vdbe::{insn::Insn, BranchOffset, Program}; use crate::{Connection, Result}; use super::expr::{ @@ -102,6 +102,7 @@ pub struct Metadata { } /// Used to distinguish database operations +#[allow(clippy::upper_case_acronyms, dead_code)] #[derive(Debug, Clone)] pub enum OperationMode { SELECT, @@ -173,7 +174,7 @@ fn epilogue( /// Takes a query plan and generates the corresponding bytecode program pub fn emit_program( database_header: Rc>, - mut plan: Plan, + plan: Plan, connection: Weak, ) -> Result { match plan { diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 10405f97c..ff17aa0b0 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -7,7 +7,7 @@ use crate::function::JsonFunc; use crate::function::{AggFunc, Func, FuncCtx, MathFuncArity, ScalarFunc}; use 
crate::schema::Type; use crate::util::{exprs_are_equivalent, normalize_ident}; -use crate::vdbe::{builder::ProgramBuilder, BranchOffset, Insn}; +use crate::vdbe::{builder::ProgramBuilder, insn::Insn, BranchOffset}; use crate::Result; use super::plan::{Aggregate, BTreeTableReference}; @@ -1748,6 +1748,7 @@ pub fn translate_expr( Ok(target_register) } }, + #[allow(unreachable_patterns)] _ => unreachable!("{ext_func} not implemented yet"), }, Func::Math(math_func) => match math_func.arity() { diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 12c9ed016..31b263195 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -11,7 +11,7 @@ use crate::{ schema::{Column, Schema, Table}, storage::sqlite3_ondisk::DatabaseHeader, translate::expr::translate_expr, - vdbe::{builder::ProgramBuilder, Insn, Program}, + vdbe::{builder::ProgramBuilder, insn::Insn, Program}, }; use crate::{Connection, Result}; diff --git a/core/translate/mod.rs b/core/translate/mod.rs index e2b10b0e5..ca91eeebd 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -16,21 +16,20 @@ pub(crate) mod plan; pub(crate) mod planner; pub(crate) mod select; -use std::cell::RefCell; -use std::fmt::Display; -use std::rc::{Rc, Weak}; -use std::str::FromStr; - use crate::schema::Schema; use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::{DatabaseHeader, MIN_PAGE_CACHE_SIZE}; use crate::translate::delete::translate_delete; -use crate::vdbe::{builder::ProgramBuilder, Insn, Program}; +use crate::vdbe::{builder::ProgramBuilder, insn::Insn, Program}; use crate::{bail_parse_error, Connection, Result}; use insert::translate_insert; use select::translate_select; use sqlite3_parser::ast::fmt::ToTokens; use sqlite3_parser::ast::{self, PragmaName}; +use std::cell::RefCell; +use std::fmt::Display; +use std::rc::{Rc, Weak}; +use std::str::FromStr; /// Translate SQL statement into bytecode program. 
pub fn translate( @@ -71,13 +70,10 @@ pub fn translate( bail_parse_error!("CREATE VIRTUAL TABLE not supported yet") } ast::Stmt::Delete { - with, tbl_name, - indexed, where_clause, - returning, - order_by, limit, + .. } => translate_delete( schema, &tbl_name, diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 1ee68476b..1463c0402 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -9,7 +9,7 @@ use super::plan::{ DeletePlan, Direction, IterationDirection, Plan, Search, SelectPlan, SourceOperator, }; -pub fn optimize_plan(mut plan: Plan) -> Result { +pub fn optimize_plan(plan: Plan) -> Result { match plan { Plan::Select(plan) => optimize_select_plan(plan).map(Plan::Select), Plan::Delete(plan) => optimize_delete_plan(plan).map(Plan::Delete), diff --git a/core/translate/plan.rs b/core/translate/plan.rs index abfab41fb..e16676061 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -56,6 +56,7 @@ pub struct SelectPlan { pub contains_constant_false_condition: bool, } +#[allow(dead_code)] #[derive(Debug)] pub struct DeletePlan { /// A tree of sources (tables). @@ -205,6 +206,7 @@ pub struct BTreeTableReference { /// An enum that represents a search operation that can be used to search for a row in a table using an index /// (i.e. a primary key or a secondary index) +#[allow(clippy::enum_variant_names)] #[derive(Clone, Debug)] pub enum Search { /// A rowid equality point lookup. This is a special case that uses the SeekRowid bytecode instruction and does not loop. 
@@ -395,7 +397,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( table_refs_mask |= 1 << tables .iter() - .position(|t| &t.table_identifier == &table_reference.table_identifier) + .position(|t| t.table_identifier == table_reference.table_identifier) .unwrap(); } SourceOperator::Search { @@ -404,7 +406,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( table_refs_mask |= 1 << tables .iter() - .position(|t| &t.table_identifier == &table_reference.table_identifier) + .position(|t| t.table_identifier == table_reference.table_identifier) .unwrap(); } SourceOperator::Nothing => {} @@ -420,6 +422,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( and predicate = "t1.a = t2.b" then the return value will be (in bits): 011 */ +#[allow(clippy::only_used_in_recursion)] pub fn get_table_ref_bitmask_for_ast_expr<'a>( tables: &'a Vec, predicate: &'a ast::Expr, diff --git a/core/util.rs b/core/util.rs index dcf04ac81..4dee111df 100644 --- a/core/util.rs +++ b/core/util.rs @@ -15,7 +15,7 @@ pub fn normalize_ident(identifier: &str) -> String { .iter() .find(|&(start, end)| identifier.starts_with(*start) && identifier.ends_with(*end)); - if let Some(&(start, end)) = quote_pair { + if let Some(&(_, _)) = quote_pair { &identifier[1..identifier.len() - 1] } else { identifier diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs new file mode 100644 index 000000000..b0eba79d8 --- /dev/null +++ b/core/vdbe/insn.rs @@ -0,0 +1,490 @@ +use super::{AggFunc, BranchOffset, CursorID, FuncCtx, PageIdx}; +use crate::types::OwnedRecord; +use limbo_macros::Description; + +#[derive(Description, Debug)] +pub enum Insn { + // Initialize the program state and jump to the given PC. + Init { + target_pc: BranchOffset, + }, + // Write a NULL into register dest. If dest_end is Some, then also write NULL into register dest_end and every register in between dest and dest_end. If dest_end is not set, then only register dest is set to NULL. 
+ Null { + dest: usize, + dest_end: Option, + }, + // Move the cursor P1 to a null row. Any Column operations that occur while the cursor is on the null row will always write a NULL. + NullRow { + cursor_id: CursorID, + }, + // Add two registers and store the result in a third register. + Add { + lhs: usize, + rhs: usize, + dest: usize, + }, + // Subtract rhs from lhs and store in dest + Subtract { + lhs: usize, + rhs: usize, + dest: usize, + }, + // Multiply two registers and store the result in a third register. + Multiply { + lhs: usize, + rhs: usize, + dest: usize, + }, + // Divide lhs by rhs and store the result in a third register. + Divide { + lhs: usize, + rhs: usize, + dest: usize, + }, + // Compare two vectors of registers in reg(P1)..reg(P1+P3-1) (call this vector "A") and in reg(P2)..reg(P2+P3-1) ("B"). Save the result of the comparison for use by the next Jump instruct. + Compare { + start_reg_a: usize, + start_reg_b: usize, + count: usize, + }, + // Place the result of rhs bitwise AND lhs in third register. + BitAnd { + lhs: usize, + rhs: usize, + dest: usize, + }, + // Place the result of rhs bitwise OR lhs in third register. + BitOr { + lhs: usize, + rhs: usize, + dest: usize, + }, + // Place the result of bitwise NOT register P1 in dest register. + BitNot { + reg: usize, + dest: usize, + }, + // Divide lhs by rhs and place the remainder in dest register. + Remainder { + lhs: usize, + rhs: usize, + dest: usize, + }, + // Jump to the instruction at address P1, P2, or P3 depending on whether in the most recent Compare instruction the P1 vector was less than, equal to, or greater than the P2 vector, respectively. + Jump { + target_pc_lt: BranchOffset, + target_pc_eq: BranchOffset, + target_pc_gt: BranchOffset, + }, + // Move the P3 values in register P1..P1+P3-1 over into registers P2..P2+P3-1. Registers P1..P1+P3-1 are left holding a NULL. It is an error for register ranges P1..P1+P3-1 and P2..P2+P3-1 to overlap. 
It is an error for P3 to be less than 1. + Move { + source_reg: usize, + dest_reg: usize, + count: usize, + }, + // If the given register is a positive integer, decrement it by decrement_by and jump to the given PC. + IfPos { + reg: usize, + target_pc: BranchOffset, + decrement_by: usize, + }, + // If the given register is not NULL, jump to the given PC. + NotNull { + reg: usize, + target_pc: BranchOffset, + }, + // Compare two registers and jump to the given PC if they are equal. + Eq { + lhs: usize, + rhs: usize, + target_pc: BranchOffset, + }, + // Compare two registers and jump to the given PC if they are not equal. + Ne { + lhs: usize, + rhs: usize, + target_pc: BranchOffset, + }, + // Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. + Lt { + lhs: usize, + rhs: usize, + target_pc: BranchOffset, + }, + // Compare two registers and jump to the given PC if the left-hand side is less than or equal to the right-hand side. + Le { + lhs: usize, + rhs: usize, + target_pc: BranchOffset, + }, + // Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. + Gt { + lhs: usize, + rhs: usize, + target_pc: BranchOffset, + }, + // Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. + Ge { + lhs: usize, + rhs: usize, + target_pc: BranchOffset, + }, + /// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[null_reg\] != 0) + If { + reg: usize, // P1 + target_pc: BranchOffset, // P2 + /// P3. If r\[reg\] is null, jump iff r\[null_reg\] != 0 + null_reg: usize, + }, + /// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[null_reg\] != 0) + IfNot { + reg: usize, // P1 + target_pc: BranchOffset, // P2 + /// P3. If r\[reg\] is null, jump iff r\[null_reg\] != 0 + null_reg: usize, + }, + // Open a cursor for reading. 
+ OpenReadAsync { + cursor_id: CursorID, + root_page: PageIdx, + }, + + // Await for the completion of open cursor. + OpenReadAwait, + + // Open a cursor for a pseudo-table that contains a single row. + OpenPseudo { + cursor_id: CursorID, + content_reg: usize, + num_fields: usize, + }, + + // Rewind the cursor to the beginning of the B-Tree. + RewindAsync { + cursor_id: CursorID, + }, + + // Await for the completion of cursor rewind. + RewindAwait { + cursor_id: CursorID, + pc_if_empty: BranchOffset, + }, + + LastAsync { + cursor_id: CursorID, + }, + + LastAwait { + cursor_id: CursorID, + pc_if_empty: BranchOffset, + }, + + // Read a column from the current row of the cursor. + Column { + cursor_id: CursorID, + column: usize, + dest: usize, + }, + + // Make a record and write it to destination register. + MakeRecord { + start_reg: usize, // P1 + count: usize, // P2 + dest_reg: usize, // P3 + }, + + // Emit a row of results. + ResultRow { + start_reg: usize, // P1 + count: usize, // P2 + }, + + // Advance the cursor to the next row. + NextAsync { + cursor_id: CursorID, + }, + + // Await for the completion of cursor advance. + NextAwait { + cursor_id: CursorID, + pc_if_next: BranchOffset, + }, + + PrevAsync { + cursor_id: CursorID, + }, + + PrevAwait { + cursor_id: CursorID, + pc_if_next: BranchOffset, + }, + + // Halt the program. + Halt { + err_code: usize, + description: String, + }, + + // Start a transaction. + Transaction { + write: bool, + }, + + // Branch to the given PC. + Goto { + target_pc: BranchOffset, + }, + + // Stores the current program counter into register 'return_reg' then jumps to address target_pc. + Gosub { + target_pc: BranchOffset, + return_reg: usize, + }, + + // Returns to the program counter stored in register 'return_reg'. + Return { + return_reg: usize, + }, + + // Write an integer value into a register. 
+ Integer { + value: i64, + dest: usize, + }, + + // Write a float value into a register + Real { + value: f64, + dest: usize, + }, + + // If register holds an integer, transform it to a float + RealAffinity { + register: usize, + }, + + // Write a string value into a register. + String8 { + value: String, + dest: usize, + }, + + // Write a blob value into a register. + Blob { + value: Vec, + dest: usize, + }, + + // Read the rowid of the current row. + RowId { + cursor_id: CursorID, + dest: usize, + }, + + // Seek to a rowid in the cursor. If not found, jump to the given PC. Otherwise, continue to the next instruction. + SeekRowid { + cursor_id: CursorID, + src_reg: usize, + target_pc: BranchOffset, + }, + + // P1 is an open index cursor and P3 is a cursor on the corresponding table. This opcode does a deferred seek of the P3 table cursor to the row that corresponds to the current row of P1. + // This is a deferred seek. Nothing actually happens until the cursor is used to read a record. That way, if no reads occur, no unnecessary I/O happens. + DeferredSeek { + index_cursor_id: CursorID, + table_cursor_id: CursorID, + }, + + // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. + // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. + // Seek to the first index entry that is greater than or equal to the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. + SeekGE { + is_index: bool, + cursor_id: CursorID, + start_reg: usize, + num_regs: usize, + target_pc: BranchOffset, + }, + + // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. + // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. 
+ // Seek to the first index entry that is greater than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. + SeekGT { + is_index: bool, + cursor_id: CursorID, + start_reg: usize, + num_regs: usize, + target_pc: BranchOffset, + }, + + // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + // If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. + IdxGE { + cursor_id: CursorID, + start_reg: usize, + num_regs: usize, + target_pc: BranchOffset, + }, + + // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + // If the P1 index entry is greater than the key value then jump to P2. Otherwise fall through to the next instruction. + IdxGT { + cursor_id: CursorID, + start_reg: usize, + num_regs: usize, + target_pc: BranchOffset, + }, + + // Decrement the given register and jump to the given PC if the result is zero. + DecrJumpZero { + reg: usize, + target_pc: BranchOffset, + }, + + AggStep { + acc_reg: usize, + col: usize, + delimiter: usize, + func: AggFunc, + }, + + AggFinal { + register: usize, + func: AggFunc, + }, + + // Open a sorter. + SorterOpen { + cursor_id: CursorID, // P1 + columns: usize, // P2 + order: OwnedRecord, // P4. 0 if ASC and 1 if DESC + }, + + // Insert a row into the sorter. + SorterInsert { + cursor_id: CursorID, + record_reg: usize, + }, + + // Sort the rows in the sorter. + SorterSort { + cursor_id: CursorID, + pc_if_empty: BranchOffset, + }, + + // Retrieve the next row from the sorter. 
+ SorterData { + cursor_id: CursorID, // P1 + dest_reg: usize, // P2 + pseudo_cursor: usize, // P3 + }, + + // Advance to the next row in the sorter. + SorterNext { + cursor_id: CursorID, + pc_if_next: BranchOffset, + }, + + // Function + Function { + constant_mask: i32, // P1 + start_reg: usize, // P2, start of argument registers + dest: usize, // P3 + func: FuncCtx, // P4 + }, + + InitCoroutine { + yield_reg: usize, + jump_on_definition: BranchOffset, + start_offset: BranchOffset, + }, + + EndCoroutine { + yield_reg: usize, + }, + + Yield { + yield_reg: usize, + end_offset: BranchOffset, + }, + + InsertAsync { + cursor: CursorID, + key_reg: usize, // Must be int. + record_reg: usize, // Blob of record data. + flag: usize, // Flags used by insert, for now not used. + }, + + InsertAwait { + cursor_id: usize, + }, + + DeleteAsync { + cursor_id: CursorID, + }, + + DeleteAwait { + cursor_id: CursorID, + }, + + NewRowid { + cursor: CursorID, // P1 + rowid_reg: usize, // P2 Destination register to store the new rowid + prev_largest_reg: usize, // P3 Previous largest rowid in the table (Not used for now) + }, + + MustBeInt { + reg: usize, + }, + + SoftNull { + reg: usize, + }, + + NotExists { + cursor: CursorID, + rowid_reg: usize, + target_pc: BranchOffset, + }, + + OpenWriteAsync { + cursor_id: CursorID, + root_page: PageIdx, + }, + + OpenWriteAwait {}, + + Copy { + src_reg: usize, + dst_reg: usize, + amount: usize, // 0 amount means we include src_reg, dst_reg..=dst_reg+amount = src_reg..=src_reg+amount + }, + + /// Allocate a new b-tree. + CreateBtree { + /// Allocate b-tree in main database if zero or in temp database if non-zero (P1). + db: usize, + /// The root page of the new b-tree (P2). + root: usize, + /// Flags (P3). + flags: usize, + }, + + /// Close a cursor. + Close { + cursor_id: CursorID, + }, + + /// Check if the register is null. + IsNull { + /// Source register (P1). + src: usize, + + /// Jump to this PC if the register is null (P2). 
+ target_pc: BranchOffset, + }, + ParseSchema { + db: usize, + where_clause: String, + }, +} diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 7076a31d5..2d731d133 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -18,12 +18,12 @@ //! https://www.sqlite.org/opcode.html pub mod builder; +mod datetime; pub mod explain; +pub mod insn; pub mod likeop; pub mod sorter; -mod datetime; - use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY}; #[cfg(feature = "uuid")] use crate::ext::{exec_ts_from_uuid7, exec_uuid, exec_uuidblob, exec_uuidstr, ExtFunc, UuidFunc}; @@ -37,15 +37,12 @@ use crate::types::{ AggContext, Cursor, CursorResult, OwnedRecord, OwnedValue, Record, SeekKey, SeekOp, }; use crate::util::parse_schema_rows; +use crate::vdbe::insn::Insn; #[cfg(feature = "json")] use crate::{function::JsonFunc, json::get_json, json::json_array, json::json_array_length}; -use crate::{Connection, Result, TransactionState}; -use crate::{Rows, DATABASE_VERSION}; -use likeop::{construct_like_escape_arg, exec_like_with_escape}; -use limbo_macros::Description; - +use crate::{Connection, Result, Rows, TransactionState, DATABASE_VERSION}; use datetime::{exec_date, exec_time, exec_unixepoch}; - +use likeop::{construct_like_escape_arg, exec_like_with_escape}; use rand::distributions::{Distribution, Uniform}; use rand::{thread_rng, Rng}; use regex::{Regex, RegexBuilder}; @@ -59,493 +56,6 @@ pub type CursorID = usize; pub type PageIdx = usize; -#[derive(Description, Debug)] -pub enum Insn { - // Initialize the program state and jump to the given PC. - Init { - target_pc: BranchOffset, - }, - // Write a NULL into register dest. If dest_end is Some, then also write NULL into register dest_end and every register in between dest and dest_end. If dest_end is not set, then only register dest is set to NULL. - Null { - dest: usize, - dest_end: Option, - }, - // Move the cursor P1 to a null row. 
Any Column operations that occur while the cursor is on the null row will always write a NULL. - NullRow { - cursor_id: CursorID, - }, - // Add two registers and store the result in a third register. - Add { - lhs: usize, - rhs: usize, - dest: usize, - }, - // Subtract rhs from lhs and store in dest - Subtract { - lhs: usize, - rhs: usize, - dest: usize, - }, - // Multiply two registers and store the result in a third register. - Multiply { - lhs: usize, - rhs: usize, - dest: usize, - }, - // Divide lhs by rhs and store the result in a third register. - Divide { - lhs: usize, - rhs: usize, - dest: usize, - }, - // Compare two vectors of registers in reg(P1)..reg(P1+P3-1) (call this vector "A") and in reg(P2)..reg(P2+P3-1) ("B"). Save the result of the comparison for use by the next Jump instruct. - Compare { - start_reg_a: usize, - start_reg_b: usize, - count: usize, - }, - // Place the result of rhs bitwise AND lhs in third register. - BitAnd { - lhs: usize, - rhs: usize, - dest: usize, - }, - // Place the result of rhs bitwise OR lhs in third register. - BitOr { - lhs: usize, - rhs: usize, - dest: usize, - }, - // Place the result of bitwise NOT register P1 in dest register. - BitNot { - reg: usize, - dest: usize, - }, - // Divide lhs by rhs and place the remainder in dest register. - Remainder { - lhs: usize, - rhs: usize, - dest: usize, - }, - // Jump to the instruction at address P1, P2, or P3 depending on whether in the most recent Compare instruction the P1 vector was less than, equal to, or greater than the P2 vector, respectively. - Jump { - target_pc_lt: BranchOffset, - target_pc_eq: BranchOffset, - target_pc_gt: BranchOffset, - }, - // Move the P3 values in register P1..P1+P3-1 over into registers P2..P2+P3-1. Registers P1..P1+P3-1 are left holding a NULL. It is an error for register ranges P1..P1+P3-1 and P2..P2+P3-1 to overlap. It is an error for P3 to be less than 1. 
- Move { - source_reg: usize, - dest_reg: usize, - count: usize, - }, - // If the given register is a positive integer, decrement it by decrement_by and jump to the given PC. - IfPos { - reg: usize, - target_pc: BranchOffset, - decrement_by: usize, - }, - // If the given register is not NULL, jump to the given PC. - NotNull { - reg: usize, - target_pc: BranchOffset, - }, - // Compare two registers and jump to the given PC if they are equal. - Eq { - lhs: usize, - rhs: usize, - target_pc: BranchOffset, - }, - // Compare two registers and jump to the given PC if they are not equal. - Ne { - lhs: usize, - rhs: usize, - target_pc: BranchOffset, - }, - // Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. - Lt { - lhs: usize, - rhs: usize, - target_pc: BranchOffset, - }, - // Compare two registers and jump to the given PC if the left-hand side is less than or equal to the right-hand side. - Le { - lhs: usize, - rhs: usize, - target_pc: BranchOffset, - }, - // Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. - Gt { - lhs: usize, - rhs: usize, - target_pc: BranchOffset, - }, - // Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. - Ge { - lhs: usize, - rhs: usize, - target_pc: BranchOffset, - }, - /// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[null_reg\] != 0) - If { - reg: usize, // P1 - target_pc: BranchOffset, // P2 - /// P3. If r\[reg\] is null, jump iff r\[null_reg\] != 0 - null_reg: usize, - }, - /// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[null_reg\] != 0) - IfNot { - reg: usize, // P1 - target_pc: BranchOffset, // P2 - /// P3. If r\[reg\] is null, jump iff r\[null_reg\] != 0 - null_reg: usize, - }, - // Open a cursor for reading. - OpenReadAsync { - cursor_id: CursorID, - root_page: PageIdx, - }, - - // Await for the completion of open cursor. 
- OpenReadAwait, - - // Open a cursor for a pseudo-table that contains a single row. - OpenPseudo { - cursor_id: CursorID, - content_reg: usize, - num_fields: usize, - }, - - // Rewind the cursor to the beginning of the B-Tree. - RewindAsync { - cursor_id: CursorID, - }, - - // Await for the completion of cursor rewind. - RewindAwait { - cursor_id: CursorID, - pc_if_empty: BranchOffset, - }, - - LastAsync { - cursor_id: CursorID, - }, - - LastAwait { - cursor_id: CursorID, - pc_if_empty: BranchOffset, - }, - - // Read a column from the current row of the cursor. - Column { - cursor_id: CursorID, - column: usize, - dest: usize, - }, - - // Make a record and write it to destination register. - MakeRecord { - start_reg: usize, // P1 - count: usize, // P2 - dest_reg: usize, // P3 - }, - - // Emit a row of results. - ResultRow { - start_reg: usize, // P1 - count: usize, // P2 - }, - - // Advance the cursor to the next row. - NextAsync { - cursor_id: CursorID, - }, - - // Await for the completion of cursor advance. - NextAwait { - cursor_id: CursorID, - pc_if_next: BranchOffset, - }, - - PrevAsync { - cursor_id: CursorID, - }, - - PrevAwait { - cursor_id: CursorID, - pc_if_next: BranchOffset, - }, - - // Halt the program. - Halt { - err_code: usize, - description: String, - }, - - // Start a transaction. - Transaction { - write: bool, - }, - - // Branch to the given PC. - Goto { - target_pc: BranchOffset, - }, - - // Stores the current program counter into register 'return_reg' then jumps to address target_pc. - Gosub { - target_pc: BranchOffset, - return_reg: usize, - }, - - // Returns to the program counter stored in register 'return_reg'. - Return { - return_reg: usize, - }, - - // Write an integer value into a register. 
- Integer { - value: i64, - dest: usize, - }, - - // Write a float value into a register - Real { - value: f64, - dest: usize, - }, - - // If register holds an integer, transform it to a float - RealAffinity { - register: usize, - }, - - // Write a string value into a register. - String8 { - value: String, - dest: usize, - }, - - // Write a blob value into a register. - Blob { - value: Vec, - dest: usize, - }, - - // Read the rowid of the current row. - RowId { - cursor_id: CursorID, - dest: usize, - }, - - // Seek to a rowid in the cursor. If not found, jump to the given PC. Otherwise, continue to the next instruction. - SeekRowid { - cursor_id: CursorID, - src_reg: usize, - target_pc: BranchOffset, - }, - - // P1 is an open index cursor and P3 is a cursor on the corresponding table. This opcode does a deferred seek of the P3 table cursor to the row that corresponds to the current row of P1. - // This is a deferred seek. Nothing actually happens until the cursor is used to read a record. That way, if no reads occur, no unnecessary I/O happens. - DeferredSeek { - index_cursor_id: CursorID, - table_cursor_id: CursorID, - }, - - // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. - // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. - // Seek to the first index entry that is greater than or equal to the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. - SeekGE { - is_index: bool, - cursor_id: CursorID, - start_reg: usize, - num_regs: usize, - target_pc: BranchOffset, - }, - - // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. - // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. 
- // Seek to the first index entry that is greater than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. - SeekGT { - is_index: bool, - cursor_id: CursorID, - start_reg: usize, - num_regs: usize, - target_pc: BranchOffset, - }, - - // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. - // If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. - IdxGE { - cursor_id: CursorID, - start_reg: usize, - num_regs: usize, - target_pc: BranchOffset, - }, - - // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. - // If the P1 index entry is greater than the key value then jump to P2. Otherwise fall through to the next instruction. - IdxGT { - cursor_id: CursorID, - start_reg: usize, - num_regs: usize, - target_pc: BranchOffset, - }, - - // Decrement the given register and jump to the given PC if the result is zero. - DecrJumpZero { - reg: usize, - target_pc: BranchOffset, - }, - - AggStep { - acc_reg: usize, - col: usize, - delimiter: usize, - func: AggFunc, - }, - - AggFinal { - register: usize, - func: AggFunc, - }, - - // Open a sorter. - SorterOpen { - cursor_id: CursorID, // P1 - columns: usize, // P2 - order: OwnedRecord, // P4. 0 if ASC and 1 if DESC - }, - - // Insert a row into the sorter. - SorterInsert { - cursor_id: CursorID, - record_reg: usize, - }, - - // Sort the rows in the sorter. - SorterSort { - cursor_id: CursorID, - pc_if_empty: BranchOffset, - }, - - // Retrieve the next row from the sorter. 
- SorterData { - cursor_id: CursorID, // P1 - dest_reg: usize, // P2 - pseudo_cursor: usize, // P3 - }, - - // Advance to the next row in the sorter. - SorterNext { - cursor_id: CursorID, - pc_if_next: BranchOffset, - }, - - // Function - Function { - constant_mask: i32, // P1 - start_reg: usize, // P2, start of argument registers - dest: usize, // P3 - func: FuncCtx, // P4 - }, - - InitCoroutine { - yield_reg: usize, - jump_on_definition: BranchOffset, - start_offset: BranchOffset, - }, - - EndCoroutine { - yield_reg: usize, - }, - - Yield { - yield_reg: usize, - end_offset: BranchOffset, - }, - - InsertAsync { - cursor: CursorID, - key_reg: usize, // Must be int. - record_reg: usize, // Blob of record data. - flag: usize, // Flags used by insert, for now not used. - }, - - InsertAwait { - cursor_id: usize, - }, - - DeleteAsync { - cursor_id: CursorID, - }, - - DeleteAwait { - cursor_id: CursorID, - }, - - NewRowid { - cursor: CursorID, // P1 - rowid_reg: usize, // P2 Destination register to store the new rowid - prev_largest_reg: usize, // P3 Previous largest rowid in the table (Not used for now) - }, - - MustBeInt { - reg: usize, - }, - - SoftNull { - reg: usize, - }, - - NotExists { - cursor: CursorID, - rowid_reg: usize, - target_pc: BranchOffset, - }, - - OpenWriteAsync { - cursor_id: CursorID, - root_page: PageIdx, - }, - - OpenWriteAwait {}, - - Copy { - src_reg: usize, - dst_reg: usize, - amount: usize, // 0 amount means we include src_reg, dst_reg..=dst_reg+amount = src_reg..=src_reg+amount - }, - - /// Allocate a new b-tree. - CreateBtree { - /// Allocate b-tree in main database if zero or in temp database if non-zero (P1). - db: usize, - /// The root page of the new b-tree (P2). - root: usize, - /// Flags (P3). - flags: usize, - }, - - /// Close a cursor. - Close { - cursor_id: CursorID, - }, - - /// Check if the register is null. - IsNull { - /// Source register (P1). - src: usize, - - /// Jump to this PC if the register is null (P2). 
- target_pc: BranchOffset, - }, - ParseSchema { - db: usize, - where_clause: String, - }, -} - // Index of insn in list of insns type InsnReference = usize; @@ -2669,6 +2179,7 @@ impl Program { state.registers[*dest] = exec_replace(source, pattern, replacement); } }, + #[allow(unreachable_patterns)] crate::function::Func::Extension(extfn) => match extfn { #[cfg(feature = "uuid")] ExtFunc::Uuid(uuidfn) => match uuidfn { diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 605c3b956..1cbf31e7a 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -65,15 +65,13 @@ pub fn derive_description_from_doc(item: TokenStream) -> TokenStream { /// Processes a Rust docs to extract the description string. fn process_description(token_iter: &mut IntoIter) -> Option { - if let Some(doc_token_tree) = token_iter.next() { - if let TokenTree::Group(doc_group) = doc_token_tree { - let mut doc_group_iter = doc_group.stream().into_iter(); - // Skip the `desc` and `(` tokens to reach the actual description - doc_group_iter.next(); - doc_group_iter.next(); - if let Some(TokenTree::Literal(description)) = doc_group_iter.next() { - return Some(description.to_string()); - } + if let Some(TokenTree::Group(doc_group)) = token_iter.next() { + let mut doc_group_iter = doc_group.stream().into_iter(); + // Skip the `desc` and `(` tokens to reach the actual description + doc_group_iter.next(); + doc_group_iter.next(); + if let Some(TokenTree::Literal(description)) = doc_group_iter.next() { + return Some(description.to_string()); } } None @@ -81,14 +79,14 @@ fn process_description(token_iter: &mut IntoIter) -> Option { /// Processes the payload of an enum variant to extract variable names (ignoring types). 
fn process_payload(payload_group: Group) -> String { - let mut payload_group_iter = payload_group.stream().into_iter(); + let payload_group_iter = payload_group.stream().into_iter(); let mut variable_name_list = String::from(""); let mut is_variable_name = true; - while let Some(token) = payload_group_iter.next() { + for token in payload_group_iter { match token { TokenTree::Ident(ident) => { if is_variable_name { - variable_name_list.push_str(&format!("{},", ident.to_string())); + variable_name_list.push_str(&format!("{},", ident)); } is_variable_name = false; } diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 2c20645af..0ff16af52 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -9,7 +9,7 @@ use crate::{ query::{Create, Insert, Predicate, Query, Select}, table::Value, }, - SimConnection, SimulatorEnv, SimulatorOpts, + SimConnection, SimulatorEnv, }; use crate::generation::{frequency, Arbitrary, ArbitraryFrom}; @@ -28,11 +28,11 @@ impl Display for InteractionPlan { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { for interaction in &self.plan { match interaction { - Interaction::Query(query) => write!(f, "{};\n", query)?, + Interaction::Query(query) => writeln!(f, "{};", query)?, Interaction::Assertion(assertion) => { - write!(f, "-- ASSERT: {};\n", assertion.message)? + writeln!(f, "-- ASSERT: {};", assertion.message)? 
} - Interaction::Fault(fault) => write!(f, "-- FAULT: {};\n", fault)?, + Interaction::Fault(fault) => writeln!(f, "-- FAULT: {};", fault)?, } } @@ -73,8 +73,10 @@ impl Display for Interaction { } } +type AssertionFunc = dyn Fn(&Vec) -> bool; + pub(crate) struct Assertion { - pub(crate) func: Box) -> bool>, + pub(crate) func: Box, pub(crate) message: String, } @@ -244,7 +246,7 @@ impl Interaction { Self::Assertion(_) => { unreachable!("unexpected: this function should only be called on queries") } - Self::Fault(fault) => { + Interaction::Fault(_) => { unreachable!("unexpected: this function should only be called on queries") } } @@ -347,7 +349,7 @@ fn property_insert_select(rng: &mut R, env: &SimulatorEnv) -> Inte Interactions(vec![insert_query, select_query, assertion]) } -fn create_table(rng: &mut R, env: &SimulatorEnv) -> Interactions { +fn create_table(rng: &mut R, _env: &SimulatorEnv) -> Interactions { let create_query = Interaction::Query(Query::Create(Create::arbitrary(rng))); Interactions(vec![create_query]) } @@ -363,7 +365,7 @@ fn random_write(rng: &mut R, env: &SimulatorEnv) -> Interactions { Interactions(vec![insert_query]) } -fn random_fault(rng: &mut R, env: &SimulatorEnv) -> Interactions { +fn random_fault(_rng: &mut R, _env: &SimulatorEnv) -> Interactions { let fault = Interaction::Fault(Fault::Disconnect); Interactions(vec![fault]) } diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index 4625ecc47..b39ef6785 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -227,7 +227,7 @@ impl ArbitraryFrom<(&str, &Value)> for Predicate { fn arbitrary_from(rng: &mut R, (column_name, value): &(&str, &Value)) -> Self { one_of( vec![ - Box::new(|rng| Self::Eq(column_name.to_string(), (*value).clone())), + Box::new(|_| Predicate::Eq(column_name.to_string(), (*value).clone())), Box::new(|rng| { Self::Gt( column_name.to_string(), diff --git a/simulator/main.rs b/simulator/main.rs index b12018062..ce4fe64c8 100644 
--- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,7 +1,8 @@ +#![allow(clippy::arc_with_non_send_sync, dead_code)] use clap::Parser; use generation::plan::{Interaction, InteractionPlan, ResultSet}; use generation::{pick_index, ArbitraryFrom}; -use limbo_core::{Connection, Database, Result, StepResult, IO}; +use limbo_core::{Database, Result}; use model::table::Value; use rand::prelude::*; use rand_chacha::ChaCha8Rng; @@ -11,7 +12,6 @@ use runner::io::SimulatorIO; use std::backtrace::Backtrace; use std::io::Write; use std::path::Path; -use std::rc::Rc; use std::sync::Arc; use tempfile::TempDir; @@ -19,7 +19,6 @@ mod generation; mod model; mod runner; -#[allow(clippy::arc_with_non_send_sync)] fn main() { let _ = env_logger::try_init(); @@ -189,7 +188,7 @@ fn run_simulation( let mut f = std::fs::File::create(plan_path).unwrap(); // todo: create a detailed plan file with all the plans. for now, we only use 1 connection, so it's safe to use the first plan. - f.write(plans[0].to_string().as_bytes()).unwrap(); + f.write_all(plans[0].to_string().as_bytes()).unwrap(); log::info!("{}", plans[0].stats()); @@ -207,7 +206,7 @@ fn run_simulation( result } -fn execute_plans(env: &mut SimulatorEnv, plans: &mut Vec) -> Result<()> { +fn execute_plans(env: &mut SimulatorEnv, plans: &mut [InteractionPlan]) -> Result<()> { // todo: add history here by recording which interaction was executed at which tick for _tick in 0..env.opts.ticks { // Pick the connection to interact with @@ -222,7 +221,7 @@ fn execute_plans(env: &mut SimulatorEnv, plans: &mut Vec) -> Re fn execute_plan( env: &mut SimulatorEnv, connection_index: usize, - plans: &mut Vec, + plans: &mut [InteractionPlan], ) -> Result<()> { let connection = &env.connections[connection_index]; let plan = &mut plans[connection_index]; diff --git a/simulator/model/table.rs b/simulator/model/table.rs index e1da6a342..841ae0023 100644 --- a/simulator/model/table.rs +++ b/simulator/model/table.rs @@ -16,7 +16,7 @@ pub(crate) struct 
Table { pub(crate) name: String, pub(crate) columns: Vec, } - +#[allow(dead_code)] #[derive(Debug, Clone)] pub(crate) struct Column { pub(crate) name: String, @@ -54,8 +54,12 @@ pub(crate) enum Value { } fn to_sqlite_blob(bytes: &[u8]) -> String { - let hex: String = bytes.iter().map(|b| format!("{:02X}", b)).collect(); - format!("X'{}'", hex) + format!( + "X'{}'", + bytes + .iter() + .fold(String::new(), |acc, b| acc + &format!("{:02X}", b)) + ) } impl Display for Value { diff --git a/simulator/runner/file.rs b/simulator/runner/file.rs index 7f3fe9072..e0153f2b3 100644 --- a/simulator/runner/file.rs +++ b/simulator/runner/file.rs @@ -1,7 +1,6 @@ use std::{cell::RefCell, rc::Rc}; use limbo_core::{File, Result}; - pub(crate) struct SimulatorFile { pub(crate) inner: Rc, pub(crate) fault: RefCell, diff --git a/simulator/runner/mod.rs b/simulator/runner/mod.rs index d5f06c103..10a777fd9 100644 --- a/simulator/runner/mod.rs +++ b/simulator/runner/mod.rs @@ -1,4 +1,5 @@ pub mod cli; pub mod env; +#[allow(dead_code)] pub mod file; pub mod io; diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index cd09ef62b..98ff31e83 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -67,7 +67,7 @@ pub struct sqlite3_stmt<'a> { pub(crate) row: RefCell>>, } -impl<'a> sqlite3_stmt<'a> { +impl sqlite3_stmt<'_> { pub fn new(stmt: limbo_core::Statement) -> Self { let row = RefCell::new(None); Self { stmt, row } @@ -998,9 +998,7 @@ pub unsafe extern "C" fn sqlite3_threadsafe() -> ffi::c_int { #[no_mangle] pub unsafe extern "C" fn sqlite3_libversion() -> *const std::ffi::c_char { - ffi::CStr::from_bytes_with_nul(b"3.42.0\0") - .unwrap() - .as_ptr() + c"3.42.0".as_ptr() } #[no_mangle] @@ -1094,7 +1092,7 @@ pub unsafe extern "C" fn sqlite3_wal_checkpoint_v2( } let db: &mut sqlite3 = &mut *db; // TODO: Checkpointing modes and reporting back log size and checkpoint count to caller. 
- if let Err(e) = db.conn.checkpoint() { + if db.conn.checkpoint().is_err() { return SQLITE_ERROR; } SQLITE_OK From 361e55f8581229fc0ebf893e825bac8946d57e77 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 27 Dec 2024 18:28:54 -0500 Subject: [PATCH 144/144] Fix checkout action for clippy ci --- .github/workflows/rust.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 835335ff2..19596e140 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -13,7 +13,7 @@ jobs: cargo-fmt-check: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check formatting run: cargo fmt --check @@ -39,16 +39,19 @@ jobs: run: cargo test --verbose timeout-minutes: 5 + clippy: runs-on: ubuntu-latest steps: + - uses: actions/checkout@v3 - name: Clippy - run: cargo clippy -- -A clippy::all -W clippy::correctness -W clippy::perf -W clippy::suspicious --deny=warnings + run: | + cargo clippy -- -A clippy::all -W clippy::correctness -W clippy::perf -W clippy::suspicious --deny=warnings build-wasm: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - run: wasm-pack build --target nodejs bindings/wasm @@ -56,7 +59,7 @@ jobs: bench: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Bench run: cargo bench @@ -72,14 +75,14 @@ jobs: run: | curl -L $LINK/$CARGO_C_FILE | tar xz -C ~/.cargo/bin - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Test run: make test test-sqlite: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install sqlite run: sudo apt update && sudo apt install -y sqlite3 libsqlite3-dev - name: Test