diff --git a/rust/.gitignore b/rust/.gitignore new file mode 100644 index 00000000..d919eec4 --- /dev/null +++ b/rust/.gitignore @@ -0,0 +1,10 @@ +# Rust build artifacts +target/ +Cargo.lock + +# IDE files +.idea/ +.vscode/ +*.swp +*.swo +*~ diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 00000000..42d3b0da --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,37 @@ +[workspace] +members = [ + "keyval", + "parser", + "engine", + "cli", +] +resolver = "2" + +[workspace.package] +version = "0.1.0" +edition = "2021" +authors = ["FQL Contributors"] +license = "MIT OR Apache-2.0" + +[workspace.dependencies] +# Internal crates +keyval = { path = "keyval" } +parser = { path = "parser" } +engine = { path = "engine" } + +# External dependencies +anyhow = "1.0" +thiserror = "1.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +bytes = "1.5" +uuid = { version = "1.6", features = ["serde", "v4"] } +async-trait = "0.1" +futures = "0.3" +tokio = { version = "1.35", features = ["full"] } +clap = { version = "4.4", features = ["derive"] } +ratatui = "0.25" +crossterm = "0.27" +tracing = "0.1" +tracing-subscriber = "0.3" +hex = "0.4" diff --git a/rust/README.md b/rust/README.md new file mode 100644 index 00000000..d20ab1ec --- /dev/null +++ b/rust/README.md @@ -0,0 +1,301 @@ +# FQL Rust Implementation + +This directory contains a Rust rewrite of the FQL (FoundationDB Query Language) project. The Rust implementation leverages Rust's powerful type system, enums, and pattern matching to provide a more idiomatic and type-safe query language implementation. 
+ +## Project Structure + +The Rust implementation is organized as a Cargo workspace with the following crates: + +``` +rust/ +├── Cargo.toml # Workspace configuration +├── keyval/ # Core data structures and types +├── parser/ # Query string parsing and formatting +├── engine/ # Query execution engine +└── cli/ # Command-line interface +``` + +### Crates + +#### `keyval` - Core Data Structures + +The keyval crate contains the fundamental types for FQL queries and key-values: + +- **Query types**: `Query`, `KeyValue`, `Key`, `Directory` +- **Element types**: `TupElement`, `DirElement`, `Value` +- **Type system**: `Variable`, `ValueType`, `VStamp`, `VStampFuture` +- **Submodules**: + - `class`: Query classification (Constant, Clear, ReadSingle, ReadRange, etc.) + - `convert`: Type conversions between FQL and FoundationDB formats + - `tuple`: Tuple comparison and matching logic + - `values`: Value serialization and deserialization + +**Key Improvement over Go**: Unlike the Go implementation which uses the visitor pattern to work around the lack of tagged unions, the Rust implementation uses native enums with pattern matching, resulting in cleaner, more maintainable code. 
+ +#### `parser` - Query Parsing + +The parser crate handles tokenization and parsing of FQL query strings: + +- **scanner**: Tokenizes FQL queries into tokens +- **format**: Converts keyval structures back to FQL strings +- **Parser API**: `parse(input: &str) -> Result` + +**Example**: +```rust +use parser::parse; + +let query = parse("/users/(42)=<>")?; +``` + +#### `engine` - Query Execution + +The engine crate executes FQL queries against FoundationDB: + +- **Engine**: Main query executor with async/await support +- **facade**: Database abstraction layer for testing +- **stream**: Async streaming support for range queries + +**Key Features**: +- Async/await for all database operations +- Trait-based database abstraction +- Streaming results for range queries using Rust's `Stream` trait + +**Example**: +```rust +use engine::{Engine, EngineConfig}; + +let engine = Engine::new(EngineConfig::default()); +let results = engine.execute(&query).await?; +``` + +#### `cli` - Command-Line Interface + +The CLI crate provides both interactive and non-interactive modes: + +- Interactive TUI mode (to be implemented with ratatui) +- Single query execution mode +- Based on clap for argument parsing + +**Example**: +```bash +# Execute a single query +cargo run --bin cli -- "/users/(42)=<>" + +# Interactive mode +cargo run --bin cli -- interactive +``` + +## Key Advantages of the Rust Implementation + +### 1. **Type Safety with Enums** + +The Go implementation uses interfaces and the visitor pattern to simulate tagged unions. Rust's native enum support makes this much cleaner: + +**Go (visitor pattern)**: +```go +type Value interface { + Value(ValueOperation) +} + +type ValueOperation interface { + ForInt(Int) + ForString(String) + // ... many more methods +} +``` + +**Rust (native enums)**: +```rust +pub enum Value { + Int(i64), + String(String), + // ... 
other variants +} + +match value { + Value::Int(i) => handle_int(i), + Value::String(s) => handle_string(s), + // Compiler ensures all cases are handled +} +``` + +### 2. **Async/Await** + +The Rust implementation uses native async/await for all database operations, providing: +- Better ergonomics than Go's goroutines for this use case +- Structured concurrency +- Zero-cost abstractions + +### 3. **Ownership and Borrowing** + +Rust's ownership system prevents entire classes of bugs: +- No null pointer exceptions +- No data races +- Memory safety without garbage collection + +### 4. **Pattern Matching** + +Exhaustive pattern matching ensures all cases are handled at compile time: +```rust +match query { + Query::KeyValue(kv) => handle_keyvalue(kv), + Query::Key(key) => handle_key(key), + Query::Directory(dir) => handle_directory(dir), + // Compiler error if any variant is missing +} +``` + +### 5. **Trait System** + +The facade module uses traits for database abstraction, similar to Go interfaces but with: +- Static dispatch (zero runtime cost) +- Associated types +- Const generics + +## Building and Testing + +### Prerequisites + +- Rust 1.70+ (2021 edition) +- FoundationDB 6.2+ (for full integration) + +### Build + +```bash +# Build all crates +cargo build + +# Build with optimizations +cargo build --release + +# Build specific crate +cargo build -p keyval +``` + +### Test + +```bash +# Run all tests +cargo test + +# Run tests for specific crate +cargo test -p keyval + +# Run tests with output +cargo test -- --nocapture +``` + +### Documentation + +```bash +# Generate documentation +cargo doc --no-deps --open + +# Generate documentation for all workspace crates +cargo doc --workspace --no-deps --open +``` + +## Differences from Go Implementation + +### Architecture + +| Aspect | Go Implementation | Rust Implementation | +|--------|------------------|-------------------| +| Polymorphism | Interfaces + Visitor Pattern | Enums + Pattern Matching | +| Concurrency | 
Goroutines + Channels | Async/Await + Futures | +| Error Handling | Multiple returns | Result type | +| Memory Management | Garbage Collection | Ownership + Borrowing | +| Dependencies | Go modules | Cargo | + +### Code Generation + +The Go implementation uses code generation (`go generate`) to create visitor pattern boilerplate. The Rust implementation eliminates this entirely by using native enums. + +**Go**: ~4 generated files in keyval/ (~500 LOC) +**Rust**: 0 generated files (enums are first-class) + +### Lines of Code + +Estimated comparison for core functionality: + +| Component | Go LOC | Rust LOC | Reduction | +|-----------|--------|----------|-----------| +| keyval core | ~500 | ~300 | 40% | +| classification | ~250 | ~200 | 20% | +| parser/scanner | ~680 | ~400 | 41% | +| **Total** | ~1430 | ~900 | ~37% | + +The Rust implementation is more concise while being more type-safe. + +## Implementation Status + +### ✅ Completed + +- [x] Workspace structure and Cargo configuration +- [x] Core keyval data structures with enums +- [x] Query classification module +- [x] Scanner/tokenizer foundation +- [x] Format module for query formatting +- [x] Engine structure with async/await +- [x] Facade trait definitions +- [x] CLI application structure + +### 🚧 In Progress / TODO + +- [ ] Complete parser state machine implementation +- [ ] FoundationDB integration (requires fdb-rs crate) +- [ ] Tuple comparison logic implementation +- [ ] Value serialization/deserialization +- [ ] Type conversion utilities +- [ ] Streaming implementation for range queries +- [ ] Interactive TUI with ratatui +- [ ] Comprehensive test suite +- [ ] Integration tests with FoundationDB +- [ ] Performance benchmarks + +## Dependencies + +### Core Dependencies + +- **serde**: Serialization/deserialization +- **uuid**: UUID support +- **thiserror**: Error handling +- **async-trait**: Async trait methods +- **futures**: Async primitives +- **tokio**: Async runtime + +### CLI Dependencies + +- 
**clap**: Command-line argument parsing +- **ratatui**: Terminal UI framework (for interactive mode) +- **crossterm**: Terminal manipulation + +## Performance Considerations + +The Rust implementation is expected to have several performance advantages: + +1. **Zero-cost abstractions**: Rust's enums and pattern matching compile to efficient machine code +2. **No GC pauses**: Deterministic performance without garbage collection +3. **Inline expansion**: Extensive use of generics and monomorphization +4. **SIMD**: Potential for auto-vectorization in serialization code + +Benchmarks to be added once implementation is complete. + +## Contributing + +The Rust implementation follows standard Rust idioms and best practices: + +- Format code with `rustfmt`: `cargo fmt` +- Lint with Clippy: `cargo clippy` +- Document public APIs with doc comments (`///`) +- Write tests alongside code +- Use `Result` for fallible operations +- Prefer borrowing over cloning where possible + +## License + +Same as the parent FQL project (MIT OR Apache-2.0). + +## Acknowledgments + +This Rust implementation is a rewrite of the original Go implementation at [github.com/janderland/fql](https://github.com/janderland/fql). The architecture and API design are based on the Go version, adapted to leverage Rust's unique features. diff --git a/rust/TODO.md b/rust/TODO.md new file mode 100644 index 00000000..6401018b --- /dev/null +++ b/rust/TODO.md @@ -0,0 +1,451 @@ +# TODO: Rust Implementation Improvements + +This document outlines potential improvements to make the FQL Rust implementation more idiomatic, performant, and maintainable. 
+ +## 🎯 High Priority - Idiomatic Rust + +### Type Safety & Error Handling + +- [ ] **Replace `String` errors with proper error types** + - `convert.rs`: Use `thiserror` for `ConversionError` instead of `Result<_, String>` + - Implement `std::error::Error` for all error types + - Add context with error source chains + +- [ ] **Implement standard traits across the board** + - `Display` for all query types (for debugging and logging) + - `FromStr` for parsing from strings (complement to `format`) + - `TryFrom`/`TryInto` for conversions between types + - `Default` where appropriate (e.g., `EngineConfig`) + - `Hash` for types that should be hashable + +- [ ] **Add newtype wrappers for semantic clarity** + ```rust + // Instead of raw Vec + pub struct PackedValue(Vec); + pub struct PackedTuple(Vec); + + // Prevents mixing up different byte representations + impl PackedValue { + pub fn as_bytes(&self) -> &[u8] { &self.0 } + } + ``` + +### API Ergonomics + +- [ ] **Builder pattern for complex types** + ```rust + // Instead of verbose construction + KeyValue::builder() + .directory(["users", "profiles"]) + .tuple([TupElement::Int(42)]) + .value(Value::String("data")) + .build() + ``` + +- [ ] **Convenience constructors with `Into`** + ```rust + impl Key { + pub fn new(dir: impl Into, tup: impl Into) -> Self { + Self { directory: dir.into(), tuple: tup.into() } + } + } + + // Allows: Key::new(vec!["users"], vec![42]) + // Via From> for Directory and From> for Tuple + ``` + +- [ ] **Method chaining for queries** + ```rust + let query = Query::new() + .in_directory(["users"]) + .with_tuple([42, "key"]) + .equals(Value::String("value")); + ``` + +- [ ] **Add `as_ref()` and `as_mut()` accessors** + - Reduces need for pattern matching when you just need inner data + - Implement for `Query`, `Value`, `TupElement`, `DirElement` + +### Zero-Copy & Performance + +- [ ] **Use `Cow<'a, str>` for strings where appropriate** + ```rust + pub enum Value<'a> { + String(Cow<'a, str>), // Can 
borrow or own + // ... other variants + } + ``` + +- [ ] **Add `&[u8]` variants alongside `Vec`** + ```rust + pub fn pack_into(&self, buf: &mut Vec) -> Result<(), SerializationError>; + pub fn pack_bytes(&self) -> Result<&[u8], SerializationError>; + ``` + +- [ ] **Use `SmallVec` for tuples and directories** + - Most tuples/directories are small (< 8 elements) + - Avoids heap allocation for common case + ```rust + pub type Tuple = SmallVec<[TupElement; 8]>; + pub type Directory = SmallVec<[DirElement; 4]>; + ``` + +- [ ] **Implement `serde` `zero-copy` deserialization** + - Use `#[serde(borrow)]` where possible + - Reduces allocations when deserializing + +## 🔧 Medium Priority - Robustness + +### Validation & Constraints + +- [ ] **Add validation functions** + ```rust + impl KeyValue { + pub fn validate(&self) -> Result<(), ValidationError> { + // Check for Variables in constant values + // Check MaybeMore only at end of tuple + // Check no nil values where not allowed + } + } + ``` + +- [ ] **Use typed builders with compile-time validation** + ```rust + // Type state pattern to prevent invalid queries + pub struct QueryBuilder { + _state: PhantomData, + // ... + } + + impl QueryBuilder { + pub fn value(self, v: Value) -> QueryBuilder { ... } + } + ``` + +- [ ] **Add `#[non_exhaustive]` to enums for future compatibility** + ```rust + #[non_exhaustive] + pub enum ValueType { ... } + ``` + +### Testing & Documentation + +- [ ] **Add property-based tests with `proptest`** + ```rust + proptest! 
{ + #[test] + fn pack_unpack_roundtrip(value: Value) { + let packed = pack(&value, Endianness::Big, false)?; + let unpacked = unpack(&packed, infer_type(&value), Endianness::Big)?; + assert_eq!(value, unpacked); + } + } + ``` + +- [ ] **Add doc tests to all public functions** + - Current code has some doc comments but no testable examples + - Doc tests serve as both documentation and regression tests + +- [ ] **Add benchmark suite** + ```rust + #[bench] + fn bench_tuple_comparison(b: &mut Bencher) { + let schema = /* ... */; + let candidate = /* ... */; + b.iter(|| compare(&schema, &candidate)); + } + ``` + +- [ ] **Add integration tests** + - Test cross-crate interactions + - Test parsing → classification → execution flow + - Test format → parse roundtrips + +### Error Context + +- [ ] **Add rich error context with `miette`** + ```rust + #[derive(Error, Diagnostic)] + #[error("Tuple comparison failed")] + pub struct ComparisonError { + #[source_code] + src: String, + #[label("mismatch at this position")] + span: SourceSpan, + } + ``` + +- [ ] **Add error recovery hints** + ```rust + #[error("Cannot serialize Variable")] + #[help("Variables are placeholders for read queries. Use a concrete value for write operations.")] + CannotSerialize(String), + ``` + +## 🚀 Low Priority - Advanced Features + +### Type System Enhancements + +- [ ] **Use const generics for fixed-size arrays** + ```rust + pub struct VStamp { + pub tx_version: [u8; N], + pub user_version: u16, + } + ``` + +- [ ] **Phantom types for query classification** + ```rust + pub struct Query { + inner: QueryInner, + _class: PhantomData, + } + + pub struct Constant; + pub struct ReadRange; + // etc. + + // Type-safe API that knows query class at compile time + impl Query { + pub async fn execute_range(&self) -> Result> { ... 
} + } + ``` + +- [ ] **Sealed traits for closed hierarchies** + ```rust + mod private { + pub trait Sealed {} + } + + pub trait TupleElement: private::Sealed { + fn as_bytes(&self) -> Vec; + } + + // Prevents external crates from implementing TupleElement + ``` + +### Async/Await Improvements + +- [ ] **Make Engine use `async_trait` properly** + ```rust + #[async_trait] + pub trait QueryExecutor { + async fn execute(&self, query: &Query) -> Result>; + } + ``` + +- [ ] **Add `Stream` support for range queries** + ```rust + pub async fn read_range_stream(&self, kv: &KeyValue) + -> Result>> { + // Proper async streaming instead of collecting to Vec + } + ``` + +- [ ] **Add cancellation support with `tokio::select!`** + ```rust + pub async fn execute_with_timeout( + &self, + query: &Query, + timeout: Duration + ) -> Result> { + tokio::select! { + result = self.execute(query) => result, + _ = tokio::time::sleep(timeout) => Err(EngineError::Timeout), + } + } + ``` + +### Parser Improvements + +- [ ] **Implement full parser state machine** + - Currently just a placeholder + - Port the Go parser's state machine to Rust + - Use `nom` or `pest` for robust parsing + +- [ ] **Add parser error recovery** + ```rust + pub struct ParseError { + pub position: usize, + pub expected: Vec<&'static str>, + pub found: String, + } + ``` + +- [ ] **Support parser streaming** + - Parse from `Read` trait instead of just `&str` + - Useful for large queries or stdin + +### Serialization Improvements + +- [ ] **Add compression support** + ```rust + pub fn pack_compressed( + value: &Value, + compression: Compression, + ) -> Result> { ... 
} + ``` + +- [ ] **Add custom serialization formats** + - JSON (via serde_json) + - MessagePack + - CBOR + - Allows debugging and cross-language interop + +- [ ] **Implement FoundationDB tuple packing** + - Currently placeholder in `convert.rs` and `values.rs` + - Integrate with `foundationdb` crate + - Proper tuple layer implementation + +## 📚 Documentation Improvements + +- [ ] **Add architectural decision records (ADRs)** + - Document why enums over visitor pattern + - Document async/await choices + - Document error handling strategy + +- [ ] **Add comprehensive examples** + ```rust + //! # Examples + //! + //! ## Basic query construction + //! ``` + //! use fql::*; + //! + //! let query = Query::key_value( + //! Key::new(vec![], vec![TupElement::Int(42)]), + //! Value::String("hello".into()) + //! ); + //! ``` + ``` + +- [ ] **Add migration guide from Go** + - Show Go code → Rust equivalent + - Highlight idiom differences + - Performance comparison + +- [ ] **Add performance tuning guide** + - When to use `&str` vs `String` + - When to use `Cow` + - Allocation patterns to avoid + +## 🔐 Safety & Security + +- [ ] **Add fuzzing targets** + ```rust + #[cfg(fuzzing)] + pub fn fuzz_parse(data: &[u8]) { + if let Ok(s) = std::str::from_utf8(data) { + let _ = parser::parse(s); + } + } + ``` + +- [ ] **Add sanitization for untrusted input** + - Limit tuple depth + - Limit string lengths + - Prevent DOS via large allocations + +- [ ] **Add `#[must_use]` to important types** + ```rust + #[must_use = "Query results should be checked"] + pub async fn execute(&self, query: &Query) -> Result> + ``` + +## 🎨 Code Organization + +- [ ] **Split large files into modules** + - `values.rs` is 379 lines, split into `pack.rs` and `unpack.rs` + - `tuple.rs` is 291 lines, split into `compare.rs` and `matches.rs` + +- [ ] **Add prelude module** + ```rust + // keyval/src/prelude.rs + pub use crate::{ + Query, KeyValue, Key, Value, TupElement, Variable, + // Common traits + }; + ``` + +- [ 
] **Use workspace-level Clippy configuration** + ```toml + # .cargo/config.toml + [target.'cfg(all())'] + rustflags = [ + "-W", "clippy::pedantic", + "-W", "clippy::nursery", + ] + ``` + +## 🔄 API Compatibility + +- [ ] **Add feature flags for optional functionality** + ```toml + [features] + default = ["parser", "engine"] + parser = ["nom"] + engine = ["foundationdb", "tokio"] + # `dep:` keeps the feature from listing itself (a bare "serde" entry here would be self-referential) + serde = ["dep:serde", "dep:serde_json"] + ``` + +- [ ] **Version compatibility** + - Document MSRV (Minimum Supported Rust Version) + - Add CI to test against MSRV + - Use `cargo-msrv` to track + +## 🧪 Testing Infrastructure + +- [ ] **Add mutation testing with `cargo-mutants`** + - Ensures tests actually catch bugs + - Finds untested code paths + +- [ ] **Add coverage tracking** + - Use `cargo-tarpaulin` or `cargo-llvm-cov` + - Track coverage over time + - Set minimum coverage thresholds + +- [ ] **Add test fixtures** + ```rust + // tests/fixtures/mod.rs + pub fn sample_queries() -> Vec<Query> { ... } + pub fn sample_keyvalues() -> Vec<KeyValue> { ... } + ``` + +## 🌍 Ecosystem Integration + +- [ ] **Implement `slog` or `tracing` throughout** + - Add structured logging to engine + - Add debug traces to parser + - Performance instrumentation + +- [ ] **Add `clap` integration for CLI** + - Currently basic, could be more ergonomic + - Add shell completion + - Add man page generation + +- [ ] **Add `ratatui` TUI implementation** + - Currently placeholder + - Port Go Bubble Tea implementation + - Add vim-like key bindings + +--- + +## Priority Summary + +**Start Here (High Priority):** +1. Implement standard traits (Display, FromStr, TryFrom, etc.) +2. Replace String errors with proper error types +3. Add builder patterns for ergonomic construction +4. Add comprehensive doc tests + +**Next Steps (Medium Priority):** +1. Add validation functions +2. Implement property-based tests +3. Add rich error context +4. Complete parser implementation + +**Future Work (Low Priority):** +1. 
Advanced type system features (const generics, phantom types) +2. Performance optimizations (SmallVec, zero-copy) +3. Enhanced async support (streaming, cancellation) +4. Fuzzing and security hardening diff --git a/rust/cli/Cargo.toml b/rust/cli/Cargo.toml new file mode 100644 index 00000000..99e73a1f --- /dev/null +++ b/rust/cli/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "cli" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true + +[dependencies] +keyval = { workspace = true } +parser = { workspace = true } +engine = { workspace = true } +clap = { workspace = true } +anyhow = { workspace = true } +tokio = { workspace = true } diff --git a/rust/cli/src/main.rs b/rust/cli/src/main.rs new file mode 100644 index 00000000..0a96ce69 --- /dev/null +++ b/rust/cli/src/main.rs @@ -0,0 +1,87 @@ +//! FQL command-line interface +//! +//! Interactive and non-interactive CLI for executing FQL queries. + +use clap::{Parser as ClapParser, Subcommand}; +use anyhow::Result; + +#[derive(ClapParser)] +#[command(name = "fql")] +#[command(about = "FQL - FoundationDB Query Language", long_about = None)] +struct Cli { + #[command(subcommand)] + command: Option, + + /// FQL query to execute (non-interactive mode) + query: Option, +} + +#[derive(Subcommand)] +enum Commands { + /// Run in interactive mode (TUI) + Interactive, + + /// Execute a single query + Execute { + /// The FQL query to execute + query: String, + }, +} + +#[tokio::main] +async fn main() -> Result<()> { + let cli = Cli::parse(); + + match cli.command { + Some(Commands::Interactive) => { + run_interactive().await?; + } + Some(Commands::Execute { query }) => { + execute_query(&query).await?; + } + None => { + if let Some(query) = cli.query { + execute_query(&query).await?; + } else { + // Default to interactive mode + run_interactive().await?; + } + } + } + + Ok(()) +} + +async fn run_interactive() -> Result<()> { + println!("FQL Interactive Mode"); + println!("TODO: 
Implement TUI with ratatui"); + println!("Enter FQL queries (Ctrl+C to exit):"); + + // TODO: Implement interactive TUI using ratatui + // Similar to the Go implementation with Bubble Tea + + Ok(()) +} + +async fn execute_query(query_str: &str) -> Result<()> { + println!("Executing query: {}", query_str); + + // Parse the query + let query = parser::parse(query_str) + .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?; + + // Create engine + let config = engine::EngineConfig::default(); + let engine = engine::Engine::new(config); + + // Execute query + let results = engine.execute(&query).await + .map_err(|e| anyhow::anyhow!("Execution error: {}", e))?; + + // Display results + for kv in results { + println!("{}", parser::format::format(&keyval::Query::KeyValue(kv))); + } + + Ok(()) +} diff --git a/rust/engine/Cargo.toml b/rust/engine/Cargo.toml new file mode 100644 index 00000000..b05318e3 --- /dev/null +++ b/rust/engine/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "engine" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true + +[dependencies] +keyval = { workspace = true } +parser = { workspace = true } +async-trait = { workspace = true } +futures = { workspace = true } +tokio = { workspace = true } +thiserror = { workspace = true } diff --git a/rust/engine/src/facade.rs b/rust/engine/src/facade.rs new file mode 100644 index 00000000..4b0788c4 --- /dev/null +++ b/rust/engine/src/facade.rs @@ -0,0 +1,29 @@ +//! Database facade/abstraction layer +//! +//! Provides traits for abstracting FoundationDB operations for testing. 
+ +use async_trait::async_trait; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum FacadeError { + #[error("Transaction error: {0}")] + TransactionError(String), +} + +/// Trait for database transactions +#[async_trait] +pub trait Transaction { + async fn get(&self, key: &[u8]) -> Result>, FacadeError>; + async fn set(&self, key: &[u8], value: &[u8]) -> Result<(), FacadeError>; + async fn clear(&self, key: &[u8]) -> Result<(), FacadeError>; + async fn get_range(&self, begin: &[u8], end: &[u8]) -> Result, Vec)>, FacadeError>; +} + +/// Trait for creating transactions +#[async_trait] +pub trait Database { + type Transaction: Transaction; + + async fn create_transaction(&self) -> Result; +} diff --git a/rust/engine/src/lib.rs b/rust/engine/src/lib.rs new file mode 100644 index 00000000..c6013892 --- /dev/null +++ b/rust/engine/src/lib.rs @@ -0,0 +1,150 @@ +//! FQL query execution engine +//! +//! This crate provides the engine for executing FQL queries against FoundationDB. + +pub mod facade; +pub mod stream; + +use keyval::*; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum EngineError { + #[error("Execution failed: {0}")] + ExecutionFailed(String), + + #[error("Database error: {0}")] + DatabaseError(String), + + #[error("Invalid query: {0}")] + InvalidQuery(String), +} + +/// Configuration for the engine +#[derive(Debug, Clone)] +pub struct EngineConfig { + /// Endianness for numeric value serialization + pub endianness: Endianness, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Endianness { + Big, + Little, +} + +impl Default for EngineConfig { + fn default() -> Self { + Self { + endianness: Endianness::Big, + } + } +} + +/// The FQL query execution engine +pub struct Engine { + config: EngineConfig, +} + +impl Engine { + pub fn new(config: EngineConfig) -> Self { + Self { config } + } + + /// Execute a Set query (write a key-value) + pub async fn set(&self, kv: &KeyValue) -> Result<(), EngineError> { + // TODO: Implement FDB 
transaction and tuple packing + Ok(()) + } + + /// Execute a Clear query (delete a key) + pub async fn clear(&self, key: &Key) -> Result<(), EngineError> { + // TODO: Implement FDB clear operation + Ok(()) + } + + /// Execute a ReadSingle query (read one key-value) + pub async fn read_single(&self, kv: &KeyValue) -> Result, EngineError> { + // TODO: Implement FDB read operation + Ok(None) + } + + /// Execute a ReadRange query (read multiple key-values) + pub async fn read_range(&self, kv: &KeyValue) -> Result, EngineError> { + // TODO: Implement FDB range read with streaming + Ok(Vec::new()) + } + + /// Execute a Directory query (list directories) + pub async fn directories(&self, dir: &Directory) -> Result, EngineError> { + // TODO: Implement FDB directory listing + Ok(Vec::new()) + } + + /// Execute a generic query + pub async fn execute(&self, query: &Query) -> Result, EngineError> { + match class::classify_query(query) { + QueryClass::Set => { + if let Query::KeyValue(kv) = query { + self.set(kv).await?; + Ok(vec![kv.clone()]) + } else { + Err(EngineError::InvalidQuery("Expected KeyValue for Set".into())) + } + } + QueryClass::Clear => { + if let Query::Key(key) = query { + self.clear(key).await?; + Ok(Vec::new()) + } else { + Err(EngineError::InvalidQuery("Expected Key for Clear".into())) + } + } + QueryClass::ReadSingle => { + if let Query::KeyValue(kv) = query { + Ok(self.read_single(kv).await?.into_iter().collect()) + } else { + Err(EngineError::InvalidQuery("Expected KeyValue for ReadSingle".into())) + } + } + QueryClass::ReadRange => { + if let Query::KeyValue(kv) = query { + self.read_range(kv).await + } else { + Err(EngineError::InvalidQuery("Expected KeyValue for ReadRange".into())) + } + } + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum QueryClass { + Set, + Clear, + ReadSingle, + ReadRange, +} + +// Simplified query classification +mod class { + use keyval::*; + use super::QueryClass; + + pub fn classify_query(query: &Query) -> 
QueryClass { + // TODO: Use proper classification from keyval::class + match query { + Query::KeyValue(kv) => { + if matches!(kv.value, Value::Clear) { + QueryClass::Clear + } else if matches!(kv.value, Value::Variable(_)) { + QueryClass::ReadSingle + } else { + QueryClass::Set + } + } + Query::Key(_) => QueryClass::Clear, + Query::Directory(_) => QueryClass::ReadRange, + } + } +} diff --git a/rust/engine/src/stream.rs b/rust/engine/src/stream.rs new file mode 100644 index 00000000..5846fff1 --- /dev/null +++ b/rust/engine/src/stream.rs @@ -0,0 +1,14 @@ +//! Streaming support for range queries +//! +//! Provides async streaming of query results. + +use futures::stream::Stream; +use keyval::KeyValue; + +/// Create a stream of key-values from a range query +pub fn stream_range( + _results: Vec, +) -> impl Stream> { + // TODO: Implement actual async streaming with FoundationDB + futures::stream::iter(vec![]) +} diff --git a/rust/keyval/Cargo.toml b/rust/keyval/Cargo.toml new file mode 100644 index 00000000..8f29eef3 --- /dev/null +++ b/rust/keyval/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "keyval" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true + +[dependencies] +serde = { workspace = true } +serde_json = { workspace = true } +serde_bytes = "0.11" +uuid = { workspace = true } +thiserror = { workspace = true } diff --git a/rust/keyval/src/class.rs b/rust/keyval/src/class.rs new file mode 100644 index 00000000..bae760db --- /dev/null +++ b/rust/keyval/src/class.rs @@ -0,0 +1,273 @@ +//! Query classification module. +//! +//! This module classifies a KeyValue by the kind of operation it represents. +//! Classifications include: Constant, VStampKey, VStampVal, Clear, ReadSingle, and ReadRange. + +use crate::*; +use std::fmt; + +/// Classification of a KeyValue based on its structure and contents. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Class { + /// KeyValue with no Variable, MaybeMore, Clear, or VStampFuture. + /// Can be used for set operations or returned by get operations. + Constant, + + /// Constant KeyValue with a VStampFuture in the key. + /// Can only be used for set operations. + VStampKey, + + /// Constant KeyValue with a VStampFuture in the value. + /// Can only be used for set operations. + VStampVal, + + /// KeyValue with Clear as its value and no Variable, MaybeMore, or VStampFuture. + /// Used for clear/delete operations. + Clear, + + /// KeyValue with Variable as its value but no Variable or MaybeMore in its key. + /// Returns a single KeyValue. + ReadSingle, + + /// KeyValue with Variable or MaybeMore in its key. + /// Returns multiple KeyValues. + ReadRange, + + /// Invalid KeyValue with conflicting attributes. + Invalid(String), +} + +impl fmt::Display for Class { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Class::Constant => write!(f, "constant"), + Class::VStampKey => write!(f, "vstampkey"), + Class::VStampVal => write!(f, "vstampval"), + Class::Clear => write!(f, "clear"), + Class::ReadSingle => write!(f, "single"), + Class::ReadRange => write!(f, "range"), + Class::Invalid(msg) => write!(f, "invalid[{}]", msg), + } + } +} + +/// Classify a KeyValue based on its structure. 
+pub fn classify(kv: &KeyValue) -> Class { + let dir_attr = get_attributes_of_dir(&kv.key.directory); + let key_attr = dir_attr.merge(&get_attributes_of_tup(&kv.key.tuple)); + let kv_attr = key_attr.merge(&get_attributes_of_val(&kv.value)); + + // KeyValues should never contain nil (in Go sense - we don't have this issue in Rust) + // But we track it for compatibility + if kv_attr.has_nil { + return invalid_class(&kv_attr); + } + + // KeyValues should contain at most 1 VStampFuture + if kv_attr.vstamp_futures > 1 { + return invalid_class(&kv_attr); + } + + // Ensure at most one of these conditions is true + let count = [ + kv_attr.vstamp_futures > 0, + kv_attr.has_variable, + kv_attr.has_clear, + ] + .iter() + .filter(|&&x| x) + .count(); + + if count > 1 { + return invalid_class(&kv_attr); + } + + match () { + _ if key_attr.has_variable => Class::ReadRange, + _ if kv_attr.has_variable => Class::ReadSingle, + _ if kv_attr.vstamp_futures > 0 => { + if key_attr.vstamp_futures > 0 { + Class::VStampKey + } else { + Class::VStampVal + } + } + _ if kv_attr.has_clear => Class::Clear, + _ => Class::Constant, + } +} + +/// Attributes describing the characteristics of a KeyValue relevant to classification. +#[derive(Debug, Clone, Copy, Default)] +struct Attributes { + vstamp_futures: usize, + has_variable: bool, + has_clear: bool, + has_nil: bool, +} + +impl Attributes { + /// Merge attributes of parts to infer attributes of the whole. + fn merge(&self, other: &Attributes) -> Attributes { + Attributes { + vstamp_futures: self.vstamp_futures + other.vstamp_futures, + has_variable: self.has_variable || other.has_variable, + has_clear: self.has_clear || other.has_clear, + has_nil: self.has_nil || other.has_nil, + } + } +} + +/// Get attributes of a directory. 
+fn get_attributes_of_dir(dir: &Directory) -> Attributes { + let mut attr = Attributes::default(); + for element in dir { + match element { + DirElement::String(_) => {} + DirElement::Variable(_) => attr.has_variable = true, + } + } + attr +} + +/// Get attributes of a tuple. +fn get_attributes_of_tup(tup: &Tuple) -> Attributes { + let mut attr = Attributes::default(); + for element in tup { + let sub_attr = match element { + TupElement::Tuple(t) => get_attributes_of_tup(t), + TupElement::Variable(_) => Attributes { + has_variable: true, + ..Default::default() + }, + TupElement::MaybeMore => Attributes { + has_variable: true, + ..Default::default() + }, + TupElement::VStampFuture(_) => Attributes { + vstamp_futures: 1, + ..Default::default() + }, + _ => Attributes::default(), + }; + attr = attr.merge(&sub_attr); + } + attr +} + +/// Get attributes of a value. +fn get_attributes_of_val(val: &Value) -> Attributes { + match val { + Value::Tuple(t) => get_attributes_of_tup(t), + Value::Variable(_) => Attributes { + has_variable: true, + ..Default::default() + }, + Value::Clear => Attributes { + has_clear: true, + ..Default::default() + }, + Value::VStampFuture(_) => Attributes { + vstamp_futures: 1, + ..Default::default() + }, + _ => Attributes::default(), + } +} + +/// Create an Invalid class with relevant attributes for debugging. 
+fn invalid_class(attr: &Attributes) -> Class { + let mut parts = Vec::new(); + + if attr.vstamp_futures > 0 { + parts.push(format!("vstamps:{}", attr.vstamp_futures)); + } + if attr.has_variable { + parts.push("var".to_string()); + } + if attr.has_clear { + parts.push("clear".to_string()); + } + if attr.has_nil { + parts.push("nil".to_string()); + } + + Class::Invalid(parts.join(",")) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_classify_constant() { + let kv = KeyValue { + key: Key { + directory: vec![], + tuple: vec![TupElement::Int(42)], + }, + value: Value::String("test".to_string()), + }; + assert_eq!(classify(&kv), Class::Constant); + } + + #[test] + fn test_classify_clear() { + let kv = KeyValue { + key: Key { + directory: vec![], + tuple: vec![TupElement::Int(42)], + }, + value: Value::Clear, + }; + assert_eq!(classify(&kv), Class::Clear); + } + + #[test] + fn test_classify_read_single() { + let kv = KeyValue { + key: Key { + directory: vec![], + tuple: vec![TupElement::Int(42)], + }, + value: Value::Variable(Variable::any()), + }; + assert_eq!(classify(&kv), Class::ReadSingle); + } + + #[test] + fn test_classify_read_range() { + let kv = KeyValue { + key: Key { + directory: vec![], + tuple: vec![TupElement::Int(42), TupElement::MaybeMore], + }, + value: Value::Variable(Variable::any()), + }; + assert_eq!(classify(&kv), Class::ReadRange); + } + + #[test] + fn test_classify_vstamp_key() { + let kv = KeyValue { + key: Key { + directory: vec![], + tuple: vec![TupElement::VStampFuture(VStampFuture { user_version: 0 })], + }, + value: Value::Int(42), + }; + assert_eq!(classify(&kv), Class::VStampKey); + } + + #[test] + fn test_classify_vstamp_val() { + let kv = KeyValue { + key: Key { + directory: vec![], + tuple: vec![TupElement::Int(42)], + }, + value: Value::VStampFuture(VStampFuture { user_version: 0 }), + }; + assert_eq!(classify(&kv), Class::VStampVal); + } +} diff --git a/rust/keyval/src/convert.rs b/rust/keyval/src/convert.rs 
new file mode 100644 index 00000000..18ac8be8 --- /dev/null +++ b/rust/keyval/src/convert.rs @@ -0,0 +1,42 @@ +//! Type conversion utilities for FQL and FoundationDB types. +//! +//! This module provides conversions between FQL types and FoundationDB tuple types. +//! Note: Full implementation requires FoundationDB tuple library integration. + +use crate::*; + +/// Convert an FQL tuple to FoundationDB tuple format. +/// +/// TODO: This requires integration with FoundationDB's tuple layer. +/// For now, this is a placeholder that would be implemented with the `foundationdb` crate. +pub fn to_fdb_tuple(_tuple: &Tuple) -> Result, String> { + // Placeholder for FDB tuple conversion + // In a real implementation, this would use foundationdb::tuple::pack() + Ok(vec![]) +} + +/// Convert from FoundationDB tuple format to FQL tuple. +/// +/// TODO: This requires integration with FoundationDB's tuple layer. +/// For now, this is a placeholder that would be implemented with the `foundationdb` crate. +pub fn from_fdb_tuple(_data: &[u8]) -> Result { + // Placeholder for FDB tuple conversion + // In a real implementation, this would use foundationdb::tuple::unpack() + Ok(vec![]) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_placeholder_conversions() { + // These are placeholder tests + let tuple = vec![TupElement::Int(42)]; + let result = to_fdb_tuple(&tuple); + assert!(result.is_ok()); + + let result = from_fdb_tuple(&[]); + assert!(result.is_ok()); + } +} diff --git a/rust/keyval/src/lib.rs b/rust/keyval/src/lib.rs new file mode 100644 index 00000000..97c20750 --- /dev/null +++ b/rust/keyval/src/lib.rs @@ -0,0 +1,296 @@ +//! Core data structures for FQL queries and key-values. +//! +//! This crate contains types representing key-values and related utilities. +//! These types model both queries and the data returned by queries. They can +//! be constructed from query strings using the parser crate, but are also +//! 
designed to be easily constructed directly in Rust source code. +//! +//! # Overview +//! +//! Unlike the Go implementation which uses the visitor pattern to work around +//! the lack of tagged unions, this Rust implementation uses enums directly, +//! providing type-safe polymorphism with pattern matching. + +use serde::{Deserialize, Serialize}; +use std::fmt; + +pub mod class; +pub mod convert; +pub mod tuple; +pub mod values; + +/// A query that can be passed to the engine. This includes KeyValue, Key, and Directory queries. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum Query { + /// A key-value pair query (read or write) + KeyValue(KeyValue), + /// A key-only query (equivalent to KeyValue with empty Variable) + Key(Key), + /// A directory listing query + Directory(Directory), +} + +/// A key-value pair that can be passed as a query or returned as a result. +/// When returned as a result, it will not contain Variable, Clear, or MaybeMore. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct KeyValue { + pub key: Key, + pub value: Value, +} + +/// A key consisting of a directory path and tuple. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Key { + pub directory: Directory, + pub tuple: Tuple, +} + +/// A directory path consisting of elements that can be strings or variables. +pub type Directory = Vec; + +/// An element in a directory path - either a concrete string or a variable placeholder. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum DirElement { + String(String), + Variable(Variable), +} + +/// A tuple of elements. In FQL, tuples are the basic building blocks of keys. +pub type Tuple = Vec; + +/// An element that can appear in a tuple. 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum TupElement { + /// A nested tuple + Tuple(Tuple), + /// Nil/empty element + Nil, + /// Signed 64-bit integer + Int(i64), + /// Unsigned 64-bit integer + Uint(u64), + /// Boolean value + Bool(bool), + /// 64-bit floating point + Float(f64), + /// UTF-8 string + String(String), + /// UUID (16 bytes) + Uuid(uuid::Uuid), + /// Arbitrary bytes + #[serde(with = "serde_bytes")] + Bytes(Vec), + /// Variable placeholder with type constraints + Variable(Variable), + /// Special marker allowing additional tuple elements (only valid as last element) + MaybeMore, + /// Versionstamp with transaction and user versions + VStamp(VStamp), + /// Future versionstamp (assigned at commit time) + VStampFuture(VStampFuture), +} + +/// A value that can be stored in a key-value pair. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum Value { + /// A tuple value + Tuple(Tuple), + /// Nil/empty value + Nil, + /// Signed 64-bit integer + Int(i64), + /// Unsigned 64-bit integer + Uint(u64), + /// Boolean value + Bool(bool), + /// 64-bit floating point + Float(f64), + /// UTF-8 string + String(String), + /// UUID (16 bytes) + Uuid(uuid::Uuid), + /// Arbitrary bytes + #[serde(with = "serde_bytes")] + Bytes(Vec), + /// Variable placeholder with type constraints + Variable(Variable), + /// Clear marker (delete this key) + Clear, + /// Versionstamp with transaction and user versions + VStamp(VStamp), + /// Future versionstamp (assigned at commit time) + VStampFuture(VStampFuture), +} + +/// A variable placeholder defining a schema. Variables can have type constraints. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Variable { + /// Type constraints for this variable. Empty means any type is allowed. + pub types: Vec, +} + +impl Variable { + /// Create a new variable that accepts any type. 
+ pub fn any() -> Self { + Self { types: Vec::new() } + } + + /// Create a new variable with specific type constraints. + pub fn with_types(types: Vec) -> Self { + Self { types } + } +} + +/// Type constraints for variables. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum ValueType { + /// Any type is allowed (default) + #[serde(rename = "")] + Any, + /// Signed integer + Int, + /// Unsigned integer + Uint, + /// Boolean + Bool, + /// Floating point + Float, + /// String + String, + /// Bytes + Bytes, + /// UUID + Uuid, + /// Tuple + Tuple, + /// Versionstamp + VStamp, +} + +impl ValueType { + /// Get all possible value types. + pub fn all() -> Vec { + vec![ + ValueType::Any, + ValueType::Int, + ValueType::Uint, + ValueType::Bool, + ValueType::Float, + ValueType::String, + ValueType::Bytes, + ValueType::Uuid, + ValueType::Tuple, + ValueType::VStamp, + ] + } +} + +impl fmt::Display for ValueType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ValueType::Any => write!(f, ""), + ValueType::Int => write!(f, "int"), + ValueType::Uint => write!(f, "uint"), + ValueType::Bool => write!(f, "bool"), + ValueType::Float => write!(f, "float"), + ValueType::String => write!(f, "string"), + ValueType::Bytes => write!(f, "bytes"), + ValueType::Uuid => write!(f, "uuid"), + ValueType::Tuple => write!(f, "tuple"), + ValueType::VStamp => write!(f, "vstamp"), + } + } +} + +/// A versionstamp represents a point in time in the database. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub struct VStamp { + /// The 10-byte transaction version assigned by FoundationDB + pub tx_version: [u8; 10], + /// User-defined 16-bit version + pub user_version: u16, +} + +/// A future versionstamp that will be assigned when the transaction commits. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub struct VStampFuture { + /// User-defined 16-bit version + pub user_version: u16, +} + +// Convenience constructors +impl Query { + pub fn key_value(key: Key, value: Value) -> Self { + Query::KeyValue(KeyValue { key, value }) + } + + pub fn key(directory: Directory, tuple: Tuple) -> Self { + Query::Key(Key { directory, tuple }) + } + + pub fn directory(directory: Directory) -> Self { + Query::Directory(directory) + } +} + +impl KeyValue { + pub fn new(key: Key, value: Value) -> Self { + Self { key, value } + } +} + +impl Key { + pub fn new(directory: Directory, tuple: Tuple) -> Self { + Self { directory, tuple } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_variable_any() { + let var = Variable::any(); + assert_eq!(var.types.len(), 0); + } + + #[test] + fn test_variable_with_types() { + let var = Variable::with_types(vec![ValueType::Int, ValueType::String]); + assert_eq!(var.types.len(), 2); + } + + #[test] + fn test_query_constructors() { + let dir = vec![DirElement::String("test".to_string())]; + let tuple = vec![TupElement::Int(42)]; + let key = Key::new(dir.clone(), tuple); + let value = Value::Int(100); + + let q1 = Query::key_value(key.clone(), value); + match q1 { + Query::KeyValue(kv) => { + assert_eq!(kv.value, Value::Int(100)); + } + _ => panic!("Expected KeyValue"), + } + + let q2 = Query::key(dir.clone(), vec![]); + match q2 { + Query::Key(_) => {} + _ => panic!("Expected Key"), + } + + let q3 = Query::directory(dir); + match q3 { + Query::Directory(_) => {} + _ => panic!("Expected Directory"), + } + } +} diff --git a/rust/keyval/src/tuple.rs b/rust/keyval/src/tuple.rs new file mode 100644 index 00000000..3dc5f33e --- /dev/null +++ b/rust/keyval/src/tuple.rs @@ -0,0 +1,291 @@ +//! Tuple comparison and matching logic. +//! +//! This module provides functionality to compare candidate tuples against +//! 
schema tuples that may contain Variables and MaybeMore wildcards. + +use crate::*; + +/// Compare a candidate tuple against a schema tuple. +/// +/// The schema tuple may contain Variable or MaybeMore elements, while the candidate +/// must not contain either. Returns None if the tuples match, or Some with the +/// index path to the first mismatching element. +/// +/// # Examples +/// +/// ``` +/// use keyval::*; +/// use keyval::tuple::compare; +/// +/// let schema = vec![TupElement::Int(1), TupElement::Variable(Variable::any())]; +/// let candidate = vec![TupElement::Int(1), TupElement::String("hello".to_string())]; +/// assert_eq!(compare(&schema, &candidate), None); // Matches +/// +/// let bad_candidate = vec![TupElement::Int(2), TupElement::String("hello".to_string())]; +/// assert_eq!(compare(&schema, &bad_candidate), Some(vec![0])); // First element mismatch +/// ``` +pub fn compare(schema: &Tuple, candidate: &Tuple) -> Option> { + // If the schema is empty, the candidate must be empty as well + if schema.is_empty() { + return if candidate.is_empty() { + None + } else { + Some(vec![0]) + }; + } + + // Check if schema ends with MaybeMore + let has_maybe_more = matches!(schema.last(), Some(TupElement::MaybeMore)); + let schema_to_check = if has_maybe_more { + &schema[..schema.len() - 1] + } else { + schema + }; + + // If no MaybeMore, lengths must match or candidate must be shorter + if !has_maybe_more && schema_to_check.len() < candidate.len() { + return Some(vec![schema_to_check.len()]); + } + + // Candidate must be at least as long as schema (minus MaybeMore) + if schema_to_check.len() > candidate.len() { + return Some(vec![candidate.len()]); + } + + // Compare each element + for (i, schema_elem) in schema_to_check.iter().enumerate() { + if let Some(mismatch) = compare_element(schema_elem, &candidate[i], i) { + return Some(mismatch); + } + } + + None +} + +/// Compare a single tuple element against a candidate element +fn compare_element(schema: 
&TupElement, candidate: &TupElement, index: usize) -> Option> { + match schema { + TupElement::Tuple(schema_tup) => { + if let TupElement::Tuple(cand_tup) = candidate { + if let Some(mut mismatch) = compare(schema_tup, cand_tup) { + mismatch.insert(0, index); + return Some(mismatch); + } + None + } else { + Some(vec![index]) + } + } + TupElement::Variable(var) => { + // Empty variable means any type is allowed + if var.types.is_empty() { + return None; + } + + // Check if candidate matches any of the allowed types + for vtype in &var.types { + if matches_type(candidate, *vtype) { + return None; + } + } + Some(vec![index]) + } + TupElement::MaybeMore => { + // MaybeMore should have been removed before comparison + Some(vec![index]) + } + // For all other types, check equality + TupElement::Nil => { + if matches!(candidate, TupElement::Nil) { + None + } else { + Some(vec![index]) + } + } + TupElement::Int(v) => { + if matches!(candidate, TupElement::Int(c) if c == v) { + None + } else { + Some(vec![index]) + } + } + TupElement::Uint(v) => { + if matches!(candidate, TupElement::Uint(c) if c == v) { + None + } else { + Some(vec![index]) + } + } + TupElement::Bool(v) => { + if matches!(candidate, TupElement::Bool(c) if c == v) { + None + } else { + Some(vec![index]) + } + } + TupElement::Float(v) => { + if matches!(candidate, TupElement::Float(c) if (c - v).abs() < f64::EPSILON) { + None + } else { + Some(vec![index]) + } + } + TupElement::String(v) => { + if matches!(candidate, TupElement::String(c) if c == v) { + None + } else { + Some(vec![index]) + } + } + TupElement::Uuid(v) => { + if matches!(candidate, TupElement::Uuid(c) if c == v) { + None + } else { + Some(vec![index]) + } + } + TupElement::Bytes(v) => { + if matches!(candidate, TupElement::Bytes(c) if c == v) { + None + } else { + Some(vec![index]) + } + } + TupElement::VStamp(v) => { + if matches!(candidate, TupElement::VStamp(c) if c == v) { + None + } else { + Some(vec![index]) + } + } + 
TupElement::VStampFuture(v) => { + if matches!(candidate, TupElement::VStampFuture(c) if c == v) { + None + } else { + Some(vec![index]) + } + } + } +} + +/// Check if a tuple element matches a specific value type +fn matches_type(elem: &TupElement, vtype: ValueType) -> bool { + match vtype { + ValueType::Any => true, + ValueType::Int => matches!(elem, TupElement::Int(_)), + ValueType::Uint => matches!(elem, TupElement::Uint(_)), + ValueType::Bool => matches!(elem, TupElement::Bool(_)), + ValueType::Float => matches!(elem, TupElement::Float(_)), + ValueType::String => matches!(elem, TupElement::String(_)), + ValueType::Bytes => matches!(elem, TupElement::Bytes(_)), + ValueType::Uuid => matches!(elem, TupElement::Uuid(_)), + ValueType::Tuple => matches!(elem, TupElement::Tuple(_)), + ValueType::VStamp => matches!(elem, TupElement::VStamp(_)), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_compare_empty_tuples() { + let schema = vec![]; + let candidate = vec![]; + assert_eq!(compare(&schema, &candidate), None); + } + + #[test] + fn test_compare_empty_schema_nonempty_candidate() { + let schema = vec![]; + let candidate = vec![TupElement::Int(1)]; + assert_eq!(compare(&schema, &candidate), Some(vec![0])); + } + + #[test] + fn test_compare_exact_match() { + let schema = vec![TupElement::Int(42), TupElement::String("hello".to_string())]; + let candidate = vec![TupElement::Int(42), TupElement::String("hello".to_string())]; + assert_eq!(compare(&schema, &candidate), None); + } + + #[test] + fn test_compare_mismatch() { + let schema = vec![TupElement::Int(42), TupElement::String("hello".to_string())]; + let candidate = vec![TupElement::Int(43), TupElement::String("hello".to_string())]; + assert_eq!(compare(&schema, &candidate), Some(vec![0])); + } + + #[test] + fn test_compare_with_variable_any() { + let schema = vec![TupElement::Int(42), TupElement::Variable(Variable::any())]; + let candidate = vec![TupElement::Int(42), 
TupElement::String("anything".to_string())]; + assert_eq!(compare(&schema, &candidate), None); + } + + #[test] + fn test_compare_with_variable_type_constraint() { + let schema = vec![ + TupElement::Int(42), + TupElement::Variable(Variable::with_types(vec![ValueType::String])), + ]; + let candidate1 = vec![TupElement::Int(42), TupElement::String("ok".to_string())]; + assert_eq!(compare(&schema, &candidate1), None); + + let candidate2 = vec![TupElement::Int(42), TupElement::Int(99)]; + assert_eq!(compare(&schema, &candidate2), Some(vec![1])); + } + + #[test] + fn test_compare_with_maybe_more() { + let schema = vec![TupElement::Int(42), TupElement::MaybeMore]; + let candidate = vec![ + TupElement::Int(42), + TupElement::String("extra".to_string()), + TupElement::Bool(true), + ]; + assert_eq!(compare(&schema, &candidate), None); + } + + #[test] + fn test_compare_without_maybe_more_length_mismatch() { + let schema = vec![TupElement::Int(42)]; + let candidate = vec![TupElement::Int(42), TupElement::String("extra".to_string())]; + assert_eq!(compare(&schema, &candidate), Some(vec![1])); + } + + #[test] + fn test_compare_nested_tuples() { + let schema = vec![ + TupElement::Int(1), + TupElement::Tuple(vec![ + TupElement::String("nested".to_string()), + TupElement::Variable(Variable::any()), + ]), + ]; + let candidate = vec![ + TupElement::Int(1), + TupElement::Tuple(vec![ + TupElement::String("nested".to_string()), + TupElement::Bool(true), + ]), + ]; + assert_eq!(compare(&schema, &candidate), None); + } + + #[test] + fn test_compare_nested_tuples_mismatch() { + let schema = vec![ + TupElement::Int(1), + TupElement::Tuple(vec![TupElement::String("nested".to_string()), TupElement::Int(99)]), + ]; + let candidate = vec![ + TupElement::Int(1), + TupElement::Tuple(vec![ + TupElement::String("nested".to_string()), + TupElement::Int(100), + ]), + ]; + assert_eq!(compare(&schema, &candidate), Some(vec![1, 1])); + } +} diff --git a/rust/keyval/src/values.rs 
b/rust/keyval/src/values.rs new file mode 100644 index 00000000..5008ae4b --- /dev/null +++ b/rust/keyval/src/values.rs @@ -0,0 +1,378 @@ +//! Value serialization and deserialization. +//! +//! This module provides serialization and deserialization of FQL values to/from bytes +//! for storage in FoundationDB. + +use crate::*; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum SerializationError { + #[error("Serialization failed: {0}")] + Failed(String), + + #[error("Cannot serialize {0}")] + CannotSerialize(String), + + #[error("Invalid data length: expected {expected}, got {actual}")] + InvalidLength { expected: usize, actual: usize }, + + #[error("Unknown value type: {0}")] + UnknownType(String), +} + +/// Endianness configuration for numeric value serialization. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Endianness { + Big, + Little, +} + +/// Pack a value into bytes for storage. +/// +/// # Arguments +/// * `value` - The value to serialize +/// * `endianness` - Byte order for numeric types +/// * `has_vstamp` - Whether the value contains a versionstamp +/// +/// # Returns +/// The serialized bytes, or an error if serialization fails. 
+/// +/// # Errors +/// Returns an error if: +/// - The value is a Variable or Clear (cannot be serialized) +/// - Tuple conversion fails +pub fn pack(value: &Value, endianness: Endianness, has_vstamp: bool) -> Result, SerializationError> { + match value { + Value::Nil => Ok(vec![]), + + Value::Bool(b) => Ok(vec![if *b { 1 } else { 0 }]), + + Value::Int(i) => { + let mut bytes = vec![0u8; 8]; + match endianness { + Endianness::Big => { + bytes.copy_from_slice(&(*i as u64).to_be_bytes()); + } + Endianness::Little => { + bytes.copy_from_slice(&(*i as u64).to_le_bytes()); + } + } + Ok(bytes) + } + + Value::Uint(u) => { + let mut bytes = vec![0u8; 8]; + match endianness { + Endianness::Big => { + bytes.copy_from_slice(&u.to_be_bytes()); + } + Endianness::Little => { + bytes.copy_from_slice(&u.to_le_bytes()); + } + } + Ok(bytes) + } + + Value::Float(f) => { + let bits = f.to_bits(); + let mut bytes = vec![0u8; 8]; + match endianness { + Endianness::Big => { + bytes.copy_from_slice(&bits.to_be_bytes()); + } + Endianness::Little => { + bytes.copy_from_slice(&bits.to_le_bytes()); + } + } + Ok(bytes) + } + + Value::String(s) => Ok(s.as_bytes().to_vec()), + + Value::Bytes(b) => Ok(b.clone()), + + Value::Uuid(u) => Ok(u.as_bytes().to_vec()), + + Value::Tuple(_tup) => { + // TODO: Implement FDB tuple packing + // For now, use a simple placeholder serialization + // In a real implementation, this would use FoundationDB's tuple layer + if has_vstamp { + Err(SerializationError::Failed( + "Versionstamp tuple packing not yet implemented".to_string() + )) + } else { + // Placeholder: just serialize as empty for now + Ok(vec![]) + } + } + + Value::VStamp(vstamp) => { + // VStamp is 12 bytes: 10 bytes tx_version + 2 bytes user_version + let mut bytes = vec![0u8; 12]; + bytes[0..10].copy_from_slice(&vstamp.tx_version); + bytes[10..12].copy_from_slice(&vstamp.user_version.to_le_bytes()); + Ok(bytes) + } + + Value::VStampFuture(vstamp) => { + // VStampFuture is 16 bytes: 10 bytes (zeros) 
+ 2 bytes user_version + 4 bytes position + let mut bytes = vec![0u8; 16]; + bytes[10..12].copy_from_slice(&vstamp.user_version.to_le_bytes()); + Ok(bytes) + } + + Value::Variable(_) => { + Err(SerializationError::CannotSerialize("Variable".to_string())) + } + + Value::Clear => { + Err(SerializationError::CannotSerialize("Clear".to_string())) + } + } +} + +/// Unpack bytes into a value with a type hint. +/// +/// # Arguments +/// * `data` - The bytes to deserialize +/// * `value_type` - Type hint for deserialization +/// * `endianness` - Byte order for numeric types +/// +/// # Returns +/// The deserialized value, or an error if deserialization fails. +/// +/// # Errors +/// Returns an error if: +/// - The data length doesn't match the expected length for the type +/// - The value type is unknown +pub fn unpack(data: &[u8], value_type: ValueType, endianness: Endianness) -> Result { + match value_type { + ValueType::Any => Ok(Value::Bytes(data.to_vec())), + + ValueType::Bool => { + if data.len() != 1 { + return Err(SerializationError::InvalidLength { + expected: 1, + actual: data.len(), + }); + } + Ok(Value::Bool(data[0] == 1)) + } + + ValueType::Int => { + if data.len() != 8 { + return Err(SerializationError::InvalidLength { + expected: 8, + actual: data.len(), + }); + } + let value = match endianness { + Endianness::Big => u64::from_be_bytes(data.try_into().unwrap()), + Endianness::Little => u64::from_le_bytes(data.try_into().unwrap()), + }; + Ok(Value::Int(value as i64)) + } + + ValueType::Uint => { + if data.len() != 8 { + return Err(SerializationError::InvalidLength { + expected: 8, + actual: data.len(), + }); + } + let value = match endianness { + Endianness::Big => u64::from_be_bytes(data.try_into().unwrap()), + Endianness::Little => u64::from_le_bytes(data.try_into().unwrap()), + }; + Ok(Value::Uint(value)) + } + + ValueType::Float => { + if data.len() != 8 { + return Err(SerializationError::InvalidLength { + expected: 8, + actual: data.len(), + }); + } + 
let bits = match endianness { + Endianness::Big => u64::from_be_bytes(data.try_into().unwrap()), + Endianness::Little => u64::from_le_bytes(data.try_into().unwrap()), + }; + Ok(Value::Float(f64::from_bits(bits))) + } + + ValueType::String => { + let s = String::from_utf8(data.to_vec()) + .map_err(|e| SerializationError::Failed(format!("Invalid UTF-8: {}", e)))?; + Ok(Value::String(s)) + } + + ValueType::Bytes => Ok(Value::Bytes(data.to_vec())), + + ValueType::Uuid => { + if data.len() != 16 { + return Err(SerializationError::InvalidLength { + expected: 16, + actual: data.len(), + }); + } + let uuid = uuid::Uuid::from_slice(data) + .map_err(|e| SerializationError::Failed(format!("Invalid UUID: {}", e)))?; + Ok(Value::Uuid(uuid)) + } + + ValueType::Tuple => { + // TODO: Implement FDB tuple unpacking + // For now, return an empty tuple as placeholder + Ok(Value::Tuple(vec![])) + } + + ValueType::VStamp => { + if data.len() != 12 { + return Err(SerializationError::InvalidLength { + expected: 12, + actual: data.len(), + }); + } + let mut tx_version = [0u8; 10]; + tx_version.copy_from_slice(&data[0..10]); + let user_version = u16::from_le_bytes([data[10], data[11]]); + Ok(Value::VStamp(VStamp { + tx_version, + user_version, + })) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pack_unpack_nil() { + let value = Value::Nil; + let packed = pack(&value, Endianness::Big, false).unwrap(); + assert_eq!(packed, vec![]); + } + + #[test] + fn test_pack_unpack_bool() { + let value_true = Value::Bool(true); + let packed_true = pack(&value_true, Endianness::Big, false).unwrap(); + assert_eq!(packed_true, vec![1]); + let unpacked_true = unpack(&packed_true, ValueType::Bool, Endianness::Big).unwrap(); + assert_eq!(unpacked_true, value_true); + + let value_false = Value::Bool(false); + let packed_false = pack(&value_false, Endianness::Big, false).unwrap(); + assert_eq!(packed_false, vec![0]); + let unpacked_false = unpack(&packed_false, ValueType::Bool, 
Endianness::Big).unwrap(); + assert_eq!(unpacked_false, value_false); + } + + #[test] + fn test_pack_unpack_int() { + let value = Value::Int(42); + let packed = pack(&value, Endianness::Big, false).unwrap(); + assert_eq!(packed.len(), 8); + let unpacked = unpack(&packed, ValueType::Int, Endianness::Big).unwrap(); + assert_eq!(unpacked, value); + } + + #[test] + fn test_pack_unpack_uint() { + let value = Value::Uint(12345); + let packed = pack(&value, Endianness::Little, false).unwrap(); + assert_eq!(packed.len(), 8); + let unpacked = unpack(&packed, ValueType::Uint, Endianness::Little).unwrap(); + assert_eq!(unpacked, value); + } + + #[test] + fn test_pack_unpack_float() { + let value = Value::Float(3.14159); + let packed = pack(&value, Endianness::Big, false).unwrap(); + assert_eq!(packed.len(), 8); + let unpacked = unpack(&packed, ValueType::Float, Endianness::Big).unwrap(); + if let Value::Float(f) = unpacked { + assert!((f - 3.14159).abs() < 1e-10); + } else { + panic!("Expected Float"); + } + } + + #[test] + fn test_pack_unpack_string() { + let value = Value::String("hello world".to_string()); + let packed = pack(&value, Endianness::Big, false).unwrap(); + let unpacked = unpack(&packed, ValueType::String, Endianness::Big).unwrap(); + assert_eq!(unpacked, value); + } + + #[test] + fn test_pack_unpack_bytes() { + let value = Value::Bytes(vec![1, 2, 3, 4, 5]); + let packed = pack(&value, Endianness::Big, false).unwrap(); + assert_eq!(packed, vec![1, 2, 3, 4, 5]); + let unpacked = unpack(&packed, ValueType::Bytes, Endianness::Big).unwrap(); + assert_eq!(unpacked, value); + } + + #[test] + fn test_pack_unpack_uuid() { + let uuid = uuid::Uuid::new_v4(); + let value = Value::Uuid(uuid); + let packed = pack(&value, Endianness::Big, false).unwrap(); + assert_eq!(packed.len(), 16); + let unpacked = unpack(&packed, ValueType::Uuid, Endianness::Big).unwrap(); + assert_eq!(unpacked, value); + } + + #[test] + fn test_pack_unpack_vstamp() { + let vstamp = VStamp { + 
tx_version: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + user_version: 42, + }; + let value = Value::VStamp(vstamp); + let packed = pack(&value, Endianness::Big, false).unwrap(); + assert_eq!(packed.len(), 12); + let unpacked = unpack(&packed, ValueType::VStamp, Endianness::Big).unwrap(); + assert_eq!(unpacked, value); + } + + #[test] + fn test_pack_variable_fails() { + let value = Value::Variable(Variable::any()); + let result = pack(&value, Endianness::Big, false); + assert!(result.is_err()); + } + + #[test] + fn test_pack_clear_fails() { + let value = Value::Clear; + let result = pack(&value, Endianness::Big, false); + assert!(result.is_err()); + } + + #[test] + fn test_endianness_difference() { + let value = Value::Int(0x0102030405060708u64 as i64); + + let packed_big = pack(&value, Endianness::Big, false).unwrap(); + let packed_little = pack(&value, Endianness::Little, false).unwrap(); + + // Different endianness should produce different bytes + assert_ne!(packed_big, packed_little); + + // But unpacking with correct endianness should give same value + let unpacked_big = unpack(&packed_big, ValueType::Int, Endianness::Big).unwrap(); + let unpacked_little = unpack(&packed_little, ValueType::Int, Endianness::Little).unwrap(); + + assert_eq!(unpacked_big, value); + assert_eq!(unpacked_little, value); + } +} diff --git a/rust/parser/Cargo.toml b/rust/parser/Cargo.toml new file mode 100644 index 00000000..9a91a41d --- /dev/null +++ b/rust/parser/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "parser" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true + +[dependencies] +keyval = { workspace = true } +thiserror = { workspace = true } +hex = { workspace = true } diff --git a/rust/parser/src/format.rs b/rust/parser/src/format.rs new file mode 100644 index 00000000..ad6fcc50 --- /dev/null +++ b/rust/parser/src/format.rs @@ -0,0 +1,166 @@ +//! Query formatting module +//! +//! 
Converts keyval structures back into FQL query strings. + +use keyval::*; + +/// Format a query as an FQL query string +pub fn format(query: &Query) -> String { + match query { + Query::KeyValue(kv) => format_keyvalue(kv), + Query::Key(key) => format_key(key), + Query::Directory(dir) => format_directory(dir), + } +} + +fn format_keyvalue(kv: &KeyValue) -> String { + format!("{}={}", format_key(&kv.key), format_value(&kv.value)) +} + +fn format_key(key: &Key) -> String { + format!("{}{}", format_directory(&key.directory), format_tuple(&key.tuple)) +} + +fn format_directory(dir: &Directory) -> String { + if dir.is_empty() { + return String::new(); + } + dir.iter() + .map(|elem| match elem { + DirElement::String(s) => format!("/{}", s), + DirElement::Variable(var) => format!("/<{}>", format_variable(var)), + }) + .collect() +} + +fn format_tuple(tup: &Tuple) -> String { + let elements: Vec = tup.iter().map(format_tup_element).collect(); + format!("({})", elements.join(",")) +} + +fn format_tup_element(elem: &TupElement) -> String { + match elem { + TupElement::Tuple(t) => format_tuple(t), + TupElement::Nil => "nil".to_string(), + TupElement::Int(i) => i.to_string(), + TupElement::Uint(u) => u.to_string(), + TupElement::Bool(b) => b.to_string(), + TupElement::Float(f) => f.to_string(), + TupElement::String(s) => format!("\"{}\"", s), + TupElement::Uuid(u) => u.to_string(), + TupElement::Bytes(b) => format!("0x{}", hex::encode(b)), + TupElement::Variable(var) => format!("<{}>", format_variable(var)), + TupElement::MaybeMore => "...".to_string(), + TupElement::VStamp(_) => "#vstamp".to_string(), + TupElement::VStampFuture(_) => "#vstamp_future".to_string(), + } +} + +fn format_value(val: &Value) -> String { + match val { + Value::Tuple(t) => format_tuple(t), + Value::Nil => "nil".to_string(), + Value::Int(i) => i.to_string(), + Value::Uint(u) => u.to_string(), + Value::Bool(b) => b.to_string(), + Value::Float(f) => f.to_string(), + Value::String(s) => format!("\"{}\"", 
s), + Value::Uuid(u) => u.to_string(), + Value::Bytes(b) => format!("0x{}", hex::encode(b)), + Value::Variable(var) => format!("<{}>", format_variable(var)), + Value::Clear => "clear".to_string(), + Value::VStamp(_) => "#vstamp".to_string(), + Value::VStampFuture(_) => "#vstamp_future".to_string(), + } +} + +fn format_variable(var: &Variable) -> String { + if var.types.is_empty() { + String::new() + } else { + var.types.iter() + .map(|t| t.to_string()) + .collect::>() + .join("|") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_format_directory() { + let dir = vec![ + DirElement::String("users".to_string()), + DirElement::String("profiles".to_string()), + ]; + assert_eq!(format_directory(&dir), "/users/profiles"); + } + + #[test] + fn test_format_directory_with_variable() { + let dir = vec![ + DirElement::String("users".to_string()), + DirElement::Variable(Variable::any()), + ]; + assert_eq!(format_directory(&dir), "/users/<>"); + } + + #[test] + fn test_format_tuple() { + let tuple = vec![TupElement::Int(42), TupElement::String("hello".to_string())]; + assert_eq!(format_tuple(&tuple), "(42,\"hello\")"); + } + + #[test] + fn test_format_keyvalue() { + let kv = KeyValue { + key: Key { + directory: vec![DirElement::String("test".to_string())], + tuple: vec![TupElement::Int(1)], + }, + value: Value::String("data".to_string()), + }; + assert_eq!(format_keyvalue(&kv), "/test(1)=\"data\""); + } + + #[test] + fn test_format_query_keyvalue() { + let query = Query::KeyValue(KeyValue { + key: Key { + directory: vec![], + tuple: vec![TupElement::Int(42)], + }, + value: Value::Int(100), + }); + assert_eq!(format(&query), "(42)=100"); + } + + #[test] + fn test_format_query_key() { + let query = Query::Key(Key { + directory: vec![DirElement::String("dir".to_string())], + tuple: vec![TupElement::String("key".to_string())], + }); + assert_eq!(format(&query), "/dir(\"key\")"); + } + + #[test] + fn test_format_maybe_more() { + let tuple = 
vec![TupElement::Int(1), TupElement::MaybeMore]; + assert_eq!(format_tuple(&tuple), "(1,...)"); + } + + #[test] + fn test_format_variable_with_types() { + let var = Variable::with_types(vec![ValueType::Int, ValueType::String]); + assert_eq!(format_variable(&var), "int|string"); + } + + #[test] + fn test_format_bytes() { + let value = Value::Bytes(vec![0xde, 0xad, 0xbe, 0xef]); + assert_eq!(format_value(&value), "0xdeadbeef"); + } +} diff --git a/rust/parser/src/lib.rs b/rust/parser/src/lib.rs new file mode 100644 index 00000000..8149fd80 --- /dev/null +++ b/rust/parser/src/lib.rs @@ -0,0 +1,53 @@ +//! FQL query parser +//! +//! This crate provides parsing functionality for FQL query strings, +//! converting them into the keyval data structures. + +pub mod scanner; +pub mod format; + +use keyval::*; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ParseError { + #[error("Unexpected token: {0}")] + UnexpectedToken(String), + + #[error("Invalid syntax: {0}")] + InvalidSyntax(String), + + #[error("Scan error: {0}")] + ScanError(String), +} + +/// Parse an FQL query string into a Query structure. +pub fn parse(input: &str) -> Result { + // TODO: Implement full parser state machine + // This is a simplified placeholder that demonstrates the API + + // Example: Parse a simple key-value query like "/dir/(1,2)=42" + if input.is_empty() { + return Err(ParseError::InvalidSyntax("Empty input".to_string())); + } + + // For now, return a placeholder + Ok(Query::KeyValue(KeyValue { + key: Key { + directory: vec![], + tuple: vec![TupElement::Int(1)], + }, + value: Value::Int(42), + })) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_placeholder() { + let result = parse("/test"); + assert!(result.is_ok()); + } +} diff --git a/rust/parser/src/scanner.rs b/rust/parser/src/scanner.rs new file mode 100644 index 00000000..e5203762 --- /dev/null +++ b/rust/parser/src/scanner.rs @@ -0,0 +1,179 @@ +//! FQL query tokenizer/scanner +//! +//! 
// This module tokenizes FQL query strings into tokens for the parser.

/// Token kinds recognized by the scanner.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
    /// A run of whitespace that contains no `\n` or `\r`.
    Whitespace,
    /// A run of whitespace that starts with `\n` or `\r`.
    Newline,
    /// A backslash together with the character following it. A backslash at
    /// the very end of the input yields an `Escape` token holding only `\`.
    Escape,
    /// A run of characters that are not special, reserved, or whitespace.
    Other,
    /// The input is exhausted; the token text is empty.
    End,
    KeyValSep,  // =
    DirSep,     // /
    TupStart,   // (
    TupEnd,     // )
    TupSep,     // ,
    VarStart,   // <
    VarEnd,     // >
    VarSep,     // |
    StrMark,    // "
    StampStart, // #
    StampSep,   // :
    /// A single character from the reserved set (see `is_reserved`).
    Reserved,
}

/// A token with its kind and the exact input text it covers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    pub kind: TokenKind,
    pub text: String,
}

/// Scanner for FQL queries.
///
/// Every call to [`Scanner::scan`] that returns a token other than `End`
/// consumes at least one character, so scanning until `End` always finishes.
pub struct Scanner {
    input: String,
    /// Byte offset of the next unread character; always on a char boundary
    /// because it only ever advances by whole characters.
    position: usize,
}

impl Scanner {
    /// Create a scanner positioned at the start of `input`.
    pub fn new(input: impl Into<String>) -> Self {
        Self {
            input: input.into(),
            position: 0,
        }
    }

    /// Scan the next token.
    ///
    /// Returns a token of kind [`TokenKind::End`] with empty text once the
    /// input is exhausted (and on every later call).
    pub fn scan(&mut self) -> Token {
        let ch = match self.peek() {
            Some(ch) => ch,
            None => {
                return Token {
                    kind: TokenKind::End,
                    text: String::new(),
                }
            }
        };

        match ch {
            '=' => self.single_char_token(TokenKind::KeyValSep, ch),
            '/' => self.single_char_token(TokenKind::DirSep, ch),
            '(' => self.single_char_token(TokenKind::TupStart, ch),
            ')' => self.single_char_token(TokenKind::TupEnd, ch),
            ',' => self.single_char_token(TokenKind::TupSep, ch),
            '<' => self.single_char_token(TokenKind::VarStart, ch),
            '>' => self.single_char_token(TokenKind::VarEnd, ch),
            '|' => self.single_char_token(TokenKind::VarSep, ch),
            '"' => self.single_char_token(TokenKind::StrMark, ch),
            '#' => self.single_char_token(TokenKind::StampStart, ch),
            ':' => self.single_char_token(TokenKind::StampSep, ch),
            '\\' => self.escape_token(),
            '\n' | '\r' => self.newline_token(),
            // Must cover every character `other_token` stops at; otherwise
            // such a character would produce an empty token and no progress.
            _ if ch.is_whitespace() => self.whitespace_token(),
            _ if is_reserved(ch) => self.single_char_token(TokenKind::Reserved, ch),
            _ => self.other_token(),
        }
    }

    /// The next unread character, if any.
    fn peek(&self) -> Option<char> {
        self.input[self.position..].chars().next()
    }

    /// Consume `ch` (the character at the current position) as its own token.
    fn single_char_token(&mut self, kind: TokenKind, ch: char) -> Token {
        self.position += ch.len_utf8();
        Token {
            kind,
            text: ch.to_string(),
        }
    }

    /// Consume the backslash at the current position plus the character after
    /// it. If the backslash is the last character it is consumed on its own,
    /// so the scanner still advances.
    fn escape_token(&mut self) -> Token {
        let width: usize = self.input[self.position..]
            .chars()
            .take(2)
            .map(char::len_utf8)
            .sum();
        self.take(TokenKind::Escape, width)
    }

    /// Consume a run of whitespace up to (not including) the next `\n`/`\r`.
    fn whitespace_token(&mut self) -> Token {
        self.run_token(TokenKind::Whitespace, |c| {
            c.is_whitespace() && !is_newline(c)
        })
    }

    /// Consume a run of whitespace of any kind, line breaks included.
    fn newline_token(&mut self) -> Token {
        self.run_token(TokenKind::Newline, char::is_whitespace)
    }

    /// Consume a run of ordinary characters, stopping at the first special,
    /// reserved, or whitespace character.
    fn other_token(&mut self) -> Token {
        self.run_token(TokenKind::Other, |c| !is_special(c) && !c.is_whitespace())
    }

    /// Consume characters for as long as `keep` accepts them.
    fn run_token(&mut self, kind: TokenKind, keep: impl Fn(char) -> bool) -> Token {
        let rest = &self.input[self.position..];
        let width = rest.find(|c: char| !keep(c)).unwrap_or(rest.len());
        self.take(kind, width)
    }

    /// Consume the next `width` bytes (a whole number of characters).
    fn take(&mut self, kind: TokenKind, width: usize) -> Token {
        let start = self.position;
        self.position += width;
        Token {
            kind,
            text: self.input[start..self.position].to_string(),
        }
    }
}

fn is_newline(ch: char) -> bool {
    matches!(ch, '\n' | '\r')
}

/// Characters that always end an `Other` token: the punctuation the scanner
/// emits as dedicated tokens, the escape character, and the reserved set.
fn is_special(ch: char) -> bool {
    matches!(
        ch,
        '=' | '/' | '(' | ')' | ',' | '<' | '>' | '|' | '"' | '#' | ':' | '\\'
    ) || is_reserved(ch)
}

/// Characters emitted one at a time as `TokenKind::Reserved`.
fn is_reserved(ch: char) -> bool {
    matches!(
        ch,
        '!' | '$'
            | '%'
            | '&'
            | '{'
            | '}'
            | '*'
            | '+'
            | ';'
            | '?'
            | '@'
            | '['
            | ']'
            | '^'
            | '`'
            | '~'
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_scanner_basic() {
        let mut scanner = Scanner::new("/dir/(1,2)=42");
        assert_eq!(scanner.scan().kind, TokenKind::DirSep);
        assert_eq!(scanner.scan().kind, TokenKind::Other); // "dir"
        assert_eq!(scanner.scan().kind, TokenKind::DirSep);
        assert_eq!(scanner.scan().kind, TokenKind::TupStart);
    }

    #[test]
    fn test_scanner_trailing_backslash_advances() {
        let mut scanner = Scanner::new("\\");
        let token = scanner.scan();
        assert_eq!(token.kind, TokenKind::Escape);
        assert_eq!(token.text, "\\");
        assert_eq!(scanner.scan().kind, TokenKind::End);
    }

    #[test]
    fn test_scanner_unicode_whitespace_advances() {
        let mut scanner = Scanner::new("a\u{a0}b");
        assert_eq!(scanner.scan().text, "a");
        assert_eq!(scanner.scan().kind, TokenKind::Whitespace);
        assert_eq!(scanner.scan().text, "b");
        assert_eq!(scanner.scan().kind, TokenKind::End);
    }
}