diff --git a/Cargo.toml b/Cargo.toml
index ca49748..61f5aa5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "fst"
-version = "0.4.7"  #:version
+version = "0.4.8"  #:version
 authors = ["Andrew Gallant "]
 description = """
 Use finite state transducers to compactly represents sets or maps of many
@@ -21,6 +21,7 @@ exclude = ["fst-levenshtein", "fst-regex"]
 [features]
 default = []
 levenshtein = ["utf8-ranges"]
+contains = []
 
 [patch.crates-io]
 fst = { path = "." }
diff --git a/src/automaton/contains.rs b/src/automaton/contains.rs
new file mode 100644
index 0000000..98d4ccb
--- /dev/null
+++ b/src/automaton/contains.rs
@@ -0,0 +1,154 @@
+use crate::automaton::Automaton;
+
+/// An automaton that matches if the input contains a specific string.
+///
+/// The needle may occur anywhere in the key. Matching is byte based and uses
+/// the Knuth-Morris-Pratt failure function, so an occurrence that overlaps a
+/// partially matched prefix (e.g. `"ab"` inside `"aab"`) is still found.
+///
+/// ```rust
+/// extern crate fst;
+///
+/// use fst::{Automaton, IntoStreamer, Streamer, Set};
+/// use fst::automaton::Contains;
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<(), Box<dyn std::error::Error>> {
+///     let paths = vec!["/home/projects/bar", "/home/projects/foo", "/tmp/foo"];
+///     let set = Set::from_iter(paths)?;
+///
+///     // Build our contains query.
+///     let keyword = Contains::new("/projects");
+///
+///     // Apply our query to the set we built.
+///     let stream = set.search(keyword).into_stream();
+///
+///     let matches = stream.into_strs()?;
+///     assert_eq!(matches, vec!["/home/projects/bar", "/home/projects/foo"]);
+///     Ok(())
+/// }
+/// ```
+#[derive(Clone, Debug)]
+pub struct Contains<'a> {
+    /// The needle, as raw bytes.
+    string: &'a [u8],
+    /// KMP failure table: `failure[i]` is the length of the longest proper
+    /// prefix of `string[..=i]` that is also a suffix of it.
+    failure: Vec<usize>,
+}
+
+impl<'a> Contains<'a> {
+    /// Constructs an automaton that matches any input containing `string`.
+    ///
+    /// An empty `string` matches every input.
+    #[inline]
+    pub fn new(string: &'a str) -> Contains<'a> {
+        let string = string.as_bytes();
+        Contains { string, failure: failure_table(string) }
+    }
+}
+
+/// Builds the Knuth-Morris-Pratt failure table for `pattern`.
+fn failure_table(pattern: &[u8]) -> Vec<usize> {
+    let mut table = vec![0; pattern.len()];
+    let mut matched = 0;
+    for i in 1..pattern.len() {
+        // Fall back to shorter borders until one can be extended by
+        // `pattern[i]` (or none is left).
+        while matched > 0 && pattern[i] != pattern[matched] {
+            matched = table[matched - 1];
+        }
+        if pattern[i] == pattern[matched] {
+            matched += 1;
+        }
+        table[i] = matched;
+    }
+    table
+}
+
+impl<'a> Automaton for Contains<'a> {
+    /// `Some(n)` means the last `n` bytes seen equal the first `n` bytes of
+    /// the needle; `n == needle.len()` is the absorbing "found" state. `None`
+    /// is never produced by this automaton and is treated as a dead state.
+    type State = Option<usize>;
+
+    #[inline]
+    fn start(&self) -> Option<usize> {
+        Some(0)
+    }
+
+    #[inline]
+    fn is_match(&self, pos: &Option<usize>) -> bool {
+        pos.map_or(false, |pos| pos >= self.string.len())
+    }
+
+    #[inline]
+    fn can_match(&self, pos: &Option<usize>) -> bool {
+        pos.is_some()
+    }
+
+    #[inline]
+    fn will_always_match(&self, pos: &Option<usize>) -> bool {
+        // Once the needle has been seen, no further input can undo the match.
+        self.is_match(pos)
+    }
+
+    #[inline]
+    fn accept(&self, pos: &Option<usize>, byte: u8) -> Option<usize> {
+        let mut pos = (*pos)?;
+        let len = self.string.len();
+        if pos >= len {
+            // Already found: stay in the absorbing match state.
+            return Some(len);
+        }
+        loop {
+            if self.string[pos] == byte {
+                return Some(pos + 1);
+            }
+            if pos == 0 {
+                return Some(0);
+            }
+            // Mismatch after a partial match: retry with the longest prefix
+            // that is still a suffix of what has been consumed so far.
+            pos = self.failure[pos - 1];
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Feeds `haystack` through the automaton and reports whether it matched.
+    fn contains(needle: &str, haystack: &str) -> bool {
+        let aut = Contains::new(needle);
+        let mut state = aut.start();
+        for &byte in haystack.as_bytes() {
+            state = aut.accept(&state, byte);
+        }
+        aut.is_match(&state)
+    }
+
+    #[test]
+    fn agrees_with_str_contains() {
+        let cases = [
+            ("", ""),
+            ("", "abc"),
+            ("ab", "aab"),
+            ("aab", "aaab"),
+            ("abab", "abaabab"),
+            ("abc", "ab"),
+            ("/projects", "/home/projects/foo"),
+            ("/projects", "/tmp/foo"),
+        ];
+        for &(needle, haystack) in cases.iter() {
+            assert_eq!(
+                contains(needle, haystack),
+                haystack.contains(needle),
+                "needle={:?} haystack={:?}",
+                needle,
+                haystack
+            );
+        }
+    }
+}
diff --git a/src/automaton/mod.rs b/src/automaton/mod.rs
index fe503ed..c09d5ec 100644
--- a/src/automaton/mod.rs
+++ b/src/automaton/mod.rs
@@ -4,6 +4,12 @@ pub use self::levenshtein::{Levenshtein, LevenshteinError};
 #[cfg(feature = "levenshtein")]
 mod levenshtein;
 
+#[cfg(feature = "contains")]
+pub use self::contains::Contains;
+
+#[cfg(feature = "contains")]
+mod contains;
+
 /// Automaton describes types that behave as a finite automaton.
 ///
 /// All implementors of this trait are represented by *byte based* automata.