Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fst"
version = "0.4.7" #:version
version = "0.4.8" #:version
authors = ["Andrew Gallant <[email protected]>"]
description = """
Use finite state transducers to compactly represents sets or maps of many
Expand All @@ -21,6 +21,7 @@ exclude = ["fst-levenshtein", "fst-regex"]
[features]
default = []
levenshtein = ["utf8-ranges"]
contains = ["utf8-ranges"]

[patch.crates-io]
fst = { path = "." }
Expand Down
78 changes: 78 additions & 0 deletions src/automaton/contains.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
use crate::automaton::{Automaton, StartsWith};

/// An automaton that matches if the input contains a specific string.
///
/// The needle may occur anywhere in the key (start, middle or end). Matching
/// is done on raw bytes using a Knuth-Morris-Pratt failure table, so partial
/// matches that overlap the bytes already consumed are not lost (for example
/// the needle `"ab"` is found inside `"aab"`). An empty needle matches every
/// key.
///
/// ```rust
/// extern crate fst;
///
/// use fst::{Automaton, IntoStreamer, Streamer, Set};
/// use fst::automaton::Contains;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn std::error::Error>> {
///     let paths = vec!["/home/projects/bar", "/home/projects/foo", "/tmp/foo"];
///     let set = Set::from_iter(paths)?;
///
///     // Build our contains query.
///     let keyword = Contains::new("/projects");
///
///     // Apply our query to the set we built.
///     let mut stream = set.search(keyword).into_stream();
///
///     let matches = stream.into_strs()?;
///     assert_eq!(matches, vec!["/home/projects/bar", "/home/projects/foo"]);
///     Ok(())
/// }
/// ```
#[derive(Clone, Debug)]
pub struct Contains<'a> {
    /// The needle to search for, as raw bytes.
    string: &'a [u8],
    /// Knuth-Morris-Pratt failure table: `fallback[i]` is the length of the
    /// longest proper prefix of `string[..=i]` that is also a suffix of it.
    fallback: Vec<usize>,
}

impl<'a> Contains<'a> {
    /// Constructs an automaton that matches any input containing `string`.
    ///
    /// Building the automaton takes time and memory linear in the length of
    /// `string` (one `usize` per needle byte for the failure table).
    pub fn new(string: &'a str) -> Contains<'a> {
        let string = string.as_bytes();
        let mut fallback = vec![0; string.len()];
        // `prefix` is the length of the longest border of `string[..i]`.
        let mut prefix = 0;
        for i in 1..string.len() {
            while prefix > 0 && string[i] != string[prefix] {
                prefix = fallback[prefix - 1];
            }
            if string[i] == string[prefix] {
                prefix += 1;
            }
            fallback[i] = prefix;
        }
        Contains { string, fallback }
    }
}

impl<'a> Automaton for Contains<'a> {
    /// `Some(n)` means the last `n` bytes consumed equal the first `n` bytes
    /// of the needle; `n == string.len()` is the absorbing "found" state.
    /// `None` is a dead state that this automaton never produces on its own.
    type State = Option<usize>;

    /// Starts with zero needle bytes matched.
    #[inline]
    fn start(&self) -> Option<usize> {
        Some(0)
    }

    /// Returns true once the whole needle has been seen somewhere in the input.
    #[inline]
    fn is_match(&self, pos: &Option<usize>) -> bool {
        pos.map_or(false, |matched| matched >= self.string.len())
    }

    /// Any live state can still lead to a match, since the needle may begin
    /// at any later byte.
    #[inline]
    fn can_match(&self, pos: &Option<usize>) -> bool {
        pos.is_some()
    }

    /// Once the needle has been found, every extension of the input still
    /// contains it, so a matching state matches forever.
    #[inline]
    fn will_always_match(&self, pos: &Option<usize>) -> bool {
        self.is_match(pos)
    }

    /// Consumes one input byte and returns the next state.
    #[inline]
    fn accept(&self, pos: &Option<usize>, byte: u8) -> Option<usize> {
        // A dead state stays dead.
        let mut matched = (*pos)?;
        let len = self.string.len();
        if matched >= len {
            // The needle was already found; remain in the absorbing state.
            return Some(len);
        }
        // On a mismatch, fall back to the longest shorter prefix of the
        // needle that is still a suffix of the consumed input, rather than
        // restarting from scratch (which would miss overlapping matches).
        while matched > 0 && self.string[matched] != byte {
            matched = self.fallback[matched - 1];
        }
        if self.string[matched] == byte {
            matched += 1;
        }
        Some(matched)
    }
}
6 changes: 6 additions & 0 deletions src/automaton/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ pub use self::levenshtein::{Levenshtein, LevenshteinError};
#[cfg(feature = "levenshtein")]
mod levenshtein;

#[cfg(feature = "contains")]
pub use self::contains::Contains;

#[cfg(feature = "contains")]
mod contains;

/// Automaton describes types that behave as a finite automaton.
///
/// All implementors of this trait are represented by *byte based* automata.
Expand Down