fst-bin: add a --delimiter option to `fst map`

The byte separating key and value in the CSV inputs can be chosen with
`--delimiter <byte>`; when the option is absent the readers keep using ','.

Notes on this revision of the patch:
  * `--delimiter` is declared with `takes_value(true)`. `flag(..)` on its
    own builds a presence-only switch (compare `keep-tmp-dir`, which is
    read with `is_present`), so `value_of_lossy("delimiter")` could never
    yield the value given on the command line.
  * The value must be exactly one byte. Longer values were previously
    truncated to their first byte without any diagnostic.
  * Context lines were re-expanded from a whitespace-collapsed copy; if
    indentation differs from the tree, apply with --ignore-whitespace.
  * The new-side blob ids on the `index` lines of app.rs and map.rs were
    not recomputed for this revision.

diff --git a/fst-bin/src/app.rs b/fst-bin/src/app.rs
index f8ecb4c..c18dc79 100644
--- a/fst-bin/src/app.rs
+++ b/fst-bin/src/app.rs
@@ -288,7 +288,11 @@ pub fn app() -> clap::App<'static, 'static> {
         ))
         .arg(flag("keep-tmp-dir").help(
             "Does not delete the temporary directory. Useful for debugging.",
-        ));
+        ))
+        .arg(flag("delimiter").takes_value(true).help(
+            "The delimiter used in the CSV file to separate key and value \
+             in each line. Must be a single byte. This defaults to ','.",
+        ));
 
     let node = cmd("node", ABOUT_NODE)
         .arg(pos("input").required(true).help("The FST to inspect."))
diff --git a/fst-bin/src/cmd/map.rs b/fst-bin/src/cmd/map.rs
index d6e356e..4a49d42 100644
--- a/fst-bin/src/cmd/map.rs
+++ b/fst-bin/src/cmd/map.rs
@@ -26,6 +26,18 @@ struct Args {
     keep_tmp_dir: bool,
     max: bool,
     min: bool,
+    delimiter: Option<u8>,
+}
+
+/// Error returned when the `--delimiter` value is not exactly one byte.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+struct DelimiterInvalidError;
+
+impl std::error::Error for DelimiterInvalidError {}
+impl std::fmt::Display for DelimiterInvalidError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "The delimiter must be exactly one byte")
+    }
 }
 
 impl Args {
@@ -47,6 +59,15 @@ impl Args {
             keep_tmp_dir: m.is_present("keep-tmp-dir"),
             max: m.is_present("max"),
             min: m.is_present("min"),
+            delimiter: m
+                .value_of_lossy("delimiter")
+                .map(|x| match x.as_bytes() {
+                    // The CSV reader takes one delimiter byte, so reject
+                    // empty and multi-byte values instead of truncating.
+                    [byte] => Ok(*byte),
+                    _ => Err(DelimiterInvalidError),
+                })
+                .transpose()?,
         })
     }
 
@@ -66,6 +87,7 @@ impl Args {
         let mut map = MapBuilder::new(wtr)?;
         for input in &self.input {
             let mut rdr = csv::ReaderBuilder::new()
+                .delimiter(self.delimiter.unwrap_or(b','))
                 .has_headers(false)
                 .from_reader(util::get_reader(Some(input))?);
             for row in rdr.deserialize() {
@@ -82,7 +104,8 @@ impl Args {
             .iter()
             .map(|inp| Path::new(inp).to_path_buf())
             .collect();
-        let keys = util::ConcatCsv::new(inputs);
+        let keys =
+            util::ConcatCsv::new(inputs, self.delimiter.unwrap_or(b','));
         let mut merger = Merger::new(keys, &self.output);
         merger = merger.fd_limit(self.fd_limit);
 
diff --git a/fst-bin/src/util.rs b/fst-bin/src/util.rs
index dfe4970..17028f0 100644
--- a/fst-bin/src/util.rs
+++ b/fst-bin/src/util.rs
@@ -135,15 +135,16 @@ impl Iterator for ConcatLines {
 pub struct ConcatCsv {
     inputs: Vec<PathBuf>,
     cur: Option<Rows>,
+    delimiter: u8,
 }
 
 type Reader = Box<dyn io::Read + Send + Sync + 'static>;
 type Rows = csv::DeserializeRecordsIntoIter<Reader, (BString, u64)>;
 
 impl ConcatCsv {
-    pub fn new(mut inputs: Vec<PathBuf>) -> ConcatCsv {
+    pub fn new(mut inputs: Vec<PathBuf>, delimiter: u8) -> ConcatCsv {
         inputs.reverse(); // treat it as a stack
-        ConcatCsv { inputs, cur: None }
+        ConcatCsv { inputs, cur: None, delimiter }
     }
 
     fn read_row(&mut self) -> Option<Result<(BString, u64), Error>> {
@@ -173,6 +174,7 @@ impl Iterator for ConcatCsv {
                             Ok(rdr) => rdr,
                         };
                         let csvrdr = csv::ReaderBuilder::new()
+                            .delimiter(self.delimiter)
                             .has_headers(false)
                             .from_reader(rdr);
                         self.cur = Some(csvrdr.into_deserialize());