Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion fst-bin/src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,10 @@ pub fn app() -> clap::App<'static, 'static> {
))
.arg(flag("keep-tmp-dir").help(
"Does not delete the temporary directory. Useful for debugging.",
));
))
.arg(flag("delimiter").help(
"The delimiter used in the CSV file to separate key and value in each line. \
This defaults to ','.",));

let node = cmd("node", ABOUT_NODE)
.arg(pos("input").required(true).help("The FST to inspect."))
Expand Down
24 changes: 23 additions & 1 deletion fst-bin/src/cmd/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ struct Args {
keep_tmp_dir: bool,
max: bool,
min: bool,
delimiter: Option<u8>,
}

/// Marker error signalling that a value offered as a delimiter could not be
/// used as one.
///
/// The type carries no data; its `Display` output is a fixed message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DelimiterInvalidError;

impl std::fmt::Display for DelimiterInvalidError {
    /// Writes the fixed, user-facing description of the error.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str("The provided value is no valid delimiter")
    }
}

// No source error or extra context to expose, so the default trait methods suffice.
impl std::error::Error for DelimiterInvalidError {}

impl Args {
Expand All @@ -47,6 +58,15 @@ impl Args {
keep_tmp_dir: m.is_present("keep-tmp-dir"),
max: m.is_present("max"),
min: m.is_present("min"),
delimiter: m
.value_of_lossy("delimiter")
.map(|x| {
x.as_bytes()
.get(0)
.map(|y| *y)
.ok_or(DelimiterInvalidError)
})
.transpose()?,
})
}

Expand All @@ -66,6 +86,7 @@ impl Args {
let mut map = MapBuilder::new(wtr)?;
for input in &self.input {
let mut rdr = csv::ReaderBuilder::new()
.delimiter(self.delimiter.unwrap_or(b','))
.has_headers(false)
.from_reader(util::get_reader(Some(input))?);
for row in rdr.deserialize() {
Expand All @@ -82,7 +103,8 @@ impl Args {
.iter()
.map(|inp| Path::new(inp).to_path_buf())
.collect();
let keys = util::ConcatCsv::new(inputs);
let keys =
util::ConcatCsv::new(inputs, self.delimiter.unwrap_or(b','));

let mut merger = Merger::new(keys, &self.output);
merger = merger.fd_limit(self.fd_limit);
Expand Down
6 changes: 4 additions & 2 deletions fst-bin/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,15 +135,16 @@ impl Iterator for ConcatLines {
/// State for reading `(key, value)` rows out of a list of CSV input paths.
pub struct ConcatCsv {
// Input paths still to be opened. The constructor reverses the vector so it
// can be treated as a stack.
inputs: Vec<PathBuf>,
// Deserializing row iterator for the input currently being read; starts out
// as `None` and is set once a reader has been built for an input.
cur: Option<Rows>,
// Field delimiter byte passed to the CSV reader builder for each input.
delimiter: u8,
}

// Type-erased, boxed byte source that the CSV reader is built over.
type Reader = Box<dyn io::Read + Send + Sync + 'static>;
// Owning CSV iterator over a `Reader` that deserializes each record into a
// `(BString, u64)` pair.
type Rows = csv::DeserializeRecordsIntoIter<Reader, (BString, u64)>;

impl ConcatCsv {
pub fn new(mut inputs: Vec<PathBuf>) -> ConcatCsv {
pub fn new(mut inputs: Vec<PathBuf>, delimiter: u8) -> ConcatCsv {
inputs.reverse(); // treat it as a stack
ConcatCsv { inputs, cur: None }
ConcatCsv { inputs, cur: None, delimiter }
}

fn read_row(&mut self) -> Option<Result<(BString, u64), Error>> {
Expand Down Expand Up @@ -173,6 +174,7 @@ impl Iterator for ConcatCsv {
Ok(rdr) => rdr,
};
let csvrdr = csv::ReaderBuilder::new()
.delimiter(self.delimiter)
.has_headers(false)
.from_reader(rdr);
self.cur = Some(csvrdr.into_deserialize());
Expand Down