Skip to content

Commit 2ef06bf

Browse files
committed
Support keeping selected columns as text (instead of parsing them as int or float) when saving to xlsx files.
1 parent b7b619f commit 2ef06bf

File tree

12 files changed

+103
-40
lines changed

12 files changed

+103
-40
lines changed

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ rsv clean --help # help info on all flags
109109

110110
```shell
111111
rsv unique data.csv # default to drop duplicates on all columns,
112-
# default keep first record of duplicates
112+
# default keep first record of duplicates
113113
rsv unique -c 0 data.csv # drop on first column
114114
rsv unique -c 0,1 data.csv # drop on first and second columns
115115
rsv unique --keep-last data.csv # keep the last record when dropping
@@ -286,6 +286,14 @@ rsv slice -s 10 -e 15 data.csv | rsv table # convert result to an aligned t
286286
rsv table --help # help info on all flags
287287
```
288288
289+
- **rsv to**
290+
291+
```shell
292+
rsv head data.csv | rsv to data.xlsx # save to the data.xlsx file
293+
rsv head data.csv | rsv to data.xlsx --text-columns 0,1 # treat the first two columns as text
294+
rsv to --help # help info on all flags
295+
```
296+
289297
## Command pipeline
290298
291299
- **two commands pipelined**

rsv-lib/src/csv_lib/stats.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ pub fn csv_stats(
1717
quote: char,
1818
no_header: bool,
1919
cols: String,
20+
text_columns: &Vec<usize>,
2021
) -> CliResultData {
2122
let mut result_data = ResultData::new();
2223
result_data.insert_header(CStat::get_fields.iter().map(|f| f.to_string()).collect());
@@ -25,7 +26,9 @@ pub fn csv_stats(
2526
let cols = Columns::new(cols.as_str())
2627
.total_col_of(file, sep, quote)
2728
.parse();
28-
let Some(col_type) = ColumnTypes::guess_from_csv(file, sep, quote, no_header, &cols)? else {
29+
let Some(col_type) =
30+
ColumnTypes::guess_from_csv(file, sep, quote, no_header, &cols, text_columns)?
31+
else {
2932
return Ok(Some(result_data));
3033
};
3134

rsv-lib/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,11 @@ pub fn file_stats(
5858
no_header: bool,
5959
cols: String,
6060
sheet: usize,
61+
text_columns: &Vec<usize>,
6162
) -> CliResultData {
6263
let path = full_path(file);
6364
match is_excel(&path) {
6465
true => excel_stats(&path, no_header, cols, sheet),
65-
false => csv_stats(&path, sep, quote, no_header, cols),
66+
false => csv_stats(&path, sep, quote, no_header, cols, text_columns),
6667
}
6768
}

rsv-lib/src/utils/column.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ impl<'a> Columns<'a> {
202202
.collect::<Vec<_>>()
203203
}
204204

205-
pub fn col_vec_or_length_of(&self, n: usize) -> Vec<usize> {
205+
pub fn col_index_vec(&self, n: usize) -> Vec<usize> {
206206
match self.select_all {
207207
true => (0..n).collect::<Vec<_>>(),
208208
false => self.cols.clone(),

rsv-lib/src/utils/column_type.rs

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ impl ColumnTypes {
5252
quote: char,
5353
no_header: bool,
5454
cols: &column::Columns,
55+
text_columns: &Vec<usize>,
5556
) -> Result<Option<Self>, Box<dyn Error>> {
5657
// reader
5758
let rdr = BufReader::new(File::open(path)?).lines();
@@ -72,9 +73,15 @@ impl ColumnTypes {
7273
.collect::<Vec<_>>();
7374

7475
let guess = cols
75-
.col_vec_or_length_of(lines[0].len())
76+
.col_index_vec(lines[0].len())
7677
.into_par_iter()
77-
.map(|n| (n, parse_col_type_at(n, &lines), max_length_at(n, &lines)))
78+
.map(|n| {
79+
if text_columns.contains(&n) {
80+
(n, ColumnType::String, max_length_at(n, &lines))
81+
} else {
82+
(n, parse_col_type_at(n, &lines), max_length_at(n, &lines))
83+
}
84+
})
7885
.collect::<Vec<_>>()
7986
.iter()
8087
.fold(ColumnTypes(vec![]), |mut a, b| {
@@ -98,7 +105,7 @@ impl ColumnTypes {
98105
}
99106

100107
let mut guess = ColumnTypes(vec![]);
101-
for c in cols.col_vec_or_length_of(lines[0].len()) {
108+
for c in cols.col_index_vec(lines[0].len()) {
102109
// max_length is meaningless for excel, so set default to 0
103110
guess.push(c, parse_excel_col_type_at(c, &lines), 0)
104111
}
@@ -107,12 +114,16 @@ impl ColumnTypes {
107114
}
108115

109116
// sequential guess given that io is usually small
110-
pub fn guess_from_io(v: &[Vec<&str>], cols: &Columns) -> Self {
117+
pub fn guess_from_io(v: &[Vec<&str>], cols: &Columns, text_columns: &Vec<usize>) -> Self {
111118
let v = if v.len() < 5000 { v } else { &v[..5000] };
112119

113120
let mut guess = ColumnTypes(vec![]);
114-
for c in cols.col_vec_or_length_of(v[0].len()) {
115-
guess.push(c, parse_col_type_at(c, v), max_length_at(c, v))
121+
for c in cols.col_index_vec(v[0].len()) {
122+
if text_columns.contains(&c) {
123+
guess.push(c, ColumnType::String, max_length_at(c, v))
124+
} else {
125+
guess.push(c, parse_col_type_at(c, v), max_length_at(c, v))
126+
}
116127
}
117128

118129
guess

rsv-lib/src/utils/to.rs

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use super::{cli_result::CliResult, filename::new_file, reader::ExcelReader, writ
22
use crate::utils::{column::Columns, column_type::ColumnTypes, row_split::CsvRowSplitter};
33
use std::{
44
fs::File,
5-
io::{stdin, BufRead, BufReader, BufWriter, Write},
5+
io::{BufRead, BufReader, BufWriter, Write, stdin},
66
path::{Path, PathBuf},
77
};
88
use xlsxwriter::{Workbook, Worksheet};
@@ -64,7 +64,14 @@ pub fn excel_to_csv(path: &Path, sheet: usize, sep: &str, out: &str) -> CliResul
6464
Ok(())
6565
}
6666

67-
pub fn csv_to_excel(path: &Path, sep: char, quote: char, out: &str, no_header: bool) -> CliResult {
67+
pub fn csv_to_excel(
68+
path: &Path,
69+
sep: char,
70+
quote: char,
71+
out: &str,
72+
no_header: bool,
73+
text_columns: &Vec<usize>,
74+
) -> CliResult {
6875
// out path
6976
let out = out_filename(out);
7077

@@ -75,10 +82,11 @@ pub fn csv_to_excel(path: &Path, sep: char, quote: char, out: &str, no_header: b
7582

7683
// column type
7784
let cols = Columns::new("").total_col_of(path, sep, quote).parse();
78-
let ctypes = match ColumnTypes::guess_from_csv(path, sep, quote, no_header, &cols)? {
79-
Some(v) => v,
80-
None => return Ok(()),
81-
};
85+
let ctypes =
86+
match ColumnTypes::guess_from_csv(path, sep, quote, no_header, &cols, text_columns)? {
87+
Some(v) => v,
88+
None => return Ok(()),
89+
};
8290
ctypes.update_excel_column_width(&mut sheet)?;
8391
let ctypes = Some(ctypes);
8492

@@ -94,7 +102,13 @@ pub fn csv_to_excel(path: &Path, sep: char, quote: char, out: &str, no_header: b
94102
Ok(())
95103
}
96104

97-
pub fn io_to_excel(sep: char, quote: char, no_header: bool, out: &str) -> CliResult {
105+
pub fn io_to_excel(
106+
sep: char,
107+
quote: char,
108+
no_header: bool,
109+
out: &str,
110+
text_columns: &Vec<usize>,
111+
) -> CliResult {
98112
// out path
99113
let out = out_filename(out);
100114

@@ -119,7 +133,8 @@ pub fn io_to_excel(sep: char, quote: char, no_header: bool, out: &str) -> CliRes
119133
let ctypes = if equal_width(&lines) {
120134
// column type
121135
let cols = Columns::new("").total_col(lines[0].len()).parse();
122-
let ctypes = ColumnTypes::guess_from_io(&lines[(1 - no_header as usize)..], &cols);
136+
let ctypes =
137+
ColumnTypes::guess_from_io(&lines[(1 - no_header as usize)..], &cols, text_columns);
123138
ctypes.update_excel_column_width(&mut sheet)?;
124139
Some(ctypes)
125140
} else {

rsv/src/args.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,9 @@ pub struct Stats {
251251
/// Get the nth worksheet of EXCEL file
252252
#[arg(short = 'S', long, default_value_t = 0)]
253253
pub sheet: usize,
254+
/// Comma-separated column indexes to format as text (e.g., "0,2")
255+
#[arg(short, long, value_delimiter = ',', allow_hyphen_values = true)]
256+
pub text_columns: Vec<usize>,
254257
}
255258

256259
#[derive(Debug, Args)]
@@ -383,6 +386,9 @@ pub struct To {
383386
/// Get the nth worksheet of EXCEL file
384387
#[arg(short = 'S', long, default_value_t = 0)]
385388
pub sheet: usize,
389+
/// Comma-separated column indexes to format as text (e.g., "0,2")
390+
#[arg(short, long, value_delimiter = ',', allow_hyphen_values = true)]
391+
pub text_columns: Vec<usize>,
386392
}
387393

388394
#[derive(Debug, Args)]

rsv/src/cmd_desc.rs

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -323,15 +323,16 @@ Usage:
323323
rsv stats -c 0,1 --export data.xlsx # EXCEL file
324324
325325
Arguments:
326-
<FILENAME> File to open, including CSV, TXT, and EXCEL
326+
<FILENAME> File to open, including CSV, TXT, and EXCEL
327327
328328
Options:
329-
-s, --sep <SEP> Separator [default: ,]
330-
--no-header Whether the file has a header
331-
-c, --cols <COLS> Columns to generate statistics, Default to select all
332-
-E, --export Export results to a file named current-file-selected.csv
333-
-S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
334-
-h, --help Print help information
329+
-s, --sep <SEP> Separator [default: ,]
330+
--no-header Whether the file has a header
331+
-c, --cols <COLS> Columns to generate statistics, Default to select all
332+
-E, --export Export results to a file named current-file-selected.csv
333+
-S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
334+
-t, --text-columns <TEXT_COLUMNS> Comma-separated column indexes to format as text [default:]
335+
-h, --help Print help information
335336
336337
Column selection syntax:
337338
-c 0,1,2,5 --> cols [0,1,2,5]
@@ -455,16 +456,17 @@ Usage:
455456
rsv head data | rsv to out.xlsx
456457
457458
Arguments:
458-
<OUT> Output file, a file name or a file format
459-
[FILENAME] File to open
459+
<OUT> Output file, a file name or a file format
460+
[FILENAME] File to open
460461
461462
Options:
462-
--no-header Whether the file has a header
463-
-s, --sep <SEP> Input file Separator [default: ,]
464-
-q, --quote <QUOTE> Quote char [default: "]
465-
-o, --outsep <OUTSEP> Output file Separator [default: ,]
466-
-S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
467-
-h, --help Print help
463+
--no-header Whether the file has a header
464+
-s, --sep <SEP> Input file Separator [default: ,]
465+
-q, --quote <QUOTE> Quote char [default: "]
466+
-o, --outsep <OUTSEP> Output file Separator [default: ,]
467+
-S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
468+
-t, --text-columns <TEXT_COLUMNS> Comma-separated column indexes to format as text [default:]
469+
-h, --help Print help
468470
"#;
469471

470472
pub const SAMPLE_DESC: &str = r#"

rsv/src/csv/stats.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,14 @@ impl Stats {
2020
let cols = Columns::new(&self.cols)
2121
.total_col_of(path, self.sep, self.quote)
2222
.parse();
23-
let Some(col_type) =
24-
ColumnTypes::guess_from_csv(path, self.sep, self.quote, self.no_header, &cols)?
23+
let Some(col_type) = ColumnTypes::guess_from_csv(
24+
path,
25+
self.sep,
26+
self.quote,
27+
self.no_header,
28+
&cols,
29+
&self.text_columns,
30+
)?
2531
else {
2632
return Ok(());
2733
};

rsv/src/csv/to.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,14 @@ impl To {
99

1010
match out.as_str() {
1111
v if is_valid_plain_text(v) => csv_or_io_to_csv(Some(path), &out)?,
12-
v if is_valid_excel(v) => {
13-
csv_to_excel(path, self.sep, self.quote, &out, self.no_header)?
14-
}
12+
v if is_valid_excel(v) => csv_to_excel(
13+
path,
14+
self.sep,
15+
self.quote,
16+
&out,
17+
self.no_header,
18+
&self.text_columns,
19+
)?,
1520
_ => return Err(format!("output file format <{out}> is un-recognized.").into()),
1621
};
1722

0 commit comments

Comments
 (0)