Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 42 additions & 4 deletions include/scrimmage/common/CSV.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,51 @@
#define INCLUDE_SCRIMMAGE_COMMON_CSV_H_

#include <fstream>
#include <iomanip>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <variant>

#include "scrimmage/parse/ParseUtils.h"

namespace {
/// Visitor that renders one CSV cell value as text; intended for std::visit.
///
/// Each operator() handles one alternative of the cell variant. The three
/// public fields only affect how doubles are formatted. The struct is kept an
/// aggregate (no constructors, fields in this order) so callers can brace- or
/// designated-initialize it.
///
/// NOTE(review): this struct sits in an unnamed namespace inside a header, so
/// every translation unit including the header gets its own copy -- consider
/// moving it to a named detail namespace.
struct StringifyVisitor {
    /// Format doubles in fixed-point notation (ignored when
    /// double_is_scientific is also set).
    bool double_is_fixed = true;
    /// Format doubles in scientific notation; takes precedence over
    /// double_is_fixed. Defaults to false so a default-constructed visitor
    /// formats doubles the same way the CSV class defaults do
    /// (fixed = true, scientific = false).
    bool double_is_scientific = false;
    /// Precision passed to std::setprecision when formatting doubles.
    int double_precision = 13;

    /// Booleans are written as the literal words "true" / "false".
    std::string operator()(bool value) const { return value ? "true" : "false"; }

    /// Unsigned integers are written in plain decimal.
    std::string operator()(uint64_t value) const { return std::to_string(value); }

    /// Signed integers are written in plain decimal.
    std::string operator()(int64_t value) const { return std::to_string(value); }

    /// Strings are passed through unchanged (no quoting or escaping is done here).
    std::string operator()(const std::string& value) const { return value; }

    /// Doubles are formatted through a stream because the default precision
    /// is not enough in many cases.
    std::string operator()(double value) const {
        std::ostringstream conv;
        // std::fixed and std::scientific both overwrite the stream's
        // floatfield, so only one can be in effect; scientific wins when both
        // flags are requested. With neither flag set the stream's default
        // floating-point format is used.
        if (double_is_scientific) {
            conv << std::scientific;
        } else if (double_is_fixed) {
            conv << std::fixed;
        }
        conv << std::setprecision(double_precision) << value;
        return conv.str();
    }
};
} // namespace

namespace scrimmage {

class CSV {
public:
typedef std::list<std::string> Headers;
typedef std::list<std::pair<std::string, double>> Pairs;
typedef std::variant<bool, uint64_t, int64_t, std::string, double> PossibleVariantTypes;
typedef std::list<std::pair<std::string, PossibleVariantTypes>> Pairs;

~CSV();

Expand Down Expand Up @@ -75,7 +108,11 @@ class CSV {

size_t rows();

double at(int row, const std::string& header);
template <class T1>
T1 at(int row, const std::string& header) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See, this is where I'd use a size_t, because it's an index. But this ties into existing underlying definitions, so I wouldn't change that now. (Actually, taking a closer look: the table uses an int as a key into a map. Why is it not a vector? And can you have negative column/row indexes? Odd, but still not something for this update.)

const int column = column_headers_.at(header);
return convert<T1>(table_.at(row).at(column));
}

friend std::ostream& operator<<(std::ostream& os, const CSV& csv);

Expand All @@ -87,6 +124,8 @@ class CSV {
void set_double_scientific(bool is_scientific) { double_is_scientific_ = is_scientific; }

protected:
std::string get_csv_string(const PossibleVariantTypes& val) const;

std::list<std::string> get_csv_line_elements(const std::string& str);

void write_headers();
Expand All @@ -100,7 +139,7 @@ class CSV {
// Key 1 : Row Index
// Key 2 : Column Index
// Value : Cell Value
std::map<int, std::map<int, double>> table_;
std::map<int, std::map<int, std::string>> table_;
int next_row_ = 0;

std::ofstream file_out_;
Expand All @@ -110,7 +149,6 @@ class CSV {
int double_precision_ = 13;
bool double_is_fixed_ = true;
bool double_is_scientific_ = false;

std::string headers_to_string() const;
std::string rows_to_string() const;
std::string row_to_string(const int& i) const;
Expand Down
60 changes: 21 additions & 39 deletions src/common/CSV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,12 @@
*
*/

#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include "scrimmage/common/CSV.h"

#include <vector>

#include <boost/algorithm/string.hpp>
#include <boost/tokenizer.hpp>
#include <scrimmage/parse/ParseUtils.h>
#include "scrimmage/common/CSV.h"

using std::cout;
using std::endl;
Expand Down Expand Up @@ -73,14 +69,23 @@ void CSV::set_column_headers(const std::string& headers, bool write) {
set_column_headers(headers_vec, write);
}

std::string CSV::get_csv_string(const PossibleVariantTypes& v) const {
return std::visit(
StringifyVisitor{
.double_is_fixed = double_is_fixed_,
.double_is_scientific = double_is_scientific_,
.double_precision = double_precision_},
v);
}

bool CSV::append(const Pairs& pairs, bool write, bool keep_in_memory) {

for (std::pair<std::string, double> pair : pairs) {
for (const auto& pair : pairs) {
auto it = column_headers_.find(pair.first);
if (it == column_headers_.end()) {
cout << "Warning: column header doesn't exist: " << pair.first << endl;
}
table_[next_row_][it->second] = pair.second;
table_[next_row_][it->second] = get_csv_string(pair.second);
}

if (write) {
Expand Down Expand Up @@ -156,42 +161,24 @@ std::string CSV::rows_to_string() const {
}

std::string CSV::row_to_string(const int& row) const {
std::string result = "";
std::ostringstream result;

// Initialize a vector with no value string. Iterate over
// column_headers, use column index to fill in column for values.
std::vector<std::string> values(column_headers_.size(), no_value_str_);
auto it_row = table_.find(row);
for (auto& kv : it_row->second) {
if (static_cast<int64_t>(kv.second) == kv.second) {
values[kv.first] = std::to_string(static_cast<int64_t>(kv.second));
} else if (static_cast<double>(kv.second) == kv.second) {
// default precision values for double are not enough in many cases
std::ostringstream conv;
if (double_is_fixed_) {
conv << std::fixed;
}
if (double_is_scientific_) {
conv << std::scientific;
}
conv << std::setprecision(double_precision_) << kv.second;
values[kv.first] = conv.str();
} else {
values[kv.first] = std::to_string(kv.second);
}
if (it_row == table_.end()) {
return "";
}

// Append the rows to the resultant string
unsigned int i = 0;
for (std::string str : values) {
result += str;
for (auto& kv : it_row->second) {
result << kv.second;

if (i + 1 < values.size()) {
result += ",";
result << ",";
}
i++;
}
return result;
return result.str();
}

bool CSV::to_csv(const std::string& filename) {
Expand Down Expand Up @@ -238,7 +225,7 @@ bool CSV::read_csv_from_string(const std::string& csv_str, const bool& contains_
std::vector<std::string> tokens;
boost::split(tokens, line, boost::is_any_of(","));
for (unsigned int i = 0; i < tokens.size(); i++) {
table_[row_num][i] = std::stod(tokens[i]);
table_[row_num][i] = tokens[i];
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to be the most annoying thing, and is going to tie into CSV::row_to_string()... what happens if somebody writes a string that contains a comma? Look at the specification section at https://en.wikipedia.org/wiki/Comma-separated_values, and it tells you how to handle that situation. Any time you decide to allow strings in data files, that opens up "fun" edge cases that you have to think about. What about newlines? Carriage returns? Maybe CSV::append() needs to have more failure cases.

Note that when you handle things like escaped double quotes, those need to be handled via linearly scanning through the string; you can't do global substring substitutions because that may not work the way you expect it to for some cases.

}

// If this is the first line and the file doesn't contain a header,
Expand Down Expand Up @@ -284,11 +271,6 @@ size_t CSV::rows() {
return table_.size();
}

// Returns the cell stored at the given row index under the named column.
// The header name is first translated to a column index through
// column_headers_, then the cell is read from table_ (row -> column -> value).
// table_ is a std::map, so a missing row or column raises std::out_of_range.
// NOTE(review): column_headers_ is presumably a std::map as well, in which
// case an unknown header throws the same way -- confirm against its declaration.
double CSV::at(int row, const std::string& header) {
const int column = column_headers_.at(header);
return table_.at(row).at(column);
}

std::list<std::string> CSV::get_csv_line_elements(const std::string& str) {
std::list<std::string> elems;
std::vector<std::string> tokens;
Expand Down