From aae9d7762a49ee947a68f5b8af1e249a4f13bd23 Mon Sep 17 00:00:00 2001 From: Wade Brainerd Date: Sat, 12 Nov 2016 20:46:45 -0500 Subject: [PATCH] Improve parsing of escaped double quotes Per the RFC, "" is treated as an escaped quote. Unfortunately this leads to quoted empty cells, e.g. "","","" being treated as escaped quotes instead of empty cells. A more careful reading of the spec indicates that this is only valid when already inside double quotes, and the parser has been updated to reflect this. Also updates the readme to clarify that quoted newlines in cells are not supported. --- README.md | 2 +- csvplugin.py | 22 +++++++++------------- quote.csv | 4 ++++ 3 files changed, 14 insertions(+), 14 deletions(-) create mode 100644 quote.csv diff --git a/README.md b/README.md index 5c55d37..7796721 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Using NumPy (http://www.numpy.org), the plugin supports evaluating Python expres All the above features work in both justified and collapsed modes. -Finally, the plugin has full support for RFC 4180 quoting. +Finally, the plugin fully supports RFC 4180 (https://tools.ietf.org/html/rfc4180) quoting, with the exception that quoted newlines (2.6) are treated as row separators. ## Install diff --git a/csvplugin.py b/csvplugin.py index ade9ede..1a6572a 100644 --- a/csvplugin.py +++ b/csvplugin.py @@ -307,24 +307,20 @@ def ParseRow(self, row): while char_index < len(row): char = row[char_index] - if char_index < len(row) - 1: - next_char = row[char_index + 1] - else: - next_char = None - - if char == '"' and next_char == '"': - if self.auto_quote: - currentword += '"' - else: - currentword += '""' - char_index += 2 - continue - if insidequotes: if char == '"': + if char_index < len(row) - 1 and row[char_index + 1] == '"': + if self.auto_quote: + currentword += '"' + else: + currentword += '""' + char_index += 2 + continue + insidequotes = False if not self.auto_quote: currentword += char + else: currentword += char diff --git a/quote.csv b/quote.csv new file mode 100644 index 0000000..d6f2023 --- /dev/null +++ b/quote.csv @@ -0,0 +1,4 @@ +a,"b """,456 +,b +123abc,"45,6" +"","","" \ No newline at end of file