diff --git a/src/Daqifi.Core.Tests/Logging/Export/CsvExporterTests.cs b/src/Daqifi.Core.Tests/Logging/Export/CsvExporterTests.cs
index 469a381..7101f1b 100644
--- a/src/Daqifi.Core.Tests/Logging/Export/CsvExporterTests.cs
+++ b/src/Daqifi.Core.Tests/Logging/Export/CsvExporterTests.cs
@@ -136,15 +136,20 @@ public async Task Export_RelativeTime_SubSecondPrecision_ThreeDecimalPlaces()
     // ── Invalid ticks fallback ───────────────────────────────────────────────

     [Fact]
-    public async Task Export_ZeroTicks_WritesInvalidToken()
+    public async Task Export_ZeroTicks_FormatsAsDateTimeMinValue()
     {
+        // ticks==0 is DateTime.MinValue (0001-01-01 00:00:00) — a legal
+        // DateTime value. Pre-fix, FormatTimestamp's `ticks <= 0` check
+        // rejected it as INVALID; post-fix, only negative ticks are
+        // invalid and the formatter renders the absolute timestamp.
         var source = new InMemorySampleSource(
             [Ch1],
             [new SampleRow(0L, Ch1.Key, 1.0)]);

         var (lines, _) = await ExportToLinesAsync(source, new CsvExportOptions());

-        Assert.StartsWith("INVALID(0),", lines[1]);
+        Assert.StartsWith("0001-01-01T00:00:00", lines[1]);
+        Assert.DoesNotContain("INVALID", lines[1]);
     }

     [Fact]
@@ -173,15 +178,18 @@ public async Task Export_TicksBeyondMaxValue_WritesInvalidToken()
     }

     [Fact]
-    public async Task Export_RelativeTime_InvalidTicks_StillWritesInvalidToken()
+    public async Task Export_RelativeTime_NegativeTicks_StillWritesInvalidToken()
     {
+        // Use a genuinely invalid tick value (negative). Post-fix,
+        // ticks==0 is now valid and would format as "0.000" relative
+        // seconds, so the prior INVALID(0) expectation no longer holds.
         var source = new InMemorySampleSource(
             [Ch1],
-            [new SampleRow(0L, Ch1.Key, 1.0)]);
+            [new SampleRow(-1L, Ch1.Key, 1.0)]);

         var (lines, _) = await ExportToLinesAsync(source, new CsvExportOptions { UseRelativeTime = true });

-        Assert.StartsWith("INVALID(0),", lines[1]);
+        Assert.StartsWith("INVALID(-1),", lines[1]);
     }

     // ── Timestamp bucketing ──────────────────────────────────────────────────
@@ -528,4 +536,153 @@ public void ExporterTypes_DoNotReferenceEfCoreOrWindows()
         var _ = new CsvExporter();
         Assert.NotNull(_);
     }
+
+    // ── #191 progress finalization on no-op export ───────────────────────────
+
+    [Fact]
+    public async Task Export_NoChannels_StillReports100ProgressOnCompletion()
+    {
+        var source = new InMemorySampleSource([], []);
+        var report = new ListProgress<int>();
+        var sw = new StringWriter();
+
+        await new CsvExporter().ExportAsync(source, sw, new CsvExportOptions(), report);
+
+        Assert.Contains(100, report.Reports);
+    }
+
+    private sealed class ListProgress<T> : IProgress<T>
+    {
+        public List<T> Reports { get; } = new();
+        public void Report(T value) => Reports.Add(value);
+    }
+
+    // ── #193 CSV header escaping ─────────────────────────────────────────────
+
+    [Fact]
+    public async Task Export_ChannelNameContainingDelimiter_QuotesHeaderField()
+    {
+        var ch = new ChannelDescriptor("DevA", "SN001", "name,with,commas", ChannelType.Analog);
+        var source = new InMemorySampleSource([ch], [new SampleRow(T0, ch.Key, 1.0)]);
+        var (_, header) = await ExportToLinesAsync(source, new CsvExportOptions());
+        Assert.Contains("\"DevA:SN001:name,with,commas\"", header);
+    }
+
+    [Fact]
+    public async Task Export_ChannelNameContainingQuote_DoublesAndQuotesField()
+    {
+        var ch = new ChannelDescriptor("DevA", "SN001", "name\"with\"quote", ChannelType.Analog);
+        var source = new InMemorySampleSource([ch], [new SampleRow(T0, ch.Key, 1.0)]);
+        var (_, header) = await ExportToLinesAsync(source, new CsvExportOptions());
+        Assert.Contains("\"DevA:SN001:name\"\"with\"\"quote\"", header);
+    }
+
+    [Fact]
+    public async Task Export_DeviceNameStartingWithFormulaChar_GetsLeadingApostrophe()
+    {
+        // Channel keys starting with =/+/-/@ would be evaluated as
+        // formulas by Excel/LibreOffice/Sheets. The mitigation prefixes
+        // a literal ' to force text mode.
+        var ch = new ChannelDescriptor("=DevA", "SN001", "Channel1", ChannelType.Analog);
+        var source = new InMemorySampleSource([ch], [new SampleRow(T0, ch.Key, 1.0)]);
+        var (_, header) = await ExportToLinesAsync(source, new CsvExportOptions());
+        Assert.Contains("'=DevA:SN001:Channel1", header);
+    }
+
+    [Fact]
+    public async Task Export_WhitespacePrefixedFormulaChar_StillNeutralized()
+    {
+        // " =SUM(A1)" — leading whitespace bypasses a naive value[0]
+        // check but spreadsheets still interpret it as a formula.
+        var ch = new ChannelDescriptor(" =DevA", "SN001", "Channel1", ChannelType.Analog);
+        var source = new InMemorySampleSource([ch], [new SampleRow(T0, ch.Key, 1.0)]);
+        var (_, header) = await ExportToLinesAsync(source, new CsvExportOptions());
+        Assert.Contains("' =DevA:SN001:Channel1", header);
+    }
+
+    [Theory]
+    [InlineData("\u00A0")] // NBSP
+    [InlineData("\u2003")] // EM SPACE
+    public async Task Export_UnicodeWhitespacePrefixedFormulaChar_StillNeutralized(string whitespace)
+    {
+        // Trim-based formula-injection mitigations that only strip ' '
+        // and '\t' miss CSV PoCs that prepend NBSP / EM SPACE / line
+        // separator before '='. char.IsWhiteSpace covers the full
+        // Unicode whitespace set so the leading apostrophe still lands.
+        var deviceName = whitespace + "=DevA";
+        var ch = new ChannelDescriptor(deviceName, "SN001", "Channel1", ChannelType.Analog);
+        var source = new InMemorySampleSource([ch], [new SampleRow(T0, ch.Key, 1.0)]);
+        var (_, header) = await ExportToLinesAsync(source, new CsvExportOptions());
+        Assert.Contains("'" + deviceName + ":SN001:Channel1", header);
+    }
+
+    [Fact]
+    public async Task Export_LeadingTrailingWhitespaceInDeviceName_QuotesField()
+    {
+        // Excel, Google Sheets, and pandas trim unquoted leading/trailing
+        // whitespace in CSV fields; quoting preserves the exact value
+        // through round-trip parsing.
+        var deviceName = " DevA ";
+        var ch = new ChannelDescriptor(deviceName, "SN001", "Channel1", ChannelType.Analog);
+        var source = new InMemorySampleSource([ch], [new SampleRow(T0, ch.Key, 1.0)]);
+        var (_, header) = await ExportToLinesAsync(source, new CsvExportOptions());
+        Assert.Contains("\" DevA :SN001:Channel1\"", header);
+    }
+
+    // ── #193 data-row escaping (timestamps + values) ─────────────────────────
+
+    [Fact]
+    public async Task Export_ColonDelimiter_QuotesIsoTimestamp()
+    {
+        // ISO 8601 absolute timestamps inherently contain ':'. With ':'
+        // chosen as the delimiter, the timestamp field must be RFC 4180
+        // quoted so it stays a single CSV field.
+        var source = new InMemorySampleSource([Ch1], [new SampleRow(T0, Ch1.Key, 1.0)]);
+        var (lines, _) = await ExportToLinesAsync(source, new CsvExportOptions { Delimiter = ":" });
+        // Body row: "2024-...":1
+        Assert.StartsWith("\"", lines[1]);
+        Assert.Contains("\":", lines[1]);
+    }
+
+    [Fact]
+    public async Task Export_DotDelimiter_QuotesRelativeTimestampAndValue()
+    {
+        var source = new InMemorySampleSource(
+            [Ch1],
+            [new SampleRow(T0, Ch1.Key, 0.5), new SampleRow(T0 + TimeSpan.TicksPerSecond, Ch1.Key, 1.5)]);
+        var (lines, _) = await ExportToLinesAsync(
+            source, new CsvExportOptions { Delimiter = ".", UseRelativeTime = true });
+        // Both relative timestamps and float values contain '.' so both
+        // get quoted under the '.' delimiter.
+        Assert.Equal("\"0.000\".\"0.5\"", lines[1]);
+        Assert.Equal("\"1.000\".\"1.5\"", lines[2]);
+    }
+
+    [Fact]
+    public async Task Export_NegativeValue_NotApostrophePrefixed()
+    {
+        // Regression: data fields use formulaSafe=false so negative
+        // numbers (whose leading '-' is a sign, not a formula char)
+        // aren't clobbered into "'-1.5".
+        var source = new InMemorySampleSource(
+            [Ch1],
+            [new SampleRow(T0, Ch1.Key, -1.5)]);
+        var (lines, _) = await ExportToLinesAsync(source, new CsvExportOptions { UseRelativeTime = true });
+        Assert.Equal("0.000,-1.5", lines[1]);
+    }
+
+    // ── #193 delimiter validation ────────────────────────────────────────────
+
+    [Theory]
+    [InlineData("")]
+    [InlineData(",,")]
+    [InlineData("\n")]
+    [InlineData("\r")]
+    [InlineData("\"")]
+    public async Task Export_InvalidDelimiter_ThrowsArgumentException(string bad)
+    {
+        var source = new InMemorySampleSource([Ch1], [new SampleRow(T0, Ch1.Key, 1.0)]);
+        await Assert.ThrowsAsync<ArgumentException>(async () =>
+            await ExportToLinesAsync(source, new CsvExportOptions { Delimiter = bad }));
+    }
 }
diff --git a/src/Daqifi.Core/Logging/Export/CsvExporter.cs b/src/Daqifi.Core/Logging/Export/CsvExporter.cs
index 9b2d4fd..893b151 100644
--- a/src/Daqifi.Core/Logging/Export/CsvExporter.cs
+++ b/src/Daqifi.Core/Logging/Export/CsvExporter.cs
@@ -1,4 +1,5 @@
 using System.Globalization;
+using System.Linq;
 using System.Text;

 namespace Daqifi.Core.Logging.Export;
@@ -37,11 +38,52 @@ public async Task ExportAsync(
                 options.AverageWindow.Value,
                 $"{nameof(CsvExportOptions.AverageWindow)} must be greater than zero.");

+        // The double-quote is reserved as the RFC 4180 quoting character used by
+        // EscapeCsvField, so allowing it as the delimiter would produce ambiguous,
+        // unparseable output. Newlines would split fields across rows. Multi-char
+        // / empty delimiters can't be handled by single-character splitting either.
+        if (string.IsNullOrEmpty(options.Delimiter)
+            || options.Delimiter.Length != 1
+            || options.Delimiter == "\""
+            || options.Delimiter == "\r"
+            || options.Delimiter == "\n")
+        {
+            // Format the bad value as code points so a stray '\r' or '\n'
+            // in the delimiter doesn't break the exception message into
+            // multiple log lines. Enumerate ALL chars (not just [0]) so
+            // multi-character delimiters like ",," aren't misreported as a
+            // single character.
+            string got;
+            if (options.Delimiter == null)
+            {
+                got = "null";
+            }
+            else if (options.Delimiter.Length == 0)
+            {
+                got = "empty";
+            }
+            else
+            {
+                var codePoints = string.Join(
+                    " ",
+                    options.Delimiter.Select(c => $"U+{(int)c:X4}"));
+                got = $"len={options.Delimiter.Length} [{codePoints}]";
+            }
+            throw new ArgumentException(
+                $"Delimiter must be a single character that is not a newline or double-quote (got {got}).",
+                $"{nameof(options)}.{nameof(CsvExportOptions.Delimiter)}");
+        }
+
         cancellationToken.ThrowIfCancellationRequested();

         var channels = source.GetChannels();
         if (channels.Count == 0)
+        {
+            // Always finalize progress so callers (e.g. UI progress bars) don't
+            // stall at <100% when the export is a no-op.
+            progress?.Report(100);
             return;
+        }

         var channelKeys = channels.Select(c => c.Key).ToList();

@@ -60,11 +102,11 @@ public async Task ExportAsync(
     private static async Task WriteHeaderAsync(TextWriter writer, List<string> channelKeys, CsvExportOptions options)
     {
         var timeHeader = options.UseRelativeTime ? "Relative Time (s)" : "Time";
-        await writer.WriteAsync(timeHeader);
+        await writer.WriteAsync(EscapeCsvField(timeHeader, options.Delimiter));
         foreach (var key in channelKeys)
         {
             await writer.WriteAsync(options.Delimiter);
-            await writer.WriteAsync(key);
+            await writer.WriteAsync(EscapeCsvField(key, options.Delimiter));
         }
         await writer.WriteLineAsync();
     }
@@ -116,7 +158,12 @@ private static async Task WriteTimestampRowAsync(
     {
         var ticks = bucket[0].TimestampTicks;
         sb.Clear();
-        sb.Append(FormatTimestamp(ticks, firstTicks, options.UseRelativeTime));
+        // Data fields use formulaSafe=false: timestamps and numeric values are
+        // internally generated; their leading '-' is a sign on negative numbers,
+        // not a formula char.
+        sb.Append(EscapeCsvField(
+            FormatTimestamp(ticks, firstTicks, options.UseRelativeTime),
+            options.Delimiter, formulaSafe: false));

         var lookup = new Dictionary<string, double>(bucket.Count);
         foreach (var row in bucket)
@@ -126,7 +173,9 @@ private static async Task WriteTimestampRowAsync(
         {
             sb.Append(options.Delimiter);
             if (lookup.TryGetValue(key, out var value))
-                sb.Append(value.ToString("G", CultureInfo.InvariantCulture));
+                sb.Append(EscapeCsvField(
+                    value.ToString("G", CultureInfo.InvariantCulture),
+                    options.Delimiter, formulaSafe: false));
         }

         sb.AppendLine();
@@ -198,13 +247,17 @@ private static async Task WriteAveragedRowAsync(
         CsvExportOptions options)
     {
         sb.Clear();
-        sb.Append(FormatTimestamp(lastTick, firstTicks, options.UseRelativeTime));
+        sb.Append(EscapeCsvField(
+            FormatTimestamp(lastTick, firstTicks, options.UseRelativeTime),
+            options.Delimiter, formulaSafe: false));

         foreach (var key in channelKeys)
         {
             sb.Append(options.Delimiter);
             if (counts[key] > 0)
-                sb.Append((totals[key] / counts[key]).ToString("G", CultureInfo.InvariantCulture));
+                sb.Append(EscapeCsvField(
+                    (totals[key] / counts[key]).ToString("G", CultureInfo.InvariantCulture),
+                    options.Delimiter, formulaSafe: false));
         }

         sb.AppendLine();
@@ -214,11 +267,12 @@ private static async Task
WriteAveragedRowAsync(
     /// <summary>
     /// Formats a tick value as an absolute ISO 8601 string or relative seconds string.
     /// Ticks that are out of the valid range are rendered as INVALID({ticks})
-    /// in both modes.
+    /// in both modes. ticks==0 (DateTime.MinValue, 0001-01-01 00:00:00) is a legal
+    /// value and IS rendered through the formatter; only negative ticks are invalid.
     /// </summary>
     private static string FormatTimestamp(long ticks, long firstTicks, bool useRelativeTime)
     {
-        if (ticks <= 0 || ticks > DateTime.MaxValue.Ticks)
+        if (ticks < 0 || ticks > DateTime.MaxValue.Ticks)
             return $"INVALID({ticks})";

         if (useRelativeTime)
@@ -227,6 +281,66 @@ private static string FormatTimestamp(long ticks, long firstTicks, bool useRelat
         return new DateTime(ticks).ToString("O");
     }

+    /// <summary>
+    /// RFC 4180 quoting + optional spreadsheet formula-injection neutralization.
+    /// </summary>
+    /// <param name="value">The field value to escape. <c>null</c> is treated as an empty field.</param>
+    /// <param name="delimiter">The current CSV delimiter (single character, validated by caller).</param>
+    /// <param name="formulaSafe">
+    /// When true (default — header fields where channel names are user-controlled),
+    /// prefix a literal <c>'</c> on values whose first non-whitespace character is
+    /// <c>=</c>, <c>+</c>, <c>-</c>, or <c>@</c> so spreadsheet apps don't evaluate
+    /// the field as a formula. When false (data fields — internally generated
+    /// timestamps and numeric values), formula mitigation is skipped so legitimate
+    /// negative numbers like <c>-1.23</c> aren't clobbered into <c>'-1.23</c>.
+    /// </param>
+    /// <returns>The escaped field, ready to write between delimiters.</returns>
+    private static string EscapeCsvField(string value, string delimiter, bool formulaSafe = true)
+    {
+        // An empty field needs no escaping. Returning early also keeps a null
+        // value away from the indexers below, which would otherwise throw.
+        if (string.IsNullOrEmpty(value))
+        {
+            return string.Empty;
+        }
+
+        if (formulaSafe)
+        {
+            // Skip ALL Unicode whitespace, not just ' ' and '\t'. CSV
+            // formula-injection PoCs use NBSP (U+00A0), thin spaces, line
+            // separator (U+2028), etc. before '=' to evade trim-based
+            // checks; spreadsheets still treat the resulting cell as a
+            // formula. char.IsWhiteSpace covers the full Unicode set.
+            var firstVisible = 0;
+            while (firstVisible < value.Length && char.IsWhiteSpace(value[firstVisible]))
+            {
+                firstVisible++;
+            }
+
+            if (firstVisible < value.Length && "=+-@".IndexOf(value[firstVisible]) >= 0)
+            {
+                value = "'" + value;
+            }
+        }
+
+        // Quote fields with leading or trailing whitespace — many CSV
+        // parsers (Excel, Google Sheets, pandas with default options) trim
+        // unquoted whitespace and silently lose it. Quoting preserves the
+        // exact value through round-trip. Fields containing the delimiter,
+        // a double-quote, or a line break must be quoted per RFC 4180.
+        var mustQuote = char.IsWhiteSpace(value[0])
+            || char.IsWhiteSpace(value[^1])
+            || value.Contains(delimiter[0])
+            || value.Contains('"')
+            || value.Contains('\r')
+            || value.Contains('\n');
+
+        // Inside a quoted field every embedded double-quote is doubled.
+        return mustQuote
+            ? "\"" + value.Replace("\"", "\"\"") + "\""
+            : value;
+    }
+
     private static void ReportProgress(IProgress<int>? progress, int processed, int total)
     {
         if (progress == null || total <= 0)