Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 206 additions & 1 deletion src/Daqifi.Core.Tests/Firmware/FirmwareUpdateServiceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,197 @@ public async Task CheckWifiFirmwareStatusAsync_ReentrantCallFromUpdateProgressCa
Assert.Equal(FirmwareUpdateState.Complete, service.CurrentState);
}

[Fact]
public async Task CheckWifiFirmwareStatusAsync_WhenLanChipInfoFailsTransiently_RetriesUntilSuccess()
{
// Closes #144: post-PIC32-reboot the WiFi subsystem can lag the
// application by a few seconds, so the first chip-info query
// transiently fails. Without retry, the WiFi version decision
// would short-circuit and trigger an unnecessary multi-minute
// reflash. The retry budget covers the startup window.
var wifiRelease = new FirmwareReleaseInfo
{
Version = new FirmwareVersion(19, 5, 4, null, 0),
TagName = "19.5.4",
IsPreRelease = false
};
var device = new FakeLanChipInfoStreamingDevice(
"COM14",
chipInfo: new LanChipInfo
{
ChipId = 1234,
FwVersion = "19.5.4",
BuildDate = "Jan 8 2019"
},
transientFailuresBeforeSuccess: 2);

var options = CreateFastOptions();
options.LanChipInfoMaxAttempts = 3;
options.LanChipInfoRetryDelay = TimeSpan.FromMilliseconds(5); // Keep test fast

var service = new FirmwareUpdateService(
new FakeHidTransport(),
new FakeFirmwareDownloadService { LatestWifiRelease = wifiRelease },
new FakeExternalProcessRunner(),
NullLogger<FirmwareUpdateService>.Instance,
new FakeBootloaderProtocol([[0x10]]),
new FakeHidDeviceEnumerator([]),
options);

var status = await service.CheckWifiFirmwareStatusAsync(device);

Assert.Equal(3, device.GetLanChipInfoCallCount);
Assert.True(status.IsUpToDate);
Assert.Equal(WifiFirmwareStatusReason.UpToDate, status.Reason);
}

[Fact]
public async Task CheckWifiFirmwareStatusAsync_WhenLanChipInfoFailsAllAttempts_ReturnsChipInfoUnavailable()
{
// After exhausting LanChipInfoMaxAttempts the planning method must
// fall through to ChipInfoUnavailable, NOT hang or surface the
// raw exception. This preserves the "couldn't check, default to
// running update" semantics for genuinely-broken devices.
var device = new FakeLanChipInfoStreamingDevice(
"COM15",
chipInfo: new LanChipInfo
{
ChipId = 1234,
FwVersion = "19.5.4",
BuildDate = "Jan 8 2019"
},
transientFailuresBeforeSuccess: 99);

var options = CreateFastOptions();
options.LanChipInfoMaxAttempts = 3;
options.LanChipInfoRetryDelay = TimeSpan.FromMilliseconds(5);

var service = new FirmwareUpdateService(
new FakeHidTransport(),
new FakeFirmwareDownloadService(),
new FakeExternalProcessRunner(),
NullLogger<FirmwareUpdateService>.Instance,
new FakeBootloaderProtocol([[0x10]]),
new FakeHidDeviceEnumerator([]),
options);

var status = await service.CheckWifiFirmwareStatusAsync(device);

Assert.Equal(3, device.GetLanChipInfoCallCount);
Assert.False(status.IsUpToDate);
Assert.Equal(WifiFirmwareStatusReason.ChipInfoUnavailable, status.Reason);
}

[Fact]
public async Task CheckWifiFirmwareStatusAsync_TotalTimeoutHit_ShortCircuitsToChipInfoUnavailable()
{
// Closes a Qodo follow-up on PR #199: per-attempt query timeouts
// compound with retry delays, so a high MaxAttempts × non-trivial
// per-attempt latency could block far beyond the configured retry
// budget while holding _operationLock. The total-timeout cap caps
// wall-clock time independent of attempt counts.
//
// The fake's per-attempt latency is the Task.Delay below; with
// 200ms latency × 10 max attempts × 100ms retry delay, a naive
// implementation would block ~2.9s. The 100ms total timeout
// forces an early ChipInfoUnavailable return after the first
// attempt's latency exceeds the budget.
var device = new SlowFakeLanChipInfoStreamingDevice(
"COM17",
attemptLatency: TimeSpan.FromMilliseconds(200));

var options = CreateFastOptions();
options.LanChipInfoMaxAttempts = 10;
options.LanChipInfoRetryDelay = TimeSpan.FromMilliseconds(100);
options.LanChipInfoTotalTimeout = TimeSpan.FromMilliseconds(100);

var service = new FirmwareUpdateService(
new FakeHidTransport(),
new FakeFirmwareDownloadService(),
new FakeExternalProcessRunner(),
NullLogger<FirmwareUpdateService>.Instance,
new FakeBootloaderProtocol([[0x10]]),
new FakeHidDeviceEnumerator([]),
options);

var stopwatch = System.Diagnostics.Stopwatch.StartNew();
var status = await service.CheckWifiFirmwareStatusAsync(device);
stopwatch.Stop();

Assert.False(status.IsUpToDate);
Assert.Equal(WifiFirmwareStatusReason.ChipInfoUnavailable, status.Reason);
// Should bail well before attempting all 10 × (200ms + 100ms) = 3s.
// Allowing 1500ms for CI variance / xunit overhead.
Assert.True(stopwatch.Elapsed < TimeSpan.FromMilliseconds(1500),
$"Probe took {stopwatch.ElapsedMilliseconds}ms, expected <1500ms with TotalTimeout=100ms.");
}

private sealed class SlowFakeLanChipInfoStreamingDevice : IStreamingDevice, ILanChipInfoProvider
{
private readonly TimeSpan _attemptLatency;
public SlowFakeLanChipInfoStreamingDevice(string name, TimeSpan attemptLatency)
{
Name = name;
_attemptLatency = attemptLatency;
IsConnected = true;
}
public string Name { get; }
public IPAddress? IpAddress => null;
public bool IsConnected { get; private set; }
public ConnectionStatus Status => ConnectionStatus.Connected;
public int StreamingFrequency { get; set; }
public bool IsStreaming { get; private set; }
public event EventHandler<DeviceStatusEventArgs>? StatusChanged { add { } remove { } }
public event EventHandler<MessageReceivedEventArgs>? MessageReceived { add { } remove { } }
public void Connect() => IsConnected = true;
public void Disconnect() => IsConnected = false;
public void Send<T>(IOutboundMessage<T> message) { }
public void StartStreaming() => IsStreaming = true;
public void StopStreaming() => IsStreaming = false;
public async Task<LanChipInfo?> GetLanChipInfoAsync(CancellationToken cancellationToken = default)
{
await Task.Delay(_attemptLatency, cancellationToken).ConfigureAwait(false);
return null;
}
}

[Fact]
public async Task CheckWifiFirmwareStatusAsync_FirstAttemptSucceeds_DoesNotRetry()
{
// Steady-state path: no retry overhead when the first call works.
var wifiRelease = new FirmwareReleaseInfo
{
Version = new FirmwareVersion(19, 5, 4, null, 0),
TagName = "19.5.4",
IsPreRelease = false
};
var device = new FakeLanChipInfoStreamingDevice(
"COM16",
chipInfo: new LanChipInfo
{
ChipId = 1234,
FwVersion = "19.5.4",
BuildDate = "Jan 8 2019"
});

var options = CreateFastOptions();
options.LanChipInfoMaxAttempts = 3;
options.LanChipInfoRetryDelay = TimeSpan.FromMilliseconds(5);

var service = new FirmwareUpdateService(
new FakeHidTransport(),
new FakeFirmwareDownloadService { LatestWifiRelease = wifiRelease },
new FakeExternalProcessRunner(),
NullLogger<FirmwareUpdateService>.Instance,
new FakeBootloaderProtocol([[0x10]]),
new FakeHidDeviceEnumerator([]),
options);

await service.CheckWifiFirmwareStatusAsync(device);

Assert.Equal(1, device.GetLanChipInfoCallCount);
}

private sealed class SyncProgress<T> : IProgress<T>
{
private readonly Action<T> _handler;
Expand Down Expand Up @@ -848,14 +1039,18 @@ private sealed class FakeLanChipInfoStreamingDevice : IStreamingDevice, ILanChip
{
private readonly LanChipInfo? _chipInfo;
private ConnectionStatus _status = ConnectionStatus.Connected;
private int _remainingTransientFailures;

public FakeLanChipInfoStreamingDevice(string name, LanChipInfo? chipInfo)
public FakeLanChipInfoStreamingDevice(string name, LanChipInfo? chipInfo, int transientFailuresBeforeSuccess = 0)
{
Name = name;
_chipInfo = chipInfo;
_remainingTransientFailures = transientFailuresBeforeSuccess;
IsConnected = true;
}

public int GetLanChipInfoCallCount { get; private set; }

public string Name { get; }
public IPAddress? IpAddress => null;
public bool IsConnected { get; private set; }
Expand Down Expand Up @@ -899,6 +1094,16 @@ public void Send<T>(IOutboundMessage<T> message)

public Task<LanChipInfo?> GetLanChipInfoAsync(CancellationToken cancellationToken = default)
{
GetLanChipInfoCallCount++;
if (_remainingTransientFailures > 0)
{
_remainingTransientFailures--;
// Faulted task (not sync throw) more accurately simulates how
// a real async method surfaces failure — caller's await sees a
// genuinely-async exception path.
return Task.FromException<LanChipInfo?>(
new InvalidOperationException("Simulated transient post-reboot failure."));
}
return Task.FromResult(_chipInfo);
}
}
Expand Down
120 changes: 105 additions & 15 deletions src/Daqifi.Core/Firmware/FirmwareUpdateService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -536,21 +536,15 @@ private async Task<WifiFirmwareStatus> CheckWifiFirmwareStatusCoreAsync(
};
}

LanChipInfo? chipInfo;
try
{
chipInfo = await lanChipInfoProvider.GetLanChipInfoAsync(cancellationToken).ConfigureAwait(false);
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
_logger.LogDebug(ex, "Failed to query LAN chip info; reporting status as ChipInfoUnavailable.");
return new WifiFirmwareStatus
{
IsUpToDate = false,
Reason = WifiFirmwareStatusReason.ChipInfoUnavailable,
};
}

// Bounded retry for the LAN chip-info probe (closes #144). Right
// after a PIC32 firmware update the application is up while WiFi
// is still finishing startup, so the first chip-info query can
// transiently fail; without retry, the WiFi version decision
// would short-circuit to ChipInfoUnavailable and flow on to a
// multi-minute reflash of already-current WiFi firmware. The
// retry budget is bounded (LanChipInfoMaxAttempts × RetryDelay)
// and observes cancellation between attempts.
var chipInfo = await TryGetLanChipInfoWithRetryAsync(lanChipInfoProvider, cancellationToken).ConfigureAwait(false);
if (chipInfo == null)
{
return new WifiFirmwareStatus
Expand Down Expand Up @@ -613,6 +607,102 @@ private async Task<WifiFirmwareStatus> CheckWifiFirmwareStatusCoreAsync(
};
}

private async Task<LanChipInfo?> TryGetLanChipInfoWithRetryAsync(
ILanChipInfoProvider lanChipInfoProvider,
CancellationToken cancellationToken)
{
var maxAttempts = Math.Max(1, _options.LanChipInfoMaxAttempts);
var retryDelay = _options.LanChipInfoRetryDelay;
var totalTimeout = _options.LanChipInfoTotalTimeout;

// Wall-clock budget guards against the pathological case where
// attempt-count × per-attempt-timeout + retry-delay sum vastly
// exceeds the configured retry budget (e.g., 3 × 2s device timeout
// + 2 × 2s delay = ~10s while _operationLock is held). Linking
// the caller's CT preserves cancellation semantics; the timeout
// CTS just adds a deadline.
using var timeoutCts = new CancellationTokenSource(totalTimeout);
using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(
cancellationToken, timeoutCts.Token);
var linkedToken = linkedCts.Token;

for (var attempt = 1; attempt <= maxAttempts; attempt++)
{
try
{
linkedToken.ThrowIfCancellationRequested();
}
catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
{
_logger.LogDebug(
"LAN chip-info probe hit total timeout ({Timeout}) before attempt {Attempt}/{Max}.",
totalTimeout,
attempt,
maxAttempts);
return null;
}

try
{
var chipInfo = await lanChipInfoProvider.GetLanChipInfoAsync(linkedToken).ConfigureAwait(false);
if (chipInfo != null)
{
if (attempt > 1)
{
_logger.LogDebug(
"LAN chip-info query succeeded on attempt {Attempt}/{Max}.",
attempt,
maxAttempts);
}
return chipInfo;
}
_logger.LogDebug(
"LAN chip-info query returned null on attempt {Attempt}/{Max}.",
attempt,
maxAttempts);
}
catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
{
_logger.LogDebug(
"LAN chip-info probe hit total timeout ({Timeout}) during attempt {Attempt}/{Max}.",
totalTimeout,
attempt,
maxAttempts);
return null;
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
_logger.LogDebug(
ex,
"LAN chip-info query failed on attempt {Attempt}/{Max}.",
attempt,
maxAttempts);
}

if (attempt < maxAttempts)
{
try
{
await Task.Delay(retryDelay, linkedToken).ConfigureAwait(false);
}
catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
{
_logger.LogDebug(
"LAN chip-info probe hit total timeout ({Timeout}) during retry delay after attempt {Attempt}/{Max}.",
totalTimeout,
attempt,
maxAttempts);
return null;
}
}
}

_logger.LogDebug(
"LAN chip-info query exhausted {Max} attempts; reporting status as ChipInfoUnavailable.",
maxAttempts);
return null;
}

private ExternalProcessRequest BuildWifiProcessRequest(
IStreamingDevice device,
string firmwarePath,
Expand Down
Loading