Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions src/Daqifi.Core.Tests/Firmware/FirmwareUpdateServiceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,163 @@ public async Task UpdateWifiModuleAsync_WhenDeviceDoesNotSupportLanQuery_Proceed
Assert.Contains("SYSTem:COMMUnicate:LAN:FWUpdate", device.SentCommands);
}

[Fact]
public async Task UpdateFirmwareAsync_PostReconnectReadinessProbe_AwaitedBeforeComplete()
{
// Closes #145: serial reconnect succeeds before the application
// firmware is ready to answer protobuf status queries. The
// optional readiness probe gives Core a way to wait for true
// application readiness — when set, no caller needs to
// reimplement that retry loop.
//
// The probe here returns false on the first 2 attempts and true
// on the 3rd, simulating a slow PIC32 application boot. Without
// the wait, the update would Complete immediately after serial
// reopens; with the wait, Complete is held back until the probe
// succeeds.
var device = new FakeStreamingDevice("COM3");
var hidTransport = new FakeHidTransport();
hidTransport.EnqueueRead([0x01, 0x10]);
hidTransport.EnqueueRead([0x01, 0x02]);
hidTransport.EnqueueRead([0x01, 0x03]);
hidTransport.EnqueueRead([0x01, 0x03]);
hidTransport.EnqueueRead([0x01, 0x10]);

var enumerator = new FakeHidDeviceEnumerator([
Array.Empty<HidDeviceInfo>(),
[new HidDeviceInfo(0x04D8, 0x003C, "path-1", "SN-1", "DAQiFi Bootloader")]
]);

var probeCallCount = 0;
var options = CreateFastOptions();
options.PostReconnectReadinessProbe = (_, _) =>
{
probeCallCount++;
return Task.FromResult(probeCallCount >= 3);
};
options.PostReconnectReadinessTimeout = TimeSpan.FromSeconds(2);
options.PostReconnectReadinessRetryDelay = TimeSpan.FromMilliseconds(10);

var service = new FirmwareUpdateService(
hidTransport,
new FakeFirmwareDownloadService(),
new FakeExternalProcessRunner(),
NullLogger<FirmwareUpdateService>.Instance,
new FakeBootloaderProtocol([[0xA1, 0x01], [0xA1, 0x02]]),
enumerator,
options);

var hexPath = CreateTempFile();
try
{
await service.UpdateFirmwareAsync(device, hexPath);
}
finally
{
File.Delete(hexPath);
}

Assert.Equal(FirmwareUpdateState.Complete, service.CurrentState);
Assert.Equal(3, probeCallCount);
}

[Fact]
public async Task UpdateFirmwareAsync_PostReconnectReadinessProbe_TimeoutFailsTheUpdate()
{
// When the application never becomes ready within the configured
// budget, the update must transition to Failed with a clear
// timeout message — NOT silently complete and hand back a
// half-ready device. That's the entire reason for #145.
var device = new FakeStreamingDevice("COM3");
var hidTransport = new FakeHidTransport();
hidTransport.EnqueueRead([0x01, 0x10]);
hidTransport.EnqueueRead([0x01, 0x02]);
hidTransport.EnqueueRead([0x01, 0x03]);
hidTransport.EnqueueRead([0x01, 0x03]);
hidTransport.EnqueueRead([0x01, 0x10]);

var enumerator = new FakeHidDeviceEnumerator([
Array.Empty<HidDeviceInfo>(),
[new HidDeviceInfo(0x04D8, 0x003C, "path-1", "SN-1", "DAQiFi Bootloader")]
]);

var options = CreateFastOptions();
// Probe always returns false → never ready → must time out
options.PostReconnectReadinessProbe = (_, _) => Task.FromResult(false);
options.PostReconnectReadinessTimeout = TimeSpan.FromMilliseconds(150);
options.PostReconnectReadinessRetryDelay = TimeSpan.FromMilliseconds(10);

var service = new FirmwareUpdateService(
hidTransport,
new FakeFirmwareDownloadService(),
new FakeExternalProcessRunner(),
NullLogger<FirmwareUpdateService>.Instance,
new FakeBootloaderProtocol([[0xA1, 0x01], [0xA1, 0x02]]),
enumerator,
options);

var hexPath = CreateTempFile();
FirmwareUpdateException ex;
try
{
ex = await Assert.ThrowsAsync<FirmwareUpdateException>(
() => service.UpdateFirmwareAsync(device, hexPath));
}
finally
{
File.Delete(hexPath);
}

Assert.Equal(FirmwareUpdateState.Failed, service.CurrentState);
Assert.Equal(FirmwareUpdateState.JumpingToApp, ex.FailedState);
var inner = Assert.IsType<TimeoutException>(ex.InnerException);
Assert.Contains("application-ready", inner.Message);
}

[Fact]
public async Task UpdateFirmwareAsync_NoPostReconnectProbe_PreservesLegacyBehavior()
{
// No probe configured == legacy behavior: Complete fires as
// soon as serial reopens. Belt-and-suspenders test that the
// new code path is fully opt-in.
var device = new FakeStreamingDevice("COM3");
var hidTransport = new FakeHidTransport();
hidTransport.EnqueueRead([0x01, 0x10]);
hidTransport.EnqueueRead([0x01, 0x02]);
hidTransport.EnqueueRead([0x01, 0x03]);
hidTransport.EnqueueRead([0x01, 0x03]);
hidTransport.EnqueueRead([0x01, 0x10]);

var enumerator = new FakeHidDeviceEnumerator([
Array.Empty<HidDeviceInfo>(),
[new HidDeviceInfo(0x04D8, 0x003C, "path-1", "SN-1", "DAQiFi Bootloader")]
]);

var options = CreateFastOptions();
// No PostReconnectReadinessProbe set - legacy path

var service = new FirmwareUpdateService(
hidTransport,
new FakeFirmwareDownloadService(),
new FakeExternalProcessRunner(),
NullLogger<FirmwareUpdateService>.Instance,
new FakeBootloaderProtocol([[0xA1, 0x01], [0xA1, 0x02]]),
enumerator,
options);

var hexPath = CreateTempFile();
try
{
await service.UpdateFirmwareAsync(device, hexPath);
}
finally
{
File.Delete(hexPath);
}

Assert.Equal(FirmwareUpdateState.Complete, service.CurrentState);
}

private static FirmwareUpdateServiceOptions CreateFastOptions()
{
return new FirmwareUpdateServiceOptions
Expand Down
99 changes: 99 additions & 0 deletions src/Daqifi.Core/Firmware/FirmwareUpdateService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,105 @@ await _hidTransport

await SafeDisconnectHidAsync().ConfigureAwait(false);
await WaitForSerialReconnectAsync(device, cancellationToken).ConfigureAwait(false);

// Application-readiness probe (closes #145). Serial transport
// re-enumeration succeeds well before the PIC32 application
// firmware is actually ready to answer protobuf status queries;
// if a downstream flow (LAN chip info, WiFi prep) starts before
// the app is up, those queries fail and callers reimplement
// their own retry. The probe is opt-in via options — when null,
// the legacy "serial reopened == done" semantics apply.
if (_options.PostReconnectReadinessProbe is { } probe)
{
await WaitForApplicationReadyAsync(device, probe, cancellationToken).ConfigureAwait(false);
}
Comment on lines +817 to +820
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Readiness probe is optional 📎 Requirement gap ☼ Reliability

FirmwareUpdateService only waits for application readiness when PostReconnectReadinessProbe is
set; otherwise it proceeds immediately after serial reconnect, which can still return a half-started
device. This keeps the failure mode the checklist is trying to eliminate and can still force
downstream integrations to add their own readiness polling.
Agent Prompt
## Issue description
The post-reconnect readiness wait is currently opt-in (`PostReconnectReadinessProbe` is nullable and gated), so the firmware update flow can still proceed immediately after reconnect without confirming the application is ready.

## Issue Context
Checklist IDs 1 and 2 require Core’s firmware update lifecycle to handle post-reconnect application readiness so reconnect completion isn’t based solely on transport reopening, and downstream callers don’t need their own readiness loops.

## Fix Focus Areas
- src/Daqifi.Core/Firmware/FirmwareUpdateService.cs[810-820]
- src/Daqifi.Core/Firmware/FirmwareUpdateServiceOptions.cs[124-150]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

}

private async Task WaitForApplicationReadyAsync(
IStreamingDevice device,
Func<IStreamingDevice, CancellationToken, Task<bool>> probe,
CancellationToken cancellationToken)
{
var totalTimeout = _options.PostReconnectReadinessTimeout;
var retryDelay = _options.PostReconnectReadinessRetryDelay;

using var timeoutCts = new CancellationTokenSource(totalTimeout);
using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(
cancellationToken, timeoutCts.Token);
var linkedToken = linkedCts.Token;

var attempt = 0;
while (true)
{
attempt++;
try
{
linkedToken.ThrowIfCancellationRequested();
}
catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
{
throw new TimeoutException(
$"Device did not become application-ready within {totalTimeout} after PIC32 reconnect (attempt {attempt}). " +
"The serial transport reopened but the readiness probe never returned true; the device may still be initializing or the firmware may have failed to start.");
}

try
{
var isReady = await probe(device, linkedToken).ConfigureAwait(false);

// Re-check the budget after the await: a probe that ignores
// its CancellationToken could otherwise return true after
// the timeout has elapsed and bypass the budget. Caller's
// CT propagates as OperationCanceledException out of this
// method (caught at the same handler below); only the
// timeout-CT case is reinterpreted as TimeoutException.
try
{
linkedToken.ThrowIfCancellationRequested();
}
catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
{
throw new TimeoutException(
$"Device did not become application-ready within {totalTimeout} after PIC32 reconnect (attempt {attempt}). " +
"The readiness probe ignored cancellation and returned after the deadline.");
}

if (isReady)
{
if (attempt > 1)
{
_logger.LogDebug(
"Device became application-ready on probe attempt {Attempt}.",
attempt);
}
return;
}
_logger.LogDebug("Application-ready probe returned false on attempt {Attempt}; will retry.", attempt);
}
catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
{
throw new TimeoutException(
$"Device did not become application-ready within {totalTimeout} after PIC32 reconnect (attempt {attempt}). " +
"The readiness probe was canceled by the timeout while running.");
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
_logger.LogDebug(
ex,
"Application-ready probe threw on attempt {Attempt}; treating as not-ready and retrying.",
attempt);
}

try
{
await Task.Delay(retryDelay, linkedToken).ConfigureAwait(false);
}
catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
{
throw new TimeoutException(
$"Device did not become application-ready within {totalTimeout} after PIC32 reconnect (attempt {attempt}).");
}
}
}

private async Task WaitForSerialReconnectAsync(
Expand Down
32 changes: 32 additions & 0 deletions src/Daqifi.Core/Firmware/FirmwareUpdateServiceOptions.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using Daqifi.Core.Device;

namespace Daqifi.Core.Firmware;

/// <summary>
Expand Down Expand Up @@ -119,6 +121,34 @@ public sealed class FirmwareUpdateServiceOptions
/// </summary>
public string? WifiPortOverride { get; set; }

/// <summary>
/// Optional callback that returns true once the device is ready to
/// answer normal application commands after PIC32 firmware update +
/// reconnect (closes #145). The serial transport can re-enumerate
/// well before the application firmware is actually ready to respond
/// to protobuf status queries — without this probe, the next steps
/// in a downstream flow (LAN chip-info, WiFi prep) hit a half-started
/// device and have to retry. When set, the firmware service polls
/// the probe with bounded retry after each PIC32 reconnect; if it
/// never returns true within <see cref="PostReconnectReadinessTimeout"/>,
/// the update transitions to Failed with a clear timeout exception
/// rather than silently handing back a half-ready device. When null,
/// reconnect succeeds as soon as the serial port reopens (legacy
/// behavior).
/// </summary>
public Func<IStreamingDevice, CancellationToken, Task<bool>>? PostReconnectReadinessProbe { get; set; }

/// <summary>
/// Wall-clock budget for the post-reconnect readiness probe. Default
/// 30s covers a slow PIC32 boot and downstream-firmware initialization.
/// </summary>
public TimeSpan PostReconnectReadinessTimeout { get; set; } = TimeSpan.FromSeconds(30);

/// <summary>
/// Delay between readiness-probe attempts (cancellation-aware).
/// </summary>
public TimeSpan PostReconnectReadinessRetryDelay { get; set; } = TimeSpan.FromMilliseconds(500);

/// <summary>
/// Gets the configured timeout for a given firmware update state.
/// </summary>
Expand Down Expand Up @@ -159,6 +189,8 @@ public void Validate()
ValidatePositive(HidConnectRetryDelay, nameof(HidConnectRetryDelay));
ValidatePositive(FlashWriteRetryDelay, nameof(FlashWriteRetryDelay));
ValidatePositive(WifiProcessTimeout, nameof(WifiProcessTimeout));
ValidatePositive(PostReconnectReadinessTimeout, nameof(PostReconnectReadinessTimeout));
ValidatePositive(PostReconnectReadinessRetryDelay, nameof(PostReconnectReadinessRetryDelay));

if (HidConnectRetryCount < 1)
{
Expand Down
Loading