From eda776038fa2b4b4aa3fc3914e2cfc9c0d7b0a15 Mon Sep 17 00:00:00 2001 From: "Igor von Nyssen (Claude Code)" Date: Sun, 10 May 2026 12:24:46 -0700 Subject: [PATCH 1/3] fix(client): handle unaligned ImageBytes response buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ASCOMResult::from_reqwest` reads the 44-byte `ImageBytesMetadata` header via `bytemuck::try_from_bytes` and the pixel payload via `bytemuck::try_cast_slice::`. Both APIs require the source `&[u8]` to be aligned to the target type's alignment (4 for `i32`, 2 for `i16`/`u16`). The `&[u8]` here is a slice of the HTTP response body — typically a `bytes::Bytes` chunk that reqwest assembled from one or more chunked reads — and its start pointer is not guaranteed to satisfy that alignment. When it doesn't, both calls return `PodCastError::TargetAlignmentGreaterAndInputNotAligned` and the client panics with `ASCOM error UNSPECIFIED: TargetAlignmentGreaterAndInputNotAligned` — intermittently, depending on the host allocator's runtime behaviour. Fix: use the unaligned-friendly equivalents. - Metadata: `bytemuck::pod_read_unaligned` reads the full struct via `ptr::read_unaligned`, no source-alignment requirement. - Pixel data: `bytemuck::pod_collect_to_vec` allocates a fresh `Vec` (which IS aligned) and `memcpy`s the payload in. Adds an upfront length-modulo check to keep the existing slop-error semantics. Adds a regression test that constructs an `imagebytes` payload at each of the four leading-pad offsets (0..4) within a `Vec`, guaranteeing at least three of them place the body slice on a non-4-aligned start. The test fails on the previous code path (`try_from_bytes` rejects the unaligned metadata immediately) and passes after the fix. Closes RReverser#18. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/api/camera/image_array/client.rs | 99 +++++++++++++++++++++++++--- 1 file changed, 91 insertions(+), 8 deletions(-) diff --git a/src/api/camera/image_array/client.rs b/src/api/camera/image_array/client.rs index aa095f0..1b58efe 100644 --- a/src/api/camera/image_array/client.rs +++ b/src/api/camera/image_array/client.rs @@ -82,12 +82,23 @@ impl<'de> Deserialize<'de> for JsonImageArray { } } -fn cast_raw_data(data: &[u8]) -> Result, PodCastError> { - Ok(bytemuck::try_cast_slice::(data)? - .iter() - .copied() - .map(T::into) - .collect()) +fn cast_raw_data( + data: &[u8], +) -> Result, PodCastError> { + // `bytemuck::try_cast_slice::` requires the source pointer + // to be aligned to `align_of::()`. The HTTP response body that + // backs `data` is a `bytes::Bytes` slice from reqwest's chunked + // read — its start pointer is not guaranteed to be 4-byte aligned + // (or 2-byte aligned for `i16` / `u16`). Copy through an aligned + // `Vec` instead so callers don't see intermittent + // `TargetAlignmentGreaterAndInputNotAligned` failures dependent on + // the host allocator's runtime behaviour. + let elem_size = size_of::(); + if data.len() % elem_size != 0 { + return Err(PodCastError::OutputSliceWouldHaveSlop); + } + let aligned: Vec = bytemuck::pod_collect_to_vec(data); + Ok(aligned.into_iter().map(T::into).collect()) } impl Response for ASCOMResult { @@ -108,10 +119,16 @@ impl Response for ASCOMResult { }, }); } - let metadata = bytes + let metadata_bytes = bytes .get(..size_of::()) .ok_or_else(|| eyre::eyre!("not enough bytes to read image metadata"))?; - let metadata = bytemuck::try_from_bytes::(metadata)?; + // `bytemuck::try_from_bytes::` requires + // the source slice to be aligned to `align_of::() == 4`. + // The body buffer's start pointer is not guaranteed to be + // 4-aligned (see `cast_raw_data` for the parallel issue); + // `pod_read_unaligned` reads via `ptr::read_unaligned` and + // is safe for any pointer. + let metadata: ImageBytesMetadata = bytemuck::pod_read_unaligned(metadata_bytes); eyre::ensure!( metadata.metadata_version == 1_i32, "unsupported metadata version {}", @@ -168,3 +185,69 @@ impl Response for ASCOMResult { }) } } + +#[cfg(test)] +mod tests { + use super::*; + + /// Build an `application/imagebytes` payload at the given byte + /// offset within a `Vec`. The returned slice's start pointer + /// has the requested alignment-mod-4. Used to reproduce the + /// upstream-pre-fix `TargetAlignmentGreaterAndInputNotAligned` + /// path, which only fires when the buffer slice is not 4-byte + /// aligned. + fn imagebytes_payload_at_offset( + leading_pad: usize, + pixels: &[i32], + ) -> (Vec, std::ops::Range) { + let metadata = ImageBytesMetadata { + metadata_version: 1, + error_number: 0, + client_transaction_id: None, + server_transaction_id: None, + data_start: i32::try_from(size_of::()).unwrap(), + image_element_type: TransmissionElementType::I32 as i32, + transmission_element_type: TransmissionElementType::I32 as i32, + rank: ImageArrayRank::Rank2 as i32, + dimension_1: i32::try_from(pixels.len()).unwrap(), + dimension_2: 1, + dimension_3: 0, + }; + let mut buf = vec![0u8; leading_pad]; + let payload_start = buf.len(); + buf.extend_from_slice(bytemuck::bytes_of(&metadata)); + buf.extend_from_slice(bytemuck::cast_slice(pixels)); + let payload_end = buf.len(); + (buf, payload_start..payload_end) + } + + /// Regression test for issue #18: parsing an `imagebytes` body + /// whose slice start is not 4-byte aligned must succeed. + /// Pre-fix this path failed with + /// `bytemuck::PodCastError::TargetAlignmentGreaterAndInputNotAligned` + /// because both the metadata `try_from_bytes` and the pixel + /// `try_cast_slice` require source alignment. After the fix — + /// using `pod_read_unaligned` and `pod_collect_to_vec` — the + /// parse is alignment-independent. + #[test] + fn from_reqwest_handles_unaligned_imagebytes_slice() { + let pixels: Vec = (0..16).collect(); + // Try every offset 0..4 — at least one of 1, 2, 3 is + // guaranteed to put the body slice on a non-4-aligned start + // even if `Vec` happens to allocate at a 4-aligned base. + for leading_pad in 0..4 { + let (buf, range) = imagebytes_payload_at_offset(leading_pad, &pixels); + let slice = &buf[range]; + let mime: Mime = IMAGE_BYTES_TYPE.parse().unwrap(); + let parsed = + as Response>::from_reqwest(mime, slice).unwrap_or_else( + |e| panic!("from_reqwest failed at leading_pad={leading_pad}: {e:?}"), + ); + let array = parsed.response.unwrap(); + assert_eq!(array.shape(), &[pixels.len(), 1, 1]); + for (i, &p) in pixels.iter().enumerate() { + assert_eq!(array[[i, 0, 0]], p, "pixel {i} mismatch"); + } + } + } +} From 6da5cf46aa0dd19363c71dcc7b691227703ebcec Mon Sep 17 00:00:00 2001 From: "Igor von Nyssen (Claude Code)" Date: Sun, 10 May 2026 12:32:15 -0700 Subject: [PATCH 2/3] review: address clippy + Copilot feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - cast_raw_data: keep the zero-copy fast path. Try `bytemuck::try_cast_slice::` first; only fall back to the copying `pod_collect_to_vec` when it returns `TargetAlignmentGreaterAndInputNotAligned`. Slop / length-mismatch errors propagate verbatim. Common (aligned) case keeps the pre-PR memory and copy profile; unaligned case still works. - Drop redundant `if !data.len().is_multiple_of(elem_size)` length check now that the slop error path is reached via try_cast_slice directly. - Metadata fix doc comment: refer to `align_of::()` instead of the hard-coded `align_of::() == 4` so it stays correct if the struct's layout grows a wider field. - Test helper doc comment: clarify the alignment guarantee — the `Vec` base allocation is high-aligned by the system allocator, so looping leading_pad over 0..4 covers all four mod-4 rotations and guarantees at least three unaligned cases. - Test fn signature: split into a `Result<()>`-returning helper (`check_one_offset`) using `eyre::ensure!` for validation and a panicking `#[test]` shell that surfaces the failing leading_pad in the panic message. Avoids clippy::panic_in_result_fn while keeping diagnostic context. - Replace `data.len() % elem_size != 0` with the `is_multiple_of` form (clippy::manual_is_multiple_of), superseded by the fast-path refactor above. - Clean up `as i32` casts in the test helper to `i32::from(...)` for the enum reprs and `i32::try_from(...)?` for the size_of/len conversions (clippy::cast_lossless / cast_possible_truncation). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/api/camera/image_array/client.rs | 119 ++++++++++++++++----------- 1 file changed, 69 insertions(+), 50 deletions(-) diff --git a/src/api/camera/image_array/client.rs b/src/api/camera/image_array/client.rs index 1b58efe..f9e03b8 100644 --- a/src/api/camera/image_array/client.rs +++ b/src/api/camera/image_array/client.rs @@ -85,20 +85,25 @@ impl<'de> Deserialize<'de> for JsonImageArray { fn cast_raw_data( data: &[u8], ) -> Result, PodCastError> { - // `bytemuck::try_cast_slice::` requires the source pointer - // to be aligned to `align_of::()`. The HTTP response body that - // backs `data` is a `bytes::Bytes` slice from reqwest's chunked - // read — its start pointer is not guaranteed to be 4-byte aligned - // (or 2-byte aligned for `i16` / `u16`). Copy through an aligned - // `Vec` instead so callers don't see intermittent - // `TargetAlignmentGreaterAndInputNotAligned` failures dependent on - // the host allocator's runtime behaviour. - let elem_size = size_of::(); - if data.len() % elem_size != 0 { - return Err(PodCastError::OutputSliceWouldHaveSlop); + // Fast path: if `data` happens to be aligned to `align_of::()`, + // `try_cast_slice` reinterprets in place and we just iterate. + // + // Slow path: the HTTP response body that backs `data` is a + // `bytes::Bytes` slice from reqwest's chunked read, and its start + // pointer is not guaranteed to be `align_of::()`-aligned (4 + // bytes for `i32`, 2 for `i16`/`u16`). When the fast cast fails + // with `TargetAlignmentGreaterAndInputNotAligned`, copy through + // an aligned `Vec` via `pod_collect_to_vec`. Length-mismatch / + // slop errors propagate verbatim — only the alignment failure + // gets the unaligned fallback. + match bytemuck::try_cast_slice::(data) { + Ok(aligned) => Ok(aligned.iter().copied().map(T::into).collect()), + Err(PodCastError::TargetAlignmentGreaterAndInputNotAligned) => { + let owned: Vec = bytemuck::pod_collect_to_vec(data); + Ok(owned.into_iter().map(T::into).collect()) + } + Err(other) => Err(other), } - let aligned: Vec = bytemuck::pod_collect_to_vec(data); - Ok(aligned.into_iter().map(T::into).collect()) } impl Response for ASCOMResult { @@ -123,11 +128,12 @@ impl Response for ASCOMResult { .get(..size_of::()) .ok_or_else(|| eyre::eyre!("not enough bytes to read image metadata"))?; // `bytemuck::try_from_bytes::` requires - // the source slice to be aligned to `align_of::() == 4`. - // The body buffer's start pointer is not guaranteed to be - // 4-aligned (see `cast_raw_data` for the parallel issue); + // the source slice to be aligned to + // `align_of::()`. The body buffer's start + // pointer is not guaranteed to satisfy that (see + // `cast_raw_data` for the parallel issue); // `pod_read_unaligned` reads via `ptr::read_unaligned` and - // is safe for any pointer. + // works for any pointer. let metadata: ImageBytesMetadata = bytemuck::pod_read_unaligned(metadata_bytes); eyre::ensure!( metadata.metadata_version == 1_i32, @@ -191,34 +197,36 @@ mod tests { use super::*; /// Build an `application/imagebytes` payload at the given byte - /// offset within a `Vec`. The returned slice's start pointer - /// has the requested alignment-mod-4. Used to reproduce the - /// upstream-pre-fix `TargetAlignmentGreaterAndInputNotAligned` - /// path, which only fires when the buffer slice is not 4-byte - /// aligned. + /// offset within a `Vec`. The `Vec`'s base allocation is + /// usually a high-alignment chunk from the system allocator, so + /// looping `leading_pad` over `0..align_of::()` is sufficient + /// to cover all four `mod 4` cases — at least three of those + /// rotations land the body slice on a non-4-aligned start, which + /// is what reproduces the upstream-pre-fix + /// `TargetAlignmentGreaterAndInputNotAligned` path. fn imagebytes_payload_at_offset( leading_pad: usize, pixels: &[i32], - ) -> (Vec, std::ops::Range) { + ) -> eyre::Result<(Vec, std::ops::Range)> { let metadata = ImageBytesMetadata { - metadata_version: 1, - error_number: 0, + metadata_version: 1_i32, + error_number: 0_i32, client_transaction_id: None, server_transaction_id: None, - data_start: i32::try_from(size_of::()).unwrap(), - image_element_type: TransmissionElementType::I32 as i32, - transmission_element_type: TransmissionElementType::I32 as i32, - rank: ImageArrayRank::Rank2 as i32, - dimension_1: i32::try_from(pixels.len()).unwrap(), - dimension_2: 1, - dimension_3: 0, + data_start: i32::try_from(size_of::())?, + image_element_type: i32::from(TransmissionElementType::I32), + transmission_element_type: i32::from(TransmissionElementType::I32), + rank: i32::from(ImageArrayRank::Rank2), + dimension_1: i32::try_from(pixels.len())?, + dimension_2: 1_i32, + dimension_3: 0_i32, }; let mut buf = vec![0u8; leading_pad]; let payload_start = buf.len(); buf.extend_from_slice(bytemuck::bytes_of(&metadata)); buf.extend_from_slice(bytemuck::cast_slice(pixels)); let payload_end = buf.len(); - (buf, payload_start..payload_end) + Ok((buf, payload_start..payload_end)) } /// Regression test for issue #18: parsing an `imagebytes` body @@ -229,25 +237,36 @@ mod tests { /// `try_cast_slice` require source alignment. After the fix — /// using `pod_read_unaligned` and `pod_collect_to_vec` — the /// parse is alignment-independent. + fn check_one_offset(leading_pad: usize, pixels: &[i32]) -> eyre::Result<()> { + let (buf, range) = imagebytes_payload_at_offset(leading_pad, pixels)?; + let slice = &buf[range]; + let mime: Mime = IMAGE_BYTES_TYPE.parse()?; + let parsed = as Response>::from_reqwest(mime, slice)?; + let array = parsed + .response + .map_err(|e| eyre::eyre!("ascom error: {e:?}"))?; + eyre::ensure!( + array.shape() == [pixels.len(), 1_usize, 1_usize], + "shape mismatch: {:?}", + array.shape() + ); + for (i, &p) in pixels.iter().enumerate() { + let actual = array[[i, 0_usize, 0_usize]]; + eyre::ensure!(actual == p, "pixel {i} mismatch: got {actual}, expected {p}"); + } + Ok(()) + } + #[test] fn from_reqwest_handles_unaligned_imagebytes_slice() { - let pixels: Vec = (0..16).collect(); - // Try every offset 0..4 — at least one of 1, 2, 3 is - // guaranteed to put the body slice on a non-4-aligned start - // even if `Vec` happens to allocate at a 4-aligned base. - for leading_pad in 0..4 { - let (buf, range) = imagebytes_payload_at_offset(leading_pad, &pixels); - let slice = &buf[range]; - let mime: Mime = IMAGE_BYTES_TYPE.parse().unwrap(); - let parsed = - as Response>::from_reqwest(mime, slice).unwrap_or_else( - |e| panic!("from_reqwest failed at leading_pad={leading_pad}: {e:?}"), - ); - let array = parsed.response.unwrap(); - assert_eq!(array.shape(), &[pixels.len(), 1, 1]); - for (i, &p) in pixels.iter().enumerate() { - assert_eq!(array[[i, 0, 0]], p, "pixel {i} mismatch"); - } + let pixels: Vec = (0_i32..16_i32).collect(); + // Try every offset 0..4 — at least three of these rotations + // land the body slice on a non-4-aligned start (regardless of + // the `Vec` base alignment), which is the case that fails + // pre-fix. + for leading_pad in 0_usize..4_usize { + check_one_offset(leading_pad, &pixels) + .unwrap_or_else(|e| panic!("leading_pad={leading_pad}: {e:?}")); } } } From 90254fbed4bed41a43abf284a580470c1a6ac3bb Mon Sep 17 00:00:00 2001 From: "Igor von Nyssen (Claude Code)" Date: Sun, 10 May 2026 12:53:35 -0700 Subject: [PATCH 3/3] perf(client): fuse cast_raw_data slow path into single-pass widen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the slow path's two-memcpy bytes→Vec→Vec staging with a single chunked pass that decodes each `size_of::()`-byte chunk via a new `widen_from_le_chunk` trait method on `AsTransmissionElementType` and writes straight into the output `Vec`. Savings: - One full pixel-buffer memcpy on unaligned input. - Memory ceiling on the slow path drops from `data.len() + sizeof_output` to just `sizeof_output`. For a 32 MP `i32` image that's ~128 MB peak instead of ~256 MB. - Drops the `bytemuck::NoUninit` bound on the local function (was only required by `pod_collect_to_vec`). The fast (aligned) path is unchanged from the previous commit — `try_cast_slice` reinterprets in place, and we iterate-and-widen into the output. So the common case keeps the pre-PR memory and copy profile; the slow path is now strictly an improvement. `from_le_bytes` on `i32` lowers to the same instruction as a normal aligned `i32` load on little-endian targets, so the per-element loop in the slow path is essentially identical in cost to a bulk memcpy + widening loop. For the smaller types (`i16` / `u16` / `u8`) the upconversion was element-wise anyway, so the staging buffer was pure overhead. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/api/camera/image_array/client.rs | 40 +++++++++++++++++----------- src/api/camera/image_array/mod.rs | 31 +++++++++++++++++++++ 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/api/camera/image_array/client.rs b/src/api/camera/image_array/client.rs index f9e03b8..88f9d4e 100644 --- a/src/api/camera/image_array/client.rs +++ b/src/api/camera/image_array/client.rs @@ -82,25 +82,35 @@ impl<'de> Deserialize<'de> for JsonImageArray { } } -fn cast_raw_data( - data: &[u8], -) -> Result, PodCastError> { - // Fast path: if `data` happens to be aligned to `align_of::()`, - // `try_cast_slice` reinterprets in place and we just iterate. +fn cast_raw_data(data: &[u8]) -> Result, PodCastError> { + // Fast path: if `data` is already aligned to `align_of::()`, + // `try_cast_slice` reinterprets in place and we iterate-and-widen + // into the output `Vec`. This is the common case — most host + // allocators hand out 16-aligned buffers so the body slice that + // reqwest hands us satisfies a 4-byte alignment requirement. // - // Slow path: the HTTP response body that backs `data` is a - // `bytes::Bytes` slice from reqwest's chunked read, and its start - // pointer is not guaranteed to be `align_of::()`-aligned (4 - // bytes for `i32`, 2 for `i16`/`u16`). When the fast cast fails - // with `TargetAlignmentGreaterAndInputNotAligned`, copy through - // an aligned `Vec` via `pod_collect_to_vec`. Length-mismatch / - // slop errors propagate verbatim — only the alignment failure - // gets the unaligned fallback. + // Slow path: the HTTP response body is a `bytes::Bytes` slice + // from reqwest's chunked read, and its start pointer is not + // *guaranteed* to be `align_of::()`-aligned (4 bytes for + // `i32`, 2 for `i16` / `u16`). When the fast cast fails with + // `TargetAlignmentGreaterAndInputNotAligned`, decode each chunk + // via `T::widen_from_le_chunk` straight into the output + // `Vec` — one pass, no intermediate aligned `Vec`. + // Memory ceiling on the slow path is `sizeof_output` rather + // than `data.len() + sizeof_output`. Length-mismatch / slop + // errors propagate verbatim — only the alignment failure gets + // the unaligned fallback. match bytemuck::try_cast_slice::(data) { Ok(aligned) => Ok(aligned.iter().copied().map(T::into).collect()), Err(PodCastError::TargetAlignmentGreaterAndInputNotAligned) => { - let owned: Vec = bytemuck::pod_collect_to_vec(data); - Ok(owned.into_iter().map(T::into).collect()) + let elem_size = size_of::(); + if !data.len().is_multiple_of(elem_size) { + return Err(PodCastError::OutputSliceWouldHaveSlop); + } + Ok(data + .chunks_exact(elem_size) + .map(T::widen_from_le_chunk) + .collect()) } Err(other) => Err(other), } diff --git a/src/api/camera/image_array/mod.rs b/src/api/camera/image_array/mod.rs index 6ecd71a..7003e15 100644 --- a/src/api/camera/image_array/mod.rs +++ b/src/api/camera/image_array/mod.rs @@ -64,22 +64,53 @@ pub(crate) enum ImageElementType { trait AsTransmissionElementType: 'static + Into + AnyBitPattern { const TYPE: TransmissionElementType; + + /// Read a single little-endian `Self` from a + /// `size_of::()`-byte chunk and widen to `i32`. Used by + /// the unaligned-input slow path of `cast_raw_data` (see + /// `image_array/client.rs`) so the conversion can go from + /// response bytes straight into `Vec` in one pass without + /// staging through an intermediate aligned `Vec`. + /// Callers must feed exactly `size_of::()` bytes (e.g. via + /// `data.chunks_exact(size_of::())`). + fn widen_from_le_chunk(chunk: &[u8]) -> i32; } impl AsTransmissionElementType for i16 { const TYPE: TransmissionElementType = TransmissionElementType::I16; + + fn widen_from_le_chunk(chunk: &[u8]) -> i32 { + i32::from(Self::from_le_bytes([chunk[0_usize], chunk[1_usize]])) + } } impl AsTransmissionElementType for i32 { const TYPE: TransmissionElementType = TransmissionElementType::I32; + + fn widen_from_le_chunk(chunk: &[u8]) -> i32 { + Self::from_le_bytes([ + chunk[0_usize], + chunk[1_usize], + chunk[2_usize], + chunk[3_usize], + ]) + } } impl AsTransmissionElementType for u16 { const TYPE: TransmissionElementType = TransmissionElementType::U16; + + fn widen_from_le_chunk(chunk: &[u8]) -> i32 { + i32::from(Self::from_le_bytes([chunk[0_usize], chunk[1_usize]])) + } } impl AsTransmissionElementType for u8 { const TYPE: TransmissionElementType = TransmissionElementType::U8; + + fn widen_from_le_chunk(chunk: &[u8]) -> i32 { + i32::from(chunk[0_usize]) + } } /// Image array.