diff --git a/src/rendering/page_renderer.rs b/src/rendering/page_renderer.rs index eab30e11..ce3bc8f5 100644 --- a/src/rendering/page_renderer.rs +++ b/src/rendering/page_renderer.rs @@ -8141,6 +8141,13 @@ fn cmyk_to_rgb(c: f32, m: f32, y: f32, k: f32) -> (f32, f32, f32) { (r.clamp(0.0, 1.0), g.clamp(0.0, 1.0), b.clamp(0.0, 1.0)) } +// Test-only counter that records how many `apply_pending_clip` calls actually +// materialized a clip (i.e. did real rasterization work). Used by the +// regression probe below to lock in the per-paint-op fast path. +#[cfg(test)] +pub(crate) static APC_MATERIALIZED: std::sync::atomic::AtomicU64 = + std::sync::atomic::AtomicU64::new(0); + /// ISO 32000-1 §11.7.4.3 / Table 149 source colour space classes. /// /// The CompatibleOverprint blend function `B(c_b, c_s)` selects between @@ -8397,33 +8404,33 @@ fn apply_pending_clip( gs_stack: &GraphicsStateStack, ) { if let Some((path, fill_rule)) = pending_clip.take() { + #[cfg(test)] + APC_MATERIALIZED.fetch_add(1, std::sync::atomic::Ordering::Relaxed); let gs = gs_stack.current(); let transform = combine_transforms(base_transform, &gs.ctm); - if let Some(path_transformed) = path.transform(transform) { - let bounds = path_transformed.bounds(); - log::debug!("Applying clip: fill_rule={:?}, bounds={:?}", fill_rule, bounds); - - let mut new_mask = tiny_skia::Mask::new(pixmap.width(), pixmap.height()).unwrap(); - new_mask.fill_path( - &path_transformed, - fill_rule, - true, // anti-alias - Transform::identity(), - ); - - if let Some(Some(current_mask)) = clip_stack.last() { - // Intersect with existing mask - let mut combined = current_mask.clone(); - let combined_data = combined.data_mut(); - let new_data = new_mask.data(); - for i in 0..combined_data.len() { - combined_data[i] = ((combined_data[i] as u32 * new_data[i] as u32) / 255) as u8; - } - *clip_stack.last_mut().unwrap() = Some(combined); - } else { - *clip_stack.last_mut().unwrap() = Some(new_mask); - } + let Some(slot) = clip_stack.last_mut() else { + return; + }; + match slot { + // Intersect the new clip path into the current scope's mask in + // place. tiny_skia::Mask::intersect_path allocates one submask, + // rasterizes the path into it, then folds it into `self` via the + // library's rounded `(a*b)/255` premultiply — replacing the + // previous code path which additionally cloned the current mask + // (a full page-sized memcpy) before running an equivalent scalar + // multiply loop. The clone was redundant: every `q` already pushes + // a cloned mask onto `clip_stack`, so the top-of-stack mask at the + // current depth is already this scope's private copy and may be + // mutated in place. + Some(existing_mask) => { + existing_mask.intersect_path(&path, fill_rule, true, transform); + }, + None => { + let mut new_mask = tiny_skia::Mask::new(pixmap.width(), pixmap.height()).unwrap(); + new_mask.fill_path(&path, fill_rule, true, transform); + *slot = Some(new_mask); + }, } } } @@ -8984,4 +8991,79 @@ mod tests { "DeviceCMYK pure cyan must map to (0, 1, 1) under additive clamp; got ({r}, {g}, {b})" ); } + + // Perf-regression probe: apply_pending_clip's only expensive work is the + // path-rasterization branch that runs when `pending_clip` is Some. A naive + // refactor (e.g. dropping the `Option::take` short-circuit, or treating + // every paint op as a fresh clip) would explode the materialization count + // to O(paint ops). This test pins the contract by driving the function + // directly with K paint-op-style invocations and N clip-state changes, and + // asserting the materialization count equals N — not K. + // + // The probe is serialized via `APC_PROBE_LOCK` because it reads / resets a + // process-wide AtomicU64. No other test in this mod calls + // `apply_pending_clip`, but the lock keeps the contract safe under future + // additions and under `cargo test -- --test-threads=1` parity. + static APC_PROBE_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + #[test] + fn apply_pending_clip_materializes_only_per_clip_state_change() { + use crate::content::GraphicsStateStack; + use std::sync::atomic::Ordering; + use tiny_skia::{FillRule, PathBuilder, Pixmap, Rect, Transform}; + + let _guard = APC_PROBE_LOCK.lock().unwrap(); + + let pixmap = Pixmap::new(200, 200).expect("pixmap"); + let gs_stack = GraphicsStateStack::new(); + let base_transform = Transform::identity(); + + let make_clip_path = || { + let mut pb = PathBuilder::new(); + pb.push_rect(Rect::from_xywh(10.0, 10.0, 50.0, 50.0).unwrap()); + pb.finish().unwrap() + }; + + // Scenario A: 1 clip-state change followed by K paint-op-style calls + // with no pending clip. Only the first call should materialize. + const K: usize = 100; + APC_MATERIALIZED.store(0, Ordering::Relaxed); + let mut clip_stack: Vec> = vec![None]; + let mut pending: Option<(tiny_skia::Path, FillRule)> = + Some((make_clip_path(), FillRule::Winding)); + for _ in 0..K { + apply_pending_clip(&mut pending, &mut clip_stack, &pixmap, base_transform, &gs_stack); + } + let after_one_clip = APC_MATERIALIZED.load(Ordering::Relaxed); + assert_eq!( + after_one_clip, 1, + "1 W operator followed by {K} paint ops must materialize the clip \ + mask exactly once (got {after_one_clip})" + ); + + // Scenario B: N clip-state changes each followed by K paint ops. + // Materialization count must equal N, not K*N. + const N: usize = 5; + APC_MATERIALIZED.store(0, Ordering::Relaxed); + let mut clip_stack: Vec> = vec![None]; + for _ in 0..N { + let mut pending: Option<(tiny_skia::Path, FillRule)> = + Some((make_clip_path(), FillRule::Winding)); + for _ in 0..K { + apply_pending_clip( + &mut pending, + &mut clip_stack, + &pixmap, + base_transform, + &gs_stack, + ); + } + } + let after_n_clips = APC_MATERIALIZED.load(Ordering::Relaxed); + assert_eq!( + after_n_clips, N as u64, + "{N} W operators each followed by {K} paint ops must materialize \ + exactly {N} times (got {after_n_clips})" + ); + } }