Skip to content

Commit b0bd9d3

Browse files
committed
Fix swapped scalar and simd implementations of strip
The logic for choosing between the scalar and SIMD versions of the strip kernel was backwards: the scalar version ran when `use_simd` was set, and the SIMD version ran otherwise. Fixing this makes only a small performance difference, because the strip kernel isn't a large part of the total time. Also optimize clamping behavior to take advantage of saturation in float-to-integer conversion operations.
1 parent ea98b8b commit b0bd9d3

File tree

3 files changed

+4
-4
lines changed

3 files changed

+4
-4
lines changed

cpu-sparse/src/fine.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ impl<'a> Fine<'a> {
5757
for i in 0..WIDE_TILE_WIDTH {
5858
let mut rgba_f32 = [0.0; 4];
5959
rgba_f32.copy_from_slice(&self.scratch[(i * STRIP_HEIGHT + j) * 4..][..4]);
60-
let rgba_u8 = rgba_f32.map(|x| (x.clamp(0., 1.) * 255.0).round() as u8);
60+
let rgba_u8 = rgba_f32.map(|x| (x * 255.0).round() as u8);
6161
self.out_buf[line_ix + i * 4..][..4].copy_from_slice(&rgba_u8);
6262
}
6363
}

cpu-sparse/src/simd.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@ impl<'a> Fine<'a> {
4444

4545
pub(crate) fn strip(&mut self, x: usize, width: usize, alphas: &[u32], color: [f32; 4]) {
4646
if self.use_simd {
47-
self.strip_scalar(x, width, alphas, color);
48-
} else {
4947
unsafe {
5048
self.strip_simd(x, width, alphas, color);
5149
}
50+
} else {
51+
self.strip_scalar(x, width, alphas, color);
5252
}
5353
}
5454
}

cpu-sparse/src/simd/neon.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ impl<'a> Fine<'a> {
2323

2424
pub fn pack_simd(&mut self, x: usize, y: usize) {
2525
unsafe fn cvt(v: float32x4_t) -> uint8x16_t {
26-
let clamped = vminq_f32(vmaxq_f32(v, vdupq_n_f32(0.0)), vdupq_n_f32(1.0));
26+
let clamped = vminq_f32(v, vdupq_n_f32(1.0));
2727
let scaled = vmulq_f32(clamped, vdupq_n_f32(255.0));
2828
vreinterpretq_u8_u32(vcvtnq_u32_f32(scaled))
2929
}

0 commit comments

Comments
 (0)