Skip to content

Commit fb94e13

Browse files
author
Aurelia Molzer
authored
Merge pull request #65 from bushrat011899/no_std
Add `no_std` support to remaining crates
2 parents bd7576b + 43b05c5 commit fb94e13

File tree

11 files changed

+113
-22
lines changed

11 files changed

+113
-22
lines changed

canvas/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ categories = ["multimedia::images"]
1414
[dependencies]
1515
image-texel = { path = "../texel", version = "0.5.0" }
1616
bytemuck = "1.1"
17+
libm = { version = "0.2", default-features = false, features = ["arch"] }
18+
19+
[features]
20+
# Use runtime feature detection on x86 and x86_64 targets. Enabling this links `std`.
21+
runtime-features = []
1722

1823
[dev-dependencies]
1924
brunch = "0.6.1"

canvas/src/arch.rs

Lines changed: 77 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#![allow(unsafe_code)]
2+
// May be unused if no architecture features are detected at compile time or runtime.
3+
#[allow(unused_imports)]
24
use core::mem::transmute;
35

46
// For when we want to make sure we have a texel at compile time based on bytemuck.
@@ -12,8 +14,12 @@ macro_rules! expect_texel {
1214
};
1315
}
1416

17+
// May be unused if no architecture features are detected at compile time or runtime.
18+
#[allow(dead_code)]
1519
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1620
mod x86_avx2;
21+
// May be unused if no architecture features are detected at compile time or runtime.
22+
#[allow(dead_code)]
1723
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1824
mod x86_ssse3;
1925

@@ -30,29 +36,85 @@ pub(crate) struct ShuffleOps {
3036

3137
impl ShuffleOps {
3238
/// FIXME(perf): implement and choose arch-specific shuffles.
39+
// `mut` is unused on targets where no architecture-specific path is compiled in.
40+
#[allow(unused_mut)]
3341
pub fn with_arch(mut self) -> Self {
3442
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
43+
{
44+
self = self.with_x86();
45+
}
46+
47+
self
48+
}
49+
50+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
51+
// `mut` is unused if no target feature is enabled at compile time and `runtime-features` is off.
52+
#[allow(unused_mut)]
53+
fn with_x86(mut self) -> Self {
54+
#[cfg(target_feature = "ssse3")]
55+
// SAFETY: `ssse3` detected at compile time
56+
unsafe {
57+
self = self.with_x86_ssse3();
58+
}
59+
60+
#[cfg(not(target_feature = "ssse3"))]
61+
#[cfg(feature = "runtime-features")]
3562
if std::is_x86_feature_detected!("ssse3") {
36-
self.shuffle_u8x4 = unsafe {
37-
transmute::<unsafe fn(&mut [[u8; 4]], [u8; 4]), _>(x86_ssse3::shuffle_u8x4)
38-
};
39-
self.shuffle_u16x4 = unsafe {
40-
transmute::<unsafe fn(&mut [[u16; 4]], [u8; 4]), _>(x86_ssse3::shuffle_u16x4)
41-
};
63+
// SAFETY: `ssse3` detected at runtime
64+
unsafe {
65+
self = self.with_x86_ssse3();
66+
}
67+
}
68+
69+
#[cfg(target_feature = "avx2")]
70+
// SAFETY: `avx2` detected at compile time
71+
unsafe {
72+
self = self.with_x86_avx2();
73+
}
74+
75+
#[cfg(not(target_feature = "avx2"))]
76+
#[cfg(feature = "runtime-features")]
77+
if std::is_x86_feature_detected!("avx2") {
78+
// SAFETY: `avx2` detected at runtime
79+
unsafe {
80+
self = self.with_x86_avx2();
81+
}
4282
}
4383

84+
self
85+
}
86+
87+
/// # Safety
88+
///
89+
/// Must only be used when the `ssse3` feature is available.
90+
// May be unused if no architecture features are detected at compile time or runtime.
91+
#[allow(dead_code)]
92+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
93+
unsafe fn with_x86_ssse3(mut self) -> Self {
94+
self.shuffle_u8x4 =
95+
unsafe { transmute::<unsafe fn(&mut [[u8; 4]], [u8; 4]), _>(x86_ssse3::shuffle_u8x4) };
96+
self.shuffle_u16x4 = unsafe {
97+
transmute::<unsafe fn(&mut [[u16; 4]], [u8; 4]), _>(x86_ssse3::shuffle_u16x4)
98+
};
99+
100+
self
101+
}
102+
103+
/// # Safety
104+
///
105+
/// Must only be used when the `avx2` feature is available.
106+
// May be unused if no architecture features are detected at compile time or runtime.
107+
#[allow(dead_code)]
108+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
109+
unsafe fn with_x86_avx2(mut self) -> Self {
44110
// Note: On Ivy Bridge these have the same *throughput* of 256bit-per-cycle as their SSSE3
45111
// equivalents until Icelake. With Icelake they are twice as fast at 512bit-per-cycle.
46112
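// Therefore, we don't select them until we find a way to predict/select this.
// NOTE(review): the assignments below do select the AVX2 shuffles; confirm whether this note is stale.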
47-
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
48-
if std::is_x86_feature_detected!("avx2") {
49-
self.shuffle_u8x4 = unsafe {
50-
transmute::<unsafe fn(&mut [[u8; 4]], [u8; 4]), _>(x86_avx2::shuffle_u8x4)
51-
};
52-
self.shuffle_u16x4 = unsafe {
53-
transmute::<unsafe fn(&mut [[u16; 4]], [u8; 4]), _>(x86_avx2::shuffle_u16x4)
54-
};
55-
}
113+
114+
self.shuffle_u8x4 =
115+
unsafe { transmute::<unsafe fn(&mut [[u8; 4]], [u8; 4]), _>(x86_avx2::shuffle_u8x4) };
116+
self.shuffle_u16x4 =
117+
unsafe { transmute::<unsafe fn(&mut [[u16; 4]], [u8; 4]), _>(x86_avx2::shuffle_u16x4) };
56118

57119
self
58120
}

canvas/src/bits.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::layout::{SampleBits, SampleParts};
2-
use image_texel::{AsTexel, Texel};
2+
use image_texel::AsTexel;
3+
use image_texel::Texel;
34

45
/// Specifies which bits a channel comes from, within a `TexelKind` aggregate.
56
#[derive(Clone, Copy, Debug)]

canvas/src/color/oklab.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::color_matrix::ColMatrix;
2+
use libm::powf;
23

34
const M1: ColMatrix = ColMatrix([
45
[0.8189330101, 0.0329845436, 0.0482003018],
@@ -94,7 +95,7 @@ pub(crate) fn f_lms_inv(lms: [f32; 3]) -> [f32; 3] {
9495
}
9596

9697
fn pow([a, b, c]: [f32; 3], exp: f32) -> [f32; 3] {
97-
[a.powf(exp), b.powf(exp), c.powf(exp)]
98+
[powf(a, exp), powf(b, exp), powf(c, exp)]
9899
}
99100

100101
fn copysign([a, b, c]: [f32; 3], [sa, sb, sc]: [f32; 3]) -> [f32; 3] {

canvas/src/color/srlab2.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::color::Whitepoint;
22
use crate::color_matrix::ColMatrix;
3+
use libm::powf;
34

45
#[rustfmt::skip]
56
const M_CAT02: ColMatrix = ColMatrix([
@@ -119,7 +120,7 @@ fn non_linearity(lms: [f32; 3]) -> [f32; 3] {
119120
// Limited to 0.08 precisely
120121
v * 24389.0 / 2700.0
121122
} else {
122-
1.16 * v.powf(1.0 / 3.0) - 0.16
123+
1.16 * powf(v, 1.0 / 3.0) - 0.16
123124
}
124125
}
125126

@@ -131,7 +132,7 @@ fn non_linearity_inv(lms: [f32; 3]) -> [f32; 3] {
131132
if v.abs() < 0.08 {
132133
v * 2700.0 / 24389.0
133134
} else {
134-
((v + 0.16) / 1.16).powf(3.0)
135+
powf((v + 0.16) / 1.16, 3.0)
135136
}
136137
}
137138

canvas/src/color/transfer.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/// To emulate the syntax used in GLSL more closely.
22
#[inline]
33
fn pow(base: f32, exp: f32) -> f32 {
4-
base.powf(exp)
4+
libm::powf(base, exp)
55
}
66

77
pub fn transfer_oe_bt709(val: f32) -> f32 {
@@ -143,6 +143,7 @@ pub fn transfer_display_scene_smpte2084(val: f32) -> f32 {
143143
pub fn transfer_oe_smpte2084(val: f32) -> f32 {
144144
transfer_eo_inv_smpte2084(transfer_scene_display_smpte2084(val))
145145
}
146+
#[expect(dead_code)]
146147
pub fn transfer_oe_inv_smpte2084(val: f32) -> f32 {
147148
transfer_display_scene_smpte2084(transfer_eo_smpte2084(val))
148149
}

canvas/src/frame.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
//! A byte-buffer based image descriptor.
2+
3+
use alloc::borrow::ToOwned;
4+
use alloc::vec::Vec;
5+
26
use image_texel::image::{ImageMut, ImageRef};
37
use image_texel::Image;
48

canvas/src/layout.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
//! Defines layout and buffer of our images.
2-
use crate::color::{Color, ColorChannel, ColorChannelModel};
2+
3+
use alloc::boxed::Box;
34

45
use image_texel::image::{Coord, ImageRef};
56
use image_texel::layout::{
67
Decay, Layout as ImageLayout, MatrixBytes, Raster, SliceLayout, StrideSpec, StridedBytes,
78
Strides, TexelLayout,
89
};
910

11+
use crate::color::{Color, ColorChannel, ColorChannelModel};
1012
use crate::shader::ChunkSpec;
1113

1214
/// The byte layout of a buffer.

canvas/src/lib.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@
5555
// Deny, not forbid, unsafe code. In the `arch` module we have inherently unsafe code, for the moment.
5656
// Maybe at a future point we gain some possibility to write such code safely.
5757
#![deny(unsafe_code)]
58+
// Build doctests with `std`; this avoids a confusing warning about a missing allocator.
59+
#![cfg_attr(not(doctest), no_std)]
60+
61+
#[cfg(feature = "runtime-features")]
62+
extern crate std;
63+
64+
#[macro_use]
65+
extern crate alloc;
5866

5967
mod arch;
6068
mod bits;

canvas/src/shader.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
//!
33
//! Takes quite a lot of inspiration from how GPUs work. We have a primitive sampler unit, a
44
//! fragment unit, and pipeline multiple texels in parallel.
5+
use alloc::vec::Vec;
56
use core::ops::Range;
67
use image_texel::image::{ImageMut, ImageRef};
78
use image_texel::{AsTexel, Texel, TexelBuffer};
@@ -1482,7 +1483,9 @@ impl CommonPixel {
14821483
// FIXME: do the transform u32::from_ne_bytes(x.as_ne_bytes()) when appropriate.
14831484
join_fn: |num, bits, idx| {
14841485
let max_val = bits.mask();
1485-
let raw = (num[(idx & 0x3) as usize] * max_val as f32).round() as u32;
1486+
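// `no_std`-friendly stand-in for `x.round() as u32`. Matches it for non-negative f32 except where
// `x + 0.5` itself rounds up: the largest float below 0.5 and odd integers in [2^23, 2^24).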
1487+
let round = |x| (x + 0.5) as u32;
1488+
let raw = round(num[(idx & 0x3) as usize] * max_val as f32);
14861489
raw.min(max_val)
14871490
},
14881491
bits,

0 commit comments

Comments
 (0)