Patch summary
  canvas: builds as `no_std` outside of doctests; `alloc` is always linked and `std` is
    linked only when the new `runtime-features` Cargo feature is enabled.
  canvas/src/arch.rs: x86 shuffle selection is split into `with_x86`, `with_x86_ssse3` and
    `with_x86_avx2`. Each instruction set is enabled by a compile-time `target_feature`
    check or, with `runtime-features`, by `std::is_x86_feature_detected!`.
  canvas/src/color, canvas/src/shader.rs: `powf` and rounding go through `libm`.
  drm: builds as `no_std` outside of doctests.

diff --git a/canvas/Cargo.toml b/canvas/Cargo.toml
index a0b2f58..cb86618 100644
--- a/canvas/Cargo.toml
+++ b/canvas/Cargo.toml
@@ -14,6 +14,11 @@ categories = ["multimedia::images"]
 [dependencies]
 image-texel = { path = "../texel", version = "0.5.0" }
 bytemuck = "1.1"
+libm = { version = "0.2", default-features = false, features = ["arch"] }
+
+[features]
+# Use runtime feature detection on x86 and x86_64 targets.
+runtime-features = []
 
 [dev-dependencies]
 brunch = "0.6.1"
diff --git a/canvas/src/arch.rs b/canvas/src/arch.rs
index a311f2b..bdcffcd 100644
--- a/canvas/src/arch.rs
+++ b/canvas/src/arch.rs
@@ -1,4 +1,6 @@
 #![allow(unsafe_code)]
+// May be unused if no architecture features are detected at compile time or runtime.
+#[allow(unused_imports)]
 use core::mem::transmute;
 
 // For when we want to make sure we have a texel at compile time based on bytemuck.
@@ -12,8 +14,12 @@ macro_rules! expect_texel {
     };
 }
 
+// May be unused if no architecture features are detected at compile time or runtime.
+#[allow(dead_code)]
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 mod x86_avx2;
+// May be unused if no architecture features are detected at compile time or runtime.
+#[allow(dead_code)]
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 mod x86_ssse3;
 
@@ -30,29 +36,85 @@ pub(crate) struct ShuffleOps {
 
 impl ShuffleOps {
     /// FIXME(perf): implement and choose arch-specific shuffles.
+    // May be unused if no architecture features are detected at compile time or runtime.
+    #[allow(unused_mut)]
     pub fn with_arch(mut self) -> Self {
         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        {
+            self = self.with_x86();
+        }
+
+        self
+    }
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    // May be unused if no architecture features are detected at compile time or runtime.
+    #[allow(unused_mut)]
+    fn with_x86(mut self) -> Self {
+        #[cfg(target_feature = "ssse3")]
+        // SAFETY: `ssse3` detected at compile time
+        unsafe {
+            self = self.with_x86_ssse3();
+        }
+
+        #[cfg(not(target_feature = "ssse3"))]
+        #[cfg(feature = "runtime-features")]
         if std::is_x86_feature_detected!("ssse3") {
-            self.shuffle_u8x4 = unsafe {
-                transmute::(x86_ssse3::shuffle_u8x4)
-            };
-            self.shuffle_u16x4 = unsafe {
-                transmute::(x86_ssse3::shuffle_u16x4)
-            };
+            // SAFETY: `ssse3` detected at runtime
+            unsafe {
+                self = self.with_x86_ssse3();
+            }
+        }
+
+        #[cfg(target_feature = "avx2")]
+        // SAFETY: `avx2` detected at compile time
+        unsafe {
+            self = self.with_x86_avx2();
+        }
+
+        #[cfg(not(target_feature = "avx2"))]
+        #[cfg(feature = "runtime-features")]
+        if std::is_x86_feature_detected!("avx2") {
+            // SAFETY: `avx2` detected at runtime
+            unsafe {
+                self = self.with_x86_avx2();
+            }
         }
 
+        self
+    }
+
+    /// # Safety
+    ///
+    /// Must only be used when the `ssse3` feature is available.
+    // May be unused if no architecture features are detected at compile time or runtime.
+    #[allow(dead_code)]
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    unsafe fn with_x86_ssse3(mut self) -> Self {
+        self.shuffle_u8x4 =
+            unsafe { transmute::(x86_ssse3::shuffle_u8x4) };
+        self.shuffle_u16x4 = unsafe {
+            transmute::(x86_ssse3::shuffle_u16x4)
+        };
+
+        self
+    }
+
+    /// # Safety
+    ///
+    /// Must only be used when the `avx2` feature is available.
+    // May be unused if no architecture features are detected at compile time or runtime.
+    #[allow(dead_code)]
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    unsafe fn with_x86_avx2(mut self) -> Self {
         // Note: On Ivy Bridge these have the same *throughput* of 256bit-per-cycle as their SSSE3
         // equivalents until Icelake. With Icelake they are twice as fast at 512bit-per-cycle.
         // Therefore, we don't select them until we find a way to predict/select this.
-        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-        if std::is_x86_feature_detected!("avx2") {
-            self.shuffle_u8x4 = unsafe {
-                transmute::(x86_avx2::shuffle_u8x4)
-            };
-            self.shuffle_u16x4 = unsafe {
-                transmute::(x86_avx2::shuffle_u16x4)
-            };
-        }
+
+        self.shuffle_u8x4 =
+            unsafe { transmute::(x86_avx2::shuffle_u8x4) };
+        self.shuffle_u16x4 =
+            unsafe { transmute::(x86_avx2::shuffle_u16x4) };
 
         self
     }
diff --git a/canvas/src/bits.rs b/canvas/src/bits.rs
index 3038cdc..6e55551 100644
--- a/canvas/src/bits.rs
+++ b/canvas/src/bits.rs
@@ -1,5 +1,6 @@
 use crate::layout::{SampleBits, SampleParts};
-use image_texel::{AsTexel, Texel};
+use image_texel::AsTexel;
+use image_texel::Texel;
 
 /// Specifies which bits a channel comes from, within a `TexelKind` aggregate.
 #[derive(Clone, Copy, Debug)]
diff --git a/canvas/src/color/oklab.rs b/canvas/src/color/oklab.rs
index bdd9bd2..e2291fe 100644
--- a/canvas/src/color/oklab.rs
+++ b/canvas/src/color/oklab.rs
@@ -1,4 +1,5 @@
 use crate::color_matrix::ColMatrix;
+use libm::powf;
 
 const M1: ColMatrix = ColMatrix([
     [0.8189330101, 0.0329845436, 0.0482003018],
@@ -94,7 +95,7 @@ pub(crate) fn f_lms_inv(lms: [f32; 3]) -> [f32; 3] {
 }
 
 fn pow([a, b, c]: [f32; 3], exp: f32) -> [f32; 3] {
-    [a.powf(exp), b.powf(exp), c.powf(exp)]
+    [powf(a, exp), powf(b, exp), powf(c, exp)]
 }
 
 fn copysign([a, b, c]: [f32; 3], [sa, sb, sc]: [f32; 3]) -> [f32; 3] {
diff --git a/canvas/src/color/srlab2.rs b/canvas/src/color/srlab2.rs
index f53e9ad..849a0df 100644
--- a/canvas/src/color/srlab2.rs
+++ b/canvas/src/color/srlab2.rs
@@ -1,5 +1,6 @@
 use crate::color::Whitepoint;
 use crate::color_matrix::ColMatrix;
+use libm::powf;
 
 #[rustfmt::skip]
 const M_CAT02: ColMatrix = ColMatrix([
@@ -119,7 +120,7 @@ fn non_linearity(lms: [f32; 3]) -> [f32; 3] {
             // Limited to 0.08 precisely
             v * 24389.0 / 2700.0
         } else {
-            1.16 * v.powf(1.0 / 3.0) - 0.16
+            1.16 * powf(v, 1.0 / 3.0) - 0.16
         }
     }
 
@@ -131,7 +132,7 @@ fn non_linearity_inv(lms: [f32; 3]) -> [f32; 3] {
         if v.abs() < 0.08 {
             v * 2700.0 / 24389.0
         } else {
-            ((v + 0.16) / 1.16).powf(3.0)
+            powf((v + 0.16) / 1.16, 3.0)
         }
     }
 
diff --git a/canvas/src/color/transfer.rs b/canvas/src/color/transfer.rs
index ec7b06e..e1b9ecb 100644
--- a/canvas/src/color/transfer.rs
+++ b/canvas/src/color/transfer.rs
@@ -1,7 +1,7 @@
 /// To emulate the syntax used in GLSL more closely.
 #[inline]
 fn pow(base: f32, exp: f32) -> f32 {
-    base.powf(exp)
+    libm::powf(base, exp)
 }
 
 pub fn transfer_oe_bt709(val: f32) -> f32 {
@@ -143,6 +143,7 @@ pub fn transfer_display_scene_smpte2084(val: f32) -> f32 {
 pub fn transfer_oe_smpte2084(val: f32) -> f32 {
     transfer_eo_inv_smpte2084(transfer_scene_display_smpte2084(val))
 }
+#[expect(dead_code)]
 pub fn transfer_oe_inv_smpte2084(val: f32) -> f32 {
     transfer_display_scene_smpte2084(transfer_eo_smpte2084(val))
 }
diff --git a/canvas/src/frame.rs b/canvas/src/frame.rs
index 963b143..09ae416 100644
--- a/canvas/src/frame.rs
+++ b/canvas/src/frame.rs
@@ -1,4 +1,8 @@
 //! A byte-buffer based image descriptor.
+
+use alloc::borrow::ToOwned;
+use alloc::vec::Vec;
+
 use image_texel::image::{ImageMut, ImageRef};
 use image_texel::Image;
 
diff --git a/canvas/src/layout.rs b/canvas/src/layout.rs
index bae5308..3f471b4 100644
--- a/canvas/src/layout.rs
+++ b/canvas/src/layout.rs
@@ -1,5 +1,6 @@
 //! Defines layout and buffer of our images.
-use crate::color::{Color, ColorChannel, ColorChannelModel};
+
+use alloc::boxed::Box;
 
 use image_texel::image::{Coord, ImageRef};
 use image_texel::layout::{
@@ -7,6 +8,7 @@ use image_texel::layout::{
     Strides, TexelLayout,
 };
 
+use crate::color::{Color, ColorChannel, ColorChannelModel};
 use crate::shader::ChunkSpec;
 
 /// The byte layout of a buffer.
diff --git a/canvas/src/lib.rs b/canvas/src/lib.rs
index a0c0908..3930042 100644
--- a/canvas/src/lib.rs
+++ b/canvas/src/lib.rs
@@ -55,6 +55,14 @@
 // Deny, not forbid, unsafe code. In `arch` module we have inherently unsafe code, for the moment.
 // Maybe at a future point we gain some possibility to write such code safely.
 #![deny(unsafe_code)]
+// Be std for doctests, avoids a weird warning about missing allocator.
+#![cfg_attr(not(doctest), no_std)]
+
+#[cfg(feature = "runtime-features")]
+extern crate std;
+
+#[macro_use]
+extern crate alloc;
 
 mod arch;
 mod bits;
diff --git a/canvas/src/shader.rs b/canvas/src/shader.rs
index a1102c4..97ef2f9 100644
--- a/canvas/src/shader.rs
+++ b/canvas/src/shader.rs
@@ -2,6 +2,7 @@
 //!
 //! Takes quite a lot of inspiration from how GPUs work. We have a primitive sampler unit, a
 //! fragment unit, and pipeline multiple texels in parallel.
+use alloc::vec::Vec;
 use core::ops::Range;
 use image_texel::image::{ImageMut, ImageRef};
 use image_texel::{AsTexel, Texel, TexelBuffer};
@@ -1482,7 +1483,9 @@ impl CommonPixel {
             // FIXME: do the transform u32::from_ne_bytes(x.as_ne_bytes()) when appropriate.
             join_fn: |num, bits, idx| {
                 let max_val = bits.mask();
-                let raw = (num[(idx & 0x3) as usize] * max_val as f32).round() as u32;
+                // `f32::round` is unavailable without `std`; `libm::roundf` rounds identically.
+                let scaled = num[(idx & 0x3) as usize] * max_val as f32;
+                let raw = libm::roundf(scaled) as u32;
                 raw.min(max_val)
             },
             bits,
diff --git a/drm/src/lib.rs b/drm/src/lib.rs
index 8ab5702..6123ca6 100644
--- a/drm/src/lib.rs
+++ b/drm/src/lib.rs
@@ -13,6 +13,9 @@
 //! pixel matrix. Then some of those formats map cleanly to planes of color information that can be
 //! viewed as a matrix with strides, which finally enables useful operations such as
 //! initialization.
+// Be std for doctests, avoids a weird warning about missing allocator.
+#![cfg_attr(not(doctest), no_std)]
+
 use canvas::{layout, texels};
 use core::convert::TryFrom;
 use core::fmt;