Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions canvas/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ categories = ["multimedia::images"]
[dependencies]
image-texel = { path = "../texel", version = "0.5.0" }
bytemuck = "1.1"
libm = { version = "0.2", default-features = false, features = ["arch"] }

[features]
# Use runtime feature detection on x86 and x86_64 targets.
runtime-features = []

[dev-dependencies]
brunch = "0.6.1"
Expand Down
92 changes: 77 additions & 15 deletions canvas/src/arch.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#![allow(unsafe_code)]
// May be unused if no architecture features are detected at compile time or runtime.
#[allow(unused_imports)]
use core::mem::transmute;

// For when we want to make sure we have a texel at compile time based on bytemuck.
Expand All @@ -12,8 +14,12 @@ macro_rules! expect_texel {
};
}

// May be unused if no architecture features are detected at compile time or runtime.
#[allow(dead_code)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86_avx2;
// May be unused if no architecture features are detected at compile time or runtime.
#[allow(dead_code)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86_ssse3;

Expand All @@ -30,29 +36,85 @@ pub(crate) struct ShuffleOps {

impl ShuffleOps {
/// Installs architecture-specific shuffle routines where this target has any.
///
/// On `x86` and `x86_64` the selection is delegated to `with_x86`; on every other target the
/// value is handed back untouched.
///
/// FIXME(perf): implement and choose arch-specific shuffles.
pub fn with_arch(self) -> Self {
    // Exactly one of the two bindings below survives conditional compilation.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    let selected = self.with_x86();
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    let selected = self;

    selected
}

/// Picks the shuffle implementations for x86 / x86_64.
///
/// For each of `ssse3` and `avx2` the feature is used when it is enabled at compile time.
/// Otherwise, and only with the `runtime-features` crate feature, it is probed at runtime.
/// The AVX2 step runs after the SSSE3 step, so when both apply the AVX2 routines are the ones
/// left installed.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
// `mut` is unused if no architecture features are detected at compile time or runtime.
#[allow(unused_mut)]
fn with_x86(mut self) -> Self {
    #[cfg(target_feature = "ssse3")]
    // SAFETY: `ssse3` detected at compile time
    unsafe {
        self = self.with_x86_ssse3();
    }

    #[cfg(all(not(target_feature = "ssse3"), feature = "runtime-features"))]
    if std::is_x86_feature_detected!("ssse3") {
        // The function pointers are installed by `with_x86_ssse3` alone; repeating the
        // transmutes here would only duplicate its work.
        // SAFETY: `ssse3` detected at runtime
        unsafe {
            self = self.with_x86_ssse3();
        }
    }

    #[cfg(target_feature = "avx2")]
    // SAFETY: `avx2` detected at compile time
    unsafe {
        self = self.with_x86_avx2();
    }

    #[cfg(all(not(target_feature = "avx2"), feature = "runtime-features"))]
    if std::is_x86_feature_detected!("avx2") {
        // SAFETY: `avx2` detected at runtime
        unsafe {
            self = self.with_x86_avx2();
        }
    }

    self
}

/// Installs the SSSE3 implementations of the `u8x4` and `u16x4` shuffles.
///
/// # Safety
///
/// Must only be used when the `ssse3` feature is available.
// May be unused if no architecture features are detected at compile time or runtime.
#[allow(dead_code)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn with_x86_ssse3(mut self) -> Self {
// SAFETY: the caller guarantees `ssse3` is available (see the contract above).
// NOTE(review): the transmute target is inferred from the field type; presumably it is a safe
// `fn` pointer with the same signature, so only the `unsafe` qualifier is dropped. Confirm
// against the `ShuffleOps` field definitions.
self.shuffle_u8x4 =
unsafe { transmute::<unsafe fn(&mut [[u8; 4]], [u8; 4]), _>(x86_ssse3::shuffle_u8x4) };
// SAFETY: same argument as for `shuffle_u8x4` above.
self.shuffle_u16x4 = unsafe {
transmute::<unsafe fn(&mut [[u16; 4]], [u8; 4]), _>(x86_ssse3::shuffle_u16x4)
};

self
}

/// Installs the AVX2 implementations of the `u8x4` and `u16x4` shuffles.
///
/// # Safety
///
/// Must only be used when the `avx2` feature is available.
// May be unused if no architecture features are detected at compile time or runtime.
#[allow(dead_code)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn with_x86_avx2(mut self) -> Self {
    // NOTE(review): an earlier note here recorded that on Ivy Bridge these have the same
    // *throughput* of 256bit-per-cycle as their SSSE3 equivalents until Icelake, where they are
    // twice as fast at 512bit-per-cycle, and concluded that they should not be selected until
    // that can be predicted. The code installs them whenever the caller guarantees AVX2, so
    // confirm which behaviour is intended.
    //
    // Deliberately no `is_x86_feature_detected!` in here: availability is the caller's
    // obligation, and the macro lives in `std`, which this crate only links when the
    // `runtime-features` feature is enabled.

    // SAFETY: the caller guarantees `avx2` is available (see the contract above).
    // NOTE(review): presumably the field type is a safe `fn` pointer with the same signature,
    // so the transmute only drops the `unsafe` qualifier. Confirm against `ShuffleOps`.
    self.shuffle_u8x4 =
        unsafe { transmute::<unsafe fn(&mut [[u8; 4]], [u8; 4]), _>(x86_avx2::shuffle_u8x4) };
    // SAFETY: same argument as for `shuffle_u8x4` above.
    self.shuffle_u16x4 =
        unsafe { transmute::<unsafe fn(&mut [[u16; 4]], [u8; 4]), _>(x86_avx2::shuffle_u16x4) };

    self
}
Expand Down
3 changes: 2 additions & 1 deletion canvas/src/bits.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::layout::{SampleBits, SampleParts};
use image_texel::{AsTexel, Texel};
use image_texel::AsTexel;
use image_texel::Texel;

/// Specifies which bits a channel comes from, within a `TexelKind` aggregate.
#[derive(Clone, Copy, Debug)]
Expand Down
3 changes: 2 additions & 1 deletion canvas/src/color/oklab.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::color_matrix::ColMatrix;
use libm::powf;

const M1: ColMatrix = ColMatrix([
[0.8189330101, 0.0329845436, 0.0482003018],
Expand Down Expand Up @@ -94,7 +95,7 @@ pub(crate) fn f_lms_inv(lms: [f32; 3]) -> [f32; 3] {
}

fn pow([a, b, c]: [f32; 3], exp: f32) -> [f32; 3] {
[a.powf(exp), b.powf(exp), c.powf(exp)]
[powf(a, exp), powf(b, exp), powf(c, exp)]
}

fn copysign([a, b, c]: [f32; 3], [sa, sb, sc]: [f32; 3]) -> [f32; 3] {
Expand Down
5 changes: 3 additions & 2 deletions canvas/src/color/srlab2.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::color::Whitepoint;
use crate::color_matrix::ColMatrix;
use libm::powf;

#[rustfmt::skip]
const M_CAT02: ColMatrix = ColMatrix([
Expand Down Expand Up @@ -119,7 +120,7 @@ fn non_linearity(lms: [f32; 3]) -> [f32; 3] {
// Limited to 0.08 precisely
v * 24389.0 / 2700.0
} else {
1.16 * v.powf(1.0 / 3.0) - 0.16
1.16 * powf(v, 1.0 / 3.0) - 0.16
}
}

Expand All @@ -131,7 +132,7 @@ fn non_linearity_inv(lms: [f32; 3]) -> [f32; 3] {
if v.abs() < 0.08 {
v * 2700.0 / 24389.0
} else {
((v + 0.16) / 1.16).powf(3.0)
powf((v + 0.16) / 1.16, 3.0)
}
}

Expand Down
3 changes: 2 additions & 1 deletion canvas/src/color/transfer.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/// To emulate the syntax used in GLSL more closely.
#[inline]
fn pow(base: f32, exp: f32) -> f32 {
base.powf(exp)
libm::powf(base, exp)
}

pub fn transfer_oe_bt709(val: f32) -> f32 {
Expand Down Expand Up @@ -143,6 +143,7 @@ pub fn transfer_display_scene_smpte2084(val: f32) -> f32 {
/// Applies `transfer_scene_display_smpte2084` to `val` and passes the result through
/// `transfer_eo_inv_smpte2084`.
pub fn transfer_oe_smpte2084(val: f32) -> f32 {
    let intermediate = transfer_scene_display_smpte2084(val);
    transfer_eo_inv_smpte2084(intermediate)
}
/// Applies `transfer_eo_smpte2084` to `val` and passes the result through
/// `transfer_display_scene_smpte2084`.
// NOTE(review): presumably nothing in the crate calls this yet, hence the lint expectation;
// confirm, and drop the attribute once a caller exists.
#[expect(dead_code)]
pub fn transfer_oe_inv_smpte2084(val: f32) -> f32 {
    let intermediate = transfer_eo_smpte2084(val);
    transfer_display_scene_smpte2084(intermediate)
}
Expand Down
4 changes: 4 additions & 0 deletions canvas/src/frame.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
//! A byte-buffer based image descriptor.

use alloc::borrow::ToOwned;
use alloc::vec::Vec;

use image_texel::image::{ImageMut, ImageRef};
use image_texel::Image;

Expand Down
4 changes: 3 additions & 1 deletion canvas/src/layout.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
//! Defines layout and buffer of our images.
use crate::color::{Color, ColorChannel, ColorChannelModel};

use alloc::boxed::Box;

use image_texel::image::{Coord, ImageRef};
use image_texel::layout::{
Decay, Layout as ImageLayout, MatrixBytes, Raster, SliceLayout, StrideSpec, StridedBytes,
Strides, TexelLayout,
};

use crate::color::{Color, ColorChannel, ColorChannelModel};
use crate::shader::ChunkSpec;

/// The byte layout of a buffer.
Expand Down
8 changes: 8 additions & 0 deletions canvas/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@
// Deny, not forbid, unsafe code. In `arch` module we have inherently unsafe code, for the moment.
// Maybe at a future point we gain some possibility to write such code safely.
#![deny(unsafe_code)]
// Be std for doctests, avoids a weird warning about missing allocator.
#![cfg_attr(not(doctest), no_std)]

#[cfg(feature = "runtime-features")]
extern crate std;

#[macro_use]
extern crate alloc;

mod arch;
mod bits;
Expand Down
5 changes: 4 additions & 1 deletion canvas/src/shader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//!
//! Takes quite a lot of inspiration from how GPUs work. We have a primitive sampler unit, a
//! fragment unit, and pipeline multiple texels in parallel.
use alloc::vec::Vec;
use core::ops::Range;
use image_texel::image::{ImageMut, ImageRef};
use image_texel::{AsTexel, Texel, TexelBuffer};
Expand Down Expand Up @@ -1482,7 +1483,9 @@ impl CommonPixel {
// FIXME: do the transform u32::from_ne_bytes(x.as_ne_bytes()) when appropriate.
join_fn: |num, bits, idx| {
let max_val = bits.mask();
let raw = (num[(idx & 0x3) as usize] * max_val as f32).round() as u32;
// Equivalent to `x.round() as u32` for positive-normal f32
let round = |x| (x + 0.5) as u32;
let raw = round(num[(idx & 0x3) as usize] * max_val as f32);
raw.min(max_val)
},
bits,
Expand Down
3 changes: 3 additions & 0 deletions drm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
//! pixel matrix. Then some of those formats map cleanly to planes of color information that can be
//! viewed as a matrix with strides, which finally enables useful operations such as
//! initialization.
// Be std for doctests, avoids a weird warning about missing allocator.
#![cfg_attr(not(doctest), no_std)]

use canvas::{layout, texels};
use core::convert::TryFrom;
use core::fmt;
Expand Down
Loading