Commit 6a4ea72

committed
Improve stores into atomic slice
Firstly, this reduces the amount of soundness documentation required by making the different data sources distinct. (The loads are funnily asymmetric here since the output mutable reference is very strong and can easily be converted to cells, but there is no opposite conversion to a shared reference.)

Secondly, this optimizes the 'bulk' of a long atomic store operation by writing each interior part with a single plain store, without any of the xor dance. There we modify all bytes, so none of the previous value needs to be preserved. Only where the operation straddles the edge into neighboring values that must not be affected by our modifications do we apply the changes by xor, masked to exactly the bits we want to affect.
1 parent efa9bcb commit 6a4ea72
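
The xor dance mentioned above can be illustrated in isolation. A minimal sketch, written against std's AtomicU64 rather than the crate's MaxAtomic/AtomicPart types, with a hypothetical helper name and byte offsets:

    use std::sync::atomic::{AtomicU64, Ordering};

    // Overwrite `src.len()` bytes of `word` starting at byte `offset`, leaving
    // every other byte untouched even if another thread updates it concurrently.
    fn store_bytes_masked(word: &AtomicU64, offset: usize, src: &[u8]) {
        assert!(offset + src.len() <= core::mem::size_of::<u64>());
        let original = word.load(Ordering::Relaxed);
        let mut bytes = original.to_ne_bytes();
        bytes[offset..offset + src.len()].copy_from_slice(src);
        let value = u64::from_ne_bytes(bytes);
        // `original ^ value` is zero in every bit we did not change, so the
        // fetch_xor flips exactly our own bits and nothing else.
        word.fetch_xor(original ^ value, Ordering::Relaxed);
    }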

2 files changed (+142, -32 lines)


texel/src/buf.rs

Lines changed: 0 additions & 1 deletion
@@ -1712,7 +1712,6 @@ mod tests {
         let buffer = AtomicBuffer::with_buffer(initial_state);
         // And receive all the results in this shared copy of our buffer.
         let output_tap = buffer.clone();
-        // assert!(buffer.ptr_eq(&output_tap));
 
         // Map those numbers in-place.
         buffer.map_within(..LEN, 0, |n: u32| n as u8, U32, U8);

texel/src/texel.rs

Lines changed: 142 additions & 31 deletions
@@ -682,6 +682,14 @@ impl<T, const N: usize> Texel<[T; N]> {
     }
 }
 
+/// Protocol for [`Texel::store_atomic_slice_unchecked`] argument. Implementation detail.
+trait DataSource {
+    fn init(&mut self, init: usize);
+    fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
+    fn load(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
+    fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
+}
+
 /// Operations that can be performed based on the evidence of Texel.
 impl<P> Texel<P> {
     /// Copy a texel.
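
The protocol is consumed in a fixed order by store_atomic_slice_unchecked further down: init once with the byte length of the partial head, then load_head if that head is non-empty, load once per fully covered atomic part, and finally load_tail for a trailing partial part. A rough, self-contained sketch of that calling sequence, with the trait re-declared locally and a fixed PART_SIZE standing in for MaxAtomic::PART_SIZE purely for illustration:

    const PART_SIZE: usize = 8; // stand-in for MaxAtomic::PART_SIZE

    trait DataSource {
        fn init(&mut self, init: usize);
        fn load_head(&mut self, val: &mut [u8; PART_SIZE]);
        fn load(&mut self, val: &mut [u8; PART_SIZE]);
        fn load_tail(&mut self, val: &mut [u8; PART_SIZE]);
    }

    // Illustrates the order of calls only; the real consumer merges each filled
    // buffer into the destination atomics.
    fn drive<D: DataSource>(mut src: D, head_len: usize, body_count: usize, tail_len: usize) {
        src.init(head_len);
        let mut buf = [0u8; PART_SIZE];
        if head_len > 0 {
            src.load_head(&mut buf);
        }
        for _ in 0..body_count {
            src.load(&mut buf);
        }
        if tail_len > 0 {
            src.load_tail(&mut buf);
        }
    }
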
@@ -839,7 +847,7 @@
     /// bytes *not* covered by this particular representation will not inherently block progress.
     pub fn store_atomic(self, val: AtomicRef<P>, value: P) {
         let slice = AtomicSliceRef::from_ref(val);
-        self.store_atomic_slice_unchecked(slice, core::slice::from_ref(&value));
+        self.store_atomic_slice(slice, core::slice::from_ref(&value));
     }
 
     /// Store values to an atomic slice.
@@ -857,7 +865,53 @@
     /// This method panics if the slice and the source buffer do not have the same logical length.
     #[track_caller]
     pub fn store_atomic_slice(self, val: AtomicSliceRef<P>, source: &[P]) {
+        struct SliceSource<'lt> {
+            skip: usize,
+            head: &'lt [u8],
+            chunks: core::slice::ChunksExact<'lt, u8>,
+            tail: &'lt [u8],
+        }
+
+        impl DataSource for SliceSource<'_> {
+            fn init(&mut self, init: usize) {
+                let len = self.head.len().min(init);
+                let (head, body) = self.head.split_at(len);
+                self.head = head;
+                self.skip = MaxAtomic::PART_SIZE - init;
+
+                let chunks = body.chunks_exact(MaxAtomic::PART_SIZE);
+                self.tail = chunks.remainder();
+                self.chunks = chunks;
+            }
+
+            fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
+                let target = &mut val[self.skip..][..self.head.len()];
+                target.copy_from_slice(self.head);
+            }
+
+            fn load(&mut self, val: &mut [u8; core::mem::size_of::<AtomicPart>()]) {
+                if let Some(next) = self.chunks.next() {
+                    val.copy_from_slice(next);
+                } else {
+                    debug_assert!(false);
+                }
+            }
+
+            fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
+                let target = &mut val[..self.tail.len()];
+                target.copy_from_slice(self.tail);
+            }
+        }
+
         assert_eq!(val.len(), source.len());
+
+        let source = SliceSource {
+            head: self.to_bytes(source),
+            skip: 0,
+            chunks: [].chunks_exact(MaxAtomic::PART_SIZE),
+            tail: &[],
+        };
+
         self.store_atomic_slice_unchecked(val, source);
     }
 
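The init logic above splits the flat byte source into the three regions in a single pass: split_at peels off the partial head, chunks_exact yields the full-sized middle parts, and remainder becomes the tail. A standalone sketch of that partitioning with made-up numbers (21 source bytes, a 3-byte head, and an assumed 8-byte part size):

    fn main() {
        const PART_SIZE: usize = 8; // stand-in for MaxAtomic::PART_SIZE
        let source: Vec<u8> = (0..21).collect();

        // Suppose the destination starts 5 bytes into an atomic part, so the
        // first 3 source bytes complete that part.
        let head_len: usize = 3;
        let (head, body) = source.split_at(head_len.min(source.len()));

        let chunks = body.chunks_exact(PART_SIZE);
        let tail = chunks.remainder();

        assert_eq!(head.len(), 3);
        assert_eq!(chunks.len(), 2); // two fully covered parts, 16 bytes
        assert_eq!(tail.len(), 2);   // 21 - 3 - 16
    }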

@@ -875,6 +929,44 @@
     ///
     /// This method panics if the slice and the source buffer do not have the same logical length.
     pub fn store_atomic_from_cells(self, val: AtomicSliceRef<P>, source: &[Cell<P>]) {
+        struct CellSource<'lt> {
+            skip: usize,
+            head: &'lt [Cell<u8>],
+            chunks: core::slice::ChunksExact<'lt, Cell<u8>>,
+            tail: &'lt [Cell<u8>],
+        }
+
+        impl DataSource for CellSource<'_> {
+            fn init(&mut self, init: usize) {
+                let len = self.head.len().min(init);
+                let (head, body) = self.head.split_at(len);
+                self.head = head;
+                self.skip = MaxAtomic::PART_SIZE - init;
+
+                let chunks = body.chunks_exact(MaxAtomic::PART_SIZE);
+                self.tail = chunks.remainder();
+                self.chunks = chunks;
+            }
+
+            fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
+                let target = &mut val[self.skip..][..self.head.len()];
+                constants::U8.load_cell_slice(self.head, target);
+            }
+
+            fn load(&mut self, val: &mut [u8; core::mem::size_of::<AtomicPart>()]) {
+                if let Some(next) = self.chunks.next() {
+                    constants::U8.load_cell_slice(next, val);
+                } else {
+                    debug_assert!(false);
+                }
+            }
+
+            fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
+                let target = &mut val[..self.tail.len()];
+                constants::U8.load_cell_slice(self.tail, target);
+            }
+        }
+
         assert_eq!(val.len(), source.len());
 
         assert!(
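
constants::U8.load_cell_slice is the crate's helper for reading bytes out of a &[Cell<u8>]; since a slice of cells cannot be reborrowed as a plain &[u8] without unsafe code, every byte goes through Cell::get. A minimal standalone stand-in for that copy, together with the as_slice_of_cells conversion used above:

    use std::cell::Cell;

    // Hypothetical stand-in for what the helper is used for here: copy bytes
    // out of shared cells into an exclusive buffer.
    fn load_cell_slice(src: &[Cell<u8>], dst: &mut [u8]) {
        assert_eq!(src.len(), dst.len());
        for (d, s) in dst.iter_mut().zip(src) {
            *d = s.get();
        }
    }

    fn main() {
        let storage = Cell::new([1u8, 2, 3, 4]);
        let cells: &Cell<[u8]> = &storage; // unsize the array to a slice
        let cells: &[Cell<u8>] = cells.as_slice_of_cells();

        let mut buffer = [0u8; 4];
        load_cell_slice(cells, &mut buffer);
        assert_eq!(buffer, [1, 2, 3, 4]);
    }
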
@@ -888,46 +980,63 @@
             these values across non-local API boundaries"
         );
 
-        // SAFETY
-        // - this covers the exact memory range as the underlying slice of cells.
-        // - the Texel certifies it is initialized memory.
-        // - the lifetime is the same.
-        // - the memory in the slice is not mutated. `Cell` is not `Sync` so this thread is the
-        // only that could modify those contents. But also in this thread this function _is
-        // currently running_ and so it suffices that it does not to modify the contents. Only
-        // the memory at `val` is modified in the later call. We checked aliasing above.
-        // - the total size is at most `isize::MAX` since it was already a reference to it.
-        let source: &[P] =
-            unsafe { slice::from_raw_parts(source.as_ptr() as *const P, source.len()) };
+        let source = CellSource {
+            head: self.cell_bytes(source).as_slice_of_cells(),
+            skip: 0,
+            chunks: [].chunks_exact(MaxAtomic::PART_SIZE),
+            tail: &[],
+        };
+
         self.store_atomic_slice_unchecked(val, source);
     }
 
-    fn store_atomic_slice_unchecked(self, val: AtomicSliceRef<P>, from: &[P]) {
-        let offset = val.start / core::mem::size_of::<AtomicPart>();
-        let mut initial_skip = val.start % core::mem::size_of::<AtomicPart>();
-
-        let mut source = self.to_bytes(from);
-        let mut buffer = val.buf.0[offset..].iter();
-
-        loop {
-            let into = buffer.next().unwrap();
-            let original = into.load(atomic::Ordering::Relaxed);
+    // Store a data source to a slice, assuming they cover the same number of bytes.
+    fn store_atomic_slice_unchecked(self, val: AtomicSliceRef<P>, mut from: impl DataSource) {
+        // Modify only some bits of an atomic value.
+        fn modify_parts_with(
+            part: &AtomicPart,
+            with: impl FnOnce(&mut [u8; MaxAtomic::PART_SIZE]),
+        ) {
+            let original = part.load(atomic::Ordering::Relaxed);
            let mut value = original;
 
-            let target = &mut bytemuck::bytes_of_mut(&mut value)[initial_skip..];
-            let copy_len = target.len().min(source.len());
-            target[..copy_len].copy_from_slice(&source[..copy_len]);
-            source = &source[copy_len..];
+            let buffer = bytemuck::bytes_of_mut(&mut value);
+            with(buffer.try_into().unwrap());
 
             // Any bits we did not modify, including those outside our own range, will not get
             // modified by this instruction. This provides the basic conflict guarantee.
-            into.fetch_xor(original ^ value, atomic::Ordering::Relaxed);
+            part.fetch_xor(original ^ value, atomic::Ordering::Relaxed);
+        }
 
-            if source.is_empty() {
-                break;
-            }
+        let offset = val.start / MaxAtomic::PART_SIZE;
+        let mut buffer = val.buf.0[offset..].iter();
 
-            initial_skip = 0;
+        // How many bytes from the start to first atomic boundary?
+        let head_len = val.start.next_multiple_of(MaxAtomic::PART_SIZE) - val.start;
+        from.init(head_len);
+
+        let after_head = (val.end - val.start).saturating_sub(head_len);
+        // How many bytes is the end from its previous atomic boundary?
+        let tail_skip = after_head % MaxAtomic::PART_SIZE;
+        let body_count = after_head / MaxAtomic::PART_SIZE;
+
+        if head_len > 0 {
+            let into = buffer.next().unwrap();
+            modify_parts_with(into, |buffer| from.load_head(buffer));
+        }
+
+        let body = buffer.as_slice();
+        for part in &body[..body_count] {
+            // Here we modify all bytes so just store..
+            let mut value = Default::default();
+            let buffer = bytemuck::bytes_of_mut(&mut value);
+            from.load(buffer.try_into().unwrap());
+            part.store(value, atomic::Ordering::Relaxed);
+        }
+
+        if tail_skip > 0 {
+            let into = &body[body_count];
+            modify_parts_with(into, |buffer| from.load_tail(buffer));
         }
     }
 
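The head/body/tail split above is driven by a little index arithmetic. A worked example with hypothetical numbers, assuming an 8-byte part size and a store covering bytes 13..37 of the underlying buffer:

    fn main() {
        const PART_SIZE: usize = 8; // stand-in for MaxAtomic::PART_SIZE
        let (start, end) = (13usize, 37usize); // hypothetical byte range of the store

        // Bytes needed to reach the next part boundary from `start`.
        let head_len = start.next_multiple_of(PART_SIZE) - start;
        let after_head = (end - start).saturating_sub(head_len);
        let tail_skip = after_head % PART_SIZE;
        let body_count = after_head / PART_SIZE;

        assert_eq!(head_len, 3);   // bytes 13..16 merge into their part via xor
        assert_eq!(body_count, 2); // bytes 16..32 fill two parts, stored outright
        assert_eq!(tail_skip, 5);  // bytes 32..37 merge into the last part via xor
    }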

@@ -1313,6 +1422,8 @@ const _: () = {
 };
 
 impl MaxAtomic {
+    pub(crate) const PART_SIZE: usize = core::mem::size_of::<AtomicPart>();
+
     /// Create a vector of atomic zero-bytes.
     pub const fn zero() -> Self {
         const Z: AtomicPart = AtomicPart::new(0);
