@@ -682,6 +682,14 @@ impl<T, const N: usize> Texel<[T; N]> {
     }
 }
 
+/// Protocol for [`Texel::store_atomic_slice_unchecked`] argument. Implementation detail.
+trait DataSource {
+    fn init(&mut self, init: usize);
+    fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
+    fn load(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
+    fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
+}
+
 /// Operations that can be performed based on the evidence of Texel.
 impl<P> Texel<P> {
     /// Copy a texel.
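The new `DataSource` trait decouples the store loop from where the bytes come from: the driver calls `init` once with the number of bytes before the first atomic part boundary, then `load_head` at most once for the partial leading part, `load` exactly once per fully covered part, and `load_tail` at most once for the partial trailing part. Below is a minimal, self-contained sketch of that call protocol; `PART_SIZE` and the `drive` function are stand-ins invented for illustration, not items from the crate.

```rust
// Sketch of the DataSource call protocol, assuming PART_SIZE stands in for
// MaxAtomic::PART_SIZE (e.g. 8 on targets where AtomicPart is 64 bits wide).
const PART_SIZE: usize = 8;

trait DataSource {
    fn init(&mut self, init: usize);
    fn load_head(&mut self, val: &mut [u8; PART_SIZE]);
    fn load(&mut self, val: &mut [u8; PART_SIZE]);
    fn load_tail(&mut self, val: &mut [u8; PART_SIZE]);
}

// Hypothetical driver mirroring the order in which the real store loop
// invokes the methods for a byte range `start..end`.
fn drive(source: &mut impl DataSource, start: usize, end: usize) {
    // One init call: how many bytes lie before the first part boundary.
    let head_len = start.next_multiple_of(PART_SIZE) - start;
    source.init(head_len);

    let after_head = (end - start).saturating_sub(head_len);
    let mut scratch = [0u8; PART_SIZE];

    if head_len > 0 {
        source.load_head(&mut scratch); // partial leading part, at most once
    }
    for _ in 0..after_head / PART_SIZE {
        source.load(&mut scratch); // once per fully covered part
    }
    if after_head % PART_SIZE > 0 {
        source.load_tail(&mut scratch); // partial trailing part, at most once
    }
}
```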
@@ -839,7 +847,7 @@ impl<P> Texel<P> {
     /// bytes *not* covered by this particular representation will not inherently block progress.
     pub fn store_atomic(self, val: AtomicRef<P>, value: P) {
         let slice = AtomicSliceRef::from_ref(val);
-        self.store_atomic_slice_unchecked(slice, core::slice::from_ref(&value));
+        self.store_atomic_slice(slice, core::slice::from_ref(&value));
     }
 
     /// Store values to an atomic slice.
@@ -857,7 +865,53 @@ impl<P> Texel<P> {
     /// This method panics if the slice and the source buffer do not have the same logical length.
     #[track_caller]
     pub fn store_atomic_slice(self, val: AtomicSliceRef<P>, source: &[P]) {
+        struct SliceSource<'lt> {
+            skip: usize,
+            head: &'lt [u8],
+            chunks: core::slice::ChunksExact<'lt, u8>,
+            tail: &'lt [u8],
+        }
+
+        impl DataSource for SliceSource<'_> {
+            fn init(&mut self, init: usize) {
+                let len = self.head.len().min(init);
+                let (head, body) = self.head.split_at(len);
+                self.head = head;
+                self.skip = MaxAtomic::PART_SIZE - init;
+
+                let chunks = body.chunks_exact(MaxAtomic::PART_SIZE);
+                self.tail = chunks.remainder();
+                self.chunks = chunks;
+            }
+
+            fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
+                let target = &mut val[self.skip..][..self.head.len()];
+                target.copy_from_slice(self.head);
+            }
+
+            fn load(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
+                if let Some(next) = self.chunks.next() {
+                    val.copy_from_slice(next);
+                } else {
+                    debug_assert!(false);
+                }
+            }
+
+            fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
+                let target = &mut val[..self.tail.len()];
+                target.copy_from_slice(self.tail);
+            }
+        }
+
         assert_eq!(val.len(), source.len());
+
+        let source = SliceSource {
+            head: self.to_bytes(source),
+            skip: 0,
+            chunks: [].chunks_exact(MaxAtomic::PART_SIZE),
+            tail: &[],
+        };
+
         self.store_atomic_slice_unchecked(val, source);
     }
 
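`SliceSource::init` is where the head/body/tail split happens: the first `init` bytes (those before the first part boundary) become the head, `chunks_exact` then yields one chunk per fully covered part, and its remainder becomes the tail. Here is a worked, stand-alone example of that split, assuming a part size of 8; the `split` helper is hypothetical and only mirrors the arithmetic in `init`.

```rust
const PART_SIZE: usize = 8;

// Hypothetical helper mirroring SliceSource::init: split `bytes` into a head
// of at most `init` bytes, full PART_SIZE chunks, and a trailing remainder.
fn split(bytes: &[u8], init: usize) -> (&[u8], core::slice::ChunksExact<'_, u8>, &[u8]) {
    let len = bytes.len().min(init);
    let (head, body) = bytes.split_at(len);
    let chunks = body.chunks_exact(PART_SIZE);
    let tail = chunks.remainder();
    (head, chunks, tail)
}

fn main() {
    // A 13-byte write starting 5 bytes into a part: 3 bytes remain until the
    // first boundary, then one full 8-byte part, then 2 trailing bytes.
    let bytes = [0xAB_u8; 13];
    let (head, chunks, tail) = split(&bytes, 3);
    assert_eq!(head.len(), 3);
    assert_eq!(chunks.len(), 1);
    assert_eq!(tail.len(), 2);
}
```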
@@ -875,6 +929,44 @@ impl<P> Texel<P> {
     ///
     /// This method panics if the slice and the source buffer do not have the same logical length.
     pub fn store_atomic_from_cells(self, val: AtomicSliceRef<P>, source: &[Cell<P>]) {
+        struct CellSource<'lt> {
+            skip: usize,
+            head: &'lt [Cell<u8>],
+            chunks: core::slice::ChunksExact<'lt, Cell<u8>>,
+            tail: &'lt [Cell<u8>],
+        }
+
+        impl DataSource for CellSource<'_> {
+            fn init(&mut self, init: usize) {
+                let len = self.head.len().min(init);
+                let (head, body) = self.head.split_at(len);
+                self.head = head;
+                self.skip = MaxAtomic::PART_SIZE - init;
+
+                let chunks = body.chunks_exact(MaxAtomic::PART_SIZE);
+                self.tail = chunks.remainder();
+                self.chunks = chunks;
+            }
+
+            fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
+                let target = &mut val[self.skip..][..self.head.len()];
+                constants::U8.load_cell_slice(self.head, target);
+            }
+
+            fn load(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
+                if let Some(next) = self.chunks.next() {
+                    constants::U8.load_cell_slice(next, val);
+                } else {
+                    debug_assert!(false);
+                }
+            }
+
+            fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
+                let target = &mut val[..self.tail.len()];
+                constants::U8.load_cell_slice(self.tail, target);
+            }
+        }
+
         assert_eq!(val.len(), source.len());
 
         assert!(
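`CellSource` mirrors `SliceSource` but reads through `&[Cell<u8>]`, which is what lets the `unsafe` slice cast below be deleted: bytes are copied out of the cells part by part instead of reinterpreting the cell slice as `&[P]`. The copy that `constants::U8.load_cell_slice` performs is roughly the following sketch; the free function here is illustrative, not the crate's actual implementation.

```rust
use core::cell::Cell;

// Illustrative stand-in for constants::U8.load_cell_slice: read each shared
// byte cell into a plain byte buffer. `Cell` is not `Sync`, so no other
// thread can be writing these cells while this thread reads them.
fn load_cell_slice(cells: &[Cell<u8>], target: &mut [u8]) {
    assert_eq!(cells.len(), target.len());
    for (byte, cell) in target.iter_mut().zip(cells) {
        *byte = cell.get();
    }
}

fn main() {
    let cells = [Cell::new(1u8), Cell::new(2), Cell::new(3)];
    let mut out = [0u8; 3];
    load_cell_slice(&cells, &mut out);
    assert_eq!(out, [1, 2, 3]);
}
```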
@@ -888,46 +980,63 @@ impl<P> Texel<P> {
             these values across non-local API boundaries"
         );
 
-        // SAFETY
-        // - this covers the exact memory range as the underlying slice of cells.
-        // - the Texel certifies it is initialized memory.
-        // - the lifetime is the same.
-        // - the memory in the slice is not mutated. `Cell` is not `Sync` so this thread is the
-        //   only that could modify those contents. But also in this thread this function _is
-        //   currently running_ and so it suffices that it does not to modify the contents. Only
-        //   the memory at `val` is modified in the later call. We checked aliasing above.
-        // - the total size is at most `isize::MAX` since it was already a reference to it.
-        let source: &[P] =
-            unsafe { slice::from_raw_parts(source.as_ptr() as *const P, source.len()) };
+        let source = CellSource {
+            head: self.cell_bytes(source).as_slice_of_cells(),
+            skip: 0,
+            chunks: [].chunks_exact(MaxAtomic::PART_SIZE),
+            tail: &[],
+        };
+
         self.store_atomic_slice_unchecked(val, source);
     }
 
-    fn store_atomic_slice_unchecked(self, val: AtomicSliceRef<P>, from: &[P]) {
-        let offset = val.start / core::mem::size_of::<AtomicPart>();
-        let mut initial_skip = val.start % core::mem::size_of::<AtomicPart>();
-
-        let mut source = self.to_bytes(from);
-        let mut buffer = val.buf.0[offset..].iter();
-
-        loop {
-            let into = buffer.next().unwrap();
-            let original = into.load(atomic::Ordering::Relaxed);
+    // Store a data source to a slice, assuming they cover the same number of bytes.
+    fn store_atomic_slice_unchecked(self, val: AtomicSliceRef<P>, mut from: impl DataSource) {
+        // Modify only some bits of an atomic value.
+        fn modify_parts_with(
+            part: &AtomicPart,
+            with: impl FnOnce(&mut [u8; MaxAtomic::PART_SIZE]),
+        ) {
+            let original = part.load(atomic::Ordering::Relaxed);
             let mut value = original;
 
-            let target = &mut bytemuck::bytes_of_mut(&mut value)[initial_skip..];
-            let copy_len = target.len().min(source.len());
-            target[..copy_len].copy_from_slice(&source[..copy_len]);
-            source = &source[copy_len..];
+            let buffer = bytemuck::bytes_of_mut(&mut value);
+            with(buffer.try_into().unwrap());
 
             // Any bits we did not modify, including those outside our own range, will not get
             // modified by this instruction. This provides the basic conflict guarantee.
-            into.fetch_xor(original ^ value, atomic::Ordering::Relaxed);
+            part.fetch_xor(original ^ value, atomic::Ordering::Relaxed);
+        }
 
-            if source.is_empty() {
-                break;
-            }
+        let offset = val.start / MaxAtomic::PART_SIZE;
+        let mut buffer = val.buf.0[offset..].iter();
 
-            initial_skip = 0;
+        // How many bytes from the start to the first atomic boundary?
+        let head_len = val.start.next_multiple_of(MaxAtomic::PART_SIZE) - val.start;
+        from.init(head_len);
+
+        let after_head = (val.end - val.start).saturating_sub(head_len);
+        // How many bytes is the end past its previous atomic boundary?
+        let tail_skip = after_head % MaxAtomic::PART_SIZE;
+        let body_count = after_head / MaxAtomic::PART_SIZE;
+
+        if head_len > 0 {
+            let into = buffer.next().unwrap();
+            modify_parts_with(into, |buffer| from.load_head(buffer));
+        }
+
+        let body = buffer.as_slice();
+        for part in &body[..body_count] {
+            // Here we modify all bytes, so just store.
+            let mut value = Default::default();
+            let buffer = bytemuck::bytes_of_mut(&mut value);
+            from.load(buffer.try_into().unwrap());
+            part.store(value, atomic::Ordering::Relaxed);
+        }
+
+        if tail_skip > 0 {
+            let into = &body[body_count];
+            modify_parts_with(into, |buffer| from.load_tail(buffer));
         }
     }
 
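The partial head and tail parts rely on an XOR identity: the mask `original ^ value` is zero in every byte this call did not edit, so `fetch_xor` leaves those bytes untouched even if another thread changed them between the `load` and the `fetch_xor`; in the bytes it did edit, the xor flips `original` into `value`, which is correct as long as no other writer overlaps this call's byte range. A stand-alone sketch of that property, assuming `AtomicU64` as a stand-in for `AtomicPart`:

```rust
use core::sync::atomic::{AtomicU64, Ordering};

fn main() {
    let part = AtomicU64::new(0x1111_2222_3333_4444);

    let original = part.load(Ordering::Relaxed);
    let mut value = original;
    // Locally overwrite only the low two bytes, as load_head/load_tail do.
    value = (value & !0xFFFF) | 0xBEEF;

    // A concurrent writer flips a byte outside our range in the meantime.
    part.fetch_xor(0x00FF_0000_0000_0000, Ordering::Relaxed);

    // XOR-ing in the difference rewrites exactly the bytes we edited and
    // keeps the concurrently updated byte intact.
    part.fetch_xor(original ^ value, Ordering::Relaxed);
    assert_eq!(part.load(Ordering::Relaxed), 0x11EE_2222_3333_BEEF);
}
```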
@@ -1313,6 +1422,8 @@ const _: () = {
 };
 
 impl MaxAtomic {
+    pub(crate) const PART_SIZE: usize = core::mem::size_of::<AtomicPart>();
+
     /// Create a vector of atomic zero-bytes.
     pub const fn zero() -> Self {
         const Z: AtomicPart = AtomicPart::new(0);