@@ -672,32 +672,28 @@ impl [u8] {
672
672
#[ unstable( issue = "none" , feature = "std_internals" ) ]
673
673
#[ allow( dead_code) ]
674
674
/// Safety:
675
- /// - Must be UTF-8
675
+ /// - Must be valid UTF-8
676
676
pub unsafe fn make_utf8_uppercase ( & mut self ) -> Result < usize , VecDeque < u8 > > {
677
677
let mut queue = VecDeque :: new ( ) ;
678
678
679
679
let mut read_offset = 0 ;
680
680
let mut write_offset = 0 ;
681
681
682
- let mut buffer = [ 0 ; 4 ] ;
683
682
while let Some ( ( codepoint, width) ) =
684
683
unsafe { core:: str:: next_code_point_with_width ( & mut self [ read_offset..] . iter ( ) ) }
685
684
{
686
685
read_offset += width;
686
+ // Queue must be flushed before encode_to_slice_or_else_to_queue is
687
+ // called to ensure proper order of bytes
688
+ dump_queue ( & mut queue, & mut self [ ..read_offset] , & mut write_offset) ;
687
689
let lowercase_char = unsafe { char:: from_u32_unchecked ( codepoint) } ;
688
690
for c in lowercase_char. to_uppercase ( ) {
689
- let l = c. len_utf8 ( ) ;
690
- c. encode_utf8 ( & mut buffer) ;
691
- queue. extend ( & buffer[ ..l] ) ;
692
- }
693
- while write_offset < read_offset {
694
- match queue. pop_front ( ) {
695
- Some ( b) => {
696
- self [ write_offset] = b;
697
- write_offset += 1 ;
698
- }
699
- None => break ,
700
- }
691
+ encode_to_slice_or_else_to_queue (
692
+ c,
693
+ & mut queue,
694
+ & mut self [ ..read_offset] ,
695
+ & mut write_offset,
696
+ ) ;
701
697
}
702
698
}
703
699
assert_eq ! ( read_offset, self . len( ) ) ;
@@ -708,19 +704,21 @@ impl [u8] {
708
704
#[ unstable( issue = "none" , feature = "std_internals" ) ]
709
705
#[ allow( dead_code) ]
710
706
/// Safety:
711
- /// - Must be UTF-8
707
+ /// - Must be valid UTF-8
712
708
pub unsafe fn make_utf8_lowercase ( & mut self ) -> Result < usize , VecDeque < u8 > > {
713
709
let mut queue = VecDeque :: new ( ) ;
714
710
715
711
let mut read_offset = 0 ;
716
712
let mut write_offset = 0 ;
717
713
718
- let mut buffer = [ 0 ; 4 ] ;
719
714
let mut final_sigma_automata = FinalSigmaAutomata :: new ( ) ;
720
715
while let Some ( ( codepoint, width) ) =
721
716
unsafe { core:: str:: next_code_point_with_width ( & mut self [ read_offset..] . iter ( ) ) }
722
717
{
723
718
read_offset += width;
719
+ // Queue must be flushed before encode_to_slice_or_else_to_queue is
720
+ // called to ensure proper order of bytes
721
+ dump_queue ( & mut queue, & mut self [ ..read_offset] , & mut write_offset) ;
724
722
let uppercase_char = unsafe { char:: from_u32_unchecked ( codepoint) } ;
725
723
if uppercase_char == 'Σ' {
726
724
// Σ maps to σ, except at the end of a word where it maps to ς.
@@ -729,26 +727,23 @@ impl [u8] {
729
727
let is_word_final =
730
728
final_sigma_automata. is_accepting ( ) && !case_ignorable_then_cased ( rest. chars ( ) ) ;
731
729
let sigma_lowercase = if is_word_final { 'ς' } else { 'σ' } ;
732
- let l = sigma_lowercase. len_utf8 ( ) ;
733
- sigma_lowercase. encode_utf8 ( & mut buffer) ;
734
- queue. extend ( & buffer[ ..l] ) ;
730
+ encode_to_slice_or_else_to_queue (
731
+ sigma_lowercase,
732
+ & mut queue,
733
+ & mut self [ ..read_offset] ,
734
+ & mut write_offset,
735
+ ) ;
735
736
} else {
736
737
for c in uppercase_char. to_lowercase ( ) {
737
- let l = c. len_utf8 ( ) ;
738
- c. encode_utf8 ( & mut buffer) ;
739
- queue. extend ( & buffer[ ..l] ) ;
738
+ encode_to_slice_or_else_to_queue (
739
+ c,
740
+ & mut queue,
741
+ & mut self [ ..read_offset] ,
742
+ & mut write_offset,
743
+ ) ;
740
744
}
741
745
}
742
746
final_sigma_automata. step ( uppercase_char) ;
743
- while write_offset < read_offset {
744
- match queue. pop_front ( ) {
745
- Some ( b) => {
746
- self [ write_offset] = b;
747
- write_offset += 1 ;
748
- }
749
- None => break ,
750
- }
751
- }
752
747
}
753
748
assert_eq ! ( read_offset, self . len( ) ) ;
754
749
return if write_offset < read_offset { Ok ( write_offset) } else { Err ( queue) } ;
@@ -764,6 +759,33 @@ impl [u8] {
764
759
}
765
760
}
766
761
762
/// Appends the UTF-8 encoding of `c` to the output stream made of
/// `slice[*write_offset..]` followed by `queue`.
///
/// Bytes are copied directly into the free tail of `slice` (advancing
/// `*write_offset` by the number of bytes copied) and whatever does not fit is
/// pushed onto the back of `queue`.
///
/// If `queue` already holds bytes, those bytes logically precede `c`, so every
/// byte of `c` is queued behind them even when `slice` still has free space.
/// This keeps the byte order correct regardless of whether the caller flushed
/// the queue (e.g. with `dump_queue`) beforehand.
///
/// # Panics
///
/// Panics if `*write_offset > slice.len()`.
fn encode_to_slice_or_else_to_queue(
    c: char,
    queue: &mut VecDeque<u8>,
    slice: &mut [u8],
    write_offset: &mut usize,
) {
    let mut buffer = [0; 4];
    let len = c.encode_utf8(&mut buffer).len();
    let writable_slice = &mut slice[*write_offset..];
    // Writing into the slice while older bytes are still queued would emit the
    // new bytes ahead of the older ones, so only write directly when nothing
    // is pending.
    let direct_copy_length =
        if queue.is_empty() { core::cmp::min(len, writable_slice.len()) } else { 0 };
    let (direct, spilled) = buffer[..len].split_at(direct_copy_length);
    writable_slice[..direct_copy_length].copy_from_slice(direct);
    *write_offset += direct_copy_length;
    queue.extend(spilled);
}
776
+
777
/// Moves bytes from the front of `queue` into `slice`, starting at index
/// `*write_offset`, until either `slice` is full or `queue` is empty.
///
/// `*write_offset` is advanced by the number of bytes moved. Bytes that do not
/// fit stay in `queue` in their original order. Nothing happens when
/// `*write_offset` is at or beyond the end of `slice`.
fn dump_queue(queue: &mut VecDeque<u8>, slice: &mut [u8], write_offset: &mut usize) {
    // `get_mut` yields `None` for a cursor past the end, which means no room.
    if let Some(free) = slice.get_mut(*write_offset..) {
        let moved = core::cmp::min(free.len(), queue.len());
        // Drain exactly as many bytes as there are free slots to fill.
        for (slot, byte) in free[..moved].iter_mut().zip(queue.drain(..moved)) {
            *slot = byte;
        }
        *write_offset += moved;
    }
}
788
+
767
789
#[ derive( Clone ) ]
768
790
enum FinalSigmaAutomata {
769
791
Init ,
0 commit comments