@@ -703,6 +703,106 @@ impl [u8] {
703
703
assert_eq ! ( read_offset, self . len( ) ) ;
704
704
if write_offset < read_offset { Ok ( write_offset) } else { Err ( queue) }
705
705
}
706
+
707
+ #[ rustc_allow_incoherent_impl]
708
+ #[ unstable( issue = "none" , feature = "std_internals" ) ]
709
+ #[ allow( dead_code) ]
710
+ /// Safety:
711
+ /// - Must be UTF-8
712
+ pub unsafe fn make_utf8_lowercase ( & mut self ) -> Result < usize , VecDeque < u8 > > {
713
+ let mut queue = VecDeque :: new ( ) ;
714
+
715
+ let mut read_offset = 0 ;
716
+ let mut write_offset = 0 ;
717
+
718
+ let mut buffer = [ 0 ; 4 ] ;
719
+ let mut final_sigma_automata = FinalSigmaAutomata :: new ( ) ;
720
+ while let Some ( ( codepoint, width) ) =
721
+ unsafe { core:: str:: next_code_point_with_width ( & mut self [ read_offset..] . iter ( ) ) }
722
+ {
723
+ read_offset += width;
724
+ let uppercase_char = unsafe { char:: from_u32_unchecked ( codepoint) } ;
725
+ if uppercase_char == 'Σ' {
726
+ // Σ maps to σ, except at the end of a word where it maps to ς.
727
+ // See core::str::to_lowercase
728
+ let rest = unsafe { core:: str:: from_utf8_unchecked ( & self [ read_offset..] ) } ;
729
+ let is_word_final =
730
+ final_sigma_automata. is_accepting ( ) && !case_ignorable_then_cased ( rest. chars ( ) ) ;
731
+ let sigma_lowercase = if is_word_final { 'ς' } else { 'σ' } ;
732
+ let l = sigma_lowercase. len_utf8 ( ) ;
733
+ sigma_lowercase. encode_utf8 ( & mut buffer) ;
734
+ queue. extend ( & buffer[ ..l] ) ;
735
+ } else {
736
+ for c in uppercase_char. to_lowercase ( ) {
737
+ let l = c. len_utf8 ( ) ;
738
+ c. encode_utf8 ( & mut buffer) ;
739
+ queue. extend ( & buffer[ ..l] ) ;
740
+ }
741
+ }
742
+ final_sigma_automata. step ( uppercase_char) ;
743
+ while write_offset < read_offset {
744
+ match queue. pop_front ( ) {
745
+ Some ( b) => {
746
+ self [ write_offset] = b;
747
+ write_offset += 1 ;
748
+ }
749
+ None => break ,
750
+ }
751
+ }
752
+ }
753
+ assert_eq ! ( read_offset, self . len( ) ) ;
754
+ return if write_offset < read_offset { Ok ( write_offset) } else { Err ( queue) } ;
755
+
756
+ // For now this is copy pasted from core::str, FIXME: DRY
757
+ fn case_ignorable_then_cased < I : Iterator < Item = char > > ( iter : I ) -> bool {
758
+ use core:: unicode:: { Case_Ignorable , Cased } ;
759
+ match iter. skip_while ( |& c| Case_Ignorable ( c) ) . next ( ) {
760
+ Some ( c) => Cased ( c) ,
761
+ None => false ,
762
+ }
763
+ }
764
+ }
765
+ }
766
+
767
+ #[ derive( Clone ) ]
768
+ enum FinalSigmaAutomata {
769
+ Init ,
770
+ Accepted ,
771
+ }
772
+
773
+ impl FinalSigmaAutomata {
774
+ fn new ( ) -> Self {
775
+ Self :: Init
776
+ }
777
+
778
+ fn is_accepting ( & self ) -> bool {
779
+ match self {
780
+ FinalSigmaAutomata :: Accepted => true ,
781
+ FinalSigmaAutomata :: Init => false ,
782
+ }
783
+ }
784
+
785
+ fn step ( & mut self , c : char ) {
786
+ use core:: unicode:: { Case_Ignorable , Cased } ;
787
+
788
+ use FinalSigmaAutomata :: * ;
789
+ * self = match self {
790
+ Init => {
791
+ if Cased ( c) {
792
+ Accepted
793
+ } else {
794
+ Init
795
+ }
796
+ }
797
+ Accepted => {
798
+ if Cased ( c) || Case_Ignorable ( c) {
799
+ Accepted
800
+ } else {
801
+ Init
802
+ }
803
+ }
804
+ }
805
+ }
706
806
}
707
807
708
808
#[ cfg( not( test) ) ]
0 commit comments