Skip to content

Commit a00b4ef

Browse files
committed
Bypass queue when possible in slice::make_*case
1 parent fffbb33 commit a00b4ef

File tree

1 file changed

+53
-31
lines changed

1 file changed

+53
-31
lines changed

library/alloc/src/slice.rs

+53-31
Original file line numberDiff line numberDiff line change
@@ -672,32 +672,28 @@ impl [u8] {
672672
#[unstable(issue = "none", feature = "std_internals")]
673673
#[allow(dead_code)]
674674
/// Safety:
675-
/// - Must be UTF-8
675+
/// - Must be valid UTF-8
676676
pub unsafe fn make_utf8_uppercase(&mut self) -> Result<usize, VecDeque<u8>> {
677677
let mut queue = VecDeque::new();
678678

679679
let mut read_offset = 0;
680680
let mut write_offset = 0;
681681

682-
let mut buffer = [0; 4];
683682
while let Some((codepoint, width)) =
684683
unsafe { core::str::next_code_point_with_width(&mut self[read_offset..].iter()) }
685684
{
686685
read_offset += width;
686+
// Queue must be flushed before encode_to_slice_or_else_to_queue is
687+
// called to ensure proper order of bytes
688+
dump_queue(&mut queue, &mut self[..read_offset], &mut write_offset);
687689
let lowercase_char = unsafe { char::from_u32_unchecked(codepoint) };
688690
for c in lowercase_char.to_uppercase() {
689-
let l = c.len_utf8();
690-
c.encode_utf8(&mut buffer);
691-
queue.extend(&buffer[..l]);
692-
}
693-
while write_offset < read_offset {
694-
match queue.pop_front() {
695-
Some(b) => {
696-
self[write_offset] = b;
697-
write_offset += 1;
698-
}
699-
None => break,
700-
}
691+
encode_to_slice_or_else_to_queue(
692+
c,
693+
&mut queue,
694+
&mut self[..read_offset],
695+
&mut write_offset,
696+
);
701697
}
702698
}
703699
assert_eq!(read_offset, self.len());
@@ -708,19 +704,21 @@ impl [u8] {
708704
#[unstable(issue = "none", feature = "std_internals")]
709705
#[allow(dead_code)]
710706
/// Safety:
711-
/// - Must be UTF-8
707+
/// - Must be valid UTF-8
712708
pub unsafe fn make_utf8_lowercase(&mut self) -> Result<usize, VecDeque<u8>> {
713709
let mut queue = VecDeque::new();
714710

715711
let mut read_offset = 0;
716712
let mut write_offset = 0;
717713

718-
let mut buffer = [0; 4];
719714
let mut final_sigma_automata = FinalSigmaAutomata::new();
720715
while let Some((codepoint, width)) =
721716
unsafe { core::str::next_code_point_with_width(&mut self[read_offset..].iter()) }
722717
{
723718
read_offset += width;
719+
// Queue must be flushed before encode_to_slice_or_else_to_queue is
720+
// called to ensure proper order of bytes
721+
dump_queue(&mut queue, &mut self[..read_offset], &mut write_offset);
724722
let uppercase_char = unsafe { char::from_u32_unchecked(codepoint) };
725723
if uppercase_char == 'Σ' {
726724
// Σ maps to σ, except at the end of a word where it maps to ς.
@@ -729,26 +727,23 @@ impl [u8] {
729727
let is_word_final =
730728
final_sigma_automata.is_accepting() && !case_ignorable_then_cased(rest.chars());
731729
let sigma_lowercase = if is_word_final { 'ς' } else { 'σ' };
732-
let l = sigma_lowercase.len_utf8();
733-
sigma_lowercase.encode_utf8(&mut buffer);
734-
queue.extend(&buffer[..l]);
730+
encode_to_slice_or_else_to_queue(
731+
sigma_lowercase,
732+
&mut queue,
733+
&mut self[..read_offset],
734+
&mut write_offset,
735+
);
735736
} else {
736737
for c in uppercase_char.to_lowercase() {
737-
let l = c.len_utf8();
738-
c.encode_utf8(&mut buffer);
739-
queue.extend(&buffer[..l]);
738+
encode_to_slice_or_else_to_queue(
739+
c,
740+
&mut queue,
741+
&mut self[..read_offset],
742+
&mut write_offset,
743+
);
740744
}
741745
}
742746
final_sigma_automata.step(uppercase_char);
743-
while write_offset < read_offset {
744-
match queue.pop_front() {
745-
Some(b) => {
746-
self[write_offset] = b;
747-
write_offset += 1;
748-
}
749-
None => break,
750-
}
751-
}
752747
}
753748
assert_eq!(read_offset, self.len());
754749
return if write_offset < read_offset { Ok(write_offset) } else { Err(queue) };
@@ -764,6 +759,33 @@ impl [u8] {
764759
}
765760
}
766761

762+
/// Encodes `c` as UTF-8 and stores the bytes, preferring the slice over the queue.
///
/// As many leading bytes of the encoding as fit are copied into
/// `slice[*write_offset..]`, and `write_offset` is advanced by that amount.
/// Any bytes that do not fit are appended to the back of `queue`, in order.
///
/// Panics if `*write_offset > slice.len()` (the range index is out of bounds).
fn encode_to_slice_or_else_to_queue(
    c: char,
    queue: &mut VecDeque<u8>,
    slice: &mut [u8],
    write_offset: &mut usize,
) {
    // A single `char` never needs more than four bytes in UTF-8.
    let mut scratch = [0u8; 4];
    let encoded_len = c.encode_utf8(&mut scratch).len();

    // Room that is still free in the slice, starting at the write cursor.
    let free = &mut slice[*write_offset..];
    let fits = encoded_len.min(free.len());

    // Split the encoding into the part that lands in the slice and the
    // overflow that has to wait in the queue.
    let (direct, overflow) = scratch[..encoded_len].split_at(fits);
    free[..fits].copy_from_slice(direct);
    *write_offset += fits;
    queue.extend(overflow);
}
776+
777+
/// Moves bytes from the front of `queue` into `slice`, starting at `*write_offset`.
///
/// Bytes are transferred in queue order until either the slice is full or the
/// queue is empty, and `write_offset` is advanced by the number of bytes moved.
/// Does nothing when `*write_offset` is already at or past the end of `slice`.
fn dump_queue(queue: &mut VecDeque<u8>, slice: &mut [u8], write_offset: &mut usize) {
    let start = *write_offset;
    if start >= slice.len() {
        return;
    }

    // Transfer only as many bytes as both sides can accommodate.
    let count = (slice.len() - start).min(queue.len());
    let destination = &mut slice[start..start + count];
    for (slot, byte) in destination.iter_mut().zip(queue.drain(..count)) {
        *slot = byte;
    }
    *write_offset = start + count;
}
788+
767789
#[derive(Clone)]
768790
enum FinalSigmaAutomata {
769791
Init,

0 commit comments

Comments
 (0)