@@ -1795,39 +1795,71 @@ const fn len_utf16(code: u32) -> usize {
17951795#[ inline]
17961796pub const fn encode_utf8_raw ( code : u32 , dst : & mut [ u8 ] ) -> & mut [ u8 ] {
17971797 let len = len_utf8 ( code) ;
1798- match ( len, & mut * dst) {
1799- ( 1 , [ a, ..] ) => {
1800- * a = code as u8 ;
1801- }
1802- ( 2 , [ a, b, ..] ) => {
1803- * a = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
1804- * b = ( code & 0x3F ) as u8 | TAG_CONT ;
1805- }
1806- ( 3 , [ a, b, c, ..] ) => {
1807- * a = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
1808- * b = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
1809- * c = ( code & 0x3F ) as u8 | TAG_CONT ;
1810- }
1811- ( 4 , [ a, b, c, d, ..] ) => {
1812- * a = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
1813- * b = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
1814- * c = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
1815- * d = ( code & 0x3F ) as u8 | TAG_CONT ;
1816- }
1817- _ => {
1818- const_panic ! (
1819- "encode_utf8: buffer does not have enough bytes to encode code point" ,
1820- "encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}" ,
1821- code: u32 = code,
1822- len: usize = len,
1823- dst_len: usize = dst. len( ) ,
1824- )
1825- }
1826- } ;
1798+ if dst. len ( ) < len {
1799+ const_panic ! (
1800+ "encode_utf8: buffer does not have enough bytes to encode code point" ,
1801+ "encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}" ,
1802+ code: u32 = code,
1803+ len: usize = len,
1804+ dst_len: usize = dst. len( ) ,
1805+ ) ;
1806+ }
1807+
1808+ // SAFETY: `dst` is checked to be at least the length needed to encode the codepoint.
1809+ unsafe { encode_utf8_raw_unchecked ( code, dst. as_mut_ptr ( ) ) } ;
1810+
18271811 // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
18281812 unsafe { slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , len) }
18291813}
18301814
1815+ /// Encodes a raw u32 value as UTF-8 to the provided destination buffer.
1816+ ///
1817+ /// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1818+ /// (Creating a `char` in the surrogate range is UB.)
1819+ /// The result is valid [generalized UTF-8] but not valid UTF-8.
1820+ ///
1821+ /// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1822+ ///
1823+ /// # Safety
1824+ ///
1825+ /// The behavior is undefined if the buffer pointed to by `dst` is not
1826+ /// large enough to hold the encoded codepoint. A buffer of length four
1827+ /// is large enough to encode any `char`.
1828+ ///
1829+ /// For a safe version of this function, see the [`encode_utf8_raw`] function.
1830+ #[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
1831+ #[ doc( hidden) ]
1832+ #[ inline]
1833+ pub const unsafe fn encode_utf8_raw_unchecked ( code : u32 , dst : * mut u8 ) {
1834+ let len = len_utf8 ( code) ;
1835+ // SAFETY: The caller must guarantee that the buffer pointed to by `dst`
1836+ // is at least `len` bytes long.
1837+ unsafe {
1838+ match len {
1839+ 1 => {
1840+ * dst = code as u8 ;
1841+ }
1842+ 2 => {
1843+ * dst = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
1844+ * dst. add ( 1 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
1845+ }
1846+ 3 => {
1847+ * dst = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
1848+ * dst. add ( 1 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
1849+ * dst. add ( 2 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
1850+ }
1851+ 4 => {
1852+ * dst = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
1853+ * dst. add ( 1 ) = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
1854+ * dst. add ( 2 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
1855+ * dst. add ( 3 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
1856+ }
1857+ // SAFETY: `char` always takes between 1 and 4 bytes to encode in UTF-8.
1858+ _ => crate :: hint:: unreachable_unchecked ( ) ,
1859+ }
1860+ }
1861+ }
1862+
18311863/// Encodes a raw `u32` value as UTF-16 into the provided `u16` buffer,
18321864/// and then returns the subslice of the buffer that contains the encoded character.
18331865///
0 commit comments