1212//! # use env_preferences::LocaleError;
1313//! # fn main() -> Result<(), LocaleError> {
1414//! let posix_locale = PosixLocale::try_from_str("en_US")?;
15- //! assert_eq!(posix_locale.try_convert_lossy()?, locale!("en-US-posix "));
15+ //! assert_eq!(posix_locale.try_convert_lossy()?, locale!("en-US"));
1616//! # Ok(())
1717//! # }
1818//! ```
@@ -21,12 +21,10 @@ use displaydoc::Display;
2121use icu_locale_core:: extensions:: unicode:: { key, value} ;
2222use icu_locale_core:: extensions:: Extensions ;
2323use icu_locale_core:: subtags:: { language, script, variant, Language , Region , Variants } ;
24- use icu_locale_core:: { LanguageIdentifier , Locale } ;
24+ use icu_locale_core:: { locale , LanguageIdentifier , Locale } ;
2525
2626use crate :: ParseError ;
2727
28- use super :: aliases:: find_posix_alias;
29-
3028#[ derive( Display , Debug , PartialEq ) ]
3129/// An error while parsing a POSIX locale identifier
3230pub enum PosixParseError {
@@ -208,17 +206,17 @@ impl<'src> PosixLocale<'src> {
208206 /// // Regular locales are converted without adding a `posix` variant
209207 /// assert_eq!(
210208 /// PosixLocale::try_from_str("en_US")?.try_convert_lossy()?,
211- /// locale!("en-US-posix ")
209+ /// locale!("en-US")
212210 /// );
213211 /// // The codeset field will be ignored
214212 /// assert_eq!(
215213 /// PosixLocale::try_from_str("en_US.iso88591")?.try_convert_lossy()?,
216- /// locale!("en-US-posix ")
214+ /// locale!("en-US")
217215 /// );
218216 /// // Any unknown modifiers will be ignored
219217 /// assert_eq!(
220218 /// PosixLocale::try_from_str("en_US@unknown")?.try_convert_lossy()?,
221- /// locale!("en-US-posix ")
219+ /// locale!("en-US")
222220 /// );
223221 /// # Ok(())
224222 /// # }
@@ -230,63 +228,57 @@ impl<'src> PosixLocale<'src> {
230228 /// # use env_preferences::parse::posix::PosixLocale;
231229 /// # use env_preferences::LocaleError;
232230 /// # fn main() -> Result<(), LocaleError> {
233- /// // The default "C"/"POSIX" locale will be converted to "und "
231+ /// // The default "C"/"POSIX" locale will be converted to "en-US-posix"
234232 /// assert_eq!(
235233 /// PosixLocale::try_from_str("C")?.try_convert_lossy()?,
236- /// locale!("und -posix")
234+ /// locale!("en-US-posix")
237235 /// );
238236 /// assert_eq!(
239237 /// PosixLocale::try_from_str("POSIX")?.try_convert_lossy()?,
240- /// locale!("und-posix")
241- /// );
242- ///
243- /// // Known language aliases will be converted to the matching BCP-47 identifier
244- /// assert_eq!(
245- /// PosixLocale::try_from_str("french")?.try_convert_lossy()?,
246- /// locale!("fr-FR-posix")
238+ /// locale!("en-US-posix")
247239 /// );
248240 ///
249241 /// // Known script modifiers will be converted to the matching CLDR keys
250242 /// assert_eq!(
251243 /// PosixLocale::try_from_str("uz_UZ@cyrillic")?.try_convert_lossy()?,
252- /// locale!("uz-Cyrl-UZ-posix ")
244+ /// locale!("uz-Cyrl-UZ")
253245 /// );
254246 /// assert_eq!(
255247 /// PosixLocale::try_from_str("ks_IN@devanagari")?.try_convert_lossy()?,
256- /// locale!("ks-Deva-IN-posix ")
248+ /// locale!("ks-Deva-IN")
257249 /// );
258250 /// assert_eq!(
259251 /// PosixLocale::try_from_str("be_BY@latin")?.try_convert_lossy()?,
260- /// locale!("be-Latn-BY-posix ")
252+ /// locale!("be-Latn-BY")
261253 /// );
262254 ///
263255 /// // Other known modifiers are handled accordingly
264256 /// assert_eq!(
265257 /// PosixLocale::try_from_str("en_US@euro")?.try_convert_lossy()?,
266- /// locale!("en-US-posix- u-cu-eur")
258+ /// locale!("en-US-u-cu-eur")
267259 /// );
268260 /// assert_eq!(
269261 /// PosixLocale::try_from_str("aa_ER@saaho")?.try_convert_lossy()?,
270- /// locale!("ssy-ER-posix ")
262+ /// locale!("ssy-ER")
271263 /// );
272264 /// # Ok(())
273265 /// # }
274266 /// ```
275267 pub fn try_convert_lossy ( & self ) -> Result < Locale , ParseError > {
276- // Check if the language matches a known alias (e.g. "nynorsk"->("nn", "NO"))
277- let ( mut language, region) = match find_posix_alias ( self . language ) {
278- Some ( ( language, region) ) => ( language, region) ,
279- None => {
280- let language = Language :: try_from_str ( self . language ) ?;
281- let region = self . territory . map ( Region :: try_from_str) . transpose ( ) ?;
282-
283- ( language, region)
284- }
285- } ;
268+ // The default "C"/"POSIX" locale should map to "en-US-posix",
269+ // which is the default behaviour in ICU4C:
270+ // https://github.com/unicode-org/icu/blob/795d7ac82c4b29cf721d0ad62c0b178347d453bf/icu4c/source/common/putil.cpp#L1738
271+ if self . language == "C" || self . language == "POSIX" {
272+ return Ok ( locale ! ( "en-US-posix" ) ) ;
273+ }
286274
287275 let mut extensions = Extensions :: new ( ) ;
288276 let mut script = None ;
289- let mut variants = vec ! [ variant!( "posix" ) ] ;
277+ let mut variant = None ;
278+
279+ // Parse the language/region
280+ let mut language = Language :: try_from_str ( self . language ) ?;
281+ let region = self . territory . map ( Region :: try_from_str) . transpose ( ) ?;
290282
291283 if let Some ( modifier) = self . modifier {
292284 match modifier. to_ascii_lowercase ( ) . as_str ( ) {
@@ -300,8 +292,7 @@ impl<'src> PosixLocale<'src> {
300292 // Saaho seems to be the only "legacy variant" that appears as a modifier:
301293 // https://www.unicode.org/reports/tr35/#table-legacy-variant-mappings
302294 "saaho" => language = language ! ( "ssy" ) ,
303- // This keeps `variants` sorted; "-posix" comes before "-valencia"
304- "valencia" => variants. push ( variant ! ( "valencia" ) ) ,
295+ "valencia" => variant = Some ( variant ! ( "valencia" ) ) ,
305296 // Some modifiers are known but can't be expressed as a BCP-47 identifier
306297 // e.g. "@abegede", "@iqtelif"
307298 _ => ( ) ,
@@ -313,7 +304,7 @@ impl<'src> PosixLocale<'src> {
313304 language,
314305 region,
315306 script,
316- variants : Variants :: from_vec_unchecked ( variants ) ,
307+ variants : variant . map_or_else ( Variants :: new , Variants :: from_variant ) ,
317308 } ,
318309 extensions,
319310 } )
0 commit comments