Skip to content

Commit 83f4b9e

Browse files
committed
Split IDNA into a separate crate.
1 parent be00f8f commit 83f4b9e

15 files changed

+155
-58
lines changed

.gitignore

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
/target
2-
/Cargo.lock
1+
target
2+
Cargo.lock
33
/.cargo/config

Cargo.toml

+1-6
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@ name = "format"
1616
[[test]]
1717
name = "form_urlencoded"
1818
[[test]]
19-
name = "idna"
20-
[[test]]
21-
name = "punycode"
22-
[[test]]
2319
name = "tests"
2420
[[test]]
2521
name = "wpt"
@@ -50,8 +46,7 @@ version = ">=0.6.1, <0.8"
5046
optional = true
5147

5248
[dependencies]
49+
idna = { version = "0.1.0", path = "./idna" }
5350
uuid = { version = "0.2", features = ["v4"] }
5451
rustc-serialize = "0.3"
55-
unicode-bidi = "0.2.3"
56-
unicode-normalization = "0.1.2"
5752
matches = "0.1"

idna/Cargo.toml

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[package]
2+
name = "idna"
3+
version = "0.1.0"
4+
authors = ["Simon Sapin <[email protected]>"]
5+
description = "IDNA (Internationalizing Domain Names in Applications) and Punycode."
6+
repository = "https://github.com/servo/rust-url/"
7+
license = "MIT/Apache-2.0"
8+
9+
[dependencies]
10+
unicode-bidi = "0.2.3"
11+
unicode-normalization = "0.1.2"
12+
matches = "0.1"
13+
14+
[dev-dependencies]
15+
rustc-serialize = "0.3"
16+
17+
[[test]]
18+
name = "tests"
File renamed without changes.

idna/src/lib.rs

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Copyright 2016 Simon Sapin.
2+
//
3+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6+
// option. This file may not be copied, modified, or distributed
7+
// except according to those terms.
8+
9+
//! This Rust crate implements IDNA
10+
//! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna).
11+
//!
12+
//! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing*
13+
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
14+
//! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492).
15+
//!
16+
//! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction):
17+
//!
18+
//! > Initially, domain names were restricted to ASCII characters.
19+
//! > A system was introduced in 2003 for internationalized domain names (IDN).
20+
//! > This system is called Internationalizing Domain Names for Applications,
21+
//! > or IDNA2003 for short.
22+
//! > This mechanism supports IDNs by means of a client software transformation
23+
//! > into a format known as Punycode.
24+
//! > A revision of IDNA was approved in 2010 (IDNA2008).
25+
//! > This revision has a number of incompatibilities with IDNA2003.
26+
//! >
27+
//! > The incompatibilities force implementers of client software,
28+
//! > such as browsers and emailers,
29+
//! > to face difficult choices during the transition period
30+
//! > as registries shift from IDNA2003 to IDNA2008.
31+
//! > This document specifies a mechanism
32+
//! > that minimizes the impact of this transition for client software,
33+
//! > allowing client software to access domains that are valid under either system.
34+
35+
#[macro_use] extern crate matches;
36+
extern crate unicode_bidi;
37+
extern crate unicode_normalization;
38+
39+
pub mod punycode;
40+
pub mod uts46;
41+
42+
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm.
43+
///
44+
/// Return the ASCII representation of a domain name,
45+
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
46+
/// and using Punycode as necessary.
47+
///
48+
/// This process may fail.
49+
pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> {
50+
uts46::to_ascii(domain, uts46::Flags {
51+
use_std3_ascii_rules: false,
52+
transitional_processing: true, // XXX: switch when Firefox does
53+
verify_dns_length: false,
54+
})
55+
}
56+
57+
/// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm.
58+
///
59+
/// Return the Unicode representation of a domain name,
60+
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
61+
/// and decoding Punycode as necessary.
62+
///
63+
/// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation)
64+
/// but always returns a string for the mapped domain.
65+
pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) {
66+
uts46::to_unicode(domain, uts46::Flags {
67+
use_std3_ascii_rules: false,
68+
69+
// Unused:
70+
transitional_processing: true,
71+
verify_dns_length: false,
72+
})
73+
}

make_idna_table.py idna/src/make_uts46_mapping_table.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@
66
# option. This file may not be copied, modified, or distributed
77
# except according to those terms.
88

9-
10-
# Run as: python make_idna_table.py idna_table.txt > src/idna_table.rs
9+
# Run as: python make_uts46_mapping_table.py IdnaMappingTable.txt > uts46_mapping_table.rs
1110
# You can get the latest idna table from
1211
# http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt
1312

File renamed without changes.

src/idna.rs idna/src/uts46.rs

+37-38
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1-
//! International domain names
2-
//!
3-
//! https://url.spec.whatwg.org/#idna
1+
// Copyright 2013-2014 Valentin Gosu.
2+
//
3+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6+
// option. This file may not be copied, modified, or distributed
7+
// except according to those terms.
8+
9+
//! [*Unicode IDNA Compatibility Processing*
10+
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
411
512
use self::Mapping::*;
613
use punycode;
@@ -9,7 +16,7 @@ use unicode_normalization::UnicodeNormalization;
916
use unicode_normalization::char::is_combining_mark;
1017
use unicode_bidi::{BidiClass, bidi_class};
1118

12-
include!("idna_mapping.rs");
19+
include!("uts46_mapping_table.rs");
1320

1421
#[derive(Debug)]
1522
enum Mapping {
@@ -23,9 +30,9 @@ enum Mapping {
2330
}
2431

2532
struct Range {
26-
pub from: char,
27-
pub to: char,
28-
pub mapping: Mapping,
33+
from: char,
34+
to: char,
35+
mapping: Mapping,
2936
}
3037

3138
fn find_char(codepoint: char) -> &'static Mapping {
@@ -45,7 +52,7 @@ fn find_char(codepoint: char) -> &'static Mapping {
4552
&TABLE[min].mapping
4653
}
4754

48-
fn map_char(codepoint: char, flags: Uts46Flags, output: &mut String, errors: &mut Vec<Error>) {
55+
fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec<Error>) {
4956
match *find_char(codepoint) {
5057
Mapping::Valid => output.push(codepoint),
5158
Mapping::Ignored => {},
@@ -185,7 +192,7 @@ fn passes_bidi(label: &str, transitional_processing: bool) -> bool {
185192
}
186193

187194
/// http://www.unicode.org/reports/tr46/#Validity_Criteria
188-
fn validate(label: &str, flags: Uts46Flags, errors: &mut Vec<Error>) {
195+
fn validate(label: &str, flags: Flags, errors: &mut Vec<Error>) {
189196
if label.nfc().ne(label.chars()) {
190197
errors.push(Error::ValidityCriteria);
191198
}
@@ -212,7 +219,7 @@ fn validate(label: &str, flags: Uts46Flags, errors: &mut Vec<Error>) {
212219
}
213220

214221
/// http://www.unicode.org/reports/tr46/#Processing
215-
fn uts46_processing(domain: &str, flags: Uts46Flags, errors: &mut Vec<Error>) -> String {
222+
fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
216223
let mut mapped = String::new();
217224
for c in domain.chars() {
218225
map_char(c, flags, &mut mapped, errors)
@@ -226,7 +233,7 @@ fn uts46_processing(domain: &str, flags: Uts46Flags, errors: &mut Vec<Error>) ->
226233
if label.starts_with("xn--") {
227234
match punycode::decode_to_string(&label["xn--".len()..]) {
228235
Some(decoded_label) => {
229-
let flags = Uts46Flags { transitional_processing: false, ..flags };
236+
let flags = Flags { transitional_processing: false, ..flags };
230237
validate(&decoded_label, flags, errors);
231238
validated.push_str(&decoded_label)
232239
}
@@ -241,14 +248,14 @@ fn uts46_processing(domain: &str, flags: Uts46Flags, errors: &mut Vec<Error>) ->
241248
}
242249

243250
#[derive(Copy, Clone)]
244-
pub struct Uts46Flags {
251+
pub struct Flags {
245252
pub use_std3_ascii_rules: bool,
246253
pub transitional_processing: bool,
247254
pub verify_dns_length: bool,
248255
}
249256

250257
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
251-
pub enum Error {
258+
enum Error {
252259
PunycodeError,
253260
ValidityCriteria,
254261
DissallowedByStd3AsciiRules,
@@ -257,11 +264,18 @@ pub enum Error {
257264
TooLongForDns,
258265
}
259266

267+
/// Errors recorded during UTS #46 processing.
268+
///
269+
/// This is opaque for now, only indicating the presence of at least one error.
270+
/// More details may be exposed in the future.
271+
#[derive(Debug)]
272+
pub struct Errors(Vec<Error>);
273+
260274
/// http://www.unicode.org/reports/tr46/#ToASCII
261-
pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result<String, Vec<Error>> {
275+
pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
262276
let mut errors = Vec::new();
263277
let mut result = String::new();
264-
for label in uts46_processing(domain, flags, &mut errors).split('.') {
278+
for label in processing(domain, flags, &mut errors).split('.') {
265279
if result.len() > 0 {
266280
result.push('.');
267281
}
@@ -288,36 +302,21 @@ pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result<String, Vec<Err
288302
if errors.is_empty() {
289303
Ok(result)
290304
} else {
291-
Err(errors)
305+
Err(Errors(errors))
292306
}
293307
}
294308

295-
/// https://url.spec.whatwg.org/#concept-domain-to-ascii
296-
pub fn domain_to_ascii(domain: &str) -> Result<String, Vec<Error>> {
297-
uts46_to_ascii(domain, Uts46Flags {
298-
use_std3_ascii_rules: false,
299-
transitional_processing: true, // XXX: switch when Firefox does
300-
verify_dns_length: false,
301-
})
302-
}
303-
304309
/// http://www.unicode.org/reports/tr46/#ToUnicode
305310
///
306311
/// Only `use_std3_ascii_rules` is used in `flags`.
307-
pub fn uts46_to_unicode(domain: &str, mut flags: Uts46Flags) -> (String, Vec<Error>) {
312+
pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) {
308313
flags.transitional_processing = false;
309314
let mut errors = Vec::new();
310-
let domain = uts46_processing(domain, flags, &mut errors);
315+
let domain = processing(domain, flags, &mut errors);
316+
let errors = if errors.is_empty() {
317+
Ok(())
318+
} else {
319+
Err(Errors(errors))
320+
};
311321
(domain, errors)
312322
}
313-
314-
/// https://url.spec.whatwg.org/#concept-domain-to-unicode
315-
pub fn domain_to_unicode(domain: &str) -> (String, Vec<Error>) {
316-
uts46_to_unicode(domain, Uts46Flags {
317-
use_std3_ascii_rules: false,
318-
319-
// Unused:
320-
transitional_processing: true,
321-
verify_dns_length: false,
322-
})
323-
}
File renamed without changes.
File renamed without changes.

tests/punycode.rs idna/tests/punycode.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1-
extern crate url;
2-
extern crate rustc_serialize;
1+
// Copyright 2013 Simon Sapin.
2+
//
3+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6+
// option. This file may not be copied, modified, or distributed
7+
// except according to those terms.
38

4-
use url::punycode::{decode, encode_str};
9+
use idna::punycode::{decode, encode_str};
510
use rustc_serialize::json::{Json, Object};
611

712
fn one_test(description: &str, decoded: &str, encoded: &str) {
File renamed without changes.

idna/tests/tests.rs

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
extern crate idna;
2+
extern crate rustc_serialize;
3+
4+
mod punycode;
5+
mod uts46;

tests/idna.rs idna/tests/uts46.rs

+9-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
1-
extern crate url;
1+
// Copyright 2013-2014 Valentin Gosu.
2+
//
3+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6+
// option. This file may not be copied, modified, or distributed
7+
// except according to those terms.
28

39
use std::char;
4-
use url::idna;
10+
use idna::uts46;
511

612
#[test]
713
fn test_uts46() {
@@ -35,7 +41,7 @@ fn test_uts46() {
3541
continue;
3642
}
3743

38-
let result = idna::uts46_to_ascii(&source, idna::Uts46Flags {
44+
let result = uts46::to_ascii(&source, uts46::Flags {
3945
use_std3_ascii_rules: true,
4046
transitional_processing: test_type == "T",
4147
verify_dns_length: true,

src/lib.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,7 @@ extern crate serde;
141141
#[cfg(feature="heap_size")]
142142
#[macro_use] extern crate heapsize;
143143

144-
extern crate unicode_normalization;
145-
extern crate unicode_bidi;
144+
extern crate idna;
146145

147146
use std::fmt::{self, Formatter};
148147
use std::str;
@@ -170,9 +169,7 @@ mod parser;
170169
pub mod urlutils;
171170
pub mod percent_encoding;
172171
pub mod form_urlencoded;
173-
pub mod punycode;
174172
pub mod format;
175-
pub mod idna;
176173

177174
/// The parsed representation of an absolute URL.
178175
#[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)]

0 commit comments

Comments
 (0)