Skip to content

Commit efde18f

Browse files
committed May 13, 2017
Update TokenTrees, remove Quasiquote
Updated AST as per <rust-lang/rust#39419>, thereby closing #19. Since those changes involved removing MatchNt and SubstNt, I've removed the unsafe parts of Quote that relied on them. Quote is now a safe module (where '$x' and '$x:ty' don't work), enabled by default. Difference tests now work on 'ast.rs' as well as 'parser.rs'. Additional work was done on cleaning up tests around macros and tokens. Closes #16.
1 parent 8b1fb8d commit efde18f

19 files changed

+231
-329
lines changed
 

‎README.md

+10-1
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,28 @@ from nightly as they come out, but in general will only target compatibility wit
2222

2323
## Bugs
2424

25+
### Parser
26+
2527
Any difference between what is accepted by the `rustc` parser and the `language-rust` parser
2628
indicates
2729

2830
* a bug in `language-rust` (this is almost always the case)
2931
* a bug in `rustc`
3032
* that there is a newer version of `rustc` which made a breaking change to this syntax
3133

34+
If the AST/parser of `rustc` changes, the `rustc-tests` test suite should start failing - it
35+
compares the JSON AST debug output of `rustc` to our parsed AST.
36+
37+
### Pretty-printer
38+
3239
For the pretty-printer, bugs are a bit tougher to list exhaustively. Suggestions for better layout
33-
algorithms are most welcome!
40+
algorithms are most welcome! The [`fmt-rfcs`][6] repo is loosely used as the reference for "correct"
41+
pretty-printing.
3442

3543
[0]: https://www.rust-lang.org/en-US/
3644
[1]: https://docs.haskellstack.org/en/stable/README/
3745
[2]: https://hackage.haskell.org/package/alex
3846
[3]: https://hackage.haskell.org/package/happy
3947
[4]: https://travis-ci.org/harpocrates/language-rust.svg?branch=master
4048
[5]: https://travis-ci.org/harpocrates/language-rust
49+
[6]: https://github.com/rust-lang-nursery/fmt-rfcs

‎language-rust.cabal

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ flag useByteStrings
2525

2626
flag enableQuasiquotes
2727
description: Provide the experimental 'Language.Rust.Quote' module
28-
default: False
28+
default: True
2929

3030
library
3131
hs-source-dirs: src

‎sample-sources/expressions.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ fn main() {
3434
let x = &a;
3535
let x = &mut a;
3636
let x = return 1;
37-
// let x = asm!("NOP");
38-
// let x = println!("hi");
37+
let x = asm!("NOP");
38+
let x = println!("hi");
3939
let x = Foo { x: 1, y: 2 };
4040
let x = Foo { x: 1, ..base };
4141
let x = [1; 5];

‎sample-sources/items.rs

+14-9
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
extern crate foo;
33
extern crate foo_bar as foo;
44

5-
// use foo;
6-
// use foo::bar;
7-
// use foo::bar as FooBar;
5+
use foo;
6+
use foo::bar;
7+
use foo::bar as FooBar;
88

99
static FOO: i32 = 42;
1010
static mut FOO: i32 = 42;
@@ -34,14 +34,20 @@ mod bar {
3434
const ID1: i32;
3535
const ID2: i32 = 1;
3636

37+
fn area1(self) -> f64;
38+
fn area2(mut self) -> f64 { 1f64 }
3739
fn area1(&self) -> f64;
38-
fn area2(&self) -> f64 { 1f64 }
40+
fn area2(&mut self) -> f64 { 1f64 }
41+
fn area1(&'lt self) -> f64;
42+
fn area2(&'lt mut self) -> f64 { 1f64 }
43+
fn area1(self: Foo<T>) -> f64;
44+
fn area2(mut self: Foo<T>) -> f64 { 1f64 }
3945

4046
type N;
4147
type N: fmt::Display;
4248
type N: fmt::Display = i32;
4349

44-
// foo!{}
50+
foo!{}
4551
}
4652

4753
fn foo<T: ?Sized>(x: &T) { }
@@ -56,14 +62,13 @@ mod bar {
5662
const ID: i32 = 1;
5763
fn area(&self) -> f64 { 1f64 }
5864
type N = i32;
59-
// foo!()
60-
65+
foo!();
6166
}
6267
impl<A> Trait for Foo<A> { }
6368
impl<A> !Trait for Foo<A> { }
6469

65-
// macro_rules! foo { }
66-
// foo!();
70+
macro_rules! foo { }
71+
foo!();
6772

6873
enum Foo {
6974
Baz {

‎sample-sources/macros.rs

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/bin/sh
2+
3+
// Every token
4+
token!{
5+
[= < > & | ! ~ + - * / % ^]
6+
(>= >>= && || << >> == != <= <<= -= &= |= += *= /= ^= %=)
7+
{@ . .. ... , ; : :: -> <- => # $ ?}
8+
( ) [ ] { } 1.0foo x _ 'lt /*! doc comment */ #! $x
9+
}
10+
11+
// Check new way of tokenization
12+
tokentrees!{
13+
$x
14+
$x:ty
15+
$($xs:expr),+
16+
$(1+2),+
17+
br##"hello "#
18+
world!"###suf
19+
}
20+
21+
// literals
22+
literals!{
23+
b'a' b'\n' b'a'suffix
24+
'a' '\n' 'a'suffix
25+
26+
123 123i32
27+
0b1100_1101 0b1100_1101isize
28+
0o3170 0o3170i64
29+
0xAFAC 0xAFACu32
30+
31+
123.1 123.1f32
32+
// 123.f32 123e-9f32 0e+10
33+
34+
strings!{
35+
"hello \n world!"
36+
37+
"hello \n world!"suffix
38+
39+
r"hello
40+
world!"
41+
42+
r"hello
43+
world!"suffix
44+
45+
b"hello \n world!"
46+
47+
b"hello \n world!"suffix
48+
49+
br"hello
50+
world!"
51+
52+
br"hello
53+
world!"suffix
54+
55+
56+
"hello \
57+
world!"
58+
59+
b"hello \
60+
world!"
61+
62+
br##"hello "#
63+
world!"##suf
64+
}
65+
}
66+

‎sample-sources/patterns.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ fn main() {
2020
[a, b, i.., y, z] => 0,
2121
[a, b, .., y, z] => 0,
2222
[a, b, c] => 0,
23-
// LinkedList!(1,2,3) => 0,
23+
LinkedList!(1,2,3) => 0,
2424
}
2525
}
2626

‎sample-sources/statements.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ fn main() {
1515
2 + { 1 };
1616

1717
// Mac
18-
// println!("hi")
18+
println!("hi")
1919
}

‎sample-sources/types.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ fn main() {
88
let x: &'a i32;
99
let x: &i32;
1010
let x: fn() -> i32;
11-
// let x: fn(i32) -> i32;
12-
// let x: fn(i32,i32);
11+
let x: fn(i32) -> i32;
12+
let x: fn(i32,i32);
1313
let x: !;
1414
let x: (i32,);
1515
let x: (i32,!);
@@ -20,7 +20,7 @@ fn main() {
2020
let x: (i32);
2121
let x: typeof(1i32);
2222
let x: _;
23-
// let x: HList![i32,(),u8];
23+
let x: HList![i32,(),u8];
2424
}
2525

2626
fn foo() -> impl Bound1 + Bound2 + Bound3 { }

‎src/Language/Rust/Parser/Internal.y

+6-21
Original file line numberDiff line numberDiff line change
@@ -209,10 +209,6 @@ import Text.Read (readMaybe)
209209
-- Lifetimes.
210210
LIFETIME { Spanned (LifetimeTok _) _ }
211211
212-
-- macro related
213-
substNt { Spanned (SubstNt _ ) _ }
214-
matchNt { Spanned (MatchNt _ _) _ }
215-
216212
-- Interpolated
217213
ntItem { Spanned (Interpolated (NtItem $$)) _ }
218214
ntBlock { Spanned (Interpolated (NtBlock $$)) _ }
@@ -1380,26 +1376,21 @@ token_tree :: { TokenTree }
13801376
-- # Delimited
13811377
| '(' many(token_tree) ')' { Delimited mempty Paren mempty $2 mempty }
13821378
| '{' many(token_tree) '}' { Delimited mempty Brace mempty $2 mempty }
1383-
| '[' many(token_tree) ']' { Delimited mempty Bracket mempty $2 mempty }
1384-
-- # Sequence
1385-
| '$' '(' many(token_tree) ')' token_not_plus_star '+' { Sequence mempty $3 (Just (unspan $5)) OneOrMore }
1386-
| '$' '(' many(token_tree) ')' token_not_plus_star '*' { Sequence mempty $3 (Just (unspan $5)) ZeroOrMore }
1387-
| '$' '(' many(token_tree) ')' '+' { Sequence mempty $3 Nothing OneOrMore }
1388-
| '$' '(' many(token_tree) ')' '*' { Sequence mempty $3 Nothing ZeroOrMore }
1379+
| '[' many(token_tree) ']' { Delimited mempty Bracket mempty $2 mempty }
13891380
-- # Token
13901381
-- Expression-operator symbols.
1391-
| token_not_plus_star { mkTokenTree $1 }
1392-
| '+' { mkTokenTree $1 }
1393-
| '*' { mkTokenTree $1 }
1382+
| token { let Spanned t s = $1 in Token s t }
13941383

1395-
token_not_plus_star :: { Spanned Token }
1384+
token :: { Spanned Token }
13961385
: '=' { $1 }
13971386
| '<' { $1 }
13981387
| '>' { $1 }
13991388
| '!' { $1 }
14001389
| '~' { $1 }
14011390
| '-' { $1 }
14021391
| '/' { $1 }
1392+
| '+' { $1 }
1393+
| '*' { $1 }
14031394
| '%' { $1 }
14041395
| '^' { $1 }
14051396
| '&' { $1 }
@@ -1437,6 +1428,7 @@ token_not_plus_star :: { Spanned Token }
14371428
| '#' { $1 }
14381429
| '$' %prec DOLLAR { $1 }
14391430
| '?' { $1 }
1431+
| '#!' { $1 }
14401432
-- Literals.
14411433
| byte { $1 }
14421434
| char { $1 }
@@ -1511,9 +1503,6 @@ token_not_plus_star :: { Spanned Token }
15111503
| '_' { $1 }
15121504
-- Lifetimes.
15131505
| LIFETIME { $1 }
1514-
-- Macro related
1515-
| substNt { $1 }
1516-
| matchNt { $1 }
15171506

15181507

15191508
{
@@ -1603,10 +1592,6 @@ addAttrs as (ParenExpr as' e s) = ParenExpr (as ++ as') e s
16031592
addAttrs as (Try as' e s) = Try (as ++ as') e s
16041593

16051594

1606-
-- | Given a spanned token, convert it to a token tree. Basically just move the Span
1607-
mkTokenTree :: Spanned Token -> TokenTree
1608-
mkTokenTree (Spanned t s) = Token s t
1609-
16101595
-- | Given a 'Doc' token, convert it into an attribute
16111596
mkDocAttribute :: Spanned Token -> Attribute Span
16121597
mkDocAttribute (Spanned (Doc docStr sty) s) = Attribute sty' doc True s

‎src/Language/Rust/Parser/Lexer.x

+13-27
Original file line numberDiff line numberDiff line change
@@ -974,7 +974,6 @@ $hexit = [0-9a-fA-F]
974974
-- Macro related
975975

976976
@subst_nt = "$" @ident
977-
@match_nt = @subst_nt ":" @ident
978977

979978
tokens :-
980979

@@ -1076,13 +1075,6 @@ $white+ { \s -> pure (Space Whitespace s) }
10761075
@line_comment { \c -> pure (Space Comment (drop 2 c)) }
10771076
@inline_comment { \_ -> Space Comment <$> nestedComment }
10781077

1079-
"#!" { token Shebang }
1080-
1081-
@subst_nt { \(_:i) -> pure (SubstNt (mkIdent i)) }
1082-
@match_nt { \(_:s) -> let (i,':':n) = Prelude.span (/= ':') s
1083-
in pure (MatchNt (mkIdent i) (mkIdent n))
1084-
}
1085-
10861078
{
10871079

10881080
-- | Make a token.
@@ -1106,8 +1098,7 @@ literal lit = do
11061098
_ -> pure (LiteralTok lit Nothing)
11071099

11081100
-- | Parses a raw string, the closing quotation, and the appropriate number of
1109-
-- '#' characters. Note that there can be more closing '#' characters than
1110-
-- opening ones (this is as per Rust's standard).
1101+
-- '#' characters.
11111102
rawString :: Int -> P String
11121103
rawString n = do
11131104
c_m <- nextChar
@@ -1117,8 +1108,8 @@ rawString n = do
11171108

11181109
-- The string has a chance of being closed
11191110
Just '"' -> do
1120-
n' <- greedyChar '#'
1121-
if n' >= n
1111+
n' <- greedyChar '#' n
1112+
if n' == n
11221113
then pure ""
11231114
else (('"' : replicate n' '#') ++) <$> rawString n
11241115
@@ -1174,12 +1165,13 @@ peekChar = do
11741165
in pure (Just c)
11751166

11761167
-- | Greedily try to eat as many of a given character as possible (and return
1177-
-- how many characters were eaten).
1178-
greedyChar :: Char -> P Int
1179-
greedyChar c = do
1168+
-- how many characters were eaten). The second argument is an upper limit.
1169+
greedyChar :: Char -> Int -> P Int
1170+
greedyChar _ 0 = pure 0
1171+
greedyChar c limit = do
11801172
c_m <- peekChar
11811173
case c_m of
1182-
Just c' | c == c' -> do { _ <- nextChar; n <- greedyChar c; pure (n+1) }
1174+
Just c' | c == c' -> do { _ <- nextChar; n <- greedyChar c (limit - 1); pure (n+1) }
11831175
_ -> pure 0
11841176

11851177
-- | Signal a lexical error.
@@ -1257,19 +1249,13 @@ lexTokens lexer = do
12571249
_ -> (tok :) <$> lexTokens lexer
12581250

12591251
-- | Lex the first line, if it immediately starts with @#!@ (but not @#![@ - that should be an
1260-
-- inner attribute). If this fails to find a shebang line, it consumes no input (in reality it does
1261-
-- consume one token, but it pushed it back).
1252+
-- inner attribute). If this fails to find a shebang line, it consumes no input.
12621253
lexShebangLine :: P (Maybe String)
12631254
lexShebangLine = do
1264-
tok <- lexNonSpace
1265-
case unspan tok of
1266-
Shebang -> do
1267-
c <- peekChar
1268-
case c of
1269-
Just '[' -> pushToken tok *> pure Nothing
1270-
_ -> Just <$> toNewline
1271-
_ -> pushToken tok *> pure Nothing
1272-
1255+
inp <- getInput
1256+
case takeChars 3 inp of
1257+
'#':'!':r | r /= "[" -> Just <$> toNewline
1258+
_ -> pure Nothing
12731259
where
12741260
-- Lexes a string until a newline
12751261
toNewline :: P String

‎src/Language/Rust/Pretty/Internal.hs

+1-7
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import Language.Rust.Syntax.Token
2222
import Language.Rust.Syntax.Ident
2323

2424
import Text.PrettyPrint.Annotated.WL (
25-
hcat, cat, punctuate, group, angles, flatten, align, fillSep, text, vcat, char, annotate,
25+
hcat, punctuate, group, angles, flatten, align, fillSep, text, vcat, char, annotate,
2626
noAnnotate, flatAlt, parens, brackets, (<>), Doc
2727
)
2828
import qualified Text.PrettyPrint.Annotated.WL as WL
@@ -197,10 +197,6 @@ printMac (Mac path tts x) d = annotate x (printPath path False <> "!" <> body)
197197
printTt :: TokenTree -> Doc a
198198
printTt (Token _ t) = printToken t
199199
printTt (Delimited _ d _ tts _) = block d True mempty mempty [ fillSep (printTt <$> tts) ]
200-
printTt (Sequence _ tts s op) = "$" <> parens body <> perhaps printToken s <> suf
201-
where body = cat [ printTt tt | tt <- tts ]
202-
suf = case op of ZeroOrMore -> "*"
203-
OneOrMore -> "+"
204200

205201
-- | Print a token (@token_to_string@)
206202
-- Single character expression-operator symbols.
@@ -274,8 +270,6 @@ printToken (Doc d OuterDoc) = "/**" <> text d <> "*/"
274270
printToken Shebang = "#!"
275271
-- Macro related
276272
printToken (Interpolated n) = noAnnotate (printNonterminal n)
277-
printToken (MatchNt i s) = "$" <> printIdent i <> ":" <> printIdent s
278-
printToken (SubstNt s) = "$" <> printIdent s
279273
-- Other
280274
printToken t = error $ "printToken: " ++ show t
281275

0 commit comments

Comments
 (0)