Skip to content

Commit f02dcb4

Browse files
committed
Add index field to Position
1 parent 059a959 commit f02dcb4

File tree

5 files changed

+73
-70
lines changed

5 files changed

+73
-70
lines changed

Diff for: CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Breaking changes:
2828
- Rename module prefix from `Text.Parsing.Parser` to `Parsing` (#169 by @jamesdbrock)
2929
- Replace the `regex` parser. (#170 by @jamesdbrock)
3030
- Reorganize Combinators for #154 (#182 by @jamesdbrock)
31+
- Add the `index` field to `Position`. (#171 by @jamesdbrock)
3132

3233
New features:
3334

Diff for: src/Parsing/Indent.purs

+13-27
Original file line numberDiff line numberDiff line change
@@ -85,18 +85,6 @@ get' = do
8585
put' :: forall s. Position -> IndentParser s Unit
8686
put' p = lift (put p)
8787

88-
sourceColumn :: Position -> Int
89-
sourceColumn (Position { line: _, column: c }) = c
90-
91-
sourceLine :: Position -> Int
92-
sourceLine (Position { line: l, column: _ }) = l
93-
94-
setSourceLine :: Position -> Int -> Position
95-
setSourceLine (Position { line: _, column: c }) l = Position { line: l, column: c }
96-
97-
biAp :: forall a b c. (a -> b) -> (b -> b -> c) -> a -> a -> c
98-
biAp f c v1 v2 = c (f v1) (f v2)
99-
10088
many1 :: forall s m a. ParserT s m a -> ParserT s m (List a)
10189
many1 p = lift2 Cons p (many p)
10290

@@ -121,19 +109,17 @@ withBlock' = withBlock (flip const)
121109
-- | Parses only when indented past the level of the reference
122110
indented :: forall s. IndentParser s Unit
123111
indented = do
124-
pos <- position
125-
s <- get'
126-
if biAp sourceColumn (<=) pos s then fail "not indented"
127-
else do
128-
put' $ setSourceLine s (sourceLine pos)
129-
pure unit
112+
Position p <- position
113+
Position s <- get'
114+
if p.column <= s.column then fail "not indented"
115+
else put' $ Position { index: 0, line: p.line, column: s.column }
130116

131117
-- | Same as `indented`, but does not change internal state
132118
indented' :: forall s. IndentParser s Unit
133119
indented' = do
134-
pos <- position
135-
s <- get'
136-
if biAp sourceColumn (<=) pos s then fail "not indented" else pure unit
120+
Position p <- position
121+
Position s <- get'
122+
if p.column <= s.column then fail "not indented" else pure unit
137123

138124
-- | Parses only when indented past the level of the reference or on the same line
139125
sameOrIndented :: forall s. IndentParser s Unit
@@ -142,9 +128,9 @@ sameOrIndented = sameLine <|> indented
142128
-- | Parses only on the same line as the reference
143129
sameLine :: forall s. IndentParser s Unit
144130
sameLine = do
145-
pos <- position
146-
s <- get'
147-
if biAp sourceLine (==) pos s then pure unit else fail "over one line"
131+
Position p <- position
132+
Position s <- get'
133+
if p.line == s.line then pure unit else fail "over one line"
148134

149135
-- | Parses a block of lines at the same indentation level
150136
block1 :: forall s a. IndentParser s a -> IndentParser s (List a)
@@ -169,9 +155,9 @@ withPos x = do
169155
-- | Ensures the current indentation level matches that of the reference
170156
checkIndent :: forall s. IndentParser s Unit
171157
checkIndent = do
172-
s <- get'
173-
p <- position
174-
if biAp sourceColumn (==) p s then pure unit else fail "indentation doesn't match"
158+
Position p <- position
159+
Position s <- get'
160+
if p.column == s.column then pure unit else fail "indentation doesn't match"
175161

176162
-- | Run the result of an indentation sensitive parse
177163
runIndent :: forall a. State Position a -> a

Diff for: src/Parsing/Pos.purs

+15-10
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,30 @@ module Parsing.Pos where
33
import Prelude
44

55
import Data.Generic.Rep (class Generic)
6+
import Data.Show.Generic (genericShow)
67

78
-- | `Position` represents the position of the parser in the input.
89
-- |
9-
-- | - `line` is the current line in the input
10-
-- | - `column` is the column of the next character in the current line that will be parsed
10+
-- | - `index` is the offset from the start of the input. Starts at 0.
11+
-- | - `line` is the current line in the input. Starts at 1.
12+
-- | - `column` is the column of the next character in the current line that
13+
-- | will be parsed. Starts at 1.
1114
newtype Position = Position
12-
{ line :: Int
15+
{ index :: Int
16+
, line :: Int
1317
, column :: Int
1418
}
1519

16-
derive instance genericPosition :: Generic Position _
20+
derive instance Generic Position _
21+
instance Show Position where
22+
show x = genericShow x
1723

18-
instance showPosition :: Show Position where
19-
show (Position { line: line, column: column }) =
20-
"(Position { line: " <> show line <> ", column: " <> show column <> " })"
24+
instance Eq Position where
25+
eq (Position l) (Position r) = l.index == r.index
2126

22-
derive instance eqPosition :: Eq Position
23-
derive instance ordPosition :: Ord Position
27+
instance Ord Position where
28+
compare (Position l) (Position r) = compare l.index r.index
2429

2530
-- | The `Position` before any input has been parsed.
2631
initialPos :: Position
27-
initialPos = Position { line: 1, column: 1 }
32+
initialPos = Position { index: 0, line: 1, column: 1 }

Diff for: src/Parsing/String.purs

+20-6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@
1919
-- | The other primitive parsers, which return `CodePoint` and `String` types,
2020
-- | can parse the full Unicode character set. All of the primitive parsers
2121
-- | in this module can be used together.
22+
-- |
23+
-- | ### Position
24+
-- |
25+
-- | In a `String` parser, the `Position {index}` counts the number of
26+
-- | Unicode `CodePoint`s since the beginning of the input string.
27+
-- |
28+
-- | Each tab character (`0x09`) encountered in a `String` parser will advance
29+
-- | the `Position {column}` to the next tab stop (tab stops are at columns 9, 17, 25, …).
30+
-- |
31+
-- | These patterns will advance the `Position {line}` by 1 and reset
32+
-- | the `Position {column}` to 1:
33+
-- | - newline (`0x0A`)
34+
-- | - carriage-return (`0x0D`)
35+
-- | - carriage-return-newline (`0x0D 0x0A`)
2236
module Parsing.String
2337
( string
2438
, eof
@@ -187,14 +201,14 @@ updatePosString pos before after = case uncons before of
187201
-- | Updates a `Position` by adding the columns and lines in a
188202
-- | single `CodePoint`.
189203
updatePosSingle :: Position -> CodePoint -> String -> Position
190-
updatePosSingle (Position { line, column }) cp after = case fromEnum cp of
191-
10 -> Position { line: line + 1, column: 1 } -- "\n"
204+
updatePosSingle (Position { index, line, column }) cp after = case fromEnum cp of
205+
10 -> Position { index: index + 1, line: line + 1, column: 1 } -- "\n"
192206
13 ->
193207
case codePointAt 0 after of
194-
Just nextCp | fromEnum nextCp == 10 -> Position { line, column } -- "\r\n" lookahead
195-
_ -> Position { line: line + 1, column: 1 } -- "\r"
196-
9 -> Position { line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns?
197-
_ -> Position { line, column: column + 1 }
208+
Just nextCp | fromEnum nextCp == 10 -> Position { index: index + 1, line, column } -- "\r\n" lookahead
209+
_ -> Position { index: index + 1, line: line + 1, column: 1 } -- "\r"
210+
9 -> Position { index: index + 1, line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns?
211+
_ -> Position { index: index + 1, line, column: column + 1 }
198212

199213
-- | Combinator which returns both the result of a parse and the slice of
200214
-- | the input that was consumed while it was being parsed.

Diff for: test/Main.purs

+24-27
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ stackSafeLoopsTest = do
126126
parseErrorTestPosition
127127
(many1Till (string "a") (string "b"))
128128
"baa"
129-
(Position { line: 1, column: 1 })
129+
(Position { index: 0, line: 1, column: 1 })
130130

131131
parseTest "a,a,a,b,a,a" (toUnfoldable [ "a", "a", "a" ]) $
132132
sepEndBy (string "a") (string ",")
@@ -142,7 +142,7 @@ stackSafeLoopsTest = do
142142
parseErrorTestPosition
143143
(sepEndBy1 (string "a") (string ","))
144144
"b,a,a"
145-
(Position { line: 1, column: 1 })
145+
(Position { index: 0, line: 1, column: 1 })
146146

147147
-- 8 `div` (8 `div` 2) == 2
148148
parseTest "8x8x2" 2 $
@@ -154,7 +154,7 @@ stackSafeLoopsTest = do
154154
parseErrorTestPosition
155155
(chainr1 digit (string "x" $> div))
156156
""
157-
(Position { line: 1, column: 1 })
157+
(Position { index: 0, line: 1, column: 1 })
158158

159159
-- (8 `div` 2) `div` 2 == 2
160160
parseTest "8x2x2" 2 $
@@ -166,15 +166,15 @@ stackSafeLoopsTest = do
166166
parseErrorTestPosition
167167
(chainl1 digit (string "x" $> div))
168168
""
169-
(Position { line: 1, column: 1 })
169+
(Position { index: 0, line: 1, column: 1 })
170170

171171
parseTest "aaaabcd" "b"
172172
$ skipMany1 (string "a")
173173
*> string "b"
174174
parseErrorTestPosition
175175
(skipMany1 (string "a"))
176176
"bcd"
177-
(Position { line: 1, column: 1 })
177+
(Position { index: 0, line: 1, column: 1 })
178178

179179
parseTest "aaaabcd" "b"
180180
$ skipMany (string "a")
@@ -188,7 +188,7 @@ stackSafeLoopsTest = do
188188
parseErrorTestPosition
189189
(many1 (string "a"))
190190
""
191-
(Position { line: 1, column: 1 })
191+
(Position { index: 0, line: 1, column: 1 })
192192

193193
parseTest "a,a,ab" (toUnfoldable [ "a", "a", "a" ])
194194
$ sepBy (string "a") (string ",")
@@ -202,11 +202,11 @@ stackSafeLoopsTest = do
202202
parseErrorTestPosition
203203
(sepBy1 (string "a") (string ","))
204204
""
205-
(Position { line: 1, column: 1 })
205+
(Position { index: 0, line: 1, column: 1 })
206206
parseErrorTestPosition
207207
(sepBy1 (string "a") (string ","))
208208
"a,"
209-
(Position { line: 1, column: 3 })
209+
(Position { index: 2, line: 1, column: 3 })
210210

211211
parseTest "a,a,a,b" (toUnfoldable [ "a", "a", "a" ])
212212
$ endBy (string "a") (string ",")
@@ -220,11 +220,11 @@ stackSafeLoopsTest = do
220220
parseErrorTestPosition
221221
(endBy1 (string "a") (string ","))
222222
""
223-
(Position { line: 1, column: 1 })
223+
(Position { index: 0, line: 1, column: 1 })
224224
parseErrorTestPosition
225225
(endBy1 (string "a") (string ","))
226226
"a,a"
227-
(Position { line: 1, column: 4 })
227+
(Position { index: 3, line: 1, column: 4 })
228228

229229
data TestToken = A | B
230230

@@ -245,10 +245,7 @@ testTokenParser :: TokenParser
245245
testTokenParser = makeTokenParser haskellDef
246246

247247
mkPos :: Int -> Position
248-
mkPos n = mkPos' n 1
249-
250-
mkPos' :: Int -> Int -> Position
251-
mkPos' column line = Position { column: column, line: line }
248+
mkPos n = Position { index: n - 1, line: 1, column: n }
252249

253250
type TestM = Effect Unit
254251

@@ -575,12 +572,12 @@ main = do
575572
parseErrorTestPosition
576573
(many $ char 'f' *> char '?')
577574
"foo"
578-
(Position { column: 2, line: 1 })
575+
(Position { index: 1, column: 2, line: 1 })
579576

580577
parseErrorTestPosition
581578
(satisfy (_ == '?'))
582579
"foo"
583-
(Position { column: 1, line: 1 })
580+
(Position { index: 0, column: 1, line: 1 })
584581

585582
parseTest
586583
"foo"
@@ -605,17 +602,17 @@ main = do
605602

606603
parseTest "rest" "rest" rest
607604
parseTest "rest" unit (rest *> eof)
608-
parseTest "rest\nrest" (Position { line: 2, column: 5 }) (rest *> position)
605+
parseTest "rest\nrest" (Position { index: 9, line: 2, column: 5 }) (rest *> position)
609606

610607
parseErrorTestPosition
611608
(rest *> notFollowedBy eof)
612609
"aa\naa"
613-
(Position { column: 3, line: 2 })
610+
(Position { index: 5, column: 3, line: 2 })
614611

615612
parseErrorTestPosition
616-
anyChar
617-
"𝅘𝅥𝅯"
618-
(Position { column: 1, line: 1 })
613+
(string "𝅘𝅥𝅘𝅥𝅮" *> string "𝅘𝅥𝅘𝅥𝅮")
614+
"𝅘𝅥𝅘𝅥𝅮x𝅘𝅥𝅯"
615+
(Position { index: 2, column: 3, line: 1 })
619616

620617
parseTest "𝅘𝅥𝅘𝅥𝅮x𝅘𝅥𝅯" [ "𝅘𝅥", "𝅘𝅥𝅮", "x", "𝅘𝅥𝅯" ] do
621618
quarter <- anyCodePoint
@@ -631,8 +628,8 @@ main = do
631628

632629
parseTest "abcd" "ab" $ takeN 2
633630
parseTest "abcd" "" $ takeN 0
634-
parseErrorTestPosition (takeN 10) "abcd" (Position { column: 1, line: 1 })
635-
parseErrorTestPosition (takeN (-1)) "abcd" (Position { column: 1, line: 1 })
631+
parseErrorTestPosition (takeN 10) "abcd" (Position { index: 0, column: 1, line: 1 })
632+
parseErrorTestPosition (takeN (-1)) "abcd" (Position { index: 0, column: 1, line: 1 })
636633

637634
parseErrorTestMessage
638635
(noneOfCodePoints $ SCP.toCodePointArray "❓✅")
@@ -673,10 +670,10 @@ main = do
673670
parseTest "ababab" [ 'b', 'b', 'b' ] $ Array.many (char 'a' *> char 'b')
674671
parseTest "abaXab" [ 'b' ] $ Array.many (try (char 'a' *> char 'b'))
675672

676-
parseErrorTestPosition (string "abc") "bcd" (Position { column: 1, line: 1 })
677-
parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { column: 4, line: 1 })
678-
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 })
679-
parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { column: 10, line: 1 })
673+
parseErrorTestPosition (string "abc") "bcd" (Position { index: 0, column: 1, line: 1 })
674+
parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { index: 3, column: 4, line: 1 })
675+
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { index: 6, column: 1, line: 4 })
676+
parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { index: 2, column: 10, line: 1 })
680677

681678
log "\nTESTS number\n"
682679

0 commit comments

Comments
 (0)