Skip to content

Add index field to Position #171

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Breaking changes:
- Rename module prefix from `Text.Parsing.Parser` to `Parsing` (#169 by @jamesdbrock)
- Replace the `regex` parser. (#170 by @jamesdbrock)
- Reorganize Combinators for #154 (#182 by @jamesdbrock)
- Add the `index` field to `Position`. (#171 by @jamesdbrock)

New features:

Expand Down
40 changes: 13 additions & 27 deletions src/Parsing/Indent.purs
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,6 @@ get' = do
put' :: forall s. Position -> IndentParser s Unit
put' p = lift (put p)

sourceColumn :: Position -> Int
sourceColumn (Position { line: _, column: c }) = c

sourceLine :: Position -> Int
sourceLine (Position { line: l, column: _ }) = l

setSourceLine :: Position -> Int -> Position
setSourceLine (Position { line: _, column: c }) l = Position { line: l, column: c }

biAp :: forall a b c. (a -> b) -> (b -> b -> c) -> a -> a -> c
biAp f c v1 v2 = c (f v1) (f v2)

many1 :: forall s m a. ParserT s m a -> ParserT s m (List a)
many1 p = lift2 Cons p (many p)

Expand All @@ -121,19 +109,17 @@ withBlock' = withBlock (flip const)
-- | Parses only when indented past the level of the reference
indented :: forall s. IndentParser s Unit
indented = do
pos <- position
s <- get'
if biAp sourceColumn (<=) pos s then fail "not indented"
else do
put' $ setSourceLine s (sourceLine pos)
pure unit
Position p <- position
Position s <- get'
if p.column <= s.column then fail "not indented"
else put' $ Position { index: 0, line: p.line, column: s.column }

-- | Same as `indented`, but does not change internal state
indented' :: forall s. IndentParser s Unit
indented' = do
pos <- position
s <- get'
if biAp sourceColumn (<=) pos s then fail "not indented" else pure unit
Position p <- position
Position s <- get'
if p.column <= s.column then fail "not indented" else pure unit

-- | Parses only when indented past the level of the reference or on the same line
sameOrIndented :: forall s. IndentParser s Unit
Expand All @@ -142,9 +128,9 @@ sameOrIndented = sameLine <|> indented
-- | Parses only on the same line as the reference
sameLine :: forall s. IndentParser s Unit
sameLine = do
pos <- position
s <- get'
if biAp sourceLine (==) pos s then pure unit else fail "over one line"
Position p <- position
Position s <- get'
if p.line == s.line then pure unit else fail "over one line"

-- | Parses a block of lines at the same indentation level
block1 :: forall s a. IndentParser s a -> IndentParser s (List a)
Expand All @@ -169,9 +155,9 @@ withPos x = do
-- | Ensures the current indentation level matches that of the reference
checkIndent :: forall s. IndentParser s Unit
checkIndent = do
s <- get'
p <- position
if biAp sourceColumn (==) p s then pure unit else fail "indentation doesn't match"
Position p <- position
Position s <- get'
if p.column == s.column then pure unit else fail "indentation doesn't match"

-- | Run the result of an indentation sensitive parse
runIndent :: forall a. State Position a -> a
Expand Down
25 changes: 15 additions & 10 deletions src/Parsing/Pos.purs
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,30 @@ module Parsing.Pos where
import Prelude

import Data.Generic.Rep (class Generic)
import Data.Show.Generic (genericShow)

-- | `Position` represents the position of the parser in the input.
-- |
-- | - `line` is the current line in the input
-- | - `column` is the column of the next character in the current line that will be parsed
-- | - `index` is the position since the start of the input. Starts at 0.
-- | - `line` is the current line in the input. Starts at 1.
-- | - `column` is the column of the next character in the current line that
-- | will be parsed. Starts at 1.
newtype Position = Position
{ line :: Int
{ index :: Int
, line :: Int
, column :: Int
}

derive instance genericPosition :: Generic Position _
derive instance Generic Position _
instance Show Position where
show x = genericShow x

instance showPosition :: Show Position where
show (Position { line: line, column: column }) =
"(Position { line: " <> show line <> ", column: " <> show column <> " })"
instance Eq Position where
eq (Position l) (Position r) = l.index == r.index

derive instance eqPosition :: Eq Position
derive instance ordPosition :: Ord Position
instance Ord Position where
compare (Position l) (Position r) = compare l.index r.index

-- | The `Position` before any input has been parsed.
initialPos :: Position
initialPos = Position { line: 1, column: 1 }
initialPos = Position { index: 0, line: 1, column: 1 }
26 changes: 20 additions & 6 deletions src/Parsing/String.purs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,20 @@
-- | The other primitive parsers, which return `CodePoint` and `String` types,
-- | can parse the full Unicode character set. All of the primitive parsers
-- | in this module can be used together.
-- |
-- | ### Position
-- |
-- | In a `String` parser, the `Position {index}` counts the number of
-- | unicode `CodePoint`s since the beginning of the input string.
-- |
-- | Each tab character (`0x09`) encountered in a `String` parser will advance
-- | the `Position {column}` by 8.
-- |
-- | These patterns will advance the `Position {line}` by 1 and reset
-- | the `Position {column}` to 1:
-- | - newline (`0x0A`)
-- | - carriage-return (`0x0D`)
-- | - carriage-return-newline (`0x0D 0x0A`)
module Parsing.String
( string
, eof
Expand Down Expand Up @@ -187,14 +201,14 @@ updatePosString pos before after = case uncons before of
-- | Updates a `Position` by adding the columns and lines in a
-- | single `CodePoint`.
updatePosSingle :: Position -> CodePoint -> String -> Position
updatePosSingle (Position { line, column }) cp after = case fromEnum cp of
10 -> Position { line: line + 1, column: 1 } -- "\n"
updatePosSingle (Position { index, line, column }) cp after = case fromEnum cp of
10 -> Position { index: index + 1, line: line + 1, column: 1 } -- "\n"
13 ->
case codePointAt 0 after of
Just nextCp | fromEnum nextCp == 10 -> Position { line, column } -- "\r\n" lookahead
_ -> Position { line: line + 1, column: 1 } -- "\r"
9 -> Position { line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns?
_ -> Position { line, column: column + 1 }
Just nextCp | fromEnum nextCp == 10 -> Position { index: index + 1, line, column } -- "\r\n" lookahead
_ -> Position { index: index + 1, line: line + 1, column: 1 } -- "\r"
9 -> Position { index: index + 1, line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns?
_ -> Position { index: index + 1, line, column: column + 1 }

-- | Combinator which returns both the result of a parse and the slice of
-- | the input that was consumed while it was being parsed.
Expand Down
51 changes: 24 additions & 27 deletions test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ stackSafeLoopsTest = do
parseErrorTestPosition
(many1Till (string "a") (string "b"))
"baa"
(Position { line: 1, column: 1 })
(Position { index: 0, line: 1, column: 1 })

parseTest "a,a,a,b,a,a" (toUnfoldable [ "a", "a", "a" ]) $
sepEndBy (string "a") (string ",")
Expand All @@ -142,7 +142,7 @@ stackSafeLoopsTest = do
parseErrorTestPosition
(sepEndBy1 (string "a") (string ","))
"b,a,a"
(Position { line: 1, column: 1 })
(Position { index: 0, line: 1, column: 1 })

-- 8 `div` (8 `div` 2) == 2
parseTest "8x8x2" 2 $
Expand All @@ -154,7 +154,7 @@ stackSafeLoopsTest = do
parseErrorTestPosition
(chainr1 digit (string "x" $> div))
""
(Position { line: 1, column: 1 })
(Position { index: 0, line: 1, column: 1 })

-- (8 `div` 2) `div` 2 == 2
parseTest "8x2x2" 2 $
Expand All @@ -166,15 +166,15 @@ stackSafeLoopsTest = do
parseErrorTestPosition
(chainl1 digit (string "x" $> div))
""
(Position { line: 1, column: 1 })
(Position { index: 0, line: 1, column: 1 })

parseTest "aaaabcd" "b"
$ skipMany1 (string "a")
*> string "b"
parseErrorTestPosition
(skipMany1 (string "a"))
"bcd"
(Position { line: 1, column: 1 })
(Position { index: 0, line: 1, column: 1 })

parseTest "aaaabcd" "b"
$ skipMany (string "a")
Expand All @@ -188,7 +188,7 @@ stackSafeLoopsTest = do
parseErrorTestPosition
(many1 (string "a"))
""
(Position { line: 1, column: 1 })
(Position { index: 0, line: 1, column: 1 })

parseTest "a,a,ab" (toUnfoldable [ "a", "a", "a" ])
$ sepBy (string "a") (string ",")
Expand All @@ -202,11 +202,11 @@ stackSafeLoopsTest = do
parseErrorTestPosition
(sepBy1 (string "a") (string ","))
""
(Position { line: 1, column: 1 })
(Position { index: 0, line: 1, column: 1 })
parseErrorTestPosition
(sepBy1 (string "a") (string ","))
"a,"
(Position { line: 1, column: 3 })
(Position { index: 2, line: 1, column: 3 })

parseTest "a,a,a,b" (toUnfoldable [ "a", "a", "a" ])
$ endBy (string "a") (string ",")
Expand All @@ -220,11 +220,11 @@ stackSafeLoopsTest = do
parseErrorTestPosition
(endBy1 (string "a") (string ","))
""
(Position { line: 1, column: 1 })
(Position { index: 0, line: 1, column: 1 })
parseErrorTestPosition
(endBy1 (string "a") (string ","))
"a,a"
(Position { line: 1, column: 4 })
(Position { index: 3, line: 1, column: 4 })

data TestToken = A | B

Expand All @@ -245,10 +245,7 @@ testTokenParser :: TokenParser
testTokenParser = makeTokenParser haskellDef

mkPos :: Int -> Position
mkPos n = mkPos' n 1

mkPos' :: Int -> Int -> Position
mkPos' column line = Position { column: column, line: line }
mkPos n = Position { index: n - 1, line: 1, column: n }

type TestM = Effect Unit

Expand Down Expand Up @@ -575,12 +572,12 @@ main = do
parseErrorTestPosition
(many $ char 'f' *> char '?')
"foo"
(Position { column: 2, line: 1 })
(Position { index: 1, column: 2, line: 1 })

parseErrorTestPosition
(satisfy (_ == '?'))
"foo"
(Position { column: 1, line: 1 })
(Position { index: 0, column: 1, line: 1 })

parseTest
"foo"
Expand All @@ -605,17 +602,17 @@ main = do

parseTest "rest" "rest" rest
parseTest "rest" unit (rest *> eof)
parseTest "rest\nrest" (Position { line: 2, column: 5 }) (rest *> position)
parseTest "rest\nrest" (Position { index: 9, line: 2, column: 5 }) (rest *> position)

parseErrorTestPosition
(rest *> notFollowedBy eof)
"aa\naa"
(Position { column: 3, line: 2 })
(Position { index: 5, column: 3, line: 2 })

parseErrorTestPosition
anyChar
"𝅘𝅥𝅯"
(Position { column: 1, line: 1 })
(string "𝅘𝅥𝅘𝅥𝅮" *> string "𝅘𝅥𝅘𝅥𝅮")
"𝅘𝅥𝅘𝅥𝅮x𝅘𝅥𝅯"
(Position { index: 2, column: 3, line: 1 })

parseTest "𝅘𝅥𝅘𝅥𝅮x𝅘𝅥𝅯" [ "𝅘𝅥", "𝅘𝅥𝅮", "x", "𝅘𝅥𝅯" ] do
quarter <- anyCodePoint
Expand All @@ -631,8 +628,8 @@ main = do

parseTest "abcd" "ab" $ takeN 2
parseTest "abcd" "" $ takeN 0
parseErrorTestPosition (takeN 10) "abcd" (Position { column: 1, line: 1 })
parseErrorTestPosition (takeN (-1)) "abcd" (Position { column: 1, line: 1 })
parseErrorTestPosition (takeN 10) "abcd" (Position { index: 0, column: 1, line: 1 })
parseErrorTestPosition (takeN (-1)) "abcd" (Position { index: 0, column: 1, line: 1 })

parseErrorTestMessage
(noneOfCodePoints $ SCP.toCodePointArray "❓✅")
Expand Down Expand Up @@ -673,10 +670,10 @@ main = do
parseTest "ababab" [ 'b', 'b', 'b' ] $ Array.many (char 'a' *> char 'b')
parseTest "abaXab" [ 'b' ] $ Array.many (try (char 'a' *> char 'b'))

parseErrorTestPosition (string "abc") "bcd" (Position { column: 1, line: 1 })
parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { column: 4, line: 1 })
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 })
parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { column: 10, line: 1 })
parseErrorTestPosition (string "abc") "bcd" (Position { index: 0, column: 1, line: 1 })
parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { index: 3, column: 4, line: 1 })
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { index: 6, column: 1, line: 4 })
parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { index: 2, column: 10, line: 1 })

log "\nTESTS number\n"

Expand Down