diff --git a/src/Parlot/Scanner.cs b/src/Parlot/Scanner.cs index 3b642b9..54282b5 100644 --- a/src/Parlot/Scanner.cs +++ b/src/Parlot/Scanner.cs @@ -65,7 +65,7 @@ public bool SkipWhiteSpace() [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ReadFirstThenOthers(Func first, Func other) - => ReadFirstThenOthers(first, other, out _); + => ReadFirstThenOthers(first, other, out _); public bool ReadFirstThenOthers(Func first, Func other, out TokenResult result) { @@ -143,7 +143,7 @@ public bool ReadDecimal(out TokenResult result) [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ReadInteger() => ReadInteger(out _); - + public bool ReadInteger(out TokenResult result) { // perf: fast path to prevent a copy of the position @@ -198,7 +198,7 @@ public bool ReadWhile(Func predicate, out TokenResult result) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ReadNonWhiteSpace() => ReadNonWhiteSpace(out _); + public bool ReadNonWhiteSpace() => ReadNonWhiteSpace(out _); public bool ReadNonWhiteSpace(out TokenResult result) { @@ -206,7 +206,7 @@ public bool ReadNonWhiteSpace(out TokenResult result) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ReadNonWhiteSpaceOrNewLine() => ReadNonWhiteSpaceOrNewLine(out _); + public bool ReadNonWhiteSpaceOrNewLine() => ReadNonWhiteSpaceOrNewLine(out _); public bool ReadNonWhiteSpaceOrNewLine(out TokenResult result) { @@ -241,8 +241,8 @@ public bool ReadChar(char c, out TokenResult result) /// Reads the specific expected text. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ReadText(string text, StringComparer comparer) => ReadText(text, comparer, out _); - + public bool ReadText(string text, StringComparer comparer) => ReadText(text, comparer, out _); + /// /// Reads the specific expected text. 
/// <summary>
/// Reads a sequence token enclosed in arbitrary start and end characters.
/// </summary>
/// <param name="startSequenceChar">The character that must be at the current cursor position for the read to start.</param>
/// <param name="endSequenceChar">The character that terminates the sequence. Two consecutive occurrences are treated as content and do not terminate it.</param>
/// <param name="result">Set to a successful token spanning both delimiters, or to a failed token when nothing is read.</param>
/// <returns><c>true</c> if a delimited sequence was read and the cursor advanced past it; otherwise <c>false</c>, with the cursor left where it was.</returns>
/// <remarks>
/// This method doesn't unescape the sequence, it only locates its end.
/// The resulting token contains the original start and end characters.
/// When a doubled end character is not followed by a later terminator, the first character of that
/// pair is used as the terminator and the second one is left unread.
/// </remarks>
public bool ReadNonEscapableSequence(char startSequenceChar, char endSequenceChar, out TokenResult result)
{
    if (Cursor.Current != startSequenceChar)
    {
        result = TokenResult.Fail();
        return false;
    }

    var openingOffset = Cursor.Offset;

    // Start looking right after the opening character so that an empty sequence
    // (e.g. '' or []) is recognized. The loop condition also keeps IndexOf from
    // being called with a start index beyond the end of the buffer.
    var searchFrom = openingOffset + 1;

    // Offset of the best terminator candidate found so far, -1 when there is none.
    var closingOffset = -1;

    while (searchFrom < Cursor.Buffer.Length)
    {
        var candidate = Cursor.Buffer.IndexOf(endSequenceChar, searchFrom);

        if (candidate == -1)
        {
            break;
        }

        closingOffset = candidate;

        var following = candidate + 1;

        if (following >= Cursor.Buffer.Length || Cursor.Buffer[following] != endSequenceChar)
        {
            // A lone end character: this is the real terminator.
            break;
        }

        // Doubled end character: part of the content, keep searching after the pair.
        // closingOffset is kept as a fallback in case no later terminator exists.
        searchFrom = candidate + 2;
    }

    if (closingOffset == -1)
    {
        // There is no end sequence character, not a valid non-escapable sequence.
        result = TokenResult.Fail();
        return false;
    }

    var start = Cursor.Position;

    // Move the cursor just past the terminator so the token includes both delimiters.
    Cursor.Advance(closingOffset + 1 - openingOffset);

    result = TokenResult.Succeed(Buffer, start.Offset, Cursor.Offset);
    return true;
}
/// <summary>
/// Checks <c>ReadNonEscapableSequence</c> against delimited inputs.
/// An empty <paramref name="expected"/> marks an input that must be rejected; such inputs are
/// probed with single quotes as delimiters. Otherwise the delimiters are taken from the first
/// and last characters of <paramref name="expected"/>.
/// </summary>
[Theory]
[InlineData("'Lorem ipsum'", "'Lorem ipsum'")]
[InlineData("'Lorem \n ipsum'", "'Lorem \n ipsum'")]
[InlineData("'Lorem '' ipsum'", "'Lorem '' ipsum'")]
[InlineData("'Lorem ipsum", "")]
[InlineData("Lorem ' ipsum", "")]
[InlineData("'Lorem ' ipsum", "'Lorem '")]
[InlineData("Lorem ' ipsum'", "")]
[InlineData("'Lorem '' i''ps''um'", "'Lorem '' i''ps''um'")]
[InlineData(@"""Lorem """" ipsum""", "\"Lorem \"\" ipsum\"")]
[InlineData("[mytable]", "[mytable]")]
[InlineData("[myta[ble]", "[myta[ble]")]
[InlineData("[myta]]ble]", "[myta]]ble]")]
[InlineData(@"""Lorem """""""" ipsum""", "\"Lorem \"\"\"\" ipsum\"")]
public void ShouldReadNonEscapableString(string text, string expected)
{
    var expectFailure = expected.Length == 0;

    // Rejected inputs carry no expected text to take delimiters from, so fall back to quotes.
    var opening = expectFailure ? '\'' : expected[0];
    var closing = expectFailure ? '\'' : expected[expected.Length - 1];

    Scanner scanner = new(text);

    var wasRead = scanner.ReadNonEscapableSequence(opening, closing, out var token);

    if (expectFailure)
    {
        Assert.False(wasRead);
        return;
    }

    Assert.True(wasRead);
    Assert.Equal(expected, token.GetText());
}