From 378c82352167b0a0a625a9d8d8b0fdc883b1a4d1 Mon Sep 17 00:00:00 2001 From: PENINNI2 Date: Thu, 11 Mar 2021 10:41:19 +0100 Subject: [PATCH 1/6] Added possibility to read non escapable sequences --- src/Parlot/Scanner.cs | 62 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/src/Parlot/Scanner.cs b/src/Parlot/Scanner.cs index 3b642b9..65106b7 100644 --- a/src/Parlot/Scanner.cs +++ b/src/Parlot/Scanner.cs @@ -65,7 +65,7 @@ public bool SkipWhiteSpace() [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ReadFirstThenOthers(Func first, Func other) - => ReadFirstThenOthers(first, other, out _); + => ReadFirstThenOthers(first, other, out _); public bool ReadFirstThenOthers(Func first, Func other, out TokenResult result) { @@ -143,7 +143,7 @@ public bool ReadDecimal(out TokenResult result) [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ReadInteger() => ReadInteger(out _); - + public bool ReadInteger(out TokenResult result) { // perf: fast path to prevent a copy of the position @@ -198,7 +198,7 @@ public bool ReadWhile(Func predicate, out TokenResult result) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ReadNonWhiteSpace() => ReadNonWhiteSpace(out _); + public bool ReadNonWhiteSpace() => ReadNonWhiteSpace(out _); public bool ReadNonWhiteSpace(out TokenResult result) { @@ -206,7 +206,7 @@ public bool ReadNonWhiteSpace(out TokenResult result) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ReadNonWhiteSpaceOrNewLine() => ReadNonWhiteSpaceOrNewLine(out _); + public bool ReadNonWhiteSpaceOrNewLine() => ReadNonWhiteSpaceOrNewLine(out _); public bool ReadNonWhiteSpaceOrNewLine(out TokenResult result) { @@ -241,8 +241,8 @@ public bool ReadChar(char c, out TokenResult result) /// Reads the specific expected text. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ReadText(string text, StringComparer comparer) => ReadText(text, comparer, out _); - + public bool ReadText(string text, StringComparer comparer) => ReadText(text, comparer, out _); + /// /// Reads the specific expected text. /// @@ -261,7 +261,7 @@ public bool ReadText(string text, StringComparer comparer, out TokenResult resul int start = Cursor.Offset; Cursor.Advance(text.Length); result = TokenResult.Succeed(Buffer, start, Cursor.Offset); - + return true; } @@ -472,5 +472,51 @@ private bool ReadQuotedString(char quoteChar, out TokenResult result) return true; } + + /// + /// Reads a sequence token enclosed in arbritrary start and end characters. + /// + /// + /// This method doesn't escape the string, but only validates its content is syntactically correct. + /// The resulting Span contains the original quotes. + /// + public bool ReadNonEscapableSequence(char startSequenceChar, char endSequenceChar, out TokenResult result) + { + var startChar = Cursor.Current; + + if (startChar != startSequenceChar) + { + result = TokenResult.Fail(); + return false; + } + + // Fast path if there aren't any escape char until next quote + var startOffset = Cursor.Offset + 1; + + int nextQuote; + do + { + nextQuote = Cursor.Buffer.IndexOf(endSequenceChar, startOffset); + + if (nextQuote == -1) + { + // There is no end sequence character, not a valid escapable sequence + result = TokenResult.Fail(); + return false; + } + } + while(Cursor.Buffer.Length>nextQuote+1 && Cursor.Buffer[nextQuote+1]==endSequenceChar); + + var start = Cursor.Position; + + Cursor.Advance(); + + +// If the next escape if not before the next quote, we can return the string as-is + Cursor.Advance(nextQuote + 1 - startOffset); + + result = TokenResult.Succeed(Buffer, start.Offset, Cursor.Offset); + return true; + } } -} +} \ No newline at end of file From 4d2fb6be16de1f979176417426afaee61e4d0db9 Mon Sep 17 00:00:00 2001 From: npenin Date: Sat, 13 Mar 2021 10:47:26 +0100 Subject: [PATCH 2/6] added unittest for non escapable strings --- test/Parlot.Tests/ScannerTests.cs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/Parlot.Tests/ScannerTests.cs b/test/Parlot.Tests/ScannerTests.cs index 736209c..062e92c 100644 --- a/test/Parlot.Tests/ScannerTests.cs +++ b/test/Parlot.Tests/ScannerTests.cs @@ -44,6 +44,18 @@ public void ShouldReadStringWithEscapes(string text, string expected) Assert.Equal(expected, result.GetText()); } + [Theory] + [InlineData("'Lorem \n ipsum'", "'Lorem \n ipsum'")] + [InlineData("'Lorem '' ipsum'", "'Lorem '' ipsum'")] + [InlineData(@"""Lorem """""""" ipsum""", "\"Lorem \"\" ipsum\"")] + public void ShouldReadNonEscapableString(string text, string expected) + { + Scanner s = new(text); + var success = s.ReadNonEscapableSequence(text[0], text[text.Length - 1], out var result); + Assert.True(success); + Assert.Equal(expected, result.GetText()); + } + [Theory] [InlineData("'Lorem \\w ipsum'")] [InlineData("'Lorem \\u12 ipsum'")] From acd22b5b5f45ff37767f0641846d5d8c03f1a315 Mon Sep 17 00:00:00 2001 From: npenin Date: Sat, 13 Mar 2021 18:02:22 +0100 Subject: [PATCH 3/6] fixed unit test and nonescapablesequence implementation --- src/Parlot/Scanner.cs | 20 ++++++++++++++------ test/Parlot.Tests/ScannerTests.cs | 3 ++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Parlot/Scanner.cs b/src/Parlot/Scanner.cs index 65106b7..555d039 100644 --- a/src/Parlot/Scanner.cs +++ b/src/Parlot/Scanner.cs @@ -492,20 +492,28 @@ public bool ReadNonEscapableSequence(char startSequenceChar, char endSequenceCha // Fast path if there aren't any escape char until next quote var startOffset = Cursor.Offset + 1; + var lastQuote = startOffset; - int nextQuote; + int nextQuote ; do { - nextQuote = Cursor.Buffer.IndexOf(endSequenceChar, startOffset); + nextQuote = Cursor.Buffer.IndexOf(endSequenceChar, lastQuote + 1); if (nextQuote == -1) { - // There is no end sequence character, not a valid escapable sequence - result = TokenResult.Fail(); - return false; + if(startOffset == lastQuote) + { + // There is no end sequence character, not a valid escapable sequence + result = TokenResult.Fail(); + return false; + } + nextQuote = lastQuote - 1; + break; } + + lastQuote = nextQuote + 1; } - while(Cursor.Buffer.Length>nextQuote+1 && Cursor.Buffer[nextQuote+1]==endSequenceChar); + while(Cursor.Buffer.Length > lastQuote && Cursor.Buffer[lastQuote] == endSequenceChar); var start = Cursor.Position; diff --git a/test/Parlot.Tests/ScannerTests.cs b/test/Parlot.Tests/ScannerTests.cs index 062e92c..a6a5fd2 100644 --- a/test/Parlot.Tests/ScannerTests.cs +++ b/test/Parlot.Tests/ScannerTests.cs @@ -45,9 +45,10 @@ public void ShouldReadStringWithEscapes(string text, string expected) } [Theory] + [InlineData("'Lorem ipsum'", "'Lorem ipsum'")] [InlineData("'Lorem \n ipsum'", "'Lorem \n ipsum'")] [InlineData("'Lorem '' ipsum'", "'Lorem '' ipsum'")] - [InlineData(@"""Lorem """""""" ipsum""", "\"Lorem \"\" ipsum\"")] + [InlineData(@"""Lorem """" ipsum""", "\"Lorem \"\" ipsum\"")] public void ShouldReadNonEscapableString(string text, string expected) { Scanner s = new(text); From 3fe9d8f442bd71842bb3fb5ab3d0bdb58cd5b89e Mon Sep 17 00:00:00 2001 From: npenin Date: Sat, 13 Mar 2021 18:03:56 +0100 Subject: [PATCH 4/6] remove useless Advance operation --- src/Parlot/Scanner.cs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Parlot/Scanner.cs b/src/Parlot/Scanner.cs index 555d039..54282b5 100644 --- a/src/Parlot/Scanner.cs +++ b/src/Parlot/Scanner.cs @@ -517,11 +517,8 @@ public bool ReadNonEscapableSequence(char startSequenceChar, char endSequenceCha var start = Cursor.Position; - Cursor.Advance(); - - -// If the next escape if not before the next quote, we can return the string as-is - Cursor.Advance(nextQuote + 1 - startOffset); + // If the next escape if not before the next quote, we can return the string as-is + Cursor.Advance(nextQuote + 2 - startOffset); result = TokenResult.Succeed(Buffer, start.Offset, Cursor.Offset); return true; From d45fe20ee6ff3ea8fc6cfdc2429e095178f99cc0 Mon Sep 17 00:00:00 2001 From: npenin Date: Sat, 13 Mar 2021 22:39:23 +0100 Subject: [PATCH 5/6] added more test cases --- test/Parlot.Tests/ScannerTests.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/Parlot.Tests/ScannerTests.cs b/test/Parlot.Tests/ScannerTests.cs index a6a5fd2..923e047 100644 --- a/test/Parlot.Tests/ScannerTests.cs +++ b/test/Parlot.Tests/ScannerTests.cs @@ -48,11 +48,15 @@ public void ShouldReadStringWithEscapes(string text, string expected) [InlineData("'Lorem ipsum'", "'Lorem ipsum'")] [InlineData("'Lorem \n ipsum'", "'Lorem \n ipsum'")] [InlineData("'Lorem '' ipsum'", "'Lorem '' ipsum'")] + [InlineData("'Lorem ' ipsum", "'Lorem '")] + [InlineData("'Lorem '' i''ps''um'", "'Lorem '' i''ps''um'")] [InlineData(@"""Lorem """" ipsum""", "\"Lorem \"\" ipsum\"")] + [InlineData("[mytable]", "[mytable]")] + [InlineData(@"""Lorem """""""" ipsum""", "\"Lorem \"\"\"\" ipsum\"")] public void ShouldReadNonEscapableString(string text, string expected) { Scanner s = new(text); - var success = s.ReadNonEscapableSequence(text[0], text[text.Length - 1], out var result); + var success = s.ReadNonEscapableSequence(expected[0], expected[expected.Length - 1], out var result); Assert.True(success); Assert.Equal(expected, result.GetText()); } From dc0b55d812ee85261e5923106f24744cf90add6f Mon Sep 17 00:00:00 2001 From: npenin Date: Sun, 14 Mar 2021 08:55:18 +0100 Subject: [PATCH 6/6] added EOF testing as suggested added some more examples for tsql like escapes --- test/Parlot.Tests/ScannerTests.cs | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/test/Parlot.Tests/ScannerTests.cs b/test/Parlot.Tests/ScannerTests.cs index 923e047..ded8c03 100644 --- a/test/Parlot.Tests/ScannerTests.cs +++ b/test/Parlot.Tests/ScannerTests.cs @@ -48,17 +48,37 @@ public void ShouldReadStringWithEscapes(string text, string expected) [InlineData("'Lorem ipsum'", "'Lorem ipsum'")] [InlineData("'Lorem \n ipsum'", "'Lorem \n ipsum'")] [InlineData("'Lorem '' ipsum'", "'Lorem '' ipsum'")] + [InlineData("'Lorem ipsum", "")] + [InlineData("Lorem ' ipsum", "")] [InlineData("'Lorem ' ipsum", "'Lorem '")] + [InlineData("Lorem ' ipsum'", "")] [InlineData("'Lorem '' i''ps''um'", "'Lorem '' i''ps''um'")] [InlineData(@"""Lorem """" ipsum""", "\"Lorem \"\" ipsum\"")] [InlineData("[mytable]", "[mytable]")] + [InlineData("[myta[ble]", "[myta[ble]")] + [InlineData("[myta]]ble]", "[myta]]ble]")] [InlineData(@"""Lorem """""""" ipsum""", "\"Lorem \"\"\"\" ipsum\"")] public void ShouldReadNonEscapableString(string text, string expected) { Scanner s = new(text); - var success = s.ReadNonEscapableSequence(expected[0], expected[expected.Length - 1], out var result); - Assert.True(success); - Assert.Equal(expected, result.GetText()); + char start, end; + if(expected.Length==0) + { + start=end='\''; + } + else + { + start=expected[0]; + end=expected[expected.Length - 1]; + } + var success = s.ReadNonEscapableSequence(start, end, out var result); + if(expected.Length==0) + Assert.False(success); + else + { + Assert.True(success); + Assert.Equal(expected, result.GetText()); + } } [Theory]