-
-
Notifications
You must be signed in to change notification settings - Fork 45
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added possibility to read non escapable sequences #34
base: main
Are you sure you want to change the base?
Changes from 4 commits
378c823
4d2fb6b
acd22b5
3fe9d8f
d45fe20
dc0b55d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,7 +65,7 @@ public bool SkipWhiteSpace() | |
|
||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
public bool ReadFirstThenOthers(Func<char, bool> first, Func<char, bool> other) | ||
=> ReadFirstThenOthers(first, other, out _); | ||
=> ReadFirstThenOthers(first, other, out _); | ||
|
||
public bool ReadFirstThenOthers(Func<char, bool> first, Func<char, bool> other, out TokenResult result) | ||
{ | ||
|
@@ -143,7 +143,7 @@ public bool ReadDecimal(out TokenResult result) | |
|
||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
public bool ReadInteger() => ReadInteger(out _); | ||
|
||
public bool ReadInteger(out TokenResult result) | ||
{ | ||
// perf: fast path to prevent a copy of the position | ||
|
@@ -198,15 +198,15 @@ public bool ReadWhile(Func<char, bool> predicate, out TokenResult result) | |
} | ||
|
||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
public bool ReadNonWhiteSpace() => ReadNonWhiteSpace(out _); | ||
public bool ReadNonWhiteSpace() => ReadNonWhiteSpace(out _); | ||
|
||
public bool ReadNonWhiteSpace(out TokenResult result) | ||
{ | ||
return ReadWhile(static x => !Character.IsWhiteSpace(x), out result); | ||
} | ||
|
||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
public bool ReadNonWhiteSpaceOrNewLine() => ReadNonWhiteSpaceOrNewLine(out _); | ||
public bool ReadNonWhiteSpaceOrNewLine() => ReadNonWhiteSpaceOrNewLine(out _); | ||
|
||
public bool ReadNonWhiteSpaceOrNewLine(out TokenResult result) | ||
{ | ||
|
@@ -241,8 +241,8 @@ public bool ReadChar(char c, out TokenResult result) | |
/// Reads the specific expected text. | ||
/// </summary> | ||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
public bool ReadText(string text, StringComparer comparer) => ReadText(text, comparer, out _); | ||
public bool ReadText(string text, StringComparer comparer) => ReadText(text, comparer, out _); | ||
|
||
/// <summary> | ||
/// Reads the specific expected text. | ||
/// </summary> | ||
|
@@ -261,7 +261,7 @@ public bool ReadText(string text, StringComparer comparer, out TokenResult resul | |
int start = Cursor.Offset; | ||
Cursor.Advance(text.Length); | ||
result = TokenResult.Succeed(Buffer, start, Cursor.Offset); | ||
|
||
return true; | ||
} | ||
|
||
|
@@ -472,5 +472,56 @@ private bool ReadQuotedString(char quoteChar, out TokenResult result) | |
|
||
return true; | ||
} | ||
|
||
/// <summary> | ||
/// Reads a sequence token enclosed in arbritrary start and end characters. | ||
/// </summary> | ||
/// <remarks> | ||
/// This method doesn't escape the string, but only validates its content is syntactically correct. | ||
/// The resulting Span contains the original quotes. | ||
/// </remarks> | ||
public bool ReadNonEscapableSequence(char startSequenceChar, char endSequenceChar, out TokenResult result) | ||
{ | ||
var startChar = Cursor.Current; | ||
|
||
if (startChar != startSequenceChar) | ||
{ | ||
result = TokenResult.Fail(); | ||
return false; | ||
} | ||
|
||
// Fast path if there aren't any escape char until next quote | ||
var startOffset = Cursor.Offset + 1; | ||
var lastQuote = startOffset; | ||
|
||
int nextQuote ; | ||
do | ||
{ | ||
nextQuote = Cursor.Buffer.IndexOf(endSequenceChar, lastQuote + 1); | ||
|
||
if (nextQuote == -1) | ||
{ | ||
if(startOffset == lastQuote) | ||
{ | ||
// There is no end sequence character, not a valid escapable sequence | ||
result = TokenResult.Fail(); | ||
return false; | ||
} | ||
nextQuote = lastQuote - 1; | ||
break; | ||
} | ||
|
||
lastQuote = nextQuote + 1; | ||
} | ||
while(Cursor.Buffer.Length > lastQuote && Cursor.Buffer[lastQuote] == endSequenceChar); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Infinite loop, or if not then it should be a single There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the purpose is to catch multiple cases, so it has to be a while. It would be an infinite loop if I did not have a break on line 511. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a ; at the end of the while line. No? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's a do/while, not a while/do ;) |
||
|
||
var start = Cursor.Position; | ||
|
||
// If the next escape if not before the next quote, we can return the string as-is | ||
Cursor.Advance(nextQuote + 2 - startOffset); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a nice optimization to call Advance only once, but then you are missing on |
||
|
||
result = TokenResult.Succeed(Buffer, start.Offset, Cursor.Offset); | ||
return true; | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,19 @@ public void ShouldReadStringWithEscapes(string text, string expected) | |
Assert.Equal(expected, result.GetText()); | ||
} | ||
|
||
[Theory] | ||
[InlineData("'Lorem ipsum'", "'Lorem ipsum'")] | ||
[InlineData("'Lorem \n ipsum'", "'Lorem \n ipsum'")] | ||
[InlineData("'Lorem '' ipsum'", "'Lorem '' ipsum'")] | ||
[InlineData(@"""Lorem """" ipsum""", "\"Lorem \"\" ipsum\"")] | ||
public void ShouldReadNonEscapableString(string text, string expected) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing some tests that shows when it's invalid. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also when 2,3, more quotes are used. Or when more groups of n quotes are used. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. when what is invalid ? I don't get it ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All the tests are positive. You need to add some to show when it's not valid and Fails() is called. To be sure that it doesn't "match" when it shouldn't. |
||
{ | ||
Scanner s = new(text); | ||
var success = s.ReadNonEscapableSequence(text[0], text[text.Length - 1], out var result); | ||
Assert.True(success); | ||
Assert.Equal(expected, result.GetText()); | ||
} | ||
|
||
[Theory] | ||
[InlineData("'Lorem \\w ipsum'")] | ||
[InlineData("'Lorem \\u12 ipsum'")] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand the need of
endSequenceChar
. If the goal of the PR is to read strings that are not escaped, then the method should be calledReadNonEscapableString
and fetch the first char (delimiter) by itself, like the other ReadString methods are doing in this class.This one looks more generic, but at the same time I am not convinced about the added complexity of being able to set a different start/end delimiter.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the purpose is also to read sql escapes like [ ] in tsql or `` in mysql, ...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool. Any link to examples of things that are not word and still can be escaped by doubling it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure about the[] example. It's more like a Between parser that exists if there is no escaping to check. Unless we can use [ between[] ? If not then I continue to think no arguments are necessary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I only found a stackoverflow link https://stackoverflow.com/questions/4200351/what-characters-are-valid-in-an-sql-server-database-name, but I can tell you for sure that you can have almost anything as a character as long as you escape it with []. Here is a doc for mysql: https://dev.mysql.com/doc/refman/8.0/en/identifiers.html