Commit d841e3c

feat: add splitInputAfterSyntaxError
1 parent 4f05a97

8 files changed, +155 -34 lines changed

src/parser/common/basicSQL.ts (+141 -34)
@@ -48,6 +48,11 @@ export abstract class BasicSQL<
      */
     protected abstract preferredRules: Set<number>;
 
+    /**
+     * keywords which can start a single statement
+     */
+    protected abstract statementStartKeywords: Set<string>;
+
     /**
      * Create a antlr4 Lexer instance.
      * @param input source string
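Each dialect class further down supplies this set; for now every dialect uses SELECT and INSERT. A minimal standalone sketch of the contract (illustrative only, not the class member itself):

// A statement-start keyword marks a likely statement boundary even when the
// previous statement was never terminated with ';'.
const statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);

// In "SELECT id FROM a SELECT name FROM b", the second SELECT is treated as
// the start of a new statement although no semicolon precedes it.
console.log(statementStartKeywords.has('SELECT')); // true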
@@ -251,6 +256,63 @@ export abstract class BasicSQL<
         return res;
     }
 
+    /**
+     * Try to get as small a range as possible after a syntax error.
+     * @param allTokens all tokens from input
+     * @param caretTokenIndex tokenIndex of caretPosition
+     * @returns { startToken: Token; stopToken: Token }
+     */
+    private splitInputAfterSyntaxError(
+        allTokens: Token[],
+        caretTokenIndex: number
+    ): { startToken: Token; stopToken: Token } {
+        let startToken: Token | null = null;
+        for (let tokenIndex = caretTokenIndex; tokenIndex >= 0; tokenIndex--) {
+            const token = allTokens[tokenIndex];
+            // end with semi
+            if (token?.text === ';') {
+                startToken = allTokens[tokenIndex + 1];
+                break;
+            }
+            // keywords which can start a single statement
+            if (
+                Array.from(this.statementStartKeywords).some((item) => item === token?.text) &&
+                tokenIndex !== 0
+            ) {
+                startToken = allTokens[tokenIndex - 1];
+                break;
+            }
+        }
+        // If there is no semicolon, start from the first token
+        if (startToken === null) {
+            startToken = allTokens[0];
+        }
+
+        let stopToken: Token | null = null;
+        for (let tokenIndex = caretTokenIndex; tokenIndex < allTokens.length; tokenIndex++) {
+            const token = allTokens[tokenIndex];
+            // end with semi
+            if (token?.text === ';') {
+                stopToken = token;
+                break;
+            }
+            // keywords which can start a single statement
+            if (
+                Array.from(this.statementStartKeywords).some((item) => item === token?.text) &&
+                tokenIndex !== 0
+            ) {
+                stopToken = allTokens[tokenIndex - 1];
+                break;
+            }
+        }
+        // If there is no semicolon, stop at the last token
+        if (stopToken === null) {
+            stopToken = allTokens[allTokens.length - 1];
+        }
+
+        return { startToken, stopToken };
+    }
+
     /**
      * Get suggestions of syntax and token at caretPosition
      * @param input source string
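To make the two scans concrete, here is a self-contained TypeScript sketch of the same strategy over a simplified token shape. TinyToken and splitRange are names invented for this illustration; the real method works on antlr4 Token objects and reads this.statementStartKeywords (the sketch uses Set.has where the method uses Array.from(...).some(...)).

interface TinyToken {
    tokenIndex: number;
    text: string;
}

const startKeywords = new Set(['SELECT', 'INSERT']);

function splitRange(
    tokens: TinyToken[],
    caretTokenIndex: number
): { startToken: TinyToken; stopToken: TinyToken } {
    // Scan left from the caret for a ';' or a statement-start keyword.
    let startToken: TinyToken | null = null;
    for (let i = caretTokenIndex; i >= 0; i--) {
        const token = tokens[i];
        if (token.text === ';') {
            startToken = tokens[i + 1]; // the range begins right after the ';'
            break;
        }
        if (startKeywords.has(token.text) && i !== 0) {
            startToken = tokens[i - 1]; // keep one token of context before the keyword
            break;
        }
    }
    startToken = startToken ?? tokens[0]; // no boundary found: start at the first token

    // Scan right from the caret symmetrically.
    let stopToken: TinyToken | null = null;
    for (let i = caretTokenIndex; i < tokens.length; i++) {
        const token = tokens[i];
        if (token.text === ';') {
            stopToken = token; // the range ends at the ';' itself
            break;
        }
        if (startKeywords.has(token.text) && i !== 0) {
            stopToken = tokens[i - 1]; // stop just before the next statement's keyword
            break;
        }
    }
    stopToken = stopToken ?? tokens[tokens.length - 1]; // no boundary: stop at the last token

    return { startToken, stopToken };
}

// "SELECT 1 ; SELECT FROM tab ;" tokenized naively on spaces; the caret is
// on FROM (index 4), inside the broken second statement.
const tokens = 'SELECT 1 ; SELECT FROM tab ;'
    .split(' ')
    .map((text, tokenIndex) => ({ tokenIndex, text }));
const { startToken, stopToken } = splitRange(tokens, 4);
console.log(startToken, stopToken);
// -> startToken: ';' at index 2 (the left scan hit SELECT and kept one token before it)
//    stopToken : ';' at index 6 (the right scan hit the terminating semicolon)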
@@ -282,53 +344,98 @@ export abstract class BasicSQL<
         const statementCount = splitListener.statementsContext?.length;
         const statementsContext = splitListener.statementsContext;
 
-        // If there are multiple statements.
-        if (statementCount > 1) {
-            /**
-             * Find a minimum valid range, reparse the fragment, and provide a new parse tree to C3.
-             * The boundaries of this range must be statements with no syntax errors.
-             * This can ensure the stable performance of the C3.
-             */
-            let startStatement: ParserRuleContext | null = null;
-            let stopStatement: ParserRuleContext | null = null;
+        const { startToken, stopToken } = this.splitInputAfterSyntaxError(
+            allTokens,
+            caretTokenIndex
+        );
 
-            for (let index = 0; index < statementCount; index++) {
-                const ctx = statementsContext[index];
-                const isCurrentCtxValid = !ctx.exception;
-                if (!isCurrentCtxValid) continue;
+        let startIndex: number = 0;
+        let stopIndex: number = 0;
 
+        /**
+         * If there is no semi
+         * and if there is no keyword which can start a single statement
+         * and if there are multiple statements
+         */
+        if (startToken.tokenIndex === 1 && stopToken.tokenIndex === allTokens.length - 1) {
+            if (statementCount > 1) {
                 /**
-                 * Ensure that the statementContext before the left boundary
-                 * and the last statementContext on the right boundary are qualified SQL statements.
+                 * Find a minimum valid range, reparse the fragment, and provide a new parse tree to C3.
+                 * The boundaries of this range must be statements with no syntax errors.
+                 * This can ensure the stable performance of the C3.
                  */
-                const isPrevCtxValid = index === 0 || !statementsContext[index - 1]?.exception;
-                const isNextCtxValid =
-                    index === statementCount - 1 || !statementsContext[index + 1]?.exception;
-
-                if (ctx.stop && ctx.stop.tokenIndex < caretTokenIndex && isPrevCtxValid) {
-                    startStatement = ctx;
+                let startStatement: ParserRuleContext | null = null;
+                let stopStatement: ParserRuleContext | null = null;
+
+                for (let index = 0; index < statementCount; index++) {
+                    const ctx = statementsContext[index];
+                    const isCurrentCtxValid = !ctx.exception;
+                    if (!isCurrentCtxValid) continue;
+
+                    /**
+                     * Ensure that the statementContext before the left boundary
+                     * and the last statementContext on the right boundary are qualified SQL statements.
+                     */
+                    const isPrevCtxValid = index === 0 || !statementsContext[index - 1]?.exception;
+                    const isNextCtxValid =
+                        index === statementCount - 1 || !statementsContext[index + 1]?.exception;
+
+                    if (ctx.stop && ctx.stop.tokenIndex < caretTokenIndex && isPrevCtxValid) {
+                        startStatement = ctx;
+                    }
+
+                    if (
+                        ctx.start &&
+                        !stopStatement &&
+                        ctx.start.tokenIndex > caretTokenIndex &&
+                        isNextCtxValid
+                    ) {
+                        stopStatement = ctx;
+                        break;
+                    }
                 }
 
-                if (
-                    ctx.start &&
-                    !stopStatement &&
-                    ctx.start.tokenIndex > caretTokenIndex &&
-                    isNextCtxValid
-                ) {
-                    stopStatement = ctx;
-                    break;
-                }
-            }
+                // A boundary consisting of the index of the input.
+                startIndex = startStatement?.start?.start ?? 0;
+                stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
 
+                /**
+                 * Save offset of the tokenIndex in the range of input
+                 * compared to the tokenIndex in the whole input
+                 */
+                tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
+                caretTokenIndex = caretTokenIndex - tokenIndexOffset;
+
+                /**
+                 * Reparse the input fragment,
+                 * and c3 will collect candidates in the newly generated parseTree.
+                 */
+                const inputSlice = input.slice(startIndex, stopIndex);
+
+                const lexer = this.createLexer(inputSlice);
+                lexer.removeErrorListeners();
+                const tokenStream = new CommonTokenStream(lexer);
+                tokenStream.fill();
+
+                const parser = this.createParserFromTokenStream(tokenStream);
+                parser.interpreter.predictionMode = PredictionMode.SLL;
+                parser.removeErrorListeners();
+                parser.buildParseTrees = true;
+                parser.errorHandler = new ErrorStrategy();
+
+                sqlParserIns = parser;
+                c3Context = parser.program();
+            }
+        } else {
             // A boundary consisting of the index of the input.
-            const startIndex = startStatement?.start?.start ?? 0;
-            const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
+            startIndex = startToken?.start ?? 0;
+            stopIndex = stopToken?.stop + 1 ?? input.length;
 
             /**
              * Save offset of the tokenIndex in the range of input
              * compared to the tokenIndex in the whole input
             */
-            tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
+            tokenIndexOffset = startToken?.tokenIndex ?? 0;
             caretTokenIndex = caretTokenIndex - tokenIndexOffset;
 
             /**
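The rewritten flow first asks splitInputAfterSyntaxError for a token range. Only when that range still covers the whole input (startToken is the first meaningful token and stopToken the last) does it fall back to the earlier statement-context narrowing; otherwise the new else branch slices the input directly from the character offsets carried by the boundary tokens and rebases caretTokenIndex against the range's first token. A sketch of that slice arithmetic, with invented names (RangeToken, sliceByTokens); the start/stop fields mirror antlr4's Token, which stores inclusive character offsets:

interface RangeToken {
    tokenIndex: number;
    start: number; // offset of the token's first character in the input
    stop: number;  // offset of the token's last character (inclusive)
}

function sliceByTokens(input: string, startToken: RangeToken, stopToken: RangeToken): string {
    const startIndex = startToken.start;  // begin at the range's first token
    const stopIndex = stopToken.stop + 1; // +1 because Token.stop is inclusive
    return input.slice(startIndex, stopIndex);
}

const input = 'SELECT 1; SELECT FROM tab;';
// Suppose the split returned the first ';' (offset 8) as startToken and the
// trailing ';' (offset 25) as stopToken:
const fragment = sliceByTokens(
    input,
    { tokenIndex: 2, start: 8, stop: 8 },
    { tokenIndex: 9, start: 25, stop: 25 }
);
console.log(fragment); // "; SELECT FROM tab;"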

src/parser/flink/index.ts (+2 -0)

@@ -35,6 +35,8 @@ export class FlinkSQL extends BasicSQL<FlinkSqlLexer, ProgramContext, FlinkSqlParser> {
         FlinkSqlParser.RULE_columnNameCreate,
     ]);
 
+    protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);
+
     protected get splitListener() {
         return new FlinkSqlSplitListener();
     }

src/parser/hive/index.ts (+2 -0)

@@ -36,6 +36,8 @@ export class HiveSQL extends BasicSQL<HiveSqlLexer, ProgramContext, HiveSqlParser> {
         HiveSqlParser.RULE_columnNameCreate,
     ]);
 
+    protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);
+
     protected get splitListener() {
         return new HiveSqlSplitListener();
     }

src/parser/impala/index.ts (+2 -0)

@@ -34,6 +34,8 @@ export class ImpalaSQL extends BasicSQL<ImpalaSqlLexer, ProgramContext, ImpalaSqlParser> {
         ImpalaSqlParser.RULE_columnNamePath,
     ]);
 
+    protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);
+
     protected get splitListener() {
         return new ImpalaSqlSplitListener();
     }

src/parser/mysql/index.ts (+2 -0)

@@ -34,6 +34,8 @@ export class MySQL extends BasicSQL<MySqlLexer, ProgramContext, MySqlParser> {
         MySqlParser.RULE_columnNameCreate,
     ]);
 
+    protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);
+
     protected get splitListener() {
         return new MysqlSplitListener();
     }

src/parser/postgresql/index.ts (+2 -0)

@@ -39,6 +39,8 @@ export class PostgreSQL extends BasicSQL<PostgreSqlLexer, ProgramContext, PostgreSqlParser> {
         PostgreSqlParser.RULE_column_name, // column name
     ]);
 
+    protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);
+
     protected get splitListener() {
         return new PostgreSqlSplitListener();
     }

src/parser/spark/index.ts (+2 -0)

@@ -34,6 +34,8 @@ export class SparkSQL extends BasicSQL<SparkSqlLexer, ProgramContext, SparkSqlParser> {
         SparkSqlParser.RULE_columnNameCreate,
     ]);
 
+    protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);
+
     protected get splitListener() {
         return new SparkSqlSplitListener();
     }

src/parser/trino/index.ts (+2 -0)

@@ -21,6 +21,8 @@ export class TrinoSQL extends BasicSQL<TrinoSqlLexer, ProgramContext, TrinoSqlParser> {
         return new TrinoSqlParser(tokenStream);
     }
 
+    protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);
+
     protected get splitListener() {
         return new TrinoSqlSplitListener();
     }
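With the keyword set supplied by every dialect, completion on a multi-statement script that contains a syntax error only reparses the narrowed fragment around the caret. A hedged usage sketch; the getSuggestionAtCaretPosition call and the { lineNumber, column } caret shape follow the library's public API, and the printed results are indicative only:

import { FlinkSQL } from 'dt-sql-parser';

const flink = new FlinkSQL();

// Statement 1 is complete; statement 2 is mid-edit and has a syntax error.
const sql = 'SELECT id FROM tab1;\nSELECT  FROM tab2;';

// Caret after "SELECT " on line 2: only the second statement's fragment
// is reparsed before candidate collection.
const suggestions = flink.getSuggestionAtCaretPosition(sql, {
    lineNumber: 2,
    column: 8,
});

console.log(suggestions?.keywords); // keyword candidates at the caret
console.log(suggestions?.syntax);   // e.g. column-name suggestion contexts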
