Commit 58a65fe

feat: add sqlSplitStrategy options
1 parent: 2f5e9e3

File tree: 24 files changed (+367 -38 lines)

src/parser/common/basicSQL.ts (+17 -4)

@@ -14,7 +14,13 @@ import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
 import { SQLParserBase } from '../../lib/SQLParserBase';
 import { findCaretTokenIndex } from './findCaretTokenIndex';
 import { ctxToText, tokenToWord, WordRange, TextSlice } from './textAndWord';
-import { CaretPosition, LOCALE_TYPE, Suggestions, SyntaxSuggestion } from './types';
+import {
+    CaretPosition,
+    LOCALE_TYPE,
+    SemanticCollectOptions,
+    Suggestions,
+    SyntaxSuggestion,
+} from './types';
 import { ParseError, ErrorListener } from './parseErrorListener';
 import { ErrorStrategy } from './errorStrategy';
 import type { SplitListener } from './splitListener';

@@ -101,7 +107,8 @@ export abstract class BasicSQL<
     protected abstract createSemanticContextCollector(
         input: string,
         caretPosition: CaretPosition,
-        allTokens: Token[]
+        allTokens: Token[],
+        options?: SemanticCollectOptions
     ): SemanticContextCollector;

     /**

@@ -466,15 +473,21 @@
      * Get semantic context infos
      * @param input source string
      * @param caretPosition caret position, such as cursor position
+     * @param options semantic context options
      * @returns analyzed semantic context
      */
-    public getSemanticContextAtCaretPosition(input: string, caretPosition: CaretPosition) {
+    public getSemanticContextAtCaretPosition(
+        input: string,
+        caretPosition: CaretPosition,
+        options?: SemanticCollectOptions
+    ) {
         const allTokens = this.getAllTokens(input);
         const parseTree = this.parseWithCache(input);
         const statementContextListener = this.createSemanticContextCollector(
             input,
             caretPosition,
-            allTokens
+            allTokens,
+            options
         );
         this.listen(statementContextListener, parseTree);
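
The public entry point gains an optional third argument. A minimal usage sketch, assuming the dialect classes and the SqlSplitStrategy enum are re-exported from the dt-sql-parser package entry and that CaretPosition is a 1-based { lineNumber, column } pair (both assumptions, not shown in this diff):

    import { FlinkSQL, SqlSplitStrategy } from 'dt-sql-parser'; // assumed entry point

    const flink = new FlinkSQL();
    const sql = 'SELECT id FROM t1 CREATE';
    const caret = { lineNumber: 1, column: 25 }; // caret right after CREATE (assumed 1-based)

    // Default strategy (LOOSE): the parse tree splits the input, so the caret starts a new statement.
    const loose = flink.getSemanticContextAtCaretPosition(sql, caret);

    // STRICT: no ';' terminates the SELECT, so the caret is still inside that statement.
    const strict = flink.getSemanticContextAtCaretPosition(sql, caret, {
        sqlSplitStrategy: SqlSplitStrategy.STRICT,
    });

    console.log(loose?.isNewStatement, strict?.isNewStatement); // expected: true, false (per the types.ts docs below)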

src/parser/common/semanticContextCollector.ts (+34 -4)

@@ -1,18 +1,32 @@
 import { ErrorNode, ParserRuleContext, TerminalNode, Token } from 'antlr4ng';
 import { findCaretTokenIndex } from '../common/findCaretTokenIndex';
-import { CaretPosition, SemanticContext } from '../common/types';
+import {
+    CaretPosition,
+    SemanticCollectOptions,
+    SemanticContext,
+    SqlSplitStrategy,
+} from '../common/types';

 export const SQL_SPLIT_SYMBOL_TEXT = ';';

 abstract class SemanticContextCollector {
-    constructor(_input: string, caretPosition: CaretPosition, allTokens: Token[]) {
+    constructor(
+        _input: string,
+        caretPosition: CaretPosition,
+        allTokens: Token[],
+        options?: SemanticCollectOptions
+    ) {
         // If caretPosition token is whiteSpace, tokenIndex may be undefined.
         const tokenIndex = findCaretTokenIndex(caretPosition, allTokens);

         if (tokenIndex !== undefined) {
             this._tokenIndex = tokenIndex;
         }
         this._allTokens = allTokens;
+        this.options = {
+            ...this.options,
+            ...options,
+        };

         if (allTokens?.length) {
             let i = tokenIndex ? tokenIndex - 1 : allTokens.length - 1;

@@ -50,6 +64,10 @@ abstract class SemanticContextCollector {
         }
     }

+    public readonly options: SemanticCollectOptions = {
+        sqlSplitStrategy: SqlSplitStrategy.LOOSE,
+    };
+
     private _tokenIndex: number;
     private _allTokens: Token[] = [];

@@ -117,6 +135,8 @@ abstract class SemanticContextCollector {
      * It should be called in each language's own `enterStatement`.
      */
     protected visitStatement(ctx: ParserRuleContext) {
+        if (this.options.sqlSplitStrategy === SqlSplitStrategy.STRICT) return;
+
         const isWhiteSpaceToken =
             this._tokenIndex === undefined ||
             this._allTokens[this._tokenIndex]?.type === this.getWhiteSpaceRuleType() ||

@@ -135,7 +155,12 @@
      * Uncomplete keyword will be error node
      */
     visitErrorNode(node: ErrorNode): void {
-        if (node.symbol.tokenIndex !== this._tokenIndex || this._isNewStatement) return;
+        if (
+            node.symbol.tokenIndex !== this._tokenIndex ||
+            this._isNewStatement ||
+            this.options.sqlSplitStrategy === SqlSplitStrategy.STRICT
+        )
+            return;

         let parent: ParserRuleContext | null = node.parent as ParserRuleContext;
         let currentNode: TerminalNode | ParserRuleContext = node;

@@ -188,7 +213,12 @@
     }

     visitTerminal(node: TerminalNode): void {
-        if (node.symbol.tokenIndex !== this._tokenIndex || this._isNewStatement) return;
+        if (
+            node.symbol.tokenIndex !== this._tokenIndex ||
+            this._isNewStatement ||
+            this.options.sqlSplitStrategy === SqlSplitStrategy.STRICT
+        )
+            return;

         let currentNode: TerminalNode | ParserRuleContext = node;
         let parent = node.parent as ParserRuleContext | null;
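
The collector keeps its defaults in a class field and shallow-merges the caller's bag over them, so an omitted sqlSplitStrategy falls back to LOOSE. A stripped-down sketch of that pattern (names simplified, not the full collector):

    enum SqlSplitStrategy { STRICT, LOOSE }
    interface SemanticCollectOptions { sqlSplitStrategy?: SqlSplitStrategy }

    class OptionsHolder {
        // Field initializers run before the constructor body, so the spread below
        // already sees the default in place.
        public readonly options: SemanticCollectOptions = {
            sqlSplitStrategy: SqlSplitStrategy.LOOSE,
        };

        constructor(options?: SemanticCollectOptions) {
            this.options = { ...this.options, ...options };
        }
    }

    console.log(new OptionsHolder().options.sqlSplitStrategy); // 1 (LOOSE)
    console.log(new OptionsHolder({ sqlSplitStrategy: SqlSplitStrategy.STRICT }).options.sqlSplitStrategy); // 0 (STRICT)

One caveat of the shallow spread: explicitly passing { sqlSplitStrategy: undefined } overwrites the default with undefined, and the STRICT checks then behave as if LOOSE had been chosen.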

src/parser/common/types.ts (+24)

@@ -73,3 +73,27 @@ export type LOCALE_TYPE = 'zh_CN' | 'en_US';
 export interface SemanticContext {
     isNewStatement: boolean;
 }
+
+export enum SqlSplitStrategy {
+    /** Only a semicolon symbol ends a statement */
+    STRICT,
+    /** Split statements based on the parse tree */
+    LOOSE,
+}
+
+export interface SemanticCollectOptions {
+    /**
+     * `sqlSplitStrategy` affects the result of `isNewStatement`.
+     *
+     * For example, given the sql "select id from t1 create\<caret_position\>":
+     *
+     * - `SqlSplitStrategy.STRICT`: the split symbol `;` is missing after the select statement, so the whole input is treated as one statement and `isNewStatement` is false
+     *
+     * - `SqlSplitStrategy.LOOSE`: the parse tree splits the input into two statements, "select id from t1" and "create", so `isNewStatement` is true
+     *
+     * @default SqlSplitStrategy.LOOSE
+     */
+    sqlSplitStrategy?: SqlSplitStrategy;
+}
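
In an editor integration, isNewStatement is typically what decides whether to offer statement-start keywords or fall back to ordinary suggestions. A hedged consumer sketch (the keyword list and helper are illustrative; only getSemanticContextAtCaretPosition and the types come from this commit, and the package entry is assumed):

    import { MySQL, SqlSplitStrategy } from 'dt-sql-parser'; // assumed entry point
    import type { CaretPosition } from 'dt-sql-parser';      // assumed to be re-exported

    const STATEMENT_START_KEYWORDS = ['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE', 'DROP', 'ALTER'];

    // Illustrative helper: prefer statement-start keywords when the caret opens a new statement.
    function keywordHints(parser: MySQL, sql: string, caret: CaretPosition): string[] {
        const context = parser.getSemanticContextAtCaretPosition(sql, caret, {
            sqlSplitStrategy: SqlSplitStrategy.LOOSE,
        });
        return context?.isNewStatement ? STATEMENT_START_KEYWORDS : [];
    }

    console.log(keywordHints(new MySQL(), 'SELECT id FROM t1; ', { lineNumber: 1, column: 20 }));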

src/parser/flink/index.ts (+10 -3)

@@ -2,7 +2,13 @@ import { CharStream, CommonTokenStream, Token } from 'antlr4ng';
 import { CandidatesCollection } from 'antlr4-c3';
 import { FlinkSqlLexer } from '../../lib/flink/FlinkSqlLexer';
 import { FlinkSqlParser, ProgramContext } from '../../lib/flink/FlinkSqlParser';
-import { CaretPosition, EntityContextType, Suggestions, SyntaxSuggestion } from '../common/types';
+import {
+    CaretPosition,
+    EntityContextType,
+    SemanticCollectOptions,
+    Suggestions,
+    SyntaxSuggestion,
+} from '../common/types';
 import { BasicSQL } from '../common/basicSQL';
 import { StmtContextType } from '../common/entityCollector';
 import { FlinkSqlSplitListener } from './flinkSplitListener';

@@ -52,9 +58,10 @@ export class FlinkSQL extends BasicSQL<FlinkSqlLexer, ProgramContext, FlinkSqlPa
     protected createSemanticContextCollector(
         input: string,
         caretPosition: CaretPosition,
-        allTokens: Token[]
+        allTokens: Token[],
+        options?: SemanticCollectOptions
     ) {
-        return new FlinkSemanticContextCollector(input, caretPosition, allTokens);
+        return new FlinkSemanticContextCollector(input, caretPosition, allTokens, options);
     }

     protected processCandidates(
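
The remaining dialect entry points below (hive, impala, mysql, postgresql, spark, trino) repeat exactly this change: accept the optional bag and forward it to the dialect's collector. Since the shared base class is what interprets the option, the behaviour should be uniform across engines; a quick cross-dialect sketch under the same export and caret assumptions as above:

    import { FlinkSQL, HiveSQL, MySQL, SparkSQL, SqlSplitStrategy } from 'dt-sql-parser';

    const sql = 'SELECT id FROM t1 CREATE';
    const caret = { lineNumber: 1, column: 25 };

    for (const parser of [new FlinkSQL(), new HiveSQL(), new MySQL(), new SparkSQL()]) {
        const strict = parser.getSemanticContextAtCaretPosition(sql, caret, {
            sqlSplitStrategy: SqlSplitStrategy.STRICT,
        });
        // Without a terminating ';', each engine should keep the caret inside one statement.
        console.log(parser.constructor.name, strict?.isNewStatement); // expected false for all
    }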

src/parser/hive/index.ts (+10 -3)

@@ -4,7 +4,13 @@ import { HiveSqlLexer } from '../../lib/hive/HiveSqlLexer';
 import { HiveSqlParser, ProgramContext } from '../../lib/hive/HiveSqlParser';
 import { BasicSQL } from '../common/basicSQL';

-import { CaretPosition, EntityContextType, Suggestions, SyntaxSuggestion } from '../common/types';
+import {
+    CaretPosition,
+    EntityContextType,
+    SemanticCollectOptions,
+    Suggestions,
+    SyntaxSuggestion,
+} from '../common/types';
 import { StmtContextType } from '../common/entityCollector';
 import { HiveSqlSplitListener } from './hiveSplitListener';
 import { HiveEntityCollector } from './hiveEntityCollector';

@@ -53,9 +59,10 @@ export class HiveSQL extends BasicSQL<HiveSqlLexer, ProgramContext, HiveSqlParse
     protected createSemanticContextCollector(
         input: string,
         caretPosition: CaretPosition,
-        allTokens: Token[]
+        allTokens: Token[],
+        options?: SemanticCollectOptions
     ) {
-        return new HiveSemanticContextCollector(input, caretPosition, allTokens);
+        return new HiveSemanticContextCollector(input, caretPosition, allTokens, options);
     }

     protected processCandidates(

src/parser/impala/index.ts (+10 -3)

@@ -3,7 +3,13 @@ import { CandidatesCollection } from 'antlr4-c3';
 import { ImpalaSqlLexer } from '../../lib/impala/ImpalaSqlLexer';
 import { ImpalaSqlParser, ProgramContext } from '../../lib/impala/ImpalaSqlParser';
 import { BasicSQL } from '../common/basicSQL';
-import { CaretPosition, EntityContextType, Suggestions, SyntaxSuggestion } from '../common/types';
+import {
+    CaretPosition,
+    EntityContextType,
+    SemanticCollectOptions,
+    Suggestions,
+    SyntaxSuggestion,
+} from '../common/types';
 import { StmtContextType } from '../common/entityCollector';
 import { ImpalaSqlSplitListener } from './impalaSplitListener';
 import { ImpalaEntityCollector } from './impalaEntityCollector';

@@ -51,9 +57,10 @@ export class ImpalaSQL extends BasicSQL<ImpalaSqlLexer, ProgramContext, ImpalaSq
     protected createSemanticContextCollector(
         input: string,
         caretPosition: CaretPosition,
-        allTokens: Token[]
+        allTokens: Token[],
+        options?: SemanticCollectOptions
     ) {
-        return new ImpalaSemanticContextCollector(input, caretPosition, allTokens);
+        return new ImpalaSemanticContextCollector(input, caretPosition, allTokens, options);
     }

     protected processCandidates(

src/parser/mysql/index.ts (+10 -3)

@@ -3,7 +3,13 @@ import { CandidatesCollection } from 'antlr4-c3';
 import { MySqlLexer } from '../../lib/mysql/MySqlLexer';
 import { MySqlParser, ProgramContext } from '../../lib/mysql/MySqlParser';
 import { BasicSQL } from '../common/basicSQL';
-import { Suggestions, EntityContextType, SyntaxSuggestion, CaretPosition } from '../common/types';
+import {
+    Suggestions,
+    EntityContextType,
+    SyntaxSuggestion,
+    CaretPosition,
+    SemanticCollectOptions,
+} from '../common/types';
 import { StmtContextType } from '../common/entityCollector';
 import { MysqlSplitListener } from './mysqlSplitListener';
 import { MySqlEntityCollector } from './mysqlEntityCollector';

@@ -51,9 +57,10 @@ export class MySQL extends BasicSQL<MySqlLexer, ProgramContext, MySqlParser> {
     protected createSemanticContextCollector(
         input: string,
         caretPosition: CaretPosition,
-        allTokens: Token[]
+        allTokens: Token[],
+        options?: SemanticCollectOptions
     ) {
-        return new MySqlSemanticContextCollector(input, caretPosition, allTokens);
+        return new MySqlSemanticContextCollector(input, caretPosition, allTokens, options);
     }

     protected processCandidates(

src/parser/postgresql/index.ts (+10 -3)

@@ -3,7 +3,13 @@ import { CharStream, CommonTokenStream, Token } from 'antlr4ng';

 import { PostgreSqlLexer } from '../../lib/postgresql/PostgreSqlLexer';
 import { PostgreSqlParser, ProgramContext } from '../../lib/postgresql/PostgreSqlParser';
-import { CaretPosition, EntityContextType, Suggestions, SyntaxSuggestion } from '../common/types';
+import {
+    CaretPosition,
+    EntityContextType,
+    SemanticCollectOptions,
+    Suggestions,
+    SyntaxSuggestion,
+} from '../common/types';
 import { BasicSQL } from '../common/basicSQL';
 import { StmtContextType } from '../common/entityCollector';
 import { PostgreSqlEntityCollector } from './postgreEntityCollector';

@@ -56,9 +62,10 @@ export class PostgreSQL extends BasicSQL<PostgreSqlLexer, ProgramContext, Postgr
     protected createSemanticContextCollector(
         input: string,
         caretPosition: CaretPosition,
-        allTokens: Token[]
+        allTokens: Token[],
+        options?: SemanticCollectOptions
     ) {
-        return new PostgreSemanticContextCollector(input, caretPosition, allTokens);
+        return new PostgreSemanticContextCollector(input, caretPosition, allTokens, options);
     }

     protected processCandidates(

src/parser/spark/index.ts (+10 -3)

@@ -3,7 +3,13 @@ import { CandidatesCollection } from 'antlr4-c3';
 import { SparkSqlLexer } from '../../lib/spark/SparkSqlLexer';
 import { SparkSqlParser, ProgramContext } from '../../lib/spark/SparkSqlParser';
 import { BasicSQL } from '../common/basicSQL';
-import { Suggestions, EntityContextType, SyntaxSuggestion, CaretPosition } from '../common/types';
+import {
+    Suggestions,
+    EntityContextType,
+    SyntaxSuggestion,
+    CaretPosition,
+    SemanticCollectOptions,
+} from '../common/types';
 import { StmtContextType } from '../common/entityCollector';
 import { SparkSqlSplitListener } from './sparkSplitListener';
 import { SparkEntityCollector } from './sparkEntityCollector';

@@ -51,9 +57,10 @@ export class SparkSQL extends BasicSQL<SparkSqlLexer, ProgramContext, SparkSqlPa
     protected createSemanticContextCollector(
         input: string,
         caretPosition: CaretPosition,
-        allTokens: Token[]
+        allTokens: Token[],
+        options?: SemanticCollectOptions
     ) {
-        return new SparkSemanticContextCollector(input, caretPosition, allTokens);
+        return new SparkSemanticContextCollector(input, caretPosition, allTokens, options);
     }

     protected processCandidates(

src/parser/trino/index.ts (+10 -3)

@@ -3,7 +3,13 @@ import { CandidatesCollection } from 'antlr4-c3';
 import { TrinoSqlLexer } from '../../lib/trino/TrinoSqlLexer';
 import { TrinoSqlParser, ProgramContext } from '../../lib/trino/TrinoSqlParser';
 import { BasicSQL } from '../common/basicSQL';
-import { Suggestions, EntityContextType, SyntaxSuggestion, CaretPosition } from '../common/types';
+import {
+    Suggestions,
+    EntityContextType,
+    SyntaxSuggestion,
+    CaretPosition,
+    SemanticCollectOptions,
+} from '../common/types';
 import { StmtContextType } from '../common/entityCollector';
 import { TrinoSqlSplitListener } from './trinoSplitListener';
 import { TrinoEntityCollector } from './trinoEntityCollector';

@@ -38,9 +44,10 @@ export class TrinoSQL extends BasicSQL<TrinoSqlLexer, ProgramContext, TrinoSqlPa
     protected createSemanticContextCollector(
         input: string,
         caretPosition: CaretPosition,
-        allTokens: Token[]
+        allTokens: Token[],
+        options?: SemanticCollectOptions
     ) {
-        return new TrinoSemanticContextCollector(input, caretPosition, allTokens);
+        return new TrinoSemanticContextCollector(input, caretPosition, allTokens, options);
     }

     protected preferredRules: Set<number> = new Set([

test/parser/flink/contextCollect/fixtures/semantic.sql (+1 -2)

@@ -28,5 +28,4 @@ SEL
 INSERT INTO t1 VALUES(1);

 CREATE TABLE a1(id INT) WITH ('connector' = 'kafka')
-CREATE VIEW
-INSERT INTO t1 VALUES(1);
+CREATE
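
The Flink semantic fixture now ends with a bare CREATE right after an unterminated CREATE TABLE, which is exactly the case where the two strategies disagree. A hedged Jest-style sketch of an assertion against that tail (the commit's real tests are not shown here; package entry, caret coordinates, and expected values follow the types.ts example above):

    import { FlinkSQL, SqlSplitStrategy } from 'dt-sql-parser'; // assumed entry point

    const flink = new FlinkSQL();
    const sql = "CREATE TABLE a1(id INT) WITH ('connector' = 'kafka')\nCREATE";
    const caret = { lineNumber: 2, column: 7 }; // just after the dangling CREATE (assumed 1-based)

    test('LOOSE treats the dangling CREATE as a new statement', () => {
        const context = flink.getSemanticContextAtCaretPosition(sql, caret);
        expect(context?.isNewStatement).toBe(true);
    });

    test('STRICT keeps the caret inside the unterminated CREATE TABLE', () => {
        const context = flink.getSemanticContextAtCaretPosition(sql, caret, {
            sqlSplitStrategy: SqlSplitStrategy.STRICT,
        });
        expect(context?.isNewStatement).toBe(false);
    });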
