
Commit de3b760

feat: add splitInputBySeparator
1 parent 68c00c2 commit de3b760

8 files changed: +129 −68 lines changed

src/parser/common/basicSQL.ts (+108 −19)
@@ -21,6 +21,8 @@ import type { SplitListener } from './splitListener';
 import type { EntityCollector } from './entityCollector';
 import { EntityContext } from './entityCollector';
 
+const SEPARATOR: string = ';';
+
 /**
  * Basic SQL class, every sql needs extends it.
  */
@@ -65,13 +67,11 @@ export abstract class BasicSQL<
      * @param candidates candidate list
      * @param allTokens all tokens from input
      * @param caretTokenIndex tokenIndex of caretPosition
-     * @param tokenIndexOffset offset of the tokenIndex in the candidates compared to the tokenIndex in allTokens
      */
     protected abstract processCandidates(
         candidates: CandidatesCollection,
         allTokens: Token[],
-        caretTokenIndex: number,
-        tokenIndexOffset: number
+        caretTokenIndex: number
     ): Suggestions<Token>;

     /**
@@ -251,6 +251,78 @@ export abstract class BasicSQL<
         return res;
     }

+    /**
+     * Get the smaller range of input
+     * @param input string
+     * @param allTokens all tokens from input
+     * @param tokenIndexOffset offset of the tokenIndex in the range of input
+     * @param caretTokenIndex tokenIndex of caretPosition
+     * @returns inputSlice: string, caretTokenIndex: number
+     */
+    private splitInputBySeparator(
+        input: string,
+        allTokens: Token[],
+        tokenIndexOffset: number,
+        caretTokenIndex: number
+    ): { inputSlice: string; allTokens: Token[]; caretTokenIndex: number } {
+        const tokens = allTokens.slice(tokenIndexOffset);
+        /**
+         * Set startToken
+         */
+        let startToken: Token | null = null;
+        for (let tokenIndex = caretTokenIndex - tokenIndexOffset; tokenIndex >= 0; tokenIndex--) {
+            const token = tokens[tokenIndex];
+            if (token?.text === SEPARATOR) {
+                startToken = tokens[tokenIndex + 1];
+                break;
+            }
+        }
+        if (startToken === null) {
+            startToken = tokens[0];
+        }
+
+        /**
+         * Set stopToken
+         */
+        let stopToken: Token | null = null;
+        for (
+            let tokenIndex = caretTokenIndex - tokenIndexOffset;
+            tokenIndex < tokens.length;
+            tokenIndex++
+        ) {
+            const token = tokens[tokenIndex];
+            if (token?.text === SEPARATOR) {
+                stopToken = token;
+                break;
+            }
+        }
+        if (stopToken === null) {
+            stopToken = tokens[tokens.length - 1];
+        }
+
+        const indexOffset = tokens[0].start;
+        let startIndex = startToken.start - indexOffset;
+        let stopIndex = stopToken.stop + 1 - indexOffset;
+
+        /**
+         * Save offset of the tokenIndex in the range of input
+         * compared to the tokenIndex in the whole input
+         */
+        const _tokenIndexOffset = startToken.tokenIndex;
+        const _caretTokenIndex = caretTokenIndex - _tokenIndexOffset;
+
+        /**
+         * Get the smaller range of _input
+         */
+        const _input = input.slice(startIndex, stopIndex);
+
+        return {
+            inputSlice: _input,
+            allTokens: allTokens.slice(_tokenIndexOffset),
+            caretTokenIndex: _caretTokenIndex,
+        };
+    }
+
     /**
      * Get suggestions of syntax and token at caretPosition
      * @param input source string
@@ -262,12 +334,13 @@
         caretPosition: CaretPosition
     ): Suggestions | null {
         const splitListener = this.splitListener;
+        let inputSlice = input;

-        this.parseWithCache(input);
+        this.parseWithCache(inputSlice);
         if (!this._parseTree) return null;

         let sqlParserIns = this._parser;
-        const allTokens = this.getAllTokens(input);
+        let allTokens = this.getAllTokens(inputSlice);
         let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);
         let c3Context: ParserRuleContext = this._parseTree;
         let tokenIndexOffset: number = 0;
@@ -321,22 +394,43 @@
             }

             // A boundary consisting of the index of the input.
-            const startIndex = startStatement?.start?.start ?? 0;
-            const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
+            let startIndex = startStatement?.start?.start ?? 0;
+            let stopIndex = stopStatement?.stop?.stop ?? inputSlice.length - 1;

             /**
              * Save offset of the tokenIndex in the range of input
              * compared to the tokenIndex in the whole input
              */
             tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
-            caretTokenIndex = caretTokenIndex - tokenIndexOffset;
+            inputSlice = inputSlice.slice(startIndex, stopIndex);
+        }

-            /**
-             * Reparse the input fragment,
-             * and c3 will collect candidates in the newly generated parseTree.
-             */
-            const inputSlice = input.slice(startIndex, stopIndex);
+        /**
+         * Split the inputSlice by separator to get the smaller range of inputSlice.
+         */
+        if (inputSlice.includes(SEPARATOR)) {
+            const {
+                inputSlice: _input,
+                allTokens: _allTokens,
+                caretTokenIndex: _caretTokenIndex,
+            } = this.splitInputBySeparator(
+                inputSlice,
+                allTokens,
+                tokenIndexOffset,
+                caretTokenIndex
+            );
+
+            allTokens = _allTokens;
+            caretTokenIndex = _caretTokenIndex;
+            inputSlice = _input;
+        } else {
+            caretTokenIndex = caretTokenIndex - tokenIndexOffset;
+        }

+        /**
+         * Reparse the input fragment, and c3 will collect candidates in the newly generated parseTree when input changed.
+         */
+        if (inputSlice !== input) {
             const lexer = this.createLexer(inputSlice);
             lexer.removeErrorListeners();
             const tokenStream = new CommonTokenStream(lexer);
@@ -356,12 +450,7 @@
         core.preferredRules = this.preferredRules;

         const candidates = core.collectCandidates(caretTokenIndex, c3Context);
-        const originalSuggestions = this.processCandidates(
-            candidates,
-            allTokens,
-            caretTokenIndex,
-            tokenIndexOffset
-        );
+        const originalSuggestions = this.processCandidates(candidates, allTokens, caretTokenIndex);

         const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax.map(
             (syntaxCtx) => {
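
The core idea is easier to see stripped of the ANTLR machinery: scan outward from the caret to the nearest `;` on each side, keep only the statement in between, and shift the caret into the slice's coordinates. A minimal standalone sketch of that idea using plain string offsets instead of ANTLR tokens (sliceStatementAtCaret is a hypothetical helper, not part of this commit):

// Sketch only: string offsets stand in for token indexes.
const SEPARATOR = ';';

function sliceStatementAtCaret(
    input: string,
    caretOffset: number
): { statement: string; caretInStatement: number } {
    // Scan left from the caret to the previous separator (or start of input).
    const prev = input.lastIndexOf(SEPARATOR, Math.max(caretOffset - 1, 0));
    const start = prev === -1 ? 0 : prev + 1;

    // Scan right from the caret to the next separator (or end of input).
    const next = input.indexOf(SEPARATOR, caretOffset);
    const stop = next === -1 ? input.length : next + 1; // keep the trailing ';'

    return {
        statement: input.slice(start, stop),
        // Offsets inside the slice shift by `start`, just as token indexes
        // shift by `_tokenIndexOffset` in splitInputBySeparator above.
        caretInStatement: caretOffset - start,
    };
}

const sql = 'SELECT * FROM t1; SELECT id FROM t2; DELETE FROM t3;';
console.log(sliceStatementAtCaret(sql, sql.indexOf('t2')));
// -> { statement: ' SELECT id FROM t2;', caretInStatement: 16 }

The payoff is that only the statement under the caret is re-lexed and re-parsed for c3, rather than the whole multi-statement script.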

src/parser/flink/index.ts (+3 −7)

@@ -50,19 +50,15 @@ export class FlinkSQL extends BasicSQL<FlinkSqlLexer, ProgramContext, FlinkSqlPa
     protected processCandidates(
         candidates: CandidatesCollection,
         allTokens: Token[],
-        caretTokenIndex: number,
-        tokenIndexOffset: number
+        caretTokenIndex: number
     ): Suggestions<Token> {
         const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
         const keywords: string[] = [];

         for (let candidate of candidates.rules) {
             const [ruleType, candidateRule] = candidate;
-            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
-            const tokenRanges = allTokens.slice(
-                startTokenIndex,
-                caretTokenIndex + tokenIndexOffset + 1
-            );
+            const startTokenIndex = candidateRule.startTokenIndex;
+            const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

             let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
             switch (ruleType) {
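
The same two-line simplification lands in every dialect below. It works because slicing the token array first is equivalent to offsetting both indexes into the full array, so the tokenIndexOffset bookkeeping cancels out at each call site. A toy demonstration with strings standing in for ANTLR tokens (all values made up for illustration):

const fullTokens = ['SELECT', 'a', 'FROM', 't', ';', 'SELECT', 'b', 'FROM', 'u'];
const tokenIndexOffset = 5; // second statement starts at index 5
const ruleStart = 2;        // startTokenIndex, relative to the sliced array
const caret = 3;            // caretTokenIndex, relative to the sliced array

// Before: indexes into the full array, shifted by the offset in each dialect.
const before = fullTokens.slice(ruleStart + tokenIndexOffset, caret + tokenIndexOffset + 1);
// After: BasicSQL pre-slices allTokens, so the indexes are used as-is.
const after = fullTokens.slice(tokenIndexOffset).slice(ruleStart, caret + 1);

console.log(before, after); // both print ['FROM', 'u']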

src/parser/hive/index.ts (+3 −7)

@@ -51,18 +51,14 @@ export class HiveSQL extends BasicSQL<HiveSqlLexer, ProgramContext, HiveSqlParse
     protected processCandidates(
         candidates: CandidatesCollection,
         allTokens: Token[],
-        caretTokenIndex: number,
-        tokenIndexOffset: number
+        caretTokenIndex: number
     ): Suggestions<Token> {
         const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
         const keywords: string[] = [];
         for (let candidate of candidates.rules) {
             const [ruleType, candidateRule] = candidate;
-            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
-            const tokenRanges = allTokens.slice(
-                startTokenIndex,
-                caretTokenIndex + tokenIndexOffset + 1
-            );
+            const startTokenIndex = candidateRule.startTokenIndex;
+            const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

             let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
             switch (ruleType) {

src/parser/impala/index.ts (+3 −7)

@@ -49,18 +49,14 @@ export class ImpalaSQL extends BasicSQL<ImpalaSqlLexer, ProgramContext, ImpalaSq
     protected processCandidates(
         candidates: CandidatesCollection,
         allTokens: Token[],
-        caretTokenIndex: number,
-        tokenIndexOffset: number
+        caretTokenIndex: number
    ): Suggestions<Token> {
         const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
         const keywords: string[] = [];
         for (let candidate of candidates.rules) {
             const [ruleType, candidateRule] = candidate;
-            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
-            const tokenRanges = allTokens.slice(
-                startTokenIndex,
-                caretTokenIndex + tokenIndexOffset + 1
-            );
+            const startTokenIndex = candidateRule.startTokenIndex;
+            const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

             let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
             switch (ruleType) {

src/parser/mysql/index.ts (+3 −7)

@@ -49,19 +49,15 @@ export class MySQL extends BasicSQL<MySqlLexer, ProgramContext, MySqlParser> {
     protected processCandidates(
         candidates: CandidatesCollection,
         allTokens: Token[],
-        caretTokenIndex: number,
-        tokenIndexOffset: number
+        caretTokenIndex: number
     ): Suggestions<Token> {
         const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
         const keywords: string[] = [];

         for (const candidate of candidates.rules) {
             const [ruleType, candidateRule] = candidate;
-            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
-            const tokenRanges = allTokens.slice(
-                startTokenIndex,
-                caretTokenIndex + tokenIndexOffset + 1
-            );
+            const startTokenIndex = candidateRule.startTokenIndex;
+            const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

             let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
             switch (ruleType) {

src/parser/postgresql/index.ts (+3 −7)

@@ -54,18 +54,14 @@ export class PostgreSQL extends BasicSQL<PostgreSqlLexer, ProgramContext, Postgr
     protected processCandidates(
         candidates: CandidatesCollection,
         allTokens: Token[],
-        caretTokenIndex: number,
-        tokenIndexOffset: number
+        caretTokenIndex: number
     ): Suggestions<Token> {
         const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
         const keywords: string[] = [];
         for (let candidate of candidates.rules) {
             const [ruleType, candidateRule] = candidate;
-            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
-            const tokenRanges = allTokens.slice(
-                startTokenIndex,
-                caretTokenIndex + tokenIndexOffset + 1
-            );
+            const startTokenIndex = candidateRule.startTokenIndex;
+            const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

             let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
             switch (ruleType) {

src/parser/spark/index.ts (+3 −7)

@@ -49,19 +49,15 @@ export class SparkSQL extends BasicSQL<SparkSqlLexer, ProgramContext, SparkSqlPa
     protected processCandidates(
         candidates: CandidatesCollection,
         allTokens: Token[],
-        caretTokenIndex: number,
-        tokenIndexOffset: number
+        caretTokenIndex: number
     ): Suggestions<Token> {
         const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
         const keywords: string[] = [];

         for (const candidate of candidates.rules) {
             const [ruleType, candidateRule] = candidate;
-            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
-            const tokenRanges = allTokens.slice(
-                startTokenIndex,
-                caretTokenIndex + tokenIndexOffset + 1
-            );
+            const startTokenIndex = candidateRule.startTokenIndex;
+            const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

             let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
             switch (ruleType) {

src/parser/trino/index.ts (+3 −7)

@@ -51,19 +51,15 @@ export class TrinoSQL extends BasicSQL<TrinoSqlLexer, ProgramContext, TrinoSqlPa
     protected processCandidates(
         candidates: CandidatesCollection,
         allTokens: Token[],
-        caretTokenIndex: number,
-        tokenIndexOffset: number
+        caretTokenIndex: number
     ): Suggestions<Token> {
         const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
         const keywords: string[] = [];

         for (let candidate of candidates.rules) {
             const [ruleType, candidateRule] = candidate;
-            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
-            const tokenRanges = allTokens.slice(
-                startTokenIndex,
-                caretTokenIndex + tokenIndexOffset + 1
-            );
+            const startTokenIndex = candidateRule.startTokenIndex;
+            const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

             let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
             switch (ruleType) {
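
Taken together: with the caret inside one statement of a multi-statement script, only that statement is re-lexed and re-parsed before candidate collection. A hedged usage sketch, assuming dt-sql-parser's public getSuggestionAtCaretPosition API (the exact suggestions returned depend on the grammar and caret position):

import { MySQL } from 'dt-sql-parser';

const parser = new MySQL();
const sql = 'SELECT * FROM t1;\nINSERT INTO ';

// Caret at the end of line 2: the SELECT statement is skipped entirely.
const suggestions = parser.getSuggestionAtCaretPosition(sql, {
    lineNumber: 2,
    column: 13,
});
console.log(suggestions?.keywords);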
