diff --git a/src/main/scala/org/apache/spark/sql/OapExtensions.scala b/src/main/scala/org/apache/spark/sql/OapExtensions.scala index 78810cd1f16..1bee17d6f9e 100644 --- a/src/main/scala/org/apache/spark/sql/OapExtensions.scala +++ b/src/main/scala/org/apache/spark/sql/OapExtensions.scala @@ -29,6 +29,6 @@ class OapExtensions extends (SparkSessionExtensions => Unit) { extensions.injectPlannerStrategy(_ => OapGroupAggregateStrategy) extensions.injectPlannerStrategy(_ => OapFileSourceStrategy) // Oap Custom SqlParser. - extensions.injectParser((session, _) => new OapSparkSqlParser(session)) + extensions.injectParser((session, delegate) => new OapSparkSqlParser(session, delegate)) } } diff --git a/src/main/scala/org/apache/spark/sql/execution/OapSparkSqlParser.scala b/src/main/scala/org/apache/spark/sql/execution/OapSparkSqlParser.scala index 8ff5e699f94..e07f38d994d 100644 --- a/src/main/scala/org/apache/spark/sql/execution/OapSparkSqlParser.scala +++ b/src/main/scala/org/apache/spark/sql/execution/OapSparkSqlParser.scala @@ -17,32 +17,109 @@ package org.apache.spark.sql.execution -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.parser._ -import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ -import org.apache.spark.sql.catalyst.plans.logical._ +import scala.collection.JavaConverters._ + +import org.antlr.v4.runtime._ +import org.antlr.v4.runtime.atn.PredictionMode +import org.antlr.v4.runtime.misc.{Interval, ParseCancellationException} +import org.antlr.v4.runtime.tree._ + +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst._ +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.parser.{OapSqlBaseBaseListener, OapSqlBaseBaseVisitor, OapSqlBaseLexer, OapSqlBaseParser, ParseErrorListener, ParseException, ParserInterface} +import org.apache.spark.sql.catalyst.parser.OapSqlBaseParser._ +import 
org.apache.spark.sql.catalyst.parser.ParserUtils._ +import org.apache.spark.sql.catalyst.parser.ParserUtils.{string, withOrigin} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.execution.datasources.oap.index._ -import org.apache.spark.sql.internal.{SQLConf, VariableSubstitution} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ /** - * Concrete parser for Spark SQL statements. + * A SQL parser that tries to parse OAP commands. If failing to parse the SQL text, it will + * forward the call to `delegate`. */ - class OapSparkSqlParser(session: SparkSession) extends AbstractSqlParser { - lazy val conf = session.sessionState.conf - lazy val astBuilder = new OapSparkSqlAstBuilder(conf) +class OapSparkSqlParser(session: SparkSession, delegate: ParserInterface) extends ParserInterface { + + lazy val conf: SQLConf = session.sessionState.conf + lazy val builder = new OapSqlBaseAstBuilder(conf) + + + override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser => + builder.visitSingleStatement(parser.singleStatement()) match { + case plan: LogicalPlan => plan + case _ => delegate.parsePlan(sqlText) + } + } + // scalastyle:off line.size.limit + /** + * Fork from `org.apache.spark.sql.catalyst.parser.AbstractSqlParser#parse(java.lang.String, scala.Function1)`. 
+ * + * @see https://github.com/apache/spark/blob/v2.4.4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala#L81 + */ + // scalastyle:on + protected def parse[T](command: String)(toResult: OapSqlBaseParser => T): T = { + val lexer = new OapSqlBaseLexer( + new UpperCaseCharStream(CharStreams.fromString(command))) + lexer.removeErrorListeners() + lexer.addErrorListener(ParseErrorListener) - private lazy val substitutor = new VariableSubstitution(conf) + val tokenStream = new CommonTokenStream(lexer) + val parser = new OapSqlBaseParser(tokenStream) + parser.addParseListener(PostProcessor) + parser.removeErrorListeners() + parser.addErrorListener(ParseErrorListener) - protected override def parse[T](command: String)(toResult: SqlBaseParser => T): T = { - super.parse(substitutor.substitute(command))(toResult) + try { + try { + // first, try parsing with potentially faster SLL mode + parser.getInterpreter.setPredictionMode(PredictionMode.SLL) + toResult(parser) + } catch { + case e: ParseCancellationException => + // if we fail, parse with LL mode + tokenStream.seek(0) // rewind input stream + parser.reset() + + // Try Again. 
+ parser.getInterpreter.setPredictionMode(PredictionMode.LL) + toResult(parser) + } + } catch { + case e: ParseException if e.command.isDefined => + throw e + case e: ParseException => + throw e.withCommand(command) + case e: AnalysisException => + val position = Origin(e.line, e.startPosition) + throw new ParseException(Option(command), e.message, position, position) + } } - } + + override def parseExpression(sqlText: String): Expression = delegate.parseExpression(sqlText) + + override def parseTableIdentifier(sqlText: String): TableIdentifier = + delegate.parseTableIdentifier(sqlText) + + override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = + delegate.parseFunctionIdentifier(sqlText) + + override def parseTableSchema(sqlText: String): StructType = delegate.parseTableSchema(sqlText) + + override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText) + +} /** - * Builder that converts an ANTLR ParseTree into a LogicalPlan/Expression/TableIdentifier. + * Define how to convert an AST generated from `OapSqlBase.g4` to a `LogicalPlan`. The parent + * class `OapSqlBaseBaseVisitor` defines all visitXXX methods generated from `#` instructions in + * `OapSqlBase.g4` (such as `#oapCreateIndex`). */ -class OapSparkSqlAstBuilder(conf: SQLConf) extends SparkSqlAstBuilder(conf) { - import org.apache.spark.sql.catalyst.parser.ParserUtils._ +class OapSqlBaseAstBuilder(conf: SQLConf) extends OapSqlBaseBaseVisitor[AnyRef] { + + override def visitPassThrough(ctx: PassThroughContext): LogicalPlan = null /** * Create an index. Create a [[CreateIndexCommand]] command. 
@@ -124,4 +201,132 @@ class OapSparkSqlAstBuilder(conf: SQLConf) extends SparkSqlAstBuilder(conf) { override def visitOapEnableIndex(ctx: OapEnableIndexContext): LogicalPlan = OapEnableIndexCommand(ctx.IDENTIFIER.getText) + override def visitTableIdentifier(ctx: TableIdentifierContext): TableIdentifier = withOrigin(ctx) { + TableIdentifier(ctx.table.getText, Option(ctx.db).map(_.getText)) + } + + protected def visitNonOptionalPartitionSpec( + ctx: PartitionSpecContext): Map[String, String] = withOrigin(ctx) { + visitPartitionSpec(ctx).map { + case (key, None) => throw new ParseException(s"Found an empty partition key '$key'.", ctx) + case (key, Some(value)) => key -> value + } + } + + override def visitPartitionSpec( + ctx: PartitionSpecContext): Map[String, Option[String]] = withOrigin(ctx) { + val parts = ctx.partitionVal.asScala.map { pVal => + val name = pVal.identifier.getText + val value = Option(pVal.constant).map(visitStringConstant) + name -> value + } + // Before calling `toMap`, we check for duplicated keys to avoid silently ignoring partition values + // in a partition spec like PARTITION(a='1', b='2', a='3'). The real semantic check for + // partition columns will be done in the analyzer. 
+ checkDuplicateKeys(parts, ctx) + parts.toMap + } + + override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) { + visit(ctx.statement).asInstanceOf[LogicalPlan] + } + + protected def visitStringConstant(ctx: ConstantContext): String = withOrigin(ctx) { + ctx match { + case s: StringLiteralContext => createString(s) + case o => o.getText + } + } + + private def createString(ctx: StringLiteralContext): String = { + if (conf.escapedStringLiterals) { + ctx.STRING().asScala.map(stringWithoutUnescape).mkString + } else { + ctx.STRING().asScala.map(string).mkString + } + } + + + protected def typedVisit[T](ctx: ParseTree): T = { + ctx.accept(this).asInstanceOf[T] + } +} + +// scalastyle:off line.size.limit +/** + * Fork from `org.apache.spark.sql.catalyst.parser.UpperCaseCharStream`. + * + * @see https://github.com/apache/spark/blob/v2.4.4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala#L157 + */ +// scalastyle:on +class UpperCaseCharStream(wrapped: CodePointCharStream) extends CharStream { + override def consume(): Unit = wrapped.consume + override def getSourceName(): String = wrapped.getSourceName + override def index(): Int = wrapped.index + override def mark(): Int = wrapped.mark + override def release(marker: Int): Unit = wrapped.release(marker) + override def seek(where: Int): Unit = wrapped.seek(where) + override def size(): Int = wrapped.size + + override def getText(interval: Interval): String = { + // ANTLR 4.7's CodePointCharStream implementations have bugs when + // getText() is called with an empty stream, or intervals where + // the start > end. See + // https://github.com/antlr/antlr4/commit/ac9f7530 for one fix + // that is not yet in a released ANTLR artifact. 
+ if (size() > 0 && (interval.b - interval.a >= 0)) { + wrapped.getText(interval) + } else { + "" + } + } + + override def LA(i: Int): Int = { + val la = wrapped.LA(i) + if (la == 0 || la == IntStream.EOF) la + else Character.toUpperCase(la) + } +} + +// scalastyle:off line.size.limit +/** + * Fork from `org.apache.spark.sql.catalyst.parser.PostProcessor`. + * + * @see https://github.com/apache/spark/blob/v2.4.4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala#L248 + */ +// scalastyle:on +case object PostProcessor extends OapSqlBaseBaseListener { + + /** Remove the back ticks from an Identifier. */ + override def exitQuotedIdentifier(ctx: QuotedIdentifierContext): Unit = { + replaceTokenByIdentifier(ctx, 1) { token => + // Remove the double back ticks in the string. + token.setText(token.getText.replace("``", "`")) + token + } + } + + /** Treat non-reserved keywords as Identifiers. */ + override def exitNonReserved(ctx: NonReservedContext): Unit = { + replaceTokenByIdentifier(ctx, 0)(identity) + } + + private def replaceTokenByIdentifier( + ctx: ParserRuleContext, + stripMargins: Int)( + f: CommonToken => CommonToken = identity): Unit = { + val parent = ctx.getParent + parent.removeLastChild() + val token = ctx.getChild(0).getPayload.asInstanceOf[Token] + val newToken = new CommonToken( + new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream), + OapSqlBaseParser.IDENTIFIER, + token.getChannel, + token.getStartIndex + stripMargins, + token.getStopIndex - stripMargins) + parent.addChild(new TerminalNodeImpl(f(newToken))) + } } + + + diff --git a/src/main/spark2.4.4/antlr4/org/apache/spark/sql/catalyst/parser/OapSqlBase.g4 b/src/main/spark2.4.4/antlr4/org/apache/spark/sql/catalyst/parser/OapSqlBase.g4 new file mode 100644 index 00000000000..da76f4cfa46 --- /dev/null +++ b/src/main/spark2.4.4/antlr4/org/apache/spark/sql/catalyst/parser/OapSqlBase.g4 @@ -0,0 +1,230 @@ +grammar OapSqlBase; + +@members { + /** + * 
Verify whether current token is a valid decimal token (which contains dot). + * Returns true if the character that follows the token is not a digit or letter or underscore. + * + * For example: + * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. + * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. + * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. + * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed + * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' + * which is not a digit or letter or underscore. + */ + public boolean isValidDecimal() { + int nextChar = _input.LA(1); + if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || + nextChar == '_') { + return false; + } else { + return true; + } + } +} + +singleStatement + : statement EOF + ; + +statement + : REFRESH SINDEX ON tableIdentifier partitionSpec? #oapRefreshIndices + | CREATE SINDEX (IF NOT EXISTS)? IDENTIFIER ON + tableIdentifier indexCols (USING indexType)? + partitionSpec? #oapCreateIndex + | DROP SINDEX (IF EXISTS)? IDENTIFIER ON tableIdentifier + partitionSpec? #oapDropIndex + | DISABLE SINDEX IDENTIFIER #oapDisableIndex + | ENABLE SINDEX IDENTIFIER #oapEnableIndex + | SHOW SINDEX (FROM | IN) tableIdentifier #oapShowIndex + | CHECK SINDEX ON tableIdentifier partitionSpec? #oapCheckIndex + | .*? #passThrough + ; + +tableIdentifier + : (db=identifier '.')? table=identifier + ; + +partitionSpec + : PARTITION '(' partitionVal (',' partitionVal)* ')' + ; + +partitionVal + : identifier (EQ constant)? 
+ ; + +constant + : NULL #nullLiteral + | interval #intervalLiteral + | identifier STRING #typeConstructor + | number #numericLiteral + | booleanValue #booleanLiteral + | STRING+ #stringLiteral + ; + +indexCols + : '(' indexCol (',' indexCol)* ')' + ; + +indexCol + : identifier (ASC | DESC)? + ; + +indexType + : BTREE + | BITMAP + ; + +nonReserved + : CHECK + | DISABLE | ENABLE | TRUE | FALSE | REFRESH | CREATE | IF | NOT | EXISTS + | DROP | SHOW | FROM | IN | PARTITION | AS | ASC | DESC | INTERVAL | TO + ; + +interval + : INTERVAL intervalField* + ; + +intervalField + : value=intervalValue unit=identifier (TO to=identifier)? + ; + +intervalValue + : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE) + | STRING + ; + +identifier + : IDENTIFIER #unquotedIdentifier + | quotedIdentifier #quotedIdentifierAlternative + | nonReserved #unquotedIdentifier + ; + +number + : MINUS? DECIMAL_VALUE #decimalLiteral + | MINUS? INTEGER_VALUE #integerLiteral + | MINUS? BIGINT_LITERAL #bigIntLiteral + | MINUS? SMALLINT_LITERAL #smallIntLiteral + | MINUS? TINYINT_LITERAL #tinyIntLiteral + | MINUS? DOUBLE_LITERAL #doubleLiteral + | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral + ; +booleanValue + : TRUE | FALSE + ; + +quotedIdentifier + : BACKQUOTED_IDENTIFIER + ; + +DISABLE: 'DISABLE'; +ENABLE: 'ENABLE'; +CHECK: 'CHECK'; +SINDEX: 'SINDEX' | 'OINDEX'; +SINDICES: 'SINDICES' | 'OINDICES'; +BTREE: 'BTREE'; +BLOOM: 'BLOOM'; +BITMAP: 'BITMAP'; +NULL: 'NULL'; +TRUE: 'TRUE'; +FALSE: 'FALSE'; +ON: 'ON'; +REFRESH: 'REFRESH'; +CREATE: 'CREATE'; +IF: 'IF'; +NOT: 'NOT' | '!'; +EXISTS: 'EXISTS'; +USING: 'USING'; +DROP: 'DROP'; +SHOW: 'SHOW'; +FROM: 'FROM'; +IN: 'IN'; +PARTITION: 'PARTITION'; +AS: 'AS'; +ASC: 'ASC'; +DESC: 'DESC'; +INTERVAL: 'INTERVAL'; +TO: 'TO'; +PLUS: '+'; +MINUS: '-'; +EQ : '=' | '=='; + +STRING + : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | '"' ( ~('"'|'\\') | ('\\' .) 
)* '"' + ; + +BIGINT_LITERAL + : DIGIT+ 'L' + ; + +SMALLINT_LITERAL + : DIGIT+ 'S' + ; + +TINYINT_LITERAL + : DIGIT+ 'Y' + ; + +INTEGER_VALUE + : DIGIT+ + ; + +DECIMAL_VALUE + : DIGIT+ EXPONENT + | DECIMAL_DIGITS EXPONENT? {isValidDecimal()}? + ; + +DOUBLE_LITERAL + : DIGIT+ EXPONENT? 'D' + | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}? + ; + +BIGDECIMAL_LITERAL + : DIGIT+ EXPONENT? 'BD' + | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? + ; + +IDENTIFIER + : (LETTER | DIGIT | '_')+ + ; + +BACKQUOTED_IDENTIFIER + : '`' ( ~'`' | '``' )* '`' + ; + +fragment DECIMAL_DIGITS + : DIGIT+ '.' DIGIT* + | '.' DIGIT+ + ; + +fragment EXPONENT + : 'E' [+-]? DIGIT+ + ; + +fragment DIGIT + : [0-9] + ; + +fragment LETTER + : [A-Z] + ; + +SIMPLE_COMMENT + : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) + ; + +BRACKETED_COMMENT + : '/*' .*? '*/' -> channel(HIDDEN) + ; + +WS : [ \r\n\t]+ -> channel(HIDDEN) + ; + +// Catch-all for anything we can't recognize. +// We use this to be able to ignore and recover all the text +// when splitting statements with DelimiterLexer +UNRECOGNIZED + : . + ; diff --git a/src/main/spark2.4.4/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/src/main/spark2.4.4/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 deleted file mode 100644 index f93980f46c3..00000000000 --- a/src/main/spark2.4.4/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ /dev/null @@ -1,1130 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar. - */ - -grammar SqlBase; - -@members { - /** - * When false, INTERSECT is given the greater precedence over the other set - * operations (UNION, EXCEPT and MINUS) as per the SQL standard. - */ - public boolean legacy_setops_precedence_enbled = false; - - /** - * Verify whether current token is a valid decimal token (which contains dot). - * Returns true if the character that follows the token is not a digit or letter or underscore. - * - * For example: - * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. - * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. - * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. - * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed - * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' - * which is not a digit or letter or underscore. - */ - public boolean isValidDecimal() { - int nextChar = _input.LA(1); - if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || - nextChar == '_') { - return false; - } else { - return true; - } - } -} - -singleStatement - : statement EOF - ; - -singleExpression - : namedExpression EOF - ; - -singleTableIdentifier - : tableIdentifier EOF - ; - -singleFunctionIdentifier - : functionIdentifier EOF - ; - -singleDataType - : dataType EOF - ; - -singleTableSchema - : colTypeList EOF - ; - -statement - : query #statementDefault - | USE db=identifier #use - | CREATE DATABASE (IF NOT EXISTS)? identifier - (COMMENT comment=STRING)? locationSpec? - (WITH DBPROPERTIES tablePropertyList)? #createDatabase - | ALTER DATABASE identifier SET DBPROPERTIES tablePropertyList #setDatabaseProperties - | DROP DATABASE (IF EXISTS)? identifier (RESTRICT | CASCADE)? 
#dropDatabase - | createTableHeader ('(' colTypeList ')')? tableProvider - ((OPTIONS options=tablePropertyList) | - (PARTITIONED BY partitionColumnNames=identifierList) | - bucketSpec | - locationSpec | - (COMMENT comment=STRING) | - (TBLPROPERTIES tableProps=tablePropertyList))* - (AS? query)? #createTable - | createTableHeader ('(' columns=colTypeList ')')? - ((COMMENT comment=STRING) | - (PARTITIONED BY '(' partitionColumns=colTypeList ')') | - bucketSpec | - skewSpec | - rowFormat | - createFileFormat | - locationSpec | - (TBLPROPERTIES tableProps=tablePropertyList))* - (AS? query)? #createHiveTable - | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier - LIKE source=tableIdentifier locationSpec? #createTableLike - | ANALYZE TABLE tableIdentifier partitionSpec? COMPUTE STATISTICS - (identifier | FOR COLUMNS identifierSeq)? #analyze - | ALTER TABLE tableIdentifier - ADD COLUMNS '(' columns=colTypeList ')' #addTableColumns - | ALTER (TABLE | VIEW) from=tableIdentifier - RENAME TO to=tableIdentifier #renameTable - | ALTER (TABLE | VIEW) tableIdentifier - SET TBLPROPERTIES tablePropertyList #setTableProperties - | ALTER (TABLE | VIEW) tableIdentifier - UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList #unsetTableProperties - | ALTER TABLE tableIdentifier partitionSpec? - CHANGE COLUMN? identifier colType colPosition? #changeColumn - | ALTER TABLE tableIdentifier (partitionSpec)? - SET SERDE STRING (WITH SERDEPROPERTIES tablePropertyList)? #setTableSerDe - | ALTER TABLE tableIdentifier (partitionSpec)? - SET SERDEPROPERTIES tablePropertyList #setTableSerDe - | ALTER TABLE tableIdentifier ADD (IF NOT EXISTS)? - partitionSpecLocation+ #addTablePartition - | ALTER VIEW tableIdentifier ADD (IF NOT EXISTS)? - partitionSpec+ #addTablePartition - | ALTER TABLE tableIdentifier - from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition - | ALTER TABLE tableIdentifier - DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? 
#dropTablePartitions - | ALTER VIEW tableIdentifier - DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* #dropTablePartitions - | ALTER TABLE tableIdentifier partitionSpec? SET locationSpec #setTableLocation - | ALTER TABLE tableIdentifier RECOVER PARTITIONS #recoverPartitions - | DROP TABLE (IF EXISTS)? tableIdentifier PURGE? #dropTable - | DROP VIEW (IF EXISTS)? tableIdentifier #dropTable - | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)? - VIEW (IF NOT EXISTS)? tableIdentifier - identifierCommentList? (COMMENT STRING)? - (PARTITIONED ON identifierList)? - (TBLPROPERTIES tablePropertyList)? AS query #createView - | CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW - tableIdentifier ('(' colTypeList ')')? tableProvider - (OPTIONS tablePropertyList)? #createTempViewUsing - | ALTER VIEW tableIdentifier AS? query #alterViewQuery - | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)? - qualifiedName AS className=STRING - (USING resource (',' resource)*)? #createFunction - | DROP TEMPORARY? FUNCTION (IF EXISTS)? qualifiedName #dropFunction - | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)? - statement #explain - | SHOW TABLES ((FROM | IN) db=identifier)? - (LIKE? pattern=STRING)? #showTables - | SHOW TABLE EXTENDED ((FROM | IN) db=identifier)? - LIKE pattern=STRING partitionSpec? #showTable - | SHOW DATABASES (LIKE? pattern=STRING)? #showDatabases - | SHOW TBLPROPERTIES table=tableIdentifier - ('(' key=tablePropertyKey ')')? #showTblProperties - | SHOW COLUMNS (FROM | IN) tableIdentifier - ((FROM | IN) db=identifier)? #showColumns - | SHOW PARTITIONS tableIdentifier partitionSpec? #showPartitions - | SHOW identifier? FUNCTIONS - (LIKE? (qualifiedName | pattern=STRING))? #showFunctions - | SHOW CREATE TABLE tableIdentifier #showCreateTable - | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction - | (DESC | DESCRIBE) DATABASE EXTENDED? identifier #describeDatabase - | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)? 
- tableIdentifier partitionSpec? describeColName? #describeTable - | REFRESH TABLE tableIdentifier #refreshTable - | REFRESH SINDEX ON tableIdentifier partitionSpec? #oapRefreshIndices - | REFRESH (STRING | .*?) #refreshResource - | CACHE LAZY? TABLE tableIdentifier (AS? query)? #cacheTable - | UNCACHE TABLE (IF EXISTS)? tableIdentifier #uncacheTable - | CLEAR CACHE #clearCache - | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE - tableIdentifier partitionSpec? #loadData - | TRUNCATE TABLE tableIdentifier partitionSpec? #truncateTable - | MSCK REPAIR TABLE tableIdentifier #repairTable - | op=(ADD | LIST) identifier .*? #manageResource - | SET ROLE .*? #failNativeCommand - | SET .*? #setConfiguration - | RESET #resetConfiguration - | CREATE SINDEX (IF NOT EXISTS)? IDENTIFIER ON - tableIdentifier indexCols (USING indexType)? - partitionSpec? #oapCreateIndex - | DROP SINDEX (IF EXISTS)? IDENTIFIER ON tableIdentifier - partitionSpec? #oapDropIndex - | DISABLE SINDEX IDENTIFIER #oapDisableIndex - | ENABLE SINDEX IDENTIFIER #oapEnableIndex - | SHOW SINDEX (FROM | IN) tableIdentifier #oapShowIndex - | CHECK SINDEX ON tableIdentifier partitionSpec? #oapCheckIndex - | unsupportedHiveNativeCommands .*? #failNativeCommand - ; - -indexCols - : '(' indexCol (',' indexCol)* ')' - ; - -indexCol - : identifier (ASC | DESC)? - ; - -indexType - : BTREE - | BITMAP - ; - -unsupportedHiveNativeCommands - : kw1=CREATE kw2=ROLE - | kw1=DROP kw2=ROLE - | kw1=GRANT kw2=ROLE? - | kw1=REVOKE kw2=ROLE? - | kw1=SHOW kw2=GRANT - | kw1=SHOW kw2=ROLE kw3=GRANT? 
- | kw1=SHOW kw2=PRINCIPALS - | kw1=SHOW kw2=ROLES - | kw1=SHOW kw2=CURRENT kw3=ROLES - | kw1=EXPORT kw2=TABLE - | kw1=IMPORT kw2=TABLE - | kw1=SHOW kw2=COMPACTIONS - | kw1=SHOW kw2=CREATE kw3=TABLE - | kw1=SHOW kw2=TRANSACTIONS - | kw1=SHOW kw2=INDEXES - | kw1=SHOW kw2=LOCKS - | kw1=CREATE kw2=INDEX - | kw1=DROP kw2=INDEX - | kw1=ALTER kw2=INDEX - | kw1=LOCK kw2=TABLE - | kw1=LOCK kw2=DATABASE - | kw1=UNLOCK kw2=TABLE - | kw1=UNLOCK kw2=DATABASE - | kw1=CREATE kw2=TEMPORARY kw3=MACRO - | kw1=DROP kw2=TEMPORARY kw3=MACRO - | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=CLUSTERED - | kw1=ALTER kw2=TABLE tableIdentifier kw3=CLUSTERED kw4=BY - | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SORTED - | kw1=ALTER kw2=TABLE tableIdentifier kw3=SKEWED kw4=BY - | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SKEWED - | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=STORED kw5=AS kw6=DIRECTORIES - | kw1=ALTER kw2=TABLE tableIdentifier kw3=SET kw4=SKEWED kw5=LOCATION - | kw1=ALTER kw2=TABLE tableIdentifier kw3=EXCHANGE kw4=PARTITION - | kw1=ALTER kw2=TABLE tableIdentifier kw3=ARCHIVE kw4=PARTITION - | kw1=ALTER kw2=TABLE tableIdentifier kw3=UNARCHIVE kw4=PARTITION - | kw1=ALTER kw2=TABLE tableIdentifier kw3=TOUCH - | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=COMPACT - | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=CONCATENATE - | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=SET kw4=FILEFORMAT - | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=REPLACE kw4=COLUMNS - | kw1=START kw2=TRANSACTION - | kw1=COMMIT - | kw1=ROLLBACK - | kw1=DFS - | kw1=DELETE kw2=FROM - ; - -createTableHeader - : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? tableIdentifier - ; - -bucketSpec - : CLUSTERED BY identifierList - (SORTED BY orderedIdentifierList)? - INTO INTEGER_VALUE BUCKETS - ; - -skewSpec - : SKEWED BY identifierList - ON (constantList | nestedConstantList) - (STORED AS DIRECTORIES)? 
- ; - -locationSpec - : LOCATION STRING - ; - -query - : ctes? queryNoWith - ; - -insertInto - : INSERT OVERWRITE TABLE tableIdentifier (partitionSpec (IF NOT EXISTS)?)? #insertOverwriteTable - | INSERT INTO TABLE? tableIdentifier partitionSpec? #insertIntoTable - | INSERT OVERWRITE LOCAL? DIRECTORY path=STRING rowFormat? createFileFormat? #insertOverwriteHiveDir - | INSERT OVERWRITE LOCAL? DIRECTORY (path=STRING)? tableProvider (OPTIONS options=tablePropertyList)? #insertOverwriteDir - ; - -partitionSpecLocation - : partitionSpec locationSpec? - ; - -partitionSpec - : PARTITION '(' partitionVal (',' partitionVal)* ')' - ; - -partitionVal - : identifier (EQ constant)? - ; - -describeFuncName - : qualifiedName - | STRING - | comparisonOperator - | arithmeticOperator - | predicateOperator - ; - -describeColName - : nameParts+=identifier ('.' nameParts+=identifier)* - ; - -ctes - : WITH namedQuery (',' namedQuery)* - ; - -namedQuery - : name=identifier AS? '(' query ')' - ; - -tableProvider - : USING qualifiedName - ; - -tablePropertyList - : '(' tableProperty (',' tableProperty)* ')' - ; - -tableProperty - : key=tablePropertyKey (EQ? value=tablePropertyValue)? - ; - -tablePropertyKey - : identifier ('.' identifier)* - | STRING - ; - -tablePropertyValue - : INTEGER_VALUE - | DECIMAL_VALUE - | booleanValue - | STRING - ; - -constantList - : '(' constant (',' constant)* ')' - ; - -nestedConstantList - : '(' constantList (',' constantList)* ')' - ; - -createFileFormat - : STORED AS fileFormat - | STORED BY storageHandler - ; - -fileFormat - : INPUTFORMAT inFmt=STRING OUTPUTFORMAT outFmt=STRING #tableFileFormat - | identifier #genericFileFormat - ; - -storageHandler - : STRING (WITH SERDEPROPERTIES tablePropertyList)? - ; - -resource - : identifier STRING - ; - -queryNoWith - : insertInto? queryTerm queryOrganization #singleInsertQuery - | fromClause multiInsertQueryBody+ #multiInsertQuery - ; - -queryOrganization - : (ORDER BY order+=sortItem (',' order+=sortItem)*)? 
- (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)? - (DISTRIBUTE BY distributeBy+=expression (',' distributeBy+=expression)*)? - (SORT BY sort+=sortItem (',' sort+=sortItem)*)? - windows? - (LIMIT (ALL | limit=expression))? - ; - -multiInsertQueryBody - : insertInto? - querySpecification - queryOrganization - ; - -queryTerm - : queryPrimary #queryTermDefault - | left=queryTerm {legacy_setops_precedence_enbled}? - operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation - | left=queryTerm {!legacy_setops_precedence_enbled}? - operator=INTERSECT setQuantifier? right=queryTerm #setOperation - | left=queryTerm {!legacy_setops_precedence_enbled}? - operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation - ; - -queryPrimary - : querySpecification #queryPrimaryDefault - | TABLE tableIdentifier #table - | inlineTable #inlineTableDefault1 - | '(' queryNoWith ')' #subquery - ; - -sortItem - : expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))? - ; - -querySpecification - : (((SELECT kind=TRANSFORM '(' namedExpressionSeq ')' - | kind=MAP namedExpressionSeq - | kind=REDUCE namedExpressionSeq)) - inRowFormat=rowFormat? - (RECORDWRITER recordWriter=STRING)? - USING script=STRING - (AS (identifierSeq | colTypeList | ('(' (identifierSeq | colTypeList) ')')))? - outRowFormat=rowFormat? - (RECORDREADER recordReader=STRING)? - fromClause? - (WHERE where=booleanExpression)?) - | ((kind=SELECT (hints+=hint)* setQuantifier? namedExpressionSeq fromClause? - | fromClause (kind=SELECT setQuantifier? namedExpressionSeq)?) - lateralView* - (WHERE where=booleanExpression)? - aggregation? - (HAVING having=booleanExpression)? - windows?) - ; - -hint - : '/*+' hintStatements+=hintStatement (','? 
hintStatements+=hintStatement)* '*/' - ; - -hintStatement - : hintName=identifier - | hintName=identifier '(' parameters+=primaryExpression (',' parameters+=primaryExpression)* ')' - ; - -fromClause - : FROM relation (',' relation)* lateralView* pivotClause? - ; - -aggregation - : GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)* ( - WITH kind=ROLLUP - | WITH kind=CUBE - | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')? - | GROUP BY kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')' - ; - -groupingSet - : '(' (expression (',' expression)*)? ')' - | expression - ; - -pivotClause - : PIVOT '(' aggregates=namedExpressionSeq FOR pivotColumn IN '(' pivotValues+=pivotValue (',' pivotValues+=pivotValue)* ')' ')' - ; - -pivotColumn - : identifiers+=identifier - | '(' identifiers+=identifier (',' identifiers+=identifier)* ')' - ; - -pivotValue - : expression (AS? identifier)? - ; - -lateralView - : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (',' expression)*)? ')' tblName=identifier (AS? colName+=identifier (',' colName+=identifier)*)? - ; - -setQuantifier - : DISTINCT - | ALL - ; - -relation - : relationPrimary joinRelation* - ; - -joinRelation - : (joinType) JOIN right=relationPrimary joinCriteria? - | NATURAL joinType JOIN right=relationPrimary - ; - -joinType - : INNER? - | CROSS - | LEFT OUTER? - | LEFT SEMI - | RIGHT OUTER? - | FULL OUTER? - | LEFT? ANTI - ; - -joinCriteria - : ON booleanExpression - | USING '(' identifier (',' identifier)* ')' - ; - -sample - : TABLESAMPLE '(' sampleMethod? ')' - ; - -sampleMethod - : negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) PERCENTLIT #sampleByPercentile - | expression ROWS #sampleByRows - | sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE - (ON (identifier | qualifiedName '(' ')'))? 
#sampleByBucket - | bytes=expression #sampleByBytes - ; - -identifierList - : '(' identifierSeq ')' - ; - -identifierSeq - : identifier (',' identifier)* - ; - -orderedIdentifierList - : '(' orderedIdentifier (',' orderedIdentifier)* ')' - ; - -orderedIdentifier - : identifier ordering=(ASC | DESC)? - ; - -identifierCommentList - : '(' identifierComment (',' identifierComment)* ')' - ; - -identifierComment - : identifier (COMMENT STRING)? - ; - -relationPrimary - : tableIdentifier sample? tableAlias #tableName - | '(' queryNoWith ')' sample? tableAlias #aliasedQuery - | '(' relation ')' sample? tableAlias #aliasedRelation - | inlineTable #inlineTableDefault2 - | functionTable #tableValuedFunction - ; - -inlineTable - : VALUES expression (',' expression)* tableAlias - ; - -functionTable - : identifier '(' (expression (',' expression)*)? ')' tableAlias - ; - -tableAlias - : (AS? strictIdentifier identifierList?)? - ; - -rowFormat - : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES props=tablePropertyList)? #rowFormatSerde - | ROW FORMAT DELIMITED - (FIELDS TERMINATED BY fieldsTerminatedBy=STRING (ESCAPED BY escapedBy=STRING)?)? - (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)? - (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)? - (LINES TERMINATED BY linesSeparatedBy=STRING)? - (NULL DEFINED AS nullDefinedAs=STRING)? #rowFormatDelimited - ; - -tableIdentifier - : (db=identifier '.')? table=identifier - ; - -functionIdentifier - : (db=identifier '.')? function=identifier - ; - -namedExpression - : expression (AS? (identifier | identifierList))? - ; - -namedExpressionSeq - : namedExpression (',' namedExpression)* - ; - -expression - : booleanExpression - ; - -booleanExpression - : NOT booleanExpression #logicalNot - | EXISTS '(' query ')' #exists - | valueExpression predicate? 
#predicated - | left=booleanExpression operator=AND right=booleanExpression #logicalBinary - | left=booleanExpression operator=OR right=booleanExpression #logicalBinary - ; - -predicate - : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression - | NOT? kind=IN '(' expression (',' expression)* ')' - | NOT? kind=IN '(' query ')' - | NOT? kind=(RLIKE | LIKE) pattern=valueExpression - | IS NOT? kind=NULL - | IS NOT? kind=DISTINCT FROM right=valueExpression - ; - -valueExpression - : primaryExpression #valueExpressionDefault - | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary - | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary - | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary - | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary - | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary - | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary - | left=valueExpression comparisonOperator right=valueExpression #comparison - ; - -primaryExpression - : CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase - | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase - | CAST '(' expression AS dataType ')' #cast - | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct - | FIRST '(' expression (IGNORE NULLS)? ')' #first - | LAST '(' expression (IGNORE NULLS)? ')' #last - | POSITION '(' substr=valueExpression IN str=valueExpression ')' #position - | constant #constantDefault - | ASTERISK #star - | qualifiedName '.' ASTERISK #star - | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor - | '(' query ')' #subqueryExpression - | qualifiedName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' - (OVER windowSpec)? 
#functionCall - | qualifiedName '(' trimOption=(BOTH | LEADING | TRAILING) argument+=expression - FROM argument+=expression ')' #functionCall - | IDENTIFIER '->' expression #lambda - | '(' IDENTIFIER (',' IDENTIFIER)+ ')' '->' expression #lambda - | value=primaryExpression '[' index=valueExpression ']' #subscript - | identifier #columnReference - | base=primaryExpression '.' fieldName=identifier #dereference - | '(' expression ')' #parenthesizedExpression - | EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract - ; - -constant - : NULL #nullLiteral - | interval #intervalLiteral - | identifier STRING #typeConstructor - | number #numericLiteral - | booleanValue #booleanLiteral - | STRING+ #stringLiteral - ; - -comparisonOperator - : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ - ; - -arithmeticOperator - : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT - ; - -predicateOperator - : OR | AND | IN | NOT - ; - -booleanValue - : TRUE | FALSE - ; - -interval - : INTERVAL intervalField* - ; - -intervalField - : value=intervalValue unit=identifier (TO to=identifier)? - ; - -intervalValue - : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE) - | STRING - ; - -colPosition - : FIRST | AFTER identifier - ; - -dataType - : complex=ARRAY '<' dataType '>' #complexDataType - | complex=MAP '<' dataType ',' dataType '>' #complexDataType - | complex=STRUCT ('<' complexColTypeList? '>' | NEQ) #complexDataType - | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType - ; - -colTypeList - : colType (',' colType)* - ; - -colType - : identifier dataType (COMMENT STRING)? - ; - -complexColTypeList - : complexColType (',' complexColType)* - ; - -complexColType - : identifier ':' dataType (COMMENT STRING)? 
- ; - -whenClause - : WHEN condition=expression THEN result=expression - ; - -windows - : WINDOW namedWindow (',' namedWindow)* - ; - -namedWindow - : identifier AS windowSpec - ; - -windowSpec - : name=identifier #windowRef - | '('name=identifier')' #windowRef - | '(' - ( CLUSTER BY partition+=expression (',' partition+=expression)* - | ((PARTITION | DISTRIBUTE) BY partition+=expression (',' partition+=expression)*)? - ((ORDER | SORT) BY sortItem (',' sortItem)*)?) - windowFrame? - ')' #windowDef - ; - -windowFrame - : frameType=RANGE start=frameBound - | frameType=ROWS start=frameBound - | frameType=RANGE BETWEEN start=frameBound AND end=frameBound - | frameType=ROWS BETWEEN start=frameBound AND end=frameBound - ; - -frameBound - : UNBOUNDED boundType=(PRECEDING | FOLLOWING) - | boundType=CURRENT ROW - | expression boundType=(PRECEDING | FOLLOWING) - ; - -qualifiedName - : identifier ('.' identifier)* - ; - -identifier - : strictIdentifier - | ANTI | FULL | INNER | LEFT | SEMI | RIGHT | NATURAL | JOIN | CROSS | ON - | UNION | INTERSECT | EXCEPT | SETMINUS - ; - -strictIdentifier - : IDENTIFIER #unquotedIdentifier - | quotedIdentifier #quotedIdentifierAlternative - | nonReserved #unquotedIdentifier - ; - -quotedIdentifier - : BACKQUOTED_IDENTIFIER - ; - -number - : MINUS? DECIMAL_VALUE #decimalLiteral - | MINUS? INTEGER_VALUE #integerLiteral - | MINUS? BIGINT_LITERAL #bigIntLiteral - | MINUS? SMALLINT_LITERAL #smallIntLiteral - | MINUS? TINYINT_LITERAL #tinyIntLiteral - | MINUS? DOUBLE_LITERAL #doubleLiteral - | MINUS? 
BIGDECIMAL_LITERAL #bigDecimalLiteral - ; - -nonReserved - : SHOW | TABLES | COLUMNS | COLUMN | PARTITIONS | FUNCTIONS | DATABASES - | ADD - | OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | LAST | FIRST | AFTER - | MAP | ARRAY | STRUCT - | PIVOT | LATERAL | WINDOW | REDUCE | TRANSFORM | SERDE | SERDEPROPERTIES | RECORDREADER - | DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED - | EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | GLOBAL | TEMPORARY | OPTIONS - | GROUPING | CUBE | ROLLUP - | EXPLAIN | FORMAT | LOGICAL | FORMATTED | CODEGEN | COST - | TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF - | SET | RESET - | VIEW | REPLACE - | IF - | POSITION - | EXTRACT - | NO | DATA - | START | TRANSACTION | COMMIT | ROLLBACK | IGNORE - | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION - | EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE - | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT - | DBPROPERTIES | DFS | TRUNCATE | COMPUTE | LIST - | STATISTICS | ANALYZE | PARTITIONED | EXTERNAL | DEFINED | RECORDWRITER - | REVOKE | GRANT | LOCK | UNLOCK | MSCK | REPAIR | RECOVER | EXPORT | IMPORT | LOAD | VALUES | COMMENT | ROLE - | ROLES | COMPACTIONS | PRINCIPALS | TRANSACTIONS | INDEX | INDEXES | LOCKS | OPTION | LOCAL | INPATH - | ASC | DESC | LIMIT | RENAME | SETS - | AT | NULLS | OVERWRITE | ALL | ANY | ALTER | AS | BETWEEN | BY | CREATE | DELETE - | DESCRIBE | DROP | EXISTS | FALSE | FOR | GROUP | IN | INSERT | INTO | IS |LIKE - | NULL | ORDER | OUTER | TABLE | TRUE | WITH | RLIKE - | AND | CASE | CAST | DISTINCT | DIV | ELSE | END | FUNCTION | INTERVAL | MACRO | OR | STRATIFY | THEN - | UNBOUNDED | WHEN - | DATABASE | SELECT | FROM | WHERE | HAVING | TO | TABLE | WITH | NOT - | DIRECTORY - | BOTH | LEADING | TRAILING - ; - -SELECT: 'SELECT'; -FROM: 'FROM'; -ADD: 'ADD'; 
-AS: 'AS'; -ALL: 'ALL'; -ANY: 'ANY'; -DISTINCT: 'DISTINCT'; -WHERE: 'WHERE'; -GROUP: 'GROUP'; -BY: 'BY'; -GROUPING: 'GROUPING'; -SETS: 'SETS'; -CUBE: 'CUBE'; -ROLLUP: 'ROLLUP'; -ORDER: 'ORDER'; -HAVING: 'HAVING'; -LIMIT: 'LIMIT'; -AT: 'AT'; -OR: 'OR'; -AND: 'AND'; -IN: 'IN'; -NOT: 'NOT' | '!'; -NO: 'NO'; -EXISTS: 'EXISTS'; -BETWEEN: 'BETWEEN'; -LIKE: 'LIKE'; -RLIKE: 'RLIKE' | 'REGEXP'; -IS: 'IS'; -NULL: 'NULL'; -TRUE: 'TRUE'; -FALSE: 'FALSE'; -NULLS: 'NULLS'; -ASC: 'ASC'; -DESC: 'DESC'; -FOR: 'FOR'; -INTERVAL: 'INTERVAL'; -CASE: 'CASE'; -WHEN: 'WHEN'; -THEN: 'THEN'; -ELSE: 'ELSE'; -END: 'END'; -JOIN: 'JOIN'; -CROSS: 'CROSS'; -OUTER: 'OUTER'; -INNER: 'INNER'; -LEFT: 'LEFT'; -SEMI: 'SEMI'; -RIGHT: 'RIGHT'; -FULL: 'FULL'; -NATURAL: 'NATURAL'; -ON: 'ON'; -PIVOT: 'PIVOT'; -LATERAL: 'LATERAL'; -WINDOW: 'WINDOW'; -OVER: 'OVER'; -PARTITION: 'PARTITION'; -RANGE: 'RANGE'; -ROWS: 'ROWS'; -UNBOUNDED: 'UNBOUNDED'; -PRECEDING: 'PRECEDING'; -FOLLOWING: 'FOLLOWING'; -CURRENT: 'CURRENT'; -FIRST: 'FIRST'; -AFTER: 'AFTER'; -LAST: 'LAST'; -ROW: 'ROW'; -WITH: 'WITH'; -VALUES: 'VALUES'; -CREATE: 'CREATE'; -TABLE: 'TABLE'; -DIRECTORY: 'DIRECTORY'; -VIEW: 'VIEW'; -REPLACE: 'REPLACE'; -INSERT: 'INSERT'; -DELETE: 'DELETE'; -INTO: 'INTO'; -DESCRIBE: 'DESCRIBE'; -EXPLAIN: 'EXPLAIN'; -FORMAT: 'FORMAT'; -LOGICAL: 'LOGICAL'; -CODEGEN: 'CODEGEN'; -COST: 'COST'; -CAST: 'CAST'; -SHOW: 'SHOW'; -TABLES: 'TABLES'; -COLUMNS: 'COLUMNS'; -COLUMN: 'COLUMN'; -USE: 'USE'; -PARTITIONS: 'PARTITIONS'; -FUNCTIONS: 'FUNCTIONS'; -DROP: 'DROP'; -UNION: 'UNION'; -EXCEPT: 'EXCEPT'; -SETMINUS: 'MINUS'; -INTERSECT: 'INTERSECT'; -TO: 'TO'; -TABLESAMPLE: 'TABLESAMPLE'; -STRATIFY: 'STRATIFY'; -ALTER: 'ALTER'; -RENAME: 'RENAME'; -ARRAY: 'ARRAY'; -MAP: 'MAP'; -STRUCT: 'STRUCT'; -COMMENT: 'COMMENT'; -SET: 'SET'; -RESET: 'RESET'; -DATA: 'DATA'; -START: 'START'; -TRANSACTION: 'TRANSACTION'; -COMMIT: 'COMMIT'; -ROLLBACK: 'ROLLBACK'; -MACRO: 'MACRO'; -IGNORE: 'IGNORE'; -DISABLE: 'DISABLE'; -ENABLE: 'ENABLE'; -BOTH: 'BOTH'; 
-LEADING: 'LEADING'; -TRAILING: 'TRAILING'; - -IF: 'IF'; -POSITION: 'POSITION'; -EXTRACT: 'EXTRACT'; - -EQ : '=' | '=='; -NSEQ: '<=>'; -NEQ : '<>'; -NEQJ: '!='; -LT : '<'; -LTE : '<=' | '!>'; -GT : '>'; -GTE : '>=' | '!<'; - -PLUS: '+'; -MINUS: '-'; -ASTERISK: '*'; -SLASH: '/'; -PERCENT: '%'; -DIV: 'DIV'; -TILDE: '~'; -AMPERSAND: '&'; -PIPE: '|'; -CONCAT_PIPE: '||'; -HAT: '^'; - -PERCENTLIT: 'PERCENT'; -BUCKET: 'BUCKET'; -OUT: 'OUT'; -OF: 'OF'; - -SORT: 'SORT'; -CLUSTER: 'CLUSTER'; -DISTRIBUTE: 'DISTRIBUTE'; -OVERWRITE: 'OVERWRITE'; -TRANSFORM: 'TRANSFORM'; -REDUCE: 'REDUCE'; -USING: 'USING'; -SERDE: 'SERDE'; -SERDEPROPERTIES: 'SERDEPROPERTIES'; -RECORDREADER: 'RECORDREADER'; -RECORDWRITER: 'RECORDWRITER'; -DELIMITED: 'DELIMITED'; -FIELDS: 'FIELDS'; -TERMINATED: 'TERMINATED'; -COLLECTION: 'COLLECTION'; -ITEMS: 'ITEMS'; -KEYS: 'KEYS'; -ESCAPED: 'ESCAPED'; -LINES: 'LINES'; -SEPARATED: 'SEPARATED'; -FUNCTION: 'FUNCTION'; -EXTENDED: 'EXTENDED'; -REFRESH: 'REFRESH'; -CLEAR: 'CLEAR'; -CACHE: 'CACHE'; -UNCACHE: 'UNCACHE'; -LAZY: 'LAZY'; -FORMATTED: 'FORMATTED'; -GLOBAL: 'GLOBAL'; -TEMPORARY: 'TEMPORARY' | 'TEMP'; -OPTIONS: 'OPTIONS'; -UNSET: 'UNSET'; -TBLPROPERTIES: 'TBLPROPERTIES'; -DBPROPERTIES: 'DBPROPERTIES'; -BUCKETS: 'BUCKETS'; -SKEWED: 'SKEWED'; -STORED: 'STORED'; -DIRECTORIES: 'DIRECTORIES'; -LOCATION: 'LOCATION'; -EXCHANGE: 'EXCHANGE'; -ARCHIVE: 'ARCHIVE'; -UNARCHIVE: 'UNARCHIVE'; -FILEFORMAT: 'FILEFORMAT'; -TOUCH: 'TOUCH'; -COMPACT: 'COMPACT'; -CONCATENATE: 'CONCATENATE'; -CHANGE: 'CHANGE'; -CASCADE: 'CASCADE'; -RESTRICT: 'RESTRICT'; -CLUSTERED: 'CLUSTERED'; -SORTED: 'SORTED'; -PURGE: 'PURGE'; -INPUTFORMAT: 'INPUTFORMAT'; -OUTPUTFORMAT: 'OUTPUTFORMAT'; -DATABASE: 'DATABASE' | 'SCHEMA'; -DATABASES: 'DATABASES' | 'SCHEMAS'; -DFS: 'DFS'; -TRUNCATE: 'TRUNCATE'; -ANALYZE: 'ANALYZE'; -COMPUTE: 'COMPUTE'; -LIST: 'LIST'; -STATISTICS: 'STATISTICS'; -PARTITIONED: 'PARTITIONED'; -EXTERNAL: 'EXTERNAL'; -DEFINED: 'DEFINED'; -REVOKE: 'REVOKE'; -GRANT: 'GRANT'; -LOCK: 'LOCK'; 
-UNLOCK: 'UNLOCK'; -MSCK: 'MSCK'; -REPAIR: 'REPAIR'; -RECOVER: 'RECOVER'; -EXPORT: 'EXPORT'; -IMPORT: 'IMPORT'; -LOAD: 'LOAD'; -ROLE: 'ROLE'; -ROLES: 'ROLES'; -COMPACTIONS: 'COMPACTIONS'; -PRINCIPALS: 'PRINCIPALS'; -TRANSACTIONS: 'TRANSACTIONS'; -INDEX: 'INDEX'; -INDEXES: 'INDEXES'; -LOCKS: 'LOCKS'; -OPTION: 'OPTION'; -ANTI: 'ANTI'; -LOCAL: 'LOCAL'; -INPATH: 'INPATH'; - -CHECK: 'CHECK'; -SINDEX: 'SINDEX' | 'OINDEX'; -SINDICES: 'SINDICES' | 'OINDICES'; -BTREE: 'BTREE'; -BLOOM: 'BLOOM'; -BITMAP: 'BITMAP'; - -STRING - : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' - | '"' ( ~('"'|'\\') | ('\\' .) )* '"' - ; - -BIGINT_LITERAL - : DIGIT+ 'L' - ; - -SMALLINT_LITERAL - : DIGIT+ 'S' - ; - -TINYINT_LITERAL - : DIGIT+ 'Y' - ; - -INTEGER_VALUE - : DIGIT+ - ; - -DECIMAL_VALUE - : DIGIT+ EXPONENT - | DECIMAL_DIGITS EXPONENT? {isValidDecimal()}? - ; - -DOUBLE_LITERAL - : DIGIT+ EXPONENT? 'D' - | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}? - ; - -BIGDECIMAL_LITERAL - : DIGIT+ EXPONENT? 'BD' - | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? - ; - -IDENTIFIER - : (LETTER | DIGIT | '_')+ - ; - -BACKQUOTED_IDENTIFIER - : '`' ( ~'`' | '``' )* '`' - ; - -fragment DECIMAL_DIGITS - : DIGIT+ '.' DIGIT* - | '.' DIGIT+ - ; - -fragment EXPONENT - : 'E' [+-]? DIGIT+ - ; - -fragment DIGIT - : [0-9] - ; - -fragment LETTER - : [A-Z] - ; - -SIMPLE_COMMENT - : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) - ; - -BRACKETED_EMPTY_COMMENT - : '/**/' -> channel(HIDDEN) - ; - -BRACKETED_COMMENT - : '/*' ~[+] .*? '*/' -> channel(HIDDEN) - ; - -WS - : [ \r\n\t]+ -> channel(HIDDEN) - ; - -// Catch-all for anything we can't recognize. -// We use this to be able to ignore and recover all the text -// when splitting statements with DelimiterLexer -UNRECOGNIZED - : . - ;