From 651fc44a4d974d20114f7188b77be9b3c18f44d1 Mon Sep 17 00:00:00 2001 From: Soumyakanti Das Date: Wed, 24 Sep 2025 16:52:47 -0700 Subject: [PATCH 1/4] HIVE-28280: SemanticException when querying VIEW with DISTINCT clause --- .../hadoop/hive/ql/parse/CalcitePlanner.java | 19 ++++---- ...ew_top_relnode_not_project_authorization.q | 6 +++ ...op_relnode_not_project_authorization.q.out | 45 +++++++++++++++++++ 3 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q create mode 100644 ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 5150de01baac..48a4cd35b37d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -28,6 +28,7 @@ import com.google.common.collect.Multimap; import java.util.Map.Entry; +import java.util.Objects; import java.util.Optional; import java.util.function.Function; import java.util.regex.Pattern; @@ -79,6 +80,7 @@ import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelVisitor; +import org.apache.calcite.rel.SingleRel; import org.apache.calcite.rel.convert.ConverterImpl; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; @@ -4918,15 +4920,16 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, aliasToRel.put(subqAlias, relNode); if (qb.getViewToTabSchema().containsKey(subqAlias)) { - if (relNode instanceof HiveProject) { - if (this.viewProjectToTableSchema == null) { - this.viewProjectToTableSchema = new LinkedHashMap<>(); - } - viewProjectToTableSchema.put((HiveProject) relNode, qb.getViewToTabSchema().get(subqAlias)); - } else { - throw new 
SemanticException("View " + subqAlias + " is corresponding to " - + relNode.toString() + ", rather than a HiveProject."); + HiveProject project = switch (Objects.requireNonNull(relNode)) { + case HiveProject hiveProject -> hiveProject; + case SingleRel singleRel when singleRel.getInput() instanceof HiveProject hiveProject -> hiveProject; + default -> throw new SemanticException("View " + subqAlias + " is corresponding to " + + relNode + ", rather than a HiveProject or a SingleRel with HiveProject as its child."); + }; + if (this.viewProjectToTableSchema == null) { + this.viewProjectToTableSchema = new LinkedHashMap<>(); } + viewProjectToTableSchema.put(project, qb.getViewToTabSchema().get(subqAlias)); } } diff --git a/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q b/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q new file mode 100644 index 000000000000..7561c853e3d0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q @@ -0,0 +1,6 @@ +set hive.security.authorization.enabled=true; +create table t1 (username string); + +create view vw_t1 as select distinct username from t1 limit 5; +explain cbo select * from vw_t1; +select * from vw_t1; diff --git a/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out b/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out new file mode 100644 index 000000000000..0b4ea4757ae7 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: create table t1 (username string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (username string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create view vw_t1 as select distinct username from t1 
limit 5 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@t1 +PREHOOK: Output: database:default +PREHOOK: Output: default@vw_t1 +POSTHOOK: query: create view vw_t1 as select distinct username from t1 limit 5 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@t1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vw_t1 +POSTHOOK: Lineage: vw_t1.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ] +PREHOOK: query: explain cbo select * from vw_t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@vw_t1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo select * from vw_t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@vw_t1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[5]) + HiveProject(username=[$0]) + HiveAggregate(group=[{0}]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + +PREHOOK: query: select * from vw_t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@vw_t1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from vw_t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@vw_t1 +POSTHOOK: Output: hdfs://### HDFS PATH ### From d399943bdb396f6d53e9234bb494496e6e19b73c Mon Sep 17 00:00:00 2001 From: Soumyakanti Das Date: Thu, 25 Sep 2025 08:36:25 -0700 Subject: [PATCH 2/4] fix failing test --- .../llap/view_top_relnode_not_project_authorization.q.out | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out b/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out index 0b4ea4757ae7..0efb804be39b 100644 --- a/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out +++ b/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out @@ -21,12 
+21,12 @@ PREHOOK: query: explain cbo select * from vw_t1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@vw_t1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain cbo select * from vw_t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@vw_t1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### CBO PLAN: HiveSortLimit(fetch=[5]) HiveProject(username=[$0]) @@ -37,9 +37,9 @@ PREHOOK: query: select * from vw_t1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@vw_t1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from vw_t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@vw_t1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### From a406ce79dddc059bec4c2448be821d6b4b460ebc Mon Sep 17 00:00:00 2001 From: Soumyakanti Das Date: Thu, 9 Oct 2025 12:54:09 -0700 Subject: [PATCH 3/4] address review comments --- .../hadoop/hive/ql/parse/CalcitePlanner.java | 23 +++-- ...ew_top_relnode_not_project_authorization.q | 12 ++- ...op_relnode_not_project_authorization.q.out | 88 ++++++++++++++++++- 3 files changed, 114 insertions(+), 9 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 48a4cd35b37d..ab26dd73bcac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -4920,12 +4920,8 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, aliasToRel.put(subqAlias, relNode); if (qb.getViewToTabSchema().containsKey(subqAlias)) { - HiveProject project = switch (Objects.requireNonNull(relNode)) { - case HiveProject hiveProject -> hiveProject; - case SingleRel singleRel when singleRel.getInput() instanceof HiveProject 
hiveProject -> hiveProject; - default -> throw new SemanticException("View " + subqAlias + " is corresponding to " - + relNode + ", rather than a HiveProject or a SingleRel with HiveProject as its child."); - }; + HiveProject project = extractFirstProject(relNode) + .orElseThrow(() -> new SemanticException("Could not obtain a HiveProject from " + relNode)); if (this.viewProjectToTableSchema == null) { this.viewProjectToTableSchema = new LinkedHashMap<>(); } @@ -5051,6 +5047,21 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, return srcRel; } + /** + * Extract the first HiveProject from a RelNode tree of SingleRel nodes. + * This doesn't search through inputs of multi-input nodes (like Joins). + * + * @param rel RelNode + * @return Optional HiveProject + */ + private Optional<HiveProject> extractFirstProject(RelNode rel) { + return switch (rel) { + case HiveProject hiveProject -> Optional.of(hiveProject); + case SingleRel sr -> extractFirstProject(sr.getInput()); + case null, default -> Optional.empty(); + }; + } + private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { RelNode gbFilter = null; QBParseInfo qbp = getQBParseInfo(qb); diff --git a/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q b/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q index 7561c853e3d0..9f83328986cb 100644 --- a/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q +++ b/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q @@ -1,6 +1,16 @@ set hive.security.authorization.enabled=true; -create table t1 (username string); +create table t1 (username string, id int); create view vw_t1 as select distinct username from t1 limit 5; explain cbo select * from vw_t1; select * from vw_t1; + +create view vw_t2 as +select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id; +explain cbo select * from vw_t2; 
+select * from vw_t2; + +create view vw_t3 as +select username from (select username, id from t1 where id > 10 limit 10) x where username > 'a' limit 5; +explain cbo select * from vw_t3; +select * from vw_t3; diff --git a/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out b/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out index 0efb804be39b..2bf398147b33 100644 --- a/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out +++ b/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: create table t1 (username string) +PREHOOK: query: create table t1 (username string, id int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t1 -POSTHOOK: query: create table t1 (username string) +POSTHOOK: query: create table t1 (username string, id int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t1 @@ -43,3 +43,87 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@vw_t1 #### A masked pattern was here #### +PREHOOK: query: create view vw_t2 as +select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@t1 +PREHOOK: Output: database:default +PREHOOK: Output: default@vw_t2 +POSTHOOK: query: create view vw_t2 as +select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@t1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vw_t2 +POSTHOOK: Lineage: vw_t2.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ] +PREHOOK: query: explain cbo select * from vw_t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@vw_t2 +#### A masked pattern was here 
#### +POSTHOOK: query: explain cbo select * from vw_t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@vw_t2 +#### A masked pattern was here #### +CBO PLAN: +HiveFilter(condition=[>($0, _UTF-16LE'a')]) + HiveProject(username=[$0]) + HiveSortLimit(fetch=[1]) + HiveProject(username=[$0]) + HiveFilter(condition=[>($1, 10)]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + +PREHOOK: query: select * from vw_t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@vw_t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from vw_t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@vw_t2 +#### A masked pattern was here #### +PREHOOK: query: create view vw_t3 as +select username from (select username, id from t1 where id > 10 limit 10) x where username > 'a' limit 5 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@t1 +PREHOOK: Output: database:default +PREHOOK: Output: default@vw_t3 +POSTHOOK: query: create view vw_t3 as +select username from (select username, id from t1 where id > 10 limit 10) x where username > 'a' limit 5 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@t1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vw_t3 +POSTHOOK: Lineage: vw_t3.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ] +PREHOOK: query: explain cbo select * from vw_t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@vw_t3 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select * from vw_t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@vw_t3 +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(fetch=[5]) + HiveProject(username=[$0]) + HiveFilter(condition=[>($0, _UTF-16LE'a')]) + HiveProject(username=[$0]) + HiveSortLimit(fetch=[10]) + HiveProject(username=[$0]) + HiveFilter(condition=[>($1, 10)]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) 
+ +PREHOOK: query: select * from vw_t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@vw_t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from vw_t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@vw_t3 +#### A masked pattern was here #### From 11f28be560374b111ed09d3c5ef877464f113b89 Mon Sep 17 00:00:00 2001 From: Soumyakanti Das Date: Thu, 9 Oct 2025 15:19:16 -0700 Subject: [PATCH 4/4] remove unused import --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java | 1 - 1 file changed, 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index ab26dd73bcac..8353a0eeda51 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -28,7 +28,6 @@ import com.google.common.collect.Multimap; import java.util.Map.Entry; -import java.util.Objects; import java.util.Optional; import java.util.function.Function; import java.util.regex.Pattern;