Skip to content

Commit

Permalink
HIVE-27855: Create external tables for Hive and Tez protologging even…
Browse files Browse the repository at this point in the history
…ts (Kiran Velumuri, reviewed by Denys Kuzmenko, Shohei Okumiya)

Closes #5036
  • Loading branch information
KiranVelumuri authored Feb 3, 2025
1 parent dee6546 commit 84cdf60
Show file tree
Hide file tree
Showing 8 changed files with 273 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,30 @@
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaException;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.tools.schematool.HiveSchemaHelper;
import org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool;
import org.apache.hadoop.hive.metastore.tools.schematool.HiveSchemaHelper.MetaStoreConnectionInfo;
import org.apache.hadoop.hive.metastore.tools.schematool.HiveSchemaHelper.NestedScriptParser;
import org.apache.hive.beeline.BeeLine;
import org.apache.tez.dag.api.TezConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import static org.apache.hadoop.hive.metastore.utils.StringUtils.isEmpty;

public class HiveSchemaTool extends MetastoreSchemaTool {
private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaTool.class.getName());
Expand Down Expand Up @@ -90,6 +99,7 @@ protected void execSql(String scriptDir, String scriptFile)

@Override
protected void execSql(String sqlScriptFile) throws IOException {
replaceLocationForProtoLogTables(sqlScriptFile);
CommandBuilder builder = new HiveSchemaToolCommandBuilder(conf, url, driver,
userName, passWord, sqlScriptFile);

Expand All @@ -112,6 +122,57 @@ protected void execSql(String sqlScriptFile) throws IOException {
}
}

/**
 * Substitutes the proto-logging location placeholders in a schema script.
 *
 * <p>The script is read fully into memory, every occurrence of the four
 * {@code _REPLACE_WITH_*_LOCATION_} placeholders is replaced, and the file is
 * written back in place. The file is only rewritten when at least one
 * placeholder was actually substituted, so scripts that contain none are left
 * untouched (and need not be writable). The script is read and written as
 * UTF-8 instead of the platform default charset.
 *
 * <p>The Hive location comes from {@code hive.hook.proto.base-directory}; the
 * Tez locations are {@code app_data}, {@code dag_data} and {@code dag_meta}
 * under {@code tez.history.logging.proto-base-dir}. When a base directory is
 * not configured an error is logged and a dummy location under {@code /tmp}
 * is used for the corresponding tables.
 *
 * @param sqlScriptFile path of the SQL script to process
 * @throws IOException if the script cannot be read or written, or if the
 *     filesystem of a configured base directory cannot be obtained
 */
void replaceLocationForProtoLogTables(String sqlScriptFile) throws IOException {
  TezConfiguration tezConf = new TezConfiguration(true);
  String hiveProtoBaseDir = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_PROTO_EVENTS_BASE_PATH);
  String tezProtoBaseDir = tezConf.get(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_BASE_DIR);

  boolean hiveProtoLoggingEnabled = !isEmpty(hiveProtoBaseDir);
  if (!hiveProtoLoggingEnabled) {
    LOG.error("Hive conf variable hive.hook.proto.base-directory is not set for creating protologging tables");
  }
  boolean tezProtoLoggingEnabled = !isEmpty(tezProtoBaseDir);
  if (!tezProtoLoggingEnabled) {
    LOG.error("Tez conf variable tez.history.logging.proto-base-dir is not set for creating protologging tables");
  }

  // Dummy locations are used for the tables whose proto logging is not configured.
  String hiveLocation = hiveProtoLoggingEnabled
      ? qualifyProtoBaseDir(hiveProtoBaseDir, conf)
      : "/tmp/query_data";
  String tezLocation = tezProtoLoggingEnabled
      ? qualifyProtoBaseDir(tezProtoBaseDir, tezConf)
      : "/tmp";

  Map<String, String> replacements = new HashMap<>();
  replacements.put("_REPLACE_WITH_QUERY_DATA_LOCATION_", hiveLocation);
  replacements.put("_REPLACE_WITH_APP_DATA_LOCATION_", tezLocation + "/app_data");
  replacements.put("_REPLACE_WITH_DAG_DATA_LOCATION_", tezLocation + "/dag_data");
  replacements.put("_REPLACE_WITH_DAG_META_LOCATION_", tezLocation + "/dag_meta");

  StringBuilder rewritten = new StringBuilder();
  boolean changed = false;
  try (BufferedReader reader = new BufferedReader(
      new InputStreamReader(new FileInputStream(sqlScriptFile), StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
      String replaced = line;
      for (Map.Entry<String, String> entry : replacements.entrySet()) {
        // String.replace returns the input unchanged when the key is absent.
        replaced = replaced.replace(entry.getKey(), entry.getValue());
      }
      if (!replaced.equals(line)) {
        changed = true;
      }
      rewritten.append(replaced).append("\n");
    }
  }

  if (!changed) {
    // Nothing to substitute: leave the script exactly as it is on disk.
    return;
  }

  try (BufferedWriter writer = new BufferedWriter(
      new OutputStreamWriter(new FileOutputStream(sqlScriptFile), StandardCharsets.UTF_8))) {
    writer.write(rewritten.toString());
  }
}

/**
 * Returns the table location to use for a configured proto-logging base directory.
 *
 * <p>NOTE(review): the absoluteness check is made on the URI of the filesystem
 * resolved for {@code baseDir}, not on {@code baseDir} itself; this mirrors the
 * existing behavior - confirm that it is the intended check.
 *
 * @param baseDir configured, non-empty base directory
 * @param fsConf configuration used to resolve the filesystem of {@code baseDir}
 * @return {@code baseDir} as is, or {@code <scheme>:///} followed by {@code baseDir}
 * @throws IOException if the filesystem cannot be obtained
 */
private static String qualifyProtoBaseDir(String baseDir, Configuration fsConf) throws IOException {
  Path basePath = new Path(baseDir);
  if (basePath.getFileSystem(fsConf).getUri().isAbsolute()) {
    return baseDir;
  }
  return basePath.getFileSystem(fsConf).getScheme() + ":///" + baseDir;
}

static class HiveSchemaToolCommandBuilder extends MetastoreSchemaTool.CommandBuilder {

HiveSchemaToolCommandBuilder(Configuration conf, String url, String driver, String userName,
Expand Down
1 change: 1 addition & 0 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -5648,6 +5648,7 @@ public static enum ConfVars {
"hive.zookeeper.ssl.truststore.password," +
"hive.zookeeper.ssl.truststore.type," +
"hive.iceberg.allow.datafiles.in.table.location.only," +
"hive.hook.proto.base-directory," +
"hive.rewrite.data.policy",
"Comma separated list of configuration options which are immutable at runtime"),
HIVE_CONF_HIDDEN_LIST("hive.conf.hidden.list",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ public static void startServices() throws Exception {
addToExpectedRestrictedMap("hive.zookeeper.ssl.truststore.password");
addToExpectedRestrictedMap("hive.zookeeper.ssl.truststore.type");
addToExpectedRestrictedMap("hive.iceberg.allow.datafiles.in.table.location.only");
addToExpectedRestrictedMap("hive.hook.proto.base-directory");
addToExpectedRestrictedMap("hive.rewrite.data.policy");

checkRestrictedListMatch();
Expand Down
16 changes: 16 additions & 0 deletions ql/src/test/results/clientpositive/llap/resourceplan.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,22 @@ sys partitions hive_test_user USER DELETE true -1 hive_test_user
sys partitions hive_test_user USER INSERT true -1 hive_test_user
sys partitions hive_test_user USER SELECT true -1 hive_test_user
sys partitions hive_test_user USER UPDATE true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER DELETE true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER INSERT true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER SELECT true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER UPDATE true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER DELETE true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER INSERT true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER SELECT true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER UPDATE true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER DELETE true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER INSERT true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER SELECT true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER UPDATE true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER DELETE true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER INSERT true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER SELECT true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER UPDATE true -1 hive_test_user
sys replication_failover_failback_metrics hive_test_user USER DELETE true -1 hive_test_user
sys replication_failover_failback_metrics hive_test_user USER INSERT true -1 hive_test_user
sys replication_failover_failback_metrics hive_test_user USER SELECT true -1 hive_test_user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,22 @@ sys partitions hive_test_user USER DELETE true -1 hive_test_user
sys partitions hive_test_user USER INSERT true -1 hive_test_user
sys partitions hive_test_user USER SELECT true -1 hive_test_user
sys partitions hive_test_user USER UPDATE true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER DELETE true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER INSERT true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER SELECT true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER UPDATE true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER DELETE true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER INSERT true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER SELECT true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER UPDATE true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER DELETE true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER INSERT true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER SELECT true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER UPDATE true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER DELETE true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER INSERT true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER SELECT true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER UPDATE true -1 hive_test_user
sys replication_failover_failback_metrics hive_test_user USER DELETE true -1 hive_test_user
sys replication_failover_failback_metrics hive_test_user USER INSERT true -1 hive_test_user
sys replication_failover_failback_metrics hive_test_user USER SELECT true -1 hive_test_user
Expand Down Expand Up @@ -520,6 +536,10 @@ PREHOOK: Output: sys@partition_keys
PREHOOK: Output: sys@partition_params
PREHOOK: Output: sys@partition_stats_view
PREHOOK: Output: sys@partitions
PREHOOK: Output: sys@proto_hive_query_data
PREHOOK: Output: sys@proto_tez_app_data
PREHOOK: Output: sys@proto_tez_dag_data
PREHOOK: Output: sys@proto_tez_dag_meta
PREHOOK: Output: sys@replication_failover_failback_metrics
PREHOOK: Output: sys@replication_metrics
PREHOOK: Output: sys@replication_metrics_orig
Expand Down Expand Up @@ -584,6 +604,10 @@ POSTHOOK: Output: sys@partition_keys
POSTHOOK: Output: sys@partition_params
POSTHOOK: Output: sys@partition_stats_view
POSTHOOK: Output: sys@partitions
POSTHOOK: Output: sys@proto_hive_query_data
POSTHOOK: Output: sys@proto_tez_app_data
POSTHOOK: Output: sys@proto_tez_dag_data
POSTHOOK: Output: sys@proto_tez_dag_meta
POSTHOOK: Output: sys@replication_failover_failback_metrics
POSTHOOK: Output: sys@replication_metrics
POSTHOOK: Output: sys@replication_metrics_orig
Expand Down
71 changes: 67 additions & 4 deletions ql/src/test/results/clientpositive/llap/sysdb.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,22 @@ sys partitions hive_test_user USER DELETE true -1 hive_test_user
sys partitions hive_test_user USER INSERT true -1 hive_test_user
sys partitions hive_test_user USER SELECT true -1 hive_test_user
sys partitions hive_test_user USER UPDATE true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER DELETE true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER INSERT true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER SELECT true -1 hive_test_user
sys proto_hive_query_data hive_test_user USER UPDATE true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER DELETE true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER INSERT true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER SELECT true -1 hive_test_user
sys proto_tez_app_data hive_test_user USER UPDATE true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER DELETE true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER INSERT true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER SELECT true -1 hive_test_user
sys proto_tez_dag_data hive_test_user USER UPDATE true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER DELETE true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER INSERT true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER SELECT true -1 hive_test_user
sys proto_tez_dag_meta hive_test_user USER UPDATE true -1 hive_test_user
sys replication_failover_failback_metrics hive_test_user USER DELETE true -1 hive_test_user
sys replication_failover_failback_metrics hive_test_user USER INSERT true -1 hive_test_user
sys replication_failover_failback_metrics hive_test_user USER SELECT true -1 hive_test_user
Expand Down Expand Up @@ -781,6 +797,45 @@ partitions part_id
partitions part_name
partitions sd_id
partitions tbl_id
proto_hive_query_data eventtype
proto_hive_query_data executionmode
proto_hive_query_data hivequeryid
proto_hive_query_data operationid
proto_hive_query_data otherinfo
proto_hive_query_data queue
proto_hive_query_data requestuser
proto_hive_query_data tablesread
proto_hive_query_data tableswritten
proto_hive_query_data timestamp
proto_hive_query_data user
proto_tez_app_data app_attempt_id
proto_tez_app_data app_id
proto_tez_app_data dag_id
proto_tez_app_data event_data
proto_tez_app_data event_time
proto_tez_app_data event_type
proto_tez_app_data task_attempt_id
proto_tez_app_data task_id
proto_tez_app_data user
proto_tez_app_data vertex_id
proto_tez_dag_data app_attempt_id
proto_tez_dag_data app_id
proto_tez_dag_data dag_id
proto_tez_dag_data event_data
proto_tez_dag_data event_time
proto_tez_dag_data event_type
proto_tez_dag_data task_attempt_id
proto_tez_dag_data task_id
proto_tez_dag_data user
proto_tez_dag_data vertex_id
proto_tez_dag_meta app_file_path
proto_tez_dag_meta app_id
proto_tez_dag_meta app_launched_event_offset
proto_tez_dag_meta dag_file_path
proto_tez_dag_meta dag_finished_event_offset
proto_tez_dag_meta dag_id
proto_tez_dag_meta dag_submitted_event_offset
proto_tez_dag_meta writetime
replication_failover_failback_metrics db_name
replication_failover_failback_metrics db_repl_metrics_name
replication_failover_failback_metrics db_repl_metrics_value
Expand Down Expand Up @@ -1124,6 +1179,10 @@ POSTHOOK: query: select pkey_name, pkey_type from partition_keys order by pkey_n
POSTHOOK: type: QUERY
POSTHOOK: Input: sys@partition_keys
#### A masked pattern was here ####
date string
date string
date string
date string
PREHOOK: query: select part_key_val, integer_idx from partition_key_vals order by part_key_val, integer_idx limit 5
PREHOOK: type: QUERY
PREHOOK: Input: sys@partition_key_vals
Expand Down Expand Up @@ -1308,7 +1367,7 @@ POSTHOOK: query: select count(*) from sds
POSTHOOK: type: QUERY
POSTHOOK: Input: sys@sds
#### A masked pattern was here ####
79
83
PREHOOK: query: select param_key, param_value from sd_params order by param_key, param_value limit 5
PREHOOK: type: QUERY
PREHOOK: Input: sys@sd_params
Expand All @@ -1327,9 +1386,9 @@ POSTHOOK: Input: sys@serdes
#### A masked pattern was here ####
NULL org.apache.hadoop.hive.ql.io.orc.OrcSerde
NULL org.apache.hadoop.hive.ql.io.orc.OrcSerde
NULL org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
NULL org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
NULL org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
NULL org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageSerDe
NULL org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageSerDe
NULL org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageSerDe
PREHOOK: query: select param_key, param_value from serde_params order by param_key, param_value limit 5
PREHOOK: type: QUERY
PREHOOK: Input: sys@serde_params
Expand Down Expand Up @@ -1732,6 +1791,10 @@ default sys partition_keys BASE_TABLE NULL NULL NULL NULL NULL YES NO NULL
default sys partition_params BASE_TABLE NULL NULL NULL NULL NULL YES NO NULL
default sys partition_stats_view VIEW NULL NULL NULL NULL NULL NO NO NULL
default sys partitions BASE_TABLE NULL NULL NULL NULL NULL YES NO NULL
default sys proto_hive_query_data BASE_TABLE NULL NULL NULL NULL NULL YES NO NULL
default sys proto_tez_app_data BASE_TABLE NULL NULL NULL NULL NULL YES NO NULL
default sys proto_tez_dag_data BASE_TABLE NULL NULL NULL NULL NULL YES NO NULL
default sys proto_tez_dag_meta BASE_TABLE NULL NULL NULL NULL NULL YES NO NULL
default sys replication_failover_failback_metrics VIEW NULL NULL NULL NULL NULL NO NO NULL
default sys replication_metrics VIEW NULL NULL NULL NULL NULL NO NO NULL
default sys replication_metrics_orig BASE_TABLE NULL NULL NULL NULL NULL YES NO NULL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1592,6 +1592,57 @@ WHERE
AND
B.PARAM_KEY LIKE 'repl_metrics%';

-- External table over Hive hook proto events (HiveHookEventProto), partitioned by `date`.
-- No column list is declared here; presumably ProtobufMessageSerDe derives the
-- columns from 'proto.class' (TODO confirm).
-- The LOCATION value below is a placeholder that HiveSchemaTool substitutes before
-- the script is executed: a location derived from hive.hook.proto.base-directory,
-- or /tmp/query_data when that property is not set.
CREATE EXTERNAL TABLE IF NOT EXISTS `PROTO_HIVE_QUERY_DATA`
PARTITIONED BY (
`date` string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '_REPLACE_WITH_QUERY_DATA_LOCATION_'
TBLPROPERTIES (
'proto.class'='org.apache.hadoop.hive.ql.hooks.proto.HiveHookEvents$HiveHookEventProto',
'proto.maptypes'='org.apache.hadoop.hive.ql.hooks.proto.MapFieldEntry'
);

-- External table over Tez history proto events (HistoryEventProto), partitioned by `date`.
-- No column list is declared here; presumably ProtobufMessageSerDe derives the
-- columns from 'proto.class' (TODO confirm).
-- The LOCATION value below is a placeholder that HiveSchemaTool substitutes before
-- the script is executed: the app_data directory under a location derived from
-- tez.history.logging.proto-base-dir, or /tmp/app_data when that property is not set.
CREATE EXTERNAL TABLE IF NOT EXISTS `PROTO_TEZ_APP_DATA`
PARTITIONED BY (
`date` string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '_REPLACE_WITH_APP_DATA_LOCATION_'
TBLPROPERTIES (
'proto.class'='org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos$HistoryEventProto',
'proto.maptypes'='KVPair'
);

-- External table over Tez history proto events (HistoryEventProto), partitioned by `date`.
-- No column list is declared here; presumably ProtobufMessageSerDe derives the
-- columns from 'proto.class' (TODO confirm).
-- The LOCATION value below is a placeholder that HiveSchemaTool substitutes before
-- the script is executed: the dag_data directory under a location derived from
-- tez.history.logging.proto-base-dir, or /tmp/dag_data when that property is not set.
CREATE EXTERNAL TABLE IF NOT EXISTS `PROTO_TEZ_DAG_DATA`
PARTITIONED BY (
`date` string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '_REPLACE_WITH_DAG_DATA_LOCATION_'
TBLPROPERTIES (
'proto.class'='org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos$HistoryEventProto',
'proto.maptypes'='KVPair'
);

-- External table over Tez manifest entries (ManifestEntryProto), partitioned by `date`.
-- No column list is declared here; presumably ProtobufMessageSerDe derives the
-- columns from 'proto.class' (TODO confirm). Unlike the other proto tables in this
-- script, no 'proto.maptypes' property is set.
-- The LOCATION value below is a placeholder that HiveSchemaTool substitutes before
-- the script is executed: the dag_meta directory under a location derived from
-- tez.history.logging.proto-base-dir, or /tmp/dag_meta when that property is not set.
CREATE EXTERNAL TABLE IF NOT EXISTS `PROTO_TEZ_DAG_META`
PARTITIONED BY (
`date` string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.protobuf.ProtobufMessageInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '_REPLACE_WITH_DAG_META_LOCATION_'
TBLPROPERTIES (
'proto.class'='org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos$ManifestEntryProto'
);


CREATE DATABASE IF NOT EXISTS INFORMATION_SCHEMA;

Expand Down
Loading

0 comments on commit 84cdf60

Please sign in to comment.