Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ public long rtHandle() {

public static native long cloneHashTable(long hashTableData);

public static native long nativeBuild(
public native long nativeBuild(
Comment thread
JkSelf marked this conversation as resolved.
String buildHashTableId,
long[] batchHandlers,
String[] joinKeys,
String[] filterBuildColumns,
boolean filterPropagatesNulls,
int joinType,
boolean hasMixedFiltCondition,
boolean isExistenceJoin,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,22 @@ object VeloxConfig extends ConfigRegistry {
.intConf
.createWithDefault(100000)

// Experimental knob: hash-join build tables with fewer rows than this are
// built single-threaded; larger tables may use the parallel build path.
val VELOX_MIN_TABLE_ROWS_FOR_PARALLEL_JOIN_BUILD =
  buildConf("spark.gluten.velox.minTableRowsForParallelJoinBuild")
    .experimental()
    .doc(
      "Experimental: the minimum number of table rows that can trigger the " +
        "parallel hash join table build.")
    .intConf
    .createWithDefault(1000)

// Experimental knob: cap on distinct values tracked per vector hasher while
// merging hashers during the join HashBuild stage.
val VELOX_JOIN_BUILD_VECTOR_HASHER_MAX_NUM_DISTINCT =
  buildConf("spark.gluten.velox.joinBuildVectorHasherMaxNumDistinct")
    .experimental()
    .doc(
      "Experimental: maximum number of distinct values to keep when merging " +
        "vector hashers in join HashBuild.")
    .intConf
    .createWithDefault(1000000)

val VELOX_HASHMAP_ABANDON_BUILD_DUPHASH_MIN_PCT =
buildConf("spark.gluten.velox.abandonDedupHashMap.minPct")
.experimental()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package org.apache.gluten.execution

import org.apache.gluten.config.VeloxConfig
import org.apache.gluten.expression.ConverterUtils
import org.apache.gluten.sql.shims.SparkShimLoader

import org.apache.spark.rdd.RDD
import org.apache.spark.rpc.GlutenDriverEndpoint
Expand Down Expand Up @@ -148,6 +150,20 @@ case class BroadcastHashJoinExecTransformer(
} else {
-1
}

// Extract, from the join's residual filter condition (if any), the build-side
// columns it references and whether the filter is null-intolerant, so the
// native hash-table build can use them.
// NOTE(review): `filterPropagatesNulls` is taken from isNullIntolerant(expr);
// confirm this polarity matches what the native side expects by
// "propagates nulls" (null input => null/false output).
val (filterBuildColumns: Array[String], filterPropagatesNulls: Boolean) = condition match {
case Some(expr) =>
// Only attributes that belong to the build plan's output are relevant;
// stream-side references are ignored here.
val buildOutputSet = buildPlan.outputSet
val cols: Array[String] = expr.references.toSeq.collect {
case a: Attribute if buildOutputSet.contains(a) =>
// Use the exprId-suffixed name so it matches the native row type.
ConverterUtils.genColumnNameWithExprId(a)
}.toArray
val propagates = SparkShimLoader.getSparkShims.isNullIntolerant(expr)
(cols, propagates)
case None =>
// No filter condition: nothing to forward to the native build.
(Array.empty[String], false)
}

val context =
BroadcastHashJoinContext(
buildKeyExprs,
Expand All @@ -156,6 +172,8 @@ case class BroadcastHashJoinExecTransformer(
condition.isDefined,
joinType.isInstanceOf[ExistenceJoin],
buildPlan.output,
filterBuildColumns,
filterPropagatesNulls,
buildBroadcastTableId,
isNullAwareAntiJoin,
bloomFilterPushdownSize,
Expand All @@ -174,6 +192,8 @@ case class BroadcastHashJoinContext(
hasMixedFiltCondition: Boolean,
isExistenceJoin: Boolean,
buildSideStructure: Seq[Attribute],
filterBuildColumns: Array[String],
filterPropagatesNulls: Boolean,
buildHashTableId: String,
isNullAwareAntiJoin: Boolean = false,
bloomFilterPushdownSize: Long,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,12 +207,16 @@ case class ColumnarBuildSideRelation(
ConverterUtils.genColumnNameWithExprId(attr)
}.toArray

val hashJoinBuilder = HashJoinBuilder.create(runtime)

// Build the hash table
hashTableData = HashJoinBuilder
hashTableData = hashJoinBuilder
.nativeBuild(
broadcastContext.buildHashTableId,
batchArray.toArray,
joinKeys,
broadcastContext.filterBuildColumns,
broadcastContext.filterPropagatesNulls,
broadcastContext.substraitJoinType.ordinal(),
broadcastContext.hasMixedFiltCondition,
broadcastContext.isExistenceJoin,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,16 @@ class UnsafeColumnarBuildSideRelation(
ConverterUtils.genColumnNameWithExprId(attr)
}.toArray

val hashJoinBuilder = HashJoinBuilder.create(runtime)

// Build the hash table
hashTableData = HashJoinBuilder
hashTableData = hashJoinBuilder
.nativeBuild(
broadcastContext.buildHashTableId,
batchArray.toArray,
joinKeys,
broadcastContext.filterBuildColumns,
broadcastContext.filterPropagatesNulls,
broadcastContext.substraitJoinType.ordinal(),
broadcastContext.hasMixedFiltCondition,
broadcastContext.isExistenceJoin,
Expand Down
4 changes: 4 additions & 0 deletions cpp/velox/compute/VeloxRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ class VeloxRuntime final : public Runtime {
return veloxPlan_;
}

// Read-only accessor for this runtime's Velox query config, exposing
// session-level tuning knobs (e.g. to JNI entry points).
const std::shared_ptr<facebook::velox::config::ConfigBase>& veloxCfg() const {
return veloxCfg_;
}

// Whether debug mode was enabled for this runtime at construction time.
bool debugModeEnabled() const {
return debugModeEnabled_;
}
Expand Down
8 changes: 8 additions & 0 deletions cpp/velox/config/VeloxConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,16 @@ const std::string kAbandonPartialAggregationMinRows =
"spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows";

// hashmap build
// Minimum build-side row count required before the hash join table build is
// parallelized across threads.
const std::string kMinTableRowsForParallelJoinBuild = "spark.gluten.velox.minTableRowsForParallelJoinBuild";
const uint32_t kMinTableRowsForParallelJoinBuildDefault = 1'000;

// Cap on distinct values kept when merging vector hashers in join HashBuild.
const std::string kJoinBuildVectorHasherMaxNumDistinct = "spark.gluten.velox.joinBuildVectorHasherMaxNumDistinct";
const uint32_t kJoinBuildVectorHasherMaxNumDistinctDefault = 1'000'000;

// Thresholds for abandoning the dedup hash map during build: minimum rows
// observed, and a minimum percentage (0 disables the pct-based check —
// TODO confirm against the consumer of this knob).
const std::string kAbandonDedupHashMapMinRows = "spark.gluten.velox.abandonDedupHashMap.minRows";
const uint32_t kAbandonDedupHashMapMinRowsDefault = 100'000;
const std::string kAbandonDedupHashMapMinPct = "spark.gluten.velox.abandonDedupHashMap.minPct";
const uint32_t kAbandonDedupHashMapMinPctDefault = 0;

// execution
const std::string kSparkBloomFilterExpectedNumItems = "spark.sql.optimizer.runtime.bloomFilter.expectedNumItems";
Expand Down
29 changes: 28 additions & 1 deletion cpp/velox/jni/JniHashTable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <arrow/c/abi.h>

#include <jni/JniCommon.h>
#include <algorithm>
#include "JniHashTable.h"
#include "folly/String.h"
#include "memory/ColumnarBatch.h"
Expand Down Expand Up @@ -57,13 +58,19 @@ jlong JniHashTableContext::callJavaGet(const std::string& id) const {
// Builds and returns the Velox hash table builder for the given batches.
std::shared_ptr<HashTableBuilder> nativeHashTableBuild(
const std::vector<std::string>& joinKeys,
const std::vector<std::string>& filterBuildColumns,
bool filterPropagatesNulls,
std::vector<std::string> names,
std::vector<facebook::velox::TypePtr> veloxTypeList,
int joinType,
bool hasMixedJoinCondition,
bool isExistenceJoin,
bool isNullAwareAntiJoin,
int64_t bloomFilterPushdownSize,
uint32_t minTableRowsForParallelJoinBuild,
uint32_t joinBuildVectorHasherMaxNumDistinct,
uint32_t abandonHashBuildDedupMinRows,
uint32_t abandonHashBuildDedupMinPct,
std::vector<std::shared_ptr<ColumnarBatch>>& batches,
std::shared_ptr<facebook::velox::memory::MemoryPool> memoryPool) {
auto rowType = std::make_shared<facebook::velox::RowType>(std::move(names), std::move(veloxTypeList));
Expand Down Expand Up @@ -115,18 +122,38 @@ std::shared_ptr<HashTableBuilder> nativeHashTableBuild(
std::make_shared<facebook::velox::core::FieldAccessTypedExpr>(rowType->findChild(name), name));
}

std::vector<uint32_t> filterInputChannels;
filterInputChannels.reserve(filterBuildColumns.size());
for (const auto& name : filterBuildColumns) {
if (const auto idx = rowType->getChildIdxIfExists(name)) {
filterInputChannels.push_back(*idx);
}
}
std::sort(filterInputChannels.begin(), filterInputChannels.end());
filterInputChannels.erase(
std::unique(filterInputChannels.begin(), filterInputChannels.end()), filterInputChannels.end());

auto hashTableBuilder = std::make_shared<HashTableBuilder>(
vJoin,
isNullAwareAntiJoin,
hasMixedJoinCondition,
bloomFilterPushdownSize,
joinKeyTypes,
filterInputChannels,
filterPropagatesNulls,
rowType,
memoryPool.get());
memoryPool.get(),
minTableRowsForParallelJoinBuild,
joinBuildVectorHasherMaxNumDistinct,
abandonHashBuildDedupMinRows,
abandonHashBuildDedupMinPct);

for (auto i = 0; i < batches.size(); i++) {
auto rowVector = VeloxColumnarBatch::from(memoryPool.get(), batches[i])->getRowVector();
hashTableBuilder->addInput(rowVector);
if (hashTableBuilder->noMoreInput()) {
break;
}
}

return hashTableBuilder;
Expand Down
6 changes: 6 additions & 0 deletions cpp/velox/jni/JniHashTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,19 @@ class JniHashTableContext {
// Builds a Velox hash table from the given columnar batches and returns the
// HashTableBuilder that owns it (a shared_ptr, not a raw address).
std::shared_ptr<HashTableBuilder> nativeHashTableBuild(
const std::vector<std::string>& joinKeys, // build-side join key column names
const std::vector<std::string>& filterBuildColumns, // build-side columns referenced by the join filter
bool filterPropagatesNulls,
std::vector<std::string> names, // build-side column names (consumed to form the row type)
std::vector<facebook::velox::TypePtr> veloxTypeList, // matching column types (consumed)
int joinType,
bool hasMixedJoinCondition,
bool isExistenceJoin,
bool isNullAwareAntiJoin,
int64_t bloomFilterPushdownSize,
uint32_t minTableRowsForParallelJoinBuild,
uint32_t joinBuildVectorHasherMaxNumDistinct,
uint32_t abandonHashBuildDedupMinRows,
uint32_t abandonHashBuildDedupMinPct,
std::vector<std::shared_ptr<ColumnarBatch>>& batches, // input batches fed into the builder
std::shared_ptr<facebook::velox::memory::MemoryPool> memoryPool);

Expand Down
44 changes: 41 additions & 3 deletions cpp/velox/jni/VeloxJniWrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "compute/VeloxBackend.h"
#include "compute/VeloxRuntime.h"
#include "config/GlutenConfig.h"
#include "config/VeloxConfig.h"
#include "jni/JniError.h"
#include "jni/JniFileSystem.h"
#include "jni/JniHashTable.h"
Expand Down Expand Up @@ -938,10 +939,12 @@ JNIEXPORT jobject JNICALL Java_org_apache_gluten_execution_IcebergWriteJniWrappe

JNIEXPORT jlong JNICALL Java_org_apache_gluten_vectorized_HashJoinBuilder_nativeBuild( // NOLINT
JNIEnv* env,
jclass,
jobject wrapper,
jstring tableId,
jlongArray batchHandles,
jobjectArray joinKeys,
jobjectArray filterBuildColumns,
jboolean filterPropagatesNulls,
jint joinType,
jboolean hasMixedJoinCondition,
jboolean isExistenceJoin,
Expand All @@ -950,6 +953,18 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_vectorized_HashJoinBuilder_native
jlong bloomFilterPushdownSize,
jint numThreads) {
JNI_METHOD_START
auto ctx = getRuntime(env, wrapper);
auto* runtime = dynamic_cast<VeloxRuntime*>(ctx);
GLUTEN_CHECK(runtime != nullptr, "Not a Velox runtime");
const auto& queryConf = *(runtime->veloxCfg());
const auto minTableRowsForParallelJoinBuild =
queryConf.get<uint32_t>(kMinTableRowsForParallelJoinBuild, kMinTableRowsForParallelJoinBuildDefault);
const auto joinBuildVectorHasherMaxNumDistinct =
queryConf.get<uint32_t>(kJoinBuildVectorHasherMaxNumDistinct, kJoinBuildVectorHasherMaxNumDistinctDefault);
const auto abandonHashBuildDedupMinRows =
queryConf.get<uint32_t>(kAbandonDedupHashMapMinRows, kAbandonDedupHashMapMinRowsDefault);
const auto abandonHashBuildDedupMinPct =
queryConf.get<uint32_t>(kAbandonDedupHashMapMinPct, kAbandonDedupHashMapMinPctDefault);
const auto hashTableId = jStringToCString(env, tableId);

// Convert Java String array to C++ vector<string>
Expand All @@ -961,6 +976,16 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_vectorized_HashJoinBuilder_native
hashJoinKeys.emplace_back(jStringToCString(env, jkey));
}

std::vector<std::string> filterColumns;
if (filterBuildColumns != nullptr) {
jsize filterColumnsCount = env->GetArrayLength(filterBuildColumns);
filterColumns.reserve(filterColumnsCount);
for (jsize i = 0; i < filterColumnsCount; ++i) {
jstring jcol = (jstring)env->GetObjectArrayElement(filterBuildColumns, i);
filterColumns.emplace_back(jStringToCString(env, jcol));
}
}

const auto inputType = gluten::getByteArrayElementsSafe(env, namedStruct);
std::string structString{
reinterpret_cast<const char*>(inputType.elems()), static_cast<std::string::size_type>(inputType.length())};
Expand Down Expand Up @@ -990,21 +1015,27 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_vectorized_HashJoinBuilder_native
if (numThreads == 1) {
auto builder = nativeHashTableBuild(
hashJoinKeys,
filterColumns,
filterPropagatesNulls,
names,
veloxTypeList,
joinType,
hasMixedJoinCondition,
isExistenceJoin,
isNullAwareAntiJoin,
bloomFilterPushdownSize,
minTableRowsForParallelJoinBuild,
joinBuildVectorHasherMaxNumDistinct,
abandonHashBuildDedupMinRows,
abandonHashBuildDedupMinPct,
cb,
defaultLeafVeloxMemoryPool());

auto mainTable = builder->uniqueTable();
mainTable->prepareJoinTable(
{},
facebook::velox::exec::BaseHashTable::kNoSpillInputStartPartitionBit,
1'000'000,
builder->joinBuildVectorHasherMaxNumDistinct(),
builder->dropDuplicates(),
nullptr);
builder->setHashTable(std::move(mainTable));
Expand All @@ -1027,19 +1058,26 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_vectorized_HashJoinBuilder_native
// Submit task to thread pool
auto future = folly::via(executor, [&, t, start, end]() {
std::vector<std::shared_ptr<gluten::ColumnarBatch>> threadBatches;
threadBatches.reserve(end - start);
for (size_t i = start; i < end; ++i) {
threadBatches.push_back(cb[i]);
}

auto builder = nativeHashTableBuild(
hashJoinKeys,
filterColumns,
filterPropagatesNulls,
names,
veloxTypeList,
joinType,
hasMixedJoinCondition,
isExistenceJoin,
isNullAwareAntiJoin,
bloomFilterPushdownSize,
minTableRowsForParallelJoinBuild,
joinBuildVectorHasherMaxNumDistinct,
abandonHashBuildDedupMinRows,
abandonHashBuildDedupMinPct,
threadBatches,
defaultLeafVeloxMemoryPool());

Expand Down Expand Up @@ -1067,7 +1105,7 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_vectorized_HashJoinBuilder_native
mainTable->prepareJoinTable(
std::move(tables),
facebook::velox::exec::BaseHashTable::kNoSpillInputStartPartitionBit,
1'000'000,
hashTableBuilders[0]->joinBuildVectorHasherMaxNumDistinct(),
hashTableBuilders[0]->dropDuplicates(),
allowParallelJoinBuild ? VeloxBackend::get()->executor() : nullptr);

Expand Down
Loading
Loading