Skip to content

Commit 6c1106f

Browse files
committed
HIVE-29133: Support Z-order indexing for Iceberg tables via CREATE TABLE DDL
1 parent c338904 commit 6c1106f

File tree

15 files changed

+1157
-21
lines changed

15 files changed

+1157
-21
lines changed

iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
4242
import org.apache.hadoop.hive.ql.ddl.misc.sortoder.SortFieldDesc;
4343
import org.apache.hadoop.hive.ql.ddl.misc.sortoder.SortFields;
44+
import org.apache.hadoop.hive.ql.ddl.misc.sortoder.ZOrderFieldDesc;
45+
import org.apache.hadoop.hive.ql.ddl.misc.sortoder.ZorderFields;
4446
import org.apache.hadoop.hive.ql.util.NullOrdering;
4547
import org.apache.iceberg.BaseMetastoreTableOperations;
4648
import org.apache.iceberg.BaseTable;
@@ -217,31 +219,73 @@ private void validateCatalogConfigsDefined() {
217219
}
218220
}
219221

222+
/**
223+
* Persists the table's write sort order based on the HMS property 'default-sort-order'
224+
* that is populated by the DDL layer.
225+
*
226+
* Behaviour:
227+
* - If the JSON represents Z-order, we remove DEFAULT_SORT_ORDER
228+
* as Iceberg does not have Z-order support in its spec.
229+
* So, we persist Z-order metadata in 'sort.order' and 'sort.columns' to be used by Hive Writer.
230+
* - Otherwise, the JSON is a list of SortFields; we convert it to Iceberg
231+
* SortOrder JSON and keep it in DEFAULT_SORT_ORDER for Iceberg to use it.
232+
*/
220233
private void setSortOrder(org.apache.hadoop.hive.metastore.api.Table hmsTable, Schema schema,
221234
Properties properties) {
222-
String sortOderJSONString = hmsTable.getParameters().get(TableProperties.DEFAULT_SORT_ORDER);
223-
SortFields sortFields = null;
224-
if (!Strings.isNullOrEmpty(sortOderJSONString)) {
235+
String sortOrderJSONString = hmsTable.getParameters().get(TableProperties.DEFAULT_SORT_ORDER);
236+
if (!Strings.isNullOrEmpty(sortOrderJSONString)) {
225237
try {
226-
sortFields = JSON_OBJECT_MAPPER.reader().readValue(sortOderJSONString, SortFields.class);
238+
if (isZOrderJSON(sortOrderJSONString)) {
239+
properties.remove(TableProperties.DEFAULT_SORT_ORDER);
240+
ZorderFields zorderFields = JSON_OBJECT_MAPPER.reader().readValue(sortOrderJSONString, ZorderFields.class);
241+
if (zorderFields != null && !zorderFields.getZOrderFields().isEmpty()) {
242+
setZOrderSortOrder(zorderFields, properties);
243+
}
244+
} else {
245+
SortFields sortFields = JSON_OBJECT_MAPPER.reader().readValue(sortOrderJSONString, SortFields.class);
246+
if (sortFields != null && !sortFields.getSortFields().isEmpty()) {
247+
SortOrder.Builder sortOrderBuilder = SortOrder.builderFor(schema);
248+
sortFields.getSortFields().forEach(fieldDesc -> {
249+
NullOrder nullOrder = fieldDesc.getNullOrdering() == NullOrdering.NULLS_FIRST ?
250+
NullOrder.NULLS_FIRST : NullOrder.NULLS_LAST;
251+
SortDirection sortDirection = fieldDesc.getDirection() == SortFieldDesc.SortDirection.ASC ?
252+
SortDirection.ASC : SortDirection.DESC;
253+
sortOrderBuilder.sortBy(fieldDesc.getColumnName(), sortDirection, nullOrder);
254+
});
255+
properties.put(TableProperties.DEFAULT_SORT_ORDER, SortOrderParser.toJson(sortOrderBuilder.build()));
256+
}
257+
}
227258
} catch (Exception e) {
228-
LOG.warn("Can not read write order json: {}", sortOderJSONString, e);
229-
return;
230-
}
231-
if (sortFields != null && !sortFields.getSortFields().isEmpty()) {
232-
SortOrder.Builder sortOderBuilder = SortOrder.builderFor(schema);
233-
sortFields.getSortFields().forEach(fieldDesc -> {
234-
NullOrder nullOrder = fieldDesc.getNullOrdering() == NullOrdering.NULLS_FIRST ?
235-
NullOrder.NULLS_FIRST : NullOrder.NULLS_LAST;
236-
SortDirection sortDirection = fieldDesc.getDirection() == SortFieldDesc.SortDirection.ASC ?
237-
SortDirection.ASC : SortDirection.DESC;
238-
sortOderBuilder.sortBy(fieldDesc.getColumnName(), sortDirection, nullOrder);
239-
});
240-
properties.put(TableProperties.DEFAULT_SORT_ORDER, SortOrderParser.toJson(sortOderBuilder.build()));
259+
LOG.warn("Can not read write order json: {}", sortOrderJSONString, e);
241260
}
242261
}
243262
}
244263

264+
/**
265+
* Configures the Z-order sort order metadata in the given properties
266+
* based on the specified Z-order fields.
267+
*
268+
* @param zOrderFields the ZorderFields containing columns for Z-order sorting
269+
* @param properties the Properties object to store sort order metadata
270+
*/
271+
private void setZOrderSortOrder(ZorderFields zOrderFields, Properties properties) {
272+
List<String> columnNames = zOrderFields.getZOrderFields().stream()
273+
.map(ZOrderFieldDesc::getColumnName)
274+
.collect(Collectors.toList());
275+
276+
LOG.info("Setting Z-order sort order for columns: {}", columnNames);
277+
278+
properties.put("sort.order", "ZORDER");
279+
properties.put("sort.columns", String.join(",", columnNames));
280+
281+
LOG.info("Z-order sort order configured for Iceberg table with columns: {}", columnNames);
282+
}
283+
284+
private boolean isZOrderJSON(String jsonString) {
285+
return jsonString.contains("zorderFields");
286+
}
287+
288+
245289
@Override
246290
public void rollbackCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
247291
// do nothing

iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
import org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc;
8383
import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableSetPropertiesDesc;
8484
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
85+
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
8586
import org.apache.hadoop.hive.ql.exec.Utilities;
8687
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
8788
import org.apache.hadoop.hive.ql.io.IOConstants;
@@ -119,6 +120,7 @@
119120
import org.apache.hadoop.hive.ql.session.SessionState;
120121
import org.apache.hadoop.hive.ql.session.SessionStateUtil;
121122
import org.apache.hadoop.hive.ql.stats.Partish;
123+
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
122124
import org.apache.hadoop.hive.ql.util.NullOrdering;
123125
import org.apache.hadoop.hive.serde2.AbstractSerDe;
124126
import org.apache.hadoop.hive.serde2.Deserializer;
@@ -184,6 +186,7 @@
184186
import org.apache.iceberg.mr.InputFormatConfig;
185187
import org.apache.iceberg.mr.hive.actions.HiveIcebergDeleteOrphanFiles;
186188
import org.apache.iceberg.mr.hive.plan.IcebergBucketFunction;
189+
import org.apache.iceberg.mr.hive.udf.GenericUDFIcebergZorder;
187190
import org.apache.iceberg.puffin.Blob;
188191
import org.apache.iceberg.puffin.BlobMetadata;
189192
import org.apache.iceberg.puffin.Puffin;
@@ -929,9 +932,64 @@ public DynamicPartitionCtx createDPContext(
929932
addCustomSortExpr(table, hmsTable, writeOperation, dpCtx, getSortTransformSpec(table));
930933
}
931934

935+
// Even if table has no explicit sort order, honor z-order if configured
936+
Map<String, String> props = table.properties();
937+
if ("ZORDER".equalsIgnoreCase(props.getOrDefault("sort.order", ""))) {
938+
createZOrderCustomSort(props, dpCtx, table, hmsTable, writeOperation);
939+
}
940+
932941
return dpCtx;
933942
}
934943

944+
/**
945+
* Adds a custom sort expression to the DynamicPartitionCtx that performs local Z-ordering on write.
946+
*
947+
* Behavior:
948+
* - Reads Z-order properties from 'sort.order' and 'sort.columns' (comma-separated).
949+
* - Resolves the referenced columns to their positions in the physical row (taking into account
950+
* ACID virtual columns offset for overwrite/update operations).
951+
* - Configures a single ASC sort key with NULLS FIRST and injects a custom key expression for
952+
* Z-order
953+
*/
954+
private void createZOrderCustomSort(Map<String, String> props, DynamicPartitionCtx dpCtx, Table table,
955+
org.apache.hadoop.hive.ql.metadata.Table hmsTable, Operation writeOperation) {
956+
String colsProp = props.get("sort.columns");
957+
if (StringUtils.isNotBlank(colsProp)) {
958+
List<String> zCols = Arrays.stream(colsProp.split(",")).map(String::trim)
959+
.filter(s -> !s.isEmpty()).collect(Collectors.toList());
960+
961+
Map<String, Integer> fieldOrderMap = Maps.newHashMap();
962+
List<Types.NestedField> fields = table.schema().columns();
963+
for (int i = 0; i < fields.size(); ++i) {
964+
fieldOrderMap.put(fields.get(i).name(), i);
965+
}
966+
int offset = (shouldOverwrite(hmsTable, writeOperation) ?
967+
ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA : acidSelectColumns(hmsTable, writeOperation)).size();
968+
969+
List<Integer> zIndices = zCols.stream().map(col -> {
970+
Integer base = fieldOrderMap.get(col);
971+
Preconditions.checkArgument(base != null, "Z-order column not found in schema: %s", col);
972+
return base + offset;
973+
}).collect(Collectors.toList());
974+
975+
dpCtx.setCustomSortOrder(Lists.newArrayList(Collections.singletonList(1)));
976+
dpCtx.setCustomSortNullOrder(Lists.newArrayList(Collections.singletonList(NullOrdering.NULLS_FIRST.getCode())));
977+
978+
dpCtx.addCustomSortExpressions(Collections.singletonList(allCols -> {
979+
List<ExprNodeDesc> args = Lists.newArrayListWithExpectedSize(zIndices.size());
980+
for (Integer idx : zIndices) {
981+
args.add(allCols.get(idx));
982+
}
983+
try {
984+
GenericUDF udf = new GenericUDFIcebergZorder();
985+
return ExprNodeGenericFuncDesc.newInstance(udf, "iceberg_zorder", args);
986+
} catch (UDFArgumentException e) {
987+
throw new RuntimeException(e);
988+
}
989+
}));
990+
}
991+
}
992+
935993
private void addCustomSortExpr(Table table, org.apache.hadoop.hive.ql.metadata.Table hmsTable,
936994
Operation writeOperation, DynamicPartitionCtx dpCtx,
937995
List<TransformSpec> transformSpecs) {
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.iceberg.mr.hive.udf;
21+
22+
import java.nio.ByteBuffer;
23+
import java.nio.charset.StandardCharsets;
24+
import org.apache.hadoop.hive.ql.exec.Description;
25+
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
26+
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
27+
import org.apache.hadoop.hive.ql.metadata.HiveException;
28+
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
29+
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
30+
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
31+
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
32+
import org.apache.hadoop.io.BytesWritable;
33+
import org.apache.iceberg.util.ZOrderByteUtils;
34+
35+
/**
36+
* Hive UDF to compute the Z-order value of given input columns using Iceberg's ZOrderByteUtils.
37+
* Supports various primitive types and converts inputs into interleaved binary representation.
38+
*/
39+
@Description(name = "iceberg_zorder",
40+
value = "_FUNC_(value) - " +
41+
"Returns the z-value calculated by Iceberg ZOrderByteUtils class")
42+
public class GenericUDFIcebergZorder extends GenericUDF {
43+
private PrimitiveObjectInspector[] argOIs;
44+
// For variable-length types (e.g., strings), how many bytes contribute to z-order
45+
private final int varLengthContribution = 8;
46+
private transient ByteBuffer[] reUseBuffer;
47+
private static final int MAX_OUTPUT_SIZE = Integer.MAX_VALUE;
48+
49+
/**
50+
* Initializes the UDF, validating argument types are primitives and preparing buffers.
51+
*/
52+
@Override
53+
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
54+
if (arguments.length < 2) {
55+
throw new UDFArgumentException("iceberg_zorder requires at least 2 arguments");
56+
}
57+
argOIs = new PrimitiveObjectInspector[arguments.length];
58+
reUseBuffer = new ByteBuffer[arguments.length];
59+
for (int i = 0; i < arguments.length; i++) {
60+
if (!(arguments[i] instanceof PrimitiveObjectInspector poi)) {
61+
throw new UDFArgumentTypeException(i, "Only primitive types supported for z-order");
62+
}
63+
argOIs[i] = poi;
64+
}
65+
return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
66+
}
67+
68+
/**
69+
* Evaluates the UDF by converting input values to ordered bytes, interleaving them,
70+
* and returning the resulting Z-order binary value.
71+
*/
72+
@Override
73+
public Object evaluate(DeferredObject[] arguments) throws HiveException {
74+
byte[][] inputs = new byte[arguments.length][];
75+
int totalLength = 0;
76+
77+
for (int i = 0; i < arguments.length; i++) {
78+
byte[] orderedBytes = convertToOrderedBytes(arguments[i].get(), argOIs[i], i);
79+
inputs[i] = orderedBytes;
80+
totalLength += orderedBytes.length;
81+
}
82+
83+
int outputLength = Math.min(totalLength, MAX_OUTPUT_SIZE);
84+
ByteBuffer buffer = ByteBuffer.allocate(outputLength);
85+
86+
byte[] interleaved = ZOrderByteUtils.interleaveBits(inputs, outputLength, buffer);
87+
return new BytesWritable(interleaved);
88+
}
89+
90+
@Override
91+
public String getDisplayString(String[] children) {
92+
return "iceberg_zorder(" + String.join(", ", children) + ")";
93+
}
94+
95+
/**
96+
* Converts a single input value to its ordered byte representation based on type.
97+
* @return fixed-length byte arrays to be used in interleaving.
98+
*/
99+
private byte[] convertToOrderedBytes(Object value, PrimitiveObjectInspector oi,
100+
int position) throws HiveException {
101+
if (value == null) {
102+
// For NULL values, we have primitive buffer size of 8 with values of 0
103+
return ByteBuffer.wrap(new byte[ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE]).array();
104+
}
105+
106+
if (reUseBuffer[position] == null) {
107+
reUseBuffer[position] = ByteBuffer.allocate(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE);
108+
}
109+
switch (oi.getPrimitiveCategory()) {
110+
case BOOLEAN:
111+
boolean boolValue = (Boolean) oi.getPrimitiveJavaObject(value);
112+
return ZOrderByteUtils.intToOrderedBytes(boolValue ? 1 : 0, reUseBuffer[position]).array();
113+
114+
case BYTE:
115+
byte byteValue = (Byte) oi.getPrimitiveJavaObject(value);
116+
return ZOrderByteUtils.tinyintToOrderedBytes(byteValue, reUseBuffer[position]).array();
117+
118+
case SHORT:
119+
short shortValue = (Short) oi.getPrimitiveJavaObject(value);
120+
return ZOrderByteUtils.shortToOrderedBytes(shortValue, reUseBuffer[position]).array();
121+
122+
case INT:
123+
int intValue = (Integer) oi.getPrimitiveJavaObject(value);
124+
return ZOrderByteUtils.intToOrderedBytes(intValue, reUseBuffer[position]).array();
125+
126+
case LONG:
127+
long longValue = (Long) oi.getPrimitiveJavaObject(value);
128+
return ZOrderByteUtils.longToOrderedBytes(longValue, reUseBuffer[position]).array();
129+
130+
case FLOAT:
131+
float floatValue = (Float) oi.getPrimitiveJavaObject(value);
132+
return ZOrderByteUtils.floatToOrderedBytes(floatValue, reUseBuffer[position]).array();
133+
134+
case DOUBLE:
135+
double doubleValue = (Double) oi.getPrimitiveJavaObject(value);
136+
return ZOrderByteUtils.doubleToOrderedBytes(doubleValue, reUseBuffer[position]).array();
137+
138+
case DATE:
139+
// Get data in epoch seconds and convert it to long
140+
Object dateValue = oi.getPrimitiveJavaObject(value);
141+
long dateInSeconds;
142+
if (dateValue instanceof java.sql.Date dd) {
143+
dateInSeconds = dd.getTime() / 1000L;
144+
} else if (dateValue instanceof org.apache.hadoop.hive.common.type.Date dd) {
145+
dateInSeconds = dd.toEpochSecond();
146+
} else {
147+
throw new HiveException("Unsupported DATE backing type: " + dateValue.getClass());
148+
}
149+
return ZOrderByteUtils.longToOrderedBytes(dateInSeconds, reUseBuffer[position]).array();
150+
151+
case TIMESTAMP:
152+
Object tsValue = oi.getPrimitiveJavaObject(value);
153+
long tsInSeconds;
154+
if (tsValue instanceof org.apache.hadoop.hive.common.type.Timestamp ts) {
155+
tsInSeconds = ts.toEpochSecond();
156+
} else if (tsValue instanceof java.sql.Timestamp ts) {
157+
tsInSeconds = ts.getTime() / 1000L;
158+
} else {
159+
throw new HiveException("Unsupported TIMESTAMP backing type: " + tsValue.getClass());
160+
}
161+
return ZOrderByteUtils.longToOrderedBytes(tsInSeconds, reUseBuffer[position]).array();
162+
163+
case CHAR:
164+
case VARCHAR:
165+
case STRING:
166+
String strVal = String.valueOf(oi.getPrimitiveJavaObject(value));
167+
return ZOrderByteUtils.stringToOrderedBytes(strVal, varLengthContribution,
168+
reUseBuffer[position], StandardCharsets.UTF_8.newEncoder()).array();
169+
170+
default:
171+
throw new HiveException("Unsupported type in z-order: " + oi.getPrimitiveCategory());
172+
}
173+
}
174+
}

0 commit comments

Comments (0)