From 0ee57b79e54574cf6827553129ce4f248e309099 Mon Sep 17 00:00:00 2001
From: Ala Luszczak
Date: Thu, 15 Feb 2024 12:29:04 +0100
Subject: [PATCH] [Spark] Handle NullType in normalizeColumnNames()

The sanity check recently introduced in normalizeColumnNamesInDataType() is
a bit too restrictive and fails to handle NullType correctly.

Closes delta-io/delta#2634

GitOrigin-RevId: faaf3d981c57ef3ceb4081e0bc94d457359fc9d8
---
 .../scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala
index b028f97a977..c1fdf654da6 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala
@@ -256,6 +256,10 @@ def normalizeColumnNamesInDataType(
           keyType = normalizedKeyType,
           valueType = normalizedValueType
         )
+      case (_: NullType, _) =>
+        // When schema evolution adds a new column during MERGE, it can be represented with
+        // a NullType in the schema of the data written by the MERGE.
+        sourceDataType
       case _ =>
         if (Utils.isTesting) {
           assert(sourceDataType == tableDataType,
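
For context, below is a minimal, self-contained sketch (not the actual SchemaUtils
implementation) of the leaf-type handling this patch changes. The object name, the
normalizeLeafType helper, and the assertion message are illustrative only; it assumes
the Spark SQL types (spark-sql / spark-catalyst) are on the classpath.

import org.apache.spark.sql.types._

object NullTypeNormalizationSketch {
  // Simplified stand-in for the leaf-type branch of normalizeColumnNamesInDataType().
  def normalizeLeafType(sourceDataType: DataType, tableDataType: DataType): DataType =
    (sourceDataType, tableDataType) match {
      case (_: NullType, _) =>
        // A brand-new column added by MERGE schema evolution can surface as NullType in the
        // schema of the written data, so it will not equal the table's type; pass it through.
        sourceDataType
      case _ =>
        // The pre-existing sanity check: leaf types are expected to match exactly.
        assert(sourceDataType == tableDataType,
          s"Types should match but they don't: $sourceDataType vs $tableDataType")
        sourceDataType
    }

  def main(args: Array[String]): Unit = {
    println(normalizeLeafType(NullType, IntegerType))    // NullType, no assertion failure
    println(normalizeLeafType(IntegerType, IntegerType)) // IntegerType
  }
}

Without the added case, a NullType source type falls into the default branch and, when
Utils.isTesting is true, fails the equality assertion.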