core/src/main/scala/org/apache/spark/MapOutputTracker.scala (4 additions, 1 deletion)

@@ -942,7 +942,10 @@ private[spark] class MapOutputTrackerMaster(
     }
   }
 
-  /** Unregister shuffle data */
+  /**
+   * Unregister shuffle metadata. This currently should only be called through
+   * `ContextCleaner` when the shuffle is considered no longer referenced anywhere.
+   */
   def unregisterShuffle(shuffleId: Int): Unit = {
     shuffleStatuses.remove(shuffleId).foreach { shuffleStatus =>
       shuffleStatus.invalidateSerializedMapOutputStatusCache()

core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala

@@ -19,7 +19,7 @@ package org.apache.spark.storage

 import scala.concurrent.{ExecutionContext, ExecutionContextExecutorService, Future}
 
-import org.apache.spark.{MapOutputTracker, SparkEnv}
+import org.apache.spark.{MapOutputTracker, MapOutputTrackerMaster, SparkEnv}
 import org.apache.spark.internal.{Logging, MessageWithContext}
 import org.apache.spark.internal.LogKeys.{BLOCK_ID, BROADCAST_ID, RDD_ID, SHUFFLE_ID}
 import org.apache.spark.rpc.{IsolatedThreadSafeRpcEndpoint, RpcCallContext, RpcEnv}
@@ -57,7 +57,14 @@ class BlockManagerStorageEndpoint(

     case RemoveShuffle(shuffleId) =>
       doAsync[Boolean](log"removing shuffle ${MDC(SHUFFLE_ID, shuffleId)}", context) {
-        if (mapOutputTracker != null) {
+        if (mapOutputTracker != null && !mapOutputTracker.isInstanceOf[MapOutputTrackerMaster]) {

Contributor:

Should this check be in `unregisterShuffle`, if we don't expect it to be called on the master? Someone else could end up calling this same method in local mode in the future.

Member Author:

I was thinking about that. That approach requires passing `isLocal` into `MapOutputTrackerMaster`, which involves more changes. But I also agree it's safer.
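
For illustration, one possible reading of that alternative is sketched below as a toy model. The `isLocal` flag and the `fromContextCleaner` parameter are both hypothetical and exist in neither Spark nor this PR:

import scala.collection.concurrent.TrieMap

// Toy model of the suggested alternative: the master tracker itself refuses
// unregistration requests that do not come from ContextCleaner when running
// in local mode (where the executor shares this tracker instance).
class TrackerMasterSketch(isLocal: Boolean) {
  private val shuffleStatuses = TrieMap.empty[Int, String]

  def registerShuffle(id: Int): Unit = shuffleStatuses.put(id, s"status-$id")

  def unregisterShuffle(id: Int, fromContextCleaner: Boolean): Unit = {
    // In local mode, a RemoveShuffle RPC must not drop the authoritative
    // metadata; only ContextCleaner may do so.
    if (!isLocal || fromContextCleaner) {
      shuffleStatuses.remove(id)
    }
  }
}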

Contributor:

So in local mode, we can't clean up shuffle files?

Contributor:

I think shuffle file cleanup still happens in local mode, but the shuffle metadata cleanup only happens through `ContextCleaner` on the driver.

+          // SPARK-53898: `MapOutputTrackerMaster.unregisterShuffle()` should only be called
+          // through `ContextCleaner` when the shuffle is considered no longer referenced anywhere.
+          // Otherwise, we might hit exceptions if there is any subsequent access (which still
+          // references that shuffle) to that shuffle metadata in `MapOutputTrackerMaster`. E.g.,
+          // an ongoing subquery could access the same shuffle metadata which could have been

Contributor:

I am not sure if we should mention this, since the ideal behavior would be to terminate any subqueries when the main query completes.

Member Author:

There could be a race even if we terminate the subquery?

Contributor:

The main query has already ended. The subquery's results are not going to be used anyway (and letting it continue is also a waste of resources).

Member Author (@Ngone51, Oct 15, 2025):

I understand we should cancel the running subquery, and we should do it. My point is that the running subquery could still access `MapOutputTrackerMaster` even if we cancel it right after the main query ends, due to the race between them. So I think it's fine to mention it here.

+          // cleaned up after the main query completes. Note this currently only happens in local
+          // cluster where both driver and executor use the `MapOutputTrackerMaster`.
           mapOutputTracker.unregisterShuffle(shuffleId)

Contributor:

So for a non-local cluster, `mapOutputTracker` is on the executors, and `unregisterShuffle` only unregisters the shuffle on the executor side?

Contributor:

On executors, it would call `MapOutputTrackerWorker#unregisterShuffle`, where the `shuffleStatuses` are not cleaned up, unlike in `MapOutputTrackerMaster#unregisterShuffle`.

In local mode, we just have `MapOutputTrackerMaster`, and we end up cleaning the `shuffleStatuses`.

MapOutputTrackerMaster:

  def unregisterShuffle(shuffleId: Int): Unit = {
    shuffleStatuses.remove(shuffleId).foreach { shuffleStatus =>
      shuffleStatus.invalidateSerializedMapOutputStatusCache()
      shuffleStatus.invalidateSerializedMergeOutputStatusCache()
    }
  }

MapOutputTrackerWorker:

  def unregisterShuffle(shuffleId: Int): Unit = {
    mapStatuses.remove(shuffleId)
    mergeStatuses.remove(shuffleId)
    shufflePushMergerLocations.remove(shuffleId)
  }

Contributor:

So how do we clean up shuffle files in local mode?

Member Author:

The shuffle files are cleaned by the RemoveShuffle RPC, which is sent from the driver to the executors and handled by BlockManagerStorageEndpoint, which deletes the files on disk. The RemoveShuffle RPC can currently be raised in two ways: 1) ContextCleaner, and 2) the shuffle cleanup feature at the end of a SQL query. This is the same for all modes.
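
For reference, a rough sketch of the second trigger path, mirroring the line the test suite below uses; `removeShuffle` is the existing `ShuffleDriverComponents` API, and everything else here is commentary:

// Path 2, as exercised by cleanupShuffles() in the test below: explicitly ask
// the driver components to remove a shuffle. This sends RemoveShuffle to the
// executors, whose BlockManagerStorageEndpoint deletes the on-disk files via
// shuffleManager.unregisterShuffle(shuffleId).
spark.sparkContext.shuffleDriverComponents.removeShuffle(shuffleId, true)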

Contributor:

Ah, so shuffle files are already cleaned up before we reach here?

Contributor:

One idea: shall we add a new method `clearShuffleStatusCache` and call it here? The executor-side shuffle status is more like a cache, while the driver-side one is the single source of truth. `MapOutputTrackerMaster#clearShuffleStatusCache` would be a no-op.

Member Author (@Ngone51, Oct 22, 2025):

> Ah, so shuffle files are already cleaned up before we reach here?

No. We're handling the RemoveShuffle RPC in BlockManagerStorageEndpoint right at this point. The files are deleted at line 72 by `shuffleManager.unregisterShuffle(shuffleId)`.

Member Author:

> One idea: shall we add a new method clearShuffleStatusCache and call it here? The executor side shuffle status is more like a cache and the driver side one is single source of truth.

Actually, we already do this in non-local mode. In non-local mode, we call `MapOutputTrackerWorker.unregisterShuffle()` to clean up the statuses (that's exactly what line 68 does). In local mode, we don't use `MapOutputTrackerWorker`, so there are no cached statuses to clean.

Contributor (@cloud-fan, Oct 22, 2025):

Yea, it's the same thing, but `clearShuffleStatusCache` is more explicit and clear than skipping `unregisterShuffle` if the instance is `MapOutputTrackerMaster`.
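
A minimal, self-contained sketch of what that proposal could look like; the class names and shapes below are illustrative only, and no such method exists in this PR:

import scala.collection.concurrent.TrieMap

// Illustrative model of the clearShuffleStatusCache idea: the RPC handler
// would call it unconditionally instead of type-checking the tracker.
abstract class TrackerSketch {
  def clearShuffleStatusCache(shuffleId: Int): Unit
}

class MasterSketch extends TrackerSketch {
  val shuffleStatuses = TrieMap.empty[Int, String] // single source of truth
  // Driver-side statuses must survive a RemoveShuffle RPC, so this is a no-op.
  override def clearShuffleStatusCache(shuffleId: Int): Unit = ()
}

class WorkerSketch extends TrackerSketch {
  val mapStatuses = TrieMap.empty[Int, String] // executor-side cache only
  override def clearShuffleStatusCache(shuffleId: Int): Unit = {
    mapStatuses.remove(shuffleId)
  }
}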

         }
         val shuffleManager = SparkEnv.get.shuffleManager

sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala

@@ -20,6 +20,7 @@ import scala.collection.mutable
 import scala.io.Source
 import scala.util.Try
 
+import org.apache.spark.MapOutputTrackerMaster
 import org.apache.spark.sql.{AnalysisException, ExtendedExplainGenerator, FastOperator, SaveMode}
 import org.apache.spark.sql.catalyst.{QueryPlanningTracker, QueryPlanningTrackerCallback, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{CurrentNamespace, UnresolvedFunction, UnresolvedRelation}
@@ -320,11 +321,15 @@ class QueryExecutionSuite extends SharedSparkSession {

   private def cleanupShuffles(): Unit = {
     val blockManager = spark.sparkContext.env.blockManager
+    val mapOutputTrackerMaster =
+      spark.sparkContext.env.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]
     blockManager.diskBlockManager.getAllBlocks().foreach {
       case ShuffleIndexBlockId(shuffleId, _, _) =>
         spark.sparkContext.shuffleDriverComponents.removeShuffle(shuffleId, true)
       case _ =>
     }
+    // Shuffle cleanup should not clean up shuffle metadata on the driver
+    assert(mapOutputTrackerMaster.shuffleStatuses.nonEmpty)

Member Author:

The test fails this assert before the fix.

   }
 
   test("SPARK-53413: Cleanup shuffle dependencies for commands") {