
Commit 09b5075

changes lazy val to def
1 parent 4d15f64 commit 09b5075

4 files changed: +16 additions, -10 deletions

sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala

Lines changed: 6 additions & 3 deletions
Lines changed: 6 additions & 3 deletions

@@ -624,7 +624,7 @@ case class FileSourceScanExec(
     logInfo(s"Planning with ${bucketSpec.numBuckets} buckets")
     val filesGroupedToBuckets =
       selectedPartitions.flatMap { p =>
-        p.files.map(f => PartitionedFileUtil.getPartitionedFile(f, p.values))
+        p.files.map(f => PartitionedFileUtil.getPartitionedFile(f, f.getPath, p.values))
       }.groupBy { f =>
         BucketingUtils
           .getBucketId(f.toPath.getName)
@@ -689,12 +689,15 @@ case class FileSourceScanExec(
 
     val splitFiles = selectedPartitions.flatMap { partition =>
       partition.files.flatMap { file =>
-        if (shouldProcess(file.getPath)) {
+        // getPath() is very expensive so we only want to call it once in this block:
+        val filePath = file.getPath
+        if (shouldProcess(filePath)) {
           val isSplitable = relation.fileFormat.isSplitable(
-            relation.sparkSession, relation.options, file.getPath)
+            relation.sparkSession, relation.options, filePath)
           PartitionedFileUtil.splitFiles(
             sparkSession = relation.sparkSession,
             file = file,
+            filePath = filePath,
             isSplitable = isSplitable,
             maxSplitBytes = maxSplitBytes,
             partitionValues = partition.values
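
The new comment in this hunk carries the rationale: with getPath changed from a cached lazy val to a plain def (see FileIndex.scala below), every call re-derives the Path, so the scan hoists it into a local val and pays for it once per file instead of up to three times. A minimal sketch of the pattern, runnable in the Scala REPL; StubFile, shouldProcess, and split are stand-ins for illustration, not Spark APIs:

// Stand-in for FileStatusWithMetadata, whose getPath is now an uncached def;
// the counter makes the number of calls observable.
var getPathCalls = 0
class StubFile {
  def getPath: String = { getPathCalls += 1; "/data/part-00000" }
}

def shouldProcess(path: String): Boolean = path.nonEmpty
def split(path: String): Seq[String] = Seq(path)

val file = new StubFile
val filePath = file.getPath // evaluated exactly once
val result = if (shouldProcess(filePath)) split(filePath) else Nil
assert(getPathCalls == 1)   // not 3, as repeated file.getPath calls would give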

sql/core/src/main/scala/org/apache/spark/sql/execution/PartitionedFileUtil.scala

Lines changed: 6 additions & 4 deletions
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.execution
 
-import org.apache.hadoop.fs.{BlockLocation, FileStatus, LocatedFileStatus}
+import org.apache.hadoop.fs.{BlockLocation, FileStatus, LocatedFileStatus, Path}
 
 import org.apache.spark.paths.SparkPath
 import org.apache.spark.sql.SparkSession
@@ -28,6 +28,7 @@ object PartitionedFileUtil {
   def splitFiles(
       sparkSession: SparkSession,
       file: FileStatusWithMetadata,
+      filePath: Path,
       isSplitable: Boolean,
       maxSplitBytes: Long,
       partitionValues: InternalRow): Seq[PartitionedFile] = {
@@ -36,19 +37,20 @@ object PartitionedFileUtil {
         val remaining = file.getLen - offset
         val size = if (remaining > maxSplitBytes) maxSplitBytes else remaining
         val hosts = getBlockHosts(getBlockLocations(file.fileStatus), offset, size)
-        PartitionedFile(partitionValues, SparkPath.fromPath(file.getPath), offset, size, hosts,
+        PartitionedFile(partitionValues, SparkPath.fromPath(filePath), offset, size, hosts,
           file.getModificationTime, file.getLen, file.metadata)
       }
     } else {
-      Seq(getPartitionedFile(file, partitionValues))
+      Seq(getPartitionedFile(file, filePath, partitionValues))
     }
   }
 
   def getPartitionedFile(
       file: FileStatusWithMetadata,
+      filePath: Path,
       partitionValues: InternalRow): PartitionedFile = {
     val hosts = getBlockHosts(getBlockLocations(file.fileStatus), 0, file.getLen)
-    PartitionedFile(partitionValues, SparkPath.fromPath(file.getPath), 0, file.getLen, hosts,
+    PartitionedFile(partitionValues, SparkPath.fromPath(filePath), 0, file.getLen, hosts,
       file.getModificationTime, file.getLen, file.metadata)
   }
 
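
Both utility methods now take the Path explicitly instead of reading file.getPath themselves, so the caller decides when (and how often) it is computed. A hypothetical call site under the new signature; spark, file, and the size value are placeholders, while the parameter names match the diff above:

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.PartitionedFileUtil

val filePath = file.getPath // computed once by the caller, reused below
val splits = PartitionedFileUtil.splitFiles(
  sparkSession = spark,
  file = file,                        // a FileStatusWithMetadata
  filePath = filePath,                // the new explicit parameter
  isSplitable = true,
  maxSplitBytes = 128L * 1024 * 1024, // e.g. 128 MiB
  partitionValues = InternalRow.empty)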

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileIndex.scala

Lines changed: 1 addition & 2 deletions
@@ -30,8 +30,7 @@ import org.apache.spark.sql.types.StructType
  */
 case class FileStatusWithMetadata(fileStatus: FileStatus, metadata: Map[String, Any] = Map.empty) {
   // Wrapper methods to improve source compatibility in code that still expects a [[FileStatus]].
-  // NOTE: getPath() is very expensive, so we only want to call it once (if accessed at all).
-  lazy val getPath: Path = fileStatus.getPath
+  def getPath: Path = fileStatus.getPath
   def getLen: Long = fileStatus.getLen
   def getModificationTime: Long = fileStatus.getModificationTime
   def isDirectory: Boolean = fileStatus.isDirectory
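
This hunk is the change named in the commit title. In Scala 2, a lazy val compiles to a cached field plus an initialization flag guarded by double-checked locking, so every FileStatusWithMetadata instance (one per listed file) carried two extra fields and first-access synchronization. A schematic, not literal, desugaring of the old lazy val:

import org.apache.hadoop.fs.{FileStatus, Path}

// Roughly what the Scala 2 compiler generates for `lazy val getPath`:
class LazyPathHolder(fileStatus: FileStatus) {
  private[this] var bitmap0: Boolean = false // extra flag field per instance
  private[this] var cached: Path = _         // extra cache field per instance
  def getPath: Path = {
    if (!bitmap0) this.synchronized {        // lock taken on first access
      if (!bitmap0) { cached = fileStatus.getPath; bitmap0 = true }
    }
    cached
  }
}

With a plain def the wrapper carries no extra state; callers that do need the value repeatedly cache it themselves, which is exactly what the local filePath vals in the other three files do.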

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala

Lines changed: 3 additions & 1 deletion
@@ -151,10 +151,12 @@ trait FileScan extends Scan
         partition.values
       }
       partition.files.flatMap { file =>
+        val filePath = file.getPath
         PartitionedFileUtil.splitFiles(
           sparkSession = sparkSession,
           file = file,
-          isSplitable = isSplitable(file.getPath),
+          filePath = filePath,
+          isSplitable = isSplitable(filePath),
           maxSplitBytes = maxSplitBytes,
           partitionValues = partitionValues
         )
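
The DataSource V2 read path gets the same treatment as FileSourceScanExec: filePath is computed once per file and threaded through to splitFiles, so both scan paths follow the caller-side caching pattern sketched earlier.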
