|
23 | 23 | import static org.apache.iceberg.PlanningMode.LOCAL; |
24 | 24 | import static org.apache.iceberg.data.FileHelpers.encrypt; |
25 | 25 | import static org.assertj.core.api.Assertions.assertThat; |
| 26 | +import static org.assertj.core.api.Assumptions.assumeThat; |
26 | 27 |
|
27 | 28 | import java.io.File; |
28 | 29 | import java.io.IOException; |
|
73 | 74 | import org.apache.spark.sql.sources.Not; |
74 | 75 | import org.apache.spark.sql.sources.StringStartsWith; |
75 | 76 | import org.apache.spark.sql.util.CaseInsensitiveStringMap; |
| 77 | +import org.assertj.core.api.AbstractObjectAssert; |
76 | 78 | import org.junit.jupiter.api.AfterAll; |
77 | 79 | import org.junit.jupiter.api.BeforeAll; |
78 | 80 | import org.junit.jupiter.api.BeforeEach; |
@@ -267,6 +269,98 @@ public void testUnpartitionedTimestampFilter() { |
267 | 269 | "ts < cast('2017-12-22 00:00:00+00:00' as timestamp)")); |
268 | 270 | } |
269 | 271 |
|
| 272 | +  @TestTemplate |
| 273 | +  public void limitPushedDownToSparkScan() { |
| 274 | +    assumeThat(fileFormat) |
| 275 | +        .as("no need to run this across the entire test matrix") |
| 276 | +        .isEqualTo(FileFormat.PARQUET); |
| 277 | + |
| 278 | +    CaseInsensitiveStringMap options = |
| 279 | +        new CaseInsensitiveStringMap(ImmutableMap.of("path", unpartitioned.toString())); |
| 280 | + |
| 281 | +    SparkScanBuilder builder = |
| 282 | +        new SparkScanBuilder(spark, TABLES.load(options.get("path")), options); |
| 283 | + |
| 284 | +    long limit = 23; |
| 285 | +    // simulate Spark pushing down the limit to the scan builder |
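| | +    // (SupportsPushDownLimit passes the limit as an int, while the scan context tracks minRowsRequested as a long) |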
| 286 | +    builder.pushLimit((int) limit); |
| 287 | +    assertThat(builder).extracting("limit").isEqualTo((int) limit); |
| 288 | + |
| 289 | +    // verify batch scan |
| 290 | +    AbstractObjectAssert<?, ?> scanAssert = assertThat(builder.build()).extracting("scan"); |
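| | +    // with LOCAL planning, the Spark scan wraps the underlying Iceberg scan one level deeper |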
| 291 | +    if (LOCAL == planningMode) { |
| 292 | +      scanAssert = scanAssert.extracting("scan"); |
| 293 | +    } |
| 294 | + |
| 295 | +    scanAssert.extracting("context").extracting("minRowsRequested").isEqualTo(limit); |
| 296 | + |
| 297 | +    // verify changelog scan |
| 298 | +    assertThat(builder.buildChangelogScan()) |
| 299 | +        .extracting("scan") |
| 300 | +        .extracting("context") |
| 301 | +        .extracting("minRowsRequested") |
| 302 | +        .isEqualTo(limit); |
| 303 | + |
| 304 | +    // verify CoW scan |
| 305 | +    assertThat(builder.buildCopyOnWriteScan()) |
| 306 | +        .extracting("scan") |
| 307 | +        .extracting("scan") |
| 308 | +        .extracting("context") |
| 309 | +        .extracting("minRowsRequested") |
| 310 | +        .isEqualTo(limit); |
| 311 | + |
| 312 | +    // verify MoR scan |
| 313 | +    scanAssert = assertThat(builder.buildMergeOnReadScan()).extracting("scan"); |
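| | +    // same extra nesting as the batch scan when planning is LOCAL |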
| 314 | +    if (LOCAL == planningMode) { |
| 315 | +      scanAssert = scanAssert.extracting("scan"); |
| 316 | +    } |
| 317 | + |
| 318 | +    scanAssert.extracting("context").extracting("minRowsRequested").isEqualTo(limit); |
| 319 | +  } |
| 320 | + |
| 321 | +  @TestTemplate |
| 322 | +  public void limitPushedDownToSparkScanForMetadataTable() { |
| 323 | +    assumeThat(fileFormat) |
| 324 | +        .as("no need to run this across the entire test matrix") |
| 325 | +        .isEqualTo(FileFormat.PARQUET); |
| 326 | + |
| 327 | +    CaseInsensitiveStringMap options = |
| 328 | +        new CaseInsensitiveStringMap(ImmutableMap.of("path", unpartitioned.toString())); |
| 329 | + |
| 330 | +    // load the snapshots metadata table |
| 331 | +    SparkScanBuilder builder = |
| 332 | +        new SparkScanBuilder(spark, TABLES.load(options.get("path") + "#snapshots"), options); |
| 333 | + |
| 334 | +    long limit = 23; |
| 335 | +    // simulate Spark pushing down the limit to the scan builder |
| 336 | +    builder.pushLimit((int) limit); |
| 337 | +    assertThat(builder).extracting("limit").isEqualTo((int) limit); |
| 338 | + |
| 339 | +    // verify batch scan |
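| | +    // metadata table scans always nest the underlying scan, so no planning-mode check is needed |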
| 340 | +    assertThat(builder.build()) |
| 341 | +        .extracting("scan") |
| 342 | +        .extracting("scan") |
| 343 | +        .extracting("context") |
| 344 | +        .extracting("minRowsRequested") |
| 345 | +        .isEqualTo(limit); |
| 346 | + |
| 347 | +    // verify CoW scan |
| 348 | +    assertThat(builder.buildCopyOnWriteScan()) |
| 349 | +        .extracting("scan") |
| 350 | +        .extracting("scan") |
| 351 | +        .extracting("context") |
| 352 | +        .extracting("minRowsRequested") |
| 353 | +        .isEqualTo(limit); |
| 354 | + |
| 355 | +    // verify MoR scan |
| 356 | +    assertThat(builder.buildMergeOnReadScan()) |
| 357 | +        .extracting("scan") |
| 358 | +        .extracting("scan") |
| 359 | +        .extracting("context") |
| 360 | +        .extracting("minRowsRequested") |
| 361 | +        .isEqualTo(limit); |
| 362 | +  } |
| 363 | + |
270 | 364 |   @TestTemplate |
271 | 365 |   public void testBucketPartitionedIDFilters() { |
272 | 366 |     Table table = buildPartitionedTable("bucketed_by_id", BUCKET_BY_ID); |
|
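For context, a minimal sketch (with a hypothetical table path, not part of this PR) of the user-level query shape these tests simulate: Spark's DSv2 optimizer rewrites a trailing LIMIT into a pushLimit(n) call on any scan builder that implements SupportsPushDownLimit, which is what builder.pushLimit((int) limit) mimics above.

    // hypothetical usage; assumes an existing Iceberg table at this path
    Dataset<Row> df =
        spark.read()
            .format("iceberg")
            .load("/tmp/warehouse/db/tbl") // hypothetical path
            .limit(23); // planned as SparkScanBuilder.pushLimit(23)
    df.show();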