Skip to content

Commit d233524

Browse files
committed
Merge pull request JanKaul#248 from gruuya/object-store-url-fix
fix: make fake object store url always parsable
2 parents cbcbf5d + 1da130e commit d233524

File tree

2 files changed

+28
-29
lines changed

2 files changed

+28
-29
lines changed

catalogs/iceberg-file-catalog/src/lib.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -896,10 +896,7 @@ pub mod tests {
896896
.await
897897
.unwrap();
898898

899-
assert_eq!(
900-
std::str::from_utf8(&version_hint).unwrap(),
901-
"s3://warehouse/tpch/lineitem/metadata/v1.metadata.json"
902-
);
899+
assert_eq!(std::str::from_utf8(&version_hint).unwrap(), "1");
903900

904901
let files = object_store.list(None).collect::<Vec<_>>().await;
905902

datafusion_iceberg/src/table.rs

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -336,24 +336,17 @@ impl TableProvider for DataFusionTable {
336336
// Create a fake object store URL. Different table paths should produce fake URLs
337337
// that differ in the host name, because DF's DefaultObjectStoreRegistry only takes
338338
// hostname into account for object store keys
339-
fn fake_object_store_url(table_location_url: &str) -> Option<ObjectStoreUrl> {
340-
let mut u = url::Url::parse(table_location_url).ok()?;
341-
u.set_host(Some(&format!(
342-
"{}-{}",
343-
u.host_str().unwrap_or(""),
344-
// Hex-encode the path to ensure it produces a valid hostname
345-
u.path()
346-
.as_bytes()
347-
.iter()
348-
.map(|b| format!("{b:02x}"))
349-
.collect::<Vec<_>>()
350-
.join("")
351-
)))
352-
.unwrap();
353-
u.set_path("");
354-
u.set_query(None);
355-
u.set_fragment(None);
356-
ObjectStoreUrl::parse(&u).ok()
339+
fn fake_object_store_url(table_location_url: &str) -> ObjectStoreUrl {
340+
// Use quasi url-encoding to escape the characters not allowed in host names, (i.e. for `/` use
341+
// `-2F` instead of `%2F`)
342+
ObjectStoreUrl::parse(format!(
343+
"iceberg-rust://{}",
344+
table_location_url
345+
.replace('-', "-2D")
346+
.replace('/', "-2F")
347+
.replace(':', "-3A")
348+
))
349+
.expect("Invalid object store url.")
357350
}
358351

359352
#[allow(clippy::too_many_arguments)]
@@ -381,8 +374,7 @@ async fn table_scan(
381374
.unwrap_or_else(|| table.current_schema(None).unwrap().clone());
382375

383376
// Create a unique URI for this particular object store
384-
let object_store_url = fake_object_store_url(&table.metadata().location)
385-
.unwrap_or_else(ObjectStoreUrl::local_filesystem);
377+
let object_store_url = fake_object_store_url(&table.metadata().location);
386378
session
387379
.runtime_env()
388380
.register_object_store(object_store_url.as_ref(), table.object_store());
@@ -1144,8 +1136,7 @@ async fn write_parquet_files(
11441136

11451137
let bucket = Bucket::from_path(&metadata.location).map_err(DataFusionIcebergError::from)?;
11461138

1147-
let object_store_url =
1148-
fake_object_store_url(&metadata.location).unwrap_or(ObjectStoreUrl::local_filesystem());
1139+
let object_store_url = fake_object_store_url(&metadata.location);
11491140

11501141
context.runtime_env().register_object_store(
11511142
&object_store_url
@@ -2485,12 +2476,23 @@ mod tests {
24852476
fn test_fake_object_store_url() {
24862477
assert_eq!(
24872478
fake_object_store_url("s3://a"),
2488-
Some(ObjectStoreUrl::parse("s3://a-").unwrap()),
2479+
ObjectStoreUrl::parse("iceberg-rust://s3-3A-2F-2Fa").unwrap(),
24892480
);
24902481
assert_eq!(
24912482
fake_object_store_url("s3://a/b"),
2492-
Some(ObjectStoreUrl::parse("s3://a-2f62").unwrap()),
2483+
ObjectStoreUrl::parse("iceberg-rust://s3-3A-2F-2Fa-2Fb").unwrap(),
2484+
);
2485+
assert_eq!(
2486+
fake_object_store_url("/warehouse/tpch/lineitem"),
2487+
ObjectStoreUrl::parse("iceberg-rust://-2Fwarehouse-2Ftpch-2Flineitem").unwrap()
2488+
);
2489+
assert_ne!(
2490+
fake_object_store_url("s3://a/-/--"),
2491+
fake_object_store_url("s3://a/--/-"),
2492+
);
2493+
assert_ne!(
2494+
fake_object_store_url("s3://a/table-2Fpath"),
2495+
fake_object_store_url("s3://a/table/path"),
24932496
);
2494-
assert_eq!(fake_object_store_url("invalid url"), None);
24952497
}
24962498
}

0 commit comments

Comments
 (0)