@@ -9,6 +9,8 @@ use object_store::{Attributes, ObjectStore, PutOptions, TagSet};
99
1010use crate :: error:: Error ;
1111use flate2:: read:: GzDecoder ;
12+ use lazy_static:: lazy_static;
13+ use regex:: Regex ;
1214use std:: io:: Read ;
1315
1416/// Simplify interaction with iceberg files
@@ -59,7 +61,7 @@ impl<T: ObjectStore> IcebergStore for T {
5961 "Path for version-hint for {location}"
6062 ) ) ) ?
6163 . into ( ) ,
62- location . to_string ( ) . into ( ) ,
64+ version_hint_content ( location ) . into ( ) ,
6365 PutOptions {
6466 mode : object_store:: PutMode :: Overwrite ,
6567 tags : TagSet :: default ( ) ,
@@ -83,6 +85,39 @@ fn version_hint_path(original: &str) -> Option<String> {
8385 )
8486}
8587
88+ lazy_static ! {
89+ static ref SUPPORTED_METADATA_FILE_FORMATS : Vec <Regex > = vec![
90+ // The standard metastore format https://iceberg.apache.org/spec/#metastore-tables
91+ Regex :: new(
92+ r"^(?<version>[0-9]{5}-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}).(?:gz.)?metadata.json$"
93+ )
94+ . unwrap( ) ,
95+ // The legacy file-system format https://iceberg.apache.org/spec/#file-system-tables
96+ Regex :: new( r"^v(?<version>[0-9]+).metadata.json$" ) . unwrap( ) ,
97+ ] ;
98+ }
99+
100+ /// Given a full path to a metadata file, extract an appropriate version hint that other readers
101+ /// without access to the catalog can parse.
102+ pub fn version_hint_content ( original : & str ) -> String {
103+ original
104+ . split ( "/" )
105+ . last ( )
106+ . and_then ( |filename| {
107+ SUPPORTED_METADATA_FILE_FORMATS
108+ . iter ( )
109+ . filter_map ( |regex| {
110+ regex. captures ( filename) . and_then ( |capture| {
111+ capture
112+ . name ( "version" )
113+ . and_then ( |m| m. as_str ( ) . parse ( ) . ok ( ) )
114+ } )
115+ } )
116+ . next ( )
117+ } )
118+ . unwrap_or ( original. to_string ( ) )
119+ }
120+
86121fn parse_metadata ( location : & str , bytes : & [ u8 ] ) -> Result < TabularMetadata , Error > {
87122 if location. ends_with ( ".gz.metadata.json" ) {
88123 let mut decoder = GzDecoder :: new ( bytes) ;
@@ -99,6 +134,7 @@ fn parse_metadata(location: &str, bytes: &[u8]) -> Result<TabularMetadata, Error
99134#[ cfg( test) ]
100135mod tests {
101136 use super :: * ;
137+ use rstest:: rstest;
102138 use std:: io:: Write ;
103139
104140 #[ test]
@@ -142,6 +178,21 @@ mod tests {
142178 assert_eq ! ( version_hint_path( input) , Some ( expected. to_string( ) ) ) ;
143179 }
144180
181+ #[ rstest]
182+ #[ case:: file_format( "/path/to/metadata/v2.metadata.json" , "2" ) ]
183+ #[ case:: metastore_format_no_gzip(
184+ "/path/to/metadata/00004-3f569e94-5601-48f3-9199-8d71df4ea7b0.metadata.json" ,
185+ "00004-3f569e94-5601-48f3-9199-8d71df4ea7b0"
186+ ) ]
187+ #[ case:: metastore_format_with_gzip(
188+ "/path/to/metadata/00004-3f569e94-5601-48f3-9199-8d71df4ea7b0.gz.metadata.json" ,
189+ "00004-3f569e94-5601-48f3-9199-8d71df4ea7b0"
190+ ) ]
191+ #[ test]
192+ fn test_version_hint_content ( #[ case] input : & str , #[ case] expected : & str ) {
193+ assert_eq ! ( version_hint_content( input) , expected) ;
194+ }
195+
145196 #[ test]
146197 fn test_parse_metadata_table_plain_json ( ) {
147198 let location = "/path/to/metadata/v1.metadata.json" ;
0 commit comments