|
15 | 15 | // specific language governing permissions and limitations |
16 | 16 | // under the License. |
17 | 17 |
|
| 18 | +#[cfg(feature = "arrow")] |
| 19 | +use parquet::file::metadata::ParquetMetaData; |
18 | 20 | use parquet::file::metadata::ParquetMetaDataReader; |
19 | 21 | use rand::Rng; |
20 | 22 | use thrift::protocol::TCompactOutputProtocol; |
@@ -164,7 +166,7 @@ fn get_footer_bytes(data: Bytes) -> Bytes { |
164 | 166 | } |
165 | 167 |
|
166 | 168 | #[cfg(feature = "arrow")] |
167 | | -fn rewrite_file(bytes: Bytes) -> (Bytes, FileMetaData) { |
| 169 | +fn rewrite_file(bytes: Bytes) -> (Bytes, ParquetMetaData) { |
168 | 170 | use arrow::array::RecordBatchReader; |
169 | 171 | use parquet::arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter}; |
170 | 172 | use parquet::file::properties::{EnabledStatistics, WriterProperties}; |
@@ -242,40 +244,36 @@ fn criterion_benchmark(c: &mut Criterion) { |
242 | 244 | #[cfg(feature = "arrow")] |
243 | 245 | c.bench_function("page headers", |b| { |
244 | 246 | b.iter(|| { |
245 | | - metadata.row_groups.iter().for_each(|rg| { |
246 | | - rg.columns.iter().for_each(|col| { |
247 | | - if let Some(col_meta) = &col.meta_data { |
248 | | - if let Some(dict_offset) = col_meta.dictionary_page_offset { |
249 | | - parquet::thrift::bench_page_header( |
250 | | - &file_bytes.slice(dict_offset as usize..), |
251 | | - ); |
252 | | - } |
| 247 | + for rg in metadata.row_groups() { |
| 248 | + for col in rg.columns() { |
| 249 | + if let Some(dict_offset) = col.dictionary_page_offset() { |
253 | 250 | parquet::thrift::bench_page_header( |
254 | | - &file_bytes.slice(col_meta.data_page_offset as usize..), |
| 251 | + &file_bytes.slice(dict_offset as usize..), |
255 | 252 | ); |
256 | 253 | } |
257 | | - }); |
258 | | - }); |
| 254 | + parquet::thrift::bench_page_header( |
| 255 | + &file_bytes.slice(col.data_page_offset() as usize..), |
| 256 | + ); |
| 257 | + } |
| 258 | + } |
259 | 259 | }) |
260 | 260 | }); |
261 | 261 |
|
262 | 262 | #[cfg(feature = "arrow")] |
263 | 263 | c.bench_function("page headers (no stats)", |b| { |
264 | 264 | b.iter(|| { |
265 | | - metadata.row_groups.iter().for_each(|rg| { |
266 | | - rg.columns.iter().for_each(|col| { |
267 | | - if let Some(col_meta) = &col.meta_data { |
268 | | - if let Some(dict_offset) = col_meta.dictionary_page_offset { |
269 | | - parquet::thrift::bench_page_header_no_stats( |
270 | | - &file_bytes.slice(dict_offset as usize..), |
271 | | - ); |
272 | | - } |
| 265 | + for rg in metadata.row_groups() { |
| 266 | + for col in rg.columns() { |
| 267 | + if let Some(dict_offset) = col.dictionary_page_offset() { |
273 | 268 | parquet::thrift::bench_page_header_no_stats( |
274 | | - &file_bytes.slice(col_meta.data_page_offset as usize..), |
| 269 | + &file_bytes.slice(dict_offset as usize..), |
275 | 270 | ); |
276 | 271 | } |
277 | | - }); |
278 | | - }); |
| 272 | + parquet::thrift::bench_page_header_no_stats( |
| 273 | + &file_bytes.slice(col.data_page_offset() as usize..), |
| 274 | + ); |
| 275 | + } |
| 276 | + } |
279 | 277 | }) |
280 | 278 | }); |
281 | 279 | } |
|
0 commit comments