Skip to content

Commit 5b03df8

Browse files
authored
Add more arrow examples (#142)
* update fmt * use macro_rules * use box value * use fn instead of macro
1 parent 7ceaa5e commit 5b03df8

File tree

4 files changed

+64
-24
lines changed

4 files changed

+64
-24
lines changed

.rustfmt.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
max_width = 120
1+
max_width = 120
2+
imports_granularity = "Crate"

add_rustfmt_hook.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ for file in $files; do
1616
if [ ! -f "${file}" ]; then
1717
continue
1818
fi
19-
if [ "${file}" -eq 'libduckdb-sys/duckdb/bindgen_bundled_version.rs' ]; then
19+
if [ "${file}" = 'libduckdb-sys/src/bindgen_bundled_version.rs' ]; then
2020
continue
2121
fi
2222
if [[ "${file}" == *.rs ]]; then
@@ -25,7 +25,7 @@ for file in $files; do
2525
done
2626
if [ ${#rust_files[@]} -ne 0 ]; then
2727
command -v rustfmt >/dev/null 2>&1 || { echo >&2 "Rustfmt is required but it's not installed. Aborting."; exit 1; }
28-
$(command -v rustfmt) ${rust_files[@]} &
28+
$(command -v rustfmt) +nightly ${rust_files[@]} &
2929
fi
3030
wait
3131
if [ ${#rust_files[@]} -ne 0 ]; then

src/vtab/arrow.rs

Lines changed: 56 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,13 @@ impl Free for ArrowInitData {}
4444
struct ArrowVTab;
4545

4646
unsafe fn address_to_arrow_schema(address: usize) -> FFI_ArrowSchema {
47-
let ptr = address as *const FFI_ArrowSchema;
48-
std::ptr::read(ptr)
47+
let ptr = address as *mut FFI_ArrowSchema;
48+
*Box::from_raw(ptr)
4949
}
5050

5151
unsafe fn address_to_arrow_array(address: usize) -> FFI_ArrowArray {
52-
let ptr = address as *const FFI_ArrowArray;
53-
std::ptr::read(ptr)
52+
let ptr = address as *mut FFI_ArrowArray;
53+
*Box::from_raw(ptr)
5454
}
5555

5656
unsafe fn address_to_arrow_ffi(array: usize, schema: usize) -> (FFI_ArrowArray, FFI_ArrowSchema) {
@@ -446,28 +446,49 @@ fn as_fixed_size_list_array(arr: &dyn Array) -> &FixedSizeListArray {
446446
// }
447447
// }
448448

449+
/// Pass RecordBatch to duckdb.
450+
///
451+
/// # Safety
452+
/// The returned addresses are leaked `Box` allocations; each pair must be consumed exactly once.
453+
/// It's recommended to always use this function with arrow()
454+
pub fn arrow_recordbatch_to_query_params(rb: RecordBatch) -> [usize; 2] {
455+
let data = ArrayData::from(StructArray::from(rb));
456+
arrow_arraydata_to_query_params(data)
457+
}
458+
459+
/// Pass ArrayData to duckdb.
460+
///
461+
/// # Safety
462+
/// The returned addresses are leaked `Box` allocations; each pair must be consumed exactly once.
463+
/// It's recommended to always use this function with arrow()
464+
pub fn arrow_arraydata_to_query_params(data: ArrayData) -> [usize; 2] {
465+
let array = FFI_ArrowArray::new(&data);
466+
let schema = FFI_ArrowSchema::try_from(data.data_type()).expect("Failed to convert schema");
467+
arrow_ffi_to_query_params(array, schema)
468+
}
469+
449470
/// Pass array and schema as a pointer to duckdb.
450471
///
451472
/// # Safety
452473
/// The caller must ensure that the pointer is valid
453474
/// It's recommended to always use this function with arrow()
454-
pub unsafe fn arrow_ffi_to_query_params(array: FFI_ArrowArray, schema: FFI_ArrowSchema) -> [usize; 2] {
455-
let param = [&array as *const _ as usize, &schema as *const _ as usize];
456-
std::mem::forget(array);
457-
std::mem::forget(schema);
458-
param
475+
pub fn arrow_ffi_to_query_params(array: FFI_ArrowArray, schema: FFI_ArrowSchema) -> [usize; 2] {
476+
let arr = Box::into_raw(Box::new(array));
477+
let sch = Box::into_raw(Box::new(schema));
478+
479+
[arr as *mut _ as usize, sch as *mut _ as usize]
459480
}
460481

461482
#[cfg(test)]
462483
mod test {
463-
use super::ArrowVTab;
484+
use super::{arrow_recordbatch_to_query_params, ArrowVTab};
464485
use crate::{Connection, Result};
465486
use arrow::{
466-
array::{ArrayData, Float64Array, StructArray},
467-
ffi::{FFI_ArrowArray, FFI_ArrowSchema},
487+
array::{Float64Array, Int32Array},
488+
datatypes::{DataType, Field, Schema},
468489
record_batch::RecordBatch,
469490
};
470-
use std::error::Error;
491+
use std::{error::Error, sync::Arc};
471492

472493
#[test]
473494
fn test_vtab_arrow() -> Result<(), Box<dyn Error>> {
@@ -478,12 +499,7 @@ mod test {
478499
.prepare("SELECT * FROM read_parquet('./examples/int32_decimal.parquet');")?
479500
.query_arrow([])?
480501
.collect();
481-
let data = ArrayData::from(StructArray::from(rbs.into_iter().next().unwrap()));
482-
let array = FFI_ArrowArray::new(&data);
483-
let schema = FFI_ArrowSchema::try_from(data.data_type()).expect("Failed to convert schema");
484-
let param = [&array as *const _ as usize, &schema as *const _ as usize];
485-
std::mem::forget(array);
486-
std::mem::forget(schema);
502+
let param = arrow_recordbatch_to_query_params(rbs.into_iter().next().unwrap());
487503
let mut stmt = db.prepare("select sum(value) from arrow(?, ?)")?;
488504
let mut arr = stmt.query_arrow(param)?;
489505
let rb = arr.next().expect("no record batch");
@@ -493,4 +509,25 @@ mod test {
493509
assert_eq!(column.value(0), 300.0);
494510
Ok(())
495511
}
512+
513+
#[test]
514+
fn test_vtab_arrow_rust_array() -> Result<(), Box<dyn Error>> {
515+
let db = Connection::open_in_memory()?;
516+
db.register_table_function::<ArrowVTab>("arrow")?;
517+
518+
// This showcases how easy it is to build in-memory data
519+
// and pass it into duckdb
520+
let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
521+
let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
522+
let rb = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).expect("failed to create record batch");
523+
let param = arrow_recordbatch_to_query_params(rb);
524+
let mut stmt = db.prepare("select sum(a)::int32 from arrow(?, ?)")?;
525+
let mut arr = stmt.query_arrow(param)?;
526+
let rb = arr.next().expect("no record batch");
527+
assert_eq!(rb.num_columns(), 1);
528+
let column = rb.column(0).as_any().downcast_ref::<Int32Array>().unwrap();
529+
assert_eq!(column.len(), 1);
530+
assert_eq!(column.value(0), 15);
531+
Ok(())
532+
}
496533
}

src/vtab/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@ mod vector;
1212
#[cfg(feature = "vtab-arrow")]
1313
mod arrow;
1414
#[cfg(feature = "vtab-arrow")]
15-
pub use self::arrow::arrow_ffi_to_query_params;
16-
pub use self::arrow::record_batch_to_duckdb_data_chunk;
15+
pub use self::arrow::{
16+
arrow_arraydata_to_query_params, arrow_ffi_to_query_params, arrow_recordbatch_to_query_params,
17+
record_batch_to_duckdb_data_chunk,
18+
};
1719
#[cfg(feature = "vtab-excel")]
1820
mod excel;
1921

0 commit comments

Comments
 (0)