@@ -795,6 +795,14 @@ fn parse(
795
795
} )
796
796
. collect :: < StringArray > ( ) ,
797
797
) as ArrayRef ) ,
798
+ DataType :: Utf8View => Ok ( Arc :: new (
799
+ rows. iter ( )
800
+ . map ( |row| {
801
+ let s = row. get ( i) ;
802
+ ( !null_regex. is_null ( s) ) . then_some ( s)
803
+ } )
804
+ . collect :: < StringViewArray > ( ) ,
805
+ ) as ArrayRef ) ,
798
806
DataType :: Dictionary ( key_type, value_type)
799
807
if value_type. as_ref ( ) == & DataType :: Utf8 =>
800
808
{
@@ -2380,17 +2388,27 @@ mod tests {
2380
2388
}
2381
2389
2382
2390
fn err_test ( csv : & [ u8 ] , expected : & str ) {
2383
- let schema = Arc :: new ( Schema :: new ( vec ! [
2391
+ fn err_test_with_schema ( csv : & [ u8 ] , expected : & str , schema : Arc < Schema > ) {
2392
+ let buffer = std:: io:: BufReader :: with_capacity ( 2 , Cursor :: new ( csv) ) ;
2393
+ let b = ReaderBuilder :: new ( schema)
2394
+ . with_batch_size ( 2 )
2395
+ . build_buffered ( buffer)
2396
+ . unwrap ( ) ;
2397
+ let err = b. collect :: < Result < Vec < _ > , _ > > ( ) . unwrap_err ( ) . to_string ( ) ;
2398
+ assert_eq ! ( err, expected)
2399
+ }
2400
+
2401
+ let schema_utf8 = Arc :: new ( Schema :: new ( vec ! [
2384
2402
Field :: new( "text1" , DataType :: Utf8 , true ) ,
2385
2403
Field :: new( "text2" , DataType :: Utf8 , true ) ,
2386
2404
] ) ) ;
2387
- let buffer = std :: io :: BufReader :: with_capacity ( 2 , Cursor :: new ( csv ) ) ;
2388
- let b = ReaderBuilder :: new ( schema )
2389
- . with_batch_size ( 2 )
2390
- . build_buffered ( buffer )
2391
- . unwrap ( ) ;
2392
- let err = b . collect :: < Result < Vec < _ > , _ > > ( ) . unwrap_err ( ) . to_string ( ) ;
2393
- assert_eq ! ( err , expected)
2405
+ err_test_with_schema ( csv , expected , schema_utf8 ) ;
2406
+
2407
+ let schema_utf8view = Arc :: new ( Schema :: new ( vec ! [
2408
+ Field :: new ( "text1" , DataType :: Utf8View , true ) ,
2409
+ Field :: new ( "text2" , DataType :: Utf8View , true ) ,
2410
+ ] ) ) ;
2411
+ err_test_with_schema ( csv , expected, schema_utf8view ) ;
2394
2412
}
2395
2413
2396
2414
#[ test]
@@ -2587,4 +2605,64 @@ mod tests {
2587
2605
& vec![ 2 , 22 ]
2588
2606
) ;
2589
2607
}
2608
+
2609
/// Decodes three records into a single nullable `Utf8View` column and checks that the
/// resulting batch has one column, three rows, and the original values in order.
#[test]
fn test_parse_string_view_single_column() {
    // Records are separated by a bare newline. Any extra character in the separator
    // (for example a trailing space) would become part of the following value and
    // break the equality checks below.
    // NOTE(review): the middle value is presumably chosen to exceed the 12-byte inline
    // limit of view arrays -- confirm against the Arrow format specification.
    let csv = ["foo", "something_cannot_be_inlined", "foobar"].join("\n");
    let schema = Arc::new(Schema::new(vec![Field::new(
        "c1",
        DataType::Utf8View,
        true,
    )]));

    let mut decoder = ReaderBuilder::new(schema).build_decoder();

    // The whole input is expected to be consumed in a single call.
    let decoded = decoder.decode(csv.as_bytes()).unwrap();
    assert_eq!(decoded, csv.len());
    // NOTE(review): an empty slice presumably signals end of input so that the last
    // record, which has no trailing newline, is completed -- confirm with `Decoder` docs.
    decoder.decode(&[]).unwrap();

    let batch = decoder.flush().unwrap().unwrap();
    assert_eq!(batch.num_columns(), 1);
    assert_eq!(batch.num_rows(), 3);

    let col = batch.column(0).as_string_view();
    assert_eq!(col.data_type(), &DataType::Utf8View);
    assert_eq!(col.value(0), "foo");
    assert_eq!(col.value(1), "something_cannot_be_inlined");
    assert_eq!(col.value(2), "foobar");
}
2633
+
2634
/// Decodes three records into two nullable `Utf8View` columns and checks both the
/// values and the null positions: the test expects an empty field to come back as null.
#[test]
fn test_parse_string_view_multi_column() {
    // Records are separated by a bare newline. A stray space after the newline would
    // turn the empty first field of the second record into `" "`, which the test's
    // null assertion on `c1` row 1 would then reject.
    let csv = ["foo,", ",something_cannot_be_inlined", "foobarfoobar,bar"].join("\n");
    let schema = Arc::new(Schema::new(vec![
        Field::new("c1", DataType::Utf8View, true),
        Field::new("c2", DataType::Utf8View, true),
    ]));

    let mut decoder = ReaderBuilder::new(schema).build_decoder();

    // The whole input is expected to be consumed in a single call.
    let decoded = decoder.decode(csv.as_bytes()).unwrap();
    assert_eq!(decoded, csv.len());
    // NOTE(review): an empty slice presumably signals end of input so that the last
    // record, which has no trailing newline, is completed -- confirm with `Decoder` docs.
    decoder.decode(&[]).unwrap();

    let batch = decoder.flush().unwrap().unwrap();
    assert_eq!(batch.num_columns(), 2);
    assert_eq!(batch.num_rows(), 3);

    let c1 = batch.column(0).as_string_view();
    let c2 = batch.column(1).as_string_view();
    assert_eq!(c1.data_type(), &DataType::Utf8View);
    assert_eq!(c2.data_type(), &DataType::Utf8View);

    // First column: value, empty field (null), value.
    assert!(!c1.is_null(0));
    assert!(c1.is_null(1));
    assert!(!c1.is_null(2));
    assert_eq!(c1.value(0), "foo");
    assert_eq!(c1.value(2), "foobarfoobar");

    // Second column: empty field (null), value, value.
    assert!(c2.is_null(0));
    assert!(!c2.is_null(1));
    assert!(!c2.is_null(2));
    assert_eq!(c2.value(1), "something_cannot_be_inlined");
    assert_eq!(c2.value(2), "bar");
}
2590
2668
}
0 commit comments