19
19
20
20
use arrow:: array:: {
21
21
builder:: { Int64Builder , StringBuilder } ,
22
- Float32Array , Float64Array , RecordBatch , StringArray , UInt64Array ,
22
+ ArrayRef , Float32Array , Float64Array , RecordBatch , StringArray , StringViewBuilder ,
23
+ UInt64Array ,
23
24
} ;
24
25
use arrow:: datatypes:: { DataType , Field , Schema , SchemaRef } ;
25
26
use datafusion:: datasource:: MemTable ;
@@ -158,13 +159,39 @@ pub fn create_record_batches(
158
159
. collect :: < Vec < _ > > ( )
159
160
}
160
161
162
+ /// An enum that wraps either a regular StringBuilder or a GenericByteViewBuilder
163
+ /// so that both can be used interchangeably.
164
+ enum TraceIdBuilder {
165
+ Utf8 ( StringBuilder ) ,
166
+ Utf8View ( StringViewBuilder ) ,
167
+ }
168
+
169
+ impl TraceIdBuilder {
170
+ /// Append a value to the builder.
171
+ fn append_value ( & mut self , value : & str ) {
172
+ match self {
173
+ TraceIdBuilder :: Utf8 ( builder) => builder. append_value ( value) ,
174
+ TraceIdBuilder :: Utf8View ( builder) => builder. append_value ( value) ,
175
+ }
176
+ }
177
+
178
+ /// Finish building and return the ArrayRef.
179
+ fn finish ( self ) -> ArrayRef {
180
+ match self {
181
+ TraceIdBuilder :: Utf8 ( mut builder) => Arc :: new ( builder. finish ( ) ) ,
182
+ TraceIdBuilder :: Utf8View ( mut builder) => Arc :: new ( builder. finish ( ) ) ,
183
+ }
184
+ }
185
+ }
186
+
161
187
/// Create time series data with `partition_cnt` partitions and `sample_cnt` rows per partition
162
188
/// in ascending order, if `asc` is true, otherwise randomly sampled using a Pareto distribution
163
189
#[ allow( dead_code) ]
164
190
pub ( crate ) fn make_data (
165
191
partition_cnt : i32 ,
166
192
sample_cnt : i32 ,
167
193
asc : bool ,
194
+ use_view : bool ,
168
195
) -> Result < ( Arc < Schema > , Vec < Vec < RecordBatch > > ) , DataFusionError > {
169
196
// constants observed from trace data
170
197
let simultaneous_group_cnt = 2000 ;
@@ -177,11 +204,17 @@ pub(crate) fn make_data(
177
204
let mut rng = rand:: rngs:: SmallRng :: from_seed ( [ 0 ; 32 ] ) ;
178
205
179
206
// populate data
180
- let schema = test_schema ( ) ;
207
+ let schema = test_schema ( use_view ) ;
181
208
let mut partitions = vec ! [ ] ;
182
209
let mut cur_time = 16909000000000i64 ;
183
210
for _ in 0 ..partition_cnt {
184
- let mut id_builder = StringBuilder :: new ( ) ;
211
+ // Choose the appropriate builder based on use_view.
212
+ let mut id_builder = if use_view {
213
+ TraceIdBuilder :: Utf8View ( StringViewBuilder :: new ( ) )
214
+ } else {
215
+ TraceIdBuilder :: Utf8 ( StringBuilder :: new ( ) )
216
+ } ;
217
+
185
218
let mut ts_builder = Int64Builder :: new ( ) ;
186
219
let gen_id = |rng : & mut rand:: rngs:: SmallRng | {
187
220
rng. gen :: < [ u8 ; 16 ] > ( )
@@ -230,10 +263,19 @@ pub(crate) fn make_data(
230
263
Ok ( ( schema, partitions) )
231
264
}
232
265
233
- /// The Schema used by make_data
234
- fn test_schema ( ) -> SchemaRef {
235
- Arc :: new ( Schema :: new ( vec ! [
236
- Field :: new( "trace_id" , DataType :: Utf8 , false ) ,
237
- Field :: new( "timestamp_ms" , DataType :: Int64 , false ) ,
238
- ] ) )
266
+ /// Returns a Schema based on the use_view flag
267
+ fn test_schema ( use_view : bool ) -> SchemaRef {
268
+ if use_view {
269
+ // Return Utf8View schema
270
+ Arc :: new ( Schema :: new ( vec ! [
271
+ Field :: new( "trace_id" , DataType :: Utf8View , false ) ,
272
+ Field :: new( "timestamp_ms" , DataType :: Int64 , false ) ,
273
+ ] ) )
274
+ } else {
275
+ // Return regular Utf8 schema
276
+ Arc :: new ( Schema :: new ( vec ! [
277
+ Field :: new( "trace_id" , DataType :: Utf8 , false ) ,
278
+ Field :: new( "timestamp_ms" , DataType :: Int64 , false ) ,
279
+ ] ) )
280
+ }
239
281
}
0 commit comments