@@ -10,7 +10,7 @@ use itertools::Itertools;
1010use log:: info;
1111use roaring:: RoaringBitmap ;
1212use serde_json:: { Map , Value } ;
13- use smallvec :: SmallVec ;
13+ use vec_utils :: VecExt ;
1414
1515use super :: helpers:: {
1616 create_sorter, create_writer, keep_latest_obkv, merge_obkvs, merge_two_obkvs, MergeFn ,
@@ -128,28 +128,29 @@ impl Transform<'_, '_> {
128128
129129 let mut obkv_buffer = Vec :: new ( ) ;
130130 let mut documents_count = 0 ;
131+ let mut external_id_buffer = Vec :: new ( ) ;
132+ let mut field_buffer: Vec < ( u16 , & [ u8 ] ) > = Vec :: new ( ) ;
131133 while let Some ( ( addition_index, document) ) = reader. next_document_with_index ( ) ? {
134+ let mut field_buffer_cache = field_buffer. drop_and_reuse ( ) ;
132135 if self . log_every_n . map_or ( false , |len| documents_count % len == 0 ) {
133136 progress_callback ( UpdateIndexingStep :: RemapDocumentAddition {
134137 documents_seen : documents_count,
135138 } ) ;
136139 }
137140
138- let mut external_id_buffer = SmallVec :: < [ u8 ; 512 ] > :: new ( ) ;
139- let mut field_buffer = SmallVec :: < [ ( u16 , & [ u8 ] ) ; 128 ] > :: new ( ) ;
140- let mut uuid_buffer = [ 0 ; uuid:: adapter:: Hyphenated :: LENGTH ] ;
141141
142142 for ( k, v) in document. iter ( ) {
143143 let mapped_id = * mapping. get ( & k) . unwrap ( ) ;
144- field_buffer . push ( ( mapped_id, v) ) ;
144+ field_buffer_cache . push ( ( mapped_id, v) ) ;
145145 }
146146
147147 // We need to make sure that every document has a primary key. After we have remapped
148148 // all the fields in the document, we try to find the primary key value. If we can find
149149 // it, transform it into a string and validate it, and then update it in the
150150 // document. If none is found, and we were told to generate missing document ids, then
151151 // we create the missing field, and update the new document.
152- let external_id = match field_buffer. iter_mut ( ) . find ( |( id, _) | * id == primary_key_id) {
152+ let mut uuid_buffer = [ 0 ; uuid:: adapter:: Hyphenated :: LENGTH ] ;
153+ let external_id = match field_buffer_cache. iter_mut ( ) . find ( |( id, _) | * id == primary_key_id) {
153154 Some ( ( _, bytes) ) => {
154155 let value = match serde_json:: from_slice ( bytes) . unwrap ( ) {
155156 Value :: String ( string) => match validate_document_id ( & string) {
@@ -191,19 +192,19 @@ impl Transform<'_, '_> {
191192
192193 let uuid = uuid:: Uuid :: new_v4 ( ) . to_hyphenated ( ) . encode_lower ( & mut uuid_buffer) ;
193194 serde_json:: to_writer ( & mut external_id_buffer, & uuid) . unwrap ( ) ;
194- field_buffer . push ( ( primary_key_id, & external_id_buffer) ) ;
195+ field_buffer_cache . push ( ( primary_key_id, & external_id_buffer) ) ;
195196 Cow :: Borrowed ( & * uuid)
196197 }
197198 } ;
198199
199200 // Insertion into an obkv needs to be done with ordered keys. For now they are ordered
200201 // according to the document addition key order, so we sort them according to the
201202 // fieldids map key order.
202- field_buffer . sort_unstable_by ( |( f1, _) , ( f2, _) | f1. cmp ( & f2) ) ;
203+ field_buffer_cache . sort_unstable_by ( |( f1, _) , ( f2, _) | f1. cmp ( & f2) ) ;
203204
204205 // The last step is to build the new obkv document, and insert it in the sorter.
205206 let mut writer = obkv:: KvWriter :: new ( & mut obkv_buffer) ;
206- for ( k, v) in field_buffer . iter ( ) {
207+ for ( k, v) in field_buffer_cache . iter ( ) {
207208 writer. insert ( * k, v) ?;
208209 }
209210
@@ -216,6 +217,8 @@ impl Transform<'_, '_> {
216217 } ) ;
217218
218219 obkv_buffer. clear ( ) ;
220+ field_buffer = field_buffer_cache. drop_and_reuse ( ) ;
221+ external_id_buffer. clear ( ) ;
219222 }
220223
221224 progress_callback ( UpdateIndexingStep :: RemapDocumentAddition {
0 commit comments