@@ -20,13 +20,23 @@ import (
2020 "go.mongodb.org/mongo-driver/v2/mongo"
2121 "go.mongodb.org/mongo-driver/v2/mongo/options"
2222 "go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore"
23+ "golang.org/x/exp/slices"
2324)
2425
2526const (
2627 recheckBatchByteLimit = 1024 * 1024
2728 recheckBatchCountLimit = 1000
2829
2930 recheckQueueCollectionNameBase = "recheckQueue"
31+
32+ maxTasksPerRequest = 500 // A pending batch of recheck tasks is sent for insertion once it holds this many tasks.
33+ maxInsertSize = 256 << 10 // 256 KiB. A pending batch is also sent once its marshaled tasks total at least this many bytes.
34+
35+ // TODO: Try something beneath 256 KiB instead.
36+ maxRecheckIDsBytes = 1024 * 1024 // 1 MiB
37+
38+ // The maximum number of documents that we want each recheck task's cursor to return.
39+ maxRecheckIDs = 10_000
3040)
3141
3242// InsertFailedCompareRecheckDocs is for inserting RecheckDocs based on failures during Check.
@@ -298,15 +308,20 @@ func (verifier *Verifier) GenerateRecheckTasks(ctx context.Context) error {
298308 }
299309 defer cursor .Close (ctx )
300310
311+ var curTasks []bson.Raw
312+ var curTasksBytes int
313+
314+ eg , egCtx := contextplus .ErrGroup (ctx )
315+
316+ var totalTasks , totalInserts int
301317 persistBufferedRechecks := func () error {
302318 if len (idAccum ) == 0 {
303319 return nil
304320 }
305321
306322 namespace := prevDBName + "." + prevCollName
307323
308- task , err := verifier .InsertDocumentRecheckTask (
309- ctx ,
324+ task , err := verifier .createDocumentRecheckTask (
310325 idAccum ,
311326 types .ByteCount (dataSizeAccum ),
312327 namespace ,
@@ -319,6 +334,32 @@ func (verifier *Verifier) GenerateRecheckTasks(ctx context.Context) error {
319334 namespace ,
320335 )
321336 }
337+ totalTasks ++
338+
339+ taskRaw , err := bson .Marshal (task )
340+ if err != nil {
341+ return errors .Wrapf (
342+ err ,
343+ "failed to marshal a %d-document recheck task for collection %#q" ,
344+ len (idAccum ),
345+ namespace ,
346+ )
347+ }
348+
349+ curTasks = append (curTasks , taskRaw )
350+ curTasksBytes += len (taskRaw )
351+ if len (curTasks ) == maxTasksPerRequest || curTasksBytes >= maxInsertSize { // Flush once the batch is full by count or by size.
352+ tasksClone := slices .Clone (curTasks ) // Detach from curTasks, whose backing array is reused below.
353+ curTasks = curTasks [:0 ]
354+ curTasksBytes = 0 // Restart the size tally too; otherwise every later task would be flushed on its own.
355+ eg .Go (
356+ func () error {
357+ return verifier .insertDocumentRecheckTasks (egCtx , tasksClone )
358+ },
359+ )
360+
361+ totalInserts ++
362+ }
322363
323364 verifier .logger .Debug ().
324365 Any ("task" , task .PrimaryKey ).
@@ -393,11 +434,29 @@ func (verifier *Verifier) GenerateRecheckTasks(ctx context.Context) error {
393434 }
394435
395436 err = persistBufferedRechecks ()
437+ if err != nil {
438+ return err
439+ }
440+
441+ if len (curTasks ) > 0 { // Send whatever tasks remain after the last full batch.
442+ eg .Go (
443+ func () error {
444+ return verifier .insertDocumentRecheckTasks (egCtx , curTasks )
445+ },
446+ )
447+ totalInserts ++ // Count this final request so the logged "insertRequests" total is accurate.
448+ }
449+ err = eg .Wait ()
450+ if err != nil {
451+ return errors .Wrapf (err , "persisting document recheck tasks" )
452+ }
396453
397- if err == nil && totalDocs > 0 {
454+ if totalDocs > 0 {
398455 verifier .logger .Info ().
399456 Int ("generation" , generation ).
400457 Int64 ("totalDocs" , int64 (totalDocs )).
458+ Int ("tasks" , totalTasks ).
459+ Int ("insertRequests" , totalInserts ).
401460 Str ("totalData" , reportutils .FmtBytes (totalRecheckData )).
402461 Stringer ("timeElapsed" , time .Since (startTime )).
403462 Msg ("Scheduled documents for recheck in the new generation." )
0 commit comments