@@ -333,9 +333,9 @@ func TestScheduleOneWayConflict(t *testing.T) {
333
333
334
334
}
335
335
336
- // TestScheduleReplace starts 1 unit , followed by starting another unit
337
- // that replaces the 1st unit. Then it verifies that the 2 units are
338
- // started on different machines .
336
+ // TestScheduleReplace starts 3 units , followed by starting another unit
337
+ // that replaces the 1st unit. Then it verifies that the original unit
338
+ // got rescheduled on a different machine .
339
339
func TestScheduleReplace (t * testing.T ) {
340
340
cluster , err := platform .NewNspawnCluster ("smoke" )
341
341
if err != nil {
@@ -348,43 +348,30 @@ func TestScheduleReplace(t *testing.T) {
348
348
t .Fatal (err )
349
349
}
350
350
m0 := members [0 ]
351
+ m1 := members [1 ]
351
352
if _ , err := cluster .WaitForNMachines (m0 , 2 ); err != nil {
352
353
t .Fatal (err )
353
354
}
354
355
355
- // Start a unit without Replaces
356
+ // Start 3 units without Replaces, replace.0.service on m0, while both 1 and 2 on m1.
357
+ // That's possible as replace.2.service has an option "MachineOf=replace.1.service".
356
358
uNames := []string {
357
359
"fixtures/units/replace.0.service" ,
358
360
"fixtures/units/replace.1.service" ,
361
+ "fixtures/units/replace.2.service" ,
362
+ "fixtures/units/replace-kick0.service" ,
359
363
}
360
364
if stdout , stderr , err := cluster .Fleetctl (m0 , "start" , "--no-block" , uNames [0 ]); err != nil {
361
365
t .Fatalf ("Failed starting unit %s: \n stdout: %s\n stderr: %s\n err: %v" , uNames [0 ], stdout , stderr , err )
362
366
}
363
-
364
- active , err := cluster .WaitForNActiveUnits (m0 , 1 )
365
- if err != nil {
366
- t .Fatal (err )
367
- }
368
- _ , err = util .ActiveToSingleStates (active )
369
- if err != nil {
370
- t .Fatal (err )
371
- }
372
-
373
- // Start a unit that replaces the former one, replace.0.service
374
- if stdout , stderr , err := cluster .Fleetctl (m0 , "start" , "--no-block" , uNames [1 ]); err != nil {
367
+ if stdout , stderr , err := cluster .Fleetctl (m1 , "start" , "--no-block" , uNames [1 ]); err != nil {
375
368
t .Fatalf ("Failed starting unit %s: \n stdout: %s\n stderr: %s\n err: %v" , uNames [1 ], stdout , stderr , err )
376
369
}
377
-
378
- // Check that both units should show up
379
- stdout , stderr , err := cluster .Fleetctl (m0 , "list-unit-files" , "--no-legend" )
380
- if err != nil {
381
- t .Fatalf ("Failed to run list-unit-files:\n stdout: %s\n stderr: %s\n err: %v" , stdout , stderr , err )
382
- }
383
- units := strings .Split (strings .TrimSpace (stdout ), "\n " )
384
- if len (units ) != 2 {
385
- t .Fatalf ("Did not find two units in cluster: \n %s" , stdout )
370
+ if stdout , stderr , err := cluster .Fleetctl (m1 , "start" , "--no-block" , uNames [2 ]); err != nil {
371
+ t .Fatalf ("Failed starting unit %s: \n stdout: %s\n stderr: %s\n err: %v" , uNames [2 ], stdout , stderr , err )
386
372
}
387
- active , err = cluster .WaitForNActiveUnits (m0 , 2 )
373
+
374
+ active , err := cluster .WaitForNActiveUnits (m0 , 3 )
388
375
if err != nil {
389
376
t .Fatal (err )
390
377
}
@@ -393,16 +380,66 @@ func TestScheduleReplace(t *testing.T) {
393
380
t .Fatal (err )
394
381
}
395
382
396
- // Check that the unit 1 is located on a different machine from that of unit 0
397
- nUnits := 2
398
- uNameBase := make ([] string , nUnits )
399
- machs := make ([] string , nUnits )
400
- for i , uName := range uNames {
401
- uNameBase [ i ] = path . Base ( uName )
402
- machs [ i ] = states [ uNameBase [ i ]]. Machine
383
+ oldMach := states [ path . Base ( uNames [ 0 ])]. Machine
384
+
385
+ // Start a unit replace-kick0.service that replaces replace.0.service
386
+ // Then the kick0 unit will be scheduled to m0, as m0 is least loaded than m1.
387
+ // So it's possible to trigger a situation where kick0 could kick the original unit 0.
388
+ if stdout , stderr , err := cluster . Fleetctl ( m0 , "start" , "--no-block" , uNames [ 3 ]); err != nil {
389
+ t . Fatalf ( "Failed starting unit %s: \n stdout: %s \n stderr: %s \n err: %v" , uNames [ 3 ], stdout , stderr , err )
403
390
}
404
- if machs [0 ] == machs [1 ] {
405
- t .Fatalf ("machine for %s is %s, the same as that of %s." , uNameBase [0 ], machs [0 ], uNameBase [1 ])
391
+
392
+ // Here we need to wait up to 15 seconds, to avoid races, because the unit state
393
+ // publisher could otherwise report unit states with old machine IDs to registry.
394
+ checkReplacedMachines := func () bool {
395
+ // Check that 4 units show up
396
+ nUnits := 4
397
+ stdout , stderr , err := cluster .Fleetctl (m0 , "list-unit-files" , "--no-legend" )
398
+ if err != nil {
399
+ t .Logf ("Failed to run list-unit-files:\n stdout: %s\n stderr: %s\n err: %v" , stdout , stderr , err )
400
+ return false
401
+ }
402
+ units := strings .Split (strings .TrimSpace (stdout ), "\n " )
403
+ if len (units ) != nUnits {
404
+ t .Logf ("Did not find two units in cluster: \n %s" , stdout )
405
+ return false
406
+ }
407
+ active , err = cluster .WaitForNActiveUnits (m0 , nUnits )
408
+ if err != nil {
409
+ t .Log (err )
410
+ return false
411
+ }
412
+ states , err = util .ActiveToSingleStates (active )
413
+ if err != nil {
414
+ t .Log (err )
415
+ return false
416
+ }
417
+
418
+ // Check that replace.0.service is located on a different machine from
419
+ // that of replace-kick0.service.
420
+ uNameBase := make ([]string , nUnits )
421
+ machs := make ([]string , nUnits )
422
+ for i , uName := range uNames {
423
+ uNameBase [i ] = path .Base (uName )
424
+ machs [i ] = states [uNameBase [i ]].Machine
425
+ }
426
+ if machs [0 ] == machs [3 ] {
427
+ t .Logf ("machine for %s is %s, the same as that of %s." , uNameBase [0 ], machs [0 ], uNameBase [3 ])
428
+ return false
429
+ }
430
+ if machs [3 ] != oldMach {
431
+ t .Logf ("machine for %s is %s, different from old machine %s." , uNameBase [3 ], machs [3 ], oldMach )
432
+ return false
433
+ }
434
+ if machs [0 ] == oldMach {
435
+ t .Logf ("machine for %s is %s, the same as that of %s." , uNameBase [0 ], machs [0 ], oldMach )
436
+ return false
437
+ }
438
+
439
+ return true
440
+ }
441
+ if timeout , err := util .WaitForState (checkReplacedMachines ); err != nil {
442
+ t .Fatalf ("Cannot verify replaced units within %v\n err: %v" , timeout , err )
406
443
}
407
444
}
408
445
@@ -429,7 +466,7 @@ func TestScheduleCircularReplace(t *testing.T) {
429
466
// it under /tmp. Also store the original service 1 that replace 0.
430
467
uNames := []string {
431
468
"fixtures/units/replace.0.service" ,
432
- "fixtures/units/replace.1 .service" ,
469
+ "fixtures/units/replace-kick0 .service" ,
433
470
}
434
471
nUnits := 2
435
472
nActiveUnits := 1
@@ -439,12 +476,12 @@ func TestScheduleCircularReplace(t *testing.T) {
439
476
}
440
477
uName0tmp := path .Join ("/tmp" , uNameBase [0 ])
441
478
err = util .GenNewFleetService (uName0tmp , uNames [1 ],
442
- "Replaces=replace.1 .service" , "Replaces=replace.0.service" )
479
+ "Replaces=replace-kick0 .service" , "Replaces=replace.0.service" )
443
480
if err != nil {
444
481
t .Fatalf ("Failed to generate a temp fleet service: %v" , err )
445
482
}
446
483
447
- // Start replace.0 unit that replaces replace.1 .service,
484
+ // Start replace.0 unit that replaces replace-kick0 .service,
448
485
// then fleetctl list-unit-files should show only return 1 launched unit.
449
486
stdout , stderr , err := cluster .Fleetctl (m0 , "start" , "--no-block" , uName0tmp )
450
487
if err != nil {
@@ -469,7 +506,7 @@ func TestScheduleCircularReplace(t *testing.T) {
469
506
t .Fatalf ("Failed to run list-unit-files: %v" , err )
470
507
}
471
508
472
- // Start replace.1 unit that replaces replace.0.service,
509
+ // Start replace-kick0 unit that replaces replace.0.service,
473
510
// and then check that only 1 unit is active
474
511
if stdout , stderr , err := cluster .Fleetctl (m0 , "start" , "--no-block" , uNames [1 ]); err != nil {
475
512
t .Fatalf ("Failed starting unit %s: \n stdout: %s\n stderr: %s\n err: %v" , uNames [1 ], stdout , stderr , err )
0 commit comments