@@ -290,7 +290,15 @@ template<typename T>
290
290
class test_allreduce_alg : public test_allreduce <T>
291
291
{};
292
292
293
- using test_allreduce_alg_type = ::testing::Types<TypeOpPair<UCC_DT_INT32, sum>>;
293
+ // Datatype / reduction-op pairs for the allreduce algorithm tests: the list is registered with TYPED_TEST_CASE below, so each TYPED_TEST(test_allreduce_alg, ...) is instantiated once per TypeOpPair listed here.
294
+ using test_allreduce_alg_type = ::testing::Types<
295
+ TypeOpPair<UCC_DT_INT32, sum>,
296
+ TypeOpPair<UCC_DT_FLOAT32, sum>,
297
+ TypeOpPair<UCC_DT_INT32, prod>,
298
+ TypeOpPair<UCC_DT_INT32, max>,
299
+ TypeOpPair<UCC_DT_INT32, min>,
300
+ TypeOpPair<UCC_DT_FLOAT64, sum>
301
+ >;
294
302
TYPED_TEST_CASE (test_allreduce_alg, test_allreduce_alg_type);
295
303
296
304
TYPED_TEST (test_allreduce_alg, sra_knomial_pipelined) {
@@ -437,6 +445,92 @@ TYPED_TEST(test_allreduce_alg, rab_pipelined) {
437
445
}
438
446
}
439
447
448
+ TYPED_TEST (test_allreduce_alg, ring) {
449
+ int n_procs = 15 ;
450
+ ucc_job_env_t env = {{" UCC_CL_BASIC_TUNE" , " inf" },
451
+ {" UCC_TL_UCP_TUNE" , " allreduce:0-inf:@ring" }};
452
+ UccJob job (n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env);
453
+ UccTeam_h team = job.create_team (n_procs);
454
+ int repeat = 3 ;
455
+ UccCollCtxVec ctxs;
456
+ std::vector<ucc_memory_type_t > mt = {UCC_MEMORY_TYPE_HOST};
457
+
458
+ if (UCC_OK == ucc_mc_available (UCC_MEMORY_TYPE_CUDA)) {
459
+ mt.push_back (UCC_MEMORY_TYPE_CUDA);
460
+ }
461
+ if (UCC_OK == ucc_mc_available (UCC_MEMORY_TYPE_CUDA_MANAGED)) {
462
+ mt.push_back (UCC_MEMORY_TYPE_CUDA_MANAGED);
463
+ }
464
+
465
+ // Test with various data sizes: small, medium, large
466
+ for (auto count : {8 , 65536 , 123567 }) {
467
+ for (auto inplace : {TEST_NO_INPLACE, TEST_INPLACE}) {
468
+ for (auto m : mt) {
469
+ SET_MEM_TYPE (m);
470
+ this ->set_inplace (inplace);
471
+ this ->data_init (n_procs, TypeParam::dt, count, ctxs, true );
472
+ UccReq req (team, ctxs);
473
+
474
+ for (auto i = 0 ; i < repeat; i++) {
475
+ req.start ();
476
+ req.wait ();
477
+ EXPECT_EQ (true , this ->data_validate (ctxs));
478
+ this ->reset (ctxs);
479
+ }
480
+ this ->data_fini (ctxs);
481
+ }
482
+ }
483
+ }
484
+ }
485
+
486
+ TYPED_TEST (test_allreduce_alg, ring_edge_cases) {
487
+ // Test with non-power-of-two team sizes and edge cases
488
+ for (auto team_size : {3 , 7 , 13 }) {
489
+ ucc_job_env_t env = {{" UCC_CL_BASIC_TUNE" , " inf" },
490
+ {" UCC_TL_UCP_TUNE" , " allreduce:0-inf:@ring" }};
491
+ UccJob job (team_size, UccJob::UCC_JOB_CTX_GLOBAL, env);
492
+ UccTeam_h team = job.create_team (team_size);
493
+ UccCollCtxVec ctxs;
494
+
495
+ for (auto count : {0 , 1 , 3 , 17 }) {
496
+ SET_MEM_TYPE (UCC_MEMORY_TYPE_HOST);
497
+ this ->set_inplace (TEST_NO_INPLACE);
498
+ this ->data_init (team_size, TypeParam::dt, count, ctxs, false );
499
+ UccReq req (team, ctxs);
500
+
501
+ req.start ();
502
+ req.wait ();
503
+ EXPECT_EQ (true , this ->data_validate (ctxs));
504
+ this ->data_fini (ctxs);
505
+ }
506
+ }
507
+ }
508
+
509
+ TYPED_TEST (test_allreduce_alg, ring_persistent) {
510
+ // Test persistent operation - results should be consistent across multiple calls
511
+ int n_procs = 8 ;
512
+ ucc_job_env_t env = {{" UCC_CL_BASIC_TUNE" , " inf" },
513
+ {" UCC_TL_UCP_TUNE" , " allreduce:0-inf:@ring" }};
514
+ UccJob job (n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env);
515
+ UccTeam_h team = job.create_team (n_procs);
516
+ UccCollCtxVec ctxs;
517
+
518
+ SET_MEM_TYPE (UCC_MEMORY_TYPE_HOST);
519
+ this ->set_inplace (TEST_NO_INPLACE);
520
+ // Use a larger buffer for persistent test
521
+ size_t count = 1024 ;
522
+ this ->data_init (n_procs, TypeParam::dt, count, ctxs, true );
523
+ UccReq req (team, ctxs);
524
+ // Run multiple iterations to verify persistence
525
+ for (int i = 0 ; i < 5 ; i++) {
526
+ req.start ();
527
+ req.wait ();
528
+ EXPECT_EQ (true , this ->data_validate (ctxs));
529
+ this ->reset (ctxs);
530
+ }
531
+ this ->data_fini (ctxs);
532
+ }
533
+
440
534
#ifdef HAVE_UCX
441
535
TYPED_TEST (test_allreduce_alg, sliding_window)
442
536
{
0 commit comments