@@ -290,7 +290,15 @@ template<typename T>
290290class test_allreduce_alg : public test_allreduce <T>
291291{};
292292
293- using test_allreduce_alg_type = ::testing::Types<TypeOpPair<UCC_DT_INT32, sum>>;
293+ // Expanded type list for allreduce algorithm tests to cover more data types and operations
294+ using test_allreduce_alg_type = ::testing::Types<
295+ TypeOpPair<UCC_DT_INT32, sum>,
296+ TypeOpPair<UCC_DT_FLOAT32, sum>,
297+ TypeOpPair<UCC_DT_INT32, prod>,
298+ TypeOpPair<UCC_DT_INT32, max>,
299+ TypeOpPair<UCC_DT_INT32, min>,
300+ TypeOpPair<UCC_DT_FLOAT64, sum>
301+ >;
294302TYPED_TEST_CASE (test_allreduce_alg, test_allreduce_alg_type);
295303
296304TYPED_TEST (test_allreduce_alg, sra_knomial_pipelined) {
@@ -437,6 +445,92 @@ TYPED_TEST(test_allreduce_alg, rab_pipelined) {
437445 }
438446}
439447
448+ TYPED_TEST (test_allreduce_alg, ring) {
449+ int n_procs = 15 ;
450+ ucc_job_env_t env = {{" UCC_CL_BASIC_TUNE" , " inf" },
451+ {" UCC_TL_UCP_TUNE" , " allreduce:0-inf:@ring" }};
452+ UccJob job (n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env);
453+ UccTeam_h team = job.create_team (n_procs);
454+ int repeat = 3 ;
455+ UccCollCtxVec ctxs;
456+ std::vector<ucc_memory_type_t > mt = {UCC_MEMORY_TYPE_HOST};
457+
458+ if (UCC_OK == ucc_mc_available (UCC_MEMORY_TYPE_CUDA)) {
459+ mt.push_back (UCC_MEMORY_TYPE_CUDA);
460+ }
461+ if (UCC_OK == ucc_mc_available (UCC_MEMORY_TYPE_CUDA_MANAGED)) {
462+ mt.push_back (UCC_MEMORY_TYPE_CUDA_MANAGED);
463+ }
464+
465+ // Test with various data sizes: small, medium, large
466+ for (auto count : {8 , 65536 , 123567 }) {
467+ for (auto inplace : {TEST_NO_INPLACE, TEST_INPLACE}) {
468+ for (auto m : mt) {
469+ SET_MEM_TYPE (m);
470+ this ->set_inplace (inplace);
471+ this ->data_init (n_procs, TypeParam::dt, count, ctxs, true );
472+ UccReq req (team, ctxs);
473+
474+ for (auto i = 0 ; i < repeat; i++) {
475+ req.start ();
476+ req.wait ();
477+ EXPECT_EQ (true , this ->data_validate (ctxs));
478+ this ->reset (ctxs);
479+ }
480+ this ->data_fini (ctxs);
481+ }
482+ }
483+ }
484+ }
485+
486+ TYPED_TEST (test_allreduce_alg, ring_edge_cases) {
487+ // Test with non-power-of-two team sizes and edge cases
488+ for (auto team_size : {3 , 7 , 13 }) {
489+ ucc_job_env_t env = {{" UCC_CL_BASIC_TUNE" , " inf" },
490+ {" UCC_TL_UCP_TUNE" , " allreduce:0-inf:@ring" }};
491+ UccJob job (team_size, UccJob::UCC_JOB_CTX_GLOBAL, env);
492+ UccTeam_h team = job.create_team (team_size);
493+ UccCollCtxVec ctxs;
494+
495+ for (auto count : {0 , 1 , 3 , 17 }) {
496+ SET_MEM_TYPE (UCC_MEMORY_TYPE_HOST);
497+ this ->set_inplace (TEST_NO_INPLACE);
498+ this ->data_init (team_size, TypeParam::dt, count, ctxs, false );
499+ UccReq req (team, ctxs);
500+
501+ req.start ();
502+ req.wait ();
503+ EXPECT_EQ (true , this ->data_validate (ctxs));
504+ this ->data_fini (ctxs);
505+ }
506+ }
507+ }
508+
509+ TYPED_TEST (test_allreduce_alg, ring_persistent) {
510+ // Test persistent operation - results should be consistent across multiple calls
511+ int n_procs = 8 ;
512+ ucc_job_env_t env = {{" UCC_CL_BASIC_TUNE" , " inf" },
513+ {" UCC_TL_UCP_TUNE" , " allreduce:0-inf:@ring" }};
514+ UccJob job (n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env);
515+ UccTeam_h team = job.create_team (n_procs);
516+ UccCollCtxVec ctxs;
517+
518+ SET_MEM_TYPE (UCC_MEMORY_TYPE_HOST);
519+ this ->set_inplace (TEST_NO_INPLACE);
520+ // Use a larger buffer for persistent test
521+ size_t count = 1024 ;
522+ this ->data_init (n_procs, TypeParam::dt, count, ctxs, true );
523+ UccReq req (team, ctxs);
524+ // Run multiple iterations to verify persistence
525+ for (int i = 0 ; i < 5 ; i++) {
526+ req.start ();
527+ req.wait ();
528+ EXPECT_EQ (true , this ->data_validate (ctxs));
529+ this ->reset (ctxs);
530+ }
531+ this ->data_fini (ctxs);
532+ }
533+
440534#ifdef HAVE_UCX
441535TYPED_TEST (test_allreduce_alg, sliding_window)
442536{
0 commit comments