@@ -729,7 +729,8 @@ function SHTnsKit.dist_synthesis(cfg::SHTnsKit.SHTConfig, Alm::AbstractMatrix; p
729729end
730730
731731function SHTnsKit. dist_synthesis(cfg:: SHTnsKit.SHTConfig , Alm:: PencilArray ; prototype_θφ:: PencilArray , real_output:: Bool = true , use_rfft:: Bool = false )
732- return SHTnsKit. dist_synthesis(cfg, Array(Alm); prototype_θφ, real_output, use_rfft)
732+ Alm_dense = SHTnsKit. spectral_pencil_to_matrix(cfg, Alm)
733+ return SHTnsKit. dist_synthesis(cfg, Alm_dense; prototype_θφ, real_output, use_rfft)
733734end
734735
735736function SHTnsKit. dist_synthesis!(plan:: DistPlan , fθφ_out:: PencilArray , Alm:: PencilArray ; real_output:: Bool = true )
@@ -1593,34 +1594,18 @@ function dist_analysis_distributed(cfg::SHTnsKit.SHTConfig, fθφ::PencilArray;
15931594 end
15941595 end
15951596
1596- # Create output distributed array
1597- result = create_distributed_spectral_array(plan, ComplexF64)
1598-
1599- # Pack all coefficients in l-major order grouped by owner rank, then Allreduce
1600- # and extract the local portion for this rank
1601- total_nlm = sum(plan. recv_counts)
1602- local_contribs_packed = Vector{ComplexF64}(undef, total_nlm)
1603-
1604- # Pack in l-major order, grouped by owner rank
1605- # recv_counts[r+1] = count for rank r, where rank r owns l values where l % nprocs == r
1606- idx = 0
1607- for owner_rank in 0 : (plan. nprocs - 1 )
1608- for l in 0 : lmax
1609- if l % plan. nprocs == owner_rank
1610- for m in 0 : min(l, mmax)
1611- idx += 1
1612- local_contribs_packed[idx] = local_contrib[l+ 1 , m+ 1 ]
1613- end
1614- end
1615- end
1597+ # Only reduce if θ is distributed across ranks (if all ranks have all latitudes,
1598+ # each rank's local_contrib is already the complete answer)
1599+ θ_is_distributed = (nθ_local < cfg. nlat)
1600+ if θ_is_distributed
1601+ MPI. Allreduce!(MPI. IN_PLACE, local_contrib, + , comm)
16161602 end
16171603
1618- # Allreduce the packed buffer, then extract the local portion for this rank
1619- full_reduced = similar(local_contribs_packed)
1620- MPI. Allreduce!(local_contribs_packed, full_reduced, + , comm)
1621- offset = plan. recv_displs[plan. rank + 1 ]
1622- count = plan. recv_counts[plan. rank + 1 ]
1623- copyto!(result. local_coeffs, 1 , full_reduced, offset + 1 , count)
1604+ # Create output distributed array and extract local portion
1605+ result = create_distributed_spectral_array(plan, ComplexF64)
1606+ for (i, (l, m)) in enumerate(plan. local_lm_indices)
1607+ result. local_coeffs[i] = local_contrib[l+ 1 , m+ 1 ]
1608+ end
16241609
16251610 return result
16261611end
0 commit comments