Skip to content

Commit ace10ef

Browse files
authored
Fix GPU signal sampling during excitaiton (KomaMRICore v0.11) (#757)
* Fix GPU reduction for partial spin groups * Fix GPU CI test environment activation * Skip excitation signal reduction without ADC * Skip GPU signal reduction without ADC * Restore Julia 1.10 GPU CI activation guard
1 parent cb6cbc1 commit ace10ef

5 files changed

Lines changed: 64 additions & 41 deletions

File tree

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ Pkg.test() # KomaMRI umbrella package
9393
- `KomaMRICore` defaults to CPU via `KomaMRICore/test/Project.toml`. To change backend, set `[preferences.KomaMRICore].test_backend` to `CPU`, `CUDA`, `AMDGPU`, `Metal`, or `oneAPI`, or use `Pkg.test("KomaMRICore"; test_args=\`CUDA\`)`.
9494
- To control CPU thread count for `KomaMRICore`, use `Pkg.test("KomaMRICore"; julia_args=\`--threads=4\`)`.
9595
- GPU backend packages must already be available locally before running non-CPU `KomaMRICore` tests. `oneAPI` is experimental.
96+
- Do not commit backend-specific GPU packages such as `CUDA`, `AMDGPU`, `Metal`, or `oneAPI` to shared test projects like `KomaMRICore/test/Project.toml`. Add them only in the backend-specific CI/local test command or a temporary environment.
9697
- For small exploratory tests, use a temporary environment. For longer isolated tests, use a temp folder with an activated environment. Reuse the same REPL.
9798
- If you add behavior, add or update tests in the owning package's `runtests.jl`.
9899
- Use `@testitem` tags correctly:

KomaMRICore/src/simulation/SimMethods/Bloch/gpu/BlochGPU.jl

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,21 +37,24 @@ function run_spin_precession!(
3737
) where {T<:Real, SM<:BlochLikeSimMethods}
3838
#Motion
3939
x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t')
40+
has_adc = length(sig) > 0
4041

4142
#Precession
4243
precession_kernel!(backend, groupsize)(
4344
pre.sig_output,
4445
M.xy, M.z,
4546
x, y, z, pre.ΔBz, p.T1, p.T2, p.ρ, UInt32(length(M.xy)),
4647
seq.Gx, seq.Gy, seq.Gz, seq.Δt, seq.ADC, UInt32(length(seq.t)),
47-
Val(!(p.motion isa NoMotion)), Val(supports_warp_reduction(backend)),
48+
Val(!(p.motion isa NoMotion)), Val(supports_warp_reduction(backend)), Val(has_adc),
4849
sim_method,
4950
ndrange=(cld(length(M.xy), groupsize) * groupsize)
5051
)
5152

5253
#Signal
53-
AK.reduce(+, view(pre.sig_output,:,1:length(sig)); init=zero(Complex{T}), dims=1, temp=view(pre.sig_output_final,:,1:length(sig)))
54-
sig .= transpose(view(pre.sig_output_final,:,1:length(sig)))
54+
if has_adc
55+
AK.reduce(+, view(pre.sig_output,:,1:length(sig)); init=zero(Complex{T}), dims=1, temp=view(pre.sig_output_final,:,1:length(sig)))
56+
sig .= transpose(view(pre.sig_output_final,:,1:length(sig)))
57+
end
5558

5659
#Reset Spin-State (Magnetization). Only for FlowPath
5760
outflow_spin_reset!(M, seq.t', p.motion; replace_by=p.ρ)
@@ -71,21 +74,24 @@ function run_spin_excitation!(
7174
) where {T<:Real, SM<:BlochLikeSimMethods}
7275
#Motion
7376
x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t')
77+
has_adc = length(sig) > 0
7478

7579
#Excitation
7680
excitation_kernel!(backend, groupsize)(
7781
pre.sig_output,
7882
M.xy, M.z,
7983
x, y, z, pre.ΔBz, p.T1, p.T2, p.ρ, UInt32(length(M.xy)),
8084
seq.Gx, seq.Gy, seq.Gz, seq.Δt, seq.Δf, seq.B1, seq.ψ, seq.ADC, UInt32(length(seq.t)),
81-
Val(!(p.motion isa NoMotion)), Val(supports_warp_reduction(backend)),
85+
Val(!(p.motion isa NoMotion)), Val(supports_warp_reduction(backend)), Val(has_adc),
8286
sim_method,
8387
ndrange=(cld(length(M.xy), groupsize) * groupsize)
8488
)
8589

8690
#Signal
87-
AK.reduce(+, view(pre.sig_output,:,1:length(sig)); init=zero(Complex{T}), dims=1, temp=view(pre.sig_output_final,:,1:length(sig)))
88-
sig .= transpose(view(pre.sig_output_final,:,1:length(sig)))
91+
if has_adc
92+
AK.reduce(+, view(pre.sig_output,:,1:length(sig)); init=zero(Complex{T}), dims=1, temp=view(pre.sig_output_final,:,1:length(sig)))
93+
sig .= transpose(view(pre.sig_output_final,:,1:length(sig)))
94+
end
8995

9096
#Reset Spin-State (Magnetization). Only for FlowPath
9197
outflow_spin_reset!(M, seq.t', p.motion; replace_by=p.ρ) # TODO: reset state inside kernel

KomaMRICore/src/simulation/SimMethods/Bloch/gpu/ExcitationKernel.jl

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,34 @@
55
M_xy::AbstractVector{Complex{T}}, M_z,
66
@Const(p_x), @Const(p_y), @Const(p_z), @Const(p_ΔBz), @Const(p_T1), @Const(p_T2), @Const(p_ρ), N_spins,
77
@Const(s_Gx), @Const(s_Gy), @Const(s_Gz), @Const(s_Δt), @Const(s_Δf), @Const(s_B1), @Const(s_ψ), @Const(s_ADC), s_length,
8-
::Val{MOTION}, ::Val{USE_WARP_REDUCTION},
8+
::Val{MOTION}, ::Val{USE_WARP_REDUCTION}, ::Val{HAS_ADC},
99
sim_method::SM
10-
) where {T, MOTION, USE_WARP_REDUCTION, SM <: BlochLikeSimMethods}
10+
) where {T, MOTION, USE_WARP_REDUCTION, HAS_ADC, SM <: BlochLikeSimMethods}
1111

1212
@uniform N = @groupsize()[1]
1313
i_l = @index(Local, Linear)
1414
i_g = @index(Group, Linear)
1515
i = (i_g - 1u32) * UInt32(N) + i_l
1616

17-
sig_group_r = @localmem T USE_WARP_REDUCTION ? 32 : N
18-
sig_group_i = @localmem T USE_WARP_REDUCTION ? 32 : N
19-
sig_r = zero(T)
20-
sig_i = zero(T)
17+
sig_group_r = @localmem T HAS_ADC ? (USE_WARP_REDUCTION ? 32 : N) : 1
18+
sig_group_i = @localmem T HAS_ADC ? (USE_WARP_REDUCTION ? 32 : N) : 1
2119

22-
if i <= N_spins
20+
active = i <= N_spins
21+
Mxy_r = zero(T)
22+
Mxy_i = zero(T)
23+
Mz = zero(T)
24+
ρ = zero(T)
25+
ΔBz = zero(T)
26+
T1 = T(1)
27+
T2 = T(1)
28+
x = zero(T)
29+
y = zero(T)
30+
z = zero(T)
31+
Bx_prev = zero(T)
32+
By_prev = zero(T)
33+
Bz_prev = zero(T)
34+
35+
if active
2336
ΔBz = p_ΔBz[i]
2437
Mxy_r, Mxy_i = reim(M_xy[i])
2538
Mz = M_z[i]
@@ -38,10 +51,12 @@
3851
x, y, z = get_spin_coordinates(p_x, p_y, p_z, i, 1)
3952
Bx_prev, By_prev = reim(s_B1[1])
4053
Bz_prev = x * s_Gx[1] + y * s_Gy[1] + z * s_Gz[1] + ΔBz - s_Δf[1] / T(γ)
54+
end
4155

42-
ADC_idx = 1u32
43-
s_idx = 2u32
44-
while (s_idx <= s_length)
56+
ADC_idx = 1u32
57+
s_idx = 2u32
58+
while (s_idx <= s_length)
59+
if active
4560
if MOTION
4661
x, y, z = get_spin_coordinates(p_x, p_y, p_z, i, s_idx)
4762
end
@@ -84,19 +99,22 @@
8499
Mxy_i = Mxy_new_i * E2
85100
Mz = Mz_new * E1 + ρ * (T(1) - E1)
86101

87-
# Acquire Signal
88-
if s_idx <= s_length && s_ADC[s_idx]
89-
sig_r, sig_i = reduce_signal!(Mxy_r, Mxy_i, sig_group_r, sig_group_i, i_l, N, T, Val(USE_WARP_REDUCTION))
90-
if i_l == 1u32
91-
sig_output[i_g, ADC_idx] = complex(sig_r, sig_i)
92-
end
93-
ADC_idx += 1u32
94-
end
95-
96102
Bx_prev, By_prev, Bz_prev = Bx_next, By_next, Bz_next
97-
s_idx += 1u32
98103
end
99104

105+
# Acquire Signal
106+
if HAS_ADC && s_ADC[s_idx]
107+
sig_r, sig_i = reduce_signal!(Mxy_r, Mxy_i, sig_group_r, sig_group_i, i_l, N, T, Val(USE_WARP_REDUCTION))
108+
if i_l == 1u32
109+
sig_output[i_g, ADC_idx] = complex(sig_r, sig_i)
110+
end
111+
ADC_idx += 1u32
112+
end
113+
114+
s_idx += 1u32
115+
end
116+
117+
if active
100118
# RF frame -> Rotating frame
101119
# M * exp(i * ψ)
102120
ψ_end = s_ψ[s_length]

KomaMRICore/src/simulation/SimMethods/Bloch/gpu/PrecessionKernel.jl

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,19 @@
55
M_xy, M_z,
66
@Const(p_x), @Const(p_y), @Const(p_z), @Const(p_ΔBz), @Const(p_T1), @Const(p_T2), @Const(p_ρ), N_spins,
77
@Const(s_Gx), @Const(s_Gy), @Const(s_Gz), @Const(s_Δt), @Const(s_ADC), s_length,
8-
::Val{MOTION}, ::Val{USE_WARP_REDUCTION},
8+
::Val{MOTION}, ::Val{USE_WARP_REDUCTION}, ::Val{HAS_ADC},
99
sim_method::BlochLikeSimMethods
10-
) where {T, MOTION, USE_WARP_REDUCTION}
10+
) where {T, MOTION, USE_WARP_REDUCTION, HAS_ADC}
1111

1212
@uniform N = @groupsize()[1]
1313
i_l = @index(Local, Linear)
1414
i_g = @index(Group, Linear)
1515
i = (i_g - 1u32) * UInt32(N) + i_l
1616

17-
sig_group_r = @localmem T USE_WARP_REDUCTION ? 32 : N
18-
sig_group_i = @localmem T USE_WARP_REDUCTION ? 32 : N
19-
sig_r = zero(T)
20-
sig_i = zero(T)
17+
sig_group_r = @localmem T HAS_ADC ? (USE_WARP_REDUCTION ? 32 : N) : 1
18+
sig_group_i = @localmem T HAS_ADC ? (USE_WARP_REDUCTION ? 32 : N) : 1
2119

20+
active = i <= N_spins
2221
Mxy_r = zero(T)
2322
Mxy_i = zero(T)
2423
t = zero(T)
@@ -32,10 +31,8 @@
3231
Bz_next = zero(T)
3332

3433
# Setting per-thread (ith spin) properties, and Bz for t0
35-
if i <= N_spins
34+
if active
3635
Mxy_r, Mxy_i = reim(M_xy[i])
37-
sig_r = Mxy_r
38-
sig_i = Mxy_i
3936
ΔBz = p_ΔBz[i]
4037
T2 = p_T2[i]
4138
x, y, z = get_spin_coordinates(p_x, p_y, p_z, i, 1)
@@ -45,7 +42,7 @@
4542
ADC_idx = 1u32
4643
s_idx = 2u32
4744
while s_idx <= s_length
48-
if i <= N_spins
45+
if active
4946
if MOTION
5047
x, y, z = get_spin_coordinates(p_x, p_y, p_z, i, s_idx)
5148
end
@@ -56,8 +53,10 @@
5653
ϕ += (Bz_prev + Bz_next) * T(-π * γ) * Δt
5754
end
5855
# Acquire Signal
59-
if s_idx <= s_length && s_ADC[s_idx]
60-
if i <= N_spins
56+
if HAS_ADC && s_ADC[s_idx]
57+
sig_r = zero(T)
58+
sig_i = zero(T)
59+
if active
6160
E2 = exp(-t / T2)
6261
cis_ϕ_i, cis_ϕ_r = sincos(ϕ)
6362
sig_r = E2 * (Mxy_r * cis_ϕ_r - Mxy_i * cis_ϕ_i)
@@ -74,7 +73,7 @@
7473
s_idx += 1u32
7574
end
7675
# Save magnetization at end of block
77-
if i <= N_spins
76+
if active
7877
E1 = exp(-t / p_T1[i])
7978
E2 = exp(-t / T2)
8079
cis_ϕ_i, cis_ϕ_r = sincos(ϕ)
@@ -83,4 +82,4 @@
8382
end
8483
end
8584

86-
## COV_EXCL_STOP
85+
## COV_EXCL_STOP

KomaMRICore/test/Project.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
33
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
44
KomaMRIBase = "d0bc0b20-b151-4d03-b2a4-6ca51751cb9c"
55
KomaMRICore = "4baa4f4d-2ae9-40db-8331-a7d1080e3f4e"
6-
Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
76
OrdinaryDiffEqTsit5 = "b1df2697-797e-41e3-8120-5422d3b24e4a"
87
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
98
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"

0 commit comments

Comments
 (0)