Skip to content

Commit 3f50b23

Browse files
committed
First version
1 parent 2554c7f commit 3f50b23

File tree

1 file changed

+29
-17
lines changed
  • httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon

1 file changed

+29
-17
lines changed

httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -161,15 +161,17 @@ def _calc_memory_bytes_LPRec(
161161
) -> Tuple[int, int]:
162162
angles_tot = non_slice_dims_shape[0]
163163
DetectorsLengthH = non_slice_dims_shape[1]
164+
164165
# calculate the output shape
165-
output_dims = _calc_output_dim_LPRec(non_slice_dims_shape, **kwargs)
166+
output_dims = __calc_output_dim_recon(non_slice_dims_shape, **kwargs)
166167

168+
# input and output slices
167169
in_slice_size = np.prod(non_slice_dims_shape) * dtype.itemsize
168-
out_slice_size = np.prod(DetectorsLengthH * DetectorsLengthH) * dtype.itemsize
170+
out_slice_size = np.prod(output_dims) * dtype.itemsize
169171

170172
# interpolation kernels
171-
grid_size = np.prod(DetectorsLengthH * DetectorsLengthH) * np.float32().nbytes
172-
phi = grid_size
173+
# grid_size = np.prod(DetectorsLengthH * DetectorsLengthH) * np.float32().nbytes
174+
# phi = grid_size
173175

174176
eps = 1e-4 # accuracy of usfft
175177
mu = -np.log(eps) / (2 * DetectorsLengthH * DetectorsLengthH)
@@ -185,21 +187,24 @@ def _calc_memory_bytes_LPRec(
185187
)
186188
)
187189
)
190+
188191
oversampling_level = 2
189192
tmp_oversample_size = (
190193
np.prod(angles_tot * oversampling_level * DetectorsLengthH)
191194
* np.float32().nbytes
192195
)
193-
194196
data_c_size = np.prod(0.5 * angles_tot * DetectorsLengthH) * np.complex64().itemsize
197+
198+
# Oversampling freed during the calculation
199+
max_memory_sampling = tmp_oversample_size + data_c_size
195200

196201
fde_size = (
197-
(2 * m + 2 * DetectorsLengthH) * (2 * m + 2 * DetectorsLengthH)
202+
0.5 * (2 * m + 2 * DetectorsLengthH) * (2 * m + 2 * DetectorsLengthH)
198203
) * np.complex64().itemsize
199204

200-
fde2_size = (
201-
(2 * DetectorsLengthH) * (2 * DetectorsLengthH)
202-
) * np.complex64().itemsize
205+
c1dfftshift_size = (
206+
DetectorsLengthH * np.int8().nbytes
207+
)
203208

204209
c2dfftshift_slice_size = (
205210
np.prod(4 * DetectorsLengthH * DetectorsLengthH) * np.int8().nbytes
@@ -209,20 +214,27 @@ def _calc_memory_bytes_LPRec(
209214
freq_slice = angles_tot * (DetectorsLengthH + 1) * np.complex64().itemsize
210215
fftplan_size = freq_slice * 2
211216

217+
max_memory_per_slice = max(max_memory_sampling + fde_size, 2 * fde_size)
218+
219+
# Add threshold (20% safety margin)
220+
max_memory_per_slice *= 1.2
221+
212222
tot_memory_bytes = int(
213223
in_slice_size
214224
+ out_slice_size
215-
+ 2 * grid_size
216-
+ phi
217-
+ tmp_oversample_size
218-
+ data_c_size
219-
+ fde_size
220-
+ fde2_size
225+
+ max_memory_per_slice
226+
)
227+
228+
fixed_amount = int(
229+
fde_size
230+
+ fftplan_size
231+
+ filter_size
232+
+ c1dfftshift_size
221233
+ c2dfftshift_slice_size
222234
+ freq_slice
223-
+ fftplan_size
224235
)
225-
return (tot_memory_bytes, filter_size)
236+
237+
return (tot_memory_bytes, fixed_amount)
226238

227239

228240

0 commit comments

Comments
 (0)