diff --git a/httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py b/httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py index a6ea6371..6b9d2ab1 100644 --- a/httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py +++ b/httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py @@ -77,12 +77,17 @@ def _calc_memory_bytes_FBP3d_tomobar( dtype: np.dtype, **kwargs, ) -> Tuple[int, int]: - det_height = non_slice_dims_shape[0] - det_width = non_slice_dims_shape[1] + if "detector_pad" in kwargs: + detector_pad = kwargs["detector_pad"] + else: + detector_pad = 0 + + angles_tot = non_slice_dims_shape[0] + det_width = non_slice_dims_shape[1] + 2 * detector_pad SLICES = 200 # dummy multiplier+divisor to pass large batch size threshold # 1. input - input_slice_size = np.prod(non_slice_dims_shape) * dtype.itemsize + input_slice_size = (angles_tot * det_width) * dtype.itemsize ########## FFT / filter / IFFT (filtersync_cupy) @@ -91,13 +96,13 @@ def _calc_memory_bytes_FBP3d_tomobar( cufft_estimate_1d( nx=det_width, fft_type=CufftType.CUFFT_R2C, - batch=det_height * SLICES, + batch=angles_tot * SLICES, ) / SLICES ) # 3. RFFT output size (proj_f in code) - proj_f_slice = det_height * (det_width // 2 + 1) * np.complex64().itemsize + proj_f_slice = angles_tot * (det_width // 2 + 1) * np.complex64().itemsize # 4. Filter size (independent of number of slices) filter_size = (det_width // 2 + 1) * np.float32().itemsize @@ -107,7 +112,7 @@ def _calc_memory_bytes_FBP3d_tomobar( cufft_estimate_1d( nx=det_width, fft_type=CufftType.CUFFT_C2R, - batch=det_height * SLICES, + batch=angles_tot * SLICES, ) / SLICES ) @@ -123,9 +128,7 @@ def _calc_memory_bytes_FBP3d_tomobar( # 6. we swap the axes before passing data to Astra in ToMoBAR # https://github.com/dkazanc/ToMoBAR/blob/54137829b6326406e09f6ef9c95eb35c213838a7/tomobar/methodsDIR_CuPy.py#L135 - pre_astra_input_swapaxis_slice = ( - np.prod(non_slice_dims_shape) * np.float32().itemsize - ) + pre_astra_input_swapaxis_slice = (angles_tot * det_width) * np.float32().itemsize # 7. astra backprojection will generate an output array # https://github.com/dkazanc/ToMoBAR/blob/54137829b6326406e09f6ef9c95eb35c213838a7/tomobar/astra_wrappers/astra_base.py#L524 @@ -151,7 +154,7 @@ def _calc_memory_bytes_FBP3d_tomobar( # so it does not add to the memory overall # We assume for safety here that one FFT plan is not freed and one is freed - tot_memory_bytes = ( + tot_memory_bytes = int( projection_mem_size + filtersync_size - ifftplan_slice_size + recon_output_size ) @@ -166,8 +169,14 @@ def _calc_memory_bytes_LPRec3d_tomobar( ) -> Tuple[int, int]: # Based on: https://github.com/dkazanc/ToMoBAR/pull/112/commits/4704ecdc6ded3dd5ec0583c2008aa104f30a8a39 + if "detector_pad" in kwargs: + detector_pad = kwargs["detector_pad"] + else: + detector_pad = 0 + angles_tot = non_slice_dims_shape[0] - DetectorsLengthH = non_slice_dims_shape[1] + DetectorsLengthH_prepad = non_slice_dims_shape[1] + DetectorsLengthH = non_slice_dims_shape[1] + 2 * detector_pad SLICES = 200 # dummy multiplier+divisor to pass large batch size threshold _CENTER_SIZE_MIN = 192 # must be divisible by 8 @@ -210,7 +219,7 @@ def _calc_memory_bytes_LPRec3d_tomobar( if odd_horiz: output_dims = tuple(x + 1 for x in output_dims) - in_slice_size = np.prod(non_slice_dims_shape) * dtype.itemsize + in_slice_size = (angles_tot * DetectorsLengthH) * dtype.itemsize padded_in_slice_size = angles_tot * n * np.float32().itemsize theta_size = angles_tot * np.float32().itemsize @@ -256,7 +265,9 @@ def _calc_memory_bytes_LPRec3d_tomobar( center_size * center_size * (1 + angle_range_pi_count * 2) * np.int16().itemsize ) - recon_output_size = DetectorsLengthH * DetectorsLengthH * np.float32().itemsize + recon_output_size = ( + DetectorsLengthH_prepad * DetectorsLengthH_prepad * np.float32().itemsize + ) ifft2_plan_slice_size = ( cufft_estimate_2d( nx=(2 * m + 2 * n), ny=(2 * m + 2 * n), fft_type=CufftType.CUFFT_C2C @@ -342,24 +353,28 @@ def add_to_memory_counters(amount, per_slice: bool): add_to_memory_counters(after_recon_swapaxis_slice, True) return (tot_memory_bytes * 1.05, fixed_amount + 250 * 1024 * 1024) - # return (tot_memory_bytes, fixed_amount) - def _calc_memory_bytes_SIRT3d_tomobar( non_slice_dims_shape: Tuple[int, int], dtype: np.dtype, **kwargs, ) -> Tuple[int, int]: - DetectorsLengthH = non_slice_dims_shape[1] + + if "detector_pad" in kwargs: + detector_pad = kwargs["detector_pad"] + else: + detector_pad = 0 + anglesnum = non_slice_dims_shape[0] + DetectorsLengthH = non_slice_dims_shape[1] + 2 * detector_pad # calculate the output shape output_dims = _calc_output_dim_SIRT3d_tomobar(non_slice_dims_shape, **kwargs) - in_data_size = np.prod(non_slice_dims_shape) * dtype.itemsize + in_data_size = (anglesnum * DetectorsLengthH) * dtype.itemsize out_data_size = np.prod(output_dims) * dtype.itemsize astra_projection = 2.5 * (in_data_size + out_data_size) - tot_memory_bytes = 2 * in_data_size + 2 * out_data_size + astra_projection + tot_memory_bytes = int(2 * in_data_size + 2 * out_data_size + astra_projection) return (tot_memory_bytes, 0) @@ -368,14 +383,20 @@ def _calc_memory_bytes_CGLS3d_tomobar( dtype: np.dtype, **kwargs, ) -> Tuple[int, int]: - DetectorsLengthH = non_slice_dims_shape[1] + if "detector_pad" in kwargs: + detector_pad = kwargs["detector_pad"] + else: + detector_pad = 0 + + anglesnum = non_slice_dims_shape[0] + DetectorsLengthH = non_slice_dims_shape[1] + 2 * detector_pad # calculate the output shape output_dims = _calc_output_dim_CGLS3d_tomobar(non_slice_dims_shape, **kwargs) - in_data_size = np.prod(non_slice_dims_shape) * dtype.itemsize + in_data_size = (anglesnum * DetectorsLengthH) * dtype.itemsize out_data_size = np.prod(output_dims) * dtype.itemsize astra_projection = 2.5 * (in_data_size + out_data_size) - tot_memory_bytes = 2 * in_data_size + 2 * out_data_size + astra_projection + tot_memory_bytes = int(2 * in_data_size + 2 * out_data_size + astra_projection) return (tot_memory_bytes, 0) diff --git a/tests/test_httomolibgpu.py b/tests/test_httomolibgpu.py index 2e400e2e..815889b0 100644 --- a/tests/test_httomolibgpu.py +++ b/tests/test_httomolibgpu.py @@ -528,17 +528,24 @@ def test_data_sampler_memoryhook(slices, newshape, interpolation, ensure_clean_m @pytest.mark.cupy +@pytest.mark.parametrize("padding_detx", [0, 10, 100, 200]) @pytest.mark.parametrize("projections", [1801, 3601]) @pytest.mark.parametrize("slices", [7, 11, 15]) @pytest.mark.parametrize("detectorX", [1200, 2560]) def test_recon_FBP3d_tomobar_memoryhook( - slices, detectorX, projections, ensure_clean_memory, mocker: MockerFixture + slices, + detectorX, + projections, + padding_detx, + ensure_clean_memory, + mocker: MockerFixture, ): data = cp.random.random_sample((projections, slices, detectorX), dtype=np.float32) kwargs = {} kwargs["angles"] = np.linspace( 0.0 * np.pi / 180.0, 180.0 * np.pi / 180.0, data.shape[0] ) + kwargs["detector_pad"] = padding_detx kwargs["center"] = 500 kwargs["recon_size"] = detectorX kwargs["recon_mask_radius"] = 0.8 @@ -579,45 +586,57 @@ def test_recon_FBP3d_tomobar_memoryhook( @pytest.mark.cupy -# @pytest.mark.parametrize("projections", [1801]) -# @pytest.mark.parametrize("detX_size", [2560]) -# @pytest.mark.parametrize("slices", [15]) -# @pytest.mark.parametrize("projection_angle_range", [(0, np.pi)]) - - +@pytest.mark.parametrize("padding_detx", [0, 10, 50, 100]) @pytest.mark.parametrize("projections", [1500, 1801, 2560]) @pytest.mark.parametrize("detX_size", [2560]) @pytest.mark.parametrize("slices", [3, 4, 5, 10, 15, 20]) @pytest.mark.parametrize("projection_angle_range", [(0, np.pi)]) - -# @pytest.mark.parametrize("projections", [1500, 1801, 2560]) -# @pytest.mark.parametrize("detX_size", [2560]) -# @pytest.mark.parametrize("slices", [3, 4, 5, 10]) -# @pytest.mark.parametrize("projection_angle_range", [(0, np.pi)]) def test_recon_LPRec3d_tomobar_0_pi_memoryhook( - slices, detX_size, projections, projection_angle_range, ensure_clean_memory + slices, + detX_size, + projections, + projection_angle_range, + padding_detx, + ensure_clean_memory, ): __test_recon_LPRec3d_tomobar_memoryhook_common( - slices, detX_size, projections, projection_angle_range, ensure_clean_memory + slices, + detX_size, + projections, + projection_angle_range, + padding_detx, + ensure_clean_memory, ) @pytest.mark.full @pytest.mark.cupy +@pytest.mark.parametrize("padding_detx", [0, 10, 50, 100]) @pytest.mark.parametrize("projections", [1500, 1801, 2560, 3601]) @pytest.mark.parametrize("detX_size", [2560]) @pytest.mark.parametrize("slices", [3, 4, 5, 10, 15, 20]) @pytest.mark.parametrize("projection_angle_range", [(0, np.pi)]) def test_recon_LPRec3d_tomobar_0_pi_memoryhook_full( - slices, detX_size, projections, projection_angle_range, ensure_clean_memory + slices, + detX_size, + projections, + projection_angle_range, + padding_detx, + ensure_clean_memory, ): __test_recon_LPRec3d_tomobar_memoryhook_common( - slices, detX_size, projections, projection_angle_range, ensure_clean_memory + slices, + detX_size, + projections, + projection_angle_range, + padding_detx, + ensure_clean_memory, ) @pytest.mark.full @pytest.mark.cupy +@pytest.mark.parametrize("padding_detx", [0, 10, 50, 100]) @pytest.mark.parametrize("projections", [1500, 1801, 2560, 3601]) @pytest.mark.parametrize("detX_size", [2560]) @pytest.mark.parametrize("slices", [3, 4, 5, 10, 15, 20]) @@ -625,15 +644,30 @@ def test_recon_LPRec3d_tomobar_0_pi_memoryhook_full( "projection_angle_range", [(0, np.pi), (0, 2 * np.pi), (-np.pi / 2, np.pi / 2)] ) def test_recon_LPRec3d_tomobar_memoryhook_full( - slices, detX_size, projections, projection_angle_range, ensure_clean_memory + slices, + detX_size, + projections, + projection_angle_range, + padding_detx, + ensure_clean_memory, ): __test_recon_LPRec3d_tomobar_memoryhook_common( - slices, detX_size, projections, projection_angle_range, ensure_clean_memory + slices, + detX_size, + projections, + projection_angle_range, + padding_detx, + ensure_clean_memory, ) def __test_recon_LPRec3d_tomobar_memoryhook_common( - slices, detX_size, projections, projection_angle_range, ensure_clean_memory + slices, + detX_size, + projections, + projection_angle_range, + padding_detx, + ensure_clean_memory, ): angles_number = projections data = cp.random.random_sample((angles_number, slices, detX_size), dtype=np.float32) @@ -642,6 +676,7 @@ def __test_recon_LPRec3d_tomobar_memoryhook_common( projection_angle_range[0], projection_angle_range[1], data.shape[0] ) kwargs["center"] = 1280 + kwargs["detector_pad"] = padding_detx kwargs["recon_size"] = detX_size kwargs["recon_mask_radius"] = 0.8 @@ -687,9 +722,9 @@ def __test_recon_LPRec3d_tomobar_memoryhook_common( if slices <= 3: assert percents_relative_maxmem <= 75 elif slices <= 5: - assert percents_relative_maxmem <= 60 + assert percents_relative_maxmem <= 63 else: - assert percents_relative_maxmem <= 47 + assert percents_relative_maxmem <= 50 @pytest.mark.cupy