Merge pull request #75 from DiamondLightSource/projection_chunking

dkazanc · web-flow · commit c7fec7616972 · 2025-10-27T15:19:25.000Z
Bring the LPRec memory estimator and its tests up to date with the latest ToMoBAR
diff --git a/httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py b/httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py
@@ -218,8 +218,19 @@ def _calc_memory_bytes_LPRec3d_tomobar(
     center_size = 32768
     center_size = min(center_size, n * 2)
 
-    oversampling_level = 2  # at least 2 or larger required
-    ne = oversampling_level * n
+    chunk_count = 4
+    projection_chunk_count = 4
+    oversampling_level = 4  # at least 3 or larger required
+    power_of_2_oversampling = True
+
+    if power_of_2_oversampling:
+        ne = 2 ** math.ceil(math.log2(DetectorsLengthH_prepad * 3))
+        if n > ne:
+            ne = 2 ** math.ceil(math.log2(n))
+    else:
+        ne = int(oversampling_level * DetectorsLengthH_prepad)
+        ne = max(ne, n)
+
     padding_m = ne // 2 - n // 2
 
     if "angles" in kwargs:
@@ -233,8 +244,6 @@ def _calc_memory_bytes_LPRec3d_tomobar(
             np.ceil(2)
         )  # assume a 2 * PI projection angle range
 
-    chunk_count = 4
-
     output_dims = __calc_output_dim_recon(non_slice_dims_shape, **kwargs)
     if odd_horiz:
         output_dims = tuple(x + 1 for x in output_dims)
@@ -346,23 +355,23 @@ def add_to_memory_counters(amount, per_slice: bool):
 
         add_to_memory_counters(-irfft_result_size, False)
     else:
-        add_to_memory_counters(rfft_plan_slice_size / chunk_count * 2, True)
-        add_to_memory_counters(irfft_plan_slice_size / chunk_count * 2, True)
-        # add_to_memory_counters(irfft_scratch_memory_size / chunk_count, True)
+        add_to_memory_counters(rfft_plan_slice_size / chunk_count / projection_chunk_count * 2, True)
+        add_to_memory_counters(irfft_plan_slice_size / chunk_count / projection_chunk_count * 2, True)
+        # add_to_memory_counters(irfft_scratch_memory_size / chunk_count / projection_chunk_count, True)
         for _ in range(0, chunk_count):
-            add_to_memory_counters(padded_tmp_p_input_slice / chunk_count, True)
+            add_to_memory_counters(padded_tmp_p_input_slice / chunk_count / projection_chunk_count, True)
 
-            add_to_memory_counters(rfft_result_size / chunk_count, True)
-            add_to_memory_counters(filtered_rfft_result_size / chunk_count, True)
-            add_to_memory_counters(-rfft_result_size / chunk_count, True)
-            add_to_memory_counters(-padded_tmp_p_input_slice / chunk_count, True)
+            add_to_memory_counters(rfft_result_size / chunk_count / projection_chunk_count, True)
+            add_to_memory_counters(filtered_rfft_result_size / chunk_count / projection_chunk_count, True)
+            add_to_memory_counters(-rfft_result_size / chunk_count / projection_chunk_count, True)
+            add_to_memory_counters(-padded_tmp_p_input_slice / chunk_count / projection_chunk_count, True)
 
-            add_to_memory_counters(irfft_scratch_memory_size / chunk_count, True)
-            add_to_memory_counters(-irfft_scratch_memory_size / chunk_count, True)
-            add_to_memory_counters(irfft_result_size / chunk_count, True)
-            add_to_memory_counters(-filtered_rfft_result_size / chunk_count, True)
+            add_to_memory_counters(irfft_scratch_memory_size / chunk_count / projection_chunk_count, True)
+            add_to_memory_counters(-irfft_scratch_memory_size / chunk_count / projection_chunk_count, True)
+            add_to_memory_counters(irfft_result_size / chunk_count / projection_chunk_count, True)
+            add_to_memory_counters(-filtered_rfft_result_size / chunk_count / projection_chunk_count, True)
 
-            add_to_memory_counters(-irfft_result_size / chunk_count, True)
+            add_to_memory_counters(-irfft_result_size / chunk_count / projection_chunk_count, True)
 
     add_to_memory_counters(-padded_in_slice_size, True)
     add_to_memory_counters(-filter_size, False)
@@ -396,7 +405,7 @@ def add_to_memory_counters(amount, per_slice: bool):
     if min_mem_usage_ifft2 and min_mem_usage_filter:
         return (tot_memory_bytes * 1.1 + 30 * 1024 * 1024, fixed_amount)
     else:
-        return (tot_memory_bytes, fixed_amount)
+        return (tot_memory_bytes * 1.1, fixed_amount)
 
 
 def _calc_memory_bytes_SIRT3d_tomobar(
@@ -551,4 +560,6 @@ def _calc_memory_bytes_FISTA3d_tomobar(
 
 def __estimate_detectorHoriz_padding(detX_size) -> int:
     det_half = detX_size // 2
-    return int(np.sqrt(2 * (det_half**2)) // 2)
+    padded_value_exact = int(np.sqrt(2 * (det_half**2))) - det_half
+    padded_add_margin = int(0.1 * padded_value_exact)
+    return padded_value_exact + padded_add_margin
diff --git a/tests/test_httomolibgpu.py b/tests/test_httomolibgpu.py
@@ -568,7 +568,7 @@ def test_recon_LPRec3d_tomobar_0_pi_memoryhook(
 
 @pytest.mark.full
 @pytest.mark.cupy
-@pytest.mark.parametrize("padding_detx", [0, 10, 50, 100])
+@pytest.mark.parametrize("padding_detx", [0, 10, 50, 100, 800])
 @pytest.mark.parametrize("projections", [1500, 1801, 2560, 3601])
 @pytest.mark.parametrize("detX_size", [2560])
 @pytest.mark.parametrize("slices", [3, 4, 5, 10, 15, 20])
@@ -593,7 +593,7 @@ def test_recon_LPRec3d_tomobar_0_pi_memoryhook_full(
 
 @pytest.mark.full
 @pytest.mark.cupy
-@pytest.mark.parametrize("padding_detx", [0, 10, 50, 100])
+@pytest.mark.parametrize("padding_detx", [0, 10, 50, 100, 800])
 @pytest.mark.parametrize("projections", [1500, 1801, 2560, 3601])
 @pytest.mark.parametrize("detX_size", [2560])
 @pytest.mark.parametrize("slices", [3, 4, 5, 10, 15, 20])
@@ -676,7 +676,7 @@ def __test_recon_LPRec3d_tomobar_memoryhook_common(
     # the estimated_memory_mb should be LARGER or EQUAL to max_mem_mb
     # the resulting percent value should not deviate from max_mem on more than 20%
     assert estimated_memory_mb >= max_mem_mb
-    assert percents_relative_maxmem <= 90
+    assert percents_relative_maxmem <= 60
 
 
 @pytest.mark.cupy