Remove LPRec debug logs and update peak accounting in profile hook

ferenc-rad · ferenc-rad · commit 39f7f701b1da · 2025-04-15T11:52:42.000+02:00
diff --git a/httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py b/httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py
@@ -172,168 +172,70 @@ def _calc_memory_bytes_LPRec(
         n = n - 1  # dealing with the odd horizontal detector size
         odd_horiz = True
 
-    def debug_print(line_number: int, var_name: str, size_in_bytes: int) -> str:
-        print(f"{line_number} - {var_name}: {size_in_bytes} B / {size_in_bytes / 1024} KB / {size_in_bytes / 1024 ** 2} MB / {size_in_bytes * 16 / 1024 ** 2} MB")
-
     eps = 1e-4  # accuracy of usfft
     mu = -np.log(eps) / (2 * n * n)
-    m = int(
-        np.ceil(
-            2 * n * 1 / np.pi * np.sqrt(-mu * np.log(eps) + (mu * n) * (mu * n) / 4)
-        )
-    )
+    m = int(np.ceil(2 * n * 1 / np.pi * np.sqrt(-mu * np.log(eps) + (mu * n) * (mu * n) / 4)))
 
     center_size = 6144
     center_size = min(center_size, n * 2 + m * 2)
-    print(f"m: {m}")
-    print(f"center_size: {center_size}")
 
     oversampling_level = 2  # at least 2 or larger required
     ne = oversampling_level * n
     padding_m = ne // 2 - n // 2
-    print(f"padding_m: {padding_m}")
 
     output_dims = __calc_output_dim_recon(non_slice_dims_shape, **kwargs)
     if odd_horiz:
         output_dims = tuple(x + 1 for x in output_dims)
-    print(f"output_dims: {output_dims}")
 
     in_slice_size = np.prod(non_slice_dims_shape) * dtype.itemsize
-    debug_print(0, "in_slice_size", in_slice_size)
     padded_in_slice_size = np.prod(non_slice_dims_shape) * np.float32().itemsize
-    debug_print(246, "padded_in_slice_size", padded_in_slice_size)
     theta_size = angles_tot * np.float32().itemsize
-    debug_print(262, "theta_size", theta_size)
     recon_output_size = (n + 1) * (n + 1) * np.float32().itemsize if odd_horiz else n * n * np.float32().itemsize    # 264
-    debug_print(281, "recon_output_size", recon_output_size)
     linspace_size = n * np.float32().itemsize
-    debug_print(285, "linspace_size", linspace_size)
     meshgrid_size = 2 * n * n * np.float32().itemsize
-    debug_print(286, "meshgrid_size", meshgrid_size)
     phi_size = 6 * n * n * np.float32().itemsize
-    debug_print(287, "phi_size", phi_size)
     angle_range_size = center_size * center_size * 3 * np.int32().itemsize
-    debug_print(293, "angle_range_size", angle_range_size)
     c1dfftshift_size = n * np.int8().itemsize
-    debug_print(296, "c1dfftshift_size", c1dfftshift_size)
     c2dfftshift_slice_size = 4 * n * n * np.int8().itemsize
-    debug_print(299, "c2dfftshift_slice_size", c2dfftshift_slice_size)
     filter_size = (n // 2 + 1) * np.float32().itemsize
-    debug_print(309, "filter_size", filter_size)
     rfftfreq_size = filter_size
-    debug_print(312, "rfftfreq_size", rfftfreq_size)
     scaled_filter_size = filter_size
-    debug_print(313, "scaled_filter_size", scaled_filter_size)
     tmp_p_input_slice = np.prod(non_slice_dims_shape) * np.float32().itemsize
-    debug_print(316, "tmp_p_input_slice", tmp_p_input_slice)
     padded_tmp_p_input_slice = angles_tot * (n + padding_m * 2) * dtype.itemsize
-    debug_print(326, "padded_tmp_p_input_slice", padded_tmp_p_input_slice)
     rfft_result_size = padded_tmp_p_input_slice
-    debug_print(327, "rfft_result_size", rfft_result_size)
     filtered_rfft_result_size = rfft_result_size
-    debug_print(327, "filtered_rfft_result_size", filtered_rfft_result_size)
     rfft_plan_slice_size = cufft_estimate_1d(nx=(n + padding_m * 2),fft_type=CufftType.CUFFT_R2C,batch=angles_tot * SLICES) / SLICES
-    debug_print(327, "rfft_plan_slice_size", rfft_plan_slice_size)
     irfft_result_size = filtered_rfft_result_size
-    debug_print(327, "irfft_result_size", irfft_result_size)
     irfft_scratch_memory_size = filtered_rfft_result_size
-    debug_print(327, "irfft_scratch_memory_size", irfft_scratch_memory_size)
     irfft_plan_slice_size = cufft_estimate_1d(nx=(n + padding_m * 2),fft_type=CufftType.CUFFT_C2R,batch=angles_tot * SLICES) / SLICES
-    debug_print(327, "irfft_plan_slice_size", irfft_plan_slice_size)
     conversion_to_complex_size = np.prod(non_slice_dims_shape) * np.complex64().itemsize / 2
-    debug_print(333, "conversion_to_complex_size", conversion_to_complex_size)
     datac_size = np.prod(non_slice_dims_shape) * np.complex64().itemsize / 2
-    debug_print(333, "datac_size", datac_size)
     fde_size = (2 * m + 2 * n) * (2 * m + 2 * n) * np.complex64().itemsize / 2
-    debug_print(341, "fde_size", fde_size)
     shifted_datac_size = datac_size
-    debug_print(344, "shifted_datac_size", shifted_datac_size)
     fft_result_size = datac_size
-    debug_print(344, "fft_result_size", fft_result_size)
     backshifted_datac_size = datac_size
-    debug_print(344, "backshifted_datac_size", backshifted_datac_size)
     scaled_backshifted_datac_size = datac_size
-    debug_print(344, "scaled_backshifted_datac_size", scaled_backshifted_datac_size)
     fft_plan_slice_size = cufft_estimate_1d(nx=n,fft_type=CufftType.CUFFT_C2C,batch=angles_tot * SLICES) / SLICES
-    debug_print(344, "fft_plan_slice_size", fft_plan_slice_size)
     fde_view_size = 4 * n * n * np.complex64().itemsize / 2
     shifted_fde_view_size = fde_view_size
-    debug_print(474, "shifted_fde_view_size", shifted_fde_view_size)
     ifft2_scratch_memory_size = fde_view_size
-    debug_print(474, "ifft2_scratch_memory_size", ifft2_scratch_memory_size)
     ifft2_plan_slice_size = cufft_estimate_2d(nx=(2 * n),ny=(2 * n),fft_type=CufftType.CUFFT_C2C) / 2
-    debug_print(474, "ifft2_plan_slice_size", ifft2_plan_slice_size)
     fde2_size = n * n * np.complex64().itemsize / 2
-    debug_print(479, "fde2_size", fde2_size)
     concatenate_size = fde2_size
-    debug_print(485, "concatenate_size", concatenate_size)
     circular_mask_size = np.prod(output_dims) / 2 * np.int64().itemsize * 4
-    debug_print(496, "circular_mask_size", circular_mask_size)
 
     after_recon_swapaxis_slice = np.prod(non_slice_dims_shape) * np.float32().itemsize
-    debug_print(0, "after_recon_swapaxis_slice", after_recon_swapaxis_slice)
-
-    scope_sums = [
-        in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + tmp_p_input_slice + padded_tmp_p_input_slice + rfft_result_size + filtered_rfft_result_size + irfft_result_size + irfft_scratch_memory_size
-        , in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + tmp_p_input_slice + datac_size + conversion_to_complex_size
-        , in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + fft_plan_slice_size + fde_size + datac_size + shifted_datac_size + fft_result_size + backshifted_datac_size + scaled_backshifted_datac_size
-        , in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + fft_plan_slice_size + ifft2_plan_slice_size + shifted_fde_view_size + ifft2_scratch_memory_size
-        , in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + fft_plan_slice_size + ifft2_plan_slice_size + fde2_size + concatenate_size
-        , in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + fft_plan_slice_size + ifft2_plan_slice_size + after_recon_swapaxis_slice
-    ]
-
-    print(f"all per slice memory estimation: {
-        in_slice_size 
-        + padded_in_slice_size
-        + recon_output_size 
-        + rfft_plan_slice_size 
-        + irfft_plan_slice_size 
-        + tmp_p_input_slice 
-        + padded_tmp_p_input_slice 
-        + rfft_result_size 
-        + filtered_rfft_result_size 
-        + irfft_result_size 
-        + datac_size 
-        + conversion_to_complex_size
-        + fft_plan_slice_size + fde_size + shifted_datac_size + fft_result_size + backshifted_datac_size + scaled_backshifted_datac_size
-        + ifft2_plan_slice_size + shifted_fde_view_size
-        + fde2_size + concatenate_size
-        + after_recon_swapaxis_slice
-        }")
-
-    print(f"all fixed memory estimation: {
-        theta_size + phi_size + linspace_size + meshgrid_size
-        + angle_range_size + c1dfftshift_size + c2dfftshift_slice_size + filter_size + rfftfreq_size + scaled_filter_size + circular_mask_size
-        }")
-    
-    print(f"all memory estimation assuming 15 slices: {
-        in_slice_size * 15 
-        + (
-            padded_in_slice_size
-            + recon_output_size 
-            + rfft_plan_slice_size 
-            + irfft_plan_slice_size 
-            + tmp_p_input_slice 
-            + padded_tmp_p_input_slice 
-            + rfft_result_size 
-            + filtered_rfft_result_size 
-            + irfft_result_size 
-            + datac_size 
-            + conversion_to_complex_size
-            + fft_plan_slice_size + fde_size + shifted_datac_size + fft_result_size + backshifted_datac_size + scaled_backshifted_datac_size
-            + ifft2_plan_slice_size + shifted_fde_view_size
-            + fde2_size + concatenate_size
-        ) * 16
-        + after_recon_swapaxis_slice * 15
-        + theta_size + phi_size + linspace_size + meshgrid_size
-        + angle_range_size + c1dfftshift_size + c2dfftshift_slice_size + filter_size + rfftfreq_size + scaled_filter_size + circular_mask_size
-        }")
-
-    print(f"scoped_sums: {scope_sums}")
-    tot_memory_bytes_peak = max(scope_sums)
-    tot_memory_peak_index = scope_sums.index(tot_memory_bytes_peak)
-    print(f"tot_memory_peak_index: {tot_memory_peak_index}")
-    tot_memory_bytes = int(tot_memory_bytes_peak)
+
+    tot_memory_bytes = int(
+        max(
+            in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + tmp_p_input_slice + padded_tmp_p_input_slice + rfft_result_size + filtered_rfft_result_size + irfft_result_size + irfft_scratch_memory_size
+            , in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + tmp_p_input_slice + datac_size + conversion_to_complex_size
+            , in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + fft_plan_slice_size + fde_size + datac_size + shifted_datac_size + fft_result_size + backshifted_datac_size + scaled_backshifted_datac_size
+            , in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + fft_plan_slice_size + ifft2_plan_slice_size + shifted_fde_view_size + ifft2_scratch_memory_size
+            , in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + fft_plan_slice_size + ifft2_plan_slice_size + fde2_size + concatenate_size
+            , in_slice_size + padded_in_slice_size + recon_output_size + rfft_plan_slice_size + irfft_plan_slice_size + fft_plan_slice_size + ifft2_plan_slice_size + after_recon_swapaxis_slice
+        )
+    )
 
     fixed_amount = int(
         max(
@@ -343,9 +245,6 @@ def debug_print(line_number: int, var_name: str, size_in_bytes: int) -> str:
         )
     )
 
-    print(f"tot_memory_bytes: {tot_memory_bytes}")
-    print(f"fixed_amount: {fixed_amount}")
-
     return (tot_memory_bytes, fixed_amount)
 
 
diff --git a/httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/peak_memory_line_profile_hook.py b/httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/peak_memory_line_profile_hook.py
@@ -83,10 +83,12 @@ def _print_frame(self, memory_frame, running_peak_bytes, running_used_bytes, dep
         humanized_running_peak_bytes = None
         humanized_running_used_bytes = None
         if path.basename(st.filename) in self.running_peak_root_file_names:
-            running_used_bytes[0] = running_used_bytes[0] + memory_frame.used_bytes - memory_frame.freed_bytes
-            humanized_running_used_bytes = MemoryFrame.humanized_size(running_used_bytes[0])
+            running_used_bytes[0] += memory_frame.used_bytes
             running_peak_bytes[0] = max(running_peak_bytes[0], running_used_bytes[0])
+            running_used_bytes[0] -= memory_frame.freed_bytes
+
             humanized_running_peak_bytes = MemoryFrame.humanized_size(running_peak_bytes[0])
+            humanized_running_used_bytes = MemoryFrame.humanized_size(running_used_bytes[0])
 
         line = '%s%s:%s:%s (%s, %s, %s, %s, %s)\n' % (
             indent, st.filename, st.lineno, st.name,
diff --git a/tests/test_httomolibgpu.py b/tests/test_httomolibgpu.py
@@ -37,31 +37,23 @@
 from httomo_backends.methods_database.packages.backends.httomolibgpu.supporting_funcs.prep.normalize import *
 
 
-import traceback
-
 module_mem_path = "httomo.methods_database.packages.external."
 
 
 class MaxMemoryHook(cp.cuda.MemoryHook):
     def __init__(self, initial=0):
         self.max_mem = initial
         self.current = initial
-        self.all_allocations = initial
-        self.malloc_stack_traces = []
-        self.free_stack_traces = []
 
     def malloc_postprocess(
         self, device_id: int, size: int, mem_size: int, mem_ptr: int, pmem_id: int
     ):
         self.current += mem_size
         self.max_mem = max(self.max_mem, self.current)
-        self.all_allocations += mem_size
-        self.malloc_stack_traces.append((traceback.extract_stack(), mem_size))
 
     def free_postprocess(
         self, device_id: int, mem_size: int, mem_ptr: int, pmem_id: int
     ):
-        self.free_stack_traces.append((traceback.extract_stack(), mem_size))
         self.current -= mem_size
 
     def alloc_preprocess(self, **kwargs):
@@ -582,10 +574,8 @@ def test_recon_FBP_memoryhook(
 
 
 @pytest.mark.cupy
-# @pytest.mark.parametrize("projections", [1801, 2560, 3601])
-# @pytest.mark.parametrize("slices", [3, 4, 5, 10, 15, 20])
-@pytest.mark.parametrize("projections", [1801])
-@pytest.mark.parametrize("slices", [15])
+@pytest.mark.parametrize("projections", [1801, 2560, 3601])
+@pytest.mark.parametrize("slices", [3, 4, 5, 10, 15, 20])
 def test_recon_LPRec_memoryhook(slices, projections, ensure_clean_memory):
     angles_number = projections
     detX_size = 2560
@@ -598,49 +588,39 @@ def test_recon_LPRec_memoryhook(slices, projections, ensure_clean_memory):
     kwargs["recon_size"] = detX_size
     kwargs["recon_mask_radius"] = 0.8
 
-    # line_profiler = cp.cuda.memory_hooks.LineProfileHook()
-    line_profiler = PeakMemoryLineProfileHook(["methodsDIR_CuPy.py"])
     hook = MaxMemoryHook()
-    with hook, line_profiler:
+    with hook:
         recon_data = LPRec(cp.copy(data), **kwargs)
 
-    line_profiler.print_report()
     # make sure estimator function is within range (80% min, 100% max)
     max_mem = (
         hook.max_mem
     )  # the amount of memory in bytes needed for the method according to memoryhook
 
-    print(f"hook all allocations: {hook.all_allocations}")
-
-    print("****** MALLOC ******")
-    for (stack, memsize) in hook.malloc_stack_traces:
-        for frame in stack:
-            if frame.filename.endswith("methodsDIR_CuPy.py"):
-                print(f"{frame.filename}:{frame.lineno} in {frame.name}: {memsize}")
-
-    print("****** FREE ******")
-    for (stack, memsize) in hook.free_stack_traces:
-        for frame in stack:
-            if frame.filename.endswith("methodsDIR_CuPy.py"):
-                print(f"{frame.filename}:{frame.lineno} in {frame.name}: {memsize}")
+    non_slice_dims_shape = (angles_number, detX_size)
+    input_data_type = np.float32()
 
     # now we estimate how much of the total memory required for this data
     (estimated_memory_bytes, subtract_bytes) = _calc_memory_bytes_LPRec(
-        (angles_number, detX_size), dtype=np.float32(), **kwargs
+        non_slice_dims_shape, dtype=input_data_type, **kwargs
     )
 
+    even_slice_count = True
     padded_slices = slices
     if (slices % 2) != 0:
+        even_slice_count = False
         padded_slices += 1
 
+    if even_slice_count:
+        input_slice_size = np.prod(non_slice_dims_shape) * input_data_type.itemsize
+        estimated_memory_bytes -= input_slice_size
+
     estimated_memory_mb = round(padded_slices * estimated_memory_bytes / (1024**2), 2)
     max_mem -= subtract_bytes
     max_mem_mb = round(max_mem / (1024**2), 2)
 
     # now we compare both memory estimations
     difference_mb = abs(estimated_memory_mb - max_mem_mb)
-    print(f"estimated_memory_mb: {estimated_memory_mb}")
-    print(f"difference_mb: {difference_mb}")
     percents_relative_maxmem = round((difference_mb / max_mem_mb) * 100)
     # the estimated_memory_mb should be LARGER or EQUAL to max_mem_mb
     # the resulting percent value should not deviate from max_mem on more than 20%