Skip to content

Commit d2f8363

Browse files
authored
Merge pull request #167 from neon60/raven_filter
Raven filter
2 parents a77f334 + b49a287 commit d2f8363

File tree

5 files changed

+259
-3
lines changed

5 files changed

+259
-3
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#include <cupy/complex.cuh>
2+
3+
template <typename Type>
4+
__global__ void
5+
raven_filter(
6+
complex<Type> *input,
7+
complex<Type> *output,
8+
int width, int images, int height,
9+
int u0, int n, int v0) {
10+
11+
const int px = threadIdx.x + blockIdx.x * blockDim.x;
12+
const int py = threadIdx.y + blockIdx.y * blockDim.y;
13+
const int pz = threadIdx.z + blockIdx.z * blockDim.z;
14+
15+
if (px >= width || py >= images || pz >= height)
16+
return;
17+
18+
int centerx = width / 2;
19+
int centerz = height / 2;
20+
21+
long long index = static_cast<long long>(px) +
22+
width * static_cast<long long>(py) +
23+
width * images * static_cast<long long>(pz);
24+
25+
complex<Type> value = input[index];
26+
if( pz >= (centerz - v0) && pz < (centerz + v0 + 1) ) {
27+
28+
// +1 needed to match with CPU implementation
29+
Type base = Type(px - centerx + 1) / u0;
30+
Type power = base;
31+
for( int i = 1; i < 2 * n; i++ )
32+
power *= base;
33+
34+
Type filtered_value = 1.f / (1.f + power);
35+
value *= complex<Type>(filtered_value, filtered_value);
36+
}
37+
38+
// ifftshifting positions
39+
int xshift = (width + 1) / 2;
40+
int zshift = (height + 1) / 2;
41+
int outX = (px + xshift) % width;
42+
int outZ = (pz + zshift) % height;
43+
44+
long long outIndex = static_cast<long long>(outX) +
45+
width * static_cast<long long>(py) +
46+
width * images * static_cast<long long>(outZ);
47+
48+
output[outIndex] = value;
49+
}

httomolibgpu/prep/stripe.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,24 @@
3030

3131
if cupy_run:
3232
from cupyx.scipy.ndimage import median_filter, binary_dilation, uniform_filter1d
33+
from cupyx.scipy.fft import fft2, ifft2, fftshift, ifftshift
34+
from httomolibgpu.cuda_kernels import load_cuda_module
3335
else:
3436
median_filter = Mock()
3537
binary_dilation = Mock()
3638
uniform_filter1d = Mock()
39+
fft2 = Mock()
40+
ifft2 = Mock()
41+
fftshift = Mock()
42+
ifftshift = Mock()
3743

3844
from typing import Union
3945

4046
__all__ = [
4147
"remove_stripe_based_sorting",
4248
"remove_stripe_ti",
4349
"remove_all_stripe",
50+
"raven_filter",
4451
]
4552

4653

@@ -359,6 +366,94 @@ def _rs_dead(sinogram, snr, size, matindex, norm=True):
359366
sinogram = _rs_large(sinogram, snr, size, matindex)
360367
return sinogram
361368

369+
def raven_filter(
370+
sinogram,
371+
uvalue: int = 20,
372+
nvalue: int = 4,
373+
vvalue: int = 2,
374+
pad_y: int = 20,
375+
pad_x: int = 20,
376+
pad_method: str = "edge"):
377+
"""
378+
Applies raven filter to a 3D CuPy array. For more detailed information, see :ref:`method_raven_filter`.
379+
380+
Parameters
381+
----------
382+
data : cp.ndarray
383+
Input CuPy 3D array either float32 or uint16 data type.
384+
385+
pad_y : int, optional
386+
Pad the top and bottom of projections.
387+
388+
pad_x : int, optional
389+
Pad the left and right of projections.
390+
391+
pad_method : str, optional
392+
Numpy pad method to use.
393+
394+
uvalue : int, optional
395+
The shape of filter.
396+
397+
nvalue : int, optional
398+
The shape of filter.
399+
400+
vvalue : int, optional
401+
The number of rows to be applied the filter
402+
403+
Returns
404+
-------
405+
ndarray
406+
Raven filtered 3D CuPy array in float32 data type.
407+
408+
Raises
409+
------
410+
ValueError
411+
If the input array is not three dimensional.
412+
"""
413+
414+
if sinogram.dtype != cp.float32:
415+
raise ValueError("The input data should be float32 data type")
416+
417+
# Padding of the sinogram
418+
sinogram = cp.pad(sinogram, ((pad_y, pad_y), (0, 0), (pad_x, pad_x)), mode=pad_method)
419+
420+
# FFT and shift of sinogram
421+
fft_data = fft2(sinogram, axes=(0, 2), overwrite_x=True)
422+
fft_data_shifted = fftshift(fft_data, axes=(0, 2))
423+
424+
# Calculation type
425+
calc_type = fft_data_shifted.dtype
426+
427+
# Setup various values for the filter
428+
height, images, width = sinogram.shape
429+
430+
# Set the input type of the kernel
431+
kernel_args = "raven_filter<{0}>".format(
432+
"float" if calc_type == "complex64" else "double"
433+
)
434+
435+
# setting grid/block parameters
436+
block_x = 128
437+
block_dims = (block_x, 1, 1)
438+
grid_x = (width + block_x - 1) // block_x
439+
grid_y = images
440+
grid_z = height
441+
grid_dims = (grid_x, grid_y, grid_z)
442+
params = (fft_data_shifted, fft_data, width, images, height, uvalue, nvalue, vvalue)
443+
444+
raven_module = load_cuda_module("raven_filter", name_expressions=[kernel_args])
445+
raven_filt = raven_module.get_function(kernel_args)
446+
447+
raven_filt(grid_dims, block_dims, params)
448+
449+
# raven_filt already doing ifftshifting
450+
# fft_data = ifftshift(fft_data_shifted, axes=(0, 2))
451+
sinogram = ifft2(fft_data, axes=(0, 2), overwrite_x=True)
452+
453+
# Removing padding
454+
sinogram = sinogram[pad_y:height-pad_y, :, pad_x:width-pad_x].real
455+
456+
return sinogram
362457

363458
def _create_matindex(nrow, ncol):
364459
"""

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ dev = [
5757
"toml",
5858
"imageio",
5959
"h5py",
60-
"pre-commit"
60+
"pre-commit",
61+
"pyfftw"
6162
]
6263

6364

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import numpy as np
2+
import pyfftw
3+
import pyfftw.interfaces.numpy_fft as fft
4+
5+
def raven_filter_numpy(
6+
sinogram,
7+
uvalue: int = 20,
8+
nvalue: int = 4,
9+
vvalue: int = 2,
10+
pad_y: int = 20,
11+
pad_x: int = 20,
12+
pad_method: str = "edge"):
13+
14+
# Parameters
15+
v0 = vvalue
16+
n = nvalue
17+
u0 = uvalue
18+
19+
# Make a padded copy
20+
sinogram_padded = np.pad(sinogram, ((pad_y,pad_y), (0, 0), (pad_x,pad_x)), pad_method)
21+
22+
# Size
23+
height, images, width = sinogram_padded.shape
24+
25+
# Generate filter function
26+
centerx = np.ceil(width / 2.0) - 1.0
27+
centery = np.int16(np.ceil(height / 2.0) - 1)
28+
row1 = centery - v0
29+
row2 = centery + v0 + 1
30+
listx = np.arange(width) - centerx
31+
filtershape = 1.0 / (1.0 + np.power(listx / u0, 2 * n))
32+
filtershapepad2d = np.zeros((row2 - row1, filtershape.size))
33+
filtershapepad2d[:] = np.float64(filtershape)
34+
filtercomplex = filtershapepad2d + filtershapepad2d * 1j
35+
36+
# Generate filter objects
37+
a = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
38+
b = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
39+
c = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
40+
d = pyfftw.empty_aligned((height, images, width), dtype='complex128', n=16)
41+
fft_object = pyfftw.FFTW(a, b, axes=(0, 2))
42+
ifft_object = pyfftw.FFTW(c, d, axes=(0, 2), direction='FFTW_BACKWARD')
43+
44+
sino = fft.fftshift(fft_object(sinogram_padded), axes=(0, 2))
45+
for m in range(sino.shape[1]):
46+
sino[row1:row2, m] = sino[row1:row2, m] * filtercomplex
47+
sino = ifft_object(fft.ifftshift(sino, axes=(0, 2)))
48+
sinogram = sino[pad_y:height-pad_y, :, pad_x:width-pad_x]
49+
50+
return sinogram.real

tests/test_prep/test_stripe.py

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@
33
from cupy.cuda import nvtx
44
import numpy as np
55
import pytest
6+
67
from httomolibgpu.prep.normalize import normalize
78
from httomolibgpu.prep.stripe import (
89
remove_stripe_based_sorting,
910
remove_stripe_ti,
1011
remove_all_stripe,
12+
raven_filter,
1113
)
1214
from numpy.testing import assert_allclose
13-
15+
from .stripe_cpu_reference import raven_filter_numpy
1416

1517
def test_remove_stripe_ti_on_data(data, flats, darks):
1618
# --- testing the CuPy implementation from TomoCupy ---#
@@ -51,7 +53,6 @@ def test_remove_stripe_ti_on_data(data, flats, darks):
5153
# np.median(corrected_data), np.median(corrected_host_data), rtol=1e-6
5254
# )
5355

54-
5556
def test_stripe_removal_sorting_cupy(data, flats, darks):
5657
# --- testing the CuPy port of TomoPy's implementation ---#
5758
data = normalize(data, flats, darks, cutoff=10, minus_log=True)
@@ -66,6 +67,43 @@ def test_stripe_removal_sorting_cupy(data, flats, darks):
6667
assert corrected_data.dtype == np.float32
6768
assert corrected_data.flags.c_contiguous
6869

70+
def test_stripe_raven_cupy(data, flats, darks):
71+
# --- testing the CuPy port of TomoPy's implementation ---#
72+
73+
data = normalize(data, flats, darks, cutoff=10, minus_log=True)
74+
75+
data_after_raven_gpu = raven_filter(cp.copy(data)).get()
76+
data_after_raven_cpu = raven_filter_numpy(cp.copy(data).get())
77+
78+
assert_allclose(data_after_raven_cpu, data_after_raven_gpu, rtol=0, atol=4e-01)
79+
80+
data = None #: free up GPU memory
81+
# make sure the output is float32
82+
assert data_after_raven_gpu.dtype == np.float32
83+
assert data_after_raven_gpu.shape == data_after_raven_cpu.shape
84+
85+
@pytest.mark.parametrize("uvalue", [20, 50, 100])
86+
@pytest.mark.parametrize("nvalue", [2, 4, 6])
87+
@pytest.mark.parametrize("vvalue", [2, 4])
88+
@pytest.mark.parametrize("pad_x", [0, 10, 20])
89+
@pytest.mark.parametrize("pad_y", [0, 10, 20])
90+
@cp.testing.numpy_cupy_allclose(rtol=0, atol=3e-01)
91+
def test_stripe_raven_parameters_cupy(ensure_clean_memory, xp, uvalue, nvalue, vvalue, pad_x, pad_y):
92+
# because it's random, we explicitly seed and use numpy only, to match the data
93+
np.random.seed(12345)
94+
data = np.random.random_sample(size=(256, 5, 512)).astype(np.float32) * 2.0 + 0.001
95+
data = xp.asarray(data)
96+
97+
if xp.__name__ == "numpy":
98+
results = raven_filter_numpy(
99+
data, uvalue=uvalue, nvalue=nvalue, vvalue=vvalue, pad_x=pad_x, pad_y=pad_y
100+
).astype(np.float32)
101+
else:
102+
results = raven_filter(
103+
data, uvalue=uvalue, nvalue=nvalue, vvalue=vvalue, pad_x=pad_x, pad_y=pad_y
104+
).get()
105+
106+
return xp.asarray(results)
69107

70108
@pytest.mark.perf
71109
def test_stripe_removal_sorting_cupy_performance(ensure_clean_memory):
@@ -116,6 +154,29 @@ def test_remove_stripe_ti_performance(ensure_clean_memory):
116154

117155
assert "performance in ms" == duration_ms
118156

157+
@pytest.mark.perf
158+
def test_raven_filter_performance(ensure_clean_memory):
159+
data_host = (
160+
np.random.random_sample(size=(1801, 5, 2560)).astype(np.float32) * 2.0 + 0.001
161+
)
162+
data = cp.asarray(data_host, dtype=np.float32)
163+
164+
# do a cold run first
165+
raven_filter(cp.copy(data))
166+
167+
dev = cp.cuda.Device()
168+
dev.synchronize()
169+
170+
start = time.perf_counter_ns()
171+
nvtx.RangePush("Core")
172+
for _ in range(10):
173+
# have to take copy, as data is modified in-place
174+
raven_filter(cp.copy(data))
175+
nvtx.RangePop()
176+
dev.synchronize()
177+
duration_ms = float(time.perf_counter_ns() - start) * 1e-6 / 10
178+
179+
assert "performance in ms" == duration_ms
119180

120181
def test_remove_all_stripe_on_data(data, flats, darks):
121182
# --- testing the CuPy implementation from TomoCupy ---#

0 commit comments

Comments
 (0)