Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/source/bibliography/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,14 @@ @article{beck2009fast
year={2009},
publisher={SIAM}
}

@article{munch2009stripe,
title={Stripe and ring artifact removal with combined wavelet—Fourier filtering},
author={M{\"u}nch, Beat and Trtik, Pavel and Marone, Federica and Stampanoni, Marco},
journal={Optics express},
volume={17},
number={10},
pages={8567--8591},
year={2009},
publisher={Optical Society of America}
}
1 change: 1 addition & 0 deletions httomolibgpu/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from httomolibgpu.prep.phase import paganin_filter, paganin_filter_savu_legacy
from httomolibgpu.prep.stripe import (
remove_stripe_based_sorting,
remove_stripe_fw,
remove_stripe_ti,
remove_all_stripe,
raven_filter,
Expand Down
155 changes: 155 additions & 0 deletions httomolibgpu/cuda_kernels/remove_stripe_fw.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
template<int WSize>
__global__ void grouped_convolution_x(
int dim_x,
int dim_y,
int dim_z,
const float* in,
int in_stride_x,
int in_stride_y,
int in_stride_z,
float* out,
int out_stride_z,
int out_stride_group,
const float* w
)
{
const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
{
return;
}

constexpr int out_groups = 2;
for (int i = 0; i < out_groups; ++i)
{
float acc = 0.F;
for (int j = 0; j < WSize; ++j)
{
const int w_idx = i * WSize + j;
const int in_idx = (g_thd_x * in_stride_x + j) + g_thd_y * in_stride_y + g_thd_z * in_stride_z;
acc += w[w_idx] * in[in_idx];
}
const int out_idx = g_thd_x + g_thd_y * dim_x + g_thd_z * out_stride_z + i * out_stride_group;
out[out_idx] = acc;
}
}

template<int WSize>
__global__ void grouped_convolution_y(
int dim_x,
int dim_y,
int dim_z,
const float* in,
int in_stride_x,
int in_stride_y,
int in_stride_z,
int in_stride_group,
float* out,
int out_stride_z,
int out_stride_group,
const float* w
)
{
const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
{
return;
}

constexpr int in_groups = 2;
constexpr int out_groups = 2;
constexpr int item_stride_y = 2;
for (int group = 0; group < in_groups; ++group)
{
for (int i = 0; i < out_groups; ++i)
{
float acc = 0.F;
for (int j = 0; j < WSize; ++j)
{
const int w_idx = (out_groups * group + i) * WSize + j;
const int in_idx = g_thd_x * in_stride_x + (item_stride_y * g_thd_y + j) * in_stride_y + group * in_stride_group + g_thd_z * in_stride_z;
acc += w[w_idx] * in[in_idx];
}
const int out_idx = g_thd_x + g_thd_y * dim_x + g_thd_z * out_stride_z + (out_groups * group + i) * out_stride_group;
out[out_idx] = acc;
}
}
}

template<int WSize>
__global__ void transposed_convolution_x(
int dim_x,
int dim_y,
int dim_z,
const float* in,
int in_dim_x,
int in_stride_y,
int in_stride_z,
const float* w,
float* out
)
{
const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
{
return;
}

constexpr int item_out_stride = 2;
float acc = 0.F;
for (int i = 0; i < WSize; ++i)
{
const int in_x = (g_thd_x - i) / item_out_stride;
const int in_x_mod = (g_thd_x - i) % item_out_stride;
if (in_x_mod == 0 && in_x >= 0 && in_x < in_dim_x)
{
const int in_idx = in_x + g_thd_y * in_stride_y + g_thd_z * in_stride_z;
acc += in[in_idx] * w[i];
}
}
const int out_idx = g_thd_x + dim_x * g_thd_y + dim_x * dim_y * g_thd_z;
out[out_idx] = acc;
}

template<int WSize>
__global__ void transposed_convolution_y(
int dim_x,
int dim_y,
int dim_z,
const float* in,
int in_dim_y,
int in_stride_y,
int in_stride_z,
const float* w,
float* out
)
{
const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
{
return;
}

constexpr int item_out_stride = 2;
float acc = 0.F;
for (int i = 0; i < WSize; ++i)
{
const int in_y = (g_thd_y - i) / item_out_stride;
const int in_y_mod = (g_thd_y - i) % item_out_stride;
if (in_y_mod == 0 && in_y >= 0 && in_y < in_dim_y)
{
const int in_idx = g_thd_x + in_y * in_stride_y + g_thd_z * in_stride_z;
acc += in[in_idx] * w[i];
}
}
const int out_idx = g_thd_x + dim_x * g_thd_y + dim_x * dim_y * g_thd_z;
out[out_idx] = acc;
}
Loading
Loading