Skip to content

Commit 51aad78

Browse files
authored
Merge pull request #184 from DiamondLightSource/vocentering_gpu_offload
Vocentering gpu offload
2 parents d9f56b4 + ff1f90f commit 51aad78

File tree

13 files changed

+366
-450
lines changed

13 files changed

+366
-450
lines changed

.scripts/download_zenodo.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@ def calculate_md5(filename):
1919

2020
def download_zenodo_files(output_dir: Path):
2121
"""
22-
Download all files from Zenodo record 14627503 and verify their checksums.
22+
Download all files from Zenodo record 14652312 and verify their checksums.
2323
2424
Args:
2525
output_dir: Directory where files should be downloaded
2626
"""
2727
try:
28-
print("Fetching files from Zenodo record 14627503...")
28+
print("Fetching files from Zenodo record 14652312...")
2929
with urllib.request.urlopen(
30-
"https://zenodo.org/api/records/14627503"
30+
"https://zenodo.org/api/records/14652312"
3131
) as response:
3232
data = json.loads(response.read())
3333

docs/source/examples/Cor_largesino.ipynb

Lines changed: 0 additions & 207 deletions
This file was deleted.

docs/source/index.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,4 @@
3030

3131
examples/pipeline1_FBP
3232
examples/pipeline2_iterative
33-
examples/Cor_largesino
34-
examples/DistortionCorr
33+
examples/DistortionCorr

httomolibgpu/cuda_kernels/center_360_shifts.cu

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <cupy/complex.cuh>
22

33
extern "C" __global__ void
4-
shift_whole_shifts(const float *sino2, const float *sino3,
4+
shift_whole_shifts(const float *flip_sino, const float *comp_sino,
55
const float *__restrict__ list_shift, float *mat, int nx,
66
int nymat) {
77
int xid = threadIdx.x + blockIdx.x * blockDim.x;
@@ -17,14 +17,14 @@ shift_whole_shifts(const float *sino2, const float *sino3,
1717
float frac_part = modf(shift_col, &int_part);
1818
if (abs(frac_part) > 1e-5f) {
1919
// we have a floating point shift, so we only roll in
20-
// sino3, but we leave the rest for later using scipy
20+
// comp_sino, but we leave the rest for later using scipy
2121
int shift_int =
2222
shift_col >= 0.0 ? int(ceil(shift_col)) : int(floor(shift_col));
2323
if (shift_int >= 0 && xid < shift_int) {
24-
mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
24+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
2525
}
2626
if (shift_int < 0 && xid >= nx + shift_int) {
27-
mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
27+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
2828
}
2929
} else {
3030
// we have an integer shift, so we can roll in directly
@@ -33,16 +33,16 @@ shift_whole_shifts(const float *sino2, const float *sino3,
3333
if (shift_int >= 0) {
3434
if (xid >= shift_int) {
3535
mat[zid * nymat * nx + yid * nx + xid] =
36-
sino2[yid * nx + xid - shift_int];
36+
flip_sino[yid * nx + xid - shift_int];
3737
} else {
38-
mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
38+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
3939
}
4040
} else {
4141
if (xid < nx + shift_int) {
4242
mat[zid * nymat * nx + yid * nx + xid] =
43-
sino2[yid * nx + xid - shift_int];
43+
flip_sino[yid * nx + xid - shift_int];
4444
} else {
45-
mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
45+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
4646
}
4747
}
4848
}

httomolibgpu/cuda_kernels/downsample_sino.cu

Lines changed: 0 additions & 36 deletions
This file was deleted.

httomolibgpu/cuda_kernels/generate_mask.cu

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,56 @@ extern "C" __global__ void generate_mask(const int ncol, const int nrow,
4848
}
4949

5050
mask[j * (ncol/2+1) + outi] = outval;
51+
}
52+
53+
/**
 * Fill a full-width, row-major binary mask of nrow x ncol floats, one thread
 * per element.
 *
 * Launch layout: the column index comes from the x dimension
 * (blockDim.x * blockIdx.x + threadIdx.x) and the row index is blockIdx.y,
 * so the grid needs at least ncol threads in x and nrow blocks in y.
 * No shared memory is used.
 *
 * For row j an integer half-width
 *     pos = ceil(((j - cen_row) * dv / radius) / du)
 * is computed, and columns in [cen_col - |pos|, cen_col + |pos|] are set to 1,
 * everything else to 0. Two bands are then cleared to 0:
 *   - rows    cen_row - drop .. cen_row + drop (inclusive)
 *   - columns cen_col - 1    .. cen_col + 1    (inclusive)
 *
 * Unlike generate_mask, which stores rows of (ncol/2 + 1) elements, this
 * kernel writes all ncol columns of every row.
 *
 * @param ncol     number of mask columns (row stride of `mask`)
 * @param nrow     number of mask rows
 * @param cen_col  centre column index
 * @param cen_row  centre row index
 * @param du       horizontal step used to scale the half-width
 * @param dv       vertical step used to scale the half-width
 * @param radius   divisor applied to the row offset before rounding up
 * @param drop     half-height of the zeroed row band around cen_row
 * @param mask     output buffer holding at least nrow * ncol floats
 */
extern "C" __global__ void generate_mask_full(const int ncol, const int nrow,
                                              const int cen_col, const int cen_row,
                                              const float du, const float dv,
                                              const float radius, const float drop,
                                              float *mask) {
  const int i = blockDim.x * blockIdx.x + threadIdx.x;
  const int j = blockIdx.y;

  // Guard both axes: the x grid rarely divides ncol evenly, and a grid with
  // more than nrow blocks in y must not write past the last mask row.
  if (i >= ncol || j >= nrow)
    return;

  // Half-width of the "on" interval for this row, rounded towards +infinity.
  const int pos = __float2int_ru(((j - cen_row) * dv / radius) / du);
  int pos1 = -pos + cen_col;
  int pos2 = pos + cen_col;

  // Order the interval ends and pull them back towards [0, ncol - 1].
  if (pos1 > pos2) {
    const int temp = pos1;
    pos1 = pos2;
    pos2 = temp;
    if (pos1 >= ncol) {
      pos1 = ncol - 1;
    }
    if (pos2 < 0) {
      pos2 = 0;
    }
  } else {
    if (pos1 < 0) {
      pos1 = 0;
    }
    if (pos2 >= ncol) {
      pos2 = ncol - 1;
    }
  }

  float outval = (pos1 <= i && i <= pos2) ? 1.0f : 0.0f;

  // mask[cen_row - drop: cen_row + drop + 1, :] = 0
  if (j >= cen_row - drop && j <= cen_row + drop) {
    outval = 0.0f;
  }
  // mask[:, cen_col - 1: cen_col + 2] = 0
  if (i >= cen_col - 1 && i <= cen_col + 1) {
    outval = 0.0f;
  }

  mask[j * ncol + i] = outval;
}

0 commit comments

Comments
 (0)