-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathsarbp.cpp
More file actions
337 lines (314 loc) · 12.5 KB
/
sarbp.cpp
File metadata and controls
337 lines (314 loc) · 12.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
#undef NDEBUG // force assertions
#include <assert.h>
#include <chrono>
#include <complex>
#include <getopt.h>
#include <iostream>
#include <Halide.h>
#include <halide_image_io.h>
#include <cnpy.h>
#if defined(WITH_MPI)
#include <mpi.h>
#endif // WITH_MPI
#include "dft.h"
#include "PlatformData.h"
#include "ImgPlane.h"
// Halide generators
#include "backprojection.h"
#include "backprojection_cuda.h"
#if defined(WITH_DISTRIBUTE)
#include "backprojection_distributed.h"
#include "backprojection_cuda_distributed.h"
#endif // WITH_DISTRIBUTE
#include "backprojection_ritsar.h"
#include "backprojection_ritsar_s.h"
#include "backprojection_ritsar_p.h"
#include "backprojection_ritsar_vp.h"
#include "backprojection_auto_m16.h"
#include "img_output_u8.h"
#include "img_output_to_dB.h"
using namespace std;
using namespace std::chrono;
using Halide::Runtime::Buffer;
// local to this file
#define DEBUG_BP 0
#define DEBUG_BP_DB 0
#define SCHED_DEFAULT "cpu"
// the following defaults are the same as RITSAR
#define DB_MIN_DEFAULT 0
#define DB_MAX_DEFAULT 0
#define UPSAMPLE_DEFAULT 6
#define TAYLOR_DEFAULT 20
#define RES_FACTOR_DEFAULT RES_FACTOR
#define ASPECT_DEFAULT ASPECT
static const char short_options[] = "p:o:d:D:s:t:u:r:a:n:h";
static const struct option long_options[] = {
{"platform-dir", required_argument, NULL, 'p'},
{"output", required_argument, NULL, 'o'},
{"db-min", required_argument, NULL, 'd'},
{"db-max", required_argument, NULL, 'D'},
{"schedule", required_argument, NULL, 's'},
{"taylor", required_argument, NULL, 't'},
{"upsample", required_argument, NULL, 'u'},
{"res-factor", required_argument, NULL, 'r'},
{"aspect", required_argument, NULL, 'a'},
{"num-iters", required_argument, NULL, 'n'},
{"help", no_argument, NULL, 'h'},
{0, 0, 0, 0}
};
static void print_usage(string prog, ostream& os) {
os << "Usage: " << prog << " -p DIR -o FILE [OPTION]..." << endl;
os << "Options:" << endl;
os << " -p, --platform-dir=DIR Platform input directory" << endl;
os << " -o, --output=FILE Output image file (PNG)" << endl;
os << " -d, --db-min=REAL Output image min dB" << endl;
os << " Default: " << DB_MIN_DEFAULT << endl;
os << " -D, --db-max=REAL Output image max dB" << endl;
os << " Default: " << DB_MAX_DEFAULT << endl;
os << " -s, --schedule=NAME One of: cpu[_distributed]" << endl;
os << " cuda[_distributed]" << endl;
os << " ritsar[-s|-p|-vp]" << endl;
os << " auto-m16" << endl;
os << " Default: " << SCHED_DEFAULT << endl;
os << " -t, --taylor=INT Taylor count" << endl;
os << " Default: " << TAYLOR_DEFAULT << endl;
os << " -u, --upsample=INT Upsample factor" << endl;
os << " Default: " << UPSAMPLE_DEFAULT << endl;
os << " -r, --res-factor=REAL Image res in units of theoretical resolution size" << endl;
os << " Default: " << RES_FACTOR_DEFAULT << endl;
os << " -a, --aspect=REAL Aspect ratio of range to cross range" << endl;
os << " Default: " << ASPECT_DEFAULT << endl;
os << " -n, --num-iters=INT Number of backprojection iterations" << endl;
os << " Default: 1" << endl;
os << " -h, --help Print this message and exit" << endl;
}
int main(int argc, char **argv) {
string platform_dir = "";
string output_png = "";
string bp_sched = SCHED_DEFAULT;
int taylor = TAYLOR_DEFAULT;
int upsample = UPSAMPLE_DEFAULT;
double dB_min = DB_MIN_DEFAULT;
double dB_max = DB_MAX_DEFAULT;
double res_factor = RES_FACTOR_DEFAULT;
double aspect = ASPECT_DEFAULT;
uint32_t num_iters = 1;
while (1) {
switch (getopt_long(argc, argv, short_options, long_options, NULL)) {
case -1:
break;
case 'p':
platform_dir = string(optarg);
continue;
case 'o':
output_png = string(optarg);
continue;
case 'd':
dB_min = atof(optarg);
continue;
case 'D':
dB_max = atof(optarg);
continue;
case 's':
bp_sched = string(optarg);
continue;
case 't':
taylor = atoi(optarg);
continue;
case 'u':
upsample = atoi(optarg);
continue;
case 'r':
res_factor = atof(optarg);
continue;
case 'a':
aspect = atof(optarg);
continue;
case 'n':
num_iters = atoi(optarg);
continue;
case 'h':
print_usage(argv[0], cout);
return 0;
default:
print_usage(argv[0], cerr);
return EXIT_FAILURE;
}
break;
}
if (platform_dir.empty() || output_png.empty()) {
cerr << "Missing required parameters" << endl;
print_usage(argv[0], cerr);
return EXIT_FAILURE;
}
if (dB_min >= dB_max) {
// Can add support in img_output_u8(...) to avoid this constraint
cerr << "Constraint failed: dB_min < dB_max" << endl;
print_usage(argv[0], cerr);
return EXIT_FAILURE;
}
if (taylor < 1) {
cerr << "Constraint failed: taylor >= 1" << endl;
print_usage(argv[0], cerr);
return EXIT_FAILURE;
}
// TODO: How can upsample be disabled?
if (upsample < 1) {
cerr << "Constraint failed: upsample >= 1" << endl;
print_usage(argv[0], cerr);
return EXIT_FAILURE;
}
if (res_factor <= 0) {
cerr << "Constraint failed: res-factor > 0" << endl;
print_usage(argv[0], cerr);
return EXIT_FAILURE;
}
if (aspect <= 0) {
cerr << "Constraint failed: aspect > 0" << endl;
print_usage(argv[0], cerr);
return EXIT_FAILURE;
}
// determine which backprojection implementation to use
auto backprojection_impl = backprojection;
bool is_distributed = false;
if (bp_sched == "cpu") {
backprojection_impl = backprojection;
cout << "Using schedule for CPU only" << endl;
} else if (bp_sched == "cpu_distributed") {
#if defined(WITH_DISTRIBUTE)
backprojection_impl = backprojection_distributed;
is_distributed = true;
cout << "Using schedule for distributed CPU" << endl;
#else
cerr << "Distributed schedules require distributed support in Halide" << endl;
return EXIT_FAILURE;
#endif // WITH_DISTRIBUTE
} else if (bp_sched == "cuda") {
backprojection_impl = backprojection_cuda;
cout << "Using schedule with CUDA" << endl;
} else if (bp_sched == "cuda_distributed") {
#if defined(WITH_DISTRIBUTE)
backprojection_impl = backprojection_cuda_distributed;
is_distributed = true;
cout << "Using schedule for distributed CUDA" << endl;
#else
cerr << "Distributed schedules require distributed support in Halide" << endl;
return EXIT_FAILURE;
#endif // WITH_DISTRIBUTE
} else if (bp_sched == "ritsar") {
backprojection_impl = backprojection_ritsar;
cout << "Using RITSAR baseline (vectorize)" << endl;
} else if (bp_sched == "ritsar-s") {
backprojection_impl = backprojection_ritsar_s;
cout << "Using RITSAR baseline (serial)" << endl;
} else if (bp_sched == "ritsar-p") {
backprojection_impl = backprojection_ritsar_p;
cout << "Using RITSAR baseline (parallel)" << endl;
} else if (bp_sched == "ritsar-vp") {
backprojection_impl = backprojection_ritsar_vp;
cout << "Using RITSAR baseline (vectorize+parallel)" << endl;
} else if (bp_sched == "auto-m16") {
backprojection_impl = backprojection_auto_m16;
cout << "Using CPU autoschedule (Mullapudi2016)" << endl;
} else {
cerr << "Unknown schedule: " << bp_sched << endl;
print_usage(argv[0], cerr);
return EXIT_FAILURE;
}
int rank = 0, numprocs = 0;
#if defined(WITH_MPI)
if (is_distributed) {
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
}
#endif // WITH_MPI
auto start = steady_clock::now();
PlatformData pd = platform_load(platform_dir);
auto stop = steady_clock::now();
cout << "Loaded platform data in "
<< duration_cast<milliseconds>(stop - start).count() << " ms" << endl;
cout << "Number of pulses: " << pd.npulses << endl;
cout << "Pulse sample size: " << pd.nsamples << endl;
const float *n_hat = pd.n_hat.has_value() ? pd.n_hat.value().begin() : &N_HAT[0];
start = steady_clock::now();
ImgPlane ip = img_plane_create(pd, res_factor, n_hat, aspect, upsample);
stop = steady_clock::now();
cout << "Computed image plane parameters in "
<< duration_cast<milliseconds>(stop - start).count() << " ms" << endl;
cout << "X length: " << ip.nu << endl;
cout << "Y length: " << ip.nv << endl;
// Compute FFT width (power of 2)
int N_fft = static_cast<int>(pow(2, static_cast<int>(log2(pd.nsamples * upsample)) + 1));
// FFTW: init shared context
dft_init_fftw(static_cast<size_t>(N_fft));
// backprojection
Buffer<double, 3> buf_bp(nullptr, {2, ip.u.dim(0).extent(), ip.v.dim(0).extent()});
#if defined(WITH_DISTRIBUTE)
if (is_distributed) {
buf_bp.set_distributed({2, ip.u.dim(0).extent(), ip.v.dim(0).extent()});
// Query local buffer size
backprojection_impl(pd.phs, pd.k_r, taylor, N_fft, pd.delta_r, ip.u, ip.v, pd.pos, ip.pixel_locs, buf_bp);
}
#endif // WITH_DISTRIBUTE
buf_bp.allocate();
int rv = 0;
for (uint32_t i = 1; i <= num_iters; i++) {
cout << "(" << i << ") Halide backprojection start " << endl;
start = steady_clock::now();
rv = backprojection_impl(pd.phs, pd.k_r, taylor, N_fft, pd.delta_r, ip.u, ip.v, pd.pos, ip.pixel_locs, buf_bp);
stop = steady_clock::now();
cout << "(" << i << ") Halide backprojection returned " << rv << " in "
<< duration_cast<milliseconds>(stop - start).count() << " ms" << endl;
if (rv != 0) {
return rv;
}
}
#if DEBUG_BP
vector<size_t> shape_bp { static_cast<size_t>(buf_bp.dim(2).extent()),
static_cast<size_t>(buf_bp.dim(1).extent()) };
cnpy::npy_save("sarbp_debug-bp.npy", (complex<double> *)buf_bp.begin(), shape_bp);
#endif
// FFTW: clean up shared context
dft_destroy_fftw();
if (!is_distributed || rank == 0) {
// Convert to dB
Buffer<double, 2> buf_bp_dB(ip.u.dim(0).extent(), ip.v.dim(0).extent());
cout << "Halide dB conversion start" << endl;
start = steady_clock::now();
rv = img_output_to_dB(buf_bp, buf_bp_dB);
stop = steady_clock::now();
cout << "Halide dB conversion returned " << rv << " in "
<< duration_cast<milliseconds>(stop - start).count() << " ms" << endl;
if (rv != 0) {
return rv;
}
#if DEBUG_BP_DB
vector<size_t> shape_bp_dB { static_cast<size_t>(buf_bp_dB.dim(1).extent()),
static_cast<size_t>(buf_bp_dB.dim(0).extent()) };
cnpy::npy_save("sarbp_debug-bp_dB.npy", (double *)buf_bp_dB.begin(), shape_bp_dB);
#endif
// Produce output image
Buffer<uint8_t, 2> buf_bp_u8(buf_bp_dB.dim(0).extent(), buf_bp_dB.dim(1).extent());
cout << "Halide PNG production start" << endl;
start = steady_clock::now();
rv = img_output_u8(buf_bp_dB, dB_min, dB_max, buf_bp_u8);
stop = steady_clock::now();
cout << "Halide PNG production returned " << rv << " in "
<< duration_cast<milliseconds>(stop - start).count() << " ms" << endl;
if (rv != 0) {
return rv;
}
start = steady_clock::now();
Halide::Tools::convert_and_save_image(buf_bp_u8, output_png);
stop = steady_clock::now();
cout << "Wrote " << output_png << " in "
<< duration_cast<milliseconds>(stop - start).count() << " ms" << endl;
}
#if defined(WITH_MPI)
if (is_distributed) {
MPI_Finalize();
}
#endif // WITH_MPI
return rv;
}