Skip to content

Commit 5f1dc29

Browse files
Merge branch 'fix/residuals_interpolation', remote-tracking branch 'origin' into development
2 parents 1dfcc5e + a62c9f1 commit 5f1dc29

11 files changed

Lines changed: 1165 additions & 762 deletions

File tree

include/MSFITSIO.cuh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ __host__ void MScopy(const char* in_dir, const char* in_dir_dest);
221221
__host__ void modelToHost(std::vector<Field>& fields,
222222
MSData data,
223223
int num_gpus,
224-
int firstgpu);
224+
int firstgpu,
225+
bool apply_hermitian_conjugation = true);
225226
__host__ void writeMS(const char* outfile,
226227
const char* out_col,
227228
std::vector<Field> fields,

include/classes/ckernel.cuh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,5 +548,33 @@ class CKernel {
548548
sizeof(float) * this->m_times_n,
549549
cudaMemcpyHostToDevice));
550550
};
551+
552+
/**
553+
* Normalize kernel so that sum of all values equals 1.
554+
* This ensures proper energy conservation for gridding/degridding operations.
555+
* For convolution gridding: grid[i] = Σ(kernel * vis)
556+
* For degridding: vis = Σ(kernel * grid[i])
557+
* Normalization ensures these operations are properly matched.
558+
*/
559+
__host__ void normalizeKernel() {
560+
float kernel_sum = 0.0f;
561+
562+
// Compute sum of all kernel values
563+
for (int i = 0; i < this->m; i++) {
564+
for (int j = 0; j < this->n; j++) {
565+
kernel_sum += this->kernel[this->n * i + j];
566+
}
567+
}
568+
569+
// Normalize kernel so sum = 1
570+
if (kernel_sum > 0.0f) {
571+
float norm_factor = 1.0f / kernel_sum;
572+
for (int i = 0; i < this->m; i++) {
573+
for (int j = 0; j < this->n; j++) {
574+
this->kernel[this->n * i + j] *= norm_factor;
575+
}
576+
}
577+
}
578+
};
551579
};
552580
#endif // CKERNEL_CUH

include/functions.cuh

Lines changed: 52 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,6 @@ __host__ void griddedTogrid(std::vector<cufftComplex>& Vm_gridded,
9696
long M,
9797
long N,
9898
int numvis);
99-
__host__ void getOriginalVisibilitiesBack(std::vector<Field>& fields,
100-
MSData data,
101-
int num_gpus,
102-
int firstgpu,
103-
int blockSizeV);
10499
__host__ void degridding(std::vector<Field>& fields,
105100
MSData data,
106101
double deltau,
@@ -110,7 +105,10 @@ __host__ void degridding(std::vector<Field>& fields,
110105
int blockSizeV,
111106
long M,
112107
long N,
113-
CKernel* ckernel);
108+
CKernel* ckernel,
109+
float* I,
110+
VirtualImageProcessor* ip,
111+
MSDataset& dataset);
114112
__host__ float calculateNoiseAndBeam(std::vector<MSDataset>& datasets,
115113
int* total_visibilities,
116114
int blockSizeV,
@@ -130,6 +128,27 @@ __host__ void initFFT(varsPerGPU* vars_gpu,
130128
long N,
131129
int firstgpu,
132130
int num_gpus);
131+
// Helper function to compute visibility grid from image (common pipeline)
132+
// This encapsulates the repeated pattern: calculateInu -> apply_beam -> apply_GCF -> FFT2D -> phase_rotate
133+
__host__ void computeImageToVisibilityGrid(
134+
float* I,
135+
VirtualImageProcessor* ip,
136+
varsPerGPU* vars_gpu,
137+
int gpu_idx,
138+
long M,
139+
long N,
140+
float nu,
141+
float ref_xobs_pix,
142+
float ref_yobs_pix,
143+
float phs_xobs_pix,
144+
float phs_yobs_pix,
145+
float antenna_diameter,
146+
float pb_factor,
147+
float pb_cutoff,
148+
int primary_beam,
149+
float fg_scale,
150+
CKernel* ckernel,
151+
bool fft_shift);
133152
__host__ void FFT2D(cufftComplex* output_data,
134153
cufftComplex* input_data,
135154
cufftHandle plan,
@@ -356,10 +375,12 @@ __global__ void newPNoPositivity(cufftComplex* p,
356375
float xmin,
357376
long N);
358377
__global__ void clip(cufftComplex* I, long N, float MINPIX);
359-
__global__ void hermitianSymmetry(double3* UVW,
360-
cufftComplex* Vo,
361-
float freq,
362-
int numVisibilities);
378+
__global__ void applyHermitianSymmetry(double3* UVW,
379+
cufftComplex* Vo,
380+
int numVisibilities);
381+
__global__ void convertUVWToLambda(double3* UVW,
382+
float freq,
383+
int numVisibilities);
363384
__global__ void distance_image(float* distance_image,
364385
float xobs,
365386
float yobs,
@@ -388,26 +409,25 @@ __global__ void phase_rotate(cufftComplex* __restrict__ data,
388409
long M,
389410
long N,
390411
double xphs,
391-
double yphs);
412+
double yphs,
413+
double crpix1,
414+
double crpix2,
415+
bool dc_at_center);
392416
// Optimized bilinear interpolation kernels using regular global memory with
393417
// __ldg()
394-
__global__ void vis_mod(cufftComplex* __restrict__ Vm,
395-
const cufftComplex* __restrict__ V,
396-
const double3* __restrict__ UVW,
397-
float* __restrict__ weight,
398-
const double deltau,
399-
const double deltav,
400-
const long numVisibilities,
401-
const long N);
402-
__global__ void vis_mod2(cufftComplex* __restrict__ Vm,
403-
const cufftComplex* __restrict__ V,
404-
const double3* __restrict__ UVW,
405-
float* __restrict__ weight,
406-
const double deltau,
407-
const double deltav,
408-
const long numVisibilities,
409-
const long N,
410-
const float N_half);
418+
// Bilinear interpolation of visibilities from gridded visibility plane
419+
// dc_at_center: true if DC component is at center (N/2, M/2), false if at corner (0,0)
420+
__global__ void bilinearInterpolateVisibility(
421+
cufftComplex* __restrict__ Vm,
422+
const cufftComplex* __restrict__ V,
423+
const double3* __restrict__ UVW,
424+
float* __restrict__ weight,
425+
const double deltau,
426+
const double deltav,
427+
const long numVisibilities,
428+
const long M,
429+
const long N,
430+
const bool dc_at_center);
411431
__global__ void residual(cufftComplex* __restrict__ Vr,
412432
const cufftComplex* __restrict__ Vm,
413433
const cufftComplex* __restrict__ Vo,
@@ -633,6 +653,7 @@ __global__ void CGGradCondition(float* temp,
633653
int image);
634654
__global__ void searchDirection_LBFGS(float* xi, long N, long M, int image);
635655
__global__ void fftshift_2D(cufftComplex* data, int N1, int N2);
656+
__global__ void ifftshift_2D(cufftComplex* data, int N1, int N2);
636657
__global__ void do_griddingGPU(float3* uvw,
637658
cufftComplex* Vo,
638659
cufftComplex* Vo_g,
@@ -657,5 +678,8 @@ __global__ void degriddingGPU(double3* uvw,
657678
int kernel_n,
658679
int supportX,
659680
int supportY);
681+
__global__ void apply_GCF(cufftComplex* __restrict__ image,
682+
const float* __restrict__ gcf,
683+
long N);
660684

661685
#endif

src/MSFITSIO.cu

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -487,16 +487,16 @@ __host__ void readMS(const char* MS_name,
487487
casacore::Table maxuv_metres_tab(
488488
casacore::tableCommand(maxuv_metres_query.c_str()));
489489

490-
casacore::ROScalarColumn<casacore::Double> max_blength_col(
490+
casacore::ROScalarColumn<double> max_blength_col(
491491
maxmin_baseline_tab, "MAX_BLENGTH");
492-
casacore::ROScalarColumn<casacore::Double> min_blength_col(
492+
casacore::ROScalarColumn<double> min_blength_col(
493493
maxmin_baseline_tab, "MIN_BLENGTH");
494-
casacore::ROScalarColumn<casacore::Double> maxuv_metres_col(maxuv_metres_tab,
494+
casacore::ROScalarColumn<double> maxuv_metres_col(maxuv_metres_tab,
495495
"MAXUV");
496496

497-
casacore::ROScalarColumn<casacore::Double> min_freq_col(freq_tab, "MIN_FREQ");
498-
casacore::ROScalarColumn<casacore::Double> max_freq_col(freq_tab, "MAX_FREQ");
499-
casacore::ROScalarColumn<casacore::Double> ref_freq_col(freq_tab, "REF_FREQ");
497+
casacore::ROScalarColumn<double> min_freq_col(freq_tab, "MIN_FREQ");
498+
casacore::ROScalarColumn<double> max_freq_col(freq_tab, "MAX_FREQ");
499+
casacore::ROScalarColumn<double> ref_freq_col(freq_tab, "REF_FREQ");
500500

501501
data->nantennas = antenna_tab.nrow();
502502
data->nbaselines = (data->nantennas) * (data->nantennas - 1) / 2;
@@ -509,9 +509,9 @@ __host__ void readMS(const char* MS_name,
509509

510510
float max_wavelength = freq_to_wavelength(data->min_freq);
511511

512-
casacore::ROArrayColumn<casacore::Double> dishposition_col(antenna_tab,
512+
casacore::ROArrayColumn<double> dishposition_col(antenna_tab,
513513
"POSITION");
514-
casacore::ROScalarColumn<casacore::Double> dishdiameter_col(antenna_tab,
514+
casacore::ROScalarColumn<double> dishdiameter_col(antenna_tab,
515515
"DISH_DIAMETER");
516516
casacore::ROScalarColumn<casacore::String> dishname_col(antenna_tab, "NAME");
517517
casacore::ROScalarColumn<casacore::String> dishstation_col(antenna_tab,
@@ -584,7 +584,7 @@ __host__ void readMS(const char* MS_name,
584584

585585
data->n_internal_frequencies = spectral_window_tab.nrow();
586586

587-
casacore::ROArrayColumn<casacore::Float> chan_freq_col(spectral_window_tab,
587+
casacore::ROArrayColumn<double> chan_freq_col(spectral_window_tab,
588588
"CHAN_FREQ");
589589

590590
casacore::ROScalarColumn<casacore::Int64> n_chan_freq(spectral_window_tab,
@@ -609,7 +609,7 @@ __host__ void readMS(const char* MS_name,
609609

610610
for (int f = 0; f < data->nfields; f++) {
611611
for (int i = 0; i < data->n_internal_frequencies; i++) {
612-
casacore::Vector<float> chan_freq_vector;
612+
casacore::Vector<double> chan_freq_vector;
613613
chan_freq_vector = chan_freq_col(i);
614614
for (int j = 0; j < data->channels[i]; j++) {
615615
fields[f].nu.push_back(chan_freq_vector[j]);
@@ -837,16 +837,16 @@ __host__ void readMS(const char* MS_name,
837837
casacore::Table maxuv_metres_tab(
838838
casacore::tableCommand(maxuv_metres_query.c_str()));
839839

840-
casacore::ROScalarColumn<casacore::Double> max_blength_col(
840+
casacore::ROScalarColumn<double> max_blength_col(
841841
maxmin_baseline_tab, "MAX_BLENGTH");
842-
casacore::ROScalarColumn<casacore::Double> min_blength_col(
842+
casacore::ROScalarColumn<double> min_blength_col(
843843
maxmin_baseline_tab, "MIN_BLENGTH");
844-
casacore::ROScalarColumn<casacore::Double> maxuv_metres_col(maxuv_metres_tab,
844+
casacore::ROScalarColumn<double> maxuv_metres_col(maxuv_metres_tab,
845845
"MAXUV");
846846

847-
casacore::ROScalarColumn<casacore::Double> min_freq_col(freq_tab, "MIN_FREQ");
848-
casacore::ROScalarColumn<casacore::Double> max_freq_col(freq_tab, "MAX_FREQ");
849-
casacore::ROScalarColumn<casacore::Double> ref_freq_col(freq_tab, "REF_FREQ");
847+
casacore::ROScalarColumn<double> min_freq_col(freq_tab, "MIN_FREQ");
848+
casacore::ROScalarColumn<double> max_freq_col(freq_tab, "MAX_FREQ");
849+
casacore::ROScalarColumn<double> ref_freq_col(freq_tab, "REF_FREQ");
850850

851851
data->nantennas = antenna_tab.nrow();
852852
data->nbaselines = (data->nantennas) * (data->nantennas - 1) / 2;
@@ -859,9 +859,9 @@ __host__ void readMS(const char* MS_name,
859859

860860
float max_wavelength = freq_to_wavelength(data->min_freq);
861861

862-
casacore::ROArrayColumn<casacore::Double> dishposition_col(antenna_tab,
862+
casacore::ROArrayColumn<double> dishposition_col(antenna_tab,
863863
"POSITION");
864-
casacore::ROScalarColumn<casacore::Double> dishdiameter_col(antenna_tab,
864+
casacore::ROScalarColumn<double> dishdiameter_col(antenna_tab,
865865
"DISH_DIAMETER");
866866
casacore::ROScalarColumn<casacore::String> dishname_col(antenna_tab, "NAME");
867867
casacore::ROScalarColumn<casacore::String> dishstation_col(antenna_tab,
@@ -934,7 +934,7 @@ __host__ void readMS(const char* MS_name,
934934

935935
data->n_internal_frequencies = spectral_window_tab.nrow();
936936

937-
casacore::ROArrayColumn<casacore::Float> chan_freq_col(spectral_window_tab,
937+
casacore::ROArrayColumn<float> chan_freq_col(spectral_window_tab,
938938
"CHAN_FREQ");
939939

940940
casacore::ROScalarColumn<casacore::Int64> n_chan_freq(spectral_window_tab,
@@ -1114,7 +1114,8 @@ __host__ void MScopy(const char* in_dir, const char* in_dir_dest) {
11141114
__host__ void modelToHost(std::vector<Field>& fields,
11151115
MSData data,
11161116
int num_gpus,
1117-
int firstgpu) {
1117+
int firstgpu,
1118+
bool apply_hermitian_conjugation) {
11181119
for (int f = 0; f < data.nfields; f++) {
11191120
for (int i = 0; i < data.total_frequencies; i++) {
11201121
cudaSetDevice((i % num_gpus) + firstgpu);
@@ -1125,11 +1126,17 @@ __host__ void modelToHost(std::vector<Field>& fields,
11251126
sizeof(cufftComplex) *
11261127
fields[f].numVisibilitiesPerFreqPerStoke[i][s],
11271128
cudaMemcpyDeviceToHost));
1128-
for (int j = 0; j < fields[f].numVisibilitiesPerFreqPerStoke[i][s];
1129-
j++) {
1130-
if (fields[f].visibilities[i][s].uvw[j].x > 0) {
1131-
fields[f].visibilities[i][s].Vm[j] =
1132-
cuConjf(fields[f].visibilities[i][s].Vm[j]);
1129+
1130+
// Apply conjugation only if Hermitian symmetry was applied to coordinates
1131+
// (e.g., via hermitianSymmetry kernel). If degriddingGPU was used, it already
1132+
// handles Hermitian symmetry internally, so no conjugation needed here.
1133+
if (apply_hermitian_conjugation) {
1134+
for (int j = 0; j < fields[f].numVisibilitiesPerFreqPerStoke[i][s];
1135+
j++) {
1136+
if (fields[f].visibilities[i][s].uvw[j].x > 0) {
1137+
fields[f].visibilities[i][s].Vm[j] =
1138+
cuConjf(fields[f].visibilities[i][s].Vm[j]);
1139+
}
11331140
}
11341141
}
11351142
}

0 commit comments

Comments
 (0)