@@ -1993,13 +1993,13 @@ __host__ void degridding(std::vector<Field>& fields,
19931993 blockSizeV);
19941994 }
19951995
1996- hermitianSymmetry<<<
1996+ /* hermitianSymmetry<<<
19971997 fields[f].device_visibilities[i][s].numBlocksUV,
19981998 fields[f].device_visibilities[i][s].threadsPerBlockUV>>>(
19991999 fields[f].device_visibilities[i][s].uvw,
20002000 fields[f].device_visibilities[i][s].Vo, fields[f].nu[i],
20012001 fields[f].numVisibilitiesPerFreqPerStoke[i][s]);
2002- checkCudaErrors (cudaDeviceSynchronize ());
2002+ checkCudaErrors(cudaDeviceSynchronize());*/
20032003
20042004 // Interpolation / Degridding
20052005 vis_mod2<<<fields[f].device_visibilities[i][s].numBlocksUV,
@@ -3596,8 +3596,9 @@ __global__ void DChi2_SharedMemory(float* noise,
35963596 int y0 = phs_yobs;
35973597 double x = (j - x0) * DELTAX * RPDEG_D;
35983598 double y = (i - y0) * DELTAY * RPDEG_D;
3599+ double z = sqrtf (1.0 - x * x - y * y);
35993600
3600- float Ukv, Vkv, cosk, sink, atten;
3601+ float Ukv, Vkv, Wkv, cosk, sink, atten;
36013602
36023603 double * u_shared = s_array;
36033604 double * v_shared = (double *)&u_shared[numVisibilities];
@@ -3608,7 +3609,8 @@ __global__ void DChi2_SharedMemory(float* noise,
36083609 for (int v = 0 ; v < numVisibilities; v++) {
36093610 u_shared[v] = UVW[v].x ;
36103611 v_shared[v] = UVW[v].y ;
3611- w_shared[v] = w[v];
3612+ w_shared[v] = UVW[v].z ;
3613+ weight_shared[v] = w[v];
36123614 Vr_shared[v] = Vr[v];
36133615 printf (" u: %f, v:%f, weight: %f, real: %f, imag: %f\n " , u_shared[v],
36143616 v_shared[v], w_shared[v], Vr_shared[v].x , Vr_shared[v].y );
@@ -3624,20 +3626,22 @@ __global__ void DChi2_SharedMemory(float* noise,
36243626 for (int v = 0 ; v < numVisibilities; v++) {
36253627 Ukv = x * u_shared[v];
36263628 Vkv = y * v_shared[v];
3629+ Wkv = (z - 1.0 ) * w_shared[v];
36273630#if (__CUDA_ARCH__ >= 300)
36283631 sincospif (2.0 * (Ukv + Vkv), &sink, &cosk);
36293632#else
3630- cosk = cospif (2.0 * (Ukv + Vkv));
3631- sink = sinpif (2.0 * (Ukv + Vkv));
3633+ cosk = cospif (2.0 * (Ukv + Vkv + Wkv ));
3634+ sink = sinpif (2.0 * (Ukv + Vkv + Wkv ));
36323635#endif
3633- dchi2 +=
3634- w_shared[v] * ((Vr_shared[v].x * cosk) + (Vr_shared[v].y * sink));
3636+ dchi2 += weight_shared[v] *
3637+ ((Vr_shared[v].x * cosk) + (Vr_shared[v].y * sink));
36353638 }
36363639
36373640 dchi2 *= atten;
36383641
36393642 if (normalize)
36403643 dchi2 /= numVisibilities;
3644+
36413645 dChi2[N * i + j] = -1 .0f * dchi2;
36423646 }
36433647}
@@ -3671,7 +3675,6 @@ __global__ void DChi2(float* noise,
36713675 double x = (j - x0) * DELTAX * RPDEG_D;
36723676 double y = (i - y0) * DELTAY * RPDEG_D;
36733677 double z = sqrtf (1 - x * x - y * y);
3674- double z_minus_one = z - 1.0 ;
36753678
36763679 float Ukv, Vkv, Wkv, cosk, sink, atten;
36773680
@@ -3683,7 +3686,7 @@ __global__ void DChi2(float* noise,
36833686 for (int v = 0 ; v < numVisibilities; v++) {
36843687 Ukv = x * UVW[v].x ;
36853688 Vkv = y * UVW[v].y ;
3686- Wkv = z_minus_one * UVW[v].z ;
3689+ Wkv = (z - 1.0 ) * UVW[v].z ;
36873690
36883691#if (__CUDA_ARCH__ >= 300)
36893692 sincospif (2.0 * (Ukv + Vkv + Wkv), &sink, &cosk);
@@ -3695,6 +3698,7 @@ __global__ void DChi2(float* noise,
36953698 }
36963699
36973700 dchi2 *= fg_scale * atten;
3701+
36983702 if (normalize)
36993703 dchi2 /= numVisibilities;
37003704
@@ -3732,7 +3736,6 @@ __global__ void DChi2(float* noise,
37323736 double x = (j - x0) * DELTAX * RPDEG_D;
37333737 double y = (i - y0) * DELTAY * RPDEG_D;
37343738 double z = sqrtf (1 - x * x - y * y);
3735- double z_minus_one = z - 1.0 ;
37363739
37373740 float Ukv, Vkv, Wkv, cosk, sink, atten, gcf_i;
37383741
@@ -3744,7 +3747,7 @@ __global__ void DChi2(float* noise,
37443747 for (int v = 0 ; v < numVisibilities; v++) {
37453748 Ukv = x * UVW[v].x ;
37463749 Vkv = y * UVW[v].y ;
3747- Wkv = z_minus_one * UVW[v].z ;
3750+ Wkv = (z - 1.0 ) * UVW[v].z ;
37483751#if (__CUDA_ARCH__ >= 300)
37493752 sincospif (2.0 * (Ukv + Vkv + Wkv), &sink, &cosk);
37503753#else
@@ -3758,6 +3761,7 @@ __global__ void DChi2(float* noise,
37583761
37593762 if (normalize)
37603763 dchi2 /= numVisibilities;
3764+
37613765 dChi2[N * i + j] = -1 .0f * dchi2;
37623766 }
37633767}
0 commit comments