@@ -1521,6 +1521,48 @@ __global__ void searchDirection(float *g, float *xi, float *h, long N)
15211521 xi[N*i+j] = h[N*i+j] = g[N*i+j];
15221522}
15231523
/**
 * Negates, in place, one M x N plane of xi.
 *
 * Launch layout: 2D. The x thread index is used as the column j and the
 * y thread index as the row i; an element is addressed as N*i + j inside
 * the plane that starts at element M*N*image.
 * (Presumably this turns a stored gradient into a descent direction --
 * confirm against the caller.)
 *
 * @param xi    buffer dereferenced on the device; modified in place
 * @param N     row length (number of columns)
 * @param M     number of rows
 * @param image plane selector used in the M*N*image offset
 */
__global__ void searchDirection_LBFGS(float *xi, long N, long M, int image)
{
    const int j = threadIdx.x + blockDim.x * blockIdx.x;
    const int i = threadIdx.y + blockDim.y * blockIdx.y;

    // A grid rounded up past the plane would otherwise write out of bounds
    // (or, for j >= N, alias into the next row and negate it twice).
    if (i >= M || j >= N)
        return;

    const long plane = M * N;
    const long pixel = N * i + j;

    xi[plane * image + pixel] *= -1.0f;
}
1531+
/**
 * Element-wise product of one M x N plane of vec_1 with one M x N plane of
 * vec_2, written to aux_vector[N*i + j].
 *
 * Launch layout: 2D. The x thread index is the column j, the y thread index
 * is the row i. Threads outside the M x N plane do nothing.
 * (The name suggests the caller sums aux_vector afterwards to obtain a dot
 * product; that reduction is not part of this kernel.)
 *
 * @param aux_vector output, at least M*N elements
 * @param vec_1      first operand; plane read at offset M*N*image*k + M*N*image
 * @param vec_2      second operand; plane read at offset M*N*image*h + M*N*image
 * @param k          history slot selector for vec_1
 * @param h          history slot selector for vec_2
 * @param M          number of rows
 * @param N          row length (number of columns)
 * @param image      value used in both terms of the plane offset
 *
 * NOTE(review): with image == 0 the k/h terms vanish, so every k and h map
 * to the same plane. The offset formula is kept exactly as written because
 * the intended buffer layout is not visible here -- confirm with the caller.
 */
__global__ void getDot_LBFGS_ff(float *aux_vector, float *vec_1, float *vec_2, int k, int h, int M, int N, int image)
{
    const int j = threadIdx.x + blockDim.x * blockIdx.x;
    const int i = threadIdx.y + blockDim.y * blockIdx.y;

    // Guard against grids rounded up past the plane.
    if (i >= M || j >= N)
        return;

    // 64-bit arithmetic: M*N*image*k can exceed the int range.
    const long long pixel = (long long)N * i + j;
    const long long image_offset = (long long)M * N * image;

    const float lhs = vec_1[image_offset * k + image_offset + pixel];
    const float rhs = vec_2[image_offset * h + image_offset + pixel];

    aux_vector[pixel] = lhs * rhs;
}
1539+
/**
 * In-place scaled accumulation on one M x N plane:
 *     d_q[plane(image)] += alpha * d_y[plane(image) + M*N*k]
 *
 * Launch layout: 2D. The x thread index is the column j, the y thread index
 * is the row i. Threads outside the M x N plane do nothing.
 *
 * @param d_q   accumulator; plane at offset M*N*image is updated
 * @param alpha scale factor applied to the d_y element
 * @param d_y   source; plane read at offset M*N*image + M*N*k
 * @param k     slot selector for d_y
 * @param M     number of rows
 * @param N     row length (number of columns)
 * @param image plane selector used in the M*N*image offset
 *
 * NOTE(review): the d_y offset here (M*N*image + M*N*k) differs from the one
 * calculateSandY writes with and getDot_LBFGS_ff reads with
 * (M*N*image*k + M*N*image). The two agree only when image == 1 or k == 0.
 * Kept as written because the intended layout is not visible here -- confirm.
 */
__global__ void updateQ(float *d_q, float alpha, float *d_y, int k, int M, int N, int image)
{
    const int j = threadIdx.x + blockDim.x * blockIdx.x;
    const int i = threadIdx.y + blockDim.y * blockIdx.y;

    // Guard against grids rounded up past the plane.
    if (i >= M || j >= N)
        return;

    // 64-bit arithmetic so large planes / slot counts cannot overflow int.
    const long long plane = (long long)M * N;
    const long long pixel = (long long)N * i + j;
    const long long image_offset = plane * image;

    d_q[image_offset + pixel] += alpha * d_y[image_offset + plane * k + pixel];
}
1547+
/**
 * Writes d_r = d_q * scalar, element by element, on the M x N plane that
 * starts at element M*N*image in both buffers.
 *
 * Launch layout: 2D. The x thread index is the column j, the y thread index
 * is the row i. Threads outside the M x N plane do nothing.
 *
 * @param d_r    output buffer
 * @param d_q    input buffer
 * @param scalar multiplier applied to every element
 * @param M      number of rows
 * @param N      row length (number of columns)
 * @param image  plane selector used in the M*N*image offset
 */
__global__ void getR(float *d_r, float *d_q, float scalar, int M, int N, int image)
{
    const int j = threadIdx.x + blockDim.x * blockIdx.x;
    const int i = threadIdx.y + blockDim.y * blockIdx.y;

    // Guard against grids rounded up past the plane.
    if (i >= M || j >= N)
        return;

    // 64-bit arithmetic so M*N*image cannot overflow int.
    const long long idx = (long long)M * N * image + (long long)N * i + j;

    d_r[idx] = d_q[idx] * scalar;
}
1555+
/**
 * Fills one M x N plane of d_y and d_s from the current and previous state:
 *     d_y = xi - (-1 * xi_old)
 *     d_s = p  - p_old
 * Inputs are read from the plane at offset M*N*image; outputs are written to
 * the plane at offset M*N*image*iter + M*N*image.
 *
 * Launch layout: 2D. The x thread index is the column j, the y thread index
 * is the row i. Threads outside the M x N plane do nothing.
 *
 * @param d_y    output difference built from xi and xi_old
 * @param d_s    output difference built from p and p_old
 * @param p      current values
 * @param xi     current values paired with xi_old
 * @param p_old  previous values of p
 * @param xi_old previous values paired with xi; its sign is flipped before
 *               subtracting (presumably because it was stored negated as a
 *               search direction -- confirm against the caller)
 * @param iter   slot selector for the output planes
 * @param M      number of rows
 * @param N      row length (number of columns)
 * @param image  value used in both terms of the plane offsets
 *
 * NOTE(review): with image == 0 the iter term vanishes, so every iter writes
 * the same output plane; updateQ also reads d_y with a different offset
 * formula (M*N*image + M*N*k). Kept as written because the intended layout
 * is not visible here -- confirm with the caller.
 */
__global__ void calculateSandY(float *d_y, float *d_s, float *p, float *xi, float *p_old, float *xi_old, int iter, int M, int N, int image)
{
    const int j = threadIdx.x + blockDim.x * blockIdx.x;
    const int i = threadIdx.y + blockDim.y * blockIdx.y;

    // Guard against grids rounded up past the plane.
    if (i >= M || j >= N)
        return;

    // 64-bit arithmetic: M*N*image*iter can exceed the int range.
    const long long pixel = (long long)N * i + j;
    const long long image_offset = (long long)M * N * image;
    const long long src = image_offset + pixel;
    const long long dst = image_offset * iter + image_offset + pixel;

    d_y[dst] = xi[src] - (-1.0f * xi_old[src]);
    d_s[dst] = p[src] - p_old[src];
}
1565+
15241566__global__ void searchDirection (float * g, float * xi, float * h, long N, long M, int image)
15251567{
15261568 int j = threadIdx .x + blockDim .x * blockIdx .x ;
0 commit comments