@@ -595,7 +595,7 @@ def get_drhodA_dgammadA(mol, grids, ni, dm0, opt = None):
595
595
ngrids = grids .coords .shape [0 ]
596
596
597
597
ao = ni .eval_ao (mol , grids .coords , deriv = 2 , gdftopt = opt , transpose = False )
598
- rho_drho = numint .eval_rho (mol , ao , dm0 , xctype = "NLC" , hermi = 1 , with_lapl = False )
598
+ rho_drho = numint .eval_rho (mol , ao [: 4 , :] , dm0 , xctype = "NLC" , hermi = 1 , with_lapl = False )
599
599
drho = rho_drho [1 :4 , :]
600
600
mu = ao [0 , :, :]
601
601
dmu_dr = ao [1 :4 , :, :]
@@ -1021,7 +1021,7 @@ def _get_vxc_deriv1(hessobj, mo_coeff, mo_occ, max_memory):
1021
1021
vmat = reduce_to_device (vmat_dist , inplace = True )
1022
1022
return vmat
1023
1023
1024
- def _get_vnlc_deriv1 (hessobj , mo_coeff , mo_occ , max_memory ):
1024
+ def _get_vnlc_deriv1_numerical (hessobj , mo_coeff , mo_occ , max_memory ):
1025
1025
mol = hessobj .mol
1026
1026
mf = hessobj .base
1027
1027
mocc = mo_coeff [:,mo_occ > 0 ]
@@ -1062,6 +1062,196 @@ def get_nlc_vmat(mol, mf, dm):
1062
1062
1063
1063
vmat [i_atom , i_xyz , :, :] = (vmat_p - vmat_m ) / (2 * dx )
1064
1064
1065
+ # vmat = contract('Adij,jq->Adiq', vmat, mocc)
1066
+ # vmat = contract('Adiq,ip->Adpq', vmat, mo_coeff)
1067
+ return vmat
1068
+
1069
+ def _get_vnlc_deriv1 (hessobj , mo_coeff , mo_occ , max_memory ):
1070
+ mol = hessobj .mol
1071
+ mf = hessobj .base
1072
+ nao = mol .nao
1073
+ natm = mol .natm
1074
+
1075
+ mocc = mo_coeff [:,mo_occ > 0 ]
1076
+ dm0 = 2 * mocc @ mocc .T
1077
+
1078
+ grids = mf .nlcgrids
1079
+ ngrids = grids .coords .shape [0 ]
1080
+
1081
+ ni = mf ._numint
1082
+ opt = getattr (ni , 'gdftopt' , None )
1083
+ if opt is None :
1084
+ ni .build (mol , grids .coords )
1085
+ opt = ni .gdftopt
1086
+
1087
+ if ni .libxc .is_nlc (mf .xc ):
1088
+ xc_code = mf .xc
1089
+ else :
1090
+ xc_code = mf .nlc
1091
+ nlc_coefs = ni .nlc_coeff (xc_code )
1092
+ if len (nlc_coefs ) != 1 :
1093
+ raise NotImplementedError ('Additive NLC' )
1094
+ nlc_pars , fac = nlc_coefs [0 ]
1095
+
1096
+ kappa_prefactor = nlc_pars [0 ] * 1.5 * numpy .pi * (9 * numpy .pi )** (- 1.0 / 6.0 )
1097
+ C_in_omega = nlc_pars [1 ]
1098
+ beta = 0.03125 * (3.0 / nlc_pars [0 ]** 2 )** 0.75
1099
+
1100
+ with opt .gdft_envs_cache ():
1101
+ ao = ni .eval_ao (mol , grids .coords , deriv = 2 , gdftopt = opt , transpose = False )
1102
+ rho_drho = numint .eval_rho (mol , ao [:4 , :], dm0 , xctype = "NLC" , hermi = 1 , with_lapl = False )
1103
+
1104
+ rho_i = rho_drho [0 ,:]
1105
+
1106
+ rho_nonzero_mask = (rho_i >= NLC_REMOVE_ZERO_RHO_GRID_THRESHOLD )
1107
+
1108
+ rho_i = rho_i [rho_nonzero_mask ]
1109
+ nabla_rho_i = rho_drho [1 :4 , rho_nonzero_mask ]
1110
+ grids_coords = cupy .ascontiguousarray (grids .coords [rho_nonzero_mask , :])
1111
+ grids_weights = grids .weights [rho_nonzero_mask ]
1112
+ ngrids = grids_coords .shape [0 ]
1113
+
1114
+ ao_nonzero_rho = ao [:,:,rho_nonzero_mask ]
1115
+
1116
+ gamma_i = nabla_rho_i [0 ,:]** 2 + nabla_rho_i [1 ,:]** 2 + nabla_rho_i [2 ,:]** 2
1117
+ omega_i = cupy .sqrt (C_in_omega * gamma_i ** 2 / rho_i ** 4 + (4.0 / 3.0 * numpy .pi ) * rho_i )
1118
+ kappa_i = kappa_prefactor * rho_i ** (1.0 / 6.0 )
1119
+
1120
+ U_i = cupy .empty (ngrids )
1121
+ W_i = cupy .empty (ngrids )
1122
+ A_i = cupy .empty (ngrids )
1123
+ B_i = cupy .empty (ngrids )
1124
+ C_i = cupy .empty (ngrids )
1125
+ E_i = cupy .empty (ngrids )
1126
+
1127
+ stream = cupy .cuda .get_current_stream ()
1128
+ libgdft .VXC_vv10nlc_hess_eval_UWABCE (
1129
+ ctypes .cast (stream .ptr , ctypes .c_void_p ),
1130
+ ctypes .cast (U_i .data .ptr , ctypes .c_void_p ),
1131
+ ctypes .cast (W_i .data .ptr , ctypes .c_void_p ),
1132
+ ctypes .cast (A_i .data .ptr , ctypes .c_void_p ),
1133
+ ctypes .cast (B_i .data .ptr , ctypes .c_void_p ),
1134
+ ctypes .cast (C_i .data .ptr , ctypes .c_void_p ),
1135
+ ctypes .cast (E_i .data .ptr , ctypes .c_void_p ),
1136
+ ctypes .cast (grids_coords .data .ptr , ctypes .c_void_p ),
1137
+ ctypes .cast (grids_weights .data .ptr , ctypes .c_void_p ),
1138
+ ctypes .cast (rho_i .data .ptr , ctypes .c_void_p ),
1139
+ ctypes .cast (omega_i .data .ptr , ctypes .c_void_p ),
1140
+ ctypes .cast (kappa_i .data .ptr , ctypes .c_void_p ),
1141
+ ctypes .c_int (ngrids )
1142
+ )
1143
+
1144
+ domega_drho_i = cupy .empty (ngrids )
1145
+ domega_dgamma_i = cupy .empty (ngrids )
1146
+ d2omega_drho2_i = cupy .empty (ngrids )
1147
+ d2omega_dgamma2_i = cupy .empty (ngrids )
1148
+ d2omega_drho_dgamma_i = cupy .empty (ngrids )
1149
+ libgdft .VXC_vv10nlc_hess_eval_omega_derivative (
1150
+ ctypes .cast (stream .ptr , ctypes .c_void_p ),
1151
+ ctypes .cast (domega_drho_i .data .ptr , ctypes .c_void_p ),
1152
+ ctypes .cast (domega_dgamma_i .data .ptr , ctypes .c_void_p ),
1153
+ ctypes .cast (d2omega_drho2_i .data .ptr , ctypes .c_void_p ),
1154
+ ctypes .cast (d2omega_dgamma2_i .data .ptr , ctypes .c_void_p ),
1155
+ ctypes .cast (d2omega_drho_dgamma_i .data .ptr , ctypes .c_void_p ),
1156
+ ctypes .cast (rho_i .data .ptr , ctypes .c_void_p ),
1157
+ ctypes .cast (gamma_i .data .ptr , ctypes .c_void_p ),
1158
+ ctypes .c_double (C_in_omega ),
1159
+ ctypes .c_int (ngrids )
1160
+ )
1161
+ dkappa_drho_i = kappa_prefactor * (1.0 / 6.0 ) * rho_i ** (- 5.0 / 6.0 )
1162
+ d2kappa_drho2_i = kappa_prefactor * (- 5.0 / 36.0 ) * rho_i ** (- 11.0 / 6.0 )
1163
+
1164
+ f_rho_i = beta + E_i + rho_i * (dkappa_drho_i * U_i + domega_drho_i * W_i )
1165
+ f_gamma_i = rho_i * domega_dgamma_i * W_i
1166
+
1167
+ drho_dA , dgamma_dA = get_drhodA_dgammadA (mol , grids , ni , dm0 , opt )
1168
+
1169
+ drho_dA = drho_dA [:,:,rho_nonzero_mask ]
1170
+ dgamma_dA = dgamma_dA [:,:,rho_nonzero_mask ]
1171
+
1172
+ drho_dA = cupy .ascontiguousarray (drho_dA )
1173
+ dgamma_dA = cupy .ascontiguousarray (dgamma_dA )
1174
+ dfdrho_drhodA = cupy .empty ([mol .natm , 3 , ngrids ], order = "C" )
1175
+ dfdgamma_dgammadA = cupy .empty ([mol .natm , 3 , ngrids ], order = "C" )
1176
+
1177
+ libgdft .VXC_vv10nlc_hess_eval_f_t (
1178
+ ctypes .cast (stream .ptr , ctypes .c_void_p ),
1179
+ ctypes .cast (dfdrho_drhodA .data .ptr , ctypes .c_void_p ),
1180
+ ctypes .cast (dfdgamma_dgammadA .data .ptr , ctypes .c_void_p ),
1181
+ ctypes .cast (grids_coords .data .ptr , ctypes .c_void_p ),
1182
+ ctypes .cast (grids_weights .data .ptr , ctypes .c_void_p ),
1183
+ ctypes .cast (rho_i .data .ptr , ctypes .c_void_p ),
1184
+ ctypes .cast (omega_i .data .ptr , ctypes .c_void_p ),
1185
+ ctypes .cast (kappa_i .data .ptr , ctypes .c_void_p ),
1186
+ ctypes .cast (U_i .data .ptr , ctypes .c_void_p ),
1187
+ ctypes .cast (W_i .data .ptr , ctypes .c_void_p ),
1188
+ ctypes .cast (A_i .data .ptr , ctypes .c_void_p ),
1189
+ ctypes .cast (B_i .data .ptr , ctypes .c_void_p ),
1190
+ ctypes .cast (C_i .data .ptr , ctypes .c_void_p ),
1191
+ ctypes .cast (domega_drho_i .data .ptr , ctypes .c_void_p ),
1192
+ ctypes .cast (domega_dgamma_i .data .ptr , ctypes .c_void_p ),
1193
+ ctypes .cast (dkappa_drho_i .data .ptr , ctypes .c_void_p ),
1194
+ ctypes .cast (d2omega_drho2_i .data .ptr , ctypes .c_void_p ),
1195
+ ctypes .cast (d2omega_dgamma2_i .data .ptr , ctypes .c_void_p ),
1196
+ ctypes .cast (d2omega_drho_dgamma_i .data .ptr , ctypes .c_void_p ),
1197
+ ctypes .cast (d2kappa_drho2_i .data .ptr , ctypes .c_void_p ),
1198
+ ctypes .cast (drho_dA .data .ptr , ctypes .c_void_p ),
1199
+ ctypes .cast (dgamma_dA .data .ptr , ctypes .c_void_p ),
1200
+ ctypes .c_int (ngrids ),
1201
+ ctypes .c_int (3 * mol .natm ),
1202
+ )
1203
+
1204
+ # TODO: reduce memory footprint from here on
1205
+
1206
+ vmat = cupy .zeros ([mol .natm , 3 , nao , nao ])
1207
+
1208
+ mu = ao_nonzero_rho [0 , :, :]
1209
+ dmu_dr = ao_nonzero_rho [1 :4 , :, :]
1210
+ d2mu_dr2 = cupy .empty ([3 , 3 , nao , ngrids ])
1211
+ d2mu_dr2 [0 ,0 ,:,:] = ao_nonzero_rho [XX , :, :]
1212
+ d2mu_dr2 [0 ,1 ,:,:] = ao_nonzero_rho [XY , :, :]
1213
+ d2mu_dr2 [1 ,0 ,:,:] = ao_nonzero_rho [XY , :, :]
1214
+ d2mu_dr2 [0 ,2 ,:,:] = ao_nonzero_rho [XZ , :, :]
1215
+ d2mu_dr2 [2 ,0 ,:,:] = ao_nonzero_rho [XZ , :, :]
1216
+ d2mu_dr2 [1 ,1 ,:,:] = ao_nonzero_rho [YY , :, :]
1217
+ d2mu_dr2 [1 ,2 ,:,:] = ao_nonzero_rho [YZ , :, :]
1218
+ d2mu_dr2 [2 ,1 ,:,:] = ao_nonzero_rho [YZ , :, :]
1219
+ d2mu_dr2 [2 ,2 ,:,:] = ao_nonzero_rho [ZZ , :, :]
1220
+
1221
+ aoslices = mol .aoslice_by_atom ()
1222
+
1223
+ # w_i 2 f_i^\gamma \nabla_A \nabla\rho \cdot \nabla(\phi_\mu \phi_nu)_i
1224
+ d2rho_dAdr = cupy .zeros ([natm , 3 , 3 , ngrids ])
1225
+ for i_atom in range (natm ):
1226
+ p0 , p1 = aoslices [i_atom ][2 :]
1227
+ d2rho_dAdr [i_atom , :, :, :] += cupy .einsum ('dDig,jg,ij->dDg' , - d2mu_dr2 [:, :, p0 :p1 , :], mu , dm0 [p0 :p1 , :])
1228
+ d2rho_dAdr [i_atom , :, :, :] += cupy .einsum ('dDig,jg,ij->dDg' , - d2mu_dr2 [:, :, p0 :p1 , :], mu , dm0 [:, p0 :p1 ].T )
1229
+ d2rho_dAdr [i_atom , :, :, :] += cupy .einsum ('dig,Djg,ij->dDg' , - dmu_dr [:, p0 :p1 , :], dmu_dr , dm0 [p0 :p1 , :])
1230
+ d2rho_dAdr [i_atom , :, :, :] += cupy .einsum ('dig,Djg,ij->dDg' , - dmu_dr [:, p0 :p1 , :], dmu_dr , dm0 [:, p0 :p1 ].T )
1231
+ dmunu_dr = cupy .einsum ('dig,jg->dijg' , dmu_dr , mu ) + cupy .einsum ('dig,jg->djig' , dmu_dr , mu )
1232
+ vmat += 2 * cupy .einsum ('AdDg,Dijg,g->Adij' , d2rho_dAdr , dmunu_dr , f_gamma_i * grids_weights )
1233
+
1234
+ for i_atom in range (natm ):
1235
+ p0 , p1 = aoslices [i_atom ][2 :]
1236
+ # w_i f_i^\rho \nabla_A (\phi_\mu \phi_nu)_i
1237
+ vmat [i_atom , :, p0 :p1 , :] += cupy .einsum ('dig,jg->dij' , - dmu_dr [:, p0 :p1 , :], mu * f_rho_i * grids_weights )
1238
+ vmat [i_atom , :, :, p0 :p1 ] += cupy .einsum ('dig,jg->dji' , - dmu_dr [:, p0 :p1 , :], mu * f_rho_i * grids_weights )
1239
+
1240
+ # w_i 2 f_i^\gamma \nabla\rho \cdot \nabla_A \nabla(\phi_\mu \phi_nu)_i
1241
+ vmat [i_atom , :, p0 :p1 , :] += 2 * cupy .einsum ('dDig,jg,Dg->dij' , - d2mu_dr2 [:, :, p0 :p1 , :], mu , nabla_rho_i * f_gamma_i * grids_weights )
1242
+ vmat [i_atom , :, :, p0 :p1 ] += 2 * cupy .einsum ('dDig,jg,Dg->dji' , - d2mu_dr2 [:, :, p0 :p1 , :], mu , nabla_rho_i * f_gamma_i * grids_weights )
1243
+ vmat [i_atom , :, p0 :p1 , :] += 2 * cupy .einsum ('dig,Djg,Dg->dij' , - dmu_dr [:, p0 :p1 , :], dmu_dr , nabla_rho_i * f_gamma_i * grids_weights )
1244
+ vmat [i_atom , :, :, p0 :p1 ] += 2 * cupy .einsum ('dig,Djg,Dg->dji' , - dmu_dr [:, p0 :p1 , :], dmu_dr , nabla_rho_i * f_gamma_i * grids_weights )
1245
+
1246
+ vmat += cupy .einsum ('Adg,ig,jg,g->Adij' , dfdrho_drhodA , mu , mu , grids_weights )
1247
+ vmat += 2 * cupy .einsum ('Adg,Dijg,Dg->Adij' , dfdgamma_dgammadA , dmunu_dr , nabla_rho_i * grids_weights )
1248
+
1249
+ vmat_numerical = _get_vnlc_deriv1_numerical (hessobj , mo_coeff , mo_occ , max_memory )
1250
+
1251
+ print (cupy .max (cupy .abs (vmat - vmat_numerical )))
1252
+
1253
+ vmat = vmat_numerical
1254
+
1065
1255
vmat = contract ('Adij,jq->Adiq' , vmat , mocc )
1066
1256
vmat = contract ('Adiq,ip->Adpq' , vmat , mo_coeff )
1067
1257
return vmat
0 commit comments