Skip to content

Commit e5bfa67

Browse files
committed
vv10 hessian Fock 1st derivative term without grid response done, correct but requires huge memory
1 parent fbf5c41 commit e5bfa67

File tree

1 file changed

+192
-2
lines changed

1 file changed

+192
-2
lines changed

gpu4pyscf/hessian/rks.py

+192-2
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ def get_drhodA_dgammadA(mol, grids, ni, dm0, opt = None):
595595
ngrids = grids.coords.shape[0]
596596

597597
ao = ni.eval_ao(mol, grids.coords, deriv = 2, gdftopt = opt, transpose = False)
598-
rho_drho = numint.eval_rho(mol, ao, dm0, xctype = "NLC", hermi = 1, with_lapl = False)
598+
rho_drho = numint.eval_rho(mol, ao[:4, :], dm0, xctype = "NLC", hermi = 1, with_lapl = False)
599599
drho = rho_drho[1:4, :]
600600
mu = ao[0, :, :]
601601
dmu_dr = ao[1:4, :, :]
@@ -1021,7 +1021,7 @@ def _get_vxc_deriv1(hessobj, mo_coeff, mo_occ, max_memory):
10211021
vmat = reduce_to_device(vmat_dist, inplace=True)
10221022
return vmat
10231023

1024-
def _get_vnlc_deriv1(hessobj, mo_coeff, mo_occ, max_memory):
1024+
def _get_vnlc_deriv1_numerical(hessobj, mo_coeff, mo_occ, max_memory):
10251025
mol = hessobj.mol
10261026
mf = hessobj.base
10271027
mocc = mo_coeff[:,mo_occ>0]
@@ -1062,6 +1062,196 @@ def get_nlc_vmat(mol, mf, dm):
10621062

10631063
vmat[i_atom, i_xyz, :, :] = (vmat_p - vmat_m) / (2 * dx)
10641064

1065+
# vmat = contract('Adij,jq->Adiq', vmat, mocc)
1066+
# vmat = contract('Adiq,ip->Adpq', vmat, mo_coeff)
1067+
return vmat
1068+
1069+
def _get_vnlc_deriv1(hessobj, mo_coeff, mo_occ, max_memory):
1070+
mol = hessobj.mol
1071+
mf = hessobj.base
1072+
nao = mol.nao
1073+
natm = mol.natm
1074+
1075+
mocc = mo_coeff[:,mo_occ>0]
1076+
dm0 = 2 * mocc @ mocc.T
1077+
1078+
grids = mf.nlcgrids
1079+
ngrids = grids.coords.shape[0]
1080+
1081+
ni = mf._numint
1082+
opt = getattr(ni, 'gdftopt', None)
1083+
if opt is None:
1084+
ni.build(mol, grids.coords)
1085+
opt = ni.gdftopt
1086+
1087+
if ni.libxc.is_nlc(mf.xc):
1088+
xc_code = mf.xc
1089+
else:
1090+
xc_code = mf.nlc
1091+
nlc_coefs = ni.nlc_coeff(xc_code)
1092+
if len(nlc_coefs) != 1:
1093+
raise NotImplementedError('Additive NLC')
1094+
nlc_pars, fac = nlc_coefs[0]
1095+
1096+
kappa_prefactor = nlc_pars[0] * 1.5 * numpy.pi * (9 * numpy.pi)**(-1.0/6.0)
1097+
C_in_omega = nlc_pars[1]
1098+
beta = 0.03125 * (3.0 / nlc_pars[0]**2)**0.75
1099+
1100+
with opt.gdft_envs_cache():
1101+
ao = ni.eval_ao(mol, grids.coords, deriv = 2, gdftopt = opt, transpose = False)
1102+
rho_drho = numint.eval_rho(mol, ao[:4, :], dm0, xctype = "NLC", hermi = 1, with_lapl = False)
1103+
1104+
rho_i = rho_drho[0,:]
1105+
1106+
rho_nonzero_mask = (rho_i >= NLC_REMOVE_ZERO_RHO_GRID_THRESHOLD)
1107+
1108+
rho_i = rho_i[rho_nonzero_mask]
1109+
nabla_rho_i = rho_drho[1:4, rho_nonzero_mask]
1110+
grids_coords = cupy.ascontiguousarray(grids.coords[rho_nonzero_mask, :])
1111+
grids_weights = grids.weights[rho_nonzero_mask]
1112+
ngrids = grids_coords.shape[0]
1113+
1114+
ao_nonzero_rho = ao[:,:,rho_nonzero_mask]
1115+
1116+
gamma_i = nabla_rho_i[0,:]**2 + nabla_rho_i[1,:]**2 + nabla_rho_i[2,:]**2
1117+
omega_i = cupy.sqrt(C_in_omega * gamma_i**2 / rho_i**4 + (4.0/3.0*numpy.pi) * rho_i)
1118+
kappa_i = kappa_prefactor * rho_i**(1.0/6.0)
1119+
1120+
U_i = cupy.empty(ngrids)
1121+
W_i = cupy.empty(ngrids)
1122+
A_i = cupy.empty(ngrids)
1123+
B_i = cupy.empty(ngrids)
1124+
C_i = cupy.empty(ngrids)
1125+
E_i = cupy.empty(ngrids)
1126+
1127+
stream = cupy.cuda.get_current_stream()
1128+
libgdft.VXC_vv10nlc_hess_eval_UWABCE(
1129+
ctypes.cast(stream.ptr, ctypes.c_void_p),
1130+
ctypes.cast(U_i.data.ptr, ctypes.c_void_p),
1131+
ctypes.cast(W_i.data.ptr, ctypes.c_void_p),
1132+
ctypes.cast(A_i.data.ptr, ctypes.c_void_p),
1133+
ctypes.cast(B_i.data.ptr, ctypes.c_void_p),
1134+
ctypes.cast(C_i.data.ptr, ctypes.c_void_p),
1135+
ctypes.cast(E_i.data.ptr, ctypes.c_void_p),
1136+
ctypes.cast(grids_coords.data.ptr, ctypes.c_void_p),
1137+
ctypes.cast(grids_weights.data.ptr, ctypes.c_void_p),
1138+
ctypes.cast(rho_i.data.ptr, ctypes.c_void_p),
1139+
ctypes.cast(omega_i.data.ptr, ctypes.c_void_p),
1140+
ctypes.cast(kappa_i.data.ptr, ctypes.c_void_p),
1141+
ctypes.c_int(ngrids)
1142+
)
1143+
1144+
domega_drho_i = cupy.empty(ngrids)
1145+
domega_dgamma_i = cupy.empty(ngrids)
1146+
d2omega_drho2_i = cupy.empty(ngrids)
1147+
d2omega_dgamma2_i = cupy.empty(ngrids)
1148+
d2omega_drho_dgamma_i = cupy.empty(ngrids)
1149+
libgdft.VXC_vv10nlc_hess_eval_omega_derivative(
1150+
ctypes.cast(stream.ptr, ctypes.c_void_p),
1151+
ctypes.cast(domega_drho_i.data.ptr, ctypes.c_void_p),
1152+
ctypes.cast(domega_dgamma_i.data.ptr, ctypes.c_void_p),
1153+
ctypes.cast(d2omega_drho2_i.data.ptr, ctypes.c_void_p),
1154+
ctypes.cast(d2omega_dgamma2_i.data.ptr, ctypes.c_void_p),
1155+
ctypes.cast(d2omega_drho_dgamma_i.data.ptr, ctypes.c_void_p),
1156+
ctypes.cast(rho_i.data.ptr, ctypes.c_void_p),
1157+
ctypes.cast(gamma_i.data.ptr, ctypes.c_void_p),
1158+
ctypes.c_double(C_in_omega),
1159+
ctypes.c_int(ngrids)
1160+
)
1161+
dkappa_drho_i = kappa_prefactor * (1.0/6.0) * rho_i**(-5.0/6.0)
1162+
d2kappa_drho2_i = kappa_prefactor * (-5.0/36.0) * rho_i**(-11.0/6.0)
1163+
1164+
f_rho_i = beta + E_i + rho_i * (dkappa_drho_i * U_i + domega_drho_i * W_i)
1165+
f_gamma_i = rho_i * domega_dgamma_i * W_i
1166+
1167+
drho_dA, dgamma_dA = get_drhodA_dgammadA(mol, grids, ni, dm0, opt)
1168+
1169+
drho_dA = drho_dA[:,:,rho_nonzero_mask]
1170+
dgamma_dA = dgamma_dA[:,:,rho_nonzero_mask]
1171+
1172+
drho_dA = cupy.ascontiguousarray(drho_dA)
1173+
dgamma_dA = cupy.ascontiguousarray(dgamma_dA)
1174+
dfdrho_drhodA = cupy.empty([mol.natm, 3, ngrids], order = "C")
1175+
dfdgamma_dgammadA = cupy.empty([mol.natm, 3, ngrids], order = "C")
1176+
1177+
libgdft.VXC_vv10nlc_hess_eval_f_t(
1178+
ctypes.cast(stream.ptr, ctypes.c_void_p),
1179+
ctypes.cast(dfdrho_drhodA.data.ptr, ctypes.c_void_p),
1180+
ctypes.cast(dfdgamma_dgammadA.data.ptr, ctypes.c_void_p),
1181+
ctypes.cast(grids_coords.data.ptr, ctypes.c_void_p),
1182+
ctypes.cast(grids_weights.data.ptr, ctypes.c_void_p),
1183+
ctypes.cast(rho_i.data.ptr, ctypes.c_void_p),
1184+
ctypes.cast(omega_i.data.ptr, ctypes.c_void_p),
1185+
ctypes.cast(kappa_i.data.ptr, ctypes.c_void_p),
1186+
ctypes.cast(U_i.data.ptr, ctypes.c_void_p),
1187+
ctypes.cast(W_i.data.ptr, ctypes.c_void_p),
1188+
ctypes.cast(A_i.data.ptr, ctypes.c_void_p),
1189+
ctypes.cast(B_i.data.ptr, ctypes.c_void_p),
1190+
ctypes.cast(C_i.data.ptr, ctypes.c_void_p),
1191+
ctypes.cast(domega_drho_i.data.ptr, ctypes.c_void_p),
1192+
ctypes.cast(domega_dgamma_i.data.ptr, ctypes.c_void_p),
1193+
ctypes.cast(dkappa_drho_i.data.ptr, ctypes.c_void_p),
1194+
ctypes.cast(d2omega_drho2_i.data.ptr, ctypes.c_void_p),
1195+
ctypes.cast(d2omega_dgamma2_i.data.ptr, ctypes.c_void_p),
1196+
ctypes.cast(d2omega_drho_dgamma_i.data.ptr, ctypes.c_void_p),
1197+
ctypes.cast(d2kappa_drho2_i.data.ptr, ctypes.c_void_p),
1198+
ctypes.cast(drho_dA.data.ptr, ctypes.c_void_p),
1199+
ctypes.cast(dgamma_dA.data.ptr, ctypes.c_void_p),
1200+
ctypes.c_int(ngrids),
1201+
ctypes.c_int(3 * mol.natm),
1202+
)
1203+
1204+
# TODO: reduce memory footprint from here on
1205+
1206+
vmat = cupy.zeros([mol.natm, 3, nao, nao])
1207+
1208+
mu = ao_nonzero_rho[0, :, :]
1209+
dmu_dr = ao_nonzero_rho[1:4, :, :]
1210+
d2mu_dr2 = cupy.empty([3, 3, nao, ngrids])
1211+
d2mu_dr2[0,0,:,:] = ao_nonzero_rho[XX, :, :]
1212+
d2mu_dr2[0,1,:,:] = ao_nonzero_rho[XY, :, :]
1213+
d2mu_dr2[1,0,:,:] = ao_nonzero_rho[XY, :, :]
1214+
d2mu_dr2[0,2,:,:] = ao_nonzero_rho[XZ, :, :]
1215+
d2mu_dr2[2,0,:,:] = ao_nonzero_rho[XZ, :, :]
1216+
d2mu_dr2[1,1,:,:] = ao_nonzero_rho[YY, :, :]
1217+
d2mu_dr2[1,2,:,:] = ao_nonzero_rho[YZ, :, :]
1218+
d2mu_dr2[2,1,:,:] = ao_nonzero_rho[YZ, :, :]
1219+
d2mu_dr2[2,2,:,:] = ao_nonzero_rho[ZZ, :, :]
1220+
1221+
aoslices = mol.aoslice_by_atom()
1222+
1223+
# w_i 2 f_i^\gamma \nabla_A \nabla\rho \cdot \nabla(\phi_\mu \phi_nu)_i
1224+
d2rho_dAdr = cupy.zeros([natm, 3, 3, ngrids])
1225+
for i_atom in range(natm):
1226+
p0, p1 = aoslices[i_atom][2:]
1227+
d2rho_dAdr[i_atom, :, :, :] += cupy.einsum('dDig,jg,ij->dDg', -d2mu_dr2[:, :, p0:p1, :], mu, dm0[p0:p1, :])
1228+
d2rho_dAdr[i_atom, :, :, :] += cupy.einsum('dDig,jg,ij->dDg', -d2mu_dr2[:, :, p0:p1, :], mu, dm0[:, p0:p1].T)
1229+
d2rho_dAdr[i_atom, :, :, :] += cupy.einsum('dig,Djg,ij->dDg', -dmu_dr[:, p0:p1, :], dmu_dr, dm0[p0:p1, :])
1230+
d2rho_dAdr[i_atom, :, :, :] += cupy.einsum('dig,Djg,ij->dDg', -dmu_dr[:, p0:p1, :], dmu_dr, dm0[:, p0:p1].T)
1231+
dmunu_dr = cupy.einsum('dig,jg->dijg', dmu_dr, mu) + cupy.einsum('dig,jg->djig', dmu_dr, mu)
1232+
vmat += 2 * cupy.einsum('AdDg,Dijg,g->Adij', d2rho_dAdr, dmunu_dr, f_gamma_i * grids_weights)
1233+
1234+
for i_atom in range(natm):
1235+
p0, p1 = aoslices[i_atom][2:]
1236+
# w_i f_i^\rho \nabla_A (\phi_\mu \phi_nu)_i
1237+
vmat[i_atom, :, p0:p1, :] += cupy.einsum('dig,jg->dij', -dmu_dr[:, p0:p1, :], mu * f_rho_i * grids_weights)
1238+
vmat[i_atom, :, :, p0:p1] += cupy.einsum('dig,jg->dji', -dmu_dr[:, p0:p1, :], mu * f_rho_i * grids_weights)
1239+
1240+
# w_i 2 f_i^\gamma \nabla\rho \cdot \nabla_A \nabla(\phi_\mu \phi_nu)_i
1241+
vmat[i_atom, :, p0:p1, :] += 2 * cupy.einsum('dDig,jg,Dg->dij', -d2mu_dr2[:, :, p0:p1, :], mu, nabla_rho_i * f_gamma_i * grids_weights)
1242+
vmat[i_atom, :, :, p0:p1] += 2 * cupy.einsum('dDig,jg,Dg->dji', -d2mu_dr2[:, :, p0:p1, :], mu, nabla_rho_i * f_gamma_i * grids_weights)
1243+
vmat[i_atom, :, p0:p1, :] += 2 * cupy.einsum('dig,Djg,Dg->dij', -dmu_dr[:, p0:p1, :], dmu_dr, nabla_rho_i * f_gamma_i * grids_weights)
1244+
vmat[i_atom, :, :, p0:p1] += 2 * cupy.einsum('dig,Djg,Dg->dji', -dmu_dr[:, p0:p1, :], dmu_dr, nabla_rho_i * f_gamma_i * grids_weights)
1245+
1246+
vmat += cupy.einsum('Adg,ig,jg,g->Adij', dfdrho_drhodA, mu, mu, grids_weights)
1247+
vmat += 2 * cupy.einsum('Adg,Dijg,Dg->Adij', dfdgamma_dgammadA, dmunu_dr, nabla_rho_i * grids_weights)
1248+
1249+
vmat_numerical = _get_vnlc_deriv1_numerical(hessobj, mo_coeff, mo_occ, max_memory)
1250+
1251+
print(cupy.max(cupy.abs(vmat - vmat_numerical)))
1252+
1253+
vmat = vmat_numerical
1254+
10651255
vmat = contract('Adij,jq->Adiq', vmat, mocc)
10661256
vmat = contract('Adiq,ip->Adpq', vmat, mo_coeff)
10671257
return vmat

0 commit comments

Comments
 (0)