deepmodeling
diff --git a/‎dpnegf/negf/device_property.py‎
Lines changed: 71 additions & 30 deletions b/‎dpnegf/negf/device_property.py‎
Lines changed: 71 additions & 30 deletions
diff --git a/‎dpnegf/negf/lead_property.py‎
Lines changed: 1 addition & 1 deletion b/‎dpnegf/negf/lead_property.py‎
Lines changed: 1 addition & 1 deletion
@@ -16,6 +16,20 @@
 """
 log = logging.getLogger(__name__)
 
+
+def _build_s_in_batched(hd, seinL, seinR, idx0, idy0, idx1, idy1):
+    '''Allocate per-block [B, n_q, n_q] zeros and inject the corner self-energy slices.
+
+    Mirrors the scalar construction in cal_green_function (the seinL/seinR fed in
+    here are the already-scaled 1j*(seL - seL.mH) * f tensors of shape [B,n,n]).
+    '''
+    B = seinL.shape[0]
+    s_in = [torch.zeros((B,) + tuple(blk.shape), dtype=torch.complex128) for blk in hd]
+    s_in[0][:, :idx0, :idy0] = s_in[0][:, :idx0, :idy0] + seinL[:, :idx0, :idy0]
+    s_in[-1][:, -idx1:, -idy1:] = s_in[-1][:, -idx1:, -idy1:] + seinR[:, -idx1:, -idy1:]
+    return s_in
+
+
 class DeviceProperty(object):
     '''Device object for NEGF calculation
 
@@ -150,8 +164,12 @@ def cal_green_function(self, energy, kpoint, eta_device=0., block_tridiagonal=Tr
             A boolean parameter that indicates whether the last column blocks of the retarded Green's function are needed.
         '''
         assert len(np.array(kpoint).reshape(-1)) == 3
-        if not isinstance(energy, torch.Tensor):
-            energy = torch.tensor(energy, dtype=torch.complex128)
+        energy = torch.as_tensor(energy, dtype=torch.complex128)
+        if energy.ndim == 0:
+            energy = energy.reshape(1)
+        assert energy.ndim == 1, f"energy must be 0-d, scalar, or 1-D [B]; got shape {tuple(energy.shape)}"
+        B = energy.shape[0]
+        batched_mode = B > 1
 
         self.block_tridiagonal = block_tridiagonal
         if self.kpoint is None or abs(self.kpoint - torch.tensor(kpoint)).sum() > 1e-5:
@@ -200,33 +218,45 @@ def cal_green_function(self, energy, kpoint, eta_device=0., block_tridiagonal=Tr
 
         seL = self.lead_L.se
         seR = self.lead_R.se
+        if batched_mode:
+            assert seL.ndim == 3 and seR.ndim == 3, f"In batched mode, the self-energy should have shape [B,n,n], but got {seL.shape} and {seR.shape}"
+        else:
+            assert seL.ndim == 2 and seR.ndim == 2, f"In non-batched mode, the self-energy should have shape [n,n], but got {seL.shape} and {seR.shape}"
+           
         s01, s02 = self.hd[0].shape  # The shape of the first H block
-        se01, se02 = seL.shape       # The shape of the left self-energy
+        se01, se02 = seL.shape[-2], seL.shape[-1]   # last two dims work for [n,n] and [B,n,n]
         s11, s12 = self.hd[-1].shape
-        se11, se12 = seR.shape
+        se11, se12 = seR.shape[-2], seR.shape[-1]
         idx0, idy0 = min(s01, se01), min(s02, se02)
         idx1, idy1 = min(s11, se11), min(s12, se12)
         if block_tridiagonal:
             # Based on the block tridiagonal algorithm, the shape of the self-energy should be
-            # equal to or larger than the corresponding Hamiltonian block
+            # equal to or lesser than the corresponding Hamiltonian block
             if se01 > s01 or se02 > s02:
                 log.warning(f"The shape of left self-energy ({se01},{se02}) is larger than\
                              the first Hamiltonian block ({s01},{s02}).")
                 raise ValueError("Left Lead Self Energy size is larger than the first Hamiltonian Block.")
             if se11 > s11 or se12 > s12:
-                log.warning(f"The shape of right self-energy ({se11},{se12}) is different from\
+                log.warning(f"The shape of right self-energy ({se11},{se12}) is larger than\
                              the last Hamiltonian block ({s11},{s12}).")
                 raise ValueError("Right Lead Self Energy size is larger than the last Hamiltonian Block.")
 
         green_funcs = {}
 
         if need_lesser:
             # Fluctuation-Dissipation theorem; only build s_in when the lesser GF is consumed
-            seinL = 1j*(seL-seL.conj().T) * self.lead_L.fermi_dirac(energy+self.E_ref).reshape(-1)
-            seinR = 1j*(seR-seR.conj().T) * self.lead_R.fermi_dirac(energy+self.E_ref).reshape(-1)
-            s_in = [torch.zeros(i.shape).cdouble() for i in self.hd]
-            s_in[0][:idx0,:idy0] = s_in[0][:idx0,:idy0] + seinL[:idx0,:idy0]
-            s_in[-1][-idx1:,-idy1:] = s_in[-1][-idx1:,-idy1:] + seinR[-idx1:,-idy1:]
+            if batched_mode:
+                fL = self.lead_L.fermi_dirac(energy + self.E_ref).reshape(B, 1, 1)
+                fR = self.lead_R.fermi_dirac(energy + self.E_ref).reshape(B, 1, 1)
+                seinL = 1j * (seL - seL.mH) * fL
+                seinR = 1j * (seR - seR.mH) * fR
+                s_in = _build_s_in_batched(self.hd, seinL, seinR, idx0, idy0, idx1, idy1)
+            else:
+                seinL = 1j*(seL-seL.conj().T) * self.lead_L.fermi_dirac(energy+self.E_ref).reshape(-1)
+                seinR = 1j*(seR-seR.conj().T) * self.lead_R.fermi_dirac(energy+self.E_ref).reshape(-1)
+                s_in = [torch.zeros(i.shape).cdouble() for i in self.hd]
+                s_in[0][:idx0,:idy0] = s_in[0][:idx0,:idy0] + seinL[:idx0,:idy0]
+                s_in[-1][-idx1:,-idy1:] = s_in[-1][-idx1:,-idy1:] + seinR[-idx1:,-idy1:]
         else:
             s_in = 0
 
@@ -322,26 +352,36 @@ def _cal_current_nscf_(self, energy_grid, tc):
 
 
     def _cal_tc_(self):
-        '''calculate the transmission coefficient 
-        
+        '''calculate the transmission coefficient
+
         Returns
         -------
-           tc is the transmission coefficient 
-        
+           tc is the transmission coefficient
+
         '''
 
-        tx, ty = self.g_trans.shape
-        lx, ly = self.lead_L.se.shape
-        rx, ry = self.lead_R.se.shape
+        g_trans = self.g_trans
+        batched = g_trans.ndim == 3
+        tx, ty = g_trans.shape[-2], g_trans.shape[-1]
+        gammaL_full = self.lead_L.gamma
+        gammaR_full = self.lead_R.gamma
+        lx = gammaL_full.shape[-2]
+        rx = gammaR_full.shape[-2]
         x0 = min(lx, tx)
         x1 = min(rx, ty)
 
-        gammaL = torch.zeros(size=(tx, tx), dtype=self.cdtype, device=self.device)
-        gammaL[:x0, :x0] += self.lead_L.gamma[:x0, :x0]
-        gammaR = torch.zeros(size=(ty, ty), dtype=self.cdtype, device=self.device)
-        gammaR[-x1:, -x1:] += self.lead_R.gamma[-x1:, -x1:]
+        gL_shape = (g_trans.shape[0], tx, tx) if batched else (tx, tx)
+        gR_shape = (g_trans.shape[0], ty, ty) if batched else (ty, ty)
+        gammaL = torch.zeros(size=gL_shape, dtype=self.cdtype, device=self.device)
+        gammaR = torch.zeros(size=gR_shape, dtype=self.cdtype, device=self.device)
+        if batched:
+            gammaL[:, :x0, :x0] = gammaL[:, :x0, :x0] + gammaL_full[:, :x0, :x0]
+            gammaR[:, -x1:, -x1:] = gammaR[:, -x1:, -x1:] + gammaR_full[:, -x1:, -x1:]
+        else:
+            gammaL[:x0, :x0] += gammaL_full[:x0, :x0]
+            gammaR[-x1:, -x1:] += gammaR_full[-x1:, -x1:]
 
-        tc = torch.mm(torch.mm(gammaL, self.g_trans), torch.mm(gammaR, self.g_trans.conj().T)).diag().real.sum(-1)
+        tc = (gammaL @ g_trans @ gammaR @ g_trans.mH).diagonal(dim1=-2, dim2=-1).real.sum(-1)
 
         return tc
 
@@ -368,7 +408,7 @@ def _cal_dos_(self):
                     temp = self.grd[jj] @ self.sd[jj] + self.grl[jj-1] @ self.su[jj-1]
                 else:
                     temp = self.grd[jj] @ self.sd[jj] + self.grl[jj-1] @ self.su[jj-1] + self.gru[jj] @ self.sl[jj]
-            dos -= temp.imag.diag().sum(-1) / pi
+            dos -= temp.imag.diagonal(dim1=-2, dim2=-1).sum(-1) / pi
         return dos * 2
 
     def _cal_ldos_(self):
@@ -391,27 +431,28 @@ def _cal_ldos_(self):
                     temp = self.grd[jj] @ self.sd[jj] + self.grl[jj-1] @ self.su[jj-1]
                 else:
                     temp = self.grd[jj] @ self.sd[jj] + self.grl[jj-1] @ self.su[jj-1] + self.gru[jj] @ self.sl[jj]
-            ldos.append(-temp.imag.diag() / pi) # shape(Nd(diagonal elements))
+            ldos.append(-temp.imag.diagonal(dim1=-2, dim2=-1) / pi) # [n_q] or [B, n_q]
 
-        ldos = torch.cat(ldos, dim=0).contiguous()
+        ldos = torch.cat(ldos, dim=-1).contiguous()
 
         norbs = [0]+self.norbs_per_atom
         accmap = np.cumsum(norbs)
-        ldos = torch.stack([ldos[accmap[i]:accmap[i+1]].sum() for i in range(len(accmap)-1)])
+        ldos = torch.stack([ldos[..., accmap[i]:accmap[i+1]].sum(-1) for i in range(len(accmap)-1)], dim=-1)
 
         # return ldos*2
         return ldos*2
 
     def _cal_local_current_(self):
-        '''calculate the local current between different atoms 
+        '''calculate the local current between different atoms
 
         At this stage, local current calculation only support non-block-triagonal format Hamiltonian
-        
+
         Returns
         -------
             the local current
-        
+
         '''
+        # TODO(batched-energy): vectorize then batch — currently expects scalar-E gnd[0] (2-D).
         # current only support non-block-triagonal format
         v_L = self.lead_L.voltage
         v_R = self.lead_R.voltage
 
@@ -450,7 +450,7 @@ def sigmaLR2Gamma(self, se):
             The Gamma function, Gamma = 1j(se-se^dagger).
         
         '''
-        return 1j * (se - se.conj().T)
+        return 1j * (se - se.mH)
 
     def fermi_dirac(self, x) -> torch.Tensor:
         return 1 / (1 + torch.exp((x - self.chemiPot_lead)/ self.kBT))