MouseLand
diff --git a/‎pykilosort/cluster.py‎
Lines changed: 28 additions & 0 deletions b/‎pykilosort/cluster.py‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎pykilosort/cuda/spikedetector3.cu‎
Lines changed: 197 additions & 0 deletions b/‎pykilosort/cuda/spikedetector3.cu‎
Lines changed: 197 additions & 0 deletions
diff --git a/‎pykilosort/datashift/datashift.py‎
Lines changed: 137 additions & 0 deletions b/‎pykilosort/datashift/datashift.py‎
Lines changed: 137 additions & 0 deletions
@@ -12,6 +12,34 @@
 logger = logging.getLogger(__name__)
 
 
+def getClosestChannels2(ycup, xcup, yc, xc, NchanClosest):
+    # this function outputs the closest channels to each channel,
+    # as well as a Gaussian-decaying mask as a function of pairwise distances
+    # sigma is the standard deviation of this Gaussian-mask
+
+    # compute distances between all pairs of channels
+    xc = cp.asarray(probe.xc, dtype=np.float32, order='F')
+    yc = cp.asarray(probe.yc, dtype=np.float32, order='F')
+    xcup = cp.asarray(xcup, dtype=np.float32, order='F')
+    ycup = cp.asarray(ycup, dtype=np.float32, order='F')
+    C2C = (xc[:] - xcup[:].T)^2 + (yc[:] - ycup[:].T).^2
+    C2C = cp.sqrt(C2C)
+    Nchan, NchanUp C2C.shape
+
+    # sort distances
+    isort = cp.argsort(C2C, axis=0)
+
+    # take NchanCLosest neighbors for each primary channel
+    iC = isort[:NchanClosest, :]
+
+    # in some cases we want a mask that decays as a function of distance between pairs of channels
+    # this is an awkward indexing to get the corresponding distances
+    ix = iC + cp.arange(0, Nchan * NchanUp, Nchan)
+    dist = C2C[ix]
+    
+    return iC, dist
+
+
 def getClosestChannels(probe, sigma, NchanClosest):
     # this function outputs the closest channels to each channel,
     # as well as a Gaussian-decaying mask as a function of pairwise distances
 
@@ -0,0 +1,197 @@
+const int  Nthreads = 1024,  NrankMax = 6, maxFR = 10000, nt0max=81, NchanMax = 17, nsizes = 5; /*
+ * Compile-time limits shared by the kernels in this file:
+ *   Nthreads - chunk length / time stride of Conv1D and max1D, and the base
+ *              size of their shared sample buffers (Nthreads+81 floats).
+ *   NrankMax - sizes the shared kernel buffer of Conv1D (81*NrankMax floats).
+ *   maxFR    - largest number of spikes maxChannels stores in st and cF.
+ *   nsizes   - number of spatial widths evaluated per site in sumChannels.
+ *   nt0max, NchanMax - not referenced by any kernel in this file; the
+ *              kernels use the literal 81 where nt0max would be expected.
+ */
+
+/*
+ * Conv1D: filter every row of `data` with Nrank temporal kernels.
+ *
+ * Reads Params[0]=NT, Params[1]=Nchan, Params[2]=nt0 and Params[4]=Nrank.
+ * Block `bid` works on the NT samples that start at data[NT*bid]
+ * (presumably one block per channel -- confirm against the launch).
+ * W holds Nrank kernels of nt0 taps each, stored back to back. For kernel
+ * `nid`, the value written to conv_sig[t + NT*bid + nid*NT*Nchan] is
+ *     sum over i < nt0 of  W[i + nid*nt0] * data[t + i + NT*bid].
+ *
+ * NOTE(review): the time stride of Nthreads and the shared buffer sizes imply
+ * a launch with exactly Nthreads threads per block, nt0 <= 81 and
+ * Nrank <= NrankMax -- confirm against the host code. Samples from the first
+ * tid0 that fails the loop condition onwards are never written.
+ */
+//////////////////////////////////////////////////////////////////////////////////////////
+__global__ void	Conv1D(const double *Params, const float *data, const float *W, float *conv_sig){
+    volatile __shared__ float  sW[81*NrankMax], sdata[(Nthreads+81)];
+    float y;
+    int tid, tid0, bid, i, nid, Nrank, NT, nt0,  Nchan;
+
+    tid 		= threadIdx.x;
+    bid 		= blockIdx.x;
+    
+    NT        = (int) Params[0];
+    Nchan     = (int) Params[1];
+    nt0       = (int) Params[2];
+    Nrank     = (int) Params[4];
+    
+    if(tid<nt0*Nrank) // stage all kernel taps in shared memory, one per thread
+        sW[tid]= W[tid];
+    __syncthreads();
+    
+    tid0 = 0;
+    while (tid0<NT-Nthreads-nt0+1){ // one chunk of Nthreads output samples per iteration
+        if (tid<nt0) // the first nt0 threads also load the chunk's leading nt0 samples
+            sdata[tid] = data[tid0 + tid + NT*bid];        
+        sdata[tid + nt0] = data[nt0+tid0 + tid+ NT*bid]; // every thread loads one of the following Nthreads samples
+        __syncthreads();
+                
+        for(nid=0;nid<Nrank;nid++){
+            y = 0.0f;
+            #pragma unroll 4
+            for(i=0;i<nt0;i++)
+                y    += sW[i + nid*nt0] * sdata[i+tid];                        
+            conv_sig[tid0  + tid + NT*bid + nid * NT * Nchan]   = y; // outputs of kernel nid are stored NT*Nchan apart
+        }
+        tid0+=Nthreads;
+        __syncthreads(); // sdata must not be overwritten before every thread has finished reading it
+    }
+}
+/*
+ * sumChannels: for each time sample and each output site (blockIdx.y),
+ * combine nearby channels with Gaussian spatial weights of nsizes different
+ * widths and keep the best-scoring (temporal kernel, width) pair.
+ *
+ * Reads Params[0]=NT, [1]=Nchan, [3]=NchanNear (also read as Nsum),
+ * [4]=Nrank and [9]=sigma. For site bidy, iC2[j + NchanNear*bidy] and
+ * dist[j + NchanNear*bidy] give the j-th neighbouring channel and its
+ * distance. Width k uses the weight exp(-d^2 / ((1+k)^2 * sigma^2)).
+ * `data` is indexed as [time + NT*channel + t*NT*Nchan], the same layout
+ * Conv1D writes. For every temporal kernel t the weighted channel sum a[k]
+ * is clamped at zero and scored as a[k]^2 / v2[k + nsizes*bidy]. The largest
+ * score goes to datasum[tid0 + NT*bidy] and its index t + k*Nrank goes to
+ * kkmax[tid0 + NT*bidy].
+ *
+ * NOTE(review): sA holds nsizes*20 weights, so NchanNear must not exceed 20
+ * and each block needs at least nsizes*NchanNear threads -- confirm against
+ * the launch configuration.
+ */
+//////////////////////////////////////////////////////////////////////////////////////////
+__global__ void  sumChannels(const double *Params, const float *data, 
+	float *datasum, int *kkmax, const int *iC2, const float *dist, const float *v2){
+    
+  int tid, tid0,t,k, kmax, bidx, bidy, NT, Nchan, NchanNear,j,iChan, Nsum, Nrank;
+  float  Cmax, C0; // C0 is declared but never used
+  float a[nsizes], d2;
+  float  sigma;
+  volatile __shared__ float  sA[nsizes * 20];
+  
+  
+  tid 		= threadIdx.x;
+  bidx 		= blockIdx.x;
+  bidy 		= blockIdx.y;
+  NT 		= (int) Params[0];
+  Nchan     = (int) Params[1];
+  NchanNear = (int) Params[3];  
+  Nrank     = (int) Params[4];
+  Nsum      = (int) Params[3]; // same parameter slot as NchanNear
+  sigma = (float) Params[9];
+  
+  if (tid<nsizes*NchanNear){ // one thread per (neighbour, width) weight
+      d2 = dist[tid/nsizes + NchanNear * bidy];        
+      k = tid%nsizes;
+      sA[tid] = expf( - (d2 * d2)/((1+k)*(1+k)*sigma*sigma)); // stored as sA[k + nsizes*neighbour]
+  }
+  __syncthreads();
+  
+  tid0 = tid + bidx * blockDim.x;
+  while (tid0<NT){
+      Cmax = 0.0f;
+      kmax = 0;
+      
+      for (t=0;t<Nrank;t++){                             
+          for(k=0; k<nsizes; k++)
+              a[k] = 0.;
+                
+          for(j=0; j<Nsum; j++){
+              iChan = iC2[j + NchanNear * bidy];              
+              for(k=0; k<nsizes; k++)
+                  a[k]  += sA[k + nsizes * j] * 
+                        data[tid0 + NT * iChan + t * NT * Nchan];
+          }
+          for(k=0; k<nsizes; k++){    
+              a[k] = max(a[k], 0.); // negative projections are discarded
+              if (a[k]*a[k] / v2[k + nsizes*bidy] > Cmax){
+                  Cmax = a[k]*a[k]/v2[k + nsizes*bidy];
+                  kmax = t + k*Nrank; // encodes both the temporal kernel t and the width k
+               }             
+          }
+      }
+      datasum[tid0 + NT * bidy] = Cmax;
+      kkmax[tid0 + NT * bidy]   = kmax;          
+
+      tid0 += blockDim.x * gridDim.x; // advance by the total number of threads along x
+  }
+}
+/*
+ * max1D: thresholded running maximum over time for every row of `data`.
+ *
+ * Reads Params[0]=NT, [2]=nt0, [5]=nt0min and [6]=spkTh. Block `bid` works
+ * on the NT samples starting at data[NT*bid]. For chunk start tid0, thread
+ * tid takes the maximum of 0 and the 2*nt0min samples beginning at
+ * data[tid0 + tid + NT*bid]. When that maximum exceeds spkTh*spkTh it is
+ * written to conv_sig[tid0 + nt0min + tid + NT*bid], i.e. nt0min samples
+ * into the window. Entries that fail the threshold are left untouched.
+ *
+ * NOTE(review): presumably the caller zero-initialises conv_sig and launches
+ * Nthreads threads per block; the window also appears to need
+ * 2*nt0min <= nt0 + 1 to stay inside the samples staged in sdata -- confirm.
+ */
+//////////////////////////////////////////////////////////////////////////////////////////
+__global__ void	max1D(const double *Params, const float *data, float *conv_sig){
+    
+    volatile __shared__ float  sdata[Nthreads+81];
+    float y, spkTh;
+    int tid, tid0, bid, i, NT, nt0, nt0min;
+    
+    NT 		= (int) Params[0];        
+    nt0       = (int) Params[2];        
+    nt0min    = (int) Params[5];
+    spkTh    = (float) Params[6];    
+    
+    tid 		= threadIdx.x;
+    bid 		= blockIdx.x;
+    
+    tid0 = 0;
+    while (tid0<NT-Nthreads-nt0+1){ // one chunk of Nthreads samples per iteration
+        if (tid<nt0) // the first nt0 threads also load the chunk's leading nt0 samples
+            sdata[tid]   = data[tid0 + tid + NT*bid];
+        sdata[tid + nt0] = data[nt0+tid0 + tid+ NT*bid];
+        __syncthreads();
+
+        y = 0.0f;
+        #pragma unroll 4
+        for(i=0;i<2*nt0min;i++)
+            y    = max(y, sdata[tid+i]);
+        
+        if (y>spkTh*spkTh) // the inputs are compared against the squared threshold
+            conv_sig[tid0 + 1*(nt0min) + tid + NT*bid]   = y;
+
+        tid0+=Nthreads;
+        __syncthreads(); // sdata must not be overwritten before every thread has finished reading it
+    }
+}
+/*
+ * maxChannels: pick spikes as samples that are maximal across neighbouring
+ * sites and record them.
+ *
+ * Reads Params[0]=NT, [1]=Nchan, [2]=nt0, [3]=NchanNear, [4]=Nrank,
+ * [5]=nt0min, [6]=spkTh, [7]=NchanUp and [8]=NchanNearUp. blockIdx.y selects
+ * the site i; the threads along x walk over time samples tid0.
+ * A sample is kept when
+ *   1. no neighbour iC2[j + NchanNearUp*i] (j >= 1) with
+ *      dist2[j + NchanNearUp*i] < 100 has a larger value in `data`,
+ *   2. data[tid0 + NT*i] exceeds spkTh*spkTh, and
+ *   3. dataraw[tid0 + NT*i] is larger than that value minus 1e-6.
+ * Each kept sample takes a slot `indx` from the atomic counter[0]. While
+ * indx < maxFR, st[4*indx .. 4*indx+3] receive the time, the site, sqrt of
+ * the dataraw value and the kkmax entry. cF[j + NchanNear*indx] receives
+ * dfilt for the NchanNear channels iC[j + NchanNear*i] at the winning
+ * temporal kernel.
+ *
+ * NOTE(review): presumably `data` is the output of max1D and `dataraw` /
+ * `kkmax` come from sumChannels, which would make test 3 a check that the
+ * sample is itself the temporal maximum -- confirm against the host code.
+ */
+//////////////////////////////////////////////////////////////////////////////////////////
+__global__ void  maxChannels(const double *Params, const float *dataraw, const float *data,
+	const int *iC,  const int *iC2, const float *dist2, const int *kkmax, 
+        const float *dfilt, int *st, int *counter, float *cF){
+    
+  int nt0, indx, tid, tid0, i, bid, NT, j,iChan, nt0min, Nrank, kfilt;
+  int Nchan, NchanNear, NchanUp, NchanNearUp, bidy ;
+  double Cf, d;
+  float spkTh, d2; // d2 is declared but never used
+  bool flag;
+ 
+  NT 		= (int) Params[0];
+  Nchan     = (int) Params[1];  
+  NchanNear = (int) Params[3];    
+  NchanUp     = (int) Params[7];  // read but not used below
+  NchanNearUp = (int) Params[8];    
+  nt0       = (int) Params[2];      
+  nt0min    = (int) Params[5];
+  spkTh    = (float) Params[6];
+  Nrank     = (int) Params[4];
+  
+  tid 		= threadIdx.x;
+  bid 		= blockIdx.x;
+  bidy = blockIdx.y;
+  
+  tid0 = tid + bid * blockDim.x;
+  while (tid0<NT-nt0-nt0min){ // the last nt0+nt0min samples are never examined
+      i = bidy;
+      Cf    = (double) data[tid0 + NT * i];
+      flag = true;
+      for(j=1; j<NchanNearUp; j++){ // j starts at 1: entry 0 of the neighbour list is skipped
+          if (dist2[j + NchanNearUp * i] < 100.){ // only neighbours below this distance value compete
+              iChan = iC2[j+ NchanNearUp * i];
+              if (data[tid0 + NT * iChan] > Cf){
+                  flag = false;
+                  break;
+              }
+          }
+      }
+      
+      if (flag){
+          if (Cf>spkTh*spkTh){
+              d = (double) dataraw[tid0+0 * (nt0min-1) + NT*i]; // the 0 * (nt0min-1) factor disables the time offset
+              if (d > Cf-1e-6){
+                  // this is a hit, atomicAdd and return spikes
+                  indx = atomicAdd(&counter[0], 1);
+                  if (indx<maxFR){ // hits beyond maxFR are still counted but not stored
+                      st[0+4*indx] = tid0;
+                      st[1+4*indx] = i;
+                      st[2+4*indx] = sqrt(d); // NOTE(review): st is int*, so the amplitude is converted to an integer
+                      st[3+4*indx] = kkmax[tid0+0*(nt0min-1) + NT*i];
+                      kfilt = st[3+4*indx]%Nrank; // temporal kernel index, if kkmax holds t + k*Nrank as written by sumChannels
+                      for(j=0; j<NchanNear; j++){
+                          iChan = iC[j+ NchanNear * i];
+                          cF[j + NchanNear * indx] = dfilt[tid0+0*(nt0min-1) + NT * iChan + kfilt * Nchan*NT];
+                      }
+                  }
+              }
+          }
+      }
+      
+      tid0 += blockDim.x * gridDim.x; // advance by the total number of threads along x
+  }
+}
@@ -0,0 +1,137 @@
+ir.xc, ir.yc = probe.xc, probe.yc
+ir.ops = Bunch()
+
+# The min and max of the y and x ranges of the channels
+ymin = min(ir.yc)
+ymax = max(ir.yc)
+xmin = min(ir.xc)
+xmax = max(ir.xc)
+
+# Determine the average vertical spacing between channels.
+# Usually all the vertical spacings are the same, i.e. on Neuropixels probes.
+dmin = np.median(np.diff(np.unique(ir.yc)))
+print(f"pitch is {dmin} um\n")
+ir.ops.yup = np.arange(
+    start=ymin, step=dmin / 2, stop=ymax
+)  # centers of the upsampled y positions
+
+# Determine the template spacings along the x dimension
+x_range = xmax - xmin
+npt = math.floor(
+    x_range / 16
+)  # this would come out as 16um for Neuropixels probes, which aligns with the geometry.
+ir.ops.xup = np.linspace(xmin, xmax, npt + 1)  # centers of the upsampled x positions
+
+spkTh = 10  # same as the usual "template amplitude", but for the generic templates
+
+# Extract all the spikes across the recording that are captured by the
+# generic templates. Very few real spikes are missed in this way.
+st3 = standalone_detector(ir, spkTh)
+
+# binning width across Y (um)
+dd = 5
+
+# detected depths
+dep = st3[:, 2]
+
+# min and max for the range of depths
+dmin = ymin - 1
+dep = dep - dmin
+
+dmax = 1 + ceil(max(dep) / dd)
+Nbatches = ir.temp.Nbatch
+
+# which batch each spike is coming from
+batch_id = st3[:, 5]  # ceil[st3[:,1]/dt]
+
+# preallocate matrix of counts with 20 bins, spaced logarithmically
+F = np.zeros(dmax, 20, Nbatches)
+for t in range(Nbatches):
+    # find spikes in this batch
+    ix = np.where(batch_id == t)
+
+    # subtract offset
+    dep = st3[ix, 2] - dmin
+
+    # amplitude bin relative to the minimum possible value
+    amp = log10(min(99, st3[ix, 3])) - log10(spkTh)
+
+    # normalization by maximum possible value
+    amp = amp / (log10(100) - log10(spkTh))
+
+    # multiply by 20 to distribute a [0,1] variable into 20 bins
+    # sparse is very useful here to do this binning quickly
+    M = sparse(ceil(dep / dd), ceil(1e-5 + amp * 20), ones(numel(ix), 1), dmax, 20)
+
+    # the counts themselves are taken on a logarithmic scale (some neurons
+    # fire too much!)
+    F[:, :, t] = log2(1 + M)
+end
+
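+##
+# TODO: the registration step below is still untranslated MATLAB and is
+# commented out, so `imin` and `yblk` (both used later in this script) are
+# never assigned until it is ported.
+# the 'midpoint' branch is for chronic recordings that have been
+# concatenated in the binary file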
+# if isfield(ops, 'midpoint')
+#    # register the first block as usual
+#    [imin1, F1] = align_block(F(:, :, 1:ops.midpoint))
+#    # register the second block as usual
+#    [imin2, F2] = align_block(F(:, :, ops.midpoint+1:end))
+#    # now register the average first block to the average second block
+#    d0 = align_pairs(F1, F2)
+#    # concatenate the shifts
+#    imin = [imin1 imin2 + d0]
+#    imin = imin - mean(imin)
+#    ops.datashift = 1
+# else
+#    # determine registration offsets
+#    ysamp = dmin + dd * [1:dmax] - dd/2
+#    [imin,yblk, F0] = align_block2(F, ysamp, ops.nblocks)
+# end
+
+##
+if opts.get("fig", True):
+    ax = plt.subplot()
+    # plot the shift trace in um
+    ax.plot(imin * dd)
+
+    ax = plt.subplot()
+    # raster plot of all spikes at their original depths
+    st_shift = st3[:, 2]  # + imin(batch_id)' * dd
+    for j in range(spkTh, 100):
+        # for each amplitude bin, plot all the spikes of that size in the
+        # same shade of gray
+        ix = st3[:, 3] == j  # the amplitudes are rounded to integers
+        ax.plot(
+            st3[ix, 1],
+            st_shift[ix],
+            ".",
+            "color",
+            [max(0, 1 - j / 40) for i in range(3)],
+        )  # the marker color here has been carefully tuned
+    plt.tight_layout()
+
+# if we're creating a registered binary file for visualization in Phy
+if opts.get("fbinaryproc", False):
+    with open(opts["fbinaryproc"], "w") as f:
+        pass
+
+# convert to um
+dshift = imin * dd
+# sort in case we still want to do "tracking"
+
+_, ir.iorig = np.sort(np.mean(dshift, 2))
+
+# sigma for the Gaussian process smoothing
+sig = ir.ops.sig
+# register the data batch by batch
+for ibatch in range(Nbatches):
+    shift_batch_on_disk2(ir, ibatch, dshift[ibatch, :], yblk, sig)
+end
+fprintf("time #2.2f, Shifted up/down #d batches. \n", toc, Nbatches)
+
+# keep track of dshift
+ir.dshift = dshift
+# keep track of original spikes
+ir.st0 = st3
+
+# next, we can just run a normal spike sorter, like Kilosort1, and forget about the transformation that has happened in here