
Commit 10af7d4

Initial commit.

1 parent 80badf4 commit 10af7d4

7 files changed: 354 insertions(+), 0 deletions(-)

Makefile

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
ALLIBS := $(patsubst %_module.cc, %.so, $(wildcard *_module.cc))

all: $(ALLIBS)

%.so: %_kernel.o %_module.o
	g++ -std=c++14 -shared -o $@ $^ $(TF_OPS_CFLAGS) -fPIC $(TF_OPS_LFLAGS)
	ln -sf compiled/$@ ../$@

%_module.o: %_module.cc
	g++ -std=c++14 -c -o $@ $< $(TF_OPS_CFLAGS) -fPIC

%_kernel.o: %_kernel.cc
	g++ -std=c++14 -c -o $@ $< $(TF_OPS_CFLAGS) -fPIC

clean:
	rm -f $(ALLIBS)

.PRECIOUS: %.o
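Note that TF_OPS_CFLAGS and TF_OPS_LFLAGS are not defined in this Makefile; they are expected to come from the environment. A plausible way to populate them (an assumption, not part of this commit) is from TensorFlow's own reported build flags:

# assumed convention: export these before running make; the variable names
# TF_OPS_CFLAGS / TF_OPS_LFLAGS come from the Makefile, the values from TF itself
import tensorflow as tf
print('TF_OPS_CFLAGS=' + ' '.join(tf.sysconfig.get_compile_flags()))
print('TF_OPS_LFLAGS=' + ' '.join(tf.sysconfig.get_link_flags()))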

README.md

Lines changed: 3 additions & 0 deletions
@@ -1,2 +1,5 @@
# tensorflow-custom-ops

Custom TensorFlow Ops for use in CMSSW

**Currently in development**

accknn_op.py

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@

import tensorflow as tf
from tensorflow.python.framework import ops
import globals as gl
from oc_helper_ops import SelectWithDefault

'''
Indices MUST be unique in each row.
The only exception is multiple self-references, which can be used as a sort of padding.
Alternatively, the index -1 is skipped (padding that is not TF compatible).
'''

_accknn_op = tf.load_op_library('accumulate_knn.so')
_accknn_grad_op = tf.load_op_library('accumulate_knn_grad.so')


def AccumulateLinKnn(weights, features, indices,
                     mean_and_max=True, force_tf=False):
    '''
    Accumulates neighbour features with linear weights (not exp(-w) as AccumulateKnn)
    '''
    if (not gl.acc_ops_use_tf_gradients) and (not force_tf):
        return _accknn_op.AccumulateKnn(distances=weights, features=features, indices=indices,
                                        n_moments=0, mean_and_max=mean_and_max)

    weights = tf.expand_dims(weights, axis=2)  # V x K x 1
    nfeat = SelectWithDefault(indices, features, 0.)  # V x K x F
    wfeat = weights * nfeat
    fmean = tf.reduce_mean(wfeat, axis=1)  # V x F
    fmax = tf.reduce_max(wfeat, axis=1)
    fout = fmean
    if mean_and_max:
        fout = tf.concat([fmean, fmax], axis=1)
    return fout, None


def AccumulateKnn(distances, features, indices,
                  mean_and_max=True, force_tf=False):
    '''
    .Output("out_features: float32")
    .Output("out_max_idxs: int32");

    Assumes that neighbour indices can be padded with -1, but not mixed,
    e.g. [1,4,-1,2] needs to be [1,4,2,-1].
    Other than the padding, the indices must be unique.
    '''
    # compatibility
    distances = tf.exp(-distances)

    if (not gl.acc_ops_use_tf_gradients) and (not force_tf):
        return _accknn_op.AccumulateKnn(distances=distances, features=features, indices=indices,
                                        n_moments=0, mean_and_max=mean_and_max)

    distances = tf.expand_dims(distances, axis=2)  # V x K x 1
    nfeat = SelectWithDefault(indices, features, 0.)  # V x K x F
    wfeat = distances * nfeat
    fmean = tf.reduce_mean(wfeat, axis=1)  # V x F
    fmax = tf.reduce_max(wfeat, axis=1)
    fout = fmean
    if mean_and_max:
        fout = tf.concat([fmean, fmax], axis=1)
    return fout, None


# this refers to the OP called AccumulateKnn, not the Python function above
@ops.RegisterGradient("AccumulateKnn")
def _AccumulateKnnGrad(op, grad, gradmaxidxs):
    """
    Gradient of AccumulateKnn: passes the incoming gradient to the compiled
    gradient op together with the op's inputs and the recorded max indices.
    """
    distances = op.inputs[0]
    features = op.inputs[1]
    max_feat_indices = op.outputs[1]
    neigh_indices = op.inputs[2]

    dist_grad, feat_grad = _accknn_grad_op.AccumulateKnnGrad(grad_from_out_features=grad,
                                                             distances=distances,
                                                             features=features,
                                                             neigh_indices=neigh_indices,
                                                             max_feat_indices=max_feat_indices)

    return [dist_grad, feat_grad, None]  # no gradient for indices
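A minimal usage sketch of the wrapper above (the shapes are an assumption: V vertices, K neighbours, F features; it needs the compiled .so files plus the surrounding globals and oc_helper_ops modules to be importable):

# hypothetical example: V=4 vertices, K=2 neighbours, F=3 features
import tensorflow as tf
from accknn_op import AccumulateKnn

distances = tf.random.uniform((4, 2))                      # V x K squared distances
features = tf.random.uniform((4, 3))                       # V x F per-vertex features
indices = tf.constant([[0, 1], [1, 0], [2, 3], [3, -1]])   # V x K, -1 padding at the end

out, _ = AccumulateKnn(distances, features, indices)       # V x 2F: [mean | max]
print(out.shape)                                           # (4, 6)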

accumulate_knn_kernel.cc

Lines changed: 168 additions & 0 deletions
@@ -0,0 +1,168 @@

#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#endif  // GOOGLE_CUDA

#include "tensorflow/core/framework/op_kernel.h"
#include "accumulate_knn_kernel.h"
#include "helpers.h"
#include <string> //size_t, just for helper function
#include <cmath>

#include <iostream> //remove later DEBUG FIXME

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

namespace functor {

static inline float distanceWeight(const float& distsq){
    return distsq;
}

// CPU specialization
template<typename dummy>
struct AccumulateKnnOpFunctor<CPUDevice, dummy> {
    void operator()(const CPUDevice &d,

            const float *d_distances,
            const float *d_feat,
            const int *d_idxs,

            float *d_out_feat,
            int *d_out_maxidxs,

            int n_vert,
            int n_neigh,
            int n_feat,

            int n_out_feat,

            int n_moments,
            bool mean_and_max) {

        for (size_t i_v = 0; i_v < n_vert; i_v++) {

            for (size_t i_f = 0; i_f < n_feat; i_f++) {
                float t_mean = 0;
                float t_max = 0;
                int max_i_n_gidx = 0;

                for (size_t i_n = 0; i_n < n_neigh; i_n++) {
                    int nidx = d_idxs[I2D(i_v, i_n, n_neigh)];

                    if (nidx < 0) continue;

                    float vnf = d_feat[I2D(nidx, i_f, n_feat)];
                    float distsq = d_distances[I2D(i_v, i_n, n_neigh)];
                    float wfeat = vnf * distanceWeight(distsq);
                    //DEBUGCOUT(wfeat);
                    t_mean += wfeat;
                    if (mean_and_max && (wfeat >= t_max || !i_n)) {
                        max_i_n_gidx = nidx;
                        t_max = wfeat;
                    }
                }
                t_mean /= (float)n_neigh;

                d_out_feat[I2D(i_v, i_f, n_out_feat)] = t_mean;
                if (mean_and_max) {
                    d_out_maxidxs[I2D(i_v, i_f, n_feat)] = max_i_n_gidx; //just used for gradient
                    d_out_feat[I2D(i_v, i_f + n_feat, n_out_feat)] = t_max;
                }
                //moments in n_coords x n_neigh loop here {}
            }
        }
    }
};

template<typename Device>
class AccumulateKnnOp : public OpKernel {
public:
    explicit AccumulateKnnOp(OpKernelConstruction *context) : OpKernel(context) {
        OP_REQUIRES_OK(context,
                context->GetAttr("n_moments", &n_moments));
        OP_REQUIRES_OK(context,
                context->GetAttr("mean_and_max", &mean_and_max));
    }

    void Compute(OpKernelContext *context) override {

        const Tensor &d_dist_tensor = context->input(0);
        const Tensor &d_feat_tensor = context->input(1);
        const Tensor &d_idxs_tensor = context->input(2);

        int n_vert = d_dist_tensor.dim_size(0);
        int n_neigh = d_idxs_tensor.dim_size(1);
        int n_coords = d_dist_tensor.dim_size(1);
        int n_feat = d_feat_tensor.dim_size(1);

        OP_REQUIRES(context, n_vert == d_idxs_tensor.dim_size(0) && n_vert == d_feat_tensor.dim_size(0),
                errors::InvalidArgument("AccumulateKnnOp expects first dimensions of all inputs to match."));

        OP_REQUIRES(context, n_neigh == d_dist_tensor.dim_size(1),
                errors::InvalidArgument("AccumulateKnnOp expects second dimension of distance and neighbour index tensor to match"));

        int n_out_feat = n_feat; //mean and max
        if (mean_and_max)
            n_out_feat *= 2;

        // after testing basic functionality!
        // n_out_feat += n_moments * n_feat * n_coords;

        TensorShape outputShape;
        outputShape.AddDim(n_vert);
        outputShape.AddDim(n_out_feat);

        Tensor *output_tensor = NULL;
        OP_REQUIRES_OK(context, context->allocate_output(0, outputShape, &output_tensor));

        TensorShape outputShape_max_idxs;
        outputShape_max_idxs.AddDim(n_vert);
        outputShape_max_idxs.AddDim(n_feat);

        Tensor *output_max_idxs_tensor = NULL;
        OP_REQUIRES_OK(context, context->allocate_output(1, outputShape_max_idxs, &output_max_idxs_tensor));

        AccumulateKnnOpFunctor<Device, int>()(
                context->eigen_device<Device>(),
                d_dist_tensor.flat<float>().data(),
                d_feat_tensor.flat<float>().data(),
                d_idxs_tensor.flat<int>().data(),
                output_tensor->flat<float>().data(),
                output_max_idxs_tensor->flat<int>().data(),
                n_vert,
                n_neigh,
                n_feat,
                n_out_feat,
                n_moments,
                mean_and_max
        );
    }

private:
    int n_moments;
    bool mean_and_max;
};

REGISTER_KERNEL_BUILDER(Name("AccumulateKnn").Device(DEVICE_CPU), AccumulateKnnOp<CPUDevice>);

#ifdef GOOGLE_CUDA
//extern template struct AccumulateKnnOpFunctor<GPUDevice, int>;
//REGISTER_KERNEL_BUILDER(Name("AccumulateKnn").Device(DEVICE_GPU), AccumulateKnnOp<GPUDevice>);
#endif  // GOOGLE_CUDA

}//functor
}//tensorflow
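For reference, a rough NumPy paraphrase of what the CPU functor above computes (an illustration, not part of the commit): per vertex and feature, the mean of the distance-weighted neighbour features over all K slots, plus the maximum and the index of the neighbour that produced it when mean_and_max is set.

# NumPy paraphrase of the CPU functor (illustration only)
import numpy as np

def accumulate_knn_ref(distances, features, indices, mean_and_max=True):
    n_vert, n_neigh = indices.shape
    n_feat = features.shape[1]
    out = np.zeros((n_vert, 2 * n_feat if mean_and_max else n_feat), np.float32)
    max_idxs = np.zeros((n_vert, n_feat), np.int32)
    for i_v in range(n_vert):
        for i_f in range(n_feat):
            t_mean, t_max, max_gidx = 0.0, 0.0, 0
            for i_n in range(n_neigh):
                nidx = indices[i_v, i_n]
                if nidx < 0:
                    continue  # -1 padding is skipped
                wfeat = features[nidx, i_f] * distances[i_v, i_n]
                t_mean += wfeat
                if mean_and_max and (wfeat >= t_max or i_n == 0):
                    max_gidx, t_max = nidx, wfeat
            # note: divides by n_neigh, not by the number of unpadded neighbours
            out[i_v, i_f] = t_mean / n_neigh
            if mean_and_max:
                max_idxs[i_v, i_f] = max_gidx  # just used for the gradient
                out[i_v, i_f + n_feat] = t_max
    return out, max_idxs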

accumulate_knn_kernel.h

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
// accumulate_knn_kernel.h
#ifndef ACCUMULATE_KNN_KERNEL_H
#define ACCUMULATE_KNN_KERNEL_H

namespace tensorflow {
namespace functor {

template<typename Device, typename dummy>
struct AccumulateKnnOpFunctor {
    void operator()(
            const Device &d,

            const float *d_distances,
            const float *d_feat,
            const int *d_idxs,

            float *d_out_feat,
            int *d_out_maxidxs,

            int n_vert,
            int n_neigh,
            int n_feat,

            int n_out_feat,

            int n_moments,
            bool mean_and_max);
};

} // namespace functor
} // namespace tensorflow

#endif //ACCUMULATE_KNN_KERNEL_H

accumulate_knn_module.cc

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"

using namespace tensorflow;

REGISTER_OP("AccumulateKnn")
    .Attr("n_moments: int")
    .Attr("mean_and_max: bool")
    .Input("distances: float32") //change to distances!!
    .Input("features: float32")
    .Input("indices: int32")
    .Output("out_features: float32")
    .Output("out_max_idxs: int32");
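Once compiled, the registration above can be exercised directly from Python, mirroring how accknn_op.py calls it; a minimal sketch (the library name accumulate_knn.so follows the Makefile pattern, and the tensor contents are placeholder assumptions):

# hypothetical direct call against the op registered above
import tensorflow as tf

_mod = tf.load_op_library('accumulate_knn.so')
out_features, out_max_idxs = _mod.AccumulateKnn(
    distances=tf.zeros((4, 2)), features=tf.zeros((4, 3)),
    indices=tf.zeros((4, 2), dtype=tf.int32),
    n_moments=0, mean_and_max=True)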

helpers.h

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
/*
 * helpers.h
 *
 * Created on: 8 May 2020
 *     Author: jkiesele
 */

#ifndef HGCALML_MODULES_COMPILED_HELPERS_H_
#define HGCALML_MODULES_COMPILED_HELPERS_H_

#include <iostream>

// row-major index flattening; outer parentheses guard against
// expansion inside larger expressions
#define I2D(i,j,Nj) ((j) + (Nj)*(i))
#define I3D(i,j,k,Nj,Nk) ((k) + (Nk)*((j) + (Nj)*(i)))
#define I4D(i,j,k,l,Nj,Nk,Nl) ((l) + (Nl)*((k) + (Nk)*((j) + (Nj)*(i))))
#define I5D(i,j,k,l,m,Nj,Nk,Nl,Nm) ((m) + (Nm)*((l) + (Nl)*((k) + (Nk)*((j) + (Nj)*(i)))))

#define DEBUGCOUT(x) {std::cout << #x <<": " << x << std::endl;}

#endif /* HGCALML_MODULES_COMPILED_HELPERS_H_ */
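The IxD macros compute row-major offsets into flat arrays; a quick Python illustration of the convention (for explanation only):

# row-major flattening, equivalent to the I2D macro above
import numpy as np

def i2d(i, j, Nj):
    return j + Nj * i

a = np.arange(12).reshape(3, 4)          # Ni=3, Nj=4
assert a.flat[i2d(2, 1, 4)] == a[2, 1]   # offset 9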
