
Commit c3641b6

[hannk] augment L2NormOp to allow specifying axis (#6335)
1 parent: d80bb23

5 files changed: +31 −6 lines

apps/hannk/delegate/hannk_delegate.cpp (+2 −1)

@@ -755,7 +755,8 @@ class HannkDelegateKernel final {
     OpPtr BuildL2Normalization(TfLiteContext *context, TfLiteNode *node) {
         auto input = GetTensorById(context, node->inputs->data[0]);
         auto output = GetTensorById(context, node->outputs->data[0]);
-        return make_op<L2NormalizationOp>(input, output);
+        const int axis = 0;  // In TFLite, normalization is always against the first axis.
+        return make_op<L2NormalizationOp>(input, output, axis);
     }

     OpPtr BuildUnary(TfLiteContext *context, TfLiteNode *node, UnaryOp::Operator type) {
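
For reference (not part of this commit): the op being built here performs L2 normalization along a single axis, i.e. each 1-D slice taken along that axis is divided by its own L2 norm. A minimal float sketch of the axis-0 case, assuming a dense 2-D layout with dimension 0 innermost (hannk's actual kernel is quantized uint8 and generated by Halide; the function name and epsilon guard below are illustrative):

#include <cmath>
#include <vector>

// Illustrative only: divide each slice x(:, j) of a dense extent0 x extent1
// array by its own L2 norm.
void l2_normalize_axis0(std::vector<float> &x, int extent0, int extent1) {
    for (int j = 0; j < extent1; j++) {
        float sum_sq = 0.0f;
        for (int i = 0; i < extent0; i++) {
            const float v = x[i + j * extent0];
            sum_sq += v * v;
        }
        const float inv_norm = 1.0f / std::sqrt(sum_sq + 1e-6f);  // epsilon avoids divide-by-zero
        for (int i = 0; i < extent0; i++) {
            x[i + j * extent0] *= inv_norm;
        }
    }
}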

apps/hannk/halide/normalizations_generator.cpp (+8 −0)

@@ -48,6 +48,14 @@ class L2Normalization : public Generator<L2Normalization> {
             .update()
             .atomic()
             .vectorize(rx, vector_size);
+
+        // Normally we'd expect both buffers to be planar, but in unusual
+        // cases, Hannk can transpose the buffers (to normalize along another
+        // dimension), so for those cases, we'll just fall back to less-efficient
+        // code.
+        input_.dim(0).set_stride(Expr());
+        output_.dim(0).set_stride(Expr());
+        output_.specialize(input_.dim(0).stride() == 1 && output_.dim(0).stride() == 1);
     }
 };
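
The schedule change above uses a general Halide pattern: dimension 0 of a buffer argument is constrained to stride 1 by default, set_stride(Expr()) drops that constraint so a transposed (non-planar) view can be passed in, and specialize() keeps a fast code path for the common dense case. A self-contained sketch of the pattern under those assumptions (a toy generator, not hannk's L2Normalization; the generator name and the vectorize factor are illustrative):

#include "Halide.h"

using namespace Halide;

class Halve : public Generator<Halve> {
public:
    Input<Buffer<uint8_t>> input_{"input", 2};
    Output<Buffer<uint8_t>> output_{"output", 2};

    void generate() {
        Var x("x"), y("y");
        output_(x, y) = input_(x, y) / 2;

        // Allow arbitrary strides along dimension 0 of both buffers...
        input_.dim(0).set_stride(Expr());
        output_.dim(0).set_stride(Expr());

        // ...but emit a separate, vectorized path when both are dense.
        output_.specialize(input_.dim(0).stride() == 1 &&
                           output_.dim(0).stride() == 1)
            .vectorize(x, natural_vector_size<uint8_t>());
    }
};

HALIDE_REGISTER_GENERATOR(Halve, halve)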

apps/hannk/interpreter/ops.cpp (+15 −2)

@@ -1137,10 +1137,23 @@ void L2NormalizationOp::execute() {
     const TensorPtr &in = input();
     const TensorPtr &out = output();

+    // Negative values for axis_ must be normalized by the parser
+    assert(axis_ >= 0 && axis_ < in->rank());
+
     if (in->type() == halide_type_of<uint8_t>() &&
         out->type() == halide_type_of<uint8_t>()) {
-        const auto &in_buf = in->buffer();
-        const auto &out_buf = out->buffer();
+        // Make local copies in case we need to transpose them
+        HalideBuffer<void> in_buf = in->buffer();
+        HalideBuffer<void> out_buf = out->buffer();
+
+        // TODO: we currently assume that the axis-is-0 case is the most common
+        // and most important, and optimize for it; the other cases, we just transpose,
+        // which currently requires less-efficient specializations in the Halide code.
+        // Revisit if this proves too slow in practice.
+        if (axis_ != 0) {
+            in_buf.transpose(0, axis_);
+            out_buf.transpose(0, axis_);
+        }

         const int input_zero = in->quantization().uniform_zero();
         assert(input_zero >= 0 && input_zero <= 255);
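
The transpose above works because Halide::Runtime::Buffer::transpose() is zero-copy: it only swaps the two dimensions' extents and strides in the buffer's metadata, so a kernel hard-coded to reduce along dimension 0 can be pointed at any axis. A standalone sketch (not hannk code) showing the effect:

#include <cstdio>

#include "HalideBuffer.h"

int main() {
    Halide::Runtime::Buffer<float> buf(4, 3);  // dim 0: extent 4, stride 1; dim 1: extent 3, stride 4
    buf.transpose(0, 1);                       // swap dims 0 and 1; no data is moved
    printf("extent0=%d stride0=%d\n", buf.dim(0).extent(), buf.dim(0).stride());
    // Prints "extent0=3 stride0=4": dimension 0 is no longer dense, which is
    // why the generator above must tolerate stride != 1 in dimension 0.
    return 0;
}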

apps/hannk/interpreter/ops.h (+4 −2)

@@ -228,9 +228,11 @@ class GatherOp : public Op {
 };

 class L2NormalizationOp : public Op {
+    const int axis_;
+
 public:
-    L2NormalizationOp(const TensorPtr &input, const TensorPtr &output)
-        : Op({input}, {output}) {
+    L2NormalizationOp(const TensorPtr &input, const TensorPtr &output, int axis)
+        : Op({input}, {output}), axis_(axis) {
     }

     void accept(OpVisitor *v) override;
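
Note that the constructor stores the axis unchecked; the assert added in ops.cpp above requires 0 <= axis < rank, and its comment says negative values must already have been normalized by the parser. A hypothetical helper for a caller that accepted TFLite-style negative axes might look like the following (no such helper exists in this commit; hannk's call sites always pass 0):

// Hypothetical: fold a possibly-negative axis into [0, rank) before
// constructing L2NormalizationOp.
inline int fold_negative_axis(int axis, int rank) {
    return axis < 0 ? axis + rank : axis;
}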

apps/hannk/tflite/tflite_parser.cpp (+2 −1)

@@ -365,7 +365,8 @@ class Parser {
     OpPtr parse_l2_normalization(const tflite::Operator *op) {
         TensorPtr input = tensors_[op->inputs()->Get(0)];
         TensorPtr output = tensors_[op->outputs()->Get(0)];
-        return make_op<L2NormalizationOp>(input, output);
+        const int axis = 0;  // In TFLite, normalization is always against the first axis.
+        return make_op<L2NormalizationOp>(input, output, axis);
     }

     OpPtr parse_reduction(const tflite::Operator *op, ReductionOp::Operator reduction_op) {
