From fa7264bbe8a2ef3b801067f053babe74b4a85604 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Mon, 17 Feb 2025 17:28:58 -0800 Subject: [PATCH] Add Tensor helper class for tests This CL does two things: first is adding `Tensor`, a helper class for multi-dimensional arrays that are frequently used in our tests (but currently are duplicating extent/stride computations frequently). Second is modifying two tests (slice and transpose) to make some changes: - Currently, subgraph tests compare subgraph to operator results. This changes tests to directly check the output, without running the operator code. - Currently, subgraph tests run a single random variation, and getting good coverage requires running the test many times. This changes the subgraph tests to test cover many more permutations in a single run. - Currently, subgraph tests dig into the internal implementation details of subgraphs (e.g. checking xnn_node_value state). This makes sense in some cases (e.g. fusion tests), but it is both hard to be certain that this covers real usage, and is brittle. IMO, tests should (as much as possible) attempt to verify the behavior is as expected via the APIs that are visible to the user of the thing they are testing. For the subgraph API, that means we should just make sure the subgraph works as expected. This change required a few minor cleanups: - `xnnpack::Buffer` needs to be able to distinguish between "extra bytes" and real data. - To test things like transpose, concat, slice, etc., I found it helpful to add plain `xnn_datatype_uintX` datatypes. I don't love the idea of adding these to the public API when they don't have a lot of use cases, but I decided this is better than hacking the tests to use whatever datatype is available, which could be complicated (e.g. we'd have to use fp16 or bfloat16 to test transpose of 16-bit elements). - There is now some overlap between `RuntimeTester` and `SubgraphTester`. 
I think we should deprecate `RuntimeTester` and consolidate everything in `SubgraphTester`, because we can't return `RuntimeTester` from the base class `SubgraphTester` builder methods. This is a minor difficulty, but it also seems like the reason to separate them is minor too. PiperOrigin-RevId: 727983393 --- BUILD.bazel | 7 +- CMakeLists.txt | 1 - include/xnnpack.h | 4 + src/datatype.c | 15 + src/enums/datatype-strings.c | 6 + src/tensor.c | 3 + src/xnnpack/buffer.h | 218 +++++++++++++- src/xnnpack/datatype.h | 6 + test/BUILD.bazel | 46 +-- test/buffer.cc | 45 +++ test/runtime-tester.h | 41 +-- test/static-slice.cc | 555 +++++------------------------------ test/static-transpose.cc | 495 ++++--------------------------- test/subgraph-tester.h | 154 +++++++++- test/transpose-reshape.cc | 71 ----- 15 files changed, 619 insertions(+), 1048 deletions(-) create mode 100644 test/buffer.cc delete mode 100644 test/transpose-reshape.cc diff --git a/BUILD.bazel b/BUILD.bazel index a405263c807..a33b8664896 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -561,7 +561,12 @@ xnnpack_cc_library( xnnpack_cxx_library( name = "buffer", hdrs = ["src/xnnpack/buffer.h"], - deps = [":datatype"], + deps = [ + ":common", + ":datatype", + ":math", + ":xnnpack_h", + ], ) xnnpack_cc_library( diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d35815a295..43e229ef80c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1393,7 +1393,6 @@ IF(XNNPACK_BUILD_TESTS) static-reshape static-resize-bilinear-2d static-transpose - transpose-reshape unary unpooling-2d) FOREACH(TEST ${LIBRARY_SUBGRAPH_UNIT_TESTS}) diff --git a/include/xnnpack.h b/include/xnnpack.h index 7d489f47238..588fd91d9fe 100644 --- a/include/xnnpack.h +++ b/include/xnnpack.h @@ -299,6 +299,10 @@ enum xnn_datatype { /// Packed quantized 8-bit unsigned integer with shared per-Value quantization /// parameters. xnn_datatype_pqint8 = 17, + /// Unsigned integer datatype without quantization parameters. 
+ xnn_datatype_uint8 = 18, + xnn_datatype_uint16 = 19, + xnn_datatype_uint32 = 20, }; /// Define a tensor-type Value and add it to a Subgraph. diff --git a/src/datatype.c b/src/datatype.c index d12d6a08fe5..b85619b3048 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -11,6 +11,9 @@ bool xnn_datatype_is_real(enum xnn_datatype t) { switch (t) { case xnn_datatype_invalid: case xnn_datatype_int32: + case xnn_datatype_uint8: + case xnn_datatype_uint16: + case xnn_datatype_uint32: return false; case xnn_datatype_fp32: case xnn_datatype_fp16: @@ -55,6 +58,9 @@ bool xnn_datatype_is_integral(enum xnn_datatype t) { case xnn_datatype_pfp32: return false; case xnn_datatype_int32: + case xnn_datatype_uint8: + case xnn_datatype_uint16: + case xnn_datatype_uint32: return true; } XNN_UNREACHABLE; @@ -82,6 +88,9 @@ bool xnn_datatype_is_quantized(enum xnn_datatype t) { case xnn_datatype_int32: case xnn_datatype_pfp16: case xnn_datatype_pfp32: + case xnn_datatype_uint8: + case xnn_datatype_uint16: + case xnn_datatype_uint32: return false; } XNN_UNREACHABLE; @@ -103,16 +112,19 @@ size_t xnn_datatype_log2_size_bits(enum xnn_datatype t) { case xnn_datatype_qdint8: case xnn_datatype_qduint8: case xnn_datatype_qpint8: + case xnn_datatype_uint8: return 3; case xnn_datatype_fp16: case xnn_datatype_bf16: case xnn_datatype_pfp16: + case xnn_datatype_uint16: return 4; case xnn_datatype_qint32: case xnn_datatype_qcint32: case xnn_datatype_int32: case xnn_datatype_fp32: case xnn_datatype_pfp32: + case xnn_datatype_uint32: return 5; } XNN_UNREACHABLE; @@ -154,6 +166,9 @@ bool xnn_datatype_is_byte_addressable(enum xnn_datatype t) { case xnn_datatype_qduint8: case xnn_datatype_int32: case xnn_datatype_fp32: + case xnn_datatype_uint8: + case xnn_datatype_uint16: + case xnn_datatype_uint32: return true; } XNN_UNREACHABLE; diff --git a/src/enums/datatype-strings.c b/src/enums/datatype-strings.c index 87c8d99b6f4..38f48bb0808 100644 --- a/src/enums/datatype-strings.c +++ 
b/src/enums/datatype-strings.c @@ -50,6 +50,12 @@ const char* xnn_datatype_to_string(enum xnn_datatype type) { return "INT32"; case xnn_datatype_qbint4: return "QBINT4"; + case xnn_datatype_uint8: + return "UINT8"; + case xnn_datatype_uint16: + return "UINT16"; + case xnn_datatype_uint32: + return "UINT32"; } XNN_UNREACHABLE; return NULL; diff --git a/src/tensor.c b/src/tensor.c index a5ac25ecc94..ab810b22913 100644 --- a/src/tensor.c +++ b/src/tensor.c @@ -128,6 +128,9 @@ enum xnn_status xnn_define_tensor_value( case xnn_datatype_int32: case xnn_datatype_pfp16: // TODO: Does this really belong here? case xnn_datatype_pfp32: // TODO: Does this really belong here? + case xnn_datatype_uint8: + case xnn_datatype_uint16: + case xnn_datatype_uint32: break; default: xnn_log_error("failed to create Dense Tensor value: unsupported datatype %s (%d)", diff --git a/src/xnnpack/buffer.h b/src/xnnpack/buffer.h index 2ee17481d1b..2feed0642e1 100644 --- a/src/xnnpack/buffer.h +++ b/src/xnnpack/buffer.h @@ -7,12 +7,14 @@ #define __XNNPACK_TEST_BUFFER_H_ #include +#include #include #include #include #include #include #include +#include #include "xnnpack.h" #include "xnnpack/common.h" @@ -46,6 +48,10 @@ class NumericLimits> { } }; +struct PaddingBytes { + size_t value; +}; + // This is a container similar to std::vector, but it leaves the memory // uninitialized, supports alignment. 
// TODO: It would be good if this also managed padding in a way that allowed @@ -99,8 +105,10 @@ class Buffer { using const_iterator = const T*; Buffer() : data_(nullptr), size_(0) {} - explicit Buffer(size_t size) - : data_(reinterpret_cast(allocate(size * sizeof(T)))), size_(size) {} + explicit Buffer(size_t size, PaddingBytes extra_bytes = {0}) + : data_(reinterpret_cast( + allocate(size * sizeof(T) + extra_bytes.value))), + size_(size) {} Buffer(size_t size, T value) : Buffer(size) { std::fill(begin(), end(), value); } @@ -165,6 +173,212 @@ void fill_uniform_random_bits(T* data, size_t size, Rng& rng) { } } +// Returns {x[i] for i in perm} +template +std::vector permute(const std::vector& perm, + const std::vector& x) { + std::vector result(perm.size()); + for (size_t i = 0; i < perm.size(); ++i) { + result[i] = x[perm[i]]; + } + return result; +} + +// This stores a multi-dimensional array in a Buffer +class Tensor { + public: + using value_type = T; + using iterator = typename xnnpack::Buffer::iterator; + using const_iterator = typename xnnpack::Buffer::const_iterator; + + using index_type = std::vector; + + Tensor() = default; + // Constructs an array with strides in descending order, with no + // padding/alignment between dimensions. 
+ explicit Tensor(index_type extents, PaddingBytes extra_bytes = {0}) + : extents_(std::move(extents)), strides_(extents_.size()) { + size_t stride = 1; + for (size_t i = rank(); i > 0; --i) { + strides_[i - 1] = stride; + stride *= extents_[i - 1]; + } + data_ = Buffer(stride, extra_bytes); + begin_ = data_.begin(); + end_ = data_.end(); + } + Tensor(Tensor&& other) : Tensor() { + std::swap(data_, other.data_); + std::swap(begin_, other.begin_); + std::swap(end_, other.end_); + std::swap(extents_, other.extents_); + std::swap(strides_, other.strides_); + } + + Tensor& operator=(Tensor&& other) { + std::swap(data_, other.data_); + std::swap(begin_, other.begin_); + std::swap(end_, other.end_); + std::swap(extents_, other.extents_); + std::swap(strides_, other.strides_); + return *this; + } + + // Returns true if every stride is the product of the following extents, i.e. + // the buffer can be interpreted as a flat array without considering the + // strides. + bool is_contiguous() const { + size_t stride = 1; + for (size_t i = rank(); i > 0; --i) { + if (strides_[i - 1] != stride) { + return false; + } + stride *= extents_[i - 1]; + } + return true; + } + + const index_type& extents() const { return extents_; } + const index_type& strides() const { return strides_; } + size_t extent(size_t dim) const { return extents_[dim]; } + size_t stride(size_t dim) const { return strides_[dim]; } + + size_t rank() const { return extents_.size(); } + bool empty() const { return begin_ >= end_; } + + T* base() { return begin_; } + const T* base() const { return begin_; } + + // Form a reference to an element at a particular index. + T& operator()(const index_type& indices) { + return *(begin_ + flat_offset(indices)); + } + const T& operator()(const index_type& indices) const { + return *(begin_ + flat_offset(indices)); + } + + template + T& operator()(Args... args) { + return operator()(index_type{args...}); + } + template + const T& operator()(Args... 
args) const { + return operator()(index_type{args...}); + } + + // The following functions can only be used if `is_contiguous` is true. + T* data() { + assert(is_contiguous()); + return begin_; + } + const T* data() const { + assert(is_contiguous()); + return begin_; + } + size_t size() const { + assert(is_contiguous()); + return data_.size(); + } + T* begin() { return data(); } + T* end() { return end_; } + const T* begin() const { return data(); } + const T* end() const { return end_; } + const T* cbegin() const { return data(); } + const T* cend() const { return end_; } + T& operator[](size_t index) { return data()[index]; } + const T& operator[](size_t index) const { return data()[index]; } + + // This does not actually transpose any data in memory, it just changes the + // strides. To implement the transpose in memory, use this, followed by + // `make_copy` below. + Tensor& transpose(std::vector perm) { + extents_ = permute(perm, extents_); + strides_ = permute(perm, strides_); + return *this; + } + + // This uses the same rules for indexing as numpy, i.e. negative numbers are + // offset are added to the extents. + Tensor& slice(std::vector begins, + std::vector ends) { + assert(rank() == begins.size()); + assert(rank() == ends.size()); + + std::vector offsets(rank()); + for (size_t i = 0; i < rank(); ++i) { + offsets[i] = begins[i] < 0 ? extents_[i] + begins[i] : begins[i]; + extents_[i] = + (ends[i] <= 0 ? extents_[i] + ends[i] : ends[i]) - offsets[i]; + } + + begin_ = &operator()(offsets); + end_ = &operator()(extents_); + + return *this; + } + + // Make a copy of the buffer. The result will be contiguous, i.e. the strides + // of this buffer are lost when copying. 
+ Tensor make_copy(PaddingBytes extra_bytes = {0}) const { + Tensor result(extents_, extra_bytes); + copy_impl(rank(), extents_.data(), strides_.data(), base(), + result.strides_.data(), result.base()); + return result; + } + + private: + static void copy_impl(size_t rank, const size_t* extents, + const size_t* src_strides, const T* src, + const size_t* dst_strides, T* dst) { + if (rank == 0) { + *dst = *src; + return; + } else { + --rank; + size_t extent = *extents++; + size_t src_stride = *src_strides++; + size_t dst_stride = *dst_strides++; + if (rank == 0 && src_stride == 1 && dst_stride == 1) { + std::copy_n(src, extent, dst); + } else { + for (size_t i = 0; i < extent; ++i) { + copy_impl(rank, extents, src_strides, src, dst_strides, dst); + src += src_stride; + dst += dst_stride; + } + } + } + } + + // Compute the offset of an index from the pointer to element 0. + size_t flat_offset(const index_type& indices) const { + assert(indices.size() == rank()); + size_t result = 0; + for (size_t i = 0; i < rank(); ++i) { + result += strides_[i] * indices[i]; + } + return result; + } + + index_type extents_; + index_type strides_; + xnnpack::Buffer data_; + T* begin_ = nullptr; + T* end_ = nullptr; +}; + +template +std::vector random_shape(Rng& rng, size_t rank, size_t min_dim = 1, size_t max_dim = 9) { + std::vector shape(rank); + for (size_t i = 0; i < rank; ++i) { + shape[i] = (rng() % (max_dim - min_dim + 1)) + min_dim; + } + return shape; +} + }; // namespace xnnpack #endif // __XNNPACK_TEST_BUFFER_H_ diff --git a/src/xnnpack/datatype.h b/src/xnnpack/datatype.h index c8985106173..c4cee620680 100644 --- a/src/xnnpack/datatype.h +++ b/src/xnnpack/datatype.h @@ -105,6 +105,12 @@ xnn_datatype xnn_datatype_of() { return xnn_datatype_fp32; } else if (std::is_same::value) { return xnn_datatype_int32; + } else if (std::is_same::value) { + return xnn_datatype_uint8; + } else if (std::is_same::value) { + return xnn_datatype_uint16; + } else if (std::is_same::value) { + 
return xnn_datatype_uint32; } else { return xnn_datatype_invalid; } diff --git a/test/BUILD.bazel b/test/BUILD.bazel index e28688a5f6b..53474193b3b 100644 --- a/test/BUILD.bazel +++ b/test/BUILD.bazel @@ -1557,6 +1557,20 @@ xnnpack_cxx_library( ], ) +xnnpack_cxx_library( + name = "subgraph_tester", + testonly = True, + hdrs = [ + "subgraph-tester.h", + ], + deps = xnnpack_test_deps_for_library() + [ + ":replicable_random_device", + ":runtime_flags", + "//:buffer", + "//:xnnpack_h", + ], +) + xnnpack_unit_test( name = "unary_test", srcs = [ @@ -1575,6 +1589,17 @@ xnnpack_unit_test( ], ) +xnnpack_unit_test( + name = "buffer_test", + srcs = [ + "buffer.cc", + ], + deps = [ + ":replicable_random_device", + "//:buffer", + ], +) + [xnnpack_unit_test( name = "%s_test" % operator, srcs = [ @@ -1583,6 +1608,7 @@ xnnpack_unit_test( deps = [ ":replicable_random_device", ":runtime_flags", + ":subgraph_tester", ":subgraph_unary_tester", "//:XNNPACK", "//:buffer", @@ -2040,19 +2066,6 @@ xnnpack_unit_test( ], ) -xnnpack_unit_test( - name = "transpose_reshape_test", - srcs = [ - "transpose-reshape.cc", - ], - deps = [ - ":runtime_flags", - "//:XNNPACK", - "//:node_type", - "//:subgraph", - ], -) - xnnpack_unit_test( name = "unpooling_2d_test", srcs = [ @@ -2140,14 +2153,11 @@ xnnpack_unit_test( xnnpack_unit_test( name = "subgraph_nchw_test", - srcs = [ - "subgraph-nchw.cc", - "subgraph-tester.h", - ], + srcs = ["subgraph-nchw.cc"], deps = [ ":replicable_random_device", + ":subgraph_tester", "//:XNNPACK", - "//:buffer", "//:node_type", "//:subgraph", ], diff --git a/test/buffer.cc b/test/buffer.cc new file mode 100644 index 00000000000..20d02c2a514 --- /dev/null +++ b/test/buffer.cc @@ -0,0 +1,45 @@ +#include + +#include +#include +#include "replicable_random_device.h" +#include "xnnpack/buffer.h" + +TEST(Tensor, Basic) { + xnnpack::ReplicableRandomDevice rng; + + xnnpack::Tensor test({3, 4, 5}); + ASSERT_THAT(test.extents(), testing::ElementsAre(3, 4, 5)); + 
ASSERT_THAT(test.strides(), testing::ElementsAre(20, 5, 1)); + xnnpack::fill_uniform_random_bits(test.data(), test.size(), rng); + + ASSERT_EQ(&test(0, 0, 0), test.base()); + ASSERT_EQ(&test(1, 0, 0), test.base() + 20); + ASSERT_EQ(&test(0, 1, 0), test.base() + 5); + ASSERT_EQ(&test(0, 0, 1), test.base() + 1); + + test.transpose({2, 1, 0}); + ASSERT_THAT(test.extents(), testing::ElementsAre(5, 4, 3)); + ASSERT_THAT(test.strides(), testing::ElementsAre(1, 5, 20)); + + xnnpack::Tensor transposed = test.make_copy(); + ASSERT_THAT(transposed.extents(), testing::ElementsAre(5, 4, 3)); + ASSERT_THAT(transposed.strides(), testing::ElementsAre(12, 3, 1)); + ASSERT_NE(transposed.base(), test.base()); + ASSERT_EQ(transposed(0, 0, 0), test(0, 0, 0)); + ASSERT_EQ(transposed(1, 0, 0), test(1, 0, 0)); + ASSERT_EQ(transposed(2, 1, 1), test(2, 1, 1)); + + xnnpack::Tensor test2(std::move(transposed)); + ASSERT_EQ(transposed.base(), nullptr); + ASSERT_NE(test2.base(), nullptr); + + ASSERT_TRUE(test2.is_contiguous()); + test2.slice({1, 1, 1}, {4, 3, 2}); + ASSERT_FALSE(test2.is_contiguous()); + + xnnpack::Tensor sliced = test2.make_copy(); + ASSERT_TRUE(sliced.is_contiguous()); + ASSERT_EQ(sliced(0, 0, 0), test(1, 1, 1)); + ASSERT_EQ(sliced(1, 0, 0), test(2, 1, 1)); +} diff --git a/test/runtime-tester.h b/test/runtime-tester.h index c71dc9b9710..a0075374f94 100644 --- a/test/runtime-tester.h +++ b/test/runtime-tester.h @@ -29,7 +29,7 @@ class RuntimeTester : public SubgraphTester { template xnnpack::Buffer RunWithFusion() { Run(); - xnnpack::Buffer& tensor = this->external_tensors_.at(this->output_id_); + xnnpack::Buffer& tensor = this->buffers_.at(this->output_id_); xnnpack::Buffer output = xnnpack::Buffer(tensor.size() / sizeof(float)); std::memcpy(output.data(), tensor.data(), tensor.size()); return output; @@ -38,7 +38,7 @@ class RuntimeTester : public SubgraphTester { template xnnpack::Buffer RunWithoutFusion() { Run(XNN_FLAG_NO_OPERATOR_FUSION | xnn_test_runtime_flags()); - 
xnnpack::Buffer& tensor = this->external_tensors_.at(this->output_id_); + xnnpack::Buffer& tensor = this->buffers_.at(this->output_id_); xnnpack::Buffer output = xnnpack::Buffer(tensor.size() / sizeof(float)); memcpy(output.data(), tensor.data(), tensor.size()); return output; @@ -46,7 +46,7 @@ class RuntimeTester : public SubgraphTester { template xnnpack::Buffer RepeatRun() { - xnnpack::Buffer& tensor = this->external_tensors_.at(this->output_id_); + xnnpack::Buffer& tensor = this->buffers_.at(this->output_id_); xnn_invoke_runtime(Runtime()); xnnpack::Buffer output = xnnpack::Buffer(tensor.size() / sizeof(float)); memcpy(output.data(), tensor.data(), tensor.size()); @@ -61,31 +61,33 @@ class RuntimeTester : public SubgraphTester { } void SetupRuntime() { + auto& output = buffers_[output_id_]; + // Scramble output tensor. + std::fill(output.begin(), output.end(), 0xA8); + std::vector externals; + externals.reserve(this->external_tensors_.size()); for (auto it = this->external_tensors_.begin(); it != this->external_tensors_.end(); ++it) { - if (it->first == this->output_id_) { - // Scramble output tensor. - std::fill(it->second.begin(), it->second.end(), 0xA8); - } - externals.push_back(xnn_external_value{it->first, it->second.data()}); + externals.push_back(xnn_external_value{it->first, it->second}); } ASSERT_EQ(xnn_status_success, xnn_setup_runtime(Runtime(), externals.size(), externals.data())); - externals_ = externals; + externals_ = std::move(externals); } void SetupRuntimeV2() { + auto& output = buffers_[output_id_]; + // Scramble output tensor. + std::fill(output.begin(), output.end(), 0xA8); + std::vector externals; + externals.reserve(this->external_tensors_.size()); for (auto it = this->external_tensors_.begin(); it != this->external_tensors_.end(); ++it) { - if (it->first == this->output_id_) { - // Scramble output tensor. 
- std::fill(it->second.begin(), it->second.end(), 0xA8); - } - externals.push_back(xnn_external_value{it->first, it->second.data()}); + externals.push_back(xnn_external_value{it->first, it->second}); } ASSERT_EQ(xnn_status_success, xnn_setup_runtime_v2(Runtime(), externals.size(), externals.data())); - externals_ = externals; + externals_ = std::move(externals); } size_t NumOperators() { @@ -103,12 +105,11 @@ class RuntimeTester : public SubgraphTester { } void ReshapeInput(const std::vector& dims, uint32_t external_id) { - xnn_status status = xnn_reshape_external_value(Runtime(), external_id, dims.size(), dims.data()); - EXPECT_EQ(status, xnn_status_success); size_t num_elements = NumElements(dims); xnnpack::Buffer input(num_elements * sizeof(float) + XNN_EXTRA_BYTES * sizeof(char)); std::generate(input.begin(), input.end(), [&]() { return f32dist(rng_); }); - external_tensors_[external_id] = std::move(input); + ReshapeExternalTensor(dims, input.data(), external_id); + buffers_[external_id] = std::move(input); } void ReshapeRuntime() { @@ -119,7 +120,8 @@ class RuntimeTester : public SubgraphTester { status = xnn_get_external_value_shape(Runtime(), output_id_, &num_dims, output_dims.data()); output_dims.resize(num_dims); EXPECT_EQ(status, xnn_status_success); - external_tensors_[output_id_] = xnnpack::Buffer(NumElements(output_dims) * sizeof(float)); + buffers_[output_id_] = xnnpack::Buffer(NumElements(output_dims) * sizeof(float)); + external_tensors_[output_id_] = buffers_[output_id_].data(); } private: @@ -131,7 +133,6 @@ class RuntimeTester : public SubgraphTester { ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(Runtime())); }; - std::unique_ptr runtime_{nullptr, xnn_delete_runtime}; std::vector externals_; }; diff --git a/test/static-slice.cc b/test/static-slice.cc index 37b3c0a9476..558a69a941f 100644 --- a/test/static-slice.cc +++ b/test/static-slice.cc @@ -3,499 +3,98 @@ // This source code is licensed under the BSD-style license found in the // LICENSE 
file in the root directory of this source tree. -#include -#include -#include #include #include -#include #include -#include #include #include #include #include "xnnpack.h" -#include "xnnpack/math.h" -#include "xnnpack/node-type.h" -#include "xnnpack/operator.h" -#include "xnnpack/subgraph.h" +#include "xnnpack/buffer.h" #include "replicable_random_device.h" -#include "subgraph-unary-tester.h" -#include "runtime-flags.h" - -template class StaticSliceTest : public UnaryTest { -public: - StaticSliceTest() - : UnaryTest{} - { - std::tie(begins, offsets) = RandomBegins(this->dims); - std::tie(ends, sizes) = RandomEnds(this->dims); - - // Overwrite outputs since slice output size is different from input. - this->operator_output = xnnpack::Buffer(this->NumElements(sizes)); - this->subgraph_output = xnnpack::Buffer(this->NumElements(sizes)); - } - -private: - std::tuple, std::vector> RandomBegins(const std::vector& input_dims) - { - std::vector begins(input_dims.size()); - std::vector offsets(input_dims.size()); - for (size_t i = 0; i < input_dims.size(); i++) { - const int64_t range = input_dims[i]; - auto offset_dist = std::uniform_int_distribution(-range, range - 1); - begins[i] = offset_dist(this->rng); - offsets[i] = begins[i] < 0 ? 
input_dims[i] + begins[i] : begins[i]; +#include "subgraph-tester.h" + +template +void TestSlice(size_t rank) { + xnnpack::ReplicableRandomDevice rng; + + ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); + + for (int iters = 0; iters < 100; ++iters) { + std::vector dims = random_shape(rng, rank); + + std::vector begins(dims.size()); + std::vector ends(dims.size()); + for (size_t i = 0; i < dims.size(); i++) { + const int64_t range = dims[i]; + auto begin_dist = std::uniform_int_distribution(-range, range - 1); + begins[i] = begin_dist(rng); + if (begins[i] < 0) { + // Negative begin, negative end + auto end_dist = + std::uniform_int_distribution(begins[i] + 1, 0); + ends[i] = end_dist(rng); + } else if (rng() % 2 == 0) { + // Positive begin, negative end + auto end_dist = + std::uniform_int_distribution(begins[i] + 1 - range, 0); + ends[i] = end_dist(rng); + } else { + // Positive begin, positive end + auto end_dist = + std::uniform_int_distribution(begins[i] + 1, range); + ends[i] = end_dist(rng); + } } - return {begins, offsets}; - } - std::tuple, std::vector> RandomEnds( - const std::vector& input_dims) - { - std::vector ends(input_dims.size()); - std::vector sizes(input_dims.size()); - for (size_t i = 0; i < input_dims.size(); i++) { - const int64_t range = input_dims[i] - offsets[i]; - auto size_dist = std::uniform_int_distribution(-range + 1, range); - int64_t r = size_dist(this->rng); - // ends[i] == 0 means "infer end as largest interval" - ends[i] = r <= 0 ? r : offsets[i] + r; - sizes[i] = ends[i] <= 0 ? 
input_dims[i] + ends[i] - offsets[i] : ends[i] - offsets[i]; + std::vector strides(dims.size(), 1); + // Define subgraph + xnnpack::SubgraphTester subgraph(2); + subgraph + .AddInputTensor(rank, xnn_datatype_of(), 0) + .AddOutputTensor(rank, xnn_datatype_of(), 1) + .AddSlice(begins, ends, strides, 0, 1) + .CreateRuntime(); + + for (int reshape = 0; reshape < 2; ++reshape) { + std::vector shape = random_shape(rng, rank); + for (size_t i = 0; i < rank; ++i) { + shape[i] += dims[i]; + } + + xnnpack::Tensor input(shape, xnnpack::PaddingBytes{XNN_EXTRA_BYTES}); + xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng); + + // Use the input prior to slicing in the subgraph. + subgraph.ReshapeExternalTensor(shape, input.data(), 0); + + xnnpack::Tensor sliced_input = input.slice(begins, ends).make_copy(); + xnnpack::Tensor output(sliced_input.extents()); + subgraph + .ReshapeExternalTensor(sliced_input.extents(), output.data(), 1) + .ReshapeRuntime() + .SetupRuntime() + .InvokeRuntime(); + + // Verify results. 
+ ASSERT_THAT(output, testing::ElementsAreArray(sliced_input)); } - return {ends, sizes}; - } - -protected: - std::vector begins; - std::vector ends; - std::vector offsets; - std::vector sizes; -}; - -using StaticSliceTestQS8 = StaticSliceTest; -using StaticSliceTestQU8 = StaticSliceTest; -using StaticSliceTestF16 = StaticSliceTest; -using StaticSliceTestF32 = StaticSliceTest; - -TEST_F(StaticSliceTestQS8, define) -{ - const int32_t zero_point = i8dist(rng); - const float scale = scale_dist(rng); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, zero_point, scale, dims.size(), dims.data(), nullptr, 0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, zero_point, scale, sizes.size(), sizes.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_slice_v3(subgraph, dims.size(), begins.data(), ends.data(), /*strides*/nullptr, input_id, output_id, /*flags=*/0)); - - EXPECT_EQ(subgraph->num_nodes, 1); - const struct xnn_node* node = &subgraph->nodes[0]; - EXPECT_EQ(node->type, xnn_node_type_static_slice); - EXPECT_EQ(node->num_inputs, 1); - EXPECT_EQ(node->inputs[0], input_id); - EXPECT_EQ(node->num_outputs, 1); - EXPECT_EQ(node->outputs[0], output_id); - EXPECT_EQ(node->flags, 0); - EXPECT_EQ(node->params.slice.num_dims, dims.size()); - EXPECT_THAT(begins, 
testing::ElementsAreArray(node->params.slice.begins, dims.size())); - EXPECT_THAT(ends, testing::ElementsAreArray(node->params.slice.ends, dims.size())); -} - -TEST_F(StaticSliceTestQU8, define) -{ - const int32_t zero_point = u8dist(rng); - const float scale = scale_dist(rng); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, zero_point, scale, dims.size(), dims.data(), nullptr, 0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, zero_point, scale, sizes.size(), sizes.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_slice_v3(subgraph, dims.size(), begins.data(), ends.data(), /*strides*/nullptr, input_id, output_id, /*flags=*/0)); - - EXPECT_EQ(subgraph->num_nodes, 1); - const struct xnn_node* node = &subgraph->nodes[0]; - EXPECT_EQ(node->type, xnn_node_type_static_slice); - EXPECT_EQ(node->num_inputs, 1); - EXPECT_EQ(node->inputs[0], input_id); - EXPECT_EQ(node->num_outputs, 1); - EXPECT_EQ(node->outputs[0], output_id); - EXPECT_EQ(node->flags, 0); - EXPECT_EQ(node->params.slice.num_dims, dims.size()); - EXPECT_THAT(begins, testing::ElementsAreArray(node->params.slice.begins, dims.size())); - EXPECT_THAT(ends, testing::ElementsAreArray(node->params.slice.ends, dims.size())); -} - -TEST_F(StaticSliceTestF16, define) -{ - ASSERT_EQ(xnn_status_success, 
xnn_initialize(/*allocator=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, dims.size(), dims.data(), nullptr, 0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, sizes.size(), sizes.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_slice_v3(subgraph, dims.size(), begins.data(), ends.data(), /*strides*/nullptr, input_id, output_id, /*flags=*/0)); - - EXPECT_EQ(subgraph->num_nodes, 1); - const struct xnn_node* node = &subgraph->nodes[0]; - EXPECT_EQ(node->type, xnn_node_type_static_slice); - EXPECT_EQ(node->num_inputs, 1); - EXPECT_EQ(node->inputs[0], input_id); - EXPECT_EQ(node->num_outputs, 1); - EXPECT_EQ(node->outputs[0], output_id); - EXPECT_EQ(node->flags, 0); - EXPECT_EQ(node->params.slice.num_dims, dims.size()); - EXPECT_THAT(begins, testing::ElementsAreArray(node->params.slice.begins, dims.size())); - EXPECT_THAT(ends, testing::ElementsAreArray(node->params.slice.ends, dims.size())); -} - -TEST_F(StaticSliceTestF32, define) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, dims.size(), dims.data(), nullptr, 0, 
- /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, sizes.size(), sizes.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_slice_v3(subgraph, dims.size(), begins.data(), ends.data(), /*strides*/nullptr, input_id, output_id, /*flags=*/0)); - - EXPECT_EQ(subgraph->num_nodes, 1); - const struct xnn_node* node = &subgraph->nodes[0]; - EXPECT_EQ(node->type, xnn_node_type_static_slice); - EXPECT_EQ(node->num_inputs, 1); - EXPECT_EQ(node->inputs[0], input_id); - EXPECT_EQ(node->num_outputs, 1); - EXPECT_EQ(node->outputs[0], output_id); - EXPECT_EQ(node->flags, 0); - EXPECT_EQ(node->params.slice.num_dims, dims.size()); - EXPECT_THAT(begins, testing::ElementsAreArray(node->params.slice.begins, dims.size())); - EXPECT_THAT(ends, testing::ElementsAreArray(node->params.slice.ends, dims.size())); -} - -TEST_F(StaticSliceTestQS8, matches_operator_api) -{ - const int32_t zero_point = i8dist(rng); - const float scale = scale_dist(rng); - - std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); }); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call operator API. 
- xnn_operator_t op = nullptr; - xnn_status status = xnn_create_slice_nd_x8(/*flags=*/0, &op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - std::unique_ptr auto_op(op, xnn_delete_operator); - ASSERT_EQ( - xnn_status_success, - xnn_reshape_slice_nd_x8(op, dims.size(), dims.data(), offsets.data(), sizes.data(), /*threadpool=*/nullptr)); - ASSERT_EQ( - xnn_status_success, xnn_setup_slice_nd_x8(op, input.data(), operator_output.data())); - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, zero_point, scale, dims.size(), dims.data(), nullptr, 0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, zero_point, scale, sizes.size(), sizes.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_slice_v3(subgraph, dims.size(), begins.data(), ends.data(), /*strides*/nullptr, input_id, output_id, /*flags=*/0)); - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, xnn_test_runtime_flags(), &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - 
ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - EXPECT_EQ(subgraph_output, operator_output); } -TEST_F(StaticSliceTestQU8, matches_operator_api) -{ - const int32_t zero_point = u8dist(rng); - const float scale = scale_dist(rng); - - std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); }); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call operator API. - xnn_operator_t op = nullptr; - xnn_status status = xnn_create_slice_nd_x8(/*flags=*/0, &op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - std::unique_ptr auto_op(op, xnn_delete_operator); - ASSERT_EQ( - xnn_status_success, - xnn_reshape_slice_nd_x8(op, dims.size(), dims.data(), offsets.data(), sizes.data(), /*threadpool=*/nullptr)); - ASSERT_EQ( - xnn_status_success, xnn_setup_slice_nd_x8(op, input.data(), operator_output.data())); - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. 
- xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, zero_point, scale, dims.size(), dims.data(), nullptr, 0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, zero_point, scale, sizes.size(), sizes.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_slice_v3(subgraph, dims.size(), begins.data(), ends.data(), /*strides*/nullptr, input_id, output_id, /*flags=*/0)); - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, xnn_test_runtime_flags(), &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - EXPECT_EQ(subgraph_output, operator_output); -} - -TEST_F(StaticSliceTestF16, matches_operator_api) -{ - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call operator API. 
- xnn_operator_t op = nullptr; - xnn_status status = xnn_create_slice_nd_x16(/*flags=*/0, &op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - std::unique_ptr auto_op(op, xnn_delete_operator); - ASSERT_EQ( - xnn_status_success, - xnn_reshape_slice_nd_x16(op, dims.size(), dims.data(), offsets.data(), sizes.data(), /*threadpool=*/nullptr)); - ASSERT_EQ(xnn_status_success, xnn_setup_slice_nd_x16(op, input.data(), operator_output.data())); - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); +template +class Slice : public ::testing::TestWithParam {}; - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, dims.size(), dims.data(), nullptr, 0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); +using SliceX8 = Slice; +using SliceX16 = Slice; +using SliceX32 = Slice; - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, sizes.size(), sizes.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_slice_v3(subgraph, dims.size(), begins.data(), ends.data(), /*strides*/nullptr, input_id, output_id, /*flags=*/0)); - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, xnn_test_runtime_flags(), &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, 
input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - EXPECT_EQ(subgraph_output, operator_output); -} - -TEST_F(StaticSliceTestF32, matches_operator_api) -{ - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call operator API. - xnn_operator_t op = nullptr; - xnn_status status = xnn_create_slice_nd_x32(/*flags=*/0, &op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - std::unique_ptr auto_op(op, xnn_delete_operator); - ASSERT_EQ( - xnn_status_success, - xnn_reshape_slice_nd_x32(op, dims.size(), dims.data(), offsets.data(), sizes.data(), /*threadpool=*/nullptr)); - ASSERT_EQ(xnn_status_success, xnn_setup_slice_nd_x32(op, input.data(), operator_output.data())); - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. 
- xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, dims.size(), dims.data(), nullptr, 0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, sizes.size(), sizes.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_slice_v3(subgraph, dims.size(), begins.data(), ends.data(), /*strides*/nullptr, input_id, output_id, /*flags=*/0)); - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, xnn_test_runtime_flags(), &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - EXPECT_EQ(subgraph_output, operator_output); -} - -TEST_F(StaticSliceTestF32, illegal_stride_values) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, dims.size(), dims.data(), nullptr, 0, - 
/*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, sizes.size(), sizes.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - std::vector strides(this->dims.size(), 1); - strides[0] = 2; - - ASSERT_EQ( - xnn_status_invalid_parameter, - xnn_define_static_slice_v3(subgraph, dims.size(), begins.data(), ends.data(), strides.data(), input_id, output_id, /*flags=*/0)); -} +TEST_P(SliceX8, test) { TestSlice(GetParam()); } +TEST_P(SliceX16, test) { TestSlice(GetParam()); } +TEST_P(SliceX32, test) { TestSlice(GetParam()); } +auto rank_params = testing::Range(1, XNN_MAX_TENSOR_DIMS); +INSTANTIATE_TEST_SUITE_P(Slice, SliceX8, rank_params); +INSTANTIATE_TEST_SUITE_P(Slice, SliceX16, rank_params); +INSTANTIATE_TEST_SUITE_P(Slice, SliceX32, rank_params); diff --git a/test/static-transpose.cc b/test/static-transpose.cc index 63ffd28304a..c607a452550 100644 --- a/test/static-transpose.cc +++ b/test/static-transpose.cc @@ -4,464 +4,69 @@ // LICENSE file in the root directory of this source tree. 
#include -#include -#include #include #include -#include #include -#include #include +#include #include #include "xnnpack.h" -#include "xnnpack/math.h" -#include "xnnpack/node-type.h" -#include "xnnpack/operator.h" -#include "xnnpack/subgraph.h" -#include "subgraph-unary-tester.h" -#include "runtime-flags.h" +#include "xnnpack/buffer.h" +#include "replicable_random_device.h" +#include "subgraph-tester.h" -using StaticTransposeTestQS8 = UnaryTest; -using StaticTransposeTestQU8 = UnaryTest; -using StaticTransposeTestF16 = UnaryTest; -using StaticTransposeTestF32 = UnaryTest; +template +void TestAllPermutations(size_t rank) { + xnnpack::ReplicableRandomDevice rng; -namespace { -template -std::vector RandomPermutation(const std::vector& input, - Rng& rng) { - std::vector perm = std::vector(input); - std::iota(perm.begin(), perm.end(), 0); - std::shuffle(perm.begin(), perm.end(), rng); - return perm; -} - -std::vector PermuteInputDimensions(const std::vector& input, std::vector perm) -{ - std::vector output = input; - for (size_t i = 0; i < input.size(); i++) { - output[i] = input[perm[i]]; - } - return output; -} -} // namespace - -TEST_F(StaticTransposeTestQS8, define) -{ - const int32_t input_zero_point = i8dist(rng); - const float input_scale = scale_dist(rng); - const int32_t output_zero_point = input_zero_point; - const float output_scale = input_scale; - std::vector perm = RandomPermutation(dims, rng); - std::vector output_dims = PermuteInputDimensions(dims, perm); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, input_zero_point, input_scale, dims.size(), dims.data(), - nullptr, 0, 
/*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), - output_dims.data(), nullptr, 1, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_transpose(subgraph, perm.size(), perm.data(), input_id, output_id, /*flags=*/0)); - - ASSERT_EQ(subgraph->num_nodes, 1); - const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_static_transpose); - ASSERT_EQ(node->params.transpose.num_dims, dims.size()); - for (size_t i = 0; i < dims.size(); i++) { - ASSERT_EQ(node->params.transpose.perm[i], perm[i]); - } - ASSERT_EQ(node->num_inputs, 1); - ASSERT_EQ(node->inputs[0], input_id); - ASSERT_EQ(node->num_outputs, 1); - ASSERT_EQ(node->outputs[0], output_id); - ASSERT_EQ(node->flags, 0); -} - -TEST_F(StaticTransposeTestQU8, define) -{ - const int32_t input_zero_point = u8dist(rng); - const float input_scale = scale_dist(rng); - const int32_t output_zero_point = input_zero_point; - const float output_scale = input_scale; - std::vector perm = RandomPermutation(dims, rng); - std::vector output_dims = PermuteInputDimensions(dims, perm); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, input_zero_point, input_scale, dims.size(), dims.data(), - nullptr, 0, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - 
output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(), - output_dims.data(), nullptr, 1, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_transpose(subgraph, perm.size(), perm.data(), input_id, output_id, /*flags=*/0)); - - ASSERT_EQ(subgraph->num_nodes, 1); - const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_static_transpose); - for (size_t i = 0; i < dims.size(); i++) { - ASSERT_EQ(node->params.transpose.perm[i], perm[i]); - } - ASSERT_EQ(node->num_inputs, 1); - ASSERT_EQ(node->inputs[0], input_id); - ASSERT_EQ(node->num_outputs, 1); - ASSERT_EQ(node->outputs[0], output_id); - ASSERT_EQ(node->flags, 0); -} - -TEST_F(StaticTransposeTestF16, define) -{ - std::vector perm = RandomPermutation(dims, rng); - std::vector output_dims = PermuteInputDimensions(dims, perm); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, dims.size(), dims.data(), nullptr, 0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, output_dims.size(), output_dims.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_transpose(subgraph, perm.size(), perm.data(), input_id, 
output_id, /*flags=*/0)); - - ASSERT_EQ(subgraph->num_nodes, 1); - const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_static_transpose); - for (size_t i = 0; i < dims.size(); i++) { - ASSERT_EQ(node->params.transpose.perm[i], perm[i]); - } - ASSERT_EQ(node->num_inputs, 1); - ASSERT_EQ(node->inputs[0], input_id); - ASSERT_EQ(node->num_outputs, 1); - ASSERT_EQ(node->outputs[0], output_id); - ASSERT_EQ(node->flags, 0); -} - -TEST_F(StaticTransposeTestF32, define) -{ - std::vector perm = RandomPermutation(dims, rng); - std::vector output_dims = PermuteInputDimensions(dims, perm); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, dims.size(), dims.data(), nullptr, 0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_transpose(subgraph, perm.size(), perm.data(), input_id, output_id, /*flags=*/0)); - - ASSERT_EQ(subgraph->num_nodes, 1); - const struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_static_transpose); - for (size_t i = 0; i < dims.size(); i++) { - ASSERT_EQ(node->params.transpose.perm[i], perm[i]); - } - ASSERT_EQ(node->num_inputs, 1); - ASSERT_EQ(node->inputs[0], input_id); - ASSERT_EQ(node->num_outputs, 1); - ASSERT_EQ(node->outputs[0], output_id); - 
ASSERT_EQ(node->flags, 0); -} - -TEST_F(StaticTransposeTestQS8, matches_operator_api) -{ - const int32_t input_zero_point = i8dist(rng); - const float input_scale = scale_dist(rng); - const int32_t output_zero_point = input_zero_point; - const float output_scale = input_scale; - std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); }); - std::vector perm = RandomPermutation(dims, rng); - std::vector output_dims = PermuteInputDimensions(dims, perm); + ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call operator API. - xnn_operator_t op = nullptr; - const xnn_status status = xnn_create_transpose_nd_x8(/*flags=*/0, &op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - ASSERT_EQ( - xnn_status_success, xnn_reshape_transpose_nd_x8(op, dims.size(), dims.data(), perm.data(), /*threadpool=*/nullptr)); - ASSERT_EQ(xnn_status_success, xnn_setup_transpose_nd_x8(op, input.data(), operator_output.data())); - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. 
- xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, input_zero_point, input_scale, dims.size(), dims.data(), - nullptr, /*external_id=*/0, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_qint8, output_zero_point, output_scale, dims.size(), dims.data(), - nullptr, /*external_id=*/1, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_transpose(subgraph, perm.size(), perm.data(), input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, xnn_test_runtime_flags(), &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(StaticTransposeTestQU8, matches_operator_api) -{ - const int32_t input_zero_point = u8dist(rng); - const float input_scale = scale_dist(rng); - const int32_t output_zero_point = input_zero_point; - const float output_scale = input_scale; - std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); }); - std::vector perm = RandomPermutation(dims, rng); - std::vector output_dims = 
PermuteInputDimensions(dims, perm); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call operator API. - xnn_operator_t op = nullptr; - const xnn_status status = xnn_create_transpose_nd_x8(/*flags=*/0, &op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - ASSERT_EQ( - xnn_status_success, xnn_reshape_transpose_nd_x8(op, dims.size(), dims.data(), perm.data(), /*threadpool=*/nullptr)); - ASSERT_EQ(xnn_status_success, xnn_setup_transpose_nd_x8(op, input.data(), operator_output.data())); - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, input_zero_point, input_scale, dims.size(), dims.data(), - nullptr, /*external_id=*/0, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_quantized_tensor_value( - subgraph, xnn_datatype_quint8, output_zero_point, output_scale, dims.size(), dims.data(), - nullptr, /*external_id=*/1, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - ASSERT_EQ( - xnn_status_success, - xnn_define_static_transpose(subgraph, perm.size(), perm.data(), input_id, output_id, /*flags=*/0)); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, xnn_test_runtime_flags(), &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, 
xnn_delete_runtime); - - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} - -TEST_F(StaticTransposeTestF16, matches_operator_api) -{ - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - std::vector perm = RandomPermutation(dims, rng); - std::vector output_dims = PermuteInputDimensions(dims, perm); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call operator API. - xnn_operator_t op = nullptr; - const xnn_status status = xnn_create_transpose_nd_x16(/*flags=*/0, &op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - std::unique_ptr auto_op(op, xnn_delete_operator); - - ASSERT_EQ( - xnn_status_success, - xnn_reshape_transpose_nd_x16(op, dims.size(), dims.data(), perm.data(), /*threadpool=*/nullptr)); - ASSERT_EQ(xnn_status_success, xnn_setup_transpose_nd_x16(op, input.data(), operator_output.data())); - - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); - - // Call subgraph API. 
- xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, dims.size(), dims.data(), nullptr, /*external_id=*/0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp16, dims.size(), dims.data(), nullptr, /*external_id=*/1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ( - xnn_status_success, - xnn_define_static_transpose(subgraph, perm.size(), perm.data(), input_id, output_id, /*flags=*/0)); - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, xnn_test_runtime_flags(), &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); + std::vector perm(rank); + std::iota(perm.begin(), perm.end(), 0); - ASSERT_EQ(subgraph_output, operator_output); + do { + // Define subgraph + xnnpack::SubgraphTester subgraph(2); + subgraph.AddInputTensor(rank, xnn_datatype_of(), 0) + .AddOutputTensor(rank, xnn_datatype_of(), 1) + .AddTranspose(perm, 0, 1) + .CreateRuntime(); + + for (int reshape = 0; reshape < 2; ++reshape) { + std::vector shape = random_shape(rng, rank); + + xnnpack::Tensor input(shape, xnnpack::PaddingBytes{XNN_EXTRA_BYTES}); + xnnpack::fill_uniform_random_bits(input.base(), 
input.size(), rng); + + xnnpack::Tensor transposed_input = input.transpose(perm).make_copy(); + xnnpack::Tensor output(transposed_input.extents()); + + // Run subgraph + subgraph.ReshapeExternalTensor(shape, input.base(), 0) + .ReshapeExternalTensor(output.extents(), output.base(), 1) + .ReshapeRuntime() + .SetupRuntime() + .InvokeRuntime(); + + // Verify results. + ASSERT_THAT(output, testing::ElementsAreArray(transposed_input)); + } + } while (std::next_permutation(perm.begin(), perm.end())); } -TEST_F(StaticTransposeTestF32, matches_operator_api) -{ - std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); - std::vector perm = RandomPermutation(dims, rng); - std::vector output_dims = PermuteInputDimensions(dims, perm); - - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - // Call operator API. - xnn_operator_t op = nullptr; - const xnn_status status = xnn_create_transpose_nd_x32(/*flags=*/0, &op); - if (status == xnn_status_unsupported_hardware) { - GTEST_SKIP(); - } - - ASSERT_EQ(xnn_status_success, status); - ASSERT_NE(nullptr, op); - std::unique_ptr auto_op(op, xnn_delete_operator); +template +class Transpose : public ::testing::TestWithParam {}; - ASSERT_EQ( - xnn_status_success, - xnn_reshape_transpose_nd_x32(op, dims.size(), dims.data(), perm.data(), /*threadpool=*/nullptr)); - ASSERT_EQ(xnn_status_success, xnn_setup_transpose_nd_x32(op, input.data(), operator_output.data())); +using TransposeX8 = Transpose; +using TransposeX16 = Transpose; +using TransposeX32 = Transpose; - ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr)); +TEST_P(TransposeX8, test) { TestAllPermutations(GetParam()); } +TEST_P(TransposeX16, test) { TestAllPermutations(GetParam()); } +TEST_P(TransposeX32, test) { TestAllPermutations(GetParam()); } - // Call subgraph API. 
- xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, dims.size(), dims.data(), nullptr, /*external_id=*/0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, dims.size(), dims.data(), nullptr, /*external_id=*/1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ( - xnn_status_success, - xnn_define_static_transpose(subgraph, perm.size(), perm.data(), input_id, output_id, /*flags=*/0)); - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, xnn_test_runtime_flags(), &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - std::array external = { - xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}}; - ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data())); - ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime)); - - ASSERT_EQ(subgraph_output, operator_output); -} +auto rank_params = testing::Range(1, XNN_MAX_TENSOR_DIMS); +INSTANTIATE_TEST_SUITE_P(Transpose, TransposeX8, rank_params); +INSTANTIATE_TEST_SUITE_P(Transpose, TransposeX16, rank_params); +INSTANTIATE_TEST_SUITE_P(Transpose, TransposeX32, rank_params); diff --git a/test/subgraph-tester.h b/test/subgraph-tester.h index 34e2ebcfc03..2eca49c5b8e 100644 --- a/test/subgraph-tester.h +++ b/test/subgraph-tester.h @@ -16,13 +16,15 @@ #include #include #include +#include #include #include #include "xnnpack.h" 
-#include "xnnpack/subgraph.h" #include "xnnpack/buffer.h" +#include "xnnpack/subgraph.h" #include "replicable_random_device.h" +#include "runtime-flags.h" namespace xnnpack { @@ -102,19 +104,37 @@ class SubgraphTester { return *this; } - inline SubgraphTester& AddDynamicTensorF32(const std::vector& dims, - uint32_t external_id, - uint32_t flags = 0) { + inline SubgraphTester& AddDynamicTensor(const std::vector& dims, + uint32_t external_id, + xnn_datatype datatype, + uint32_t flags = 0) { uint32_t id_out = 0; - const xnn_status status = - xnn_define_tensor_value(subgraph_.get(), xnn_datatype_fp32, dims.size(), - dims.data(), nullptr, external_id, flags, &id_out); + const xnn_status status = xnn_define_tensor_value( + subgraph_.get(), datatype, dims.size(), dims.data(), nullptr, + external_id, flags, &id_out); EXPECT_EQ(status, xnn_status_success); EXPECT_EQ(id_out, external_id); return *this; } + template + inline SubgraphTester& ReshapeExternalTensor(const std::vector& dims, + T* data, uint32_t external_id) { + const xnn_status status = xnn_reshape_external_value( + runtime_.get(), external_id, dims.size(), dims.data()); + EXPECT_EQ(status, xnn_status_success); + external_tensors_[external_id] = data; + + return *this; + } + + inline SubgraphTester& AddDynamicTensorF32(const std::vector& dims, + uint32_t external_id, + uint32_t flags = 0) { + return AddDynamicTensor(dims, external_id, xnn_datatype_fp32, flags); + } + inline SubgraphTester& AddStaticTensorF32(const std::vector& dims, uint32_t external_id, void *data, uint32_t flags = 0) { @@ -242,13 +262,34 @@ class SubgraphTester { return *this; } + template + SubgraphTester& AddInputTensor(const std::vector& dims, T* data, + uint32_t external_id) { + AddDynamicTensor(dims, external_id, xnn_datatype_of(), + XNN_VALUE_FLAG_EXTERNAL_INPUT); + auto it = external_tensors_.insert({external_id, data}); + EXPECT_TRUE(it.second); + return *this; + } + + SubgraphTester& AddInputTensor(size_t rank, xnn_datatype datatype, + 
uint32_t external_id) { + std::vector dims(rank); + AddDynamicTensor(dims, external_id, datatype, + XNN_VALUE_FLAG_EXTERNAL_INPUT); + auto it = external_tensors_.insert({external_id, nullptr}); + EXPECT_TRUE(it.second); + return *this; + } + SubgraphTester& AddInputTensorF32(const std::vector& dims, uint32_t external_id) { AddDynamicTensorF32(dims, external_id, XNN_VALUE_FLAG_EXTERNAL_INPUT); size_t num_elements = NumElements(dims); xnnpack::Buffer input(num_elements * sizeof(float) + XNN_EXTRA_BYTES * sizeof(char)); float* data = reinterpret_cast(input.data()); std::generate(data, data + num_elements, [&]() { return f32dist(rng_); }); - auto it = external_tensors_.insert({external_id, std::move(input)}); + auto it = external_tensors_.insert({external_id, data}); + buffers_[external_id] = std::move(input); EXPECT_TRUE(it.second); return *this; } @@ -259,7 +300,28 @@ class SubgraphTester { xnnpack::Buffer input(num_elements * sizeof(float) + XNN_EXTRA_BYTES * sizeof(char)); float* data = reinterpret_cast(input.data()); std::generate(data, data + num_elements, [&]() { return f32dist(rng_); }); - auto it = external_tensors_.insert({external_id, std::move(input)}); + auto it = external_tensors_.insert({external_id, data}); + buffers_[external_id] = std::move(input); + EXPECT_TRUE(it.second); + return *this; + } + + template + SubgraphTester& AddOutputTensor(const std::vector& dims, T* data, + uint32_t external_id) { + AddDynamicTensor(dims, external_id, xnn_datatype_of(), + XNN_VALUE_FLAG_EXTERNAL_OUTPUT); + auto it = external_tensors_.insert({external_id, data}); + EXPECT_TRUE(it.second); + return *this; + } + + SubgraphTester& AddOutputTensor(size_t rank, xnn_datatype datatype, + uint32_t external_id) { + std::vector dims(rank); + AddDynamicTensor(dims, external_id, datatype, + XNN_VALUE_FLAG_EXTERNAL_OUTPUT); + auto it = external_tensors_.insert({external_id, nullptr}); EXPECT_TRUE(it.second); return *this; } @@ -269,7 +331,8 @@ class SubgraphTester { 
AddDynamicTensorF32(dims, external_id, XNN_VALUE_FLAG_EXTERNAL_OUTPUT); size_t num_elements = NumElements(dims); xnnpack::Buffer output(num_elements * sizeof(float)); - auto it = external_tensors_.insert({external_id, std::move(output)}); + auto it = external_tensors_.insert({external_id, output.data()}); + buffers_[external_id] = std::move(output); EXPECT_TRUE(it.second); return *this; } @@ -305,6 +368,24 @@ class SubgraphTester { return *this; } + SubgraphTester& AddTranspose(const std::vector& perm, + uint32_t input_id, uint32_t output_id) { + const xnn_status status = + xnn_define_static_transpose(subgraph_.get(), perm.size(), perm.data(), input_id, output_id, 0 /* flags */); + EXPECT_EQ(status, xnn_status_success); + return *this; + } + + SubgraphTester& AddSlice(const std::vector& begins, + const std::vector& ends, + const std::vector& strides, + uint32_t input_id, uint32_t output_id) { + const xnn_status status = xnn_define_static_slice_v3( + subgraph_.get(), begins.size(), begins.data(), ends.data(), strides.data(), input_id, output_id, 0 /* flags */); + EXPECT_EQ(status, xnn_status_success); + return *this; + } + SubgraphTester& AddConvert(uint32_t input_id, uint32_t output_id) { const xnn_status status = xnn_define_unary( subgraph_.get(), xnn_unary_convert, /*params=*/nullptr, input_id, output_id, 0 /* flags */); @@ -579,6 +660,47 @@ class SubgraphTester { return *this; } + SubgraphTester& CreateRuntime(xnn_weights_cache_t weights_cache, + xnn_workspace_t workspace, + pthreadpool_t threadpool, uint32_t flags) { + EXPECT_EQ(runtime_, nullptr); + xnn_runtime_t runtime = nullptr; + const xnn_status status = xnn_create_runtime_v4( + subgraph_.get(), weights_cache, workspace, threadpool, flags, &runtime); + EXPECT_EQ(status, xnn_status_success); + runtime_.reset(runtime); + return *this; + } + + SubgraphTester& ReshapeRuntime() { + const xnn_status status = xnn_reshape_runtime(runtime_.get()); + EXPECT_EQ(status, xnn_status_success); + return *this; + } + + 
SubgraphTester& SetupRuntime() { + std::vector values; + values.reserve(external_tensors_.size()); + for (const std::pair i : external_tensors_) { + values.push_back({i.first, i.second}); + } + const xnn_status status = + xnn_setup_runtime_v2(runtime_.get(), values.size(), values.data()); + EXPECT_EQ(status, xnn_status_success); + return *this; + } + + SubgraphTester& CreateRuntime(pthreadpool_t threadpool = nullptr, + uint32_t flags = xnn_test_runtime_flags()) { + return CreateRuntime(nullptr, nullptr, threadpool, flags); + } + + SubgraphTester& InvokeRuntime() { + const xnn_status status = xnn_invoke_runtime(runtime_.get()); + EXPECT_EQ(status, xnn_status_success); + return *this; + } + xnn_layout_type GetLayout(uint32_t value_id) const { return subgraph_->values[value_id].layout; } @@ -603,8 +725,13 @@ class SubgraphTester { return subgraph_.get(); } + template + float* GetExternalTensorData(uint32_t external_id) { + return reinterpret_cast(external_tensors_[external_id]); + } + float* GetExternalTensorDataF32(uint32_t external_id) { - return reinterpret_cast(external_tensors_[external_id].data()); + return GetExternalTensorData(external_id); } static inline size_t NumElements(const std::vector& dims) { @@ -613,7 +740,10 @@ class SubgraphTester { protected: std::unique_ptr subgraph_{nullptr, xnn_delete_subgraph}; - std::unordered_map> external_tensors_; + std::unique_ptr runtime_{ + nullptr, xnn_delete_runtime}; + std::unordered_map> buffers_; + std::unordered_map external_tensors_; uint32_t output_id_; xnnpack::ReplicableRandomDevice rng_; std::uniform_real_distribution f32dist = std::uniform_real_distribution(-1.0f, +1.0f); diff --git a/test/transpose-reshape.cc b/test/transpose-reshape.cc deleted file mode 100644 index ebe2dcf71f9..00000000000 --- a/test/transpose-reshape.cc +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2023 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this 
source tree. - -#include -#include -#include -#include -#include -#include - -#include -#include "xnnpack.h" -#include "xnnpack/node-type.h" -#include "xnnpack/subgraph.h" -#include "runtime-flags.h" - -TEST(TransposeTestF32, Reshape) -{ - ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr)); - - xnn_subgraph_t subgraph = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/3, /*flags=*/0, &subgraph)); - std::unique_ptr auto_subgraph(subgraph, xnn_delete_subgraph); - - std::vector dims{2, 3, 4}; - uint32_t input_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, dims.size(), dims.data(), nullptr, 0, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id)); - ASSERT_NE(input_id, XNN_INVALID_NODE_ID); - - uint32_t output_id = XNN_INVALID_NODE_ID; - ASSERT_EQ( - xnn_status_success, xnn_define_tensor_value( - subgraph, xnn_datatype_fp32, dims.size(), dims.data(), nullptr, 1, - /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id)); - ASSERT_NE(output_id, XNN_INVALID_NODE_ID); - - std::vector perm{2, 1, 0}; - ASSERT_EQ(xnn_status_success, xnn_define_static_transpose(subgraph, perm.size(), perm.data(), input_id, output_id, /*flags=*/0)); - - ASSERT_EQ(subgraph->num_nodes, 1); - struct xnn_node* node = &subgraph->nodes[0]; - ASSERT_EQ(node->type, xnn_node_type_static_transpose); - ASSERT_EQ(node->num_inputs, 1); - ASSERT_EQ(node->inputs[0], input_id); - ASSERT_EQ(node->num_outputs, 1); - ASSERT_EQ(node->outputs[0], output_id); - ASSERT_EQ(node->flags, 0); - - xnn_runtime_t runtime = nullptr; - ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, xnn_test_runtime_flags(), &runtime)); - ASSERT_NE(nullptr, runtime); - std::unique_ptr auto_runtime(runtime, xnn_delete_runtime); - - ASSERT_EQ(node->reshape(&runtime->opdata[0], subgraph->values, subgraph->num_values, /*threadpool=*/nullptr), xnn_status_success); - - dims[0] = 7; - 
ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, 0, dims.size(), dims.data())); - - ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_reallocation_required); - const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape; - const size_t num_input_elements = std::accumulate(dims.cbegin(), dims.cend(), size_t{1}, std::multiplies<size_t>()); - ASSERT_EQ(output_shape->dim[0], dims[perm[0]]); - ASSERT_EQ(output_shape->dim[1], dims[perm[1]]); - ASSERT_EQ(output_shape->dim[2], dims[perm[2]]); - ASSERT_EQ(runtime->values[node->outputs[0]].size, num_input_elements * sizeof(float)); -}