Add Tensor<T> helper class for tests
This CL does two things. First, it adds `Tensor<T>`, a helper class for the multi-dimensional arrays that our tests use frequently (and for which they currently duplicate extent/stride computations). Second, it modifies two tests (slice and transpose) to make the following changes:
- Currently, subgraph tests compare subgraph results to operator results. This changes the tests to check the output directly, without running the operator code (see the sketch after this list).
- Currently, subgraph tests run a single random variation, so getting good coverage requires running the test many times. This changes the subgraph tests to cover many more permutations in a single run.
- Currently, subgraph tests dig into the internal implementation details of subgraphs (e.g. checking xnn_node_value state). This makes sense in some cases (e.g. fusion tests), but it is hard to be certain that this covers real usage, and it is brittle. IMO, tests should (as much as possible) verify that behavior is as expected via the APIs visible to the user of the thing being tested. For the subgraph API, that means we should just make sure the subgraph works as expected.
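
A hedged sketch of what such a direct check can look like: `Tensor<T>`, `random_shape`, and `fill_uniform_random_bits` are the helpers added in `src/xnnpack/buffer.h` below, while `RunTransposeSubgraph` is a hypothetical stand-in for the subgraph build/run plumbing (the real tests drive this through `SubgraphTester`), not an API from this change.

```cpp
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

#include <gtest/gtest.h>
#include "xnnpack/buffer.h"

// Hypothetical stand-in: builds and runs a subgraph with one static transpose
// node and returns its output tensor.
xnnpack::Tensor<uint16_t> RunTransposeSubgraph(
    const xnnpack::Tensor<uint16_t>& input, const std::vector<size_t>& perm);

void CheckTranspose(std::mt19937& rng) {
  const std::vector<size_t> perm = {2, 0, 1};
  xnnpack::Tensor<uint16_t> input(xnnpack::random_shape(rng, 3));
  xnnpack::fill_uniform_random_bits(input.data(), input.size(), rng);

  // Reference result: permute the strides, then materialize a contiguous copy.
  xnnpack::Tensor<uint16_t> expected = input.transpose(perm).deep_copy();
  xnnpack::Tensor<uint16_t> output = RunTransposeSubgraph(input, perm);

  ASSERT_EQ(expected.extents(), output.extents());
  for (size_t i = 0; i < expected.size(); ++i) {
    ASSERT_EQ(expected.data()[i], output.data()[i]);
  }
}
```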

This change required a few minor cleanups:
- `xnnpack::Buffer<T>` needs to be able to distinguish between "extra bytes" and real data (see the sketch after this list).
- To test things like transpose, concat, slice, etc., I found it helpful to add plain `xnn_datatype_uintX` datatypes. I don't love adding these to the public API when they don't have many use cases, but I decided this is better than hacking the tests to use whatever datatype happens to be available, which could get complicated (e.g. we'd have to use fp16 or bfloat16 to test transposes of 16-bit elements).
- There is now some overlap between `RuntimeTester` and `SubgraphTester`. I think we should deprecate `RuntimeTester` and consolidate everything in `SubgraphTester`, because we can't return `RuntimeTester` from the base-class `SubgraphTester` builder methods. This is a minor difficulty, but the reason to keep them separate seems minor too.
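
A minimal sketch of the "extra bytes" vs. real data distinction, assuming `XNN_EXTRA_BYTES` is the usual over-read allowance from the public header:

```cpp
#include <cstdint>

#include "xnnpack.h"
#include "xnnpack/buffer.h"

void AllocateWithSlack() {
  // 64 real uint16_t elements plus XNN_EXTRA_BYTES of allocated slack that
  // kernels may over-read; the slack is not counted by size().
  xnnpack::Buffer<uint16_t> data(64, xnnpack::PaddingBytes{XNN_EXTRA_BYTES});
  (void)data;
}
```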

PiperOrigin-RevId: 727983393
dsharletg authored and xnnpack-bot committed Feb 19, 2025
1 parent 9d3accb commit b543f3e
Showing 15 changed files with 614 additions and 1,048 deletions.
7 changes: 6 additions & 1 deletion BUILD.bazel
@@ -561,7 +561,12 @@ xnnpack_cc_library(
xnnpack_cxx_library(
name = "buffer",
hdrs = ["src/xnnpack/buffer.h"],
deps = [":datatype"],
deps = [
":common",
":datatype",
":math",
":xnnpack_h",
],
)

xnnpack_cc_library(
1 change: 0 additions & 1 deletion CMakeLists.txt
@@ -1393,7 +1393,6 @@ IF(XNNPACK_BUILD_TESTS)
static-reshape
static-resize-bilinear-2d
static-transpose
transpose-reshape
unary
unpooling-2d)
FOREACH(TEST ${LIBRARY_SUBGRAPH_UNIT_TESTS})
4 changes: 4 additions & 0 deletions include/xnnpack.h
@@ -299,6 +299,10 @@ enum xnn_datatype {
/// Packed quantized 8-bit unsigned integer with shared per-Value quantization
/// parameters.
xnn_datatype_pqint8 = 17,
/// Unsigned integer datatype without quantization parameters.
xnn_datatype_uint8 = 18,
xnn_datatype_uint16 = 19,
xnn_datatype_uint32 = 20,
};

/// Define a tensor-type Value and add it to a Subgraph.
15 changes: 15 additions & 0 deletions src/datatype.c
@@ -11,6 +11,9 @@ bool xnn_datatype_is_real(enum xnn_datatype t) {
switch (t) {
case xnn_datatype_invalid:
case xnn_datatype_int32:
case xnn_datatype_uint8:
case xnn_datatype_uint16:
case xnn_datatype_uint32:
return false;
case xnn_datatype_fp32:
case xnn_datatype_fp16:
@@ -55,6 +58,9 @@ bool xnn_datatype_is_integral(enum xnn_datatype t) {
case xnn_datatype_pfp32:
return false;
case xnn_datatype_int32:
case xnn_datatype_uint8:
case xnn_datatype_uint16:
case xnn_datatype_uint32:
return true;
}
XNN_UNREACHABLE;
@@ -82,6 +88,9 @@ bool xnn_datatype_is_quantized(enum xnn_datatype t) {
case xnn_datatype_int32:
case xnn_datatype_pfp16:
case xnn_datatype_pfp32:
case xnn_datatype_uint8:
case xnn_datatype_uint16:
case xnn_datatype_uint32:
return false;
}
XNN_UNREACHABLE;
@@ -103,16 +112,19 @@ size_t xnn_datatype_log2_size_bits(enum xnn_datatype t) {
case xnn_datatype_qdint8:
case xnn_datatype_qduint8:
case xnn_datatype_qpint8:
case xnn_datatype_uint8:
return 3;
case xnn_datatype_fp16:
case xnn_datatype_bf16:
case xnn_datatype_pfp16:
case xnn_datatype_uint16:
return 4;
case xnn_datatype_qint32:
case xnn_datatype_qcint32:
case xnn_datatype_int32:
case xnn_datatype_fp32:
case xnn_datatype_pfp32:
case xnn_datatype_uint32:
return 5;
}
XNN_UNREACHABLE;
@@ -154,6 +166,9 @@ bool xnn_datatype_is_byte_addressable(enum xnn_datatype t) {
case xnn_datatype_qduint8:
case xnn_datatype_int32:
case xnn_datatype_fp32:
case xnn_datatype_uint8:
case xnn_datatype_uint16:
case xnn_datatype_uint32:
return true;
}
XNN_UNREACHABLE;
6 changes: 6 additions & 0 deletions src/enums/datatype-strings.c
@@ -50,6 +50,12 @@ const char* xnn_datatype_to_string(enum xnn_datatype type) {
return "INT32";
case xnn_datatype_qbint4:
return "QBINT4";
case xnn_datatype_uint8:
return "UINT8";
case xnn_datatype_uint16:
return "UINT16";
case xnn_datatype_uint32:
return "UINT32";
}
XNN_UNREACHABLE;
return NULL;
3 changes: 3 additions & 0 deletions src/tensor.c
@@ -128,6 +128,9 @@ enum xnn_status xnn_define_tensor_value(
case xnn_datatype_int32:
case xnn_datatype_pfp16: // TODO: Does this really belong here?
case xnn_datatype_pfp32: // TODO: Does this really belong here?
case xnn_datatype_uint8:
case xnn_datatype_uint16:
case xnn_datatype_uint32:
break;
default:
xnn_log_error("failed to create Dense Tensor value: unsupported datatype %s (%d)",
212 changes: 210 additions & 2 deletions src/xnnpack/buffer.h
@@ -7,12 +7,15 @@
#define __XNNPACK_TEST_BUFFER_H_

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <initializer_list>
#include <limits>
#include <memory>
#include <type_traits>
#include <vector>

#include "xnnpack.h"
#include "xnnpack/common.h"
@@ -46,6 +49,10 @@ class NumericLimits<quantized<T>> {
}
};

struct PaddingBytes {
size_t value;
};

// This is a container similar to std::vector, but it leaves the memory
// uninitialized and supports alignment.
// TODO: It would be good if this also managed padding in a way that allowed
@@ -99,8 +106,10 @@ class Buffer {
using const_iterator = const T*;

Buffer() : data_(nullptr), size_(0) {}
explicit Buffer(size_t size)
: data_(reinterpret_cast<T*>(allocate(size * sizeof(T)))), size_(size) {}
explicit Buffer(size_t size, PaddingBytes extra_bytes = {0})
: data_(reinterpret_cast<T*>(
allocate(size * sizeof(T) + extra_bytes.value))),
size_(size) {}
Buffer(size_t size, T value) : Buffer(size) {
std::fill(begin(), end(), value);
}
@@ -165,6 +174,205 @@ void fill_uniform_random_bits(T* data, size_t size, Rng& rng) {
}
}

// Returns {x[i] for i in perm}
template <typename T>
std::vector<T> permute(const std::vector<size_t>& perm,
const std::vector<T>& x) {
std::vector<T> result(perm.size());
for (size_t i = 0; i < perm.size(); ++i) {
result[i] = x[perm[i]];
}
return result;
}

// This stores a multi-dimensional array in a Buffer<T, Alignment> object
// (above). The sizes of the dimensions are `extent`s, and the distances
// between elements of a dimension in memory are `stride`s.
// This buffer holds a std::shared_ptr to the underlying Buffer<T, Alignment>
// object, i.e. copies are shallow.
template <typename T, size_t Alignment = alignof(T)>
class Tensor {
public:
using value_type = T;
using iterator = typename xnnpack::Buffer<T>::iterator;
using const_iterator = typename xnnpack::Buffer<T>::const_iterator;

using index_type = std::vector<size_t>;

Tensor() = default;
Tensor(const Tensor& other) = default;
Tensor(Tensor&& other) = default;
// Constructs an array with strides in descending order, with no
// padding/alignment between dimensions.
explicit Tensor(index_type extents, PaddingBytes extra_bytes = {0})
: extents_(std::move(extents)), strides_(extents_.size()) {
size_t stride = 1;
for (size_t i = rank(); i > 0; --i) {
strides_[i - 1] = stride;
stride *= extents_[i - 1];
}
data_ = std::make_shared<Buffer<T, Alignment>>(stride, extra_bytes);
begin_ = data_->begin();
end_ = data_->end();
}
Tensor& operator=(const Tensor& other) = default;
Tensor& operator=(Tensor&& other) = default;

// Returns true if every stride is the product of the following extents, i.e.
// the buffer can be interpreted as a flat array without considering the
// strides.
bool is_contiguous() const {
size_t stride = 1;
for (size_t i = rank(); i > 0; --i) {
if (strides_[i - 1] != stride) {
return false;
}
stride *= extents_[i - 1];
}
return true;
}

const index_type& extents() const { return extents_; }
const index_type& strides() const { return strides_; }
size_t extent(size_t dim) const { return extents_[dim]; }
size_t stride(size_t dim) const { return strides_[dim]; }

size_t rank() const { return extents_.size(); }
bool empty() const { return begin_ >= end_; }

T* base() { return begin_; }
const T* base() const { return begin_; }

// Form a reference to an element at a particular index.
T& operator()(const index_type& indices) {
return *(begin_ + flat_offset(indices));
}
const T& operator()(const index_type& indices) const {
return *(begin_ + flat_offset(indices));
}

template <typename... Args>
T& operator()(Args... args) {
return operator()(index_type{args...});
}
template <typename... Args>
const T& operator()(Args... args) const {
return operator()(index_type{args...});
}

// The following functions can only be used if `is_contiguous` is true.
T* data() {
assert(is_contiguous());
return begin_;
}
const T* data() const {
assert(is_contiguous());
return begin_;
}
size_t size() const {
assert(is_contiguous());
return data_->size();
}
T* begin() { return data(); }
T* end() { return end_; }
const T* begin() const { return data(); }
const T* end() const { return end_; }
const T* cbegin() const { return data(); }
const T* cend() const { return end_; }
T& operator[](size_t index) { return data()[index]; }
const T& operator[](size_t index) const { return data()[index]; }

// This does not actually transpose any data in memory, it just changes the
// strides. To implement the transpose in memory, use this, followed by
// `deep_copy` below.
Tensor<T, Alignment> transpose(std::vector<size_t> perm) const {
Tensor<T, Alignment> result(*this);
result.extents_ = permute(perm, extents_);
result.strides_ = permute(perm, strides_);
return result;
}

// This uses the same indexing rules as numpy, i.e. negative values are
// offsets that are added to the extents.
Tensor<T, Alignment> slice(std::vector<int64_t> begins,
std::vector<int64_t> ends) const {
assert(rank() == begins.size());
assert(rank() == ends.size());

Tensor<T, Alignment> result(*this);
std::vector<size_t> offsets(rank());
for (size_t i = 0; i < rank(); ++i) {
offsets[i] = begins[i] < 0 ? extents_[i] + begins[i] : begins[i];
result.extents_[i] =
(ends[i] <= 0 ? extents_[i] + ends[i] : ends[i]) - offsets[i];
}

result.begin_ = begin_ + flat_offset(offsets);
result.end_ = result.begin_ + result.flat_offset(result.extents_);

return result;
}

// Make a copy of the buffer. The result will be contiguous, i.e. the strides
// of this buffer are lost when copying.
Tensor<T, Alignment> deep_copy(PaddingBytes extra_bytes = {0}) const {
Tensor<T, Alignment> result(extents_, extra_bytes);
copy_impl(rank(), extents_.data(), strides_.data(), base(),
result.strides_.data(), result.base());
return result;
}

private:
static void copy_impl(size_t rank, const size_t* extents,
const size_t* src_strides, const T* src,
const size_t* dst_strides, T* dst) {
if (rank == 0) {
*dst = *src;
return;
} else {
--rank;
size_t extent = *extents++;
size_t src_stride = *src_strides++;
size_t dst_stride = *dst_strides++;
if (rank == 0 && src_stride == 1 && dst_stride == 1) {
std::copy_n(src, extent, dst);
} else {
for (size_t i = 0; i < extent; ++i) {
copy_impl(rank, extents, src_strides, src, dst_strides, dst);
src += src_stride;
dst += dst_stride;
}
}
}
}

// Compute the offset of an index from the pointer to element 0.
size_t flat_offset(const index_type& indices) const {
assert(indices.size() == rank());
size_t result = 0;
for (size_t i = 0; i < rank(); ++i) {
result += strides_[i] * indices[i];
}
return result;
}

index_type extents_;
index_type strides_;
std::shared_ptr<xnnpack::Buffer<T, Alignment>> data_;
T* begin_ = nullptr;
T* end_ = nullptr;
};

template <typename Rng>
std::vector<size_t> random_shape(Rng& rng, size_t rank, size_t min_dim = 1,
size_t max_dim = 9) {
std::vector<size_t> shape(rank);
for (size_t i = 0; i < rank; ++i) {
shape[i] = (rng() % (max_dim - min_dim + 1)) + min_dim;
}
return shape;
}

}; // namespace xnnpack

#endif // __XNNPACK_TEST_BUFFER_H_
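
For reference, a small self-contained sketch of how the `Tensor<T>` API above behaves; the shapes and values are arbitrary, and the snippet is illustrative rather than part of the change:

```cpp
#include <numeric>

#include "xnnpack/buffer.h"

int main() {
  // A 2x3x4 tensor with descending strides {12, 4, 1}.
  xnnpack::Tensor<float> t({2, 3, 4});
  std::iota(t.begin(), t.end(), 0.0f);  // valid: a fresh tensor is contiguous

  // transpose() only permutes extents/strides; the data is shared, not moved.
  xnnpack::Tensor<float> tt = t.transpose({2, 0, 1});  // extents {4, 2, 3}
  float a = tt({1, 0, 2});  // the same element of memory as t({0, 2, 1})

  // slice() uses numpy-style indexing: ends <= 0 are offsets from the extents.
  xnnpack::Tensor<float> s = t.slice({0, 1, 0}, {2, -1, 4});  // extents {2, 1, 4}

  // Views are generally not contiguous; deep_copy() materializes one that is,
  // so data()/size()/operator[] become usable again.
  xnnpack::Tensor<float> c = tt.deep_copy();
  return (a == c({1, 0, 2}) && !s.empty()) ? 0 : 1;
}
```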
6 changes: 6 additions & 0 deletions src/xnnpack/datatype.h
@@ -105,6 +105,12 @@ xnn_datatype xnn_datatype_of() {
return xnn_datatype_fp32;
} else if (std::is_same<T, int32_t>::value) {
return xnn_datatype_int32;
} else if (std::is_same<T, uint8_t>::value) {
return xnn_datatype_uint8;
} else if (std::is_same<T, uint16_t>::value) {
return xnn_datatype_uint16;
} else if (std::is_same<T, uint32_t>::value) {
return xnn_datatype_uint32;
} else {
return xnn_datatype_invalid;
}
