Add vectorized_math.h #11204

Open · wants to merge 4 commits into base: main

2 changes: 2 additions & 0 deletions .lintrunner.toml
@@ -271,6 +271,8 @@ exclude_patterns = [
'examples/**',
'exir/verification/bindings.cpp',
'extension/**',
# Uses properly-gated (ET_USE_PYTORCH_HEADERS) ATen include.
'kernels/portable/cpu/util/vectorized_math.h',
'kernels/optimized/**',
'runtime/core/exec_aten/**',
# Want to be able to keep c10 in sync with PyTorch core.
10 changes: 10 additions & 0 deletions kernels/portable/cpu/util/targets.bzl
@@ -307,6 +307,16 @@ def define_common_targets():
],
)

runtime.cxx_library(
name = "vectorized_math",
exported_headers = ["vectorized_math.h"],
visibility = ["//executorch/..."],
exported_deps = [
"//executorch/runtime/core/portable_type:portable_type",
"//executorch/runtime/core/exec_aten/util:scalar_type_util",
],
)

# Utility functions that can be used by operators that perform reduction
for aten_mode in get_aten_mode_options():
suffix = "_aten" if aten_mode else ""
16 changes: 6 additions & 10 deletions kernels/portable/cpu/util/test/CMakeLists.txt
@@ -4,26 +4,22 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# @generated by test/utils/generate_gtest_cmakelists.py
#
# This file should be formatted with
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~
# It should also be cmake-lint clean.
#

cmake_minimum_required(VERSION 3.19)

set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..)

include(${EXECUTORCH_ROOT}/tools/cmake/Test.cmake)
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

set(_test_srcs broadcast_indexes_range_test.cpp broadcast_test.cpp
reduce_test.cpp
reduce_test.cpp vectorized_math_test.cpp
)

et_cxx_test(
kernels_portable_cpu_util_test SOURCES ${_test_srcs} EXTRA_LIBS
portable_kernels portable_ops_lib
)

find_package_torch_headers()
target_include_directories(kernels_portable_cpu_util_test PRIVATE ${TORCH_INCLUDE_DIRS})
target_compile_definitions(kernels_portable_cpu_util_test PRIVATE ET_USE_PYTORCH_HEADERS)
11 changes: 11 additions & 0 deletions kernels/portable/cpu/util/test/targets.bzl
@@ -32,3 +32,14 @@ def define_common_targets():
"//executorch/kernels/portable/cpu/util:reduce_util",
],
)

# this test requires ET_USE_PYTORCH_HEADERS, which doesn't work in OSS Buck.
if not runtime.is_oss:
runtime.cxx_test(
name = "vectorized_math_test",
srcs = ["vectorized_math_test.cpp"],
deps = [
"//executorch/kernels/portable/cpu/util:vectorized_math",
"//executorch/runtime/core/portable_type/c10/c10:c10",
],
)
95 changes: 95 additions & 0 deletions kernels/portable/cpu/util/test/vectorized_math_test.cpp
@@ -0,0 +1,95 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/kernels/portable/cpu/util/vectorized_math.h>

#include <c10/util/irange.h>

#include <gtest/gtest.h>

#include <cstdint>

#ifndef ET_USE_PYTORCH_HEADERS
#error "This test requires ET_USE_PYTORCH_HEADERS!"
#endif // ET_USE_PYTORCH_HEADERS

TEST(VectorizedMathTest, BasicUnary) {
__at_align__ float result_floats[at::vec::Vectorized<float>::size()] = {0};
const auto x_vec = at::vec::Vectorized<float>::arange(0, 1);
[Review comment from a Contributor]
why are all these tests using arange(0, 1), i.e. a single number instead of an actual vector of numbers?

[Reply from the author]
it's not a single number; the arguments are start and step, not start and end. https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/cpu/vec/vec_base.h#L255
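
A minimal sketch illustrating those semantics (hypothetical standalone snippet, assuming the ATen vec headers are on the include path; the lane count depends on the target ISA):

```cpp
#include <ATen/cpu/vec/vec.h>

#include <iostream>

int main() {
  using Vec = at::vec::Vectorized<float>;
  // arange(base, step): lane i holds base + i * step.
  const auto v = Vec::arange(/*base=*/0, /*step=*/1);
  __at_align__ float buf[Vec::size()];
  v.store(buf);
  for (int i = 0; i < Vec::size(); ++i) {
    std::cout << buf[i] << ' '; // prints 0 1 2 ... Vec::size() - 1
  }
  std::cout << '\n';
  return 0;
}
```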

const auto result_vec = executorch::math::exp(x_vec);
result_vec.store(result_floats);
for (const auto ii : c10::irange(at::vec::Vectorized<float>::size())) {
EXPECT_FLOAT_EQ(result_floats[ii], std::exp(ii));
}
}

namespace {
template <typename T>
void test_unary_t_to_float() {
__at_align__ float result_floats[at::vec::Vectorized<T>::size()] = {0};
const auto x_vec = at::vec::Vectorized<T>::arange(0, 1);
const auto result_vec = executorch::math::exp(x_vec);
static_assert(decltype(result_vec)::size() >= at::vec::Vectorized<T>::size());
result_vec.store(result_floats, at::vec::Vectorized<T>::size());
for (const auto ii : c10::irange(at::vec::Vectorized<T>::size())) {
EXPECT_EQ(result_floats[ii], std::exp((float)ii)) << ii;
}
}

} // namespace

TEST(VectorizedMathTest, UnaryInt16ToFloat) {
test_unary_t_to_float<std::uint16_t>();
}

TEST(VectorizedMathTest, UnaryInt32ToFloat) {
test_unary_t_to_float<std::uint32_t>();
}

TEST(VectorizedMathTest, UnaryInt64ToFloat) {
test_unary_t_to_float<std::uint64_t>();
}

TEST(VectorizedMathTest, BasicBinary) {
__at_align__ float result_floats[at::vec::Vectorized<float>::size()] = {0};
const auto x_vec = at::vec::Vectorized<float>::arange(0, 1);
const auto y_vec = at::vec::Vectorized<float>(2);
const auto result_vec = executorch::math::pow(x_vec, y_vec);
result_vec.store(result_floats);
for (const auto ii : c10::irange(at::vec::Vectorized<float>::size())) {
EXPECT_FLOAT_EQ(result_floats[ii], std::pow((float)ii, 2.0f));
}
}

namespace {
template <typename T>
void test_binary_t_to_float() {
__at_align__ float result_floats[at::vec::Vectorized<T>::size()] = {0};
const auto x_vec = at::vec::Vectorized<T>::arange(0, 1);
const auto y_vec = at::vec::Vectorized<T>(2);
const auto result_vec = executorch::math::pow(x_vec, y_vec);
static_assert(decltype(result_vec)::size() >= at::vec::Vectorized<T>::size());
result_vec.store(result_floats, at::vec::Vectorized<T>::size());
for (const auto ii : c10::irange(at::vec::Vectorized<T>::size())) {
EXPECT_EQ(result_floats[ii], std::pow((float)ii, 2.0f)) << ii;
}
}

TEST(VectorizedMathTest, BinaryInt16ToFloat) {
test_binary_t_to_float<std::int16_t>();
}

TEST(VectorizedMathTest, BinaryInt32ToFloat) {
test_binary_t_to_float<std::int32_t>();
}

TEST(VectorizedMathTest, BinaryInt64ToFloat) {
test_binary_t_to_float<std::uint64_t>();
}

} // namespace
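
The tests above exercise the ET_USE_PYTORCH_HEADERS path only. For contrast, a minimal scalar-only usage sketch (hypothetical snippet for a build where ET_USE_PYTORCH_HEADERS is not defined, in which the header's macros reduce to plain using-declarations):

```cpp
#include <cmath> // declare the std:: overloads before the header's using-declarations

#include <executorch/kernels/portable/cpu/util/vectorized_math.h>

#include <cassert>

int main() {
  // Without ET_USE_PYTORCH_HEADERS, executorch::math::sqrt and friends are
  // simply the <cmath> scalar functions re-exported via `using std::sqrt;`.
  assert(executorch::math::sqrt(4.0f) == 2.0f);
  assert(executorch::math::exp(0.0f) == 1.0f);
  // rsqrt has no std:: counterpart; the header defines a scalar fallback.
  assert(executorch::math::rsqrt(4.0f) == 0.5f);
  return 0;
}
```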
148 changes: 148 additions & 0 deletions kernels/portable/cpu/util/vectorized_math.h
@@ -0,0 +1,148 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>

#ifdef ET_USE_PYTORCH_HEADERS
#include <ATen/cpu/vec/vec.h>
#endif // ET_USE_PYTORCH_HEADERS

#include <iostream>
#include <type_traits>

#ifdef ET_USE_PYTORCH_HEADERS
namespace executorch {
inline namespace math {
namespace internal {
template <typename T>
auto convert_to_vectorized_n_of_float(at::vec::Vectorized<T> vec) {
static constexpr auto float_vec_size = at::vec::Vectorized<float>::size();
static constexpr auto t_vec_size = at::vec::Vectorized<T>::size();
static constexpr auto result_size =
t_vec_size < float_vec_size ? 1 : t_vec_size / float_vec_size;
static_assert(result_size >= 1);
return at::vec::convert<float, result_size, T, 1, /*keep=*/true>(
at::vec::VectorizedN<T, 1>(vec));
}
} // namespace internal
} // namespace math
} // namespace executorch
#endif // ET_USE_PYTORCH_HEADERS

#define _ET_INTERNAL_STD_MATH_FUNC(name) \
namespace executorch { \
inline namespace math { \
using std::name; \
} \
} // namespace executorch

#ifdef ET_USE_PYTORCH_HEADERS
/**
* Internal-usage macro for making a vectorized variant of a unary
* function available in the executorch::math namespace.
*/
#define ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(func_name) \
namespace executorch { \
inline namespace math { \
template <typename T> \
auto func_name(at::vec::Vectorized<T> vec) { \
if constexpr (!::executorch::runtime::is_floating_point<T>::value) { \
return internal::convert_to_vectorized_n_of_float(vec).func_name(); \
} else { \
return vec.func_name(); \
} \
} \
} \
}

#define ET_INTERNAL_VECTORIZED_FLOAT_BINARY_FUNC(func_name) \
namespace executorch { \
inline namespace math { \
template <typename T> \
auto func_name(at::vec::Vectorized<T> vec0, at::vec::Vectorized<T> vec1) { \
if constexpr (!::executorch::runtime::is_floating_point<T>::value) { \
const auto vec_float0 = \
internal::convert_to_vectorized_n_of_float(vec0); \
const auto vec_float1 = \
internal::convert_to_vectorized_n_of_float(vec1); \
return vec_float0.func_name(vec_float1); \
} else { \
return vec0.func_name(vec1); \
} \
} \
} \
}

/**
* Internal-usage macro for making a C++ standard library
* floating-point function and a vectorized variant of it available in
 * the executorch::math namespace. Should be used with functions where the
* corresponding operator is a "float op" in TensorIterator parlance
* (i.e., uses something like build_borrowing_binary_float_op()),
* because it converts non-floating-point arguments to floating point.
*/
#define ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(func_name) \
_ET_INTERNAL_STD_MATH_FUNC(func_name) \
ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(func_name)

#define ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(func_name) \
_ET_INTERNAL_STD_MATH_FUNC(func_name) \
ET_INTERNAL_VECTORIZED_FLOAT_BINARY_FUNC(func_name)

#else // ET_USE_PYTORCH_HEADERS
#define ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(name) \
_ET_INTERNAL_STD_MATH_FUNC(name)
#define ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(name) \
_ET_INTERNAL_STD_MATH_FUNC(name)
#endif // ET_USE_PYTORCH_HEADERS

// To simplify client code, we provide coverage for a bunch of float ops (the
// same ones listed in ATen vml.h) here.
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(abs)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(acos)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(asin)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(atan)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(ceil)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(cos)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(cosh)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(erf)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(erfc)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(exp)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(expm1)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(floor)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log10)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log1p)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log2)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(sin)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(sinh)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(sqrt)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(round)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(tan)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(tanh)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(trunc)
ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(lgamma)

#ifdef ET_USE_PYTORCH_HEADERS
ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(rsqrt)
#endif // ET_USE_PYTORCH_HEADERS

namespace executorch {
inline namespace math {
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, bool> = true>
T rsqrt(T x) {
return T(1) / std::sqrt(x);
}
} // namespace math
} // namespace executorch

ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(atan2)
ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(fmod)
ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(pow)
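
For reference, this is roughly what ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(exp) expands to when ET_USE_PYTORCH_HEADERS is defined (hand-formatted, with the two namespace blocks merged):

```cpp
namespace executorch {
inline namespace math {
using std::exp; // scalar overloads from <cmath>

template <typename T>
auto exp(at::vec::Vectorized<T> vec) {
  if constexpr (!::executorch::runtime::is_floating_point<T>::value) {
    // "Float op" semantics: integral lanes are converted to float first
    // (possibly spanning multiple float vectors via VectorizedN), then the
    // vectorized exp runs on the converted lanes.
    return internal::convert_to_vectorized_n_of_float(vec).exp();
  } else {
    return vec.exp();
  }
}
} // namespace math
} // namespace executorch
```

This is also why the integer-input tests store through an explicit element count: for wide integer lanes (e.g. int64_t), convert_to_vectorized_n_of_float returns a VectorizedN<float, 1> whose lane count (Vectorized<float>::size()) can exceed Vectorized<T>::size(), so only the first Vectorized<T>::size() results are meaningful.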
6 changes: 5 additions & 1 deletion runtime/core/portable_type/c10/c10/targets.bzl
@@ -53,7 +53,11 @@ def define_common_targets():
runtime.cxx_library(
name = "aten_headers_for_executorch",
srcs = [],
visibility = ["//executorch/kernels/optimized/...", "@EXECUTORCH_CLIENTS"],
visibility = [
"//executorch/kernels/optimized/...",
"//executorch/kernels/portable/cpu/util/...",
"@EXECUTORCH_CLIENTS",
],
exported_deps = select({
"DEFAULT": [],
"ovr_config//cpu:arm64": [
1 change: 1 addition & 0 deletions runtime/core/portable_type/targets.bzl
@@ -26,6 +26,7 @@ def define_common_targets():
visibility = [
"//executorch/backends/...",
"//executorch/extension/fb/dynamic_shim/...",
"//executorch/kernels/portable/cpu/...",
"//executorch/runtime/core/exec_aten/...",
"//executorch/runtime/core/portable_type/test/...",
],
12 changes: 0 additions & 12 deletions test/utils/OSSTestConfig.json
@@ -68,18 +68,6 @@
"extension_threadpool"
]
},
{
"directory": "kernels/portable/cpu/util/test",
"sources": [
"broadcast_indexes_range_test.cpp",
"broadcast_test.cpp",
"reduce_test.cpp"
],
"additional_libs": [
"portable_kernels",
"portable_ops_lib"
]
},
{
"directory": "runtime/core/portable_type/test",
"sources": [