Skip to content

Commit

Permalink
Merge pull request #1396 from j2kun:caratheodory-fejer
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 726916132
  • Loading branch information
copybara-github committed Feb 14, 2025
2 parents fca1e70 + 5333eae commit a381495
Show file tree
Hide file tree
Showing 7 changed files with 305 additions and 15 deletions.
24 changes: 24 additions & 0 deletions lib/Utils/Approximation/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,27 @@ cc_test(
"@llvm-project//mlir:Support",
],
)

cc_library(
name = "CaratheodoryFejer",
srcs = ["CaratheodoryFejer.cpp"],
hdrs = ["CaratheodoryFejer.h"],
deps = [
":Chebyshev",
"@eigen//:eigen3",
"@heir//lib/Utils/Polynomial",
"@llvm-project//llvm:Support",
],
)

cc_test(
name = "CaratheodoryFejerTest",
srcs = ["CaratheodoryFejerTest.cpp"],
deps = [
":CaratheodoryFejer",
"@googletest//:gtest_main",
"@heir//lib/Utils/Polynomial",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:Support",
],
)
126 changes: 126 additions & 0 deletions lib/Utils/Approximation/CaratheodoryFejer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#include "lib/Utils/Approximation/CaratheodoryFejer.h"

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <optional>

#include "Eigen/Core" // from @eigen
#include "Eigen/Dense" // from @eigen
#include "Eigen/Eigenvalues" // from @eigen
#include "lib/Utils/Approximation/Chebyshev.h"
#include "lib/Utils/Polynomial/Polynomial.h"
#include "llvm/include/llvm/ADT/APFloat.h" // from @llvm-project
#include "llvm/include/llvm/ADT/SmallVector.h" // from @llvm-project

namespace mlir {
namespace heir {
namespace approximation {

using ::Eigen::MatrixXd;
using ::Eigen::SelfAdjointEigenSolver;
using ::Eigen::VectorXd;
using ::llvm::APFloat;
using ::llvm::SmallVector;
using ::mlir::heir::polynomial::FloatPolynomial;

FloatPolynomial caratheodoryFejerApproximation(
const std::function<APFloat(APFloat)> &func, int32_t degree,
std::optional<int32_t> chebDegree) {
// Construct the Chebyshev interpolant.
SmallVector<APFloat> chebPts, chebEvalPts, chebCoeffs;
int32_t actualChebDegree = chebDegree.value_or(2 * degree);
int32_t numChebPts = 1 + actualChebDegree;
assert(numChebPts >= 2 * degree + 1 &&
"Chebyshev degree must be at least twice the CF degree plus 1");
chebPts.reserve(numChebPts);
getChebyshevPoints(numChebPts, chebPts);
for (auto &pt : chebPts) {
chebEvalPts.push_back(func(pt));
}
interpolateChebyshev(chebEvalPts, chebCoeffs);

// Use the tail coefficients to construct a Hankel matrix
// where A[i, j] = c[i+j]
// Cf. https://en.wikipedia.org/wiki/Hankel_matrix
SmallVector<APFloat> tailChebCoeffs(chebCoeffs.begin() + (degree + 1),
chebCoeffs.end());
int32_t hankelSize = tailChebCoeffs.size();
MatrixXd hankel(hankelSize, hankelSize);
for (int i = 0; i < hankelSize; ++i) {
for (int j = 0; j < hankelSize; ++j) {
// upper left triangular region, including diagonal
if (i + j < hankelSize)
hankel(i, j) = tailChebCoeffs[i + j].convertToDouble();
else
hankel(i, j) = 0;
}
}

// Compute the eigenvalues and eigenvectors of the Hankel matrix
SelfAdjointEigenSolver<MatrixXd> solver(hankel);

const VectorXd &eigenvalues = solver.eigenvalues();
// Eigenvectors are columns of the matrix.
const MatrixXd &eigenvectors = solver.eigenvectors();

// Extract the eigenvector for the (absolute value) largest eigenvalue.
int32_t maxIndex = 0;
double maxEigenvalue = eigenvalues(0);
for (int32_t i = 1; i < eigenvalues.size(); ++i) {
if (std::abs(eigenvalues(i)) > maxEigenvalue) {
maxEigenvalue = std::abs(eigenvalues(i));
maxIndex = i;
}
}
VectorXd maxEigenvector = eigenvectors.col(maxIndex);

// A debug for comparing the eigenvalue solver with the reference
// implementation.
// std::cout << "Max eigenvector:" << std::endl;
// for (int32_t i = 0; i < maxEigenvector.size(); ++i) {
// std::cout << std::setprecision(18) << maxEigenvector(i) << ", ";
// }
// std::cout << std::endl;

double v1 = maxEigenvector(0);
VectorXd vv = maxEigenvector.tail(maxEigenvector.size() - 1);

SmallVector<APFloat> b =
SmallVector<APFloat>(tailChebCoeffs.begin(), tailChebCoeffs.end());

int32_t t = actualChebDegree - degree - 1;
for (int32_t i = degree; i > -degree - 1; --i) {
SmallVector<APFloat> sliceB(b.begin(), b.begin() + t);

APFloat sum = APFloat(0.0);
for (int32_t j = 0; j < sliceB.size(); ++j) {
double vvVal = vv(j);
sum = sum + sliceB[j] * APFloat(vvVal);
}

APFloat z = -sum / APFloat(v1);

// I suspect this insert is slow. Once it's working we can optimize this
// loop to avoid the insert.
b.insert(b.begin(), z);
}

SmallVector<APFloat> bb(b.begin() + degree, b.begin() + (2 * degree + 1));
for (int32_t i = 1; i < bb.size(); ++i) {
bb[i] = bb[i] + b[degree - 1 - (i - 1)];
}

SmallVector<APFloat> pk;
pk.reserve(bb.size());
for (int32_t i = 0; i < bb.size(); ++i) {
pk.push_back(chebCoeffs[i] - bb[i]);
}

return chebyshevToMonomial(pk);
}

} // namespace approximation
} // namespace heir
} // namespace mlir
39 changes: 39 additions & 0 deletions lib/Utils/Approximation/CaratheodoryFejer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef LIB_UTILS_APPROXIMATION_CARATHEODORYFEJER_H_
#define LIB_UTILS_APPROXIMATION_CARATHEODORYFEJER_H_

#include <cstdint>
#include <functional>
#include <optional>

#include "lib/Utils/Polynomial/Polynomial.h"
#include "llvm/include/llvm/ADT/APFloat.h" // from @llvm-project

namespace mlir {
namespace heir {
namespace approximation {

/// Construct the Caratheodory-Fejer approximation of a given function. The
/// result polynomial is represented in the monomial basis and has degree
/// `degree`.
///
/// A port of chebfun's cf.m, cf.
/// https://github.com/chebfun/chebfun/blob/69c12cf75f93cb2f36fd4cfd5e287662cd2f1091/%40chebfun/cf.m
/// Specifically, the path through `cf` that invokes `polynomialCF`
/// (rational functions are not supported here).
///
/// Cf. https://doi.org/10.1007/s10543-011-0331-7 for a mathematical
/// explanation.
/// https://people.maths.ox.ac.uk/trefethen/publication/PDF/2011_138.pdf
///
/// The argument chebDegree provides control over the degree of the Chebyshev
/// interpolation used to seed the CF approximation. If unset, it
/// is heuristically chosen as twice the desired CF approximant degree.
::mlir::heir::polynomial::FloatPolynomial caratheodoryFejerApproximation(
const std::function<::llvm::APFloat(::llvm::APFloat)> &func, int32_t degree,
std::optional<int32_t> chebDegree = std::nullopt);

} // namespace approximation
} // namespace heir
} // namespace mlir

#endif // LIB_UTILS_APPROXIMATION_CARATHEODORYFEJER_H_
101 changes: 101 additions & 0 deletions lib/Utils/Approximation/CaratheodoryFejerTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#include <cmath>

#include "gmock/gmock.h" // from @googletest
#include "gtest/gtest.h" // from @googletest
#include "lib/Utils/Approximation/CaratheodoryFejer.h"
#include "lib/Utils/Polynomial/Polynomial.h"
#include "llvm/include/llvm/ADT/APFloat.h" // from @llvm-project
#include "mlir/include/mlir/Support/LLVM.h" // from @llvm-project

#define EPSILON 1e-11

namespace mlir {
namespace heir {
namespace approximation {
namespace {

using ::llvm::APFloat;
using ::mlir::heir::polynomial::FloatPolynomial;
using ::testing::DoubleNear;

TEST(CaratheodoryFejerTest, ApproximateExpDegree3) {
auto func = [](const APFloat& x) {
return APFloat(std::exp(x.convertToDouble()));
};
FloatPolynomial actual = caratheodoryFejerApproximation(func, 3);
// Values taken from reference impl are exact.
FloatPolynomial expected = FloatPolynomial::fromCoefficients(
{0.9945811640427066, 0.9956553725361579, 0.5429702814725632,
0.1795458211087378});
EXPECT_EQ(actual, expected);
}

TEST(CaratheodoryFejerTest, ApproximateReluDegree14) {
auto relu = [](const APFloat& x) {
APFloat zero = APFloat::getZero(x.getSemantics());
return x > zero ? x : zero;
};
FloatPolynomial actual = caratheodoryFejerApproximation(relu, 14);

// The reference implementation prints coefficients that are ~1e-12 away from
// our implementation, mainly because the eigenvalue solver details are
// slightly different. For instance, the max eigenvalue in our implementation
// is
//
//
// -0.415033778742867843
// 0.41503377874286651
// 0.358041674766206519
// -0.358041674766206408
// -0.302283813201297547
// 0.302283813201297602
// 0.244610415109838275
// -0.244610415109838636
// -0.182890410854375879
// 0.182890410854376101
// 0.115325658064263425
// -0.115325658064263148
// -0.0399306015339729384
// 0.0399306015339727718
//
// But in the reference implementation, the basis elements are permuted so
// that the adjacent positive values and negative values are swapped. This is
// still a valid eigenvector, but it results in slight difference in floating
// point error accumulation as the rest of the algorithm proceeds.
auto terms = actual.getTerms();
EXPECT_THAT(terms[0].getCoefficient().convertToDouble(),
DoubleNear(0.010384627976349288, EPSILON));
EXPECT_THAT(terms[1].getCoefficient().convertToDouble(),
DoubleNear(0.4999999999999994, EPSILON));
EXPECT_THAT(terms[2].getCoefficient().convertToDouble(),
DoubleNear(3.227328667600437, EPSILON));
EXPECT_THAT(terms[3].getCoefficient().convertToDouble(),
DoubleNear(2.1564570993799688e-14, EPSILON));
EXPECT_THAT(terms[4].getCoefficient().convertToDouble(),
DoubleNear(-27.86732536231614, EPSILON));
EXPECT_THAT(terms[5].getCoefficient().convertToDouble(),
DoubleNear(-1.965772591254676e-13, EPSILON));
EXPECT_THAT(terms[6].getCoefficient().convertToDouble(),
DoubleNear(139.12944753041404, EPSILON));
EXPECT_THAT(terms[7].getCoefficient().convertToDouble(),
DoubleNear(7.496488571843804e-13, EPSILON));
EXPECT_THAT(terms[8].getCoefficient().convertToDouble(),
DoubleNear(-363.6062351528312, EPSILON));
EXPECT_THAT(terms[9].getCoefficient().convertToDouble(),
DoubleNear(-1.3773783921527744e-12, EPSILON));
EXPECT_THAT(terms[10].getCoefficient().convertToDouble(),
DoubleNear(505.9489721657369, EPSILON));
EXPECT_THAT(terms[11].getCoefficient().convertToDouble(),
DoubleNear(1.2076732984649801e-12, EPSILON));
EXPECT_THAT(terms[12].getCoefficient().convertToDouble(),
DoubleNear(-355.4120699445272, EPSILON));
EXPECT_THAT(terms[13].getCoefficient().convertToDouble(),
DoubleNear(-4.050490139246503e-13, EPSILON));
EXPECT_THAT(terms[14].getCoefficient().convertToDouble(),
DoubleNear(99.07988219049058, EPSILON));
}

} // namespace
} // namespace approximation
} // namespace heir
} // namespace mlir
4 changes: 2 additions & 2 deletions lib/Utils/Approximation/Chebyshev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ void getChebyshevPolynomials(int64_t numPolynomials,
results.push_back(FloatPolynomial::fromCoefficients({1.}));
}
if (numPolynomials >= 2) {
// 2x
results.push_back(FloatPolynomial::fromCoefficients({0., 2.}));
// x
results.push_back(FloatPolynomial::fromCoefficients({0., 1.}));
}

if (numPolynomials <= 2) return;
Expand Down
4 changes: 2 additions & 2 deletions lib/Utils/Approximation/Chebyshev.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ namespace approximation {
void getChebyshevPoints(int64_t numPoints,
::llvm::SmallVector<::llvm::APFloat> &results);

/// Generate the first `numPolynomials` Chebyshev polynomials of the second
/// Generate the first `numPolynomials` Chebyshev polynomials of the first
/// kind, storing them in the results outparameter.
///
/// The first few polynomials are 1, 2x, 4x^2 - 1, 8x^3 - 4x, ...
/// The first few polynomials are 1, x, 2x^2 - 1, 4x^3 - 3x, ...
void getChebyshevPolynomials(
int64_t numPolynomials,
::llvm::SmallVector<::mlir::heir::polynomial::FloatPolynomial> &results);
Expand Down
22 changes: 11 additions & 11 deletions lib/Utils/Approximation/ChebyshevTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,25 +55,25 @@ TEST(ChebyshevTest, TestGetChebyshevPolynomials) {
chebPolys,
ElementsAre(
FloatPolynomial::fromCoefficients({1.}),
FloatPolynomial::fromCoefficients({0., 2.}),
FloatPolynomial::fromCoefficients({-1., 0., 4.}),
FloatPolynomial::fromCoefficients({0., -4., 0., 8.}),
FloatPolynomial::fromCoefficients({1., 0., -12., 0., 16.}),
FloatPolynomial::fromCoefficients({0., 6., 0., -32., 0., 32.}),
FloatPolynomial::fromCoefficients({-1., 0., 24., 0., -80., 0., 64.}),
FloatPolynomial::fromCoefficients({0., 1.}),
FloatPolynomial::fromCoefficients({-1., 0., 2.}),
FloatPolynomial::fromCoefficients({0., -3., 0., 4.}),
FloatPolynomial::fromCoefficients({1., 0., -8., 0., 8.}),
FloatPolynomial::fromCoefficients({0., 5., 0., -20., 0., 16.}),
FloatPolynomial::fromCoefficients({-1., 0., 18., 0., -48., 0., 32.}),
FloatPolynomial::fromCoefficients(
{0., -8., 0., 80., 0., -192., 0., 128.}),
{0., -7., 0., 56., 0., -112., 0., 64.}),
FloatPolynomial::fromCoefficients(
{1., 0., -40., 0., 240., 0., -448., 0., 256.})));
{1., 0., -32., 0., 160., 0., -256., 0., 128.})));
}

TEST(ChebyshevTest, TestChebyshevToMonomial) {
// 1 (1) - 1 (-1 + 4x^2) + 2 (-4x + 8x^3)
// 1 (1) - 1 (-1 + 2x^2) + 2 (-3x + 4x^3)
SmallVector<APFloat> chebCoeffs = {APFloat(1.0), APFloat(0.0), APFloat(-1.0),
APFloat(2.0)};
// 2 - 8 x - 4 x^2 + 16 x^3
// 2 - 6 x - 2 x^2 + 8 x^3
FloatPolynomial expected =
FloatPolynomial::fromCoefficients({2.0, -8.0, -4.0, 16.0});
FloatPolynomial::fromCoefficients({2.0, -6.0, -2.0, 8.0});
FloatPolynomial actual = chebyshevToMonomial(chebCoeffs);
EXPECT_EQ(actual, expected);
}
Expand Down

0 comments on commit a381495

Please sign in to comment.