Skip to content

Commit 43b9966

Browse files
committed
Merge remote-tracking branch 'origin/better-ownership-semantics'
2 parents c0de4de + 8ecee34 commit 43b9966

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

80 files changed

+1442
-1288
lines changed

CMakeLists.txt

+7-6
Original file line number | Diff line number | Diff line change
@@ -60,8 +60,8 @@ add_library(
6060
source/operators/creator/koza.cpp
6161
source/operators/creator/ptc2.cpp
6262
source/operators/crossover.cpp
63-
source/operators/evaluator.cpp
6463
source/operators/evaluator_error_metrics.cpp
64+
source/operators/evaluator.cpp
6565
source/operators/generator/basic.cpp
6666
source/operators/generator/brood.cpp
6767
source/operators/generator/os.cpp
@@ -84,20 +84,21 @@ add_library(operon::operon ALIAS operon_operon)
8484

8585
# ---- Required dependencies ----
8686
find_package(AriaCsvParser REQUIRED)
87-
find_package(Eigen3 REQUIRED)
88-
find_package(FastFloat REQUIRED)
89-
find_package(Taskflow REQUIRED)
90-
find_package(Threads REQUIRED)
9187
find_package(byte-lite REQUIRED)
92-
find_package(span-lite REQUIRED)
9388
find_package(cpp-sort REQUIRED)
89+
find_package(Eigen3 REQUIRED)
9490
find_package(eve REQUIRED)
91+
find_package(FastFloat REQUIRED)
9592
find_package(fmt REQUIRED)
9693
find_package(lbfgs REQUIRED)
9794
find_package(libassert REQUIRED)
9895
find_package(mdspan REQUIRED)
96+
find_package(Microsoft.GSL CONFIG REQUIRED)
9997
find_package(outcome REQUIRED)
10098
find_package(pratt-parser REQUIRED)
99+
find_package(span-lite REQUIRED)
100+
find_package(Taskflow REQUIRED)
101+
find_package(Threads REQUIRED)
101102
find_package(unordered_dense REQUIRED)
102103
find_package(vstat REQUIRED)
103104
find_package(xxHash)

cli/CMakeLists.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ function(add_operon_cli NAME)
1616
source/${NAME}.cpp
1717
source/operator_factory.cpp
1818
source/util.cpp
19-
)
19+
)
2020

2121
target_link_libraries(${NAME} PRIVATE operon::operon cxxopts::cxxopts scn::scn)
2222
target_compile_features(${NAME} PRIVATE cxx_std_20)

cli/source/operator_factory.cpp

+16-17
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
#include <stdexcept> // for runtime_error
66
#include <fmt/format.h> // for format
77
#include <scn/scan.h>
8-
#include "operon/interpreter/dispatch_table.hpp"
98
#include "operon/operators/creator.hpp" // for CreatorBase, BalancedTreeC...
109
#include "operon/operators/evaluator.hpp" // for Evaluator, EvaluatorBase
1110
#include "operon/operators/generator.hpp" // for OffspringGeneratorBase
@@ -95,11 +94,11 @@ auto ParseCreator(std::string const& str, PrimitiveSet const& pset, std::vector<
9594
}
9695

9796
if (str == "btc") {
98-
creator = std::make_unique<BalancedTreeCreator>(pset, inputs, bias);
97+
creator = std::make_unique<BalancedTreeCreator>(&pset, inputs, bias);
9998
} else if (str == "ptc2") {
100-
creator = std::make_unique<ProbabilisticTreeCreator>(pset, inputs, bias);
99+
creator = std::make_unique<ProbabilisticTreeCreator>(&pset, inputs, bias);
101100
} else if (str == "grow") {
102-
creator = std::make_unique<GrowTreeCreator>(pset, inputs);
101+
creator = std::make_unique<GrowTreeCreator>(&pset, inputs);
103102
} else {
104103
throw std::invalid_argument(detail::GetErrorString("creator", str));
105104
}
@@ -112,23 +111,23 @@ auto ParseEvaluator(std::string const& str, Problem& problem, DefaultDispatch& d
112111

113112
std::unique_ptr<EvaluatorBase> evaluator;
114113
if (str == "r2") {
115-
evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::R2{}, scale);
114+
evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::R2{}, scale);
116115
} else if (str == "c2") {
117-
evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::C2{}, scale);
116+
evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::C2{}, scale);
118117
} else if (str == "nmse") {
119-
evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::NMSE{}, scale);
118+
evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::NMSE{}, scale);
120119
} else if (str == "mse") {
121-
evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::MSE{}, scale);
120+
evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::MSE{}, scale);
122121
} else if (str == "rmse") {
123-
evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::RMSE{}, scale);
122+
evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::RMSE{}, scale);
124123
} else if (str == "mae") {
125-
evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::MAE{}, scale);
124+
evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::MAE{}, scale);
126125
} else if (str == "mdl_gauss") {
127-
evaluator = std::make_unique<Operon::MinimumDescriptionLengthEvaluator<T, GaussianLikelihood<Operon::Scalar>>>(problem, dtable);
126+
evaluator = std::make_unique<Operon::MinimumDescriptionLengthEvaluator<T, GaussianLikelihood<Operon::Scalar>>>(&problem, &dtable);
128127
} else if (str == "mdl_poisson") {
129-
evaluator = std::make_unique<Operon::MinimumDescriptionLengthEvaluator<T, PoissonLikelihood<Operon::Scalar>>>(problem, dtable);
128+
evaluator = std::make_unique<Operon::MinimumDescriptionLengthEvaluator<T, PoissonLikelihood<Operon::Scalar>>>(&problem, &dtable);
130129
} else if (str == "gauss") {
131-
evaluator = std::make_unique<Operon::GaussianLikelihoodEvaluator<T>>(problem, dtable);
130+
evaluator = std::make_unique<Operon::GaussianLikelihoodEvaluator<T>>(&problem, &dtable);
132131
} else {
133132
throw std::runtime_error(fmt::format("unable to parse evaluator metric '{}'\n", str));
134133
}
@@ -141,7 +140,7 @@ auto ParseGenerator(std::string const& str, EvaluatorBase& eval, CrossoverBase&
141140
auto tok = Split(str, ':');
142141
auto name = tok[0];
143142
if (name == "basic") {
144-
generator = std::make_unique<BasicOffspringGenerator>(eval, cx, mut, femSel, maleSel, coeffOptimizer);
143+
generator = std::make_unique<BasicOffspringGenerator>(&eval, &cx, &mut, &femSel, &maleSel, coeffOptimizer);
145144
} else if (name == "os") {
146145
size_t maxSelectionPressure{100};
147146
double comparisonFactor{0};
@@ -151,16 +150,16 @@ auto ParseGenerator(std::string const& str, EvaluatorBase& eval, CrossoverBase&
151150
if (tok.size() > 2) {
152151
comparisonFactor = scn::scan<double>(tok[2], "{}")->value();
153152
}
154-
generator = std::make_unique<OffspringSelectionGenerator>(eval, cx, mut, femSel, maleSel, coeffOptimizer);
153+
generator = std::make_unique<OffspringSelectionGenerator>(&eval, &cx, &mut, &femSel, &maleSel, coeffOptimizer);
155154
dynamic_cast<OffspringSelectionGenerator*>(generator.get())->MaxSelectionPressure(maxSelectionPressure);
156155
dynamic_cast<OffspringSelectionGenerator*>(generator.get())->ComparisonFactor(comparisonFactor);
157156
} else if (name == "brood") {
158-
generator = std::make_unique<BroodOffspringGenerator>(eval, cx, mut, femSel, maleSel, coeffOptimizer);
157+
generator = std::make_unique<BroodOffspringGenerator>(&eval, &cx, &mut, &femSel, &maleSel, coeffOptimizer);
159158
size_t broodSize{BroodOffspringGenerator::DefaultBroodSize};
160159
if (tok.size() > 1) { broodSize = scn::scan<size_t>(tok[1], "{}")->value(); }
161160
dynamic_cast<BroodOffspringGenerator*>(generator.get())->BroodSize(broodSize);
162161
} else if (name == "poly") {
163-
generator = std::make_unique<PolygenicOffspringGenerator>(eval, cx, mut, femSel, maleSel, coeffOptimizer);
162+
generator = std::make_unique<PolygenicOffspringGenerator>(&eval, &cx, &mut, &femSel, &maleSel, coeffOptimizer);
164163
size_t polygenicSize{PolygenicOffspringGenerator::DefaultBroodSize};
165164
if (tok.size() > 1) { polygenicSize = scn::scan<size_t>(tok[1], "{}")->value(); }
166165
dynamic_cast<PolygenicOffspringGenerator*>(generator.get())->PolygenicSize(polygenicSize);

cli/source/operator_factory.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,9 @@
99
#include <string> // for operator==, string
1010
#include <utility> // for addressof
1111
#include <vector> // for vector
12+
#include "operon/core/dispatch.hpp" // for DispatchTable
1213
#include "operon/core/types.hpp" // for Span
1314
#include "operon/core/individual.hpp" // for Comparison
14-
#include "operon/interpreter/dispatch_table.hpp"
1515
#include "operon/interpreter/interpreter.hpp" // for Interpreter
1616
#include "operon/optimizer/optimizer.hpp"
1717
#include "util.hpp" // for Split

cli/source/operon_gp.cpp

+23-152
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: MIT
22
// SPDX-FileCopyrightText: Copyright 2019-2023 Heal Research
33

4+
#include "reporter.hpp"
45
#include <chrono>
56
#include <cmath>
67
#include <cstdlib>
@@ -161,7 +162,9 @@ auto main(int argc, char** argv) -> int
161162
}
162163
}
163164
}
164-
Operon::Problem problem(*dataset, trainingRange, testRange);
165+
Operon::Problem problem(std::move(dataset));
166+
problem.SetTrainingRange(trainingRange);
167+
problem.SetTestRange(testRange);
165168
problem.SetTarget(target.Hash);
166169
problem.SetInputs(inputs);
167170
problem.ConfigurePrimitiveSet(primitiveSetConfig);
@@ -170,7 +173,7 @@ auto main(int argc, char** argv) -> int
170173
creator = ParseCreator(result["creator"].as<std::string>(), problem.GetPrimitiveSet(), problem.GetInputs());
171174

172175
auto [amin, amax] = problem.GetPrimitiveSet().FunctionArityLimits();
173-
Operon::UniformTreeInitializer treeInitializer(*creator);
176+
Operon::UniformTreeInitializer treeInitializer(creator.get());
174177

175178
auto const initialMinDepth = result["creator-mindepth"].as<std::size_t>();
176179
auto const initialMaxDepth = result["creator-mindepth"].as<std::size_t>();
@@ -200,30 +203,30 @@ auto main(int argc, char** argv) -> int
200203

201204
Operon::ChangeVariableMutation changeVar { problem.GetInputs() };
202205
Operon::ChangeFunctionMutation changeFunc { problem.GetPrimitiveSet() };
203-
Operon::ReplaceSubtreeMutation replaceSubtree { *creator, *coeffInitializer, maxDepth, maxLength };
204-
Operon::InsertSubtreeMutation insertSubtree { *creator, *coeffInitializer, maxDepth, maxLength };
206+
Operon::ReplaceSubtreeMutation replaceSubtree { creator.get(), coeffInitializer.get(), maxDepth, maxLength };
207+
Operon::InsertSubtreeMutation insertSubtree { creator.get(), coeffInitializer.get(), maxDepth, maxLength };
205208
Operon::RemoveSubtreeMutation removeSubtree { problem.GetPrimitiveSet() };
206209
Operon::DiscretePointMutation discretePoint;
207210
for (auto v : Operon::Math::Constants) {
208211
discretePoint.Add(static_cast<Operon::Scalar>(v), 1);
209212
}
210-
mutator.Add(*onePoint, 1.0);
211-
mutator.Add(changeVar, 1.0);
212-
mutator.Add(changeFunc, 1.0);
213-
mutator.Add(replaceSubtree, 1.0);
214-
mutator.Add(insertSubtree, 1.0);
215-
mutator.Add(removeSubtree, 1.0);
216-
mutator.Add(discretePoint, 1.0);
213+
mutator.Add(onePoint.get(), 1.0);
214+
mutator.Add(&changeVar, 1.0);
215+
mutator.Add(&changeFunc, 1.0);
216+
mutator.Add(&replaceSubtree, 1.0);
217+
mutator.Add(&insertSubtree, 1.0);
218+
mutator.Add(&removeSubtree, 1.0);
219+
mutator.Add(&discretePoint, 1.0);
217220

218221
Operon::DefaultDispatch dtable;
219222
auto scale = result["linear-scaling"].as<bool>();
220223
auto evaluator = Operon::ParseEvaluator(result["objective"].as<std::string>(), problem, dtable, scale);
221224
evaluator->SetBudget(config.Evaluations);
222225

223-
auto optimizer = std::make_unique<Operon::LevenbergMarquardtOptimizer<decltype(dtable), Operon::OptimizerType::Eigen>>(dtable, problem);
226+
auto optimizer = std::make_unique<Operon::LevenbergMarquardtOptimizer<decltype(dtable), Operon::OptimizerType::Eigen>>(&dtable, &problem);
224227
optimizer->SetIterations(config.Iterations);
225228

226-
Operon::CoefficientOptimizer cOpt{*optimizer, config.LamarckianProbability};
229+
Operon::CoefficientOptimizer cOpt{optimizer.get()};
227230

228231
EXPECT(problem.TrainingRange().Size() > 0);
229232

@@ -237,7 +240,7 @@ auto main(int argc, char** argv) -> int
237240

238241
Operon::RandomGenerator random(config.Seed);
239242
if (result["shuffle"].as<bool>()) {
240-
problem.GetDataset().Shuffle(random);
243+
problem.GetDataset()->Shuffle(random);
241244
}
242245
if (result["standardize"].as<bool>()) {
243246
problem.StandardizeData(problem.TrainingRange());
@@ -247,145 +250,13 @@ auto main(int argc, char** argv) -> int
247250

248251
auto t0 = std::chrono::steady_clock::now();
249252

250-
Operon::GeneticProgrammingAlgorithm gp { problem, config, treeInitializer, *coeffInitializer, *generator, *reinserter };
253+
Operon::GeneticProgrammingAlgorithm gp { config, &problem, &treeInitializer, coeffInitializer.get(), generator.get(), reinserter.get() };
251254

252-
Operon::Individual best{};
253-
254-
auto report = [&]() {
255-
auto config = gp.GetConfig();
256-
auto pop = gp.Parents();
257-
auto off = gp.Offspring();
258-
259-
auto const& problem = gp.GetProblem();
260-
auto trainingRange = problem.TrainingRange();
261-
auto testRange = problem.TestRange();
262-
263-
auto targetValues = problem.TargetValues();
264-
auto targetTrain = targetValues.subspan(trainingRange.Start(), trainingRange.Size());
265-
auto targetTest = targetValues.subspan(testRange.Start(), testRange.Size());
266-
267-
auto const& evaluator = gp.GetGenerator().Evaluator();
268-
269-
// some boilerplate for reporting results
270-
auto const idx{0UL};
271-
auto cmp = Operon::SingleObjectiveComparison(idx);
272-
best = *std::min_element(pop.begin(), pop.end(), cmp);
273-
274-
Operon::Vector<Operon::Scalar> estimatedTrain;
275-
Operon::Vector<Operon::Scalar> estimatedTest;
276-
277-
tf::Taskflow taskflow;
278-
279-
using DT = Operon::DefaultDispatch;
280-
281-
auto evalTrain = taskflow.emplace([&]() {
282-
estimatedTrain = Operon::Interpreter<Operon::Scalar, DT>::Evaluate(best.Genotype, problem.GetDataset(), trainingRange);
283-
});
284-
285-
auto evalTest = taskflow.emplace([&]() {
286-
estimatedTest = Operon::Interpreter<Operon::Scalar, DT>::Evaluate(best.Genotype, problem.GetDataset(), testRange);
287-
});
288-
289-
// scale values
290-
Operon::Scalar a{1.0};
291-
Operon::Scalar b{0.0};
292-
auto linearScaling = taskflow.emplace([&]() {
293-
auto [a_, b_] = Operon::FitLeastSquares(estimatedTrain, targetTrain);
294-
a = static_cast<Operon::Scalar>(a_);
295-
b = static_cast<Operon::Scalar>(b_);
296-
// add scaling terms to the tree
297-
auto& nodes = best.Genotype.Nodes();
298-
auto const sz = nodes.size();
299-
if (std::abs(a - Operon::Scalar{1}) > std::numeric_limits<Operon::Scalar>::epsilon()) {
300-
nodes.emplace_back(Operon::Node::Constant(a));
301-
nodes.emplace_back(Operon::NodeType::Mul);
302-
}
303-
if (std::abs(b) > std::numeric_limits<Operon::Scalar>::epsilon()) {
304-
nodes.emplace_back(Operon::Node::Constant(b));
305-
nodes.emplace_back(Operon::NodeType::Add);
306-
}
307-
if (nodes.size() > sz) {
308-
best.Genotype.UpdateNodes();
309-
}
310-
});
311-
312-
double r2Train{};
313-
double r2Test{};
314-
double nmseTrain{};
315-
double nmseTest{};
316-
double maeTrain{};
317-
double maeTest{};
318-
319-
auto scaleTrain = taskflow.emplace([&]() {
320-
Eigen::Map<Eigen::Array<Operon::Scalar, -1, 1>> estimated(estimatedTrain.data(), std::ssize(estimatedTrain));
321-
estimated = estimated * a + b;
322-
});
323-
324-
auto scaleTest = taskflow.emplace([&]() {
325-
Eigen::Map<Eigen::Array<Operon::Scalar, -1, 1>> estimated(estimatedTest.data(), std::ssize(estimatedTest));
326-
estimated = estimated * a + b;
327-
});
328-
329-
auto calcStats = taskflow.emplace([&]() {
330-
// negate the R2 because this is an internal fitness measure (minimization) which we here repurpose
331-
r2Train = -Operon::R2{}(estimatedTrain, targetTrain);
332-
r2Test = -Operon::R2{}(estimatedTest, targetTest);
333-
334-
nmseTrain = Operon::NMSE{}(estimatedTrain, targetTrain);
335-
nmseTest = Operon::NMSE{}(estimatedTest, targetTest);
336-
337-
maeTrain = Operon::MAE{}(estimatedTrain, targetTrain);
338-
maeTest = Operon::MAE{}(estimatedTest, targetTest);
339-
});
340-
341-
double avgLength = 0;
342-
double avgQuality = 0;
343-
double totalMemory = 0;
344-
345-
auto getSize = [](Operon::Individual const& ind) { return sizeof(ind) + sizeof(ind.Genotype) + sizeof(Operon::Node) * ind.Genotype.Nodes().capacity(); };
346-
auto calculateLength = taskflow.transform_reduce(pop.begin(), pop.end(), avgLength, std::plus{}, [](auto const& ind) { return ind.Genotype.Length(); });
347-
auto calculateQuality = taskflow.transform_reduce(pop.begin(), pop.end(), avgQuality, std::plus{}, [idx=idx](auto const& ind) { return ind[idx]; });
348-
auto calculatePopMemory = taskflow.transform_reduce(pop.begin(), pop.end(), totalMemory, std::plus{}, [&](auto const& ind) { return getSize(ind); });
349-
auto calculateOffMemory = taskflow.transform_reduce(off.begin(), off.end(), totalMemory, std::plus{}, [&](auto const& ind) { return getSize(ind); });
350-
351-
// define task graph
352-
linearScaling.succeed(evalTrain, evalTest);
353-
linearScaling.precede(scaleTrain, scaleTest);
354-
calcStats.succeed(scaleTrain, scaleTest);
355-
calcStats.precede(calculateLength, calculateQuality, calculatePopMemory, calculateOffMemory);
356-
357-
executor.corun(taskflow);
358-
359-
avgLength /= static_cast<double>(pop.size());
360-
avgQuality /= static_cast<double>(pop.size());
361-
362-
auto t1 = std::chrono::steady_clock::now();
363-
auto elapsed = static_cast<double>(std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count()) / 1e6;
364-
365-
using T = std::tuple<std::string, double, std::string>;
366-
auto const* format = ":>#8.3g";
367-
std::array stats {
368-
T{ "iteration", gp.Generation(), ":>" },
369-
T{ "r2_tr", r2Train, format },
370-
T{ "r2_te", r2Test, format },
371-
T{ "mae_tr", maeTrain, format },
372-
T{ "mae_te", maeTest, format },
373-
T{ "nmse_tr", nmseTrain, format },
374-
T{ "nmse_te", nmseTest, format },
375-
T{ "avg_fit", avgQuality, format },
376-
T{ "avg_len", avgLength, format },
377-
T{ "eval_cnt", evaluator.CallCount , ":>" },
378-
T{ "res_eval", evaluator.ResidualEvaluations, ":>" },
379-
T{ "jac_eval", evaluator.JacobianEvaluations, ":>" },
380-
T{ "opt_time", evaluator.CostFunctionTime, ":>" },
381-
T{ "seed", config.Seed, ":>" },
382-
T{ "elapsed", elapsed, ":>"},
383-
};
384-
Operon::PrintStats({ stats.begin(), stats.end() }, gp.Generation() == 0);
385-
};
386-
387-
gp.Run(executor, random, report);
388-
fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), 6));
255+
auto const* ptr = dynamic_cast<Operon::Evaluator<decltype(dtable)> const*>(evaluator.get());
256+
Operon::Reporter<Operon::Evaluator<decltype(dtable)>> reporter(ptr);
257+
gp.Run(executor, random, [&](){ reporter(executor, gp); });
258+
auto best = reporter.GetBest();
259+
fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, *problem.GetDataset(), 6));
389260
} catch (std::exception& e) {
390261
fmt::print(stderr, "error: {}\n", e.what());
391262
return EXIT_FAILURE;

0 commit comments

Comments
 (0)