heal-research
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 7 additions & 6 deletions
diff --git a/cli/CMakeLists.txt b/cli/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/cli/source/operator_factory.cpp b/cli/source/operator_factory.cpp
Lines changed: 16 additions & 17 deletions
diff --git a/cli/source/operator_factory.hpp b/cli/source/operator_factory.hpp
Lines changed: 1 addition & 1 deletion
diff --git a/cli/source/operon_gp.cpp b/cli/source/operon_gp.cpp
Lines changed: 23 additions & 152 deletions
@@ -60,8 +60,8 @@ add_library(
     source/operators/creator/koza.cpp
     source/operators/creator/ptc2.cpp
     source/operators/crossover.cpp
-    source/operators/evaluator.cpp
     source/operators/evaluator_error_metrics.cpp
+    source/operators/evaluator.cpp
     source/operators/generator/basic.cpp
     source/operators/generator/brood.cpp
     source/operators/generator/os.cpp
@@ -84,20 +84,21 @@ add_library(operon::operon ALIAS operon_operon)
 
 # ---- Required dependencies ----
 find_package(AriaCsvParser REQUIRED)
-find_package(Eigen3 REQUIRED)
-find_package(FastFloat REQUIRED)
-find_package(Taskflow REQUIRED)
-find_package(Threads REQUIRED)
 find_package(byte-lite REQUIRED)
-find_package(span-lite REQUIRED)
 find_package(cpp-sort REQUIRED)
+find_package(Eigen3 REQUIRED)
 find_package(eve REQUIRED)
+find_package(FastFloat REQUIRED)
 find_package(fmt REQUIRED)
 find_package(lbfgs REQUIRED)
 find_package(libassert REQUIRED)
 find_package(mdspan REQUIRED)
+find_package(Microsoft.GSL CONFIG REQUIRED)
 find_package(outcome REQUIRED)
 find_package(pratt-parser REQUIRED)
+find_package(span-lite REQUIRED)
+find_package(Taskflow REQUIRED)
+find_package(Threads REQUIRED)
 find_package(unordered_dense REQUIRED)
 find_package(vstat REQUIRED)
 find_package(xxHash)
 
@@ -16,7 +16,7 @@ function(add_operon_cli NAME)
         source/${NAME}.cpp
         source/operator_factory.cpp
         source/util.cpp
-        )
+    )
 
     target_link_libraries(${NAME} PRIVATE operon::operon cxxopts::cxxopts scn::scn)
     target_compile_features(${NAME} PRIVATE cxx_std_20)
 
@@ -5,7 +5,6 @@
 #include <stdexcept>                       // for runtime_error
 #include <fmt/format.h>                        // for format
 #include <scn/scan.h>
-#include "operon/interpreter/dispatch_table.hpp"
 #include "operon/operators/creator.hpp"    // for CreatorBase, BalancedTreeC...
 #include "operon/operators/evaluator.hpp"  // for Evaluator, EvaluatorBase
 #include "operon/operators/generator.hpp"  // for OffspringGeneratorBase
@@ -95,11 +94,11 @@ auto ParseCreator(std::string const& str, PrimitiveSet const& pset, std::vector<
     }
 
     if (str == "btc") {
-        creator = std::make_unique<BalancedTreeCreator>(pset, inputs, bias);
+        creator = std::make_unique<BalancedTreeCreator>(&pset, inputs, bias);
     } else if (str == "ptc2") {
-        creator = std::make_unique<ProbabilisticTreeCreator>(pset, inputs, bias);
+        creator = std::make_unique<ProbabilisticTreeCreator>(&pset, inputs, bias);
     } else if (str == "grow") {
-        creator = std::make_unique<GrowTreeCreator>(pset, inputs);
+        creator = std::make_unique<GrowTreeCreator>(&pset, inputs);
     } else {
         throw std::invalid_argument(detail::GetErrorString("creator", str));
     }
@@ -112,23 +111,23 @@ auto ParseEvaluator(std::string const& str, Problem& problem, DefaultDispatch& d
 
     std::unique_ptr<EvaluatorBase> evaluator;
     if (str == "r2") {
-        evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::R2{}, scale);
+        evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::R2{}, scale);
     } else if (str == "c2") {
-        evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::C2{}, scale);
+        evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::C2{}, scale);
     } else if (str == "nmse") {
-        evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::NMSE{}, scale);
+        evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::NMSE{}, scale);
     } else if (str == "mse") {
-        evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::MSE{}, scale);
+        evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::MSE{}, scale);
     } else if (str == "rmse") {
-        evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::RMSE{}, scale);
+        evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::RMSE{}, scale);
     } else if (str == "mae") {
-        evaluator = std::make_unique<Operon::Evaluator<T>>(problem, dtable, Operon::MAE{}, scale);
+        evaluator = std::make_unique<Operon::Evaluator<T>>(&problem, &dtable, Operon::MAE{}, scale);
     } else if (str == "mdl_gauss") {
-        evaluator = std::make_unique<Operon::MinimumDescriptionLengthEvaluator<T, GaussianLikelihood<Operon::Scalar>>>(problem, dtable);
+        evaluator = std::make_unique<Operon::MinimumDescriptionLengthEvaluator<T, GaussianLikelihood<Operon::Scalar>>>(&problem, &dtable);
     } else if (str == "mdl_poisson") {
-        evaluator = std::make_unique<Operon::MinimumDescriptionLengthEvaluator<T, PoissonLikelihood<Operon::Scalar>>>(problem, dtable);
+        evaluator = std::make_unique<Operon::MinimumDescriptionLengthEvaluator<T, PoissonLikelihood<Operon::Scalar>>>(&problem, &dtable);
     } else if (str == "gauss") {
-        evaluator = std::make_unique<Operon::GaussianLikelihoodEvaluator<T>>(problem, dtable);
+        evaluator = std::make_unique<Operon::GaussianLikelihoodEvaluator<T>>(&problem, &dtable);
     } else {
         throw std::runtime_error(fmt::format("unable to parse evaluator metric '{}'\n", str));
     }
@@ -141,7 +140,7 @@ auto ParseGenerator(std::string const& str, EvaluatorBase& eval, CrossoverBase&
     auto tok = Split(str, ':');
     auto name = tok[0];
     if (name == "basic") {
-        generator = std::make_unique<BasicOffspringGenerator>(eval, cx, mut, femSel, maleSel, coeffOptimizer);
+        generator = std::make_unique<BasicOffspringGenerator>(&eval, &cx, &mut, &femSel, &maleSel, coeffOptimizer);
     } else if (name == "os") {
         size_t maxSelectionPressure{100};
         double comparisonFactor{0};
@@ -151,16 +150,16 @@ auto ParseGenerator(std::string const& str, EvaluatorBase& eval, CrossoverBase&
         if (tok.size() > 2) {
             comparisonFactor = scn::scan<double>(tok[2], "{}")->value();
         }
-        generator = std::make_unique<OffspringSelectionGenerator>(eval, cx, mut, femSel, maleSel, coeffOptimizer);
+        generator = std::make_unique<OffspringSelectionGenerator>(&eval, &cx, &mut, &femSel, &maleSel, coeffOptimizer);
         dynamic_cast<OffspringSelectionGenerator*>(generator.get())->MaxSelectionPressure(maxSelectionPressure);
         dynamic_cast<OffspringSelectionGenerator*>(generator.get())->ComparisonFactor(comparisonFactor);
     } else if (name == "brood") {
-        generator = std::make_unique<BroodOffspringGenerator>(eval, cx, mut, femSel, maleSel, coeffOptimizer);
+        generator = std::make_unique<BroodOffspringGenerator>(&eval, &cx, &mut, &femSel, &maleSel, coeffOptimizer);
         size_t broodSize{BroodOffspringGenerator::DefaultBroodSize};
         if (tok.size() > 1) { broodSize = scn::scan<size_t>(tok[1], "{}")->value(); }
         dynamic_cast<BroodOffspringGenerator*>(generator.get())->BroodSize(broodSize);
     } else if (name == "poly") {
-        generator = std::make_unique<PolygenicOffspringGenerator>(eval, cx, mut, femSel, maleSel, coeffOptimizer);
+        generator = std::make_unique<PolygenicOffspringGenerator>(&eval, &cx, &mut, &femSel, &maleSel, coeffOptimizer);
         size_t polygenicSize{PolygenicOffspringGenerator::DefaultBroodSize};
         if (tok.size() > 1) { polygenicSize = scn::scan<size_t>(tok[1], "{}")->value(); }
         dynamic_cast<PolygenicOffspringGenerator*>(generator.get())->PolygenicSize(polygenicSize);
 
@@ -9,9 +9,9 @@
 #include <string>                              // for operator==, string
 #include <utility>                             // for addressof
 #include <vector>                              // for vector
+#include "operon/core/dispatch.hpp"            // for DispatchTable
 #include "operon/core/types.hpp"               // for Span
 #include "operon/core/individual.hpp"          // for Comparison
-#include "operon/interpreter/dispatch_table.hpp"
 #include "operon/interpreter/interpreter.hpp"  // for Interpreter
 #include "operon/optimizer/optimizer.hpp"
 #include "util.hpp"                            // for Split
 
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: MIT
 // SPDX-FileCopyrightText: Copyright 2019-2023 Heal Research
 
+#include "reporter.hpp"
 #include <chrono>
 #include <cmath>
 #include <cstdlib>
@@ -161,7 +162,9 @@ auto main(int argc, char** argv) -> int
                 }
             }
         }
-        Operon::Problem problem(*dataset, trainingRange, testRange);
+        Operon::Problem problem(std::move(dataset));
+        problem.SetTrainingRange(trainingRange);
+        problem.SetTestRange(testRange);
         problem.SetTarget(target.Hash);
         problem.SetInputs(inputs);
         problem.ConfigurePrimitiveSet(primitiveSetConfig);
@@ -170,7 +173,7 @@ auto main(int argc, char** argv) -> int
         creator = ParseCreator(result["creator"].as<std::string>(), problem.GetPrimitiveSet(), problem.GetInputs());
 
         auto [amin, amax] = problem.GetPrimitiveSet().FunctionArityLimits();
-        Operon::UniformTreeInitializer treeInitializer(*creator);
+        Operon::UniformTreeInitializer treeInitializer(creator.get());
 
         auto const initialMinDepth = result["creator-mindepth"].as<std::size_t>();
-        auto const initialMaxDepth = result["creator-mindepth"].as<std::size_t>();
+        auto const initialMaxDepth = result["creator-maxdepth"].as<std::size_t>();
@@ -200,30 +203,30 @@ auto main(int argc, char** argv) -> int
 
         Operon::ChangeVariableMutation changeVar { problem.GetInputs() };
         Operon::ChangeFunctionMutation changeFunc { problem.GetPrimitiveSet() };
-        Operon::ReplaceSubtreeMutation replaceSubtree { *creator, *coeffInitializer, maxDepth, maxLength };
-        Operon::InsertSubtreeMutation insertSubtree { *creator, *coeffInitializer, maxDepth, maxLength };
+        Operon::ReplaceSubtreeMutation replaceSubtree { creator.get(), coeffInitializer.get(), maxDepth, maxLength };
+        Operon::InsertSubtreeMutation insertSubtree { creator.get(), coeffInitializer.get(), maxDepth, maxLength };
         Operon::RemoveSubtreeMutation removeSubtree { problem.GetPrimitiveSet() };
         Operon::DiscretePointMutation discretePoint;
         for (auto v : Operon::Math::Constants) {
             discretePoint.Add(static_cast<Operon::Scalar>(v), 1);
         }
-        mutator.Add(*onePoint, 1.0);
-        mutator.Add(changeVar, 1.0);
-        mutator.Add(changeFunc, 1.0);
-        mutator.Add(replaceSubtree, 1.0);
-        mutator.Add(insertSubtree, 1.0);
-        mutator.Add(removeSubtree, 1.0);
-        mutator.Add(discretePoint, 1.0);
+        mutator.Add(onePoint.get(), 1.0);
+        mutator.Add(&changeVar, 1.0);
+        mutator.Add(&changeFunc, 1.0);
+        mutator.Add(&replaceSubtree, 1.0);
+        mutator.Add(&insertSubtree, 1.0);
+        mutator.Add(&removeSubtree, 1.0);
+        mutator.Add(&discretePoint, 1.0);
 
         Operon::DefaultDispatch dtable;
         auto scale = result["linear-scaling"].as<bool>();
         auto evaluator = Operon::ParseEvaluator(result["objective"].as<std::string>(), problem, dtable, scale);
         evaluator->SetBudget(config.Evaluations);
 
-        auto optimizer = std::make_unique<Operon::LevenbergMarquardtOptimizer<decltype(dtable), Operon::OptimizerType::Eigen>>(dtable, problem);
+        auto optimizer = std::make_unique<Operon::LevenbergMarquardtOptimizer<decltype(dtable), Operon::OptimizerType::Eigen>>(&dtable, &problem);
         optimizer->SetIterations(config.Iterations);
 
-        Operon::CoefficientOptimizer cOpt{*optimizer, config.LamarckianProbability};
+        Operon::CoefficientOptimizer cOpt{optimizer.get()};
 
         EXPECT(problem.TrainingRange().Size() > 0);
 
@@ -237,7 +240,7 @@ auto main(int argc, char** argv) -> int
 
         Operon::RandomGenerator random(config.Seed);
         if (result["shuffle"].as<bool>()) {
-            problem.GetDataset().Shuffle(random);
+            problem.GetDataset()->Shuffle(random);
         }
         if (result["standardize"].as<bool>()) {
             problem.StandardizeData(problem.TrainingRange());
@@ -247,145 +250,13 @@ auto main(int argc, char** argv) -> int
 
         auto t0 = std::chrono::steady_clock::now();
 
-        Operon::GeneticProgrammingAlgorithm gp { problem, config, treeInitializer, *coeffInitializer, *generator, *reinserter };
+        Operon::GeneticProgrammingAlgorithm gp { config, &problem, &treeInitializer, coeffInitializer.get(), generator.get(), reinserter.get() };
 
-        Operon::Individual best{};
-
-        auto report = [&]() {
-            auto config = gp.GetConfig();
-            auto pop = gp.Parents();
-            auto off = gp.Offspring();
-
-            auto const& problem = gp.GetProblem();
-            auto trainingRange  = problem.TrainingRange();
-            auto testRange      = problem.TestRange();
-
-            auto targetValues = problem.TargetValues();
-            auto targetTrain  = targetValues.subspan(trainingRange.Start(), trainingRange.Size());
-            auto targetTest   = targetValues.subspan(testRange.Start(), testRange.Size());
-
-            auto const& evaluator = gp.GetGenerator().Evaluator();
-
-            // some boilerplate for reporting results
-            auto const idx{0UL};
-            auto cmp = Operon::SingleObjectiveComparison(idx);
-            best = *std::min_element(pop.begin(), pop.end(), cmp);
-
-            Operon::Vector<Operon::Scalar> estimatedTrain;
-            Operon::Vector<Operon::Scalar> estimatedTest;
-
-            tf::Taskflow taskflow;
-
-            using DT = Operon::DefaultDispatch;
-
-            auto evalTrain = taskflow.emplace([&]() {
-                estimatedTrain = Operon::Interpreter<Operon::Scalar, DT>::Evaluate(best.Genotype, problem.GetDataset(), trainingRange);
-            });
-
-            auto evalTest = taskflow.emplace([&]() {
-                estimatedTest = Operon::Interpreter<Operon::Scalar, DT>::Evaluate(best.Genotype, problem.GetDataset(), testRange);
-            });
-
-            // scale values
-            Operon::Scalar a{1.0};
-            Operon::Scalar b{0.0};
-            auto linearScaling = taskflow.emplace([&]() {
-                auto [a_, b_] = Operon::FitLeastSquares(estimatedTrain, targetTrain);
-                a = static_cast<Operon::Scalar>(a_);
-                b = static_cast<Operon::Scalar>(b_);
-                // add scaling terms to the tree
-                auto& nodes = best.Genotype.Nodes();
-                auto const sz = nodes.size();
-                if (std::abs(a - Operon::Scalar{1}) > std::numeric_limits<Operon::Scalar>::epsilon()) {
-                    nodes.emplace_back(Operon::Node::Constant(a));
-                    nodes.emplace_back(Operon::NodeType::Mul);
-                }
-                if (std::abs(b) > std::numeric_limits<Operon::Scalar>::epsilon()) {
-                    nodes.emplace_back(Operon::Node::Constant(b));
-                    nodes.emplace_back(Operon::NodeType::Add);
-                }
-                if (nodes.size() > sz) {
-                    best.Genotype.UpdateNodes();
-                }
-            });
-
-            double r2Train{};
-            double r2Test{};
-            double nmseTrain{};
-            double nmseTest{};
-            double maeTrain{};
-            double maeTest{};
-
-            auto scaleTrain = taskflow.emplace([&]() {
-                Eigen::Map<Eigen::Array<Operon::Scalar, -1, 1>> estimated(estimatedTrain.data(), std::ssize(estimatedTrain));
-                estimated = estimated * a + b;
-            });
-
-            auto scaleTest = taskflow.emplace([&]() {
-                Eigen::Map<Eigen::Array<Operon::Scalar, -1, 1>> estimated(estimatedTest.data(), std::ssize(estimatedTest));
-                estimated = estimated * a + b;
-            });
-
-            auto calcStats = taskflow.emplace([&]() {
-                // negate the R2 because this is an internal fitness measure (minimization) which we here repurpose
-                r2Train = -Operon::R2{}(estimatedTrain, targetTrain);
-                r2Test = -Operon::R2{}(estimatedTest, targetTest);
-
-                nmseTrain = Operon::NMSE{}(estimatedTrain, targetTrain);
-                nmseTest = Operon::NMSE{}(estimatedTest, targetTest);
-
-                maeTrain = Operon::MAE{}(estimatedTrain, targetTrain);
-                maeTest = Operon::MAE{}(estimatedTest, targetTest);
-            });
-
-            double avgLength = 0;
-            double avgQuality = 0;
-            double totalMemory = 0;
-
-            auto getSize = [](Operon::Individual const& ind) { return sizeof(ind) + sizeof(ind.Genotype) + sizeof(Operon::Node) * ind.Genotype.Nodes().capacity(); };
-            auto calculateLength = taskflow.transform_reduce(pop.begin(), pop.end(), avgLength, std::plus{}, [](auto const& ind) { return ind.Genotype.Length(); });
-            auto calculateQuality = taskflow.transform_reduce(pop.begin(), pop.end(), avgQuality, std::plus{}, [idx=idx](auto const& ind) { return ind[idx]; });
-            auto calculatePopMemory = taskflow.transform_reduce(pop.begin(), pop.end(), totalMemory, std::plus{}, [&](auto const& ind) { return getSize(ind); });
-            auto calculateOffMemory = taskflow.transform_reduce(off.begin(), off.end(), totalMemory, std::plus{}, [&](auto const& ind) { return getSize(ind); });
-
-            // define task graph
-            linearScaling.succeed(evalTrain, evalTest);
-            linearScaling.precede(scaleTrain, scaleTest);
-            calcStats.succeed(scaleTrain, scaleTest);
-            calcStats.precede(calculateLength, calculateQuality, calculatePopMemory, calculateOffMemory);
-
-            executor.corun(taskflow);
-
-            avgLength /= static_cast<double>(pop.size());
-            avgQuality /= static_cast<double>(pop.size());
-
-            auto t1 = std::chrono::steady_clock::now();
-            auto elapsed = static_cast<double>(std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count()) / 1e6;
-
-            using T = std::tuple<std::string, double, std::string>;
-            auto const* format = ":>#8.3g";
-            std::array stats {
-                T{ "iteration", gp.Generation(), ":>" },
-                T{ "r2_tr", r2Train, format },
-                T{ "r2_te", r2Test, format },
-                T{ "mae_tr", maeTrain, format },
-                T{ "mae_te", maeTest, format },
-                T{ "nmse_tr", nmseTrain, format },
-                T{ "nmse_te", nmseTest, format },
-                T{ "avg_fit", avgQuality, format },
-                T{ "avg_len", avgLength, format },
-                T{ "eval_cnt", evaluator.CallCount , ":>" },
-                T{ "res_eval", evaluator.ResidualEvaluations, ":>" },
-                T{ "jac_eval", evaluator.JacobianEvaluations, ":>" },
-                T{ "opt_time", evaluator.CostFunctionTime,    ":>" },
-                T{ "seed", config.Seed, ":>" },
-                T{ "elapsed", elapsed, ":>"},
-            };
-            Operon::PrintStats({ stats.begin(), stats.end() }, gp.Generation() == 0);
-        };
-
-        gp.Run(executor, random, report);
-        fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), 6));
+        auto const* ptr = dynamic_cast<Operon::Evaluator<decltype(dtable)> const*>(evaluator.get());
+        Operon::Reporter<Operon::Evaluator<decltype(dtable)>> reporter(ptr);
+        gp.Run(executor, random, [&](){ reporter(executor, gp); });
+        auto best = reporter.GetBest();
+        fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, *problem.GetDataset(), 6));
     } catch (std::exception& e) {
         fmt::print(stderr, "error: {}\n", e.what());
         return EXIT_FAILURE;