Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions dynamic_expressions/src/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,6 @@ pub struct Instr<const D: usize> {
pub dst: u16,
}

/// Computes a 64-bit hash of a slice of `PNode`s.
///
/// The whole `nodes` slice is fed into a default-constructed `FxHasher` and
/// the finished digest is returned. Callers compare this value against an
/// `EvalPlan`'s stored `hash` to decide whether a plan must be recompiled.
pub(crate) fn build_node_hash(nodes: &[PNode]) -> u64 {
let mut hasher = FxHasher::default();
nodes.hash(&mut hasher);
hasher.finish()
}

pub fn compile_plan<const D: usize>(nodes: &[PNode], n_features: usize, n_consts: usize) -> EvalPlan<D> {
assert!(
n_features <= (u16::MAX as usize),
Expand Down Expand Up @@ -99,7 +93,9 @@ pub fn compile_plan<const D: usize>(nodes: &[PNode], n_features: usize, n_consts
assert_eq!(stack.len(), 1, "Postfix did not reduce to a single root");
let root = stack.pop().unwrap();
let n_slots = max_slot as usize;
let hash = build_node_hash(nodes);
let mut hasher = FxHasher::default();
nodes.hash(&mut hasher);
let hash = hasher.finish();
EvalPlan {
instrs,
n_slots,
Expand Down
4 changes: 2 additions & 2 deletions dynamic_expressions/src/evaluate.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use ndarray::{Array2, ArrayView2};
use num_traits::Float;

use crate::compile::{EvalPlan, build_node_hash, compile_plan};
use crate::compile::{EvalPlan, compile_plan};
use crate::dispatch::{EvalKernelCtx, SrcRef};
use crate::expression::PostfixExpr;
use crate::node::Src;
Expand Down Expand Up @@ -66,7 +66,7 @@ impl<T: Float, const D: usize> EvalContext<T, D> {
let Some(plan) = &self.plan else {
return true;
};
plan.hash != build_node_hash(&expr.nodes)
plan.hash != expr.hash_nodes()
}

fn ensure_scratch(&mut self, n_slots: usize) {
Expand Down
4 changes: 2 additions & 2 deletions dynamic_expressions/src/evaluate_derivative.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use ndarray::{Array2, ArrayView2};
use num_traits::Float;

use crate::compile::{EvalPlan, build_node_hash, compile_plan};
use crate::compile::{EvalPlan, compile_plan};
use crate::dispatch::{GradKernelCtx, GradRef, SrcRef};
use crate::evaluate::{EvalOptions, resolve_val_src};
use crate::expression::PostfixExpr;
Expand Down Expand Up @@ -141,7 +141,7 @@ where
let needs_recompile = ctx.plan_nodes_len != expr.nodes.len()
|| ctx.plan_n_consts != expr.consts.len()
|| ctx.plan_n_features != n_features
|| ctx.plan.as_ref().is_none_or(|p| p.hash != build_node_hash(&expr.nodes));
|| ctx.plan.as_ref().is_none_or(|p| p.hash != expr.hash_nodes());
if needs_recompile {
ctx.plan = Some(compile_plan::<D>(&expr.nodes, n_features, expr.consts.len()));
ctx.plan_nodes_len = expr.nodes.len();
Expand Down
9 changes: 9 additions & 0 deletions dynamic_expressions/src/expression.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use core::hash::{Hash, Hasher};
use core::marker::PhantomData;

use rustc_hash::FxHasher;

use crate::node::PNode;

#[derive(Clone, Debug, Default)]
Expand Down Expand Up @@ -43,6 +46,12 @@ impl<T, Ops, const D: usize> PostfixExpr<T, Ops, D> {
{
Self::new(vec![PNode::Const { idx: 0 }], vec![T::zero()], Metadata::default())
}

/// Returns a 64-bit hash of this expression's `nodes`.
///
/// Only `self.nodes` is fed into the (default-constructed) `FxHasher`; no
/// other field of the expression, such as `consts`, contributes to the
/// result.
pub fn hash_nodes(&self) -> u64 {
let mut hasher = FxHasher::default();
self.nodes.hash(&mut hasher);
hasher.finish()
}
}

pub trait PostfixExpression<const D: usize> {
Expand Down
16 changes: 7 additions & 9 deletions symbolic_regression/benches/optim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use rand::{Rng, SeedableRng};
use rand_distr::StandardNormal;
use symbolic_regression::operator_selection::OperatorsSampling;
use symbolic_regression::{
Dataset, Evaluator, MemberId, NextGenerationCtx, OptimizeConstantsCtx, Options, PopMember, Population,
Dataset, Evaluator, NextGenerationCtx, OptimizeConstantsCtx, Options, PopMember, Population,
RunningSearchStatistics, TaggedDataset, best_of_sample, check_constraints, equation_search,
insert_random_op_in_place, next_generation, optimize_constants, rotate_tree_in_place,
};
Expand Down Expand Up @@ -175,9 +175,9 @@ fn make_population(
let mut evaluator = Evaluator::new(dataset.n_rows);

let mut members = Vec::with_capacity(pop_size);
for i in 0..pop_size {
for _i in 0..pop_size {
let expr = random_expr::<Ops, D, _>(&mut rng, &options.operators, dataset.n_features, tree_size);
let mut member = PopMember::from_expr(MemberId(i as u64), None, expr, dataset.n_features, options);
let mut member = PopMember::from_expr(expr, dataset.n_features, options);
let _ = member.evaluate(&tagged, options, &mut evaluator);
members.push(member);
}
Expand Down Expand Up @@ -249,10 +249,9 @@ fn bench_utils(c: &mut Criterion) {
let tagged = TaggedDataset::new(&dataset, None);
let evaluator = Evaluator::new(dataset.n_rows);
let rng = FastRand::with_seed(6);
let next_id = population.len() as u64;
(tagged, evaluator, rng, next_id)
(tagged, evaluator, rng)
},
|(tagged, mut evaluator, mut rng, mut next_id)| {
|(tagged, mut evaluator, mut rng)| {
for member in population.members.iter() {
let ctx = NextGenerationCtx {
rng: &mut rng,
Expand All @@ -262,7 +261,6 @@ fn bench_utils(c: &mut Criterion) {
stats: &stats,
options: &options,
evaluator: &mut evaluator,
next_id: &mut next_id,
_ops: PhantomData::<Ops>,
};
let _ = next_generation(member, ctx);
Expand All @@ -280,9 +278,9 @@ fn bench_utils(c: &mut Criterion) {
let mut rng = FastRand::with_seed(42);
let mut expr_rng = StdRng::seed_from_u64(42);
let mut members = Vec::with_capacity(10);
for i in 0..10 {
for _i in 0..10 {
let expr = random_expr::<Ops, D, _>(&mut expr_rng, &options.operators, dataset.n_features, 20);
let member = PopMember::from_expr(MemberId(i as u64), None, expr, dataset.n_features, &options);
let member = PopMember::from_expr(expr, dataset.n_features, &options);
members.push(member);
}

Expand Down
4 changes: 2 additions & 2 deletions symbolic_regression/src/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::dataset::TaggedDataset;
use crate::loss_functions::baseline_loss_from_zero_expression;
use crate::optim::{BackTracking, EvalBudget, Objective, OptimOptions, bfgs_minimize};
use crate::pop_member::Evaluator;
use crate::{Dataset, MemberId, OperatorLibrary, Options, PopMember};
use crate::{Dataset, OperatorLibrary, Options, PopMember};

const D: usize = 3;
type T = f64;
Expand Down Expand Up @@ -121,7 +121,7 @@ pub fn constant_opt_linear_env() -> ConstantOptLinearEnv {

pub fn run_constant_opt_linear(env: &ConstantOptLinearEnv) -> (bool, f64, Vec<f64>) {
let expr = build_linear_expr_for_constant_optimization();
let mut member = PopMember::from_expr(MemberId(0), None, expr, env.dataset.n_features, &env.options);
let mut member = PopMember::from_expr(expr, env.dataset.n_features, &env.options);
let mut evaluator = Evaluator::new(env.dataset.n_rows);
let mut grad_ctx = dynamic_expressions::GradContext::new(env.dataset.n_rows);
let baseline_loss = if env.options.use_baseline {
Expand Down
2 changes: 1 addition & 1 deletion symbolic_regression/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub use loss_functions::{LossKind, epsilon_insensitive, huber, log_cosh, lp, mae
pub use operator_library::OperatorLibrary;
pub use operator_selection::OperatorsSampling;
pub use options::{MutationWeights, Options, OutputStyle, WasmOptionsShim};
pub use pop_member::{MemberId, PopMember};
pub use pop_member::PopMember;
pub use search_utils::{SearchEngine, SearchResult, equation_search};
#[cfg(feature = "bench")]
pub use {
Expand Down
6 changes: 1 addition & 5 deletions symbolic_regression/src/migration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::cmp::Ordering;
use fastrand::Rng;
use num_traits::Float;

use crate::pop_member::{MemberId, PopMember, get_birth_order};
use crate::pop_member::{PopMember, get_birth_order};
use crate::population::Population;
use crate::random::{choose, poisson_sample, usize_range};

Expand All @@ -27,7 +27,6 @@ pub fn migrate_into<T: Float, Ops, const D: usize>(
migrants: &[PopMember<T, Ops, D>],
frac: f64,
rng: &mut Rng,
next_id: &mut u64,
deterministic: bool,
) {
if migrants.is_empty() {
Expand All @@ -54,9 +53,6 @@ pub fn migrate_into<T: Float, Ops, const D: usize>(
let loc = usize_range(rng, 0..dst.len());
let src = choose(rng, migrants).expect("migrants is non-empty");
let mut m = src.clone();
m.parent = Some(src.id);
m.id = MemberId(*next_id);
*next_id += 1;
m.birth = get_birth_order(deterministic);
dst.members[loc] = m;
}
Expand Down
38 changes: 11 additions & 27 deletions symbolic_regression/src/mutate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::dataset::TaggedDataset;
use crate::loss_functions::loss_to_cost;
use crate::mutation_functions;
use crate::options::{MutationWeights, Options};
use crate::pop_member::{Evaluator, MemberId, PopMember};
use crate::pop_member::{Evaluator, PopMember};
use crate::random::usize_range_inclusive;
use crate::selection::weighted_index;

Expand Down Expand Up @@ -41,7 +41,6 @@ pub struct NextGenerationCtx<'a, T: Float + AddAssign, Ops, const D: usize> {
pub stats: &'a RunningSearchStatistics,
pub options: &'a Options<T, D>,
pub evaluator: &'a mut Evaluator<T, D>,
pub next_id: &'a mut u64,
pub _ops: core::marker::PhantomData<Ops>,
}

Expand All @@ -51,7 +50,6 @@ pub struct CrossoverCtx<'a, T: Float, Ops, const D: usize> {
pub curmaxsize: usize,
pub options: &'a Options<T, D>,
pub evaluator: &'a mut Evaluator<T, D>,
pub next_id: &'a mut u64,
pub _ops: core::marker::PhantomData<Ops>,
}

Expand Down Expand Up @@ -199,7 +197,7 @@ impl MutationChoice {
}
MutationChoice::Simplify => {
let _ = dynamic_expressions::simplify_in_place(&mut expr, &evaluator.eval_opts);
let mut out = PopMember::from_expr(MemberId(0), Some(member.id), expr, n_features, options);
let mut out = PopMember::from_expr(expr, n_features, options);

// Match the intended behavior (and current SymbolicRegression.jl main):
// simplify returns immediately and keeps the old loss, but refreshes complexity/cost.
Expand Down Expand Up @@ -228,7 +226,7 @@ impl MutationChoice {
MutationChoice::DoNothing => {
// Match SymbolicRegression.jl: identity mutation is accepted immediately and keeps
// the old loss/cost.
let mut out = PopMember::from_expr(MemberId(0), Some(member.id), expr, n_features, options);
let mut out = PopMember::from_expr(expr, n_features, options);
out.plan = member.plan.clone();
out.complexity = member.complexity;
out.loss = member.loss;
Expand All @@ -239,7 +237,7 @@ impl MutationChoice {
}
}
MutationChoice::Optimize => {
let mut out = PopMember::from_expr(MemberId(0), Some(member.id), expr, n_features, options);
let mut out = PopMember::from_expr(expr, n_features, options);

// Match SymbolicRegression.jl: optimize returns immediately with loss/cost already
// computed by constant optimization.
Expand Down Expand Up @@ -286,7 +284,6 @@ where
stats,
options,
evaluator,
next_id,
..
} = ctx;

Expand Down Expand Up @@ -324,36 +321,29 @@ where
}
}
MutationResult::ProposedMember {
member: mut out,
member: out,
evals: delta_evals,
} => {
evals += delta_evals;
let id = MemberId(*next_id);
*next_id += 1;
out.id = id;
out.parent = Some(member.id);
return (out, true, evals);
}
}
}

let id = MemberId(*next_id);
*next_id += 1;

if !successful {
let mut baby = PopMember::from_expr(id, Some(member.id), member.expr.clone(), n_features, options);
let mut baby = PopMember::from_expr(member.expr.clone(), n_features, options);
baby.complexity = member.complexity;
baby.loss = member.loss;
baby.cost = member.cost;
return (baby, false, 0.0);
}

let mut baby = PopMember::from_expr(id, Some(member.id), tree, n_features, options);
let mut baby = PopMember::from_expr(tree, n_features, options);
let _ok = baby.evaluate(&dataset, options, evaluator);
evals += 1.0;
let after_cost = baby.cost.to_f64().unwrap_or(f64::INFINITY);
if after_cost.is_nan() {
let mut reject = PopMember::from_expr(id, Some(member.id), member.expr.clone(), n_features, options);
let mut reject = PopMember::from_expr(member.expr.clone(), n_features, options);
reject.complexity = member.complexity;
reject.loss = member.loss;
reject.cost = member.cost;
Expand Down Expand Up @@ -382,7 +372,7 @@ where
}

if prob < rng.f64() {
let mut reject = PopMember::from_expr(id, Some(member.id), member.expr.clone(), n_features, options);
let mut reject = PopMember::from_expr(member.expr.clone(), n_features, options);
reject.complexity = member.complexity;
reject.loss = member.loss;
reject.cost = member.cost;
Expand All @@ -406,7 +396,6 @@ where
curmaxsize,
options,
evaluator,
next_id,
..
} = ctx;

Expand All @@ -416,13 +405,8 @@ where
let (c1_expr, c2_expr) = mutation_functions::crossover_trees(rng, &member1.expr, &member2.expr);
tries += 1;
if check_constraints(&c1_expr, options, curmaxsize) && check_constraints(&c2_expr, options, curmaxsize) {
let id1 = MemberId(*next_id);
*next_id += 1;
let id2 = MemberId(*next_id);
*next_id += 1;

let mut baby1 = PopMember::from_expr(id1, Some(member1.id), c1_expr, dataset.n_features, options);
let mut baby2 = PopMember::from_expr(id2, Some(member2.id), c2_expr, dataset.n_features, options);
let mut baby1 = PopMember::from_expr(c1_expr, dataset.n_features, options);
let mut baby2 = PopMember::from_expr(c2_expr, dataset.n_features, options);
let _ = baby1.evaluate(&dataset, options, evaluator);
let _ = baby2.evaluate(&dataset, options, evaluator);
return (baby1, baby2, true, 2.0);
Expand Down
27 changes: 2 additions & 25 deletions symbolic_regression/src/pop_member.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,8 @@ use crate::dataset::TaggedDataset;
use crate::loss_functions::loss_to_cost;
use crate::options::Options;

/// Identifier for a population member, wrapping a raw `u64`.
///
/// Used as the type of `PopMember::id` and, wrapped in `Option`, of
/// `PopMember::parent`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct MemberId(pub u64);

#[derive(Debug)]
pub struct PopMember<T: Float, Ops, const D: usize> {
pub id: MemberId,
pub parent: Option<MemberId>,
pub birth: u64,
pub expr: PostfixExpr<T, Ops, D>,
pub plan: EvalPlan<D>,
Expand Down Expand Up @@ -60,8 +55,6 @@ pub(crate) fn reset_pseudo_time_for_tests() {
impl<T: Float, Ops, const D: usize> Clone for PopMember<T, Ops, D> {
fn clone(&self) -> Self {
Self {
id: self.id,
parent: self.parent,
birth: self.birth,
expr: self.expr.clone(),
plan: self.plan.clone(),
Expand Down Expand Up @@ -101,17 +94,9 @@ impl<T: Float, Ops, const D: usize> PopMember<T, Ops, D>
where
Ops: dynamic_expressions::OperatorSet<T = T>,
{
pub fn from_expr(
id: MemberId,
parent: Option<MemberId>,
expr: PostfixExpr<T, Ops, D>,
n_features: usize,
options: &Options<T, D>,
) -> Self {
pub fn from_expr(expr: PostfixExpr<T, Ops, D>, n_features: usize, options: &Options<T, D>) -> Self {
let plan = dynamic_expressions::compile_plan(&expr.nodes, n_features, expr.consts.len());
Self {
id,
parent,
birth: get_birth_order(options.deterministic),
expr,
plan,
Expand All @@ -121,17 +106,9 @@ where
}
}

pub fn from_expr_with_birth(
id: MemberId,
parent: Option<MemberId>,
birth: u64,
expr: PostfixExpr<T, Ops, D>,
n_features: usize,
) -> Self {
pub fn from_expr_with_birth(birth: u64, expr: PostfixExpr<T, Ops, D>, n_features: usize) -> Self {
let plan = dynamic_expressions::compile_plan(&expr.nodes, n_features, expr.consts.len());
Self {
id,
parent,
birth,
expr,
plan,
Expand Down
Loading