Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 40 additions & 38 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -560,37 +560,25 @@ model = PySRRegressor(
model.fit(X, y)
```

You can also use parameters in your template expressions, which will be optimized during the search:

```python
template = TemplateExpressionSpec(
expressions=["f", "g"],
variable_names=["x1", "x2", "x3"],
parameters={"p1": 2, "p2": 1}, # p1 has length 2, p2 has length 1
combine="p1[1] * sin(f(x1, x2)) + p1[2] * g(x3) + p2[1]",
)
```

This will learn an equation of the form:

$$ y = \alpha_1 \sin(f(x_1, x_2)) + \alpha_2 g(x_3) + \beta $$

where $\alpha_1, \alpha_2$ are stored in `p1` and $\beta$ is stored in `p2`. The parameters will be optimized during the search.

### Parametric Expressions

When your data has categories with shared equation structure but different parameters,
you can use a `ParametricExpressionSpec`. Let's say we would like to learn the expression:
you can use the `parameters` argument of `TemplateExpressionSpec` to specify learned category-specific parameters.

For example, let's say we want to learn an equation of the form:

$$ y = \alpha \sin(x_1) + \beta $$

for three different values of $\alpha$ and $\beta$.
where $\alpha$ and $\beta$ are different for each category.

Further, let's say we have 3 categories,
with $\alpha \in \{0.1, 1.5, -0.5\}$ and $\beta \in \{1.0, 2.0, 0.5\}$.

```python
import numpy as np
from pysr import PySRRegressor, ParametricExpressionSpec
from pysr import PySRRegressor, TemplateExpressionSpec

# Create data with 3 categories
# Create data with 2 features and 3 categories
X = np.random.uniform(-3, 3, (1000, 2))
category = np.random.randint(0, 3, 1000)

Expand All @@ -603,34 +591,48 @@ y = np.array([
scales[c] * np.sin(x1) + offsets[c]
for x1, c in zip(X[:, 0], category)
])
```

Now, let's define our parametric expression:

```python
template = TemplateExpressionSpec(
expressions=["f"],
variable_names=["x1", "x2", "category"],
parameters={"p1": 3, "p2": 3}, # One parameter per category
combine="f(x1, x2, p1[category], p2[category])"
)
```

Next, we pass the category as a _column_ in `X`
corresponding to the index we defined in `variable_names`.

**Note that because Julia is 1-indexed, we need to add 1 to the category index.**

```python
category_p_one = category + 1
X_with_category = np.column_stack([X, category_p_one])
```

Now, we can fit our model:

```python
model = PySRRegressor(
expression_spec=ParametricExpressionSpec(max_parameters=2),
expression_spec=template,
binary_operators=["+", "*", "-", "/"],
unary_operators=["sin"],
maxsize=10,
)
model.fit(X, y, category=category)
model.fit(X_with_category, y)

# Predicting on new data:
# model.predict(X_test, category=category_test)
# Predicting on new data
# model.predict(X_test_with_category)
```

See [Expression Specifications](/api/#expression-specifications) for more details.

You can also use `TemplateExpressionSpec` in the same way, passing
the category as a column of `X`:

```python
spec = TemplateExpressionSpec(
expressions=["f", "g"],
variable_names=["x1", "x2", "class"],
parameters={"p1": 3, "p2": 3},
combine="p1[class] * sin(f(x1, x2)) + p2[class]",
)
```

This column will automatically be converted to integers.
You can use this approach for more complex cases,
where you have multiple expressions in the template and parameters that vary by category.


## 12. Using TensorBoard for Logging
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ dependencies:
- scikit-learn>=1.0.0,<2.0.0
- pyjuliacall>=0.9.22,<0.9.23
- click>=7.0.0,<9.0.0
- beartype>=0.19,<0.22
- typing-extensions>=4.0.0,<5.0.0
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ dependencies = [
"juliacall==0.9.24",
"click>=7.0.0,<9.0.0",
"setuptools>=50.0.0",
"beartype>=0.19,<0.22",
"typing-extensions>=4.0.0,<5.0.0",
]

[project.optional-dependencies]
dev = [
"beartype>=0.19,<0.21",
"coverage>=7,<8",
"coveralls>=4,<5",
"ipykernel>=6,<7",
Expand Down
35 changes: 35 additions & 0 deletions pysr/expression_specs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import copy
import textwrap
import warnings
from abc import ABC, abstractmethod
from textwrap import dedent
from typing import TYPE_CHECKING, Any, NewType, overload
Expand All @@ -11,6 +13,7 @@
from .export import add_export_formats
from .julia_helpers import jl_array
from .julia_import import AnyValue, SymbolicRegression, jl
from .utils import ArrayLike

try:
from typing import TypeAlias
Expand Down Expand Up @@ -319,9 +322,41 @@ def create_exports(
return _search_output_to_callable_expressions(equations, search_output, i)


def parametric_expression_deprecation_warning(
    max_parameters: int, variable_names: ArrayLike[str]
) -> None:
    """Warn that `ParametricExpressionSpec` is deprecated and show a migration.

    Builds a message containing an equivalent `TemplateExpressionSpec`
    snippet tailored to the caller's settings and emits it as a
    `FutureWarning`.

    Parameters
    ----------
    max_parameters : int
        Number of per-category parameters the parametric spec was using.
        One template parameter vector (`p1`, `p2`, ...) is suggested for each.
    variable_names : ArrayLike[str]
        Names of the feature columns. They are listed in the suggested
        `variable_names` (followed by `"category"`) and passed as arguments
        in the suggested `combine` string.
    """
    function_name = "f"
    var_names = list(variable_names)

    # Pre-render the interpolated pieces so the template below stays readable.
    parameter_names = [f"p{i + 1}" for i in range(max_parameters)]
    quoted_names = ", ".join(f'"{v}"' for v in var_names)
    quoted_names_with_category = ", ".join(f'"{v}"' for v in var_names + ["category"])
    parameter_sizes = ", ".join(f'"{p}": n_categories' for p in parameter_names)
    combine_args = ", ".join(var_names + [f"{p}[category]" for p in parameter_names])

    message = dedent(
        f"""
        ParametricExpressionSpec is deprecated – you should switch to TemplateExpressionSpec
        with explicit parameters indexed by category.

        Since you have `max_parameters={max_parameters}` and
        `variable_names=[{quoted_names}]`, you could migrate like this:

            n_categories = len(np.unique(category))  # count the number of parameters required
            expression_spec = TemplateExpressionSpec(
                expressions=["{function_name}"],
                variable_names=[{quoted_names_with_category}],
                parameters={{{parameter_sizes}}},
                combine="{function_name}({combine_args})",
            )
            X = np.column_stack([X, category + 1])  # add the category column (1-indexed for Julia)

        Finally, do not pass `category` when calling .fit().
        """
    ).strip()

    # Only prose is re-flowed. After dedent/strip the code sample is the only
    # indented text; wrapping it would drop the indentation of continuation
    # lines and could split a string literal such as `combine="..."`, leaving
    # the suggested snippet invalid when copy-pasted.
    wrapped = "\n".join(
        line if line[:1].isspace() else textwrap.fill(line, 88)
        for line in message.splitlines()
    )
    # stacklevel=3 attributes the warning two frames above this helper, i.e. to
    # the caller of whichever function invoked it (presumably the user's
    # `.fit()` call site).
    warnings.warn(wrapped, FutureWarning, stacklevel=3)


class ParametricExpressionSpec(AbstractExpressionSpec):
"""Spec for parametric expressions that vary by category.

**This is deprecated in favor of the `TemplateExpressionSpec` class,
which now supports parameters indexed by category.**

This class allows you to specify expressions with parameters that vary across different
categories in your dataset. The expression structure remains the same, but parameters
are optimized separately for each category.
Expand Down
9 changes: 8 additions & 1 deletion pysr/sr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
from io import StringIO
from multiprocessing import cpu_count
from pathlib import Path
from typing import Any, List, Literal, Tuple, Union, cast
from typing import Any, Literal, Tuple, Union, cast

import numpy as np
import pandas as pd
from beartype.typing import List
from numpy import ndarray
from numpy.typing import NDArray
from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
Expand All @@ -39,6 +40,7 @@
AbstractExpressionSpec,
ExpressionSpec,
ParametricExpressionSpec,
parametric_expression_deprecation_warning,
)
from .feature_selection import run_feature_selection
from .julia_extensions import load_required_packages
Expand Down Expand Up @@ -2251,6 +2253,11 @@ def fit(
random_state = check_random_state(self.random_state) # For np random
seed = cast(int, random_state.randint(0, 2**31 - 1)) # For julia random

if isinstance(self.expression_spec, ParametricExpressionSpec):
parametric_expression_deprecation_warning(
self.expression_spec.max_parameters, variable_names
)

# Pre transformations (feature selection and denoising)
X, y, variable_names, complexity_of_variables, X_units, y_units = (
self._pre_transform_training_data(
Expand Down
21 changes: 21 additions & 0 deletions pysr/test/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import pickle as pkl
import platform
import re
import tempfile
import traceback
import unittest
Expand Down Expand Up @@ -32,6 +33,7 @@
)
from pysr.export_latex import sympy2latex
from pysr.export_sympy import pysr2sympy
from pysr.expression_specs import parametric_expression_deprecation_warning
from pysr.feature_selection import _handle_feature_selection, run_feature_selection
from pysr.julia_helpers import init_julia
from pysr.sr import (
Expand Down Expand Up @@ -1034,6 +1036,25 @@ def test_param_groupings(self):
# Check the sets are equal:
self.assertSetEqual(set(params), set(regressor_params))

def test_parametric_deprecation_warning(self):
    """Check that the deprecation helper emits the full migration hint."""
    # Fragments are concatenated in order; DOTALL lets `.*` span the line
    # breaks inside the multi-line warning text.
    expected_fragments = (
        r"ParametricExpressionSpec is deprecated.*TemplateExpressionSpec.*",
        r"max_parameters=2.*",
        r"variable_names=\[\"alpha\", \"beta\"\].*",
        r"expressions=\[\"f\"\].*",
        r"variable_names=\[\"alpha\", \"beta\", \"category\"\].*",
        r"parameters=\{\s*\"p1\": n_categories,\s*\"p2\": n_categories\s*\}.*",
        r"combine=\"f\(alpha, beta, p1\[category\], p2\[category\]\)\"",
    )
    expected_message = re.compile("".join(expected_fragments), flags=re.DOTALL)

    with self.assertWarnsRegex(FutureWarning, expected_message):
        parametric_expression_deprecation_warning(
            variable_names=["alpha", "beta"],
            max_parameters=2,
        )

def test_load_all_packages(self):
"""Test we can load all packages at once."""
load_all_packages()
Expand Down
3 changes: 2 additions & 1 deletion pysr/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import inspect
import re
from pathlib import Path
from typing import Any, List, TypeVar, Union
from typing import Any, TypeVar, Union

from beartype.typing import List
from numpy import ndarray
from sklearn.utils.validation import _check_feature_names_in # type: ignore

Expand Down
Loading