Skip to content

Commit abd071b

Browse files
authored
Merge pull request #920 from MilesCranmer/deprecate-parametric-expression
Deprecate ParametricExpressionSpec in favor of TemplateExpressionSpec
2 parents 94adb6c + 7d75522 commit abd071b

7 files changed

Lines changed: 108 additions & 41 deletions

File tree

docs/examples.md

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -560,37 +560,25 @@ model = PySRRegressor(
560560
model.fit(X, y)
561561
```
562562

563-
You can also use parameters in your template expressions, which will be optimized during the search:
564-
565-
```python
566-
template = TemplateExpressionSpec(
567-
expressions=["f", "g"],
568-
variable_names=["x1", "x2", "x3"],
569-
parameters={"p1": 2, "p2": 1}, # p1 has length 2, p2 has length 1
570-
combine="p1[1] * sin(f(x1, x2)) + p1[2] * g(x3) + p2[1]",
571-
)
572-
```
573-
574-
This will learn an equation of the form:
575-
576-
$$ y = \alpha_1 \sin(f(x_1, x_2)) + \alpha_2 g(x_3) + \beta $$
577-
578-
where $\alpha_1, \alpha_2$ are stored in `p1` and $\beta$ is stored in `p2`. The parameters will be optimized during the search.
579-
580563
### Parametric Expressions
581564

582565
When your data has categories with shared equation structure but different parameters,
583-
you can use a `ParametricExpressionSpec`. Let's say we would like to learn the expression:
566+
you can use the `parameters` argument of `TemplateExpressionSpec` to specify learned category-specific parameters.
567+
568+
For example, let's say we want to learn an equation of the form:
584569

585570
$$ y = \alpha \sin(x_1) + \beta $$
586571

587-
for three different values of $\alpha$ and $\beta$.
572+
where $\alpha$ and $\beta$ are different for each category.
573+
574+
Further, let's say we have 3 categories,
575+
with $\alpha \in \{0.1, 1.5, -0.5\}$ and $\beta \in \{1.0, 2.0, 0.5\}$.
588576

589577
```python
590578
import numpy as np
591-
from pysr import PySRRegressor, ParametricExpressionSpec
579+
from pysr import PySRRegressor, TemplateExpressionSpec
592580

593-
# Create data with 3 categories
581+
# Create data with 2 features and 3 categories
594582
X = np.random.uniform(-3, 3, (1000, 2))
595583
category = np.random.randint(0, 3, 1000)
596584

@@ -603,34 +591,48 @@ y = np.array([
603591
scales[c] * np.sin(x1) + offsets[c]
604592
for x1, c in zip(X[:, 0], category)
605593
])
594+
```
595+
596+
Now, let's define our parametric expression:
606597

598+
```python
599+
template = TemplateExpressionSpec(
600+
expressions=["f"],
601+
variable_names=["x1", "x2", "category"],
602+
parameters={"p1": 3, "p2": 3}, # One parameter per category
603+
combine="f(x1, x2, p1[category], p2[category])"
604+
)
605+
```
606+
607+
Next, we pass the category as a _column_ in `X`
608+
corresponding to the index we defined in `variable_names`.
609+
610+
**Note that because Julia is 1-indexed, we need to add 1 to the category index.**
611+
612+
```python
613+
category_p_one = category + 1
614+
X_with_category = np.column_stack([X, category_p_one])
615+
```
616+
617+
Now, we can fit our model:
618+
619+
```python
607620
model = PySRRegressor(
608-
expression_spec=ParametricExpressionSpec(max_parameters=2),
621+
expression_spec=template,
609622
binary_operators=["+", "*", "-", "/"],
610623
unary_operators=["sin"],
611624
maxsize=10,
612625
)
613-
model.fit(X, y, category=category)
626+
model.fit(X_with_category, y)
614627

615-
# Predicting on new data:
616-
# model.predict(X_test, category=category_test)
628+
# Predicting on new data
629+
# model.predict(X_test_with_category)
617630
```
618631

619632
See [Expression Specifications](/api/#expression-specifications) for more details.
620633

621-
You can also use `TemplateExpressionSpec` in the same way, passing
622-
the category as a column of `X`:
623-
624-
```python
625-
spec = TemplateExpressionSpec(
626-
expressions=["f", "g"],
627-
variable_names=["x1", "x2", "class"],
628-
parameters={"p1": 3, "p2": 3},
629-
combine="p1[class] * sin(f(x1, x2)) + p2[class]",
630-
)
631-
```
632-
633-
this column will automatically be converted to integers.
634+
You can use this approach for more complex cases,
635+
where you have multiple expressions in the template and parameters that vary by category.
634636

635637

636638
## 12. Using TensorBoard for Logging

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ dependencies:
99
- scikit-learn>=1.0.0,<2.0.0
1010
- pyjuliacall>=0.9.22,<0.9.23
1111
- click>=7.0.0,<9.0.0
12+
- beartype>=0.19,<0.22
1213
- typing-extensions>=4.0.0,<5.0.0

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ dependencies = [
2525
"juliacall>=0.9.24,<0.9.26",
2626
"click>=7.0.0,<9.0.0",
2727
"setuptools>=50.0.0",
28+
"beartype>=0.19,<0.22",
2829
"typing-extensions>=4.0.0,<5.0.0",
2930
]
3031

3132
[project.optional-dependencies]
3233
dev = [
33-
"beartype>=0.19,<0.21",
3434
"coverage>=7,<8",
3535
"coveralls>=4,<5",
3636
"ipykernel>=6,<7",

pysr/expression_specs.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from __future__ import annotations
22

33
import copy
4+
import textwrap
5+
import warnings
46
from abc import ABC, abstractmethod
57
from textwrap import dedent
68
from typing import TYPE_CHECKING, Any, NewType, overload
@@ -11,6 +13,7 @@
1113
from .export import add_export_formats
1214
from .julia_helpers import jl_array
1315
from .julia_import import AnyValue, SymbolicRegression, jl
16+
from .utils import ArrayLike
1417

1518
try:
1619
from typing import TypeAlias
@@ -319,9 +322,41 @@ def create_exports(
319322
return _search_output_to_callable_expressions(equations, search_output, i)
320323

321324

325+
def parametric_expression_deprecation_warning(
    max_parameters: int, variable_names: ArrayLike[str]
) -> None:
    """Warn that `ParametricExpressionSpec` is deprecated and show a migration.

    Builds a ready-to-copy `TemplateExpressionSpec` snippet equivalent to a
    `ParametricExpressionSpec` with the given settings, and emits it as a
    `FutureWarning`.

    Parameters
    ----------
    max_parameters : int
        The `max_parameters` value of the deprecated spec. One template
        parameter vector (`p1`, `p2`, ...) is suggested per parameter.
    variable_names : ArrayLike[str]
        Names of the feature columns; a `"category"` column is appended to
        them in the suggested template.
    """
    function_name = "f"
    var_names = list(variable_names)
    param_names = [f"p{i + 1}" for i in range(max_parameters)]

    # Pre-render the pieces of the snippet so the template below stays readable.
    quoted_vars = ", ".join(f'"{v}"' for v in var_names)
    quoted_vars_with_category = ", ".join(
        f'"{v}"' for v in [*var_names, "category"]
    )
    parameter_sizes = ", ".join(f'"{p}": n_categories' for p in param_names)
    combine_args = ", ".join(
        [*var_names, *(f"{p}[category]" for p in param_names)]
    )

    message = dedent(
        f"""
        ParametricExpressionSpec is deprecated – you should switch to TemplateExpressionSpec
        with explicit parameters indexed by category.

        Since you have `max_parameters={max_parameters}` and
        `variable_names=[{quoted_vars}]`, you could migrate like this:

            n_categories = len(np.unique(category))  # count the number of parameters required
            expression_spec = TemplateExpressionSpec(
                expressions=["{function_name}"],
                variable_names=[{quoted_vars_with_category}],
                parameters={{{parameter_sizes}}},
                combine="{function_name}({combine_args})",
            )
            X = np.column_stack([X, category + 1])  # add the category column (1-indexed for Julia)

        Finally, do not pass `category` when calling .fit().
        """
    ).strip()

    # Only re-wrap prose. Indented lines belong to the code snippet: wrapping
    # them would split string literals across lines and drop the indentation of
    # the continuation, leaving a snippet that is no longer valid Python.
    wrapped = "\n".join(
        line if line[:1].isspace() else textwrap.fill(line, 88)
        for line in message.splitlines()
    )
    # stacklevel=3 attributes the warning to the caller of the function that
    # invoked this helper rather than to this helper or its direct caller.
    warnings.warn(wrapped, FutureWarning, stacklevel=3)
352+
353+
322354
class ParametricExpressionSpec(AbstractExpressionSpec):
323355
"""Spec for parametric expressions that vary by category.
324356
357+
**This is deprecated in favor of the `TemplateExpressionSpec` class,
358+
which now supports parameters indexed by category.**
359+
325360
This class allows you to specify expressions with parameters that vary across different
326361
categories in your dataset. The expression structure remains the same, but parameters
327362
are optimized separately for each category.

pysr/sr.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@
1515
from io import StringIO
1616
from multiprocessing import cpu_count
1717
from pathlib import Path
18-
from typing import Any, List, Literal, Tuple, Union, cast
18+
from typing import Any, Literal, Tuple, Union, cast
1919

2020
import numpy as np
2121
import pandas as pd
22+
from beartype.typing import List
2223
from numpy import ndarray
2324
from numpy.typing import NDArray
2425
from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
@@ -39,6 +40,7 @@
3940
AbstractExpressionSpec,
4041
ExpressionSpec,
4142
ParametricExpressionSpec,
43+
parametric_expression_deprecation_warning,
4244
)
4345
from .feature_selection import run_feature_selection
4446
from .julia_extensions import load_required_packages
@@ -2251,6 +2253,11 @@ def fit(
22512253
random_state = check_random_state(self.random_state) # For np random
22522254
seed = cast(int, random_state.randint(0, 2**31 - 1)) # For julia random
22532255

2256+
if isinstance(self.expression_spec, ParametricExpressionSpec):
2257+
parametric_expression_deprecation_warning(
2258+
self.expression_spec.max_parameters, variable_names
2259+
)
2260+
22542261
# Pre transformations (feature selection and denoising)
22552262
X, y, variable_names, complexity_of_variables, X_units, y_units = (
22562263
self._pre_transform_training_data(

pysr/test/test_main.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import pickle as pkl
55
import platform
6+
import re
67
import tempfile
78
import traceback
89
import unittest
@@ -32,6 +33,7 @@
3233
)
3334
from pysr.export_latex import sympy2latex
3435
from pysr.export_sympy import pysr2sympy
36+
from pysr.expression_specs import parametric_expression_deprecation_warning
3537
from pysr.feature_selection import _handle_feature_selection, run_feature_selection
3638
from pysr.julia_helpers import init_julia
3739
from pysr.sr import (
@@ -1034,6 +1036,25 @@ def test_param_groupings(self):
10341036
# Check the sets are equal:
10351037
self.assertSetEqual(set(params), set(regressor_params))
10361038

1039+
def test_parametric_deprecation_warning(self):
    """Check the deprecation helper emits the expected migration hint.

    The helper is called directly with two parameters and two variable
    names, and the resulting `FutureWarning` text is matched against a
    regex covering each piece of the suggested `TemplateExpressionSpec`.
    """
    # Fragments are concatenated in order; DOTALL lets `.*` span newlines
    # in the multi-line warning message.
    fragments = (
        r"ParametricExpressionSpec is deprecated.*TemplateExpressionSpec.*",
        r"max_parameters=2.*",
        r"variable_names=\[\"alpha\", \"beta\"\].*",
        r"expressions=\[\"f\"\].*",
        r"variable_names=\[\"alpha\", \"beta\", \"category\"\].*",
        r"parameters=\{\s*\"p1\": n_categories,\s*\"p2\": n_categories\s*\}.*",
        r"combine=\"f\(alpha, beta, p1\[category\], p2\[category\]\)\"",
    )
    expected = re.compile("".join(fragments), flags=re.DOTALL)

    with self.assertWarnsRegex(FutureWarning, expected):
        parametric_expression_deprecation_warning(
            max_parameters=2,
            variable_names=["alpha", "beta"],
        )
1057+
10371058
def test_load_all_packages(self):
10381059
"""Test we can load all packages at once."""
10391060
load_all_packages()

pysr/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
import inspect
55
import re
66
from pathlib import Path
7-
from typing import Any, List, TypeVar, Union
7+
from typing import Any, TypeVar, Union
88

9+
from beartype.typing import List
910
from numpy import ndarray
1011
from sklearn.utils.validation import _check_feature_names_in # type: ignore
1112

0 commit comments

Comments
 (0)