@@ -389,6 +389,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
389389 the innermost `AbstractExpressionNode`. This is useful
390390 for specifying custom loss functions on `TemplateExpressionSpec`.
391391 Default is `None`.
392+ loss_scale : Literal["log", "linear"]
393+ Determines how loss values are scaled when computing scores.
394+ "log" (default) uses logarithmic scaling of loss ratios; this mode
395+ requires non-negative loss values and is ideal for traditional
396+ loss functions that are always non-negative.
397+ "linear" uses direct
398+ differences between losses; this mode handles any loss values
399+ (including negative) and is useful for custom loss functions,
400+ especially those based on likelihoods.
401+ Default is "log".
392402 complexity_of_operators : dict[str, int | float]
393403 If you would like to use a complexity other than 1 for an
394404 operator, specify the complexity here. For example,
@@ -817,6 +827,7 @@ def __init__(
817827 elementwise_loss : str | None = None ,
818828 loss_function : str | None = None ,
819829 loss_function_expression : str | None = None ,
830+ loss_scale : Literal ["log" , "linear" ] = "log" ,
820831 complexity_of_operators : dict [str , int | float ] | None = None ,
821832 complexity_of_constants : int | float | None = None ,
822833 complexity_of_variables : int | float | list [int | float ] | None = None ,
@@ -924,6 +935,7 @@ def __init__(
924935 self .elementwise_loss = elementwise_loss
925936 self .loss_function = loss_function
926937 self .loss_function_expression = loss_function_expression
938+ self .loss_scale = loss_scale
927939 self .complexity_of_operators = complexity_of_operators
928940 self .complexity_of_constants = complexity_of_constants
929941 self .complexity_of_variables = complexity_of_variables
@@ -1203,7 +1215,11 @@ def __repr__(self) -> str:
12031215 repr_equations = pd .DataFrame (
12041216 dict (
12051217 pick = selected ,
1206- score = equations ["score" ],
1218+ ** (
1219+ {"score" : equations ["score" ]}
1220+ if "score" in equations .columns
1221+ else {}
1222+ ),
12071223 equation = equations ["equation" ],
12081224 loss = equations ["loss" ],
12091225 complexity = equations ["complexity" ],
@@ -1993,6 +2009,7 @@ def _run(
19932009 elementwise_loss = custom_loss ,
19942010 loss_function = custom_full_objective ,
19952011 loss_function_expression = custom_loss_expression ,
2012+ loss_scale = jl .Symbol (self .loss_scale ),
19962013 maxsize = int (self .maxsize ),
19972014 output_directory = _escape_filename (self .output_directory_ ),
19982015 npopulations = int (self .populations ),
@@ -2644,7 +2661,7 @@ def get_hof(self, search_output=None) -> pd.DataFrame | list[pd.DataFrame]:
26442661 pd .concat (
26452662 [
26462663 output ,
2647- calculate_scores (output ),
2664+ * ([ calculate_scores (output )] if self . loss_scale == "log" else [] ),
26482665 self .expression_spec_ .create_exports (
26492666 self , output , search_output , i if self .nout_ > 1 else None
26502667 ),
@@ -2720,6 +2737,10 @@ def latex_table(
27202737
27212738def idx_model_selection (equations : pd .DataFrame , model_selection : str ):
27222739 """Select an expression and return its index."""
2740+
2741+ # Fall back to "accuracy" when there is no "score" column (e.g. when loss_scale="linear", where scores are not computed), overriding the requested model_selection.
2742+ model_selection = model_selection if "score" in equations .columns else "accuracy"
2743+
27232744 if model_selection == "accuracy" :
27242745 chosen_idx = equations ["loss" ].idxmin ()
27252746 elif model_selection == "best" :
0 commit comments