Raise an explicit NameError when a formula references an unknown variable.

formulaic/materializers/base.py
  * `_lookup` now passes a unique sentinel object as `default` to
    `layered_context.get_with_layer_name` and raises `NameError` when that
    sentinel comes back, i.e. when `name` could not be resolved. An identity
    check against a fresh `object()` cannot be confused with any real value.
  * Removes one blank line above `EncodedTermStructure`.

tests/materializers/test_pandas.py
  * Adds `test_lookup_nonexistent_variable`, which expects a
    `FactorEvaluationError` wrapping the new message for the factor `a`, and
    one wrapping "name 'a' is not defined" for the factor `I(a)`.

diff --git a/formulaic/materializers/base.py b/formulaic/materializers/base.py
index a717f771..9a097eb5 100644
--- a/formulaic/materializers/base.py
+++ b/formulaic/materializers/base.py
@@ -50,7 +50,6 @@ if TYPE_CHECKING: # pragma: no cover
     from formulaic import FormulaSpec, ModelSpec, ModelSpecs
 
 
-
 EncodedTermStructure = namedtuple(
     "EncodedTermStructure", ("term", "scoped_terms", "columns")
 )
@@ -590,7 +589,12 @@ def _evaluate_factor(
         return self.factor_cache[factor.expr]
 
     def _lookup(self, name: str) -> Tuple[Any, Set[Variable]]:
-        values, layer = self.layered_context.get_with_layer_name(name)
+        sentinel = object()
+        values, layer = self.layered_context.get_with_layer_name(name, default=sentinel)
+        if values is sentinel:
+            raise NameError(
+                f"`{name}` is not present in the dataset or evaluation context."
+            )
         return values, {Variable(name, roles=("value",), source=layer)}
 
     def _evaluate(
diff --git a/tests/materializers/test_pandas.py b/tests/materializers/test_pandas.py
index 2c6de395..889616e8 100644
--- a/tests/materializers/test_pandas.py
+++ b/tests/materializers/test_pandas.py
@@ -455,3 +455,20 @@ def test_quoted_python_args(self):
         assert mm.shape == (3, 2)
         assert len(mm.model_spec.structure) == 2
         assert numpy.all(mm.values == numpy.array([[1, 1], [1, 4], [1, 9]]))
+
+    def test_lookup_nonexistent_variable(self):
+        data = pandas.DataFrame({})
+        with pytest.raises(
+            FactorEvaluationError,
+            match=re.escape(
+                "Unable to evaluate factor `a`. [NameError: `a` is not present in the dataset or evaluation context.]"
+            ),
+        ):
+            PandasMaterializer(data).get_model_matrix("a")
+        with pytest.raises(
+            FactorEvaluationError,
+            match=re.escape(
+                "Unable to evaluate factor `I(a)`. [NameError: name 'a' is not defined]"
+            ),
+        ):
+            PandasMaterializer(data).get_model_matrix("I(a)")