Skip to content

Commit 480e5a2

Browse files
committed
pipeline: implement and document restrictions on pipeline names
1 parent dc16ad7 commit 480e5a2

File tree

4 files changed

+69
-3
lines changed

4 files changed

+69
-3
lines changed

docs/guide/pipeline.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ These are arranged in a directed acyclic graph, consisting of:
108108
components and how to provide each component with the inputs it requires; see
109109
:ref:`pipeline-connections` for details.
110110

111-
Each node has a name that can be used to look up the node with
112-
:meth:`Pipeline.node` (or :meth:`PipelineBuilder.node`) and appears in
111+
Each node has a :ref:`name <pipeline-names>` that can be used to look up the
112+
node with :meth:`Pipeline.node` (or :meth:`PipelineBuilder.node`) and appears in
113113
serialized configurations and log messages. Names must be unique within a pipeline.
114114

115115
.. _pipeline-connections:
@@ -244,6 +244,12 @@ These component names replace the task-specific interfaces in pre-2025 LensKit;
244244
a ``Recommender`` is now just a pipeline with ``recommender`` and/or ``ranker``
245245
components.
246246

247+
.. note:: Limits on Names
248+
249+
Node names (for inputs, literals, components, and aliases) must consist of alphanumeric characters and the symbols
250+
``_-.@%!*?``, and may not begin with ``_``. Names beginning with ``_`` are
251+
reserved for LensKit internal use.
252+
247253
.. _pipeline-serialization:
248254

249255
Pipeline Serialization

src/lenskit/pipeline/_builder.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
fallback_on_none,
3939
is_component_class,
4040
)
41-
from .config import PipelineConfig, PipelineHook
41+
from .config import PipelineConfig, PipelineHook, check_name
4242
from .nodes import (
4343
ComponentConstructorNode,
4444
ComponentInstanceNode,
@@ -208,6 +208,7 @@ def create_input[T](self, name: str, *types: type[T] | UnionType | None) -> Node
208208
ValueError:
209209
a node with the specified ``name`` already exists.
210210
"""
211+
check_name(name, what="input")
211212
self._check_available_name(name)
212213

213214
rts: set[type[T | None]] = set()
@@ -234,6 +235,8 @@ def literal[T](self, value: T, *, name: str | None = None) -> LiteralNode[T]:
234235
if name is None:
235236
lit = config.PipelineLiteral.represent(value)
236237
name = str(uuid5(NAMESPACE_LITERAL_DATA, lit.model_dump_json()))
238+
else:
239+
check_name(name)
237240
node = LiteralNode(name, value, types=set([type(value)]))
238241
self._nodes[name] = node
239242
return node
@@ -300,6 +303,7 @@ def alias(self, alias: str, node: Node[Any] | str, *, replace: bool = False) ->
300303
ValueError:
301304
if the alias is already used as an alias or node name.
302305
"""
306+
check_name(alias, what="alias")
303307
node = self.node(node)
304308
if replace:
305309
if alias in self._nodes:
@@ -355,6 +359,7 @@ def add_component[CFG, T](
355359
"""
356360
from lenskit.training import Trainable
357361

362+
check_name(name, what="component")
358363
self._check_available_name(name)
359364

360365
if hasattr(comp, "train"):
@@ -417,6 +422,7 @@ def replace_component[CFG, T](
417422
if isinstance(name, Node):
418423
name = name.name
419424

425+
check_name(name, what="component")
420426
node = ComponentNode[T].create(name, comp, config)
421427
self._nodes[name] = node
422428

src/lenskit/pipeline/config.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import base64
1515
import pickle
16+
import re
1617
import warnings
1718
from collections import OrderedDict
1819
from hashlib import sha256
@@ -31,6 +32,25 @@
3132
from .components import Component
3233
from .nodes import ComponentConstructorNode, ComponentInstanceNode, ComponentNode, InputNode
3334

35+
# Pattern for acceptable names: one or more word characters (letters, digits,
# and ``_``) or the symbols ``.@%!*?-``.  The pattern is anchored with ``\Z``
# rather than ``$`` because ``$`` also matches just before a trailing newline,
# which would let a name such as "user\n" slip through.
VALID_NAME = re.compile(r"^[\w.@%!*?-]+\Z", re.UNICODE)


def check_name(name: str, *, what: str = "node", allow_reserved: bool = False) -> None:
    """
    Check that a name is valid.

    A valid name is non-empty and consists only of characters accepted by
    :data:`VALID_NAME`.  Names beginning with ``_`` are reserved and are
    rejected unless ``allow_reserved`` is set.

    Args:
        name:
            The name to check.
        what:
            The kind of thing being named (e.g. ``"input"``, ``"component"``);
            only used to build the error message.
        allow_reserved:
            If ``True``, accept names that begin with ``_``.

    Raises:
        ValueError:
            If the specified name is not valid.
    """

    # fullmatch requires the entire string to be consumed by the pattern, so
    # stray whitespace or newlines anywhere in the name are rejected.
    if not VALID_NAME.fullmatch(name):
        raise ValueError(f"invalid {what} name “{name}”")
    if name.startswith("_") and not allow_reserved:
        raise ValueError(
            f"invalid {what} name “{name}”, names beginning with “_” are reserved by LensKit"
        )
53+
3454

3555
class PipelineHook(BaseModel):
3656
"""

tests/pipeline/test_pipeline.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,40 @@ def test_alias():
101101
pipe.create_input("person", bytes)
102102

103103

104+
def test_invalid_input_name():
    """Creating an input with a reserved or malformed name raises ``ValueError``."""
    builder = PipelineBuilder()

    # (bad name, pattern the error message must match)
    cases = [
        ("_user", r"invalid input name.*reserved"),
        ("user 7", r"invalid input name"),
    ]
    for bad_name, pattern in cases:
        with raises(ValueError, match=pattern):
            builder.create_input(bad_name, int, str)
112+
113+
114+
def test_invalid_component_name():
    """Adding a component under a reserved (``_``-prefixed) name raises ``ValueError``."""

    def incr(x: int) -> int:
        return x + 1

    builder = PipelineBuilder()
    user_input = builder.create_input("user", int, str)

    with raises(ValueError, match=r"invalid component name.*reserved"):
        builder.add_component("_incr", incr, x=user_input)
123+
124+
125+
def test_invalid_node_alias():
    """Aliasing an existing node to a reserved (``_``-prefixed) name raises ``ValueError``."""

    def incr(x: int) -> int:
        return x + 1

    builder = PipelineBuilder()
    user_input = builder.create_input("user", int, str)
    # A validly-named component to serve as the alias target.
    incr_node = builder.add_component("incr", incr, x=user_input)

    with raises(ValueError, match=r"invalid alias.*reserved"):
        builder.alias("_incr", incr_node)
136+
137+
104138
def test_component_type():
105139
pipe = PipelineBuilder()
106140
msg = pipe.create_input("msg", str)

0 commit comments

Comments
 (0)