Skip to content

Commit fb975d4

Browse files
committed
fix: don't count '=' inside string literals when parsing parameters
PythonTranslator.inspect treated any line with more than one '=' as unparseable, which incorrectly rejected parameter lines like s = "a=b" where the second '=' is inside a string literal. Count only top-level assignment operators using tokenize so the existing PARAMETER_PATTERN regex (which already handles '=' in values correctly) is given a chance to match. Closes #864
1 parent e4e4ddd commit fb975d4

2 files changed

Lines changed: 23 additions & 1 deletion

File tree

papermill/tests/test_translators.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ def test_translate_comment_python(test_input, expected):
107107
Parameter("b", "float", "-2.3432", "My b variable"),
108108
],
109109
),
110+
# Regression test for #864: '=' inside string literals shouldn't trip parsing.
111+
('s = "a=b"', [Parameter("s", "None", '"a=b"', "")]),
112+
("s = 'a=b'", [Parameter("s", "None", "'a=b'", "")]),
110113
],
111114
)
112115
def test_inspect_python(test_input, expected):

papermill/translators.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,33 @@
1+
import io
12
import logging
23
import math
34
import re
45
import shlex
6+
import tokenize
57

68
from .exceptions import PapermillException
79
from .models import Parameter
810

911
logger = logging.getLogger(__name__)
1012

1113

14+
def _count_assignment_operators(line):
15+
"""Count top-level assignment operators in a Python source line.
16+
17+
Uses ``tokenize`` so that ``=`` characters appearing inside string
18+
literals (e.g. ``s = "a=b"``) are not counted as assignment
19+
operators. Falls back to a naive ``line.count('=')`` if tokenization
20+
fails (e.g. for incomplete multiline definitions).
21+
"""
22+
try:
23+
tokens = tokenize.tokenize(io.BytesIO(line.encode("utf-8")).readline)
24+
return sum(
25+
1 for tok in tokens if tok.type == tokenize.OP and tok.string == "="
26+
)
27+
except (tokenize.TokenError, SyntaxError):
28+
return line.count("=")
29+
30+
1231
class PapermillTranslators:
1332
'''
1433
The holder which houses any translator registered with the system.
@@ -242,7 +261,7 @@ def flatten_accumulator(accumulator):
242261
if len(line.strip()) == 0 or line.strip().startswith('#'):
243262
continue # Skip blank and comment
244263

245-
nequal = line.count("=")
264+
nequal = _count_assignment_operators(line)
246265
if nequal > 0:
247266
grouped_variable.append(flatten_accumulator(accumulator))
248267
accumulator = []

0 commit comments

Comments
 (0)