Skip to content

Commit 83a2765

Browse files
authored
feat: add quoting support to to_dict (#1052) (#1053)
Differential Revision: D74185528 Pull Request resolved: #1061
1 parent fe90bf8 commit 83a2765

File tree

2 files changed

+32
-1
lines changed

2 files changed

+32
-1
lines changed

torchx/util/test/types_test.py

+5
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,11 @@ def test_to_dict(self) -> None:
197197
to_dict("key1=value1,,foo=bar"),
198198
)
199199

200+
self.assertDictEqual(
201+
{"FOO": "value with = and , and ;"},
202+
to_dict('FOO="value with = and , and ;"'),
203+
)
204+
200205
def test_to_dict_malformed_literal(self) -> None:
201206
for malformed in ["FOO", "FOO,", "FOO;", "FOO=", "FOO=;BAR=v1"]:
202207
with self.subTest(malformed=malformed):

torchx/util/types.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# pyre-strict
88

99
import inspect
10+
import re
1011
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
1112

1213
import typing_inspect
@@ -31,6 +32,9 @@ def to_dict(arg: str) -> Dict[str, str]:
3132
When values are lists, the last delimiter is used as kv-pair delimiter
3233
(e.g. ``FOO=v1,v2,BAR=v3``). An empty ``arg`` returns an empty map.
3334
35+
Values can be quoted with single or double quotes to include special characters
36+
(``"="``, ``","``, ``";"``) without them being interpreted as separators.
37+
3438
Note that values that encode list literals are returned as list literals
3539
NOT actual lists. The caller must further process each value in the returned
3640
map, to cast/decode the value literals as specific types. In this case,
@@ -57,6 +61,7 @@ def to_dict(arg: str) -> Dict[str, str]:
5761
to_dict("FOO=v1;v2,BAR=v3") == {"FOO": "v1;v2", "BAR": "v3"}
5862
to_dict("FOO=v1;v2;BAR=v3") == {"FOO": "v1;v2", "BAR": "v3"}
5963
64+
to_dict('FOO="value with = and , and ;"') == {"FOO": "value with = and , and ;"}
6065
"""
6166

6267
def parse_val_key(vk: str) -> Tuple[str, str]:
@@ -74,19 +79,34 @@ def parse_val_key(vk: str) -> Tuple[str, str]:
7479
return vk[0:idx].strip(), vk[idx + 1 :].strip()
7580

7681
def to_val(val: str) -> str:
    """Strip one pair of matching surrounding quotes from ``val``.

    A value wrapped in single quotes or in double quotes is returned with
    that outer pair removed, so ``'""'`` and ``"''"`` both yield the empty
    string. Any other value (unquoted, mismatched quotes, or a lone quote
    character) is returned unchanged.

    Args:
        val: the raw value literal.

    Returns:
        The value with its enclosing quote pair removed, if it had one.
    """
    # Require at least two characters: a lone quote character both starts
    # and ends with itself, and slicing it would silently drop the value.
    if len(val) >= 2 and val[0] == val[-1] and val[0] in ("'", '"'):
        return val[1:-1]
    return val
7887

7988
arg_map: Dict[str, str] = {}
8089

8190
if not arg:
8291
return arg_map
8392

93+
# find quoted values
94+
quoted_pattern = r'([\'"])((?:\\.|(?!\1).)*?)\1'
95+
quoted_values: List[str] = []
96+
97+
def replace_quoted(match: "re.Match[str]") -> str:
    """Stash a quoted substring and return a placeholder token for it.

    Used as the replacement callback for ``re.sub``. The full matched text
    (surrounding quotes included) is appended to ``quoted_values`` from the
    enclosing scope, and the returned token embeds the index at which it
    was stored.

    Args:
        match: the regex match covering one quoted substring.

    Returns:
        A token of the form ``__QUOTED_<index>__``.
    """
    quoted_values.append(match.group(0))
    # The index is the position just appended, i.e. the new last element.
    return f"__QUOTED_{len(quoted_values) - 1}__"
100+
101+
# replace quoted values with placeholders
102+
processed_arg = re.sub(quoted_pattern, replace_quoted, arg)
103+
84104
# split cfgs
85105
cfg_kv_delim = "="
86106

87107
# ["FOO", "v1;v2,BAR", v3, "BAZ", "v4,v5"]
88108
split_arg = [
89-
s.strip() for s in arg.split(cfg_kv_delim) if s.strip()
109+
s.strip() for s in processed_arg.split(cfg_kv_delim) if s.strip()
90110
] # remove empty
91111
split_arg_len = len(split_arg)
92112

@@ -98,10 +118,16 @@ def to_val(val: str) -> str:
98118
# middle elements are value_{n}<delim>key_{n+1}
99119
for vk in split_arg[1 : split_arg_len - 1]: # python deals with
100120
val, key_next = parse_val_key(vk)
121+
for i, quoted in enumerate(quoted_values):
122+
val = val.replace(f"__QUOTED_{i}__", quoted)
101123
arg_map[key] = to_val(val)
102124
key = key_next
125+
103126
val = split_arg[-1] # last element is always a value
127+
for i, quoted in enumerate(quoted_values):
128+
val = val.replace(f"__QUOTED_{i}__", quoted)
104129
arg_map[key] = to_val(val)
130+
105131
return arg_map
106132

107133

0 commit comments

Comments
 (0)