Skip to content

feat: add quoting support to to_dict (#1052) (#1053) #1061

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions torchx/util/test/types_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,11 @@ def test_to_dict(self) -> None:
to_dict("key1=value1,,foo=bar"),
)

self.assertDictEqual(
{"FOO": "value with = and , and ;"},
to_dict('FOO="value with = and , and ;"'),
)

def test_to_dict_malformed_literal(self) -> None:
for malformed in ["FOO", "FOO,", "FOO;", "FOO=", "FOO=;BAR=v1"]:
with self.subTest(malformed=malformed):
Expand Down
28 changes: 27 additions & 1 deletion torchx/util/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# pyre-strict

import inspect
import re
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union

import typing_inspect
Expand All @@ -31,6 +32,9 @@ def to_dict(arg: str) -> Dict[str, str]:
When values are lists, the last delimiter is used as kv-pair delimiter
(e.g. ``FOO=v1,v2,BAR=v3``). An empty ``arg`` returns an empty map.

Values can be quoted with single or double quotes to include special characters
(``"="``, ``","``, ``";"``) without them being interpreted as separators.

Note that values that encode list literals are returned as list literals
NOT actual lists. The caller must further process each value in the returned
map, to cast/decode the value literals as specific types. In this case,
Expand All @@ -57,6 +61,7 @@ def to_dict(arg: str) -> Dict[str, str]:
to_dict("FOO=v1;v2,BAR=v3") == {"FOO": "v1;v2", "BAR": "v3"}
to_dict("FOO=v1;v2;BAR=v3") == {"FOO": "v1;v2", "BAR": "v3"}

to_dict('FOO="value with = and , and ;"') == {"FOO": "value with = and , and ;"}
"""

def parse_val_key(vk: str) -> Tuple[str, str]:
Expand All @@ -74,19 +79,34 @@ def parse_val_key(vk: str) -> Tuple[str, str]:
return vk[0:idx].strip(), vk[idx + 1 :].strip()

def to_val(val: str) -> str:
    """Return ``val`` with one pair of matching surrounding quotes removed.

    A value wrapped in single or double quotes (``'...'`` or ``"..."``) is
    returned without the quotes; an empty quoted literal (``""`` or ``''``)
    therefore becomes the empty string. Any other value is returned as-is.

    Args:
        val: the raw value literal, already stripped of surrounding whitespace.

    Returns:
        The unquoted value, or ``val`` unchanged when it is not quoted.
    """
    # Require at least two characters so that a value consisting of a single
    # quote character is not treated as both the opening and closing quote
    # (which would silently turn it into an empty string).
    if len(val) >= 2 and val[0] == val[-1] and val[0] in ("'", '"'):
        return val[1:-1]
    return val

arg_map: Dict[str, str] = {}

if not arg:
return arg_map

# find quoted values
quoted_pattern = r'([\'"])((?:\\.|(?!\1).)*?)\1'
quoted_values: List[str] = []

def replace_quoted(match: "re.Match[str]") -> str:
    """Stash the quoted literal matched by ``match`` and return a placeholder.

    Intended as the ``repl`` callback for ``re.sub``: the full match (quotes
    included) is appended to ``quoted_values`` from the enclosing scope, and
    a ``__QUOTED_<index>__`` token is returned in its place so that
    separators inside the quotes are hidden from later splitting.

    Args:
        match: the regex match covering one quoted literal.

    Returns:
        The placeholder token whose index is the position of the stashed
        literal in ``quoted_values``.
    """
    quoted_values.append(match.group(0))
    return f"__QUOTED_{len(quoted_values) - 1}__"

# replace quoted values with placeholders
processed_arg = re.sub(quoted_pattern, replace_quoted, arg)

# split cfgs
cfg_kv_delim = "="

# e.g. "FOO=v1;v2,BAR=v3,BAZ=v4,v5" -> ["FOO", "v1;v2,BAR", "v3,BAZ", "v4,v5"]
split_arg = [
s.strip() for s in arg.split(cfg_kv_delim) if s.strip()
s.strip() for s in processed_arg.split(cfg_kv_delim) if s.strip()
] # remove empty
split_arg_len = len(split_arg)

Expand All @@ -98,10 +118,16 @@ def to_val(val: str) -> str:
# middle elements are value_{n}<delim>key_{n+1}
for vk in split_arg[1 : split_arg_len - 1]: # the slice is simply empty when there are no middle elements
val, key_next = parse_val_key(vk)
for i, quoted in enumerate(quoted_values):
val = val.replace(f"__QUOTED_{i}__", quoted)
arg_map[key] = to_val(val)
key = key_next

val = split_arg[-1] # last element is always a value
for i, quoted in enumerate(quoted_values):
val = val.replace(f"__QUOTED_{i}__", quoted)
arg_map[key] = to_val(val)

return arg_map


Expand Down
Loading