Skip to content

Commit a3465e5

Browse files
committed
feat: allow unicode characters in config generator
1 parent ab26462 commit a3465e5

File tree

6 files changed: +339 additions, -16 deletions

6 files changed

+339
-16
lines changed

himl/config_generator.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ def process(self, cwd=None,
4242
skip_interpolation_validation=False,
4343
skip_secrets=False,
4444
multi_line_string=False,
45+
allow_unicode=False,
4546
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
4647
fallback_strategies=["override"],
4748
type_conflict_strategies=["override"]):
@@ -53,7 +54,7 @@ def process(self, cwd=None,
5354
cwd = cwd or os.getcwd()
5455

5556
generator = self._create_and_initialize_generator(
56-
cwd, path, multi_line_string, type_strategies, fallback_strategies, type_conflict_strategies)
57+
cwd, path, multi_line_string, allow_unicode, type_strategies, fallback_strategies, type_conflict_strategies)
5758

5859
# Process data exclusions and interpolations
5960
self._process_exclusions(generator, exclude_keys)
@@ -73,10 +74,10 @@ def _should_skip_interpolation_validation(self, skip_interpolations, skip_secret
7374
"""Determine if interpolation validation should be skipped."""
7475
return skip_interpolation_validation or skip_interpolations or skip_secrets
7576

76-
def _create_and_initialize_generator(self, cwd, path, multi_line_string, type_strategies,
77+
def _create_and_initialize_generator(self, cwd, path, multi_line_string, allow_unicode, type_strategies,
7778
fallback_strategies, type_conflict_strategies):
7879
"""Create and initialize the ConfigGenerator."""
79-
generator = ConfigGenerator(cwd, path, multi_line_string, type_strategies, fallback_strategies,
80+
generator = ConfigGenerator(cwd, path, multi_line_string, allow_unicode, type_strategies, fallback_strategies,
8081
type_conflict_strategies)
8182
generator.generate_hierarchy()
8283
generator.process_hierarchy()
@@ -179,12 +180,13 @@ class ConfigGenerator(object):
179180
will contain merged data on each layer.
180181
"""
181182

182-
def __init__(self, cwd, path, multi_line_string, type_strategies, fallback_strategies, type_conflict_strategies):
183+
def __init__(self, cwd, path, multi_line_string, allow_unicode, type_strategies, fallback_strategies, type_conflict_strategies):
183184
self.cwd = cwd
184185
self.path = path
185186
self.hierarchy = self.generate_hierarchy()
186187
self.generated_data = OrderedDict()
187188
self.interpolation_validator = InterpolationValidator()
189+
self.allow_unicode = allow_unicode
188190
self.type_strategies = type_strategies
189191
self.fallback_strategies = fallback_strategies
190192
self.type_conflict_strategies = type_conflict_strategies
@@ -338,7 +340,7 @@ def get_values_from_dir_path(self):
338340

339341
def output_yaml_data(self, data):
340342
return yaml.dump(data, Dumper=ConfigGenerator.yaml_dumper(), default_flow_style=False, width=200,
341-
sort_keys=False)
343+
sort_keys=False, allow_unicode=self.allow_unicode)
342344

343345
def yaml_to_json(self, yaml_data):
344346
return json.dumps(yaml.load(yaml_data, Loader=yaml.SafeLoader), indent=4)

himl/config_merger.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -89,18 +89,19 @@ def __traverse_path(self, path: str, yaml_dict: dict):
8989
Loader.add_constructor('!include', Loader.include)
9090

9191

92-
def merge_configs(directories, levels, output_dir, enable_parallel, filter_rules):
92+
def merge_configs(directories, levels, output_dir, enable_parallel, filter_rules, allow_unicode):
9393
"""
9494
Method for running the merge configuration logic under different formats
9595
:param directories: list of paths for leaf directories
9696
:param levels: list of hierarchy levels to traverse
9797
:param output_dir: where to save the generated configs
9898
:param enable_parallel: to enable parallel config generation
99+
:param allow_unicode: allow unicode characters in output
99100
"""
100101
config_processor = ConfigProcessor()
101102
process_config = []
102103
for path in directories:
103-
process_config.append((config_processor, path, levels, output_dir, filter_rules))
104+
process_config.append((config_processor, path, levels, output_dir, filter_rules, allow_unicode))
104105

105106
if enable_parallel:
106107
logger.info("Processing config in parallel")
@@ -121,6 +122,7 @@ def merge_logic(process_params):
121122
levels = process_params[2]
122123
output_dir = process_params[3]
123124
filter_rules = process_params[4]
125+
allow_unicode = process_params[5]
124126

125127
# load the !include tag
126128
Loader.add_constructor('!include', Loader.include)
@@ -153,7 +155,7 @@ def merge_logic(process_params):
153155
logger.info("Found input config directory: %s", path)
154156
logger.info("Storing generated config to: %s", filename)
155157
with open(filename, "w+") as f:
156-
f.write(yaml.dump(output))
158+
f.write(yaml.dump(output, allow_unicode=allow_unicode))
157159

158160

159161
def is_leaf_directory(dir, leaf_directories):
@@ -203,6 +205,8 @@ def get_parser():
203205
action='store_true', help='Process config using multiprocessing')
204206
parser.add_argument('--filter-rules-key', dest='filter_rules', default=None, type=str,
205207
help='keep these keys from the generated data, based on the configured filter key')
208+
parser.add_argument('--allow-unicode', dest='allow_unicode', default=False,
209+
action='store_true', help='allow unicode characters in output (default: False, outputs escape sequences)')
206210
return parser
207211

208212

@@ -219,4 +223,4 @@ def run(args=None):
219223

220224
# merge the configs using HIML
221225
merge_configs(dirs, opts.hierarchy_levels,
222-
opts.output_dir, opts.enable_parallel, opts.filter_rules)
226+
opts.output_dir, opts.enable_parallel, opts.filter_rules, opts.allow_unicode)

himl/main.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ def do_run(self, opts):
4343
config_processor.process(cwd, opts.path, filters, excluded_keys, opts.enclosing_key,
4444
opts.remove_enclosing_key, opts.output_format, opts.print_data, opts.output_file,
4545
opts.skip_interpolation_resolving, opts.skip_interpolation_validation,
46-
opts.skip_secrets, opts.multi_line_string,
46+
opts.skip_secrets, opts.multi_line_string, opts.allow_unicode,
4747
type_strategies=[(list, [opts.merge_list_strategy.value]), (dict, ["merge"])])
4848

4949
@staticmethod
@@ -79,6 +79,8 @@ def get_parser(parser=None):
7979
parser.add_argument('--list-merge-strategy', dest='merge_list_strategy', type=ListMergeStrategy,
8080
choices=list(ListMergeStrategy), default='append_unique',
8181
help='override default merge strategy for list')
82+
parser.add_argument('--allow-unicode', dest='allow_unicode', action='store_true', default=False,
83+
help='allow unicode characters in output (default: False, outputs escape sequences)')
8284
parser.add_argument('--version', action='version', version='%(prog)s v{version}'.format(version="0.18.0"),
8385
help='print himl version')
8486
return parser

tests/test_config_generator.py

Lines changed: 139 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,52 @@ def test_output_formats(self):
191191
)
192192
assert json_result == config_data
193193

194+
def test_unicode_processing_disabled(self):
195+
"""Test Unicode processing with allow_unicode=False"""
196+
config_data = {
197+
'message': 'Hello 世界',
198+
'emoji': '✨ sparkles',
199+
'accents': 'café'
200+
}
201+
self.create_test_yaml('unicode.yaml', config_data)
202+
203+
result = self.config_processor.process(
204+
cwd=self.temp_dir,
205+
path='unicode.yaml',
206+
allow_unicode=False,
207+
print_data=False
208+
)
209+
210+
# Data should be processed correctly regardless of Unicode settings
211+
assert result['message'] == 'Hello 世界'
212+
assert result['emoji'] == '✨ sparkles'
213+
assert result['accents'] == 'café'
214+
215+
def test_unicode_processing_enabled(self):
216+
"""Test Unicode processing with allow_unicode=True"""
217+
config_data = {
218+
'message': 'Hello 世界',
219+
'emoji': '✨ sparkles',
220+
'accents': 'café',
221+
'arabic': 'مرحبا',
222+
'cyrillic': 'Привет'
223+
}
224+
self.create_test_yaml('unicode.yaml', config_data)
225+
226+
result = self.config_processor.process(
227+
cwd=self.temp_dir,
228+
path='unicode.yaml',
229+
allow_unicode=True,
230+
print_data=False
231+
)
232+
233+
# Data should be processed correctly
234+
assert result['message'] == 'Hello 世界'
235+
assert result['emoji'] == '✨ sparkles'
236+
assert result['accents'] == 'café'
237+
assert result['arabic'] == 'مرحبا'
238+
assert result['cyrillic'] == 'Привет'
239+
194240

195241
class TestConfigGenerator:
196242
"""Test cases for ConfigGenerator class"""
@@ -218,6 +264,7 @@ def test_config_generator_initialization(self):
218264
cwd=self.temp_dir,
219265
path='test',
220266
multi_line_string=False,
267+
allow_unicode=False,
221268
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
222269
fallback_strategies=["override"],
223270
type_conflict_strategies=["override"]
@@ -238,6 +285,7 @@ def test_hierarchy_generation(self):
238285
cwd=self.temp_dir,
239286
path='production',
240287
multi_line_string=False,
288+
allow_unicode=False,
241289
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
242290
fallback_strategies=["override"],
243291
type_conflict_strategies=["override"]
@@ -256,6 +304,7 @@ def test_yaml_content_loading(self):
256304
cwd=self.temp_dir,
257305
path='test',
258306
multi_line_string=False,
307+
allow_unicode=False,
259308
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
260309
fallback_strategies=["override"],
261310
type_conflict_strategies=["override"]
@@ -270,6 +319,7 @@ def test_yaml_merging(self):
270319
cwd=self.temp_dir,
271320
path='test',
272321
multi_line_string=False,
322+
allow_unicode=False,
273323
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
274324
fallback_strategies=["override"],
275325
type_conflict_strategies=["override"]
@@ -298,6 +348,7 @@ def test_output_data_yaml(self):
298348
cwd=self.temp_dir,
299349
path='test',
300350
multi_line_string=False,
351+
allow_unicode=False,
301352
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
302353
fallback_strategies=["override"],
303354
type_conflict_strategies=["override"]
@@ -316,6 +367,7 @@ def test_output_data_json(self):
316367
cwd=self.temp_dir,
317368
path='test',
318369
multi_line_string=False,
370+
allow_unicode=False,
319371
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
320372
fallback_strategies=["override"],
321373
type_conflict_strategies=["override"]
@@ -335,6 +387,7 @@ def test_invalid_output_format(self):
335387
cwd=self.temp_dir,
336388
path='test',
337389
multi_line_string=False,
390+
allow_unicode=False,
338391
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
339392
fallback_strategies=["override"],
340393
type_conflict_strategies=["override"]
@@ -353,6 +406,7 @@ def test_values_from_dir_path(self):
353406
cwd=self.temp_dir,
354407
path='env=production/region=us-east-1/cluster=web',
355408
multi_line_string=False,
409+
allow_unicode=False,
356410
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
357411
fallback_strategies=["override"],
358412
type_conflict_strategies=["override"]
@@ -361,3 +415,88 @@ def test_values_from_dir_path(self):
361415
values = generator.get_values_from_dir_path()
362416
expected = {'env': 'production', 'region': 'us-east-1', 'cluster': 'web'}
363417
assert values == expected
418+
419+
def test_allow_unicode_false(self):
420+
"""Test that Unicode characters are escaped when allow_unicode=False"""
421+
generator = ConfigGenerator(
422+
cwd=self.temp_dir,
423+
path='test',
424+
multi_line_string=False,
425+
allow_unicode=False,
426+
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
427+
fallback_strategies=["override"],
428+
type_conflict_strategies=["override"]
429+
)
430+
431+
test_data = {
432+
'greeting': 'Hello 世界',
433+
'emoji': '🚀 rocket',
434+
'special': 'café résumé naïve'
435+
}
436+
yaml_output = generator.output_yaml_data(test_data)
437+
438+
# When allow_unicode=False, Unicode should be escaped
439+
assert '\\u' in yaml_output or '\\x' in yaml_output or 'greeting: Hello' in yaml_output
440+
441+
def test_allow_unicode_true(self):
442+
"""Test that Unicode characters are preserved when allow_unicode=True"""
443+
generator = ConfigGenerator(
444+
cwd=self.temp_dir,
445+
path='test',
446+
multi_line_string=False,
447+
allow_unicode=True,
448+
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
449+
fallback_strategies=["override"],
450+
type_conflict_strategies=["override"]
451+
)
452+
453+
test_data = {
454+
'greeting': 'Hello 世界',
455+
'emoji': '🚀 rocket',
456+
'special': 'café résumé naïve'
457+
}
458+
yaml_output = generator.output_yaml_data(test_data)
459+
460+
# When allow_unicode=True, most Unicode should be preserved
461+
# Note: PyYAML may still escape some 4-byte UTF-8 characters (emojis)
462+
assert '世界' in yaml_output # Chinese characters preserved
463+
assert 'café' in yaml_output # Accented characters preserved
464+
assert 'résumé' in yaml_output # Accented characters preserved
465+
assert 'naïve' in yaml_output # Accented characters preserved
466+
# Emoji might be escaped as \U0001F680 even with allow_unicode=True
467+
assert ('🚀' in yaml_output or '\\U0001F680' in yaml_output)
468+
469+
def test_unicode_in_nested_structures(self):
470+
"""Test Unicode handling in nested data structures"""
471+
generator = ConfigGenerator(
472+
cwd=self.temp_dir,
473+
path='test',
474+
multi_line_string=False,
475+
allow_unicode=True,
476+
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
477+
fallback_strategies=["override"],
478+
type_conflict_strategies=["override"]
479+
)
480+
481+
test_data = {
482+
'users': [
483+
{'name': 'José García', 'country': 'España'},
484+
{'name': '田中太郎', 'country': '日本'},
485+
{'name': 'François Müller', 'country': 'France'}
486+
],
487+
'config': {
488+
'title': 'Configuration — Настройки',
489+
'description': 'Multi-language support: English, 中文, العربية, हिन्दी'
490+
}
491+
}
492+
yaml_output = generator.output_yaml_data(test_data)
493+
494+
# Verify Unicode characters are preserved (excluding 4-byte emoji which may be escaped)
495+
assert 'José García' in yaml_output
496+
assert '田中太郎' in yaml_output
497+
assert 'España' in yaml_output
498+
assert '日本' in yaml_output
499+
assert 'Настройки' in yaml_output
500+
assert '中文' in yaml_output
501+
assert 'العربية' in yaml_output
502+
assert 'हिन्दी' in yaml_output

0 commit comments

Comments
 (0)