Skip to content

Commit 005c8f2

Browse files
authored
feat: add --allow-unicode flag for Unicode character handling (#207)
* feat: allow unicode characters in config generator
* docs: add documentation for Unicode support
* docs: revert formatting in README.md
1 parent ab26462 commit 005c8f2

File tree

7 files changed

+408
-17
lines changed

7 files changed

+408
-17
lines changed

README.md

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,12 @@ Idea came from puppet's hiera.
4646
- [Vault](#vault)
4747
- [Merge with Terraform remote state](#merge-with-terraform-remote-state)
4848
- [Merge with env variables](#merge-with-env-variables)
49+
- [Unicode Support](#unicode-support)
4950
- [himl config merger](#himl-config-merger)
51+
- [Output filtering](#output-filtering)
5052
- [Extra merger features](#extra-merger-features)
53+
- [Custom merge strategy](#custom-merge-strategy)
54+
- [Development](#development)
5155

5256
## Installation
5357

@@ -151,8 +155,9 @@ usage: himl [-h] [--output-file OUTPUT_FILE] [--format OUTPUT_FORMAT]
151155
[--filter FILTER] [--exclude EXCLUDE]
152156
[--skip-interpolation-validation]
153157
[--skip-interpolation-resolving] [--enclosing-key ENCLOSING_KEY]
154-
[--cwd CWD]
158+
[--cwd CWD] [--multi-line-string]
155159
[--list-merge-strategy {append,override,prepend,append_unique}]
160+
[--allow-unicode]
156161
path
157162
```
158163

@@ -296,6 +301,63 @@ endpoint: "{{outputs.cluster_composition.output.value.redis_endpoint}}"
296301
kubeconfig_location: "{{env(KUBECONFIG)}}"
297302
```
298303

304+
### Unicode Support
305+
306+
himl supports Unicode characters in configuration files, allowing you to use international languages, special characters, and emoji in your YAML configs.
307+
308+
By default, non-ASCII characters are written to the output as escape sequences to ensure compatibility. To keep them in their original form, pass the `--allow-unicode` flag.
309+
310+
**Using the CLI:**
311+
```sh
312+
# With Unicode escaping (default)
313+
himl examples/simple/production --output-file config.yaml
314+
315+
# Preserving Unicode characters
316+
himl examples/simple/production --output-file config.yaml --allow-unicode
317+
```
318+
319+
**Using the Python module:**
320+
```py
321+
from himl import ConfigProcessor
322+
323+
config_processor = ConfigProcessor()
324+
path = "examples/simple/production"
325+
326+
# Process with Unicode preservation
327+
config = config_processor.process(
328+
path=path,
329+
output_format="yaml",
330+
allow_unicode=True, # Preserve Unicode characters
331+
print_data=True
332+
)
333+
```
334+
335+
**Example with Unicode content:**
336+
337+
`config/default.yaml`:
338+
```yaml
339+
service:
340+
name: "My Service"
341+
description: "Multi-language support: English, 中文, العربية, Русский"
342+
343+
messages:
344+
welcome:
345+
en: "Welcome"
346+
zh: "欢迎"
347+
ar: "مرحبا"
348+
ru: "Добро пожаловать"
349+
350+
team:
351+
- name: "José García"
352+
role: "Developer"
353+
- name: "田中太郎"
354+
role: "Designer"
355+
```
356+
357+
When processed with `--allow-unicode`, the output preserves all Unicode characters. Without the flag, non-ASCII characters are escaped (e.g., `中文` is written as `\u4e2d\u6587`).
358+
359+
**Note:** Some emoji and other 4-byte UTF-8 characters may still be escaped by the YAML library, even with `--allow-unicode` enabled.
360+
299361

300362
## himl-config-merger
301363

@@ -394,6 +456,12 @@ Build the output with filtering:
394456
himl-config-merger examples/filters --output-dir merged_output --levels env region cluster --leaf-directories cluster --filter-rules-key _filters
395457
```
396458

459+
The `himl-config-merger` command also supports the `--allow-unicode` flag for preserving Unicode characters in the merged output files:
460+
461+
```sh
462+
himl-config-merger examples/complex --output-dir merged_output --levels env region cluster --leaf-directories cluster --allow-unicode
463+
```
464+
397465
```yaml
398466
# output after filtering
399467
env: dev

himl/config_generator.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def process(self, cwd=None,
4242
skip_interpolation_validation=False,
4343
skip_secrets=False,
4444
multi_line_string=False,
45+
allow_unicode=False,
4546
type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
4647
fallback_strategies=["override"],
4748
type_conflict_strategies=["override"]):
@@ -53,7 +54,7 @@ def process(self, cwd=None,
5354
cwd = cwd or os.getcwd()
5455

5556
generator = self._create_and_initialize_generator(
56-
cwd, path, multi_line_string, type_strategies, fallback_strategies, type_conflict_strategies)
57+
cwd, path, multi_line_string, allow_unicode, type_strategies, fallback_strategies, type_conflict_strategies)
5758

5859
# Process data exclusions and interpolations
5960
self._process_exclusions(generator, exclude_keys)
@@ -73,10 +74,10 @@ def _should_skip_interpolation_validation(self, skip_interpolations, skip_secret
7374
"""Determine if interpolation validation should be skipped."""
7475
return skip_interpolation_validation or skip_interpolations or skip_secrets
7576

76-
def _create_and_initialize_generator(self, cwd, path, multi_line_string, type_strategies,
77+
def _create_and_initialize_generator(self, cwd, path, multi_line_string, allow_unicode, type_strategies,
7778
fallback_strategies, type_conflict_strategies):
7879
"""Create and initialize the ConfigGenerator."""
79-
generator = ConfigGenerator(cwd, path, multi_line_string, type_strategies, fallback_strategies,
80+
generator = ConfigGenerator(cwd, path, multi_line_string, allow_unicode, type_strategies, fallback_strategies,
8081
type_conflict_strategies)
8182
generator.generate_hierarchy()
8283
generator.process_hierarchy()
@@ -179,12 +180,13 @@ class ConfigGenerator(object):
179180
will contain merged data on each layer.
180181
"""
181182

182-
def __init__(self, cwd, path, multi_line_string, type_strategies, fallback_strategies, type_conflict_strategies):
183+
def __init__(self, cwd, path, multi_line_string, allow_unicode, type_strategies, fallback_strategies, type_conflict_strategies):
183184
self.cwd = cwd
184185
self.path = path
185186
self.hierarchy = self.generate_hierarchy()
186187
self.generated_data = OrderedDict()
187188
self.interpolation_validator = InterpolationValidator()
189+
self.allow_unicode = allow_unicode
188190
self.type_strategies = type_strategies
189191
self.fallback_strategies = fallback_strategies
190192
self.type_conflict_strategies = type_conflict_strategies
@@ -338,7 +340,7 @@ def get_values_from_dir_path(self):
338340

339341
def output_yaml_data(self, data):
340342
return yaml.dump(data, Dumper=ConfigGenerator.yaml_dumper(), default_flow_style=False, width=200,
341-
sort_keys=False)
343+
sort_keys=False, allow_unicode=self.allow_unicode)
342344

343345
def yaml_to_json(self, yaml_data):
344346
return json.dumps(yaml.load(yaml_data, Loader=yaml.SafeLoader), indent=4)

himl/config_merger.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,19 @@ def __traverse_path(self, path: str, yaml_dict: dict):
8989
Loader.add_constructor('!include', Loader.include)
9090

9191

92-
def merge_configs(directories, levels, output_dir, enable_parallel, filter_rules):
92+
def merge_configs(directories, levels, output_dir, enable_parallel, filter_rules, allow_unicode):
9393
"""
9494
Method for running the merge configuration logic under different formats
9595
:param directories: list of paths for leaf directories
9696
:param levels: list of hierarchy levels to traverse
9797
:param output_dir: where to save the generated configs
9898
:param enable_parallel: to enable parallel config generation
99+
:param allow_unicode: allow unicode characters in output
99100
"""
100101
config_processor = ConfigProcessor()
101102
process_config = []
102103
for path in directories:
103-
process_config.append((config_processor, path, levels, output_dir, filter_rules))
104+
process_config.append((config_processor, path, levels, output_dir, filter_rules, allow_unicode))
104105

105106
if enable_parallel:
106107
logger.info("Processing config in parallel")
@@ -121,6 +122,7 @@ def merge_logic(process_params):
121122
levels = process_params[2]
122123
output_dir = process_params[3]
123124
filter_rules = process_params[4]
125+
allow_unicode = process_params[5]
124126

125127
# load the !include tag
126128
Loader.add_constructor('!include', Loader.include)
@@ -153,7 +155,7 @@ def merge_logic(process_params):
153155
logger.info("Found input config directory: %s", path)
154156
logger.info("Storing generated config to: %s", filename)
155157
with open(filename, "w+") as f:
156-
f.write(yaml.dump(output))
158+
f.write(yaml.dump(output, allow_unicode=allow_unicode))
157159

158160

159161
def is_leaf_directory(dir, leaf_directories):
@@ -203,6 +205,8 @@ def get_parser():
203205
action='store_true', help='Process config using multiprocessing')
204206
parser.add_argument('--filter-rules-key', dest='filter_rules', default=None, type=str,
205207
help='keep these keys from the generated data, based on the configured filter key')
208+
parser.add_argument('--allow-unicode', dest='allow_unicode', default=False,
209+
action='store_true', help='allow unicode characters in output (default: False, outputs escape sequences)')
206210
return parser
207211

208212

@@ -219,4 +223,4 @@ def run(args=None):
219223

220224
# merge the configs using HIML
221225
merge_configs(dirs, opts.hierarchy_levels,
222-
opts.output_dir, opts.enable_parallel, opts.filter_rules)
226+
opts.output_dir, opts.enable_parallel, opts.filter_rules, opts.allow_unicode)

himl/main.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def do_run(self, opts):
4343
config_processor.process(cwd, opts.path, filters, excluded_keys, opts.enclosing_key,
4444
opts.remove_enclosing_key, opts.output_format, opts.print_data, opts.output_file,
4545
opts.skip_interpolation_resolving, opts.skip_interpolation_validation,
46-
opts.skip_secrets, opts.multi_line_string,
46+
opts.skip_secrets, opts.multi_line_string, opts.allow_unicode,
4747
type_strategies=[(list, [opts.merge_list_strategy.value]), (dict, ["merge"])])
4848

4949
@staticmethod
@@ -79,6 +79,8 @@ def get_parser(parser=None):
7979
parser.add_argument('--list-merge-strategy', dest='merge_list_strategy', type=ListMergeStrategy,
8080
choices=list(ListMergeStrategy), default='append_unique',
8181
help='override default merge strategy for list')
82+
parser.add_argument('--allow-unicode', dest='allow_unicode', action='store_true', default=False,
83+
help='allow unicode characters in output (default: False, outputs escape sequences)')
8284
parser.add_argument('--version', action='version', version='%(prog)s v{version}'.format(version="0.18.0"),
8385
help='print himl version')
8486
return parser

0 commit comments

Comments
 (0)