@@ -191,6 +191,52 @@ def test_output_formats(self):
191191 )
192192 assert json_result == config_data
193193
def test_unicode_processing_disabled(self):
    """Check that non-ASCII values survive process() with allow_unicode=False.

    Creates the 'unicode.yaml' fixture through create_test_yaml, runs it
    through config_processor.process with allow_unicode=False and
    print_data=False, and verifies that each value in the returned
    mapping equals the text that was written.
    """
    unicode_values = {
        'message': 'Hello 世界',
        'emoji': '✨ sparkles',
        'accents': 'café',
    }
    self.create_test_yaml('unicode.yaml', unicode_values)

    processed = self.config_processor.process(
        cwd=self.temp_dir,
        path='unicode.yaml',
        allow_unicode=False,
        print_data=False,
    )

    # The returned data must hold the original text whatever the
    # allow_unicode setting is.
    for key, expected in (
        ('message', 'Hello 世界'),
        ('emoji', '✨ sparkles'),
        ('accents', 'café'),
    ):
        assert processed[key] == expected
214+
def test_unicode_processing_enabled(self):
    """Check that non-ASCII values survive process() with allow_unicode=True.

    Creates the 'unicode.yaml' fixture through create_test_yaml with CJK,
    emoji, accented, Arabic and Cyrillic text, runs it through
    config_processor.process with allow_unicode=True and print_data=False,
    and verifies that each value in the returned mapping equals the text
    that was written.
    """
    unicode_values = {
        'message': 'Hello 世界',
        'emoji': '✨ sparkles',
        'accents': 'café',
        'arabic': 'مرحبا',
        'cyrillic': 'Привет',
    }
    self.create_test_yaml('unicode.yaml', unicode_values)

    processed = self.config_processor.process(
        cwd=self.temp_dir,
        path='unicode.yaml',
        allow_unicode=True,
        print_data=False,
    )

    # Every script must come back exactly as it was written.
    for key, expected in (
        ('message', 'Hello 世界'),
        ('emoji', '✨ sparkles'),
        ('accents', 'café'),
        ('arabic', 'مرحبا'),
        ('cyrillic', 'Привет'),
    ):
        assert processed[key] == expected
239+
194240
195241class TestConfigGenerator :
196242 """Test cases for ConfigGenerator class"""
@@ -218,6 +264,7 @@ def test_config_generator_initialization(self):
218264 cwd = self .temp_dir ,
219265 path = 'test' ,
220266 multi_line_string = False ,
267+ allow_unicode = False ,
221268 type_strategies = [(list , ["append_unique" ]), (dict , ["merge" ])],
222269 fallback_strategies = ["override" ],
223270 type_conflict_strategies = ["override" ]
@@ -238,6 +285,7 @@ def test_hierarchy_generation(self):
238285 cwd = self .temp_dir ,
239286 path = 'production' ,
240287 multi_line_string = False ,
288+ allow_unicode = False ,
241289 type_strategies = [(list , ["append_unique" ]), (dict , ["merge" ])],
242290 fallback_strategies = ["override" ],
243291 type_conflict_strategies = ["override" ]
@@ -256,6 +304,7 @@ def test_yaml_content_loading(self):
256304 cwd = self .temp_dir ,
257305 path = 'test' ,
258306 multi_line_string = False ,
307+ allow_unicode = False ,
259308 type_strategies = [(list , ["append_unique" ]), (dict , ["merge" ])],
260309 fallback_strategies = ["override" ],
261310 type_conflict_strategies = ["override" ]
@@ -270,6 +319,7 @@ def test_yaml_merging(self):
270319 cwd = self .temp_dir ,
271320 path = 'test' ,
272321 multi_line_string = False ,
322+ allow_unicode = False ,
273323 type_strategies = [(list , ["append_unique" ]), (dict , ["merge" ])],
274324 fallback_strategies = ["override" ],
275325 type_conflict_strategies = ["override" ]
@@ -298,6 +348,7 @@ def test_output_data_yaml(self):
298348 cwd = self .temp_dir ,
299349 path = 'test' ,
300350 multi_line_string = False ,
351+ allow_unicode = False ,
301352 type_strategies = [(list , ["append_unique" ]), (dict , ["merge" ])],
302353 fallback_strategies = ["override" ],
303354 type_conflict_strategies = ["override" ]
@@ -316,6 +367,7 @@ def test_output_data_json(self):
316367 cwd = self .temp_dir ,
317368 path = 'test' ,
318369 multi_line_string = False ,
370+ allow_unicode = False ,
319371 type_strategies = [(list , ["append_unique" ]), (dict , ["merge" ])],
320372 fallback_strategies = ["override" ],
321373 type_conflict_strategies = ["override" ]
@@ -335,6 +387,7 @@ def test_invalid_output_format(self):
335387 cwd = self .temp_dir ,
336388 path = 'test' ,
337389 multi_line_string = False ,
390+ allow_unicode = False ,
338391 type_strategies = [(list , ["append_unique" ]), (dict , ["merge" ])],
339392 fallback_strategies = ["override" ],
340393 type_conflict_strategies = ["override" ]
@@ -353,6 +406,7 @@ def test_values_from_dir_path(self):
353406 cwd = self .temp_dir ,
354407 path = 'env=production/region=us-east-1/cluster=web' ,
355408 multi_line_string = False ,
409+ allow_unicode = False ,
356410 type_strategies = [(list , ["append_unique" ]), (dict , ["merge" ])],
357411 fallback_strategies = ["override" ],
358412 type_conflict_strategies = ["override" ]
@@ -361,3 +415,88 @@ def test_values_from_dir_path(self):
361415 values = generator .get_values_from_dir_path ()
362416 expected = {'env' : 'production' , 'region' : 'us-east-1' , 'cluster' : 'web' }
363417 assert values == expected
418+
def test_allow_unicode_false(self):
    """Test that Unicode characters are escaped when allow_unicode=False.

    Builds a ConfigGenerator with allow_unicode=False, passes a dict
    holding CJK, emoji and accented text to output_yaml_data, and checks
    that none of the raw non-ASCII characters appear in the returned text
    while at least one escape sequence does.
    """
    generator = ConfigGenerator(
        cwd=self.temp_dir,
        path='test',
        multi_line_string=False,
        allow_unicode=False,
        type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
        fallback_strategies=["override"],
        type_conflict_strategies=["override"],
    )

    test_data = {
        'greeting': 'Hello 世界',
        'emoji': '🚀 rocket',
        'special': 'café résumé naïve',
    }
    yaml_output = generator.output_yaml_data(test_data)

    # The raw characters must be gone. Matching on 'greeting: Hello' would
    # also succeed for unescaped output, so it proves nothing about escaping.
    for raw in ('世界', '🚀', 'é', 'ï'):
        assert raw not in yaml_output

    # NOTE(review): assumes PyYAML-style escapes (\xXX, \uXXXX, \UXXXXXXXX)
    # in the serialised text; confirm against output_yaml_data.
    assert any(marker in yaml_output for marker in ('\\x', '\\u', '\\U'))
440+
def test_allow_unicode_true(self):
    """Test that Unicode characters are preserved when allow_unicode=True.

    Builds a ConfigGenerator with allow_unicode=True, passes a dict
    holding CJK, emoji and accented text to output_yaml_data, and checks
    that the CJK and accented text appears verbatim in the returned text.
    The emoji is accepted either verbatim or as its \\U0001F680 escape.
    """
    generator = ConfigGenerator(
        cwd=self.temp_dir,
        path='test',
        multi_line_string=False,
        allow_unicode=True,
        type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
        fallback_strategies=["override"],
        type_conflict_strategies=["override"],
    )

    test_data = {
        'greeting': 'Hello 世界',
        'emoji': '🚀 rocket',
        'special': 'café résumé naïve',
    }
    yaml_output = generator.output_yaml_data(test_data)

    # When allow_unicode=True, most Unicode should be preserved.
    # Note: PyYAML may still escape some 4-byte UTF-8 characters (emojis).
    assert '世界' in yaml_output  # Chinese characters preserved
    assert 'café' in yaml_output  # Accented characters preserved
    assert 'résumé' in yaml_output  # Accented characters preserved
    assert 'naïve' in yaml_output  # Accented characters preserved

    # The emoji might be escaped as \U0001F680 even with allow_unicode=True.
    # The escape literal must not contain a space, or this fallback branch
    # can never match.
    assert '🚀' in yaml_output or '\\U0001F680' in yaml_output
468+
def test_unicode_in_nested_structures(self):
    """Check Unicode handling for values nested inside lists and dicts.

    Builds a ConfigGenerator with allow_unicode=True, passes a structure
    containing a list of dicts and a nested dict to output_yaml_data, and
    verifies that each non-ASCII fragment appears verbatim in the
    returned text.
    """
    generator = ConfigGenerator(
        cwd=self.temp_dir,
        path='test',
        multi_line_string=False,
        allow_unicode=True,
        type_strategies=[(list, ["append_unique"]), (dict, ["merge"])],
        fallback_strategies=["override"],
        type_conflict_strategies=["override"],
    )

    nested_data = {
        'users': [
            {'name': 'José García', 'country': 'España'},
            {'name': '田中太郎', 'country': '日本'},
            {'name': 'François Müller', 'country': 'France'},
        ],
        'config': {
            'title': 'Configuration — Настройки',
            'description': 'Multi-language support: English, 中文, العربية, हिन्दी',
        },
    }
    rendered = generator.output_yaml_data(nested_data)

    # Each fragment must appear unescaped in the serialised text.
    expected_fragments = (
        'José García',
        '田中太郎',
        'España',
        '日本',
        'Настройки',
        '中文',
        'العربية',
        'हिन्दी',
    )
    for fragment in expected_fragments:
        assert fragment in rendered
0 commit comments