diff --git a/litestar/dto/_codegen_backend.py b/litestar/dto/_codegen_backend.py
index fb3895b54a..80c50dba34 100644
--- a/litestar/dto/_codegen_backend.py
+++ b/litestar/dto/_codegen_backend.py
@@ -434,7 +434,12 @@ def _create_transfer_instance_data(
                 else:
                     level_1, level_2, *_ = self._re_index_access.split(source_instance_name, maxsplit=1)
 
-                new_source_instance_name = self._create_local_name(f"{level_1}_{level_2}")
+                # Sanitize to a valid identifier: bracket accesses like ['key'] or dots
+                # can appear in components when source_instance_name is itself an
+                # expression (e.g. "source_instance_0['wrapper'].inner") rather than a
+                # plain variable name.
+                combined = re.sub(r"[^a-zA-Z0-9_]", "_", f"{level_1}_{level_2}")
+                new_source_instance_name = self._create_local_name(combined)
                 self._add_stmt(f"{new_source_instance_name} = {source_instance_name}")
                 source_instance_name = new_source_instance_name
 
diff --git a/tests/unit/test_dto/test_factory/test_backends/test_backends.py b/tests/unit/test_dto/test_factory/test_backends/test_backends.py
index f2df5bf4f8..735c68bcf0 100644
--- a/tests/unit/test_dto/test_factory/test_backends/test_backends.py
+++ b/tests/unit/test_dto/test_factory/test_backends/test_backends.py
@@ -676,3 +676,55 @@ class Outer:
         wrapper_attribute_name=None,
         is_data_field=True,
     )
+
+
+def test_codegen_invalid_identifier_from_nested_mapping_then_attribute_access(
+    asgi_connection: Request[Any, Any, Any],
+    create_module: Callable[[str], ModuleType],
+) -> None:
+    """Regression test: codegen backend generated an invalid Python identifier when
+    source_instance_name was a mapping-access expression (e.g. ``source_instance_0['metadata']``)
+    and the accessed field was itself a nested struct with multiple sub-fields.
+
+    The combination produced ``source_instance_0['metadata']_pagination_0`` as a
+    variable name, which is a SyntaxError. The pattern that triggers this is:
+
+    Container              (2+ fields, so mapping-access optimisation runs)
+    └── wrapper: Wrapper   (exactly 1 field, so optimisation is SKIPPED here,
+                            leaving source_instance_name as the raw expression)
+        └── inner: Inner   (2+ fields, so optimisation runs again — and tries
+                            to build a name from the raw expression)
+    """
+    module = create_module("""
+import msgspec
+
+class Inner(msgspec.Struct):
+    a: int
+    b: str
+
+class Wrapper(msgspec.Struct):
+    # Exactly one field so the "assign to local variable" optimisation is skipped
+    # at this level, leaving the mapping-access expression as source_instance_name.
+    inner: Inner
+
+class Container(msgspec.Struct):
+    data: str
+    wrapper: Wrapper
+""")
+
+    class ContainerDTO(MsgspecDTO[module.Container]):  # type: ignore[name-defined]
+        config = DTOConfig(max_nested_depth=3, experimental_codegen_backend=True)
+
+    # This must not raise SyntaxError / compile error during backend instantiation
+    backend = DTOCodegenBackend(
+        handler_id="test",
+        dto_factory=ContainerDTO,
+        field_definition=TransferDTOFieldDefinition.from_annotation(module.Container),
+        model_type=module.Container,
+        wrapper_attribute_name=None,
+        is_data_field=False,
+    )
+
+    instance = module.Container(data="hello", wrapper=module.Wrapper(inner=module.Inner(a=1, b="two")))  # type: ignore[name-defined]
+    result = backend.encode_data(instance)
+    assert msgspec.to_builtins(result) == {"data": "hello", "wrapper": {"inner": {"a": 1, "b": "two"}}}