11"""Helper functions to rename columns from their external name (defined in
22`ColumnMeta(external_name=...)`) to their internal name."""
33
4- from typing import Optional
4+ from typing import Optional , Type
55
66from pyspark .sql import Column , DataFrame
77from pyspark .sql .functions import col , lit , struct , when
88from pyspark .sql .types import StructField , StructType
99
10+ from typedspark ._schema .schema import Schema
1011
11- def rename_columns (df : DataFrame , schema : StructType ) -> DataFrame :
12+
13+ def rename_columns (df : DataFrame , schema : Type [Schema ]) -> DataFrame :
1214 """Helper functions to rename columns from their external name (defined in
13- `ColumnMeta(external_name=...)`) to their internal name."""
14- for field in schema .fields :
15+ `ColumnMeta(external_name=...)`) to their internal name (as used in the Schema) ."""
16+ for field in schema .get_structtype (). fields :
1517 internal_name = field .name
1618
1719 if field .metadata and "external_name" in field .metadata :
@@ -25,6 +27,23 @@ def rename_columns(df: DataFrame, schema: StructType) -> DataFrame:
2527 return df
2628
2729
def rename_columns_2(df: DataFrame, schema: Type[Schema]) -> DataFrame:
    """Rename columns from their internal name to their external name.

    The internal name is the field name used in the Schema; the external name
    is read from the ``external_name`` entry of the field's metadata (defined
    in `ColumnMeta(external_name=...)`). Fields without that entry keep their
    internal name.

    Args:
        df: DataFrame whose columns currently carry the internal names.
        schema: Schema class; the fields are taken from ``get_structtype()``.

    Returns:
        The DataFrame with top-level columns renamed and struct columns
        replaced by the result of ``_create_renamed_structtype_2``.
    """
    for field in schema.get_structtype().fields:
        internal_name = field.name
        # Resolve the target name on every iteration, so that a field without
        # `external_name` metadata never picks up the value of a previous field.
        metadata = field.metadata or {}
        external_name = metadata.get("external_name", internal_name)

        if isinstance(field.dataType, StructType):
            # The nested rename addresses its source data through the internal
            # column name, so it has to run before the top-level rename.
            structtype = _create_renamed_structtype_2(field.dataType, internal_name)
            df = df.withColumn(internal_name, structtype)

        if external_name != internal_name:
            df = df.withColumnRenamed(internal_name, external_name)

    return df
45+
46+
2847def _create_renamed_structtype (
2948 schema : StructType ,
3049 parent : str ,
@@ -35,7 +54,7 @@ def _create_renamed_structtype(
3554
3655 mapping = []
3756 for field in schema .fields :
38- external_name = _get_external_name ( field , full_parent_path )
57+ external_name = _get_updated_parent_path ( full_parent_path , field )
3958
4059 if isinstance (field .dataType , StructType ):
4160 mapping += [
@@ -51,11 +70,44 @@ def _create_renamed_structtype(
5170 return _produce_nested_structtype (mapping , parent , full_parent_path )
5271
5372
54- def _get_external_name (field : StructField , full_parent_path : str ) -> str :
def _create_renamed_structtype_2(
    schema: StructType,
    parent: str,
    full_parent_path: Optional[str] = None,
) -> Column:
    """Build a struct column whose nested fields use their external names.

    Walks the fields of ``schema``. Each leaf field is selected through its
    internal path and aliased to its external name (the ``external_name``
    metadata entry, falling back to the internal name). Nested structs are
    handled recursively by this same function.

    Args:
        schema: StructType describing the struct column to rebuild.
        parent: Name passed on to ``_produce_nested_structtype`` for the
            resulting struct.
        full_parent_path: Backtick-quoted path to the struct in the source
            DataFrame. Defaults to the quoted ``parent`` at the top level.

    Returns:
        The Column returned by ``_produce_nested_structtype`` for the
        collected field columns.
    """
    if not full_parent_path:
        full_parent_path = f"`{parent}`"

    mapping = []
    for field in schema.fields:
        internal_name = field.name
        external_name = field.metadata.get("external_name", internal_name)

        # The source data is still addressed by internal names at every level.
        updated_parent_path = _get_updated_parent_path_2(full_parent_path, internal_name)

        if isinstance(field.dataType, StructType):
            # Recurse into this function (internal -> external), not into
            # `_create_renamed_structtype`, which maps in the other direction.
            mapping.append(
                _create_renamed_structtype_2(
                    field.dataType,
                    parent=external_name,
                    full_parent_path=updated_parent_path,
                )
            )
        else:
            mapping.append(col(updated_parent_path).alias(external_name))

    return _produce_nested_structtype(mapping, parent, full_parent_path)
100+
101+
def _get_updated_parent_path(full_parent_path: str, field: StructField) -> str:
    """Return the backtick-quoted path to ``field`` using its external name.

    Args:
        full_parent_path: Already-quoted path of the enclosing struct.
        field: Field whose ``external_name`` metadata entry is appended; the
            field's own name is used when the entry is absent.

    Returns:
        ``full_parent_path`` followed by a dot and the quoted name.
    """
    external_name = field.metadata.get("external_name", field.name)
    # No whitespace around the dot or inside the backticks: the quoted text is
    # taken verbatim as the column name.
    return f"{full_parent_path}.`{external_name}`"
57105
58106
def _get_updated_parent_path_2(full_parent_path: str, field: str) -> str:
    """Return the backtick-quoted path to the nested field named ``field``.

    Args:
        full_parent_path: Already-quoted path of the enclosing struct.
        field: Name of the nested field to append.

    Returns:
        ``full_parent_path`` followed by a dot and the quoted field name.
    """
    # No whitespace around the dot or inside the backticks: the quoted text is
    # taken verbatim as the column name.
    return f"{full_parent_path}.`{field}`"
109+
110+
59111def _produce_nested_structtype (
60112 mapping : list [Column ],
61113 parent : str ,
0 commit comments