1212import pandas as pd
1313
1414
def _entities_to_dataframe(json_data):
    """Build a DataFrame with one flattened row per JSON/JSON-LD entity."""
    if isinstance(json_data, list):
        entities = json_data
    else:
        # A JSON-LD document keeps its nodes under "@graph"; any other
        # object is treated as a single entity.
        entities = json_data.get("@graph", [json_data])
        if isinstance(entities, dict):
            # "@graph" may hold one node object instead of a list; iterating
            # the dict directly would yield its keys, not the entity.
            entities = [entities]
    return pd.DataFrame([flatten_dict(entity) for entity in entities])


def data_to_dataframe(data):
    """
    Convert data from file path or raw JSON/JSON-LD into a flattened pandas DataFrame.

    A string is treated as a path and dispatched on its extension, compared
    case-insensitively: .xls/.xlsx are read with ``pd.read_excel``, .csv with
    ``pd.read_csv``, and .json/.jsonld are parsed and flattened entity by
    entity. A dict or list is treated as an already-parsed JSON/JSON-LD
    object and flattened the same way.

    Args:
        data (str | dict | list): Path to a data file or a JSON/JSON-LD object.

    Returns:
        pd.DataFrame: Flattened data as a DataFrame, or None when the input
        is unsupported or could not be processed (the error is printed
        rather than raised).
    """
    df = None
    try:
        if isinstance(data, str):
            # Compare extensions on a lowered copy so "DATA.CSV" is accepted;
            # the original path is still what gets opened.
            lowered = data.lower()
            if lowered.endswith((".xls", ".xlsx")):
                df = pd.read_excel(data)
            elif lowered.endswith(".csv"):
                df = pd.read_csv(data)
            elif lowered.endswith((".json", ".jsonld")):
                with open(data, "r", encoding="utf-8") as file:
                    json_data = json.load(file)
                df = _entities_to_dataframe(json_data)
            else:
                raise ValueError("Unsupported file format. Must be .xls, .xlsx, .csv, .json, or .jsonld")
        elif isinstance(data, (dict, list)):
            # Raw JSON or JSON-LD object passed directly.
            df = _entities_to_dataframe(data)
        else:
            raise ValueError("Unsupported input type. Must be file path or JSON object.")
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and fall
        # through to return None instead of propagating to the caller.
        print(f"Error processing data: {e}")
    return df
5551
5652
def flatten_dict(d, parent_key="", sep=".", preserve_keys=None):
    """
    Flatten a nested dictionary into a single-level dictionary.

    Nested dictionaries contribute keys joined with ``sep``
    (``{"a": {"b": 1}}`` -> ``{"a.b": 1}``). List elements are addressed by
    position (``{"a": [1, {"b": 2}]}`` -> ``{"a[0]": 1, "a[1].b": 2}``).
    Values stored under a key listed in ``preserve_keys`` are kept as-is,
    and empty dicts/lists are kept as values so their key is not lost.

    Args:
        d (dict): The dictionary to flatten.
        parent_key (str): Prefix for keys during recursion.
        sep (str): Separator used for key hierarchy.
        preserve_keys (list): Keys whose values should not be flattened.
            Defaults to ["coordinates", "@context"].

    Returns:
        dict: A flattened dictionary.
    """
    if preserve_keys is None:
        preserve_keys = ["coordinates", "@context"]

    flat = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k

        # Leaf cases: preserved keys, scalars, and empty containers.
        # Expanding an empty container would emit nothing and drop the key.
        if k in preserve_keys or not isinstance(v, (dict, list)) or not v:
            flat[new_key] = v
        elif isinstance(v, dict):
            flat.update(flatten_dict(v, new_key, sep=sep, preserve_keys=preserve_keys))
        else:
            for i, item in enumerate(v):
                item_key = f"{new_key}[{i}]"
                if isinstance(item, dict) and item:
                    flat.update(flatten_dict(item, item_key, sep=sep, preserve_keys=preserve_keys))
                else:
                    # Scalars, nested lists and empty dicts are stored
                    # under their positional key unchanged.
                    flat[item_key] = item

    return flat
0 commit comments