@@ -168,35 +168,79 @@ def to_json_schema(cls):
168
168
This function currently does not fully specify a pandera schema,
169
169
and is primarily used internally to render OpenAPI docs via the
170
170
FastAPI integration.
171
-
172
- :raises ImportError: if ``pandas`` is not installed.
173
171
"""
174
- try :
175
- import pandas as pd
176
- except ImportError as exc :
177
- raise ImportError (
178
- "pandas is required to serialize polars schema to json-schema"
179
- ) from exc
180
-
181
172
schema = cls .to_schema ()
182
- empty = pl .DataFrame (
183
- schema = {k : v .type for k , v in schema .dtypes .items ()}
184
- ).to_pandas ()
185
- table_schema = pd .io .json .build_table_schema (empty )
186
-
187
- def _field_json_schema (field ):
188
- return {
173
+
174
+ # Define a mapping from Polars data types to JSON schema types
175
+ # This is more robust than string parsing
176
+ POLARS_TO_JSON_TYPE_MAP = {
177
+ # Integer types
178
+ pl .Int8 : "integer" ,
179
+ pl .Int16 : "integer" ,
180
+ pl .Int32 : "integer" ,
181
+ pl .Int64 : "integer" ,
182
+ pl .UInt8 : "integer" ,
183
+ pl .UInt16 : "integer" ,
184
+ pl .UInt32 : "integer" ,
185
+ pl .UInt64 : "integer" ,
186
+
187
+ # Float types
188
+ pl .Float32 : "number" ,
189
+ pl .Float64 : "number" ,
190
+
191
+ # Boolean type
192
+ pl .Boolean : "boolean" ,
193
+
194
+ # String types
195
+ pl .Utf8 : "string" ,
196
+ pl .String : "string" ,
197
+
198
+ # Date/Time types
199
+ pl .Date : "datetime" ,
200
+ pl .Datetime : "datetime" ,
201
+ pl .Time : "datetime" ,
202
+ pl .Duration : "datetime" ,
203
+ }
204
+
205
def map_dtype_to_json_type(dtype):
    """
    Map a Polars data type to a JSON schema type.

    Args:
        dtype: Polars data type, given either as a dtype class
            (e.g. ``pl.Int64``) or as a dtype instance
            (e.g. ``pl.Datetime("us")``).

    Returns:
        str: JSON schema type string
    """
    # Normalize to the dtype class before the lookup. Taking
    # ``dtype.__class__`` of an uninstantiated dtype class yields its
    # metaclass, which is never a key of the mapping, so the table would
    # be silently skipped for such inputs.
    dtype_cls = dtype if isinstance(dtype, type) else type(dtype)
    json_type = POLARS_TO_JSON_TYPE_MAP.get(dtype_cls)
    if json_type is not None:
        return json_type

    # Fallback to the string representation for dtypes missing from the
    # mapping.
    dtype_str = str(dtype).lower()
    if "float" in dtype_str:
        return "number"
    if "int" in dtype_str:
        return "integer"
    if "bool" in dtype_str:
        return "boolean"
    # "datetime" contains both "date" and "time", so two checks suffice.
    if "date" in dtype_str or "time" in dtype_str:
        return "datetime"
    return "string"
231
+
232
+ properties = {}
233
+ for col_name , col_schema in schema .dtypes .items ():
234
+ json_type = map_dtype_to_json_type (col_schema .type )
235
+ properties [col_name ] = {
189
236
"type" : "array" ,
190
- "items" : {"type" : field [ "type" ] },
237
+ "items" : {"type" : json_type },
191
238
}
192
239
193
240
return {
194
241
"title" : schema .name or "pandera.DataFrameSchema" ,
195
242
"type" : "object" ,
196
- "properties" : {
197
- field ["name" ]: _field_json_schema (field )
198
- for field in table_schema ["fields" ]
199
- },
243
+ "properties" : properties ,
200
244
}
201
245
202
246
@classmethod
0 commit comments