88 field: type, description # required field
99 field?: type, description # optional field
1010 field(array): type # array of values
11+ field(array, description): type # array with description
1112 field?(enum): [val1, val2] # enumeration
13+ field?(enum, description): [val1, val2] # enum with description
1214 field?(object): # nested object
1315 sub_field: type
1416 EntityName as type (capitalized) # entity reference
@@ -59,45 +61,58 @@ class SchemaDefinition:
5961
6062SCALAR_TYPES = frozenset ({"string" , "integer" , "number" , "boolean" , "any" })
6163
64+ MODIFIER_RE = re .compile (r"\(\s*(array|enum|object)\s*(?:,\s*([^)]*?))?\s*\)\s*$" )
65+
6266
6367# --- Field Name Parsing ---
6468
6569
66- def _parse_field_key (key : str ) -> tuple [str , bool , bool , bool , bool ]:
70+ def _parse_field_key_parts (key : str ) -> tuple [str , bool , bool , bool , bool , str | None ]:
6771 """Parse a Picoschema field key into its components.
6872
69- Returns (name, required, is_array, is_enum, is_object).
70- The key format is: name[?][(array|enum|object)]
73+ Returns (name, required, is_array, is_enum, is_object, description ).
74+ The key format is: name[?][(array|enum|object[, description] )]
7175
7276 Examples:
7377 "name" -> ("name", True, False, False)
7478 "role?" -> ("role", False, False, False)
7579 "tags?(array)" -> ("tags", False, True, False)
80+ "tags?(array, labels)" -> ("tags", False, True, False) + "labels"
7681 "status?(enum)" -> ("status", False, False, True)
7782 "metadata?(object)" -> ("metadata", False, False, False) + children
7883 """
7984 required = True
8085 is_array = False
8186 is_enum = False
8287 is_object = False
83-
84- # Check for modifier suffix: (array), (enum), (object)
85- if key .endswith ("(array)" ):
86- is_array = True
87- key = key [: - len ("(array)" )]
88- elif key .endswith ("(enum)" ):
89- is_enum = True
90- key = key [: - len ("(enum)" )]
91- elif key .endswith ("(object)" ):
92- is_object = True
93- key = key [: - len ("(object)" )]
88+ description = None
89+
90+ modifier_match = MODIFIER_RE .search (key )
91+ if modifier_match :
92+ modifier = modifier_match .group (1 )
93+ description_match = modifier_match .group (2 )
94+ description = description_match .strip () if description_match else None
95+ key = key [: modifier_match .start ()].rstrip ()
96+
97+ if modifier == "array" :
98+ is_array = True
99+ elif modifier == "enum" :
100+ is_enum = True
101+ elif modifier == "object" :
102+ is_object = True
94103
95104 # Check for optional marker
96105 if key .endswith ("?" ):
97106 required = False
98107 key = key [:- 1 ]
99108
100- return key , required , is_array , is_enum , is_object
109+ return key .strip (), required , is_array , is_enum , is_object , description
110+
111+
def _parse_field_key(key: str) -> tuple[str, bool, bool, bool, bool]:
    """Parse a Picoschema field key without its modifier description.

    Delegates to ``_parse_field_key_parts`` and returns only the first five
    elements of its result: (name, required, is_array, is_enum, is_object).
    """
    parts = _parse_field_key_parts(key)
    # The sixth element is the modifier description, which this variant omits.
    return parts[:5]
101116
102117
103118def _parse_type_and_description (value : str ) -> tuple [str , str | None ]:
@@ -170,7 +185,9 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
170185 fields : list [SchemaField ] = []
171186
172187 for key , value in yaml_dict .items ():
173- name , required , is_array , is_enum , is_object = _parse_field_key (key )
188+ name , required , is_array , is_enum , is_object , key_description = _parse_field_key_parts (
189+ key
190+ )
174191
175192 # --- Enum fields ---
176193 # Trigger: value is a list or a string containing bracketed enum values
@@ -179,11 +196,12 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
179196 # in YAML to avoid parse errors)
180197 # Outcome: SchemaField with is_enum=True and enum_values populated
181198 if is_enum :
182- description = None
199+ description = key_description
183200 if isinstance (value , list ):
184201 enum_values = [str (v ) for v in value ]
185202 else :
186- enum_values , description = _parse_enum_string (str (value ))
203+ enum_values , value_description = _parse_enum_string (str (value ))
204+ description = description or value_description
187205 fields .append (
188206 SchemaField (
189207 name = name ,
@@ -207,13 +225,15 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
207225 name = name ,
208226 type = "object" ,
209227 required = required ,
228+ description = key_description ,
210229 children = children ,
211230 )
212231 )
213232 continue
214233
215234 # --- Scalar and entity ref fields ---
216- type_str , description = _parse_type_and_description (str (value ))
235+ type_str , value_description = _parse_type_and_description (str (value ))
236+ description = key_description or value_description
217237 is_entity_ref = _is_entity_ref_type (type_str )
218238
219239 fields .append (
0 commit comments