88 field: type, description # required field
99 field?: type, description # optional field
1010 field(array): type # array of values
11+ field(array, description): type # array with description
1112 field?(enum): [val1, val2] # enumeration
13+ field?(enum, description): [val1, val2] # enum with description
1214 field?(object): # nested object
1315 sub_field: type
1416 EntityName as type (capitalized) # entity reference
@@ -58,46 +60,74 @@ class SchemaDefinition:
5860# with an uppercase letter is treated as an entity reference.
5961
6062SCALAR_TYPES = frozenset ({"string" , "integer" , "number" , "boolean" , "any" })
63+ MODIFIER_TYPES = frozenset ({"array" , "enum" , "object" })
6164
6265
6366# --- Field Name Parsing ---
6467
6568
66- def _parse_field_key (key : str ) -> tuple [str , bool , bool , bool , bool ]:
69+ def _parse_field_key_parts (key : str ) -> tuple [str , bool , bool , bool , bool , str | None ]:
6770 """Parse a Picoschema field key into its components.
6871
69- Returns (name, required, is_array, is_enum, is_object).
70- The key format is: name[?][(array|enum|object)]
72+ Returns (name, required, is_array, is_enum, is_object, description ).
73+ The key format is: name[?][(array|enum|object[, description] )]
7174
7275 Examples:
73- "name" -> ("name", True, False, False)
74- "role?" -> ("role", False, False, False)
75- "tags?(array)" -> ("tags", False, True, False)
76- "status?(enum)" -> ("status", False, False, True)
77- "metadata?(object)" -> ("metadata", False, False, False) + children
76+ "name" -> ("name", True, False, False, False, None)
77+ "role?" -> ("role", False, False, False, False, None)
78+ "tags?(array)" -> ("tags", False, True, False, False, None)
79+ "tags?(array, labels)" -> ("tags", False, True, False, False, "labels")
80+ "status?(enum)" -> ("status", False, False, True, False, None)
81+ "metadata?(object)" -> ("metadata", False, False, False, True, None)
7882 """
7983 required = True
8084 is_array = False
8185 is_enum = False
8286 is_object = False
87+ description = None
88+
89+ key , modifier , description = _split_modifier_suffix (key )
8390
84- # Check for modifier suffix: (array), (enum), (object)
85- if key .endswith ("(array)" ):
91+ if modifier == "array" :
8692 is_array = True
87- key = key [: - len ("(array)" )]
88- elif key .endswith ("(enum)" ):
93+ elif modifier == "enum" :
8994 is_enum = True
90- key = key [: - len ("(enum)" )]
91- elif key .endswith ("(object)" ):
95+ elif modifier == "object" :
9296 is_object = True
93- key = key [: - len ("(object)" )]
9497
9598 # Check for optional marker
9699 if key .endswith ("?" ):
97100 required = False
98101 key = key [:- 1 ]
99102
100- return key , required , is_array , is_enum , is_object
103+ return key .strip (), required , is_array , is_enum , is_object , description
104+
105+
106+ def _parse_field_key (key : str ) -> tuple [str , bool , bool , bool , bool ]:
107+ """Parse a Picoschema field key, discarding any modifier description."""
108+ name , required , is_array , is_enum , is_object , _description = _parse_field_key_parts (key )
109+ return name , required , is_array , is_enum , is_object
110+
111+
112+ def _split_modifier_suffix (key : str ) -> tuple [str , str | None , str | None ]:
113+ """Split a trailing picoschema modifier from a field key."""
114+ stripped_key = key .rstrip ()
115+ if not stripped_key .endswith (")" ):
116+ return key , None , None
117+
118+ open_paren_index = stripped_key .find ("(" )
119+ if open_paren_index == - 1 :
120+ return key , None , None
121+
122+ modifier_text = stripped_key [open_paren_index + 1 : - 1 ].strip ()
123+ modifier , separator , description = modifier_text .partition ("," )
124+ modifier = modifier .strip ()
125+ if modifier not in MODIFIER_TYPES :
126+ return key , None , None
127+
128+ key_without_modifier = stripped_key [:open_paren_index ].rstrip ()
129+ parsed_description = description .strip () if separator else None
130+ return key_without_modifier , modifier , parsed_description or None
101131
102132
103133def _parse_type_and_description (value : str ) -> tuple [str , str | None ]:
@@ -170,7 +200,9 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
170200 fields : list [SchemaField ] = []
171201
172202 for key , value in yaml_dict .items ():
173- name , required , is_array , is_enum , is_object = _parse_field_key (key )
203+ name , required , is_array , is_enum , is_object , key_description = _parse_field_key_parts (
204+ key
205+ )
174206
175207 # --- Enum fields ---
176208 # Trigger: value is a list or a string containing bracketed enum values
@@ -179,11 +211,12 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
179211 # in YAML to avoid parse errors)
180212 # Outcome: SchemaField with is_enum=True and enum_values populated
181213 if is_enum :
182- description = None
214+ description = key_description
183215 if isinstance (value , list ):
184216 enum_values = [str (v ) for v in value ]
185217 else :
186- enum_values , description = _parse_enum_string (str (value ))
218+ enum_values , value_description = _parse_enum_string (str (value ))
219+ description = description or value_description
187220 fields .append (
188221 SchemaField (
189222 name = name ,
@@ -207,13 +240,15 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
207240 name = name ,
208241 type = "object" ,
209242 required = required ,
243+ description = key_description ,
210244 children = children ,
211245 )
212246 )
213247 continue
214248
215249 # --- Scalar and entity ref fields ---
216- type_str , description = _parse_type_and_description (str (value ))
250+ type_str , value_description = _parse_type_and_description (str (value ))
251+ description = key_description or value_description
217252 is_entity_ref = _is_entity_ref_type (type_str )
218253
219254 fields .append (
0 commit comments