88 field: type, description # required field
99 field?: type, description # optional field
1010 field(array): type # array of values
11+ field(array, description): type # array with description
1112 field?(enum): [val1, val2] # enumeration
13+ field?(enum, description): [val1, val2] # enum with description
1214 field?(object): # nested object
1315 sub_field: type
1416 EntityName as type (capitalized) # entity reference
@@ -58,46 +60,88 @@ class SchemaDefinition:
5860# with an uppercase letter is treated as an entity reference.
5961
SCALAR_TYPES = frozenset(("string", "integer", "number", "boolean", "any"))
# Modifier keywords accepted inside the parentheses of a field key.
MODIFIER_TYPES = frozenset(("array", "enum", "object"))
6164
6265
6366# --- Field Name Parsing ---
6467
6568
def _parse_field_key_parts(key: str) -> tuple[str, bool, bool, bool, bool, str | None]:
    """Parse a Picoschema field key into its components.

    Returns (name, required, is_array, is_enum, is_object, description).
    The key format is: name[?][(array|enum|object[, description])]

    ``description`` is the text following the comma inside the modifier
    parentheses, or None when the key carries no such text.

    Examples:
        "name"                 -> ("name", True, False, False, False, None)
        "role?"                -> ("role", False, False, False, False, None)
        "role? "               -> ("role", False, False, False, False, None)
        "tags?(array)"         -> ("tags", False, True, False, False, None)
        "tags?(array, labels)" -> ("tags", False, True, False, False, "labels")
        "status?(enum)"        -> ("status", False, False, True, False, None)
        "metadata?(object)"    -> ("metadata", False, False, False, True, None)
    """
    key, modifier, description = _split_modifier_suffix(key)

    # Trim before looking for the optional marker: a key without a modifier
    # comes back from _split_modifier_suffix untouched, so trailing whitespace
    # would otherwise hide the "?" and leave it glued to the field name.
    key = key.strip()
    required = not key.endswith("?")
    if not required:
        key = key[:-1]

    return (
        key.strip(),
        required,
        modifier == "array",
        modifier == "enum",
        modifier == "object",
        description,
    )
104+
105+
def _parse_field_key(key: str) -> tuple[str, bool, bool, bool, bool]:
    """Return (name, required, is_array, is_enum, is_object) for a field key.

    Delegates to _parse_field_key_parts and drops the trailing modifier
    description from its result.
    """
    return _parse_field_key_parts(key)[:5]
110+
111+
def _split_modifier_suffix(key: str) -> tuple[str, str | None, str | None]:
    """Split a trailing picoschema modifier from a field key.

    Returns (key, modifier, description). When the key does not end in a
    "(modifier[, description])" group whose modifier is one of MODIFIER_TYPES,
    the key is handed back unchanged with modifier and description both None.
    Otherwise the returned key is the text before the group with trailing
    whitespace removed, and an empty description is reported as None.
    """
    unchanged = (key, None, None)

    trimmed = key.rstrip()
    if not trimmed.endswith(")"):
        return unchanged

    # Trigger: field names and modifier descriptions may both contain parentheses
    # Why: only the "(" that balances the final ")" can open the modifier group
    # Outcome: keeps names like "risk(score)" and descriptions like
    #          "labels (freeform)" intact
    opener = None
    nesting = 0
    last = len(trimmed) - 1
    for distance, char in enumerate(reversed(trimmed)):
        if char == ")":
            nesting += 1
        elif char == "(":
            nesting -= 1
            if nesting == 0:
                opener = last - distance
                break

    if opener is None:
        return unchanged

    inner = trimmed[opener + 1 : -1].strip()
    head, comma, tail = inner.partition(",")
    modifier = head.strip()
    if modifier not in MODIFIER_TYPES:
        return unchanged

    description = tail.strip() if comma else ""
    return trimmed[:opener].rstrip(), modifier, description or None
101145
102146
103147def _parse_type_and_description (value : str ) -> tuple [str , str | None ]:
@@ -170,7 +214,7 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
170214 fields : list [SchemaField ] = []
171215
172216 for key , value in yaml_dict .items ():
173- name , required , is_array , is_enum , is_object = _parse_field_key (key )
217+ name , required , is_array , is_enum , is_object , key_description = _parse_field_key_parts (key )
174218
175219 # --- Enum fields ---
176220 # Trigger: value is a list or a string containing bracketed enum values
@@ -179,11 +223,12 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
179223 # in YAML to avoid parse errors)
180224 # Outcome: SchemaField with is_enum=True and enum_values populated
181225 if is_enum :
182- description = None
226+ description = key_description
183227 if isinstance (value , list ):
184228 enum_values = [str (v ) for v in value ]
185229 else :
186- enum_values , description = _parse_enum_string (str (value ))
230+ enum_values , value_description = _parse_enum_string (str (value ))
231+ description = description or value_description
187232 fields .append (
188233 SchemaField (
189234 name = name ,
@@ -207,13 +252,15 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
207252 name = name ,
208253 type = "object" ,
209254 required = required ,
255+ description = key_description ,
210256 children = children ,
211257 )
212258 )
213259 continue
214260
215261 # --- Scalar and entity ref fields ---
216- type_str , description = _parse_type_and_description (str (value ))
262+ type_str , value_description = _parse_type_and_description (str (value ))
263+ description = key_description or value_description
217264 is_entity_ref = _is_entity_ref_type (type_str )
218265
219266 fields .append (
0 commit comments