Skip to content

Commit 43a5dc6

Browse files
committed
fix(core): parse picoschema modifier descriptions
Signed-off-by: Drew Cain <groksrc@gmail.com>
1 parent 09a4b09 commit 43a5dc6

3 files changed

Lines changed: 253 additions & 20 deletions

File tree

src/basic_memory/schema/parser.py

Lines changed: 55 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
field: type, description # required field
99
field?: type, description # optional field
1010
field(array): type # array of values
11+
field(array, description): type # array with description
1112
field?(enum): [val1, val2] # enumeration
13+
field?(enum, description): [val1, val2] # enum with description
1214
field?(object): # nested object
1315
sub_field: type
1416
EntityName as type (capitalized) # entity reference
@@ -58,46 +60,74 @@ class SchemaDefinition:
5860
# with an uppercase letter is treated as an entity reference.
5961

6062
SCALAR_TYPES = frozenset({"string", "integer", "number", "boolean", "any"})
63+
MODIFIER_TYPES = frozenset({"array", "enum", "object"})
6164

6265

6366
# --- Field Name Parsing ---
6467

6568

66-
def _parse_field_key(key: str) -> tuple[str, bool, bool, bool, bool]:
69+
def _parse_field_key_parts(key: str) -> tuple[str, bool, bool, bool, bool, str | None]:
6770
"""Parse a Picoschema field key into its components.
6871
69-
Returns (name, required, is_array, is_enum, is_object).
70-
The key format is: name[?][(array|enum|object)]
72+
Returns (name, required, is_array, is_enum, is_object, description).
73+
The key format is: name[?][(array|enum|object[, description])]
7174
7275
Examples:
73-
"name" -> ("name", True, False, False)
74-
"role?" -> ("role", False, False, False)
75-
"tags?(array)" -> ("tags", False, True, False)
76-
"status?(enum)" -> ("status", False, False, True)
77-
"metadata?(object)" -> ("metadata", False, False, False) + children
76+
"name" -> ("name", True, False, False, False, None)
77+
"role?" -> ("role", False, False, False, False, None)
78+
"tags?(array)" -> ("tags", False, True, False, False, None)
79+
"tags?(array, labels)" -> ("tags", False, True, False, False, "labels")
80+
"status?(enum)" -> ("status", False, False, True, False, None)
81+
"metadata?(object)" -> ("metadata", False, False, False, True, None)
7882
"""
7983
required = True
8084
is_array = False
8185
is_enum = False
8286
is_object = False
87+
description = None
88+
89+
key, modifier, description = _split_modifier_suffix(key)
8390

84-
# Check for modifier suffix: (array), (enum), (object)
85-
if key.endswith("(array)"):
91+
if modifier == "array":
8692
is_array = True
87-
key = key[: -len("(array)")]
88-
elif key.endswith("(enum)"):
93+
elif modifier == "enum":
8994
is_enum = True
90-
key = key[: -len("(enum)")]
91-
elif key.endswith("(object)"):
95+
elif modifier == "object":
9296
is_object = True
93-
key = key[: -len("(object)")]
9497

9598
# Check for optional marker
9699
if key.endswith("?"):
97100
required = False
98101
key = key[:-1]
99102

100-
return key, required, is_array, is_enum, is_object
103+
return key.strip(), required, is_array, is_enum, is_object, description
104+
105+
106+
def _parse_field_key(key: str) -> tuple[str, bool, bool, bool, bool]:
107+
"""Parse a Picoschema field key, discarding any modifier description."""
108+
name, required, is_array, is_enum, is_object, _description = _parse_field_key_parts(key)
109+
return name, required, is_array, is_enum, is_object
110+
111+
112+
def _split_modifier_suffix(key: str) -> tuple[str, str | None, str | None]:
113+
"""Split a trailing picoschema modifier from a field key."""
114+
stripped_key = key.rstrip()
115+
if not stripped_key.endswith(")"):
116+
return key, None, None
117+
118+
open_paren_index = stripped_key.find("(")
119+
if open_paren_index == -1:
120+
return key, None, None
121+
122+
modifier_text = stripped_key[open_paren_index + 1 : -1].strip()
123+
modifier, separator, description = modifier_text.partition(",")
124+
modifier = modifier.strip()
125+
if modifier not in MODIFIER_TYPES:
126+
return key, None, None
127+
128+
key_without_modifier = stripped_key[:open_paren_index].rstrip()
129+
parsed_description = description.strip() if separator else None
130+
return key_without_modifier, modifier, parsed_description or None
101131

102132

103133
def _parse_type_and_description(value: str) -> tuple[str, str | None]:
@@ -170,7 +200,9 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
170200
fields: list[SchemaField] = []
171201

172202
for key, value in yaml_dict.items():
173-
name, required, is_array, is_enum, is_object = _parse_field_key(key)
203+
name, required, is_array, is_enum, is_object, key_description = _parse_field_key_parts(
204+
key
205+
)
174206

175207
# --- Enum fields ---
176208
# Trigger: value is a list or a string containing bracketed enum values
@@ -179,11 +211,12 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
179211
# in YAML to avoid parse errors)
180212
# Outcome: SchemaField with is_enum=True and enum_values populated
181213
if is_enum:
182-
description = None
214+
description = key_description
183215
if isinstance(value, list):
184216
enum_values = [str(v) for v in value]
185217
else:
186-
enum_values, description = _parse_enum_string(str(value))
218+
enum_values, value_description = _parse_enum_string(str(value))
219+
description = description or value_description
187220
fields.append(
188221
SchemaField(
189222
name=name,
@@ -207,13 +240,15 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
207240
name=name,
208241
type="object",
209242
required=required,
243+
description=key_description,
210244
children=children,
211245
)
212246
)
213247
continue
214248

215249
# --- Scalar and entity ref fields ---
216-
type_str, description = _parse_type_and_description(str(value))
250+
type_str, value_description = _parse_type_and_description(str(value))
251+
description = key_description or value_description
217252
is_entity_ref = _is_entity_ref_type(type_str)
218253

219254
fields.append(

tests/mcp/test_tool_schema.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,72 @@ async def test_schema_validate_json_output(app, test_project, sync_service):
116116
assert result["results"][0]["passed"] is True
117117

118118

119+
@pytest.mark.asyncio
120+
async def test_schema_validate_picoschema_modifier_descriptions(
121+
app, test_project, sync_service
122+
):
123+
"""Modifier descriptions should not become literal field names."""
124+
project_path = Path(test_project.path)
125+
126+
_write_schema_file(
127+
project_path,
128+
"schemas/PicoTest.md",
129+
"""\
130+
---
131+
title: PicoTest
132+
type: schema
133+
entity: pico_test
134+
schema:
135+
name: string
136+
status(enum, current state): [active, inactive]
137+
tags(array, list of tags): string
138+
settings:
139+
validation: warn
140+
---
141+
142+
# PicoTest
143+
""",
144+
)
145+
_write_schema_file(
146+
project_path,
147+
"pico/PicoTest1.md",
148+
"""\
149+
---
150+
title: PicoTest1
151+
type: pico_test
152+
permalink: pico/pico-test-1
153+
---
154+
155+
# PicoTest1
156+
157+
## Observations
158+
- [name] PicoTest1
159+
- [status] active
160+
- [tags] foo
161+
- [tags] bar
162+
""",
163+
)
164+
165+
await sync_service.sync(project_path)
166+
167+
result = await schema_validate(
168+
note_type="pico_test",
169+
project=test_project.name,
170+
output_format="json",
171+
)
172+
173+
note_result = result["results"][0]
174+
field_statuses = {fr["field_name"]: fr["status"] for fr in note_result["field_results"]}
175+
assert result["valid_count"] == 1
176+
assert note_result["warnings"] == []
177+
assert note_result["unmatched_observations"] == {}
178+
assert field_statuses == {
179+
"name": "present",
180+
"status": "present",
181+
"tags": "present",
182+
}
183+
184+
119185
@pytest.mark.asyncio
120186
async def test_schema_validate_by_identifier(app, test_project, sync_service):
121187
"""Validate a specific note by identifier."""

0 commit comments

Comments
 (0)