Skip to content

Commit 7ac0ea9

Browse files
committed
fix picoschema modifier descriptions
1 parent 09a4b09 commit 7ac0ea9

3 files changed

Lines changed: 194 additions & 19 deletions

File tree

src/basic_memory/schema/parser.py

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
field: type, description # required field
99
field?: type, description # optional field
1010
field(array): type # array of values
11+
field(array, description): type # array with description
1112
field?(enum): [val1, val2] # enumeration
13+
field?(enum, description): [val1, val2] # enum with description
1214
field?(object): # nested object
1315
sub_field: type
1416
EntityName as type (capitalized) # entity reference
@@ -59,45 +61,58 @@ class SchemaDefinition:
5961

6062
SCALAR_TYPES = frozenset({"string", "integer", "number", "boolean", "any"})
6163

64+
MODIFIER_RE = re.compile(r"\(\s*(array|enum|object)\s*(?:,\s*([^)]*?))?\s*\)\s*$")
65+
6266

6367
# --- Field Name Parsing ---
6468

6569

66-
def _parse_field_key(key: str) -> tuple[str, bool, bool, bool, bool]:
70+
def _parse_field_key_parts(key: str) -> tuple[str, bool, bool, bool, bool, str | None]:
6771
"""Parse a Picoschema field key into its components.
6872
69-
Returns (name, required, is_array, is_enum, is_object).
70-
The key format is: name[?][(array|enum|object)]
73+
Returns (name, required, is_array, is_enum, is_object, description).
74+
The key format is: name[?][(array|enum|object[, description])]
7175
7276
Examples:
7377
"name" -> ("name", True, False, False)
7478
"role?" -> ("role", False, False, False)
7579
"tags?(array)" -> ("tags", False, True, False)
80+
"tags?(array, labels)" -> ("tags", False, True, False) + "labels"
7681
"status?(enum)" -> ("status", False, False, True)
7782
"metadata?(object)" -> ("metadata", False, False, False) + children
7883
"""
7984
required = True
8085
is_array = False
8186
is_enum = False
8287
is_object = False
83-
84-
# Check for modifier suffix: (array), (enum), (object)
85-
if key.endswith("(array)"):
86-
is_array = True
87-
key = key[: -len("(array)")]
88-
elif key.endswith("(enum)"):
89-
is_enum = True
90-
key = key[: -len("(enum)")]
91-
elif key.endswith("(object)"):
92-
is_object = True
93-
key = key[: -len("(object)")]
88+
description = None
89+
90+
modifier_match = MODIFIER_RE.search(key)
91+
if modifier_match:
92+
modifier = modifier_match.group(1)
93+
description_match = modifier_match.group(2)
94+
description = description_match.strip() if description_match else None
95+
key = key[: modifier_match.start()].rstrip()
96+
97+
if modifier == "array":
98+
is_array = True
99+
elif modifier == "enum":
100+
is_enum = True
101+
elif modifier == "object":
102+
is_object = True
94103

95104
# Check for optional marker
96105
if key.endswith("?"):
97106
required = False
98107
key = key[:-1]
99108

100-
return key, required, is_array, is_enum, is_object
109+
return key.strip(), required, is_array, is_enum, is_object, description
110+
111+
112+
def _parse_field_key(key: str) -> tuple[str, bool, bool, bool, bool]:
113+
"""Parse a Picoschema field key, discarding any modifier description."""
114+
name, required, is_array, is_enum, is_object, _description = _parse_field_key_parts(key)
115+
return name, required, is_array, is_enum, is_object
101116

102117

103118
def _parse_type_and_description(value: str) -> tuple[str, str | None]:
@@ -170,7 +185,9 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
170185
fields: list[SchemaField] = []
171186

172187
for key, value in yaml_dict.items():
173-
name, required, is_array, is_enum, is_object = _parse_field_key(key)
188+
name, required, is_array, is_enum, is_object, key_description = _parse_field_key_parts(
189+
key
190+
)
174191

175192
# --- Enum fields ---
176193
# Trigger: value is a list or a string containing bracketed enum values
@@ -179,11 +196,12 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
179196
# in YAML to avoid parse errors)
180197
# Outcome: SchemaField with is_enum=True and enum_values populated
181198
if is_enum:
182-
description = None
199+
description = key_description
183200
if isinstance(value, list):
184201
enum_values = [str(v) for v in value]
185202
else:
186-
enum_values, description = _parse_enum_string(str(value))
203+
enum_values, value_description = _parse_enum_string(str(value))
204+
description = description or value_description
187205
fields.append(
188206
SchemaField(
189207
name=name,
@@ -207,13 +225,15 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
207225
name=name,
208226
type="object",
209227
required=required,
228+
description=key_description,
210229
children=children,
211230
)
212231
)
213232
continue
214233

215234
# --- Scalar and entity ref fields ---
216-
type_str, description = _parse_type_and_description(str(value))
235+
type_str, value_description = _parse_type_and_description(str(value))
236+
description = key_description or value_description
217237
is_entity_ref = _is_entity_ref_type(type_str)
218238

219239
fields.append(

tests/mcp/test_tool_schema.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,72 @@ async def test_schema_validate_json_output(app, test_project, sync_service):
116116
assert result["results"][0]["passed"] is True
117117

118118

119+
@pytest.mark.asyncio
120+
async def test_schema_validate_picoschema_modifier_descriptions(
121+
app, test_project, sync_service
122+
):
123+
"""Modifier descriptions should not become literal field names."""
124+
project_path = Path(test_project.path)
125+
126+
_write_schema_file(
127+
project_path,
128+
"schemas/PicoTest.md",
129+
"""\
130+
---
131+
title: PicoTest
132+
type: schema
133+
entity: pico_test
134+
schema:
135+
name: string
136+
status(enum, current state): [active, inactive]
137+
tags(array, list of tags): string
138+
settings:
139+
validation: warn
140+
---
141+
142+
# PicoTest
143+
""",
144+
)
145+
_write_schema_file(
146+
project_path,
147+
"pico/PicoTest1.md",
148+
"""\
149+
---
150+
title: PicoTest1
151+
type: pico_test
152+
permalink: pico/pico-test-1
153+
---
154+
155+
# PicoTest1
156+
157+
## Observations
158+
- [name] PicoTest1
159+
- [status] active
160+
- [tags] foo
161+
- [tags] bar
162+
""",
163+
)
164+
165+
await sync_service.sync(project_path)
166+
167+
result = await schema_validate(
168+
note_type="pico_test",
169+
project=test_project.name,
170+
output_format="json",
171+
)
172+
173+
note_result = result["results"][0]
174+
field_statuses = {fr["field_name"]: fr["status"] for fr in note_result["field_results"]}
175+
assert result["valid_count"] == 1
176+
assert note_result["warnings"] == []
177+
assert note_result["unmatched_observations"] == {}
178+
assert field_statuses == {
179+
"name": "present",
180+
"status": "present",
181+
"tags": "present",
182+
}
183+
184+
119185
@pytest.mark.asyncio
120186
async def test_schema_validate_by_identifier(app, test_project, sync_service):
121187
"""Validate a specific note by identifier."""

tests/schema/test_parser.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,52 @@ def test_optional_array(self):
4343
assert required is False
4444
assert is_array is True
4545

46+
def test_array_with_description_in_modifier(self):
47+
name, required, is_array, is_enum, is_object = _parse_field_key(
48+
"tags(array, list of tags)"
49+
)
50+
assert name == "tags"
51+
assert required is True
52+
assert is_array is True
53+
assert is_enum is False
54+
assert is_object is False
55+
56+
def test_optional_array_with_description_in_modifier(self):
57+
name, required, is_array, is_enum, is_object = _parse_field_key(
58+
"tags?(array, list of tags)"
59+
)
60+
assert name == "tags"
61+
assert required is False
62+
assert is_array is True
63+
4664
def test_enum(self):
4765
name, required, is_array, is_enum, is_object = _parse_field_key("status?(enum)")
4866
assert name == "status"
4967
assert required is False
5068
assert is_enum is True
5169

70+
def test_enum_with_description_in_modifier(self):
71+
name, required, is_array, is_enum, is_object = _parse_field_key(
72+
"status(enum, current state)"
73+
)
74+
assert name == "status"
75+
assert required is True
76+
assert is_enum is True
77+
5278
def test_object(self):
5379
name, required, is_array, is_enum, is_object = _parse_field_key("metadata?(object)")
5480
assert name == "metadata"
5581
assert required is False
5682
assert is_object is True
5783

84+
def test_object_with_description_in_modifier(self):
85+
name, required, is_array, is_enum, is_object = _parse_field_key(
86+
"metadata?(object, nested metadata)"
87+
)
88+
assert name == "metadata"
89+
assert required is False
90+
assert is_object is True
91+
5892
def test_required_enum(self):
5993
name, required, is_array, is_enum, is_object = _parse_field_key("status(enum)")
6094
assert name == "status"
@@ -152,6 +186,13 @@ def test_array_field(self):
152186
assert fields[0].is_array is True
153187
assert fields[0].required is False
154188

189+
def test_array_field_with_description_in_modifier(self):
190+
fields = parse_picoschema({"tags(array, list of tags)": "string"})
191+
assert fields[0].name == "tags"
192+
assert fields[0].type == "string"
193+
assert fields[0].is_array is True
194+
assert fields[0].description == "list of tags"
195+
155196
def test_entity_ref_field(self):
156197
fields = parse_picoschema({"works_at?": "Organization, employer"})
157198
assert fields[0].name == "works_at"
@@ -165,6 +206,15 @@ def test_enum_field_with_list(self):
165206
assert fields[0].is_enum is True
166207
assert fields[0].enum_values == ["active", "inactive"]
167208

209+
def test_enum_field_with_description_in_modifier(self):
210+
fields = parse_picoschema({"status(enum, current state)": ["active", "inactive"]})
211+
assert fields[0].name == "status"
212+
assert fields[0].type == "enum"
213+
assert fields[0].required is True
214+
assert fields[0].is_enum is True
215+
assert fields[0].enum_values == ["active", "inactive"]
216+
assert fields[0].description == "current state"
217+
168218
def test_enum_field_with_string(self):
169219
fields = parse_picoschema({"status?(enum)": "active"})
170220
assert fields[0].is_enum is True
@@ -204,6 +254,20 @@ def test_object_field(self):
204254
assert fields[0].children[0].name == "street"
205255
assert fields[0].children[1].name == "city"
206256

257+
def test_object_field_with_description_in_modifier(self):
258+
fields = parse_picoschema(
259+
{
260+
"address?(object, mailing address)": {
261+
"street": "string",
262+
}
263+
}
264+
)
265+
assert fields[0].name == "address"
266+
assert fields[0].type == "object"
267+
assert fields[0].required is False
268+
assert fields[0].description == "mailing address"
269+
assert fields[0].children[0].name == "street"
270+
207271
def test_dict_value_treated_as_object(self):
208272
"""A dict value without explicit (object) is still treated as an object."""
209273
fields = parse_picoschema(
@@ -315,6 +379,31 @@ def test_settings_frontmatter_parsed_into_frontmatter_fields(self):
315379
assert status_field.is_enum is True
316380
assert status_field.enum_values == ["draft", "published"]
317381

382+
def test_settings_frontmatter_parses_modifier_descriptions(self):
383+
frontmatter = {
384+
"entity": "Person",
385+
"schema": {"name": "string"},
386+
"settings": {
387+
"validation": "warn",
388+
"frontmatter": {
389+
"tags?(array, note tags)": "string",
390+
"status?(enum, publication state)": ["draft", "published"],
391+
},
392+
},
393+
}
394+
result = parse_schema_note(frontmatter)
395+
396+
tags_field = next(f for f in result.frontmatter_fields if f.name == "tags")
397+
assert tags_field.required is False
398+
assert tags_field.is_array is True
399+
assert tags_field.description == "note tags"
400+
401+
status_field = next(f for f in result.frontmatter_fields if f.name == "status")
402+
assert status_field.required is False
403+
assert status_field.is_enum is True
404+
assert status_field.enum_values == ["draft", "published"]
405+
assert status_field.description == "publication state"
406+
318407
def test_no_settings_frontmatter_defaults_to_empty(self):
319408
frontmatter = {
320409
"entity": "Person",

0 commit comments

Comments
 (0)