Skip to content

Commit cc30781

Browse files
joehendrixclaude
andauthored
Add pySpec generator (#355)
This PR introduces a PySpec generator prototype along with several supporting improvements and bug fixes. Main Feature: PySpec Generator Adds a new strata pySpecs subcommand that translates Python specification files into binary PySpec files. Python specification files capture method signatures and their pre/post-conditions as idiomatic Python source code, facilitating LLM-assisted generation. The binary PySpec format enables efficient loading when validating source code that uses these imports. New files: - Strata/Languages/Python/Specs.lean - Core specification translation logic - Strata/Languages/Python/Specs/DDM.lean - DDM types for PySpec - Strata/Languages/Python/Specs/Decls.lean - Declaration handling - Strata/Languages/Python/ReadPython.lean - Python file reading utilities - Test files: StrataTest/Languages/Python/Specs/{basetypes.py, main.py} and SpecsTest.lean - StrataTest/Util/Python.lean - Python testing utilities Refactoring and Code Organization Source range improvements: - Migrated source mapping types from Strata/DDM/AST.lean to dedicated modules: - Strata/DDM/Util/SourceRange.lean - SourceRange definition - Strata/Util/FileRange.lean - FileRange, Uri, DiagnosticModel types and formatting - Added Strata/Util/DecideProp.lean for decidability utilities - Reduces unnecessary code in DDM and improves modularity Import organization: - Pruned unnecessary imports in StrataMain.lean - Filter builtin module imports from Python signatures Bug Fixes 1. DDM Ion parsing: Fixed bug in parsing DDM Ident types in Ion files (changed .asString to .asSymbolString in Strata/DDM/Ion.lean) 2. DDM formatting: - Fixed precedence calculation in Strata/DDM/Format.lean (changed maxPrec to maxPrec + 1 in atom definition) - This fixes C_Simp formatting issues where output had missing spaces (e.g., (bool)proceduretrivial() → bool procedure trivial()) - Made FormatState.empty public to enable external instantiation 3. Python tooling: - Added recursion limit (2500) in Tools/Python/strata/gen.py - Improved parse error handling with dedicated exit code (100) for parse failures - Cleaned up handling of deprecated AST operations Testing - Updated C_Simp example test expectations to reflect formatting fixes - Added comprehensive PySpec test cases --- By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent bf07bb5 commit cc30781

32 files changed

Lines changed: 2637 additions & 191 deletions

File tree

.github/workflows/ci.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ jobs:
157157
auto-config: false
158158
build: true
159159
build-args: "strata:exe"
160+
- name: Run PySpec test that may be skipped without Python
161+
run: PYTHON=python PYTHON_TEST=1 lake build StrataTest.Languages.Python.SpecsTest
160162
- name: Run test script
161163
run: FAIL_FAST=1 ./scripts/run_cpython_tests.sh ${{ matrix.python_version }}
162164
working-directory: Tools/Python

Strata/DDM/AST.lean

Lines changed: 117 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
module
77

88
public import Std.Data.HashMap.Basic
9+
public import Strata.DDM.AST.Datatype
910
public import Strata.DDM.Util.ByteArray
1011
public import Strata.DDM.Util.Decimal
11-
public import Lean.Data.Position
12-
public import Strata.DDM.AST.Datatype
12+
public import Strata.DDM.Util.SourceRange
1313

1414
import Std.Data.HashMap
1515
import all Strata.DDM.Util.Array
@@ -158,6 +158,28 @@ protected def instTypeM {m α} [Monad m] (d : TypeExprF α) (bindings : α → N
158158
| .arrow n a b => .arrow n <$> a.instTypeM bindings <*> b.instTypeM bindings
159159
termination_by d
160160

161+
/-- Monadic map over all annotations in a type expression. -/
162+
@[specialize]
163+
def mapAnnM {α β} {m} [Monad m] (t : TypeExprF α) (f : α → m β)
164+
: m (TypeExprF β) := do
165+
match t with
166+
| .ident ann name args =>
167+
return .ident (← f ann) name
168+
(← args.attach.mapM fun ⟨e, _⟩ => e.mapAnnM f)
169+
| .bvar ann index => return .bvar (← f ann) index
170+
| .tvar ann name => return .tvar (← f ann) name
171+
| .fvar ann fv args =>
172+
return .fvar (← f ann) fv
173+
(← args.attach.mapM fun ⟨e, _⟩ => e.mapAnnM f)
174+
| .arrow ann arg res =>
175+
return .arrow (← f ann) (← arg.mapAnnM f) (← res.mapAnnM f)
176+
termination_by t
177+
178+
/-- Map over all annotations in a type expression. -/
179+
@[specialize]
180+
def mapAnn {α β} (t : TypeExprF α) (f : α → β) : TypeExprF β :=
181+
t.mapAnnM (m := Id) f
182+
161183
end TypeExprF
162184

163185
/-- Separator format for sequence formatting -/
@@ -267,128 +289,104 @@ end OperationF
267289

268290
namespace ExprF
269291

292+
/--
293+
Flattens a curried application expression into its head and list of arguments.
294+
For example, `((f a) b) c` becomes `(f, [a, b, c])`.
295+
-/
270296
public def flatten {α} (e : ExprF α) (prev : List (ArgF α) := []) : ExprF α × List (ArgF α) :=
271297
match e with
272298
| .app _ f e => f.flatten (e :: prev)
273299
| _ => (e, prev)
274300

275301
end ExprF
276302

277-
/--
278-
Source location information in the DDM is defined
279-
by a range of bytes in a UTF-8 string with the input
280-
Line/column information can be construced from a
281-
`Lean.FileMap`
282-
283-
As an example, in the string `"123abc\ndef"`, the string
284-
`"abc"` has the position `{start := 3, stop := 6 }` while
285-
`"def"` has the position `{start := 7, stop := 10 }`.
286-
-/
287-
structure SourceRange where
288-
/-- The starting offset of the source range. -/
289-
start : String.Pos.Raw
290-
/-- One past the end of the range. -/
291-
stop : String.Pos.Raw
292-
deriving DecidableEq, Inhabited, Repr
293-
294-
namespace SourceRange
295-
296-
def none : SourceRange := { start := 0, stop := 0 }
297-
298-
def isNone (loc : SourceRange) : Bool := loc.start = 0 ∧ loc.stop = 0
299-
300-
instance : ToFormat SourceRange where
301-
format fr := f!"{fr.start}-{fr.stop}"
302-
303-
end SourceRange
304-
305-
inductive Uri where
306-
| file (path: String)
307-
deriving DecidableEq, Repr, Inhabited
308-
309-
instance : ToFormat Uri where
310-
format fr := match fr with | .file path => path
311-
312-
structure FileRange where
313-
file: Uri
314-
range: Strata.SourceRange
315-
deriving DecidableEq, Repr, Inhabited
316-
317-
instance : ToFormat FileRange where
318-
format fr := f!"{fr.file}:{fr.range}"
319-
320-
/-- A default file range for errors without source location.
321-
This should only be used for generated nodes that are guaranteed to be correct. -/
322-
def FileRange.unknown : FileRange :=
323-
{ file := .file "<unknown>", range := SourceRange.none }
324-
325-
/-- A diagnostic model that holds a file range and a message.
326-
This can be converted to a formatted string using a FileMap. -/
327-
structure DiagnosticModel where
328-
fileRange : FileRange
329-
message : String
330-
deriving Repr, BEq, Inhabited
331-
332-
instance : Inhabited DiagnosticModel where
333-
default := { fileRange := FileRange.unknown, message := "" }
334-
335-
/-- Create a DiagnosticModel from just a message (using default location).
336-
This should not be called, it only exists temporarily to enabling incrementally migrating code without error locations -/
337-
def DiagnosticModel.fromMessage (msg : String) : DiagnosticModel :=
338-
{ fileRange := FileRange.unknown, message := msg }
303+
/-- Monadic map over all annotations in a syntax category. -/
304+
@[specialize]
305+
def SyntaxCatF.mapAnnM {α β} {m} [Monad m] (c : SyntaxCatF α)
306+
(f : α → m β) : m (SyntaxCatF β) := do
307+
return {
308+
ann := ← f c.ann
309+
name := c.name
310+
args := ← c.args.attach.mapM fun ⟨e, _⟩ => e.mapAnnM f
311+
}
312+
termination_by sizeOf c
313+
decreasing_by
314+
cases c
315+
case mk ann name args =>
316+
decreasing_tactic
339317

340-
/-- Create a DiagnosticModel from a Format (using default location).
341-
This should not be called, it only exists temporarily to enabling incrementally migrating code without error locations -/
342-
def DiagnosticModel.fromFormat (fmt : Std.Format) : DiagnosticModel :=
343-
{ fileRange := FileRange.unknown, message := toString fmt }
318+
/-- Map over all annotations in a syntax category. -/
319+
@[specialize]
320+
def SyntaxCatF.mapAnn {α β} (c : SyntaxCatF α) (f : α → β) : SyntaxCatF β :=
321+
c.mapAnnM (m := Id) f
344322

345-
/-- Create a DiagnosticModel with source location. -/
346-
def DiagnosticModel.withRange (fr : FileRange) (msg : Format) : DiagnosticModel :=
347-
{ fileRange := fr, message := toString msg }
323+
mutual
348324

349-
/-- Format a file range using a FileMap to convert byte offsets to line/column positions. -/
350-
def FileRange.format (fr : FileRange) (fileMap : Option Lean.FileMap) (includeEnd? : Bool := true) : Std.Format :=
351-
let baseName := match fr.file with
352-
| .file path => (path.splitToList (· == '/')).getLast!
353-
match fileMap with
354-
| some fm =>
355-
let startPos := fm.toPosition fr.range.start
356-
let endPos := fm.toPosition fr.range.stop
357-
if includeEnd? then
358-
if startPos.line == endPos.line then
359-
f!"{baseName}({startPos.line},({startPos.column}-{endPos.column}))"
360-
else
361-
f!"{baseName}(({startPos.line},{startPos.column})-({endPos.line},{endPos.column}))"
362-
else
363-
f!"{baseName}({startPos.line}, {startPos.column})"
364-
| none =>
365-
if fr.range.isNone then
366-
f!""
367-
else
368-
f!"{baseName}({fr.range.start}-{fr.range.stop})"
325+
/-- Monadic map over all annotations in an expression. -/
326+
@[specialize]
327+
def ExprF.mapAnnM {α β} {m} [Monad m] (e : ExprF α) (f : α → m β)
328+
: m (ExprF β) := do
329+
match e with
330+
| .bvar ann idx => return .bvar (← f ann) idx
331+
| .fvar ann idx => return .fvar (← f ann) idx
332+
| .fn ann ident => return .fn (← f ann) ident
333+
| .app ann e a =>
334+
return .app (← f ann) (← e.mapAnnM f) (← a.mapAnnM f)
335+
termination_by sizeOf e
336+
337+
/-- Monadic map over all annotations in an argument. -/
338+
@[specialize]
339+
def ArgF.mapAnnM {α β} {m} [Monad m] (a : ArgF α) (f : α → m β)
340+
: m (ArgF β) := do
341+
match a with
342+
| .op o => return .op (← o.mapAnnM f)
343+
| .cat c => return .cat (← c.mapAnnM f)
344+
| .expr e => return .expr (← e.mapAnnM f)
345+
| .type t => return .type (← t.mapAnnM f)
346+
| .ident ann i => return .ident (← f ann) i
347+
| .num ann v => return .num (← f ann) v
348+
| .decimal ann v => return .decimal (← f ann) v
349+
| .strlit ann i => return .strlit (← f ann) i
350+
| .bytes ann b => return .bytes (← f ann) b
351+
| .option ann none => return .option (← f ann) none
352+
| .option ann (some a) =>
353+
return .option (← f ann) (some (← a.mapAnnM f))
354+
| .seq ann sep l =>
355+
return .seq (← f ann) sep
356+
(← l.attach.mapM fun ⟨e, _⟩ => e.mapAnnM f)
357+
termination_by sizeOf a
358+
359+
/-- Map a monadic function over all annotations in an operation. -/
360+
@[specialize]
361+
def OperationF.mapAnnM {α β} {m} [Monad m] (op : OperationF α)
362+
(f : α → m β) : m (OperationF β) := do
363+
return {
364+
ann := ← f op.ann
365+
name := op.name
366+
args := ← op.args.attach.mapM fun ⟨e, _⟩ => e.mapAnnM f
367+
}
368+
termination_by sizeOf op
369+
decreasing_by
370+
cases op
371+
case mk ann name args =>
372+
decreasing_tactic
369373

370-
/-- Format a DiagnosticModel using a FileMap to convert byte offsets to line/column positions. -/
371-
def DiagnosticModel.format (dm : DiagnosticModel) (fileMap : Option Lean.FileMap) (includeEnd? : Bool := true) : Std.Format :=
372-
let rangeStr := dm.fileRange.format fileMap includeEnd?
373-
if dm.fileRange.range.isNone then
374-
f!"{dm.message}"
375-
else
376-
f!"{rangeStr} {dm.message}"
374+
end
377375

378-
/-- Format just the file range portion of a DiagnosticModel. -/
379-
def DiagnosticModel.formatRange (dm : DiagnosticModel) (fileMap : Option Lean.FileMap) (includeEnd? : Bool := true) : Std.Format :=
380-
dm.fileRange.format fileMap includeEnd?
376+
/-- Map a pure function over all annotations in an expression. -/
377+
@[specialize]
378+
def ExprF.mapAnn {α β} (e : ExprF α) (f : α → β) : ExprF β :=
379+
e.mapAnnM (m := Id) f
381380

382-
/-- Update the file range of a DiagnosticModel if it's currently unknown.
383-
This should not be called, it only exists temporarily to enabling incrementally migrating code without error locations -/
384-
def DiagnosticModel.withRangeIfUnknown (dm : DiagnosticModel) (fr : FileRange) : DiagnosticModel :=
385-
if dm.fileRange.range.isNone then
386-
{ dm with fileRange := fr }
387-
else
388-
dm
381+
/-- Map a pure function over all annotations in an argument. -/
382+
@[specialize]
383+
def ArgF.mapAnn {α β} (a : ArgF α) (f : α → β) : ArgF β :=
384+
a.mapAnnM (m := Id) f
389385

390-
instance : ToString DiagnosticModel where
391-
toString dm := dm.format none |> toString
386+
/-- Map a pure function over all annotations in an operation. -/
387+
@[specialize]
388+
def OperationF.mapAnn {α β} (op : OperationF α) (f : α → β) : OperationF β :=
389+
op.mapAnnM (m := Id) f
392390

393391
abbrev Arg := ArgF SourceRange
394392
abbrev Expr := ExprF SourceRange
@@ -612,6 +610,10 @@ end Metadata
612610

613611
abbrev Var := String
614612

613+
/--
614+
Converts a deBruijn index to a level (counting from the start rather than
615+
the end). Used internally for metadata argument processing.
616+
-/
615617
private def catbvarLevel (varCount : Nat) : MetadataArg → Nat
616618
| .catbvar idx =>
617619
if idx < varCount then
@@ -1068,6 +1070,7 @@ def nameIndex {argDecls} : BindingSpec argDecls → DebruijnIndex argDecls.size
10681070

10691071
end BindingSpec
10701072

1073+
/-- Monad for parsing new binding specifications, accumulating error messages. -/
10711074
private abbrev NewBindingM := StateM (Array String)
10721075

10731076
private def newBindingErr (msg : String) : NewBindingM Unit :=
@@ -1713,7 +1716,9 @@ end DialectMap
17131716
mutual
17141717

17151718
/--
1716-
Invoke a function `f` over each of the declaration specifications for an arg.
1719+
Recursively folds over all binding specifications declared within an argument.
1720+
Used to collect type bindings, value bindings, and other declarations that
1721+
appear nested within operation arguments.
17171722
-/
17181723
partial def foldOverArgBindingSpecs {α β}
17191724
(m : DialectMap)
@@ -1750,7 +1755,7 @@ private partial def OperationF.foldBindingSpecs {α β}
17501755
| some lvl => foldOverArgAtLevel m f init argDecls argsV lvl
17511756
decl.newBindings.foldl (init := init) fun a b => f a op.ann b argsV
17521757
else
1753-
@panic _ ⟨init⟩ "Expected arguments to match bindings"
1758+
@panic _ ⟨init⟩ s!"{op.name} expects {argDecls.size} arguments when {args.size} provided."
17541759
| _ => @panic _ ⟨init⟩ s!"Unknown operation {op.name}"
17551760

17561761
/--

Strata/DDM/Format.lean

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ deriving Inhabited
7373

7474
namespace PrecFormat
7575

76-
private def atom (format : Format) : PrecFormat := { format, prec := maxPrec }
76+
private def atom (format : Format) : PrecFormat := { format, prec := maxPrec + 1 }
7777

7878
private def ofFormat {α} [Std.ToFormat α] (x : α) (prec : Nat := maxPrec) : PrecFormat := { format := Std.format x, prec }
7979

@@ -133,10 +133,10 @@ structure FormatState where
133133
namespace FormatState
134134

135135
/-- A format context that uses no syntactic sugar. -/
136-
private def empty : FormatState where
136+
def empty : FormatState where
137137
openDialects := {}
138138

139-
private instance : Inhabited FormatState where
139+
instance : Inhabited FormatState where
140140
default := .empty
141141

142142
def pushBinding (s : FormatState) (ident : String) : FormatState :=

Strata/DDM/Ion.lean

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,10 @@ end Ion.Ion
8282

8383
namespace Strata
8484

85-
85+
/--
86+
Represents an Ion value that is either a symbol/string or a non-empty
87+
s-expression. The size proof ensures termination in recursive deserialization.
88+
-/
8689
private inductive StringOrSexp (v : Ion SymbolId) where
8790
| string (s : String)
8891
| sexp (a : Array (Ion SymbolId)) (p : a.size > 0 ∧ sizeOf a < sizeOf v)
@@ -92,6 +95,11 @@ private inductive Required where
9295
| opt
9396
deriving DecidableEq
9497

98+
/--
99+
Maps struct field names to their position indices, tracking which fields are
100+
required. Used during Ion deserialization to validate that all required fields
101+
are present.
102+
-/
95103
private structure StructArgMap (size : Nat) where
96104
map : Std.HashMap String (Fin size) := {}
97105
required : Array (String × Fin size)
@@ -259,6 +267,11 @@ private def sizeOfArrayLowerBound [h : SizeOf α] (a : Array α) : sizeOf a ≥
259267
have p := sizeOfListLowerBound l
260268
decreasing_tactic
261269

270+
/--
271+
Maps an array of Ion struct fields to a fixed-size vector according to the
272+
StructArgMap, validating that all required fields are present and no fields are
273+
duplicated.
274+
-/
262275
private def mapFields {size} (args : Array (SymbolId × Ion SymbolId)) (m : StructArgMap size) :
263276
FromIonM (Vector (Ion SymbolId) size) := do
264277
-- We use an assigned vector below to check
@@ -598,7 +611,7 @@ private protected def ArgF.fromIon {α} [FromIon α] (v : Ion SymbolId) : FromIo
598611
| "ident" =>
599612
let ⟨p⟩ ← .checkArgCount "ident" sexp 3
600613
.ident <$> fromIon sexp[1]
601-
<*> .asString "Identifier value" sexp[2]
614+
<*> .asSymbolString "Identifier value" sexp[2]
602615
| "num" =>
603616
let ⟨p⟩ ← .checkArgCount "num" sexp 3
604617
let ann ← fromIon sexp[1]
@@ -1452,7 +1465,10 @@ def fromIonFragment (f : Ion.Fragment)
14521465
commands := ← fromIonFragmentCommands f
14531466
}
14541467

1455-
def fileFromIon (dialects : DialectMap) (dialect : DialectName) (bytes : ByteArray) : Except String Strata.Program := do
1468+
/--
1469+
Decodes bytes in the Ion format into a single Strata program.
1470+
-/
1471+
def fromIon (dialects : DialectMap) (dialect : DialectName) (bytes : ByteArray) : Except String Strata.Program := do
14561472
let (hdr, frag) ←
14571473
match Strata.Ion.Header.parse bytes with
14581474
| .error msg =>

0 commit comments

Comments
 (0)