Skip to content

Commit f3a0dca

Browse files
joehendrixclaude
andauthored
Replace SpecAtomType.pyClass with .ident, add BinaryIO, and align type mappings (#968)
Replace the `SpecAtomType.pyClass` variant with `.ident` using qualified `PythonIdent`, add `typing.BinaryIO` support, and align PySpec→Laurel type mappings with PythonToLaurel so non-primitive builtins map to `Any` instead of lossy approximations. ## Changes ### 1. Fix missing source locations in PySpec translation errors (`SourceRange.lean`, `Specs.lean`) `SourceRange.format` crashed when called with a `none` range because it unconditionally called `fm.toPosition` on zero offsets. Now it guards on `isNone` and renders `"path:unknown"` instead. Several pattern matches in `Specs.lean` were extracting annotated values via `name.val`/`name.ann` but discarding the source location — these now destructure as `⟨_, name⟩` at the binding site and pass the parent `loc` to error reporters, ensuring translation errors always have usable locations. ### 2. Remove `SpecAtomType.pyClass` variant (`Decls.lean`, `DDM.lean`, `Specs.lean`, `ToLaurel.lean`) `pyClass` stored an unqualified class name string, which was ambiguous across modules. All construction sites now use `.ident` with a fully qualified `PythonIdent` (module + name). DDM deserialization of legacy `typeClass`/`typeClassNoArgs` Ion payloads produces `.ident` with empty `pythonModule` for backward compatibility. All match arms and comparison logic for `.pyClass` are removed. ### 3. Add `typing.BinaryIO` support (`Decls.lean`, `Specs.lean`, `ToLaurel.lean`) `BinaryIO` appears in boto3 stubs (e.g., `lambda_/client.py`'s `invoke` returns `StreamingBody` typed as `BinaryIO`). Without this, pySpecs throws "BinaryIO is not defined in module typing". Added `PythonIdent.typingBinaryIO`, registered it in `preludeSig`, and mapped it to `Any` in `knownIdentTypes`. ### 4. Align `knownIdentTypes` with PythonToLaurel (`ToLaurel.lean`) Previously, PySpec→Laurel mapped types more aggressively than PythonToLaurel: `bytes`/`bytearray`→`TString`, `complex`→`TReal`, `dict`→`DictStrAny`, `Exception`→`Error`, `typing.List`→`ListStr`, `typing.Dict`→`DictStrAny`. PythonToLaurel maps all of these to `Any`. The mismatch meant pyspec-dispatched analysis could generate spurious preconditions (e.g., string-length bounds on `bytes` parameters). Now both paths agree: only `int`, `str`, `bool`, `float`, and `None` get concrete Laurel types; everything else is `Any`. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 296c668 commit f3a0dca

9 files changed

Lines changed: 203 additions & 247 deletions

File tree

Strata/DDM/Util/SourceRange.lean

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,15 @@ instance : Std.ToFormat SourceRange where
4040
Renders location information in a format VSCode understands.
4141
Returns "path:line:col-col" if on same line, otherwise "path:line:col". -/
4242
def format (loc : SourceRange) (path : System.FilePath) (fm : Lean.FileMap) : String :=
43-
let spos := fm.toPosition loc.start
44-
let epos := fm.toPosition loc.stop
45-
if spos.line == epos.line then
46-
s!"{path}:{spos.line}:{spos.column+1}-{epos.column+1}"
43+
if loc.isNone then
44+
s!"{path}:unknown"
4745
else
48-
s!"{path}:{spos.line}:{spos.column+1}"
46+
let spos := fm.toPosition loc.start
47+
let epos := fm.toPosition loc.stop
48+
if spos.line == epos.line then
49+
s!"{path}:{spos.line}:{spos.column+1}-{epos.column+1}"
50+
else
51+
s!"{path}:{spos.line}:{spos.column+1}"
4952

5053
end Strata.SourceRange
5154
end

Strata/Languages/Python/Specs.lean

Lines changed: 154 additions & 150 deletions
Large diffs are not rendered by default.

Strata/Languages/Python/Specs/DDM.lean

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -205,11 +205,6 @@ private def SpecAtomType.toDDM (d : SpecAtomType)
205205
.typeIdentNoArgs loc nm.toDDM
206206
else
207207
.typeIdent loc nm.toDDM ⟨.none, args.map (·.toDDM)⟩
208-
| .pyClass name args =>
209-
if args.isEmpty then
210-
.typeClassNoArgs loc ⟨.none, name⟩
211-
else
212-
.typeClass loc ⟨.none, name⟩ ⟨.none, args.map (·.toDDM)⟩
213208
| .intLiteral i => .typeIntLiteral loc (toDDMInt .none i)
214209
| .stringLiteral v => .typeStringLiteral loc ⟨.none, v⟩
215210
| .typedDict fields types fieldRequired =>
@@ -331,10 +326,10 @@ private def Signature.toDDM (sig : Signature) : DDM.Signature SourceRange :=
331326
private def DDM.SpecType.fromDDM (d : DDM.SpecType SourceRange) : Specs.SpecType :=
332327
match d with
333328
| .typeClassNoArgs loc ⟨_, cl⟩ =>
334-
.ofAtom loc <| .pyClass cl #[]
329+
.ident loc { pythonModule := "", name := cl } #[]
335330
| .typeClass loc ⟨_, cl⟩ ⟨_, args⟩ =>
336331
let a := args.map (·.fromDDM)
337-
.ofAtom loc <| .pyClass cl a
332+
.ident loc { pythonModule := "", name := cl } a
338333
| .typeIdentNoArgs loc ⟨_, ident⟩ =>
339334
if let some pyIdent := PythonIdent.ofString ident then
340335
.ident loc pyIdent #[]

Strata/Languages/Python/Specs/Decls.lean

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ def builtinsStr := mk "builtins" "str"
2525
def noneType := mk "_types" "NoneType"
2626

2727
def typingAny := mk "typing" "Any"
28+
def typingBinaryIO := mk "typing" "BinaryIO"
2829
def typingDict := mk "typing" "Dict"
2930
def typingGenerator := mk "typing" "Generator"
3031
def typingList := mk "typing" "List"
@@ -77,7 +78,6 @@ An atomic type in the PySpec language
7778
-/
7879
inductive SpecAtomType where
7980
| ident (nm : PythonIdent) (args : Array SpecType)
80-
| pyClass (name : String) (args : Array SpecType)
8181
/- An integer literal -/
8282
| intLiteral (value : Int)
8383
/-- A string literal -/
@@ -127,7 +127,7 @@ termination_by a₁.size - i
127127
mutual
128128

129129
/-- Compare two atom types by structure, ignoring `loc` in nested `SpecType`
130-
values. Variants are ordered: ident < pyClass < intLiteral < stringLiteral
130+
values. Variants are ordered: ident < intLiteral < stringLiteral
131131
< typedDict. -/
132132
protected def SpecAtomType.compare (x y : SpecAtomType) : Ordering :=
133133
match x, y with
@@ -137,12 +137,6 @@ protected def SpecAtomType.compare (x y : SpecAtomType) : Ordering :=
137137
| .ident .., _ => .lt
138138
| _, .ident .. => .gt
139139

140-
| .pyClass xname xargs, .pyClass yname yargs =>
141-
compare xname yname |>.then $
142-
compareHLex (fun ⟨xe, _⟩ ye => xe.compare ye) xargs.attach yargs
143-
| .pyClass .., _ => .lt
144-
| _, .pyClass .. => .gt
145-
146140
| .intLiteral xval, .intLiteral yval => compare xval yval
147141
| .intLiteral .., _ => .lt
148142
| _, .intLiteral .. => .gt
@@ -262,9 +256,6 @@ protected def ofArray (loc : SourceRange) (atoms : Array SpecAtomType) : SpecTyp
262256
def ident (loc : SourceRange) (i : PythonIdent) (args : Array SpecType := #[]) : SpecType :=
263257
ofAtom loc (.ident i args)
264258

265-
def pyClass (loc : SourceRange) (name : String) (params : Array SpecType) : SpecType :=
266-
ofAtom loc (.pyClass name params)
267-
268259
def asSingleton (tp : SpecType) : Option SpecAtomType := do
269260
if tp.atoms.size = 1 then
270261
for atp in tp.atoms do return atp

Strata/Languages/Python/Specs/Error.lean

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@ namespace WarningKind
2929
-- Type translation warnings
3030
def emptyType : WarningKind := { phase := "pySpecToLaurel", category := "emptyType" }
3131
def unsupportedUnion : WarningKind := { phase := "pySpecToLaurel", category := "unsupportedUnion" }
32-
def unknownType : WarningKind := { phase := "pySpecToLaurel", category := "unknownType" }
33-
def unsupportedGenericClass : WarningKind := { phase := "pySpecToLaurel", category := "unsupportedGenericClass" }
34-
def bytesToString : WarningKind := { phase := "pySpecToLaurel", category := "bytesToString" }
35-
def complexToReal : WarningKind := { phase := "pySpecToLaurel", category := "complexToReal" }
3632

3733
-- Unsupported Optional patterns
3834
def unsupportedOptionalFloat : WarningKind := { phase := "pySpecToLaurel", category := "unsupportedOptionalFloat" }

Strata/Languages/Python/Specs/ToLaurel.lean

Lines changed: 13 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,6 @@ def atomTypeToString (a : SpecAtomType) : String :=
158158
else
159159
let argStrs := args.map specTypeToString
160160
s!"{nm}[{String.intercalate ", " argStrs.toList}]"
161-
| .pyClass name args =>
162-
if args.isEmpty then name
163-
else
164-
let argStrs := args.map specTypeToString
165-
s!"{name}[{String.intercalate ", " argStrs.toList}]"
166161
| .intLiteral v => s!"Literal[{v}]"
167162
| .stringLiteral v => s!"Literal[\"{v}\"]"
168163
| .typedDict _ _ _ => "TypedDict"
@@ -255,25 +250,24 @@ def detectOptionalType (ty : SpecType) : ToLaurelM (Option HighTypeMd) := do
255250
return none
256251

257252
/-- Known PythonIdent → Laurel type mappings for single-atom ident types.
258-
- `bytes`/`bytearray` → TString (closest string-like approximation)
259-
- `complex` → TReal (no complex type in SMT; real is the closest numeric type)
260-
- `Exception` → UserDefined "Error" (matches CorePrelude's Error datatype)
261-
- `typing.Any` → UserDefined "Any" (datatype in Laurel prelude) -/
253+
Matches PythonToLaurel's type mapping: only int, str, bool, float get
254+
concrete Laurel types; everything else maps to Any. -/
262255
private def knownIdentTypes : Std.HashMap PythonIdent HighTypeMd :=
263256
.ofList [
264257
(.builtinsBool, tyBool),
265-
(.builtinsBytearray, tyString),
266-
(.builtinsBytes, tyString),
267-
(.builtinsComplex, tyReal),
268-
(.builtinsDict, tyDictStrAny),
269-
(.builtinsException, tyError),
258+
(.builtinsBytearray, tyAny),
259+
(.builtinsBytes, tyAny),
260+
(.builtinsComplex, tyAny),
261+
(.builtinsDict, tyAny),
262+
(.builtinsException, tyAny),
270263
(.builtinsFloat, tyReal),
271264
(.builtinsInt, tyInt),
272265
(.builtinsStr, tyString),
273266
(.noneType, tyVoid),
274267
(.typingAny, tyAny),
275-
(.typingDict, tyDictStrAny),
276-
(.typingList, tyListStr),
268+
(.typingBinaryIO, tyAny),
269+
(.typingDict, tyAny),
270+
(.typingList, tyAny),
277271
]
278272

279273
/-- Convert a SpecType to a Laurel HighTypeMd. -/
@@ -306,20 +300,9 @@ def specTypeToLaurelType (ty : SpecType) : ToLaurelM HighTypeMd := do
306300
-- Single atom type
307301
match ty.atoms[0]! with
308302
| .ident nm _args =>
309-
-- Warn for lossy known-type approximations
310-
if nm == .builtinsBytes || nm == .builtinsBytearray then
311-
reportError .bytesToString default s!"'{nm}' mapped to TString (bytes have different semantics)"
312-
if nm == .builtinsComplex then
313-
reportError .complexToReal default s!"'{nm}' mapped to TReal (complex loses imaginary component)"
314303
if let some ty := knownIdentTypes[nm]? then
315304
return ty
316-
reportError .unknownType default s!"Unknown type '{nm}' mapped to TString"
317-
return tyString
318-
| .pyClass name args =>
319-
if args.size > 0 then
320-
reportError .unsupportedGenericClass default
321-
s!"Generic class '{name}' with type args unsupported"
322-
let prefixed ← prefixName name
305+
let prefixed ← prefixName nm.name
323306
return mkTy (.UserDefined { text := prefixed, md := .empty })
324307
| .intLiteral _ => return tyInt
325308
| .stringLiteral _ => return tyString
@@ -671,9 +654,8 @@ def typeDefToLaurel (td : TypeDef) : ToLaurelM Unit := do
671654
})
672655

673656
/-- Extract an overload dispatch entry from an `@overload` function declaration.
674-
Looks for a `stringLiteral` in the first argument's type and a class
675-
return type (either `.pyClass` for locally-defined classes or `.ident`
676-
for externally imported classes), and records them in the dispatch table. -/
657+
Looks for a `stringLiteral` in the first argument's type and an `.ident`
658+
return type, and records them in the dispatch table. -/
677659
def extractOverloadEntry (func : FunctionDecl) : ToLaurelM Unit := do
678660
let args := func.args.args
679661
let .isTrue _ := decideProp (args.size > 0)
@@ -703,10 +685,6 @@ def extractOverloadEntry (func : FunctionDecl) : ToLaurelM Unit := do
703685
return
704686
let retType ←
705687
match func.returnType.atoms[0] with
706-
| .pyClass name _ => do
707-
let ctx ← read
708-
let prefixed ← prefixName name
709-
pure (PythonIdent.mk ctx.modulePrefix prefixed)
710688
| .ident nm _ => pure nm
711689
| _ =>
712690
reportError .overloadReturnNotClass func.loc

StrataTest/Languages/Python/Specs/DeclsTest.lean

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@ namespace DeclsTest
1212
private abbrev mk1 (a : SpecAtomType) : SpecType := ⟨#[a], ⟨0, 0⟩⟩
1313
private abbrev mk2 (a : SpecAtomType) : SpecType := ⟨#[a], ⟨⟨1⟩, ⟨2⟩⟩⟩
1414

15-
-- Atom ordering: ident < pyClass < intLiteral < stringLiteral < typedDict
16-
#guard compare (SpecAtomType.ident .builtinsInt #[]) (.pyClass "Foo" #[]) == .lt
17-
#guard compare (SpecAtomType.pyClass "Foo" #[]) (.intLiteral 0) == .lt
15+
-- Atom ordering: ident < intLiteral < stringLiteral < typedDict
16+
#guard compare (SpecAtomType.ident .builtinsInt #[]) (.intLiteral 0) == .lt
1817
#guard compare (SpecAtomType.intLiteral 0) (.stringLiteral "a") == .lt
1918

2019
-- Same variant compares by fields

StrataTest/Languages/Python/SpecsTest.lean

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ class "MainClass" {
7979
exhaustive: false
8080
function "main_method"{
8181
args: [
82-
self : class(MainClass) [default: ]
82+
self : ident("main.MainClass") [default: ]
8383
x : ident("basetypes.BaseClass") [default: ]
8484
]
8585
kwonly: [
@@ -94,7 +94,7 @@ class "MainClass" {
9494
}
9595
function "main_function"{
9696
args: [
97-
x : class(MainClass) [default: ]
97+
x : ident("main.MainClass") [default: ]
9898
]
9999
kwonly: [
100100
]
@@ -186,7 +186,7 @@ class "InnerHelper" {
186186
class "ClassWithInit" {
187187
bases: []
188188
fields: [
189-
helper : class(_InnerHelper) "_InnerHelper()"
189+
helper : ident("main._InnerHelper") "_InnerHelper()"
190190
]
191191
classVars: []
192192
subclasses: [
@@ -198,7 +198,7 @@ class "ClassWithInit" {
198198
exhaustive: false
199199
function "do_work"{
200200
args: [
201-
self : class(_InnerHelper) [default: ]
201+
self : ident("main._InnerHelper") [default: ]
202202
]
203203
kwonly: [
204204
]

StrataTest/Languages/Python/ToLaurelTest.lean

Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,10 @@ procedure with_kwonly(x:TInt, verbose:TBool) returns(result:TString)
156156

157157
/--
158158
info: procedure takes_any(x:UserDefined(Any)) returns(result:TInt)
159-
procedure takes_list(items:UserDefined(ListStr)) returns(result:TBool)
160-
procedure returns_dict() returns(result:UserDefined(DictStrAny))
161-
procedure typed_list() returns(result:UserDefined(ListStr))
162-
procedure typed_dict() returns(result:UserDefined(DictStrAny))
159+
procedure takes_list(items:UserDefined(Any)) returns(result:TBool)
160+
procedure returns_dict() returns(result:UserDefined(Any))
161+
procedure typed_list() returns(result:UserDefined(Any))
162+
procedure typed_dict() returns(result:UserDefined(Any))
163163
-/
164164
#guard_msgs in
165165
#eval runTest #[
@@ -229,10 +229,10 @@ procedure opt_int_enum() returns(result:UserDefined(IntOrNone))
229229
/-! ## Error cases (updated to verify WarningKind) -/
230230

231231
/--
232-
info: pySpecToLaurel.unknownType: Unknown type 'foo.Bar' mapped to TString
232+
info: procedure f() returns(result:UserDefined(Bar))
233233
-/
234234
#guard_msgs in
235-
#eval runTestWarningKinds
235+
#eval runTest
236236
#[mkFuncSig "f"
237237
(identType (PythonIdent.mk "foo" "Bar"))]
238238

@@ -317,8 +317,8 @@ procedure uses_class(x:UserDefined(Foo)) returns(result:UserDefined(Foo))
317317
loc := loc, name := "Foo"
318318
methods := #[]
319319
},
320-
mkFuncSig "uses_class" (.pyClass loc "Foo" #[])
321-
(args := #[mkArg "x" (.pyClass loc "Foo" #[])])
320+
mkFuncSig "uses_class" (mkType (.ident (PythonIdent.mk "" "Foo") #[]))
321+
(args := #[mkArg "x" (mkType (.ident (PythonIdent.mk "" "Foo") #[]))])
322322
]
323323

324324
/-! ## Empty input -/
@@ -407,7 +407,7 @@ dispatch create_client:
407407
mkFuncSig "helper" (identType .builtinsBool)
408408
]
409409

410-
-- Overloads with locally-defined class return types (.pyClass).
410+
-- Overloads with locally-defined class return types.
411411
/--
412412
info: type Alpha
413413
type Beta
@@ -419,9 +419,9 @@ dispatch make:
419419
#eval runFullTest #[
420420
.classDef { loc := loc, name := "Alpha", methods := #[] },
421421
.classDef { loc := loc, name := "Beta", methods := #[] },
422-
mkOverload "make" (.pyClass loc "Alpha" #[])
422+
mkOverload "make" (mkType (.ident (PythonIdent.mk "" "Alpha") #[]))
423423
(args := #[mkArg "kind" (mkType (.stringLiteral "a"))]),
424-
mkOverload "make" (.pyClass loc "Beta" #[])
424+
mkOverload "make" (mkType (.ident (PythonIdent.mk "" "Beta") #[]))
425425
(args := #[mkArg "kind" (mkType (.stringLiteral "b"))])
426426
]
427427

@@ -502,36 +502,26 @@ body contains FieldSelect: false
502502

503503
/-! ## Warning kind tests -/
504504

505-
-- Type translation: unsupportedGenericClass
505+
-- bytes, bytearray, complex now map to Any (matching PythonToLaurel)
506506
/--
507-
info: pySpecToLaurel.unsupportedGenericClass: Generic class 'Foo' with type args unsupported
507+
info: procedure f() returns(result:UserDefined(Any))
508508
-/
509509
#guard_msgs in
510-
#eval runTestWarningKinds
511-
#[mkFuncSig "f" (mkType (.pyClass "Foo" #[identType .builtinsInt]))]
512-
513-
-- Type translation: bytesToString
514-
/--
515-
info: pySpecToLaurel.bytesToString: 'builtins.bytes' mapped to TString (bytes have different semantics)
516-
-/
517-
#guard_msgs in
518-
#eval runTestWarningKinds
510+
#eval runTest
519511
#[mkFuncSig "f" (identType .builtinsBytes)]
520512

521-
-- Type translation: bytesToString (bytearray variant)
522513
/--
523-
info: pySpecToLaurel.bytesToString: 'builtins.bytearray' mapped to TString (bytes have different semantics)
514+
info: procedure f() returns(result:UserDefined(Any))
524515
-/
525516
#guard_msgs in
526-
#eval runTestWarningKinds
517+
#eval runTest
527518
#[mkFuncSig "f" (identType .builtinsBytearray)]
528519

529-
-- Type translation: complexToReal
530520
/--
531-
info: pySpecToLaurel.complexToReal: 'builtins.complex' mapped to TReal (complex loses imaginary component)
521+
info: procedure f() returns(result:UserDefined(Any))
532522
-/
533523
#guard_msgs in
534-
#eval runTestWarningKinds
524+
#eval runTest
535525
#[mkFuncSig "f" (identType .builtinsComplex)]
536526

537527
-- Unsupported Optional patterns

0 commit comments

Comments
 (0)