Skip to content

Commit f630144

Browse files
authored
Merge pull request #63 from upstat-io/dev
nightly: 2026-03-02
2 parents fcc6c8f + 745e07e commit f630144

215 files changed

Lines changed: 24024 additions & 11662 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

compiler/ori_arc/src/borrow/builtins.rs

Lines changed: 0 additions & 517 deletions
This file was deleted.
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
//! Borrowing-builtin method knowledge for ARC borrow inference.
2+
//!
3+
//! Defines which builtin methods borrow their receiver (read without consuming)
4+
//! AND produce independent results (no hidden dependency on the receiver's data).
5+
//!
6+
//! This is a **language-semantic fact**, not a codegen implementation detail.
7+
//! Borrow inference needs this knowledge to recognize calls to inline-compiled
8+
//! builtins (e.g., `len`, `is_empty`, `compare`) as borrowing rather than
9+
//! defaulting to all-Owned.
10+
//!
11+
//! # Exclusions
12+
//!
13+
//! - **Iterator methods**: These consume/transform the iterator or create
14+
//! derived values — the ARC pipeline can't model these hidden dependencies.
15+
//! - **`.iter()`**: Creates an iterator that references the receiver's data.
16+
//! Uses Owned semantics (default). The runtime's `IterState::List` stores
17+
//! the data pointer, cap, and `elem_dec_fn`; `Drop for IterState` calls
18+
//! `ori_buffer_rc_dec` to release the list data when the iterator is consumed.
19+
//!
20+
//! # Sync
21+
//!
22+
//! The LLVM backend maintains a parallel `BuiltinTable` with `receiver_borrowed`
23+
//! flags for codegen dispatch. A sync test in `ori_llvm` asserts that table's
24+
//! effective borrowing set matches this canonical list.
25+
26+
use ori_ir::{Name, StringInterner};
27+
use rustc_hash::FxHashSet;
28+
29+
/// All builtin method names that borrow their receiver, sorted alphabetically.
30+
///
31+
/// Each method listed here borrows its receiver and produces a result that does
32+
/// not reference the receiver's data (i.e., the result is independent).
33+
///
34+
/// When adding a new builtin method to the LLVM backend's `declare_builtins!`
35+
/// with `borrow: true`, also add its name here (if not already present).
///
/// Entries are bare method names; nothing in this table records the receiver
/// type. `concat` and `reverse` are also listed in
/// [`CONSUMING_RECEIVER_METHOD_NAMES`], whose docs place the receiver-type
/// check at the call site.
///
/// NOTE(review): `slice` and `substring` are also listed in
/// [`SHARING_METHOD_NAMES`], which describes their results as sharing the
/// receiver's backing buffer. That appears to conflict with the
/// "independent result" statement above — confirm which is intended.
36+
const BORROWING_METHOD_NAMES: &[&str] = &[
37+
"abs",
38+
"byte",
39+
"chars",
40+
"clone",
41+
"compare",
42+
"concat",
43+
"contains",
44+
"contains_key",
45+
"count",
46+
"drop",
47+
"ends_with",
48+
"equals",
49+
"f",
50+
"first",
51+
"get",
52+
"hash",
53+
"into",
54+
"is_empty",
55+
"is_equal",
56+
"is_err",
57+
"is_greater",
58+
"is_greater_or_equal",
59+
"is_less",
60+
"is_less_or_equal",
61+
"is_none",
62+
"is_ok",
63+
"is_some",
64+
"keys",
65+
"last",
66+
"len",
67+
"length",
68+
"repeat",
69+
"replace",
70+
"reverse",
71+
"slice",
72+
"split",
73+
"starts_with",
74+
"substring",
75+
"take",
76+
"to_float",
77+
"to_int",
78+
"to_list",
79+
"to_lowercase",
80+
"to_str",
81+
"to_uppercase",
82+
"trim",
83+
"unwrap",
84+
"unwrap_err",
85+
"unwrap_or",
86+
"values",
87+
];
88+
89+
/// Method names with **consuming receiver** semantics for list types.
90+
///
91+
/// These are COW (Copy-on-Write) list methods that handle the old buffer's
92+
/// RC lifecycle internally: the fast path reuses the buffer (unique owner),
93+
/// the slow path allocates a new buffer and `ori_rc_dec`s the old one.
94+
///
95+
/// The ARC pipeline must NOT emit an additional `RcDec` for the receiver
96+
/// argument when calling these methods — doing so causes double-free.
97+
///
98+
/// **Type-qualified**: `"add"` and `"concat"` are borrowing for strings but
99+
/// consuming for lists. The type check happens at the call site in
100+
/// [`annotate_arg_ownership`](crate::rc_insert::annotate_arg_ownership).
///
/// NOTE(review): `"add"` is not present in [`BORROWING_METHOD_NAMES`], while
/// `"reverse"` appears in both lists but is not mentioned above — confirm
/// that the paragraph names the right overlapping methods.
///
/// `"remove"` is also listed in [`CONSUMING_RECEIVER_ONLY_METHOD_NAMES`],
/// where it is annotated as the map/set variant.
101+
///
102+
/// Sorted alphabetically.
103+
const CONSUMING_RECEIVER_METHOD_NAMES: &[&str] = &[
104+
"add", // list + list (COW concat)
105+
"concat", // list.concat (COW concat)
106+
"insert", // list.insert (COW insert)
107+
"pop", // list.pop (COW pop)
108+
"push", // list.push (COW push)
109+
"remove", // list.remove (COW remove)
110+
"reverse", // list.reverse (COW reverse)
111+
"sort", // list.sort (COW sort, unstable)
112+
"sort_stable", // list.sort_stable (COW sort, stable/TimSort)
113+
];
114+
115+
/// COW list methods that consume both receiver AND second argument (list2).
116+
///
117+
/// For these methods, the runtime takes ownership of list2's buffer and checks
118+
/// uniqueness at runtime to skip RC increments when list2 is uniquely owned.
119+
/// The ARC pipeline must mark arg[1] as `Owned` (no extra `RcDec`) in addition
120+
/// to the receiver.
///
/// Both entries are also present in [`CONSUMING_RECEIVER_METHOD_NAMES`], which
/// is the table that covers the receiver side.
121+
///
122+
/// Sorted alphabetically.
123+
const CONSUMING_SECOND_ARG_METHOD_NAMES: &[&str] = &[
124+
"add", // list + list (COW concat)
125+
"concat", // list.concat(other)
126+
];
127+
128+
/// COW methods that consume ONLY the receiver; non-receiver args are borrowed.
129+
///
130+
/// These are Map/Set COW methods where the runtime takes ownership of the
131+
/// receiver's buffer but only reads other arguments (comparison keys, read-only
132+
/// collections). Contrast with `CONSUMING_RECEIVER_METHOD_NAMES` where List
133+
/// methods also transfer inserted elements.
134+
///
135+
/// In `compute_arg_ownership`, these methods produce `[Owned, Borrowed, ...]`
136+
/// instead of the default all-Owned, preventing RC leaks on comparison keys
137+
/// and read-only collection arguments.
///
/// `remove` is also listed in `CONSUMING_RECEIVER_METHOD_NAMES` (annotated
/// there as `list.remove`). This table stores bare names only, so the two
/// variants are presumably told apart by receiver type at the call site —
/// TODO confirm.
138+
///
139+
/// Sorted alphabetically.
140+
const CONSUMING_RECEIVER_ONLY_METHOD_NAMES: &[&str] = &[
141+
"difference", // set.difference(other) — other is read-only
142+
"intersection", // set.intersection(other) — other is read-only
143+
"remove", // map/set.remove(key) — key is comparison-only
144+
"union", // set.union(other) — other is read-only
145+
];
146+
147+
/// Collect interned [`Name`]s for all builtin methods that borrow their receiver.
148+
///
149+
/// Returns the set of method names (not type-qualified) that borrow inference
150+
/// and RC insertion should treat as borrowing the receiver. This allows
151+
/// inline-compiled builtins to avoid unnecessary `rc_inc`/`rc_dec` pairs.
152+
///
153+
/// See [`BORROWING_METHOD_NAMES`] for the full list and exclusion rules.
154+
pub fn borrowing_builtin_names(interner: &StringInterner) -> FxHashSet<Name> {
155+
BORROWING_METHOD_NAMES
156+
.iter()
157+
.map(|name| interner.intern(name))
158+
.collect()
159+
}
160+
161+
/// Collect interned [`Name`]s for COW list methods with consuming receiver semantics.
162+
///
163+
/// These methods handle the old buffer's RC internally. When the receiver is
164+
/// a `List` type, the ARC pipeline must mark the receiver argument as `Owned`
165+
/// (no extra `RcDec`) instead of the default `Borrowed` from the borrowing set.
166+
///
167+
/// See [`CONSUMING_RECEIVER_METHOD_NAMES`] for the full list and rationale.
168+
pub fn consuming_receiver_builtin_names(interner: &StringInterner) -> FxHashSet<Name> {
169+
CONSUMING_RECEIVER_METHOD_NAMES
170+
.iter()
171+
.map(|name| interner.intern(name))
172+
.collect()
173+
}
174+
175+
/// Collect interned [`Name`]s for COW list methods that also consume their
176+
/// second argument (list2).
177+
///
178+
/// When the receiver is a `List` type and `args.len() >= 2`, the ARC pipeline
179+
/// marks `arg_ownership[1]` as `Owned` to prevent a duplicate `RcDec` — the
180+
/// runtime takes ownership of list2 and handles its lifecycle internally.
181+
///
182+
/// See [`CONSUMING_SECOND_ARG_METHOD_NAMES`] for the full list.
183+
pub fn consuming_second_arg_builtin_names(interner: &StringInterner) -> FxHashSet<Name> {
184+
CONSUMING_SECOND_ARG_METHOD_NAMES
185+
.iter()
186+
.map(|name| interner.intern(name))
187+
.collect()
188+
}
189+
190+
/// Collect interned [`Name`]s for COW methods that consume only the receiver.
191+
///
192+
/// Non-receiver arguments are borrowed (comparison keys, read-only collections).
193+
/// Used by `compute_arg_ownership` to produce `[Owned, Borrowed, ...]` instead
194+
/// of the default all-Owned.
195+
///
196+
/// See [`CONSUMING_RECEIVER_ONLY_METHOD_NAMES`] for the full list.
197+
pub fn consuming_receiver_only_builtin_names(interner: &StringInterner) -> FxHashSet<Name> {
198+
CONSUMING_RECEIVER_ONLY_METHOD_NAMES
199+
.iter()
200+
.map(|name| interner.intern(name))
201+
.collect()
202+
}
203+
204+
/// Collect interned [`Name`]s for ALL COW methods (union of consuming-receiver
205+
/// and consuming-receiver-only sets).
206+
///
207+
/// This is the single integration point for uniqueness analysis: both
208+
/// [`consuming_receiver_builtin_names`] (list COW: `push`, `sort`, …) and
209+
/// [`consuming_receiver_only_builtin_names`] (map/set COW: `remove`, `union`, …)
210+
/// are COW operations whose results are always `Unique` (RC == 1).
211+
///
212+
/// Pass the result to [`crate::uniqueness::inter::build_cow_summaries`] as the
213+
/// `cow_method_names` argument.
214+
pub fn all_cow_method_names(interner: &StringInterner) -> FxHashSet<Name> {
215+
let mut names = consuming_receiver_builtin_names(interner);
216+
names.extend(consuming_receiver_only_builtin_names(interner));
217+
names
218+
}
219+
220+
/// Method names that return values **sharing backing storage** with the receiver.
221+
///
222+
/// Unlike COW methods (which always return `Unique` results), these methods
223+
/// create views into the receiver's data. The returned value shares the
224+
/// receiver's refcounted backing buffer, so its uniqueness is `MaybeShared`.
225+
///
226+
/// Used by [`crate::uniqueness::inter::build_cow_summaries`] as the
227+
/// `shared_method_names` argument.
///
/// NOTE(review): both names are also present in [`BORROWING_METHOD_NAMES`],
/// whose docs describe results as independent of the receiver's data —
/// confirm the two descriptions are meant to coexist.
228+
///
229+
/// Sorted alphabetically.
230+
const SHARING_METHOD_NAMES: &[&str] = &[
231+
"slice", // list.slice — shares list backing
232+
"substring", // str.substring — shares string backing
233+
];
234+
235+
/// Collect interned [`Name`]s for methods that share backing with their receiver.
236+
///
237+
/// These methods return values that reference the receiver's heap data,
238+
/// so their return uniqueness is `MaybeShared` (not `Unique` like COW methods).
239+
///
240+
/// See [`SHARING_METHOD_NAMES`] for the full list.
241+
pub fn sharing_builtin_names(interner: &StringInterner) -> FxHashSet<Name> {
242+
SHARING_METHOD_NAMES
243+
.iter()
244+
.map(|name| interner.intern(name))
245+
.collect()
246+
}
247+
248+
/// Pre-computed interned sets for ARC ownership annotation.
249+
///
250+
/// Groups the builtin method name sets that
251+
/// [`annotate_arg_ownership`](crate::rc_insert::annotate_arg_ownership)
252+
/// needs. Constructing this once avoids redundant `intern()` work across
253+
/// multiple function compilations.
///
/// Build an instance with [`BuiltinOwnershipSets::new`]. Every set holds bare
/// method names with no receiver-type qualifier. The sharing set
/// ([`sharing_builtin_names`]) and the combined COW set
/// ([`all_cow_method_names`]) are not stored here.
254+
pub struct BuiltinOwnershipSets {
255+
/// Methods that borrow their receiver (e.g., `len`, `is_empty`).
256+
pub borrowing: FxHashSet<Name>,
257+
/// COW list methods that consume their receiver (e.g., `push`, `reverse`).
258+
pub consuming_receiver: FxHashSet<Name>,
259+
/// COW list methods that also consume their second argument (e.g., `add`, `concat`).
260+
pub consuming_second_arg: FxHashSet<Name>,
261+
/// COW methods that consume only the receiver; other args are borrowed.
262+
///
263+
/// For Map/Set operations like `remove(key)` and `union(other)`, the
264+
/// receiver is consumed (COW handles its RC) but the key/other-set is
265+
/// only read for comparison — its RC must be decremented by the caller.
266+
pub consuming_receiver_only: FxHashSet<Name>,
267+
}
268+
269+
impl BuiltinOwnershipSets {
270+
/// Intern all builtin method name sets from the given interner.
271+
pub fn new(interner: &StringInterner) -> Self {
272+
Self {
273+
borrowing: borrowing_builtin_names(interner),
274+
consuming_receiver: consuming_receiver_builtin_names(interner),
275+
consuming_second_arg: consuming_second_arg_builtin_names(interner),
276+
consuming_receiver_only: consuming_receiver_only_builtin_names(interner),
277+
}
278+
}
279+
}
280+
281+
#[cfg(test)]
282+
mod tests;

0 commit comments

Comments
 (0)