//! Borrowing-builtin method knowledge for ARC borrow inference.
//!
//! Defines which builtin methods borrow their receiver (read without consuming)
//! AND produce independent results (no hidden dependency on the receiver's data).
//!
//! This is a **language-semantic fact**, not a codegen implementation detail.
//! Borrow inference needs this knowledge to recognize calls to inline-compiled
//! builtins (e.g., `len`, `is_empty`, `compare`) as borrowing rather than
//! defaulting to all-Owned.
//!
//! # Exclusions
//!
//! - **Iterator methods**: These consume/transform the iterator or create
//!   derived values — the ARC pipeline can't model these hidden dependencies.
//! - **`.iter()`**: Creates an iterator that references the receiver's data.
//!   Uses Owned semantics (default). The runtime's `IterState::List` stores
//!   the data pointer, cap, and `elem_dec_fn`; `Drop for IterState` calls
//!   `ori_buffer_rc_dec` to release the list data when the iterator is consumed.
//!
//! # Sync
//!
//! The LLVM backend maintains a parallel `BuiltinTable` with `receiver_borrowed`
//! flags for codegen dispatch. A sync test in `ori_llvm` asserts that table's
//! effective borrowing set matches this canonical list.

| 26 | +use ori_ir::{Name, StringInterner}; |
| 27 | +use rustc_hash::FxHashSet; |
| 28 | + |
| 29 | +/// All builtin method names that borrow their receiver, sorted alphabetically. |
| 30 | +/// |
| 31 | +/// Each method listed here borrows its receiver and produces a result that does |
| 32 | +/// not reference the receiver's data (i.e., the result is independent). |
| 33 | +/// |
| 34 | +/// When adding a new builtin method to the LLVM backend's `declare_builtins!` |
| 35 | +/// with `borrow: true`, also add its name here (if not already present). |
| 36 | +const BORROWING_METHOD_NAMES: &[&str] = &[ |
| 37 | + "abs", |
| 38 | + "byte", |
| 39 | + "chars", |
| 40 | + "clone", |
| 41 | + "compare", |
| 42 | + "concat", |
| 43 | + "contains", |
| 44 | + "contains_key", |
| 45 | + "count", |
| 46 | + "drop", |
| 47 | + "ends_with", |
| 48 | + "equals", |
| 49 | + "f", |
| 50 | + "first", |
| 51 | + "get", |
| 52 | + "hash", |
| 53 | + "into", |
| 54 | + "is_empty", |
| 55 | + "is_equal", |
| 56 | + "is_err", |
| 57 | + "is_greater", |
| 58 | + "is_greater_or_equal", |
| 59 | + "is_less", |
| 60 | + "is_less_or_equal", |
| 61 | + "is_none", |
| 62 | + "is_ok", |
| 63 | + "is_some", |
| 64 | + "keys", |
| 65 | + "last", |
| 66 | + "len", |
| 67 | + "length", |
| 68 | + "repeat", |
| 69 | + "replace", |
| 70 | + "reverse", |
| 71 | + "slice", |
| 72 | + "split", |
| 73 | + "starts_with", |
| 74 | + "substring", |
| 75 | + "take", |
| 76 | + "to_float", |
| 77 | + "to_int", |
| 78 | + "to_list", |
| 79 | + "to_lowercase", |
| 80 | + "to_str", |
| 81 | + "to_uppercase", |
| 82 | + "trim", |
| 83 | + "unwrap", |
| 84 | + "unwrap_err", |
| 85 | + "unwrap_or", |
| 86 | + "values", |
| 87 | +]; |
| 88 | + |
| 89 | +/// Method names with **consuming receiver** semantics for list types. |
| 90 | +/// |
| 91 | +/// These are COW (Copy-on-Write) list methods that handle the old buffer's |
| 92 | +/// RC lifecycle internally: the fast path reuses the buffer (unique owner), |
| 93 | +/// the slow path allocates a new buffer and `ori_rc_dec`s the old one. |
| 94 | +/// |
| 95 | +/// The ARC pipeline must NOT emit an additional `RcDec` for the receiver |
| 96 | +/// argument when calling these methods — doing so causes double-free. |
| 97 | +/// |
| 98 | +/// **Type-qualified**: `"add"` and `"concat"` are borrowing for strings but |
| 99 | +/// consuming for lists. The type check happens at the call site in |
| 100 | +/// [`annotate_arg_ownership`](crate::rc_insert::annotate_arg_ownership). |
| 101 | +/// |
| 102 | +/// Sorted alphabetically. |
| 103 | +const CONSUMING_RECEIVER_METHOD_NAMES: &[&str] = &[ |
| 104 | + "add", // list + list (COW concat) |
| 105 | + "concat", // list.concat (COW concat) |
| 106 | + "insert", // list.insert (COW insert) |
| 107 | + "pop", // list.pop (COW pop) |
| 108 | + "push", // list.push (COW push) |
| 109 | + "remove", // list.remove (COW remove) |
| 110 | + "reverse", // list.reverse (COW reverse) |
| 111 | + "sort", // list.sort (COW sort, unstable) |
| 112 | + "sort_stable", // list.sort_stable (COW sort, stable/TimSort) |
| 113 | +]; |
| 114 | + |
| 115 | +/// COW list methods that consume both receiver AND second argument (list2). |
| 116 | +/// |
| 117 | +/// For these methods, the runtime takes ownership of list2's buffer and checks |
| 118 | +/// uniqueness at runtime to skip RC increments when list2 is uniquely owned. |
| 119 | +/// The ARC pipeline must mark arg[1] as `Owned` (no extra `RcDec`) in addition |
| 120 | +/// to the receiver. |
| 121 | +/// |
| 122 | +/// Sorted alphabetically. |
| 123 | +const CONSUMING_SECOND_ARG_METHOD_NAMES: &[&str] = &[ |
| 124 | + "add", // list + list (COW concat) |
| 125 | + "concat", // list.concat(other) |
| 126 | +]; |
| 127 | + |
| 128 | +/// COW methods that consume ONLY the receiver; non-receiver args are borrowed. |
| 129 | +/// |
| 130 | +/// These are Map/Set COW methods where the runtime takes ownership of the |
| 131 | +/// receiver's buffer but only reads other arguments (comparison keys, read-only |
| 132 | +/// collections). Contrast with `CONSUMING_RECEIVER_METHOD_NAMES` where List |
| 133 | +/// methods also transfer inserted elements. |
| 134 | +/// |
| 135 | +/// In `compute_arg_ownership`, these methods produce `[Owned, Borrowed, ...]` |
| 136 | +/// instead of the default all-Owned, preventing RC leaks on comparison keys |
| 137 | +/// and read-only collection arguments. |
| 138 | +/// |
| 139 | +/// Sorted alphabetically. |
| 140 | +const CONSUMING_RECEIVER_ONLY_METHOD_NAMES: &[&str] = &[ |
| 141 | + "difference", // set.difference(other) — other is read-only |
| 142 | + "intersection", // set.intersection(other) — other is read-only |
| 143 | + "remove", // map/set.remove(key) — key is comparison-only |
| 144 | + "union", // set.union(other) — other is read-only |
| 145 | +]; |
| 146 | + |
| 147 | +/// Collect interned [`Name`]s for all builtin methods that borrow their receiver. |
| 148 | +/// |
| 149 | +/// Returns the set of method names (not type-qualified) that borrow inference |
| 150 | +/// and RC insertion should treat as borrowing the receiver. This allows |
| 151 | +/// inline-compiled builtins to avoid unnecessary `rc_inc`/`rc_dec` pairs. |
| 152 | +/// |
| 153 | +/// See [`BORROWING_METHOD_NAMES`] for the full list and exclusion rules. |
| 154 | +pub fn borrowing_builtin_names(interner: &StringInterner) -> FxHashSet<Name> { |
| 155 | + BORROWING_METHOD_NAMES |
| 156 | + .iter() |
| 157 | + .map(|name| interner.intern(name)) |
| 158 | + .collect() |
| 159 | +} |
| 160 | + |
| 161 | +/// Collect interned [`Name`]s for COW list methods with consuming receiver semantics. |
| 162 | +/// |
| 163 | +/// These methods handle the old buffer's RC internally. When the receiver is |
| 164 | +/// a `List` type, the ARC pipeline must mark the receiver argument as `Owned` |
| 165 | +/// (no extra `RcDec`) instead of the default `Borrowed` from the borrowing set. |
| 166 | +/// |
| 167 | +/// See [`CONSUMING_RECEIVER_METHOD_NAMES`] for the full list and rationale. |
| 168 | +pub fn consuming_receiver_builtin_names(interner: &StringInterner) -> FxHashSet<Name> { |
| 169 | + CONSUMING_RECEIVER_METHOD_NAMES |
| 170 | + .iter() |
| 171 | + .map(|name| interner.intern(name)) |
| 172 | + .collect() |
| 173 | +} |
| 174 | + |
| 175 | +/// Collect interned [`Name`]s for COW list methods that also consume their |
| 176 | +/// second argument (list2). |
| 177 | +/// |
| 178 | +/// When the receiver is a `List` type and `args.len() >= 2`, the ARC pipeline |
| 179 | +/// marks `arg_ownership[1]` as `Owned` to prevent a duplicate `RcDec` — the |
| 180 | +/// runtime takes ownership of list2 and handles its lifecycle internally. |
| 181 | +/// |
| 182 | +/// See [`CONSUMING_SECOND_ARG_METHOD_NAMES`] for the full list. |
| 183 | +pub fn consuming_second_arg_builtin_names(interner: &StringInterner) -> FxHashSet<Name> { |
| 184 | + CONSUMING_SECOND_ARG_METHOD_NAMES |
| 185 | + .iter() |
| 186 | + .map(|name| interner.intern(name)) |
| 187 | + .collect() |
| 188 | +} |
| 189 | + |
| 190 | +/// Collect interned [`Name`]s for COW methods that consume only the receiver. |
| 191 | +/// |
| 192 | +/// Non-receiver arguments are borrowed (comparison keys, read-only collections). |
| 193 | +/// Used by `compute_arg_ownership` to produce `[Owned, Borrowed, ...]` instead |
| 194 | +/// of the default all-Owned. |
| 195 | +/// |
| 196 | +/// See [`CONSUMING_RECEIVER_ONLY_METHOD_NAMES`] for the full list. |
| 197 | +pub fn consuming_receiver_only_builtin_names(interner: &StringInterner) -> FxHashSet<Name> { |
| 198 | + CONSUMING_RECEIVER_ONLY_METHOD_NAMES |
| 199 | + .iter() |
| 200 | + .map(|name| interner.intern(name)) |
| 201 | + .collect() |
| 202 | +} |
| 203 | + |
| 204 | +/// Collect interned [`Name`]s for ALL COW methods (union of consuming-receiver |
| 205 | +/// and consuming-receiver-only sets). |
| 206 | +/// |
| 207 | +/// This is the single integration point for uniqueness analysis: both |
| 208 | +/// [`consuming_receiver_builtin_names`] (list COW: `push`, `sort`, …) and |
| 209 | +/// [`consuming_receiver_only_builtin_names`] (map/set COW: `remove`, `union`, …) |
| 210 | +/// are COW operations whose results are always `Unique` (RC == 1). |
| 211 | +/// |
| 212 | +/// Pass the result to [`crate::uniqueness::inter::build_cow_summaries`] as the |
| 213 | +/// `cow_method_names` argument. |
| 214 | +pub fn all_cow_method_names(interner: &StringInterner) -> FxHashSet<Name> { |
| 215 | + let mut names = consuming_receiver_builtin_names(interner); |
| 216 | + names.extend(consuming_receiver_only_builtin_names(interner)); |
| 217 | + names |
| 218 | +} |
| 219 | + |
| 220 | +/// Method names that return values **sharing backing storage** with the receiver. |
| 221 | +/// |
| 222 | +/// Unlike COW methods (which always return `Unique` results), these methods |
| 223 | +/// create views into the receiver's data. The returned value shares the |
| 224 | +/// receiver's refcounted backing buffer, so its uniqueness is `MaybeShared`. |
| 225 | +/// |
| 226 | +/// Used by [`crate::uniqueness::inter::build_cow_summaries`] as the |
| 227 | +/// `shared_method_names` argument. |
| 228 | +/// |
| 229 | +/// Sorted alphabetically. |
| 230 | +const SHARING_METHOD_NAMES: &[&str] = &[ |
| 231 | + "slice", // list.slice — shares list backing |
| 232 | + "substring", // str.substring — shares string backing |
| 233 | +]; |
| 234 | + |
| 235 | +/// Collect interned [`Name`]s for methods that share backing with their receiver. |
| 236 | +/// |
| 237 | +/// These methods return values that reference the receiver's heap data, |
| 238 | +/// so their return uniqueness is `MaybeShared` (not `Unique` like COW methods). |
| 239 | +/// |
| 240 | +/// See [`SHARING_METHOD_NAMES`] for the full list. |
| 241 | +pub fn sharing_builtin_names(interner: &StringInterner) -> FxHashSet<Name> { |
| 242 | + SHARING_METHOD_NAMES |
| 243 | + .iter() |
| 244 | + .map(|name| interner.intern(name)) |
| 245 | + .collect() |
| 246 | +} |
| 247 | + |
| 248 | +/// Pre-computed interned sets for ARC ownership annotation. |
| 249 | +/// |
| 250 | +/// Groups the builtin method name sets that |
| 251 | +/// [`annotate_arg_ownership`](crate::rc_insert::annotate_arg_ownership) |
| 252 | +/// needs. Constructing this once avoids redundant `intern()` work across |
| 253 | +/// multiple function compilations. |
| 254 | +pub struct BuiltinOwnershipSets { |
| 255 | + /// Methods that borrow their receiver (e.g., `len`, `is_empty`). |
| 256 | + pub borrowing: FxHashSet<Name>, |
| 257 | + /// COW list methods that consume their receiver (e.g., `push`, `reverse`). |
| 258 | + pub consuming_receiver: FxHashSet<Name>, |
| 259 | + /// COW list methods that also consume their second argument (e.g., `add`, `concat`). |
| 260 | + pub consuming_second_arg: FxHashSet<Name>, |
| 261 | + /// COW methods that consume only the receiver; other args are borrowed. |
| 262 | + /// |
| 263 | + /// For Map/Set operations like `remove(key)` and `union(other)`, the |
| 264 | + /// receiver is consumed (COW handles its RC) but the key/other-set is |
| 265 | + /// only read for comparison — its RC must be decremented by the caller. |
| 266 | + pub consuming_receiver_only: FxHashSet<Name>, |
| 267 | +} |
| 268 | + |
| 269 | +impl BuiltinOwnershipSets { |
| 270 | + /// Intern all builtin method name sets from the given interner. |
| 271 | + pub fn new(interner: &StringInterner) -> Self { |
| 272 | + Self { |
| 273 | + borrowing: borrowing_builtin_names(interner), |
| 274 | + consuming_receiver: consuming_receiver_builtin_names(interner), |
| 275 | + consuming_second_arg: consuming_second_arg_builtin_names(interner), |
| 276 | + consuming_receiver_only: consuming_receiver_only_builtin_names(interner), |
| 277 | + } |
| 278 | + } |
| 279 | +} |
| 280 | + |
| 281 | +#[cfg(test)] |
| 282 | +mod tests; |
0 commit comments