Skip to content

Commit f396807

Browse files
kinto0 authored and meta-codesync[bot] committed
Fix exponential memory blowup in dict literal type inference
Summary:
Deeply-nested dict literals (e.g. `{"a": {"b": {"c": ...}}}`) caused exponential memory growth during type inference, with a depth-25 literal consuming ~7.7 GB and triggering OOM.

The root cause was `AnonymousTypedDictInner` storing both the individual `fields` and a pre-computed `value_type` (the union of all field types). Since `value_type` was a clone of the field types, each nesting level doubled the size of the type tree on `Clone`, producing O(2^N) memory usage.

The fix removes the redundant `value_type` field and instead computes it on demand via `compute_value_type()`. This ensures the type tree is only as deep as the actual nesting, reducing pyrefly's total memory for a depth-25 dict from ~7.7 GB to ~239 MB.

Fixes #3286

Reviewed By: stroxler

Differential Revision: D103479382

fbshipit-source-id: 6734b427217ba54e5a3276291811bb77f98654f6
1 parent 24937e5 commit f396807

7 files changed

Lines changed: 50 additions & 19 deletions

File tree

crates/pyrefly_types/src/display.rs

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ use starlark_map::smallmap;
2626

2727
use crate::callable::Function;
2828
use crate::class::Class;
29+
use crate::heap::TypeHeap;
2930
use crate::literal::Lit;
3031
use crate::quantified::Quantified;
3132
use crate::quantified::QuantifiedIdentity;
@@ -38,6 +39,7 @@ use crate::type_output::DisplayOutput;
3839
use crate::type_output::OutputWithLocations;
3940
use crate::type_output::TypeOutput;
4041
use crate::type_var::Restriction;
42+
use crate::typed_dict::AnonymousTypedDictInner;
4143
use crate::typed_dict::TypedDict;
4244
use crate::types::AnyStyle;
4345
use crate::types::BoundMethod;
@@ -421,6 +423,21 @@ impl<'a> TypeDisplayContext<'a> {
421423
}
422424
}
423425

426+
/// Format the value type of an anonymous typed dict by computing the union
427+
/// of all field types on-the-fly. This avoids storing a redundant clone in the
428+
/// type tree (which caused exponential memory growth for nested dict literals).
429+
/// Delegates to `compute_value_type` + `fmt_helper_generic` so that union
430+
/// display (dedup, literal grouping, etc.) stays in one place.
431+
fn fmt_anonymous_typed_dict_value_type(
432+
&self,
433+
inner: &AnonymousTypedDictInner,
434+
output: &mut impl TypeOutput,
435+
) -> fmt::Result {
436+
let heap = TypeHeap::new();
437+
let value_type = inner.compute_value_type(&heap);
438+
self.fmt_helper_generic(&value_type, false, output)
439+
}
440+
424441
/// Core formatting logic for types that works with any `TypeOutput` implementation.
425442
///
426443
/// The method uses the `TypeOutput` trait abstraction to write output in various ways.
@@ -507,7 +524,7 @@ impl<'a> TypeDisplayContext<'a> {
507524
let str_qname = self.stdlib.map(|s| s.str().qname());
508525
output.write_builtin("str", str_qname)?;
509526
output.write_str(", ")?;
510-
self.fmt_helper_generic(&inner.value_type, false, output)?;
527+
self.fmt_anonymous_typed_dict_value_type(inner, output)?;
511528
output.write_str("]")
512529
}
513530
},
@@ -523,7 +540,7 @@ impl<'a> TypeDisplayContext<'a> {
523540
let str_qname = self.stdlib.map(|s| s.str().qname());
524541
output.write_builtin("str", str_qname)?;
525542
output.write_str(", ")?;
526-
self.fmt_helper_generic(&inner.value_type, false, output)?;
543+
self.fmt_anonymous_typed_dict_value_type(inner, output)?;
527544
output.write_str("]")
528545
}
529546
},

crates/pyrefly_types/src/simplify.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,8 @@ fn collapse_literals(
260260
fn promote_anonymous_typed_dicts(types: &mut [Type], stdlib: &Stdlib, heap: &TypeHeap) {
261261
for ty in types.iter_mut() {
262262
if let Type::TypedDict(TypedDict::Anonymous(inner)) = ty {
263-
*ty = heap.mk_class_type(
264-
stdlib.dict(stdlib.str().clone().to_type(), inner.value_type.clone()),
265-
);
263+
let value_type = inner.compute_value_type(heap);
264+
*ty = heap.mk_class_type(stdlib.dict(stdlib.str().clone().to_type(), value_type));
266265
}
267266
}
268267
}

crates/pyrefly_types/src/typed_dict.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use crate::annotation::Qualifier;
1717
use crate::class::Class;
1818
use crate::heap::TypeHeap;
1919
use crate::read_only::ReadOnlyReason;
20+
use crate::simplify;
2021
use crate::stdlib::Stdlib;
2122
use crate::types::Substitution;
2223
use crate::types::TArgs;
@@ -86,7 +87,16 @@ impl TypedDictInner {
8687
)]
8788
pub struct AnonymousTypedDictInner {
8889
pub fields: Vec<(Name, TypedDictField)>,
89-
pub value_type: Type,
90+
}
91+
92+
impl AnonymousTypedDictInner {
93+
/// Compute the union of all field value types. This is derived from `fields`
94+
/// rather than stored, to avoid duplicating the type tree at each nesting
95+
/// level (which caused 2^N memory growth for nested dict literals).
96+
pub fn compute_value_type(&self, heap: &TypeHeap) -> Type {
97+
let tys: Vec<Type> = self.fields.iter().map(|(_, f)| f.ty.clone()).collect();
98+
simplify::unions(tys, heap)
99+
}
90100
}
91101

92102
#[derive(

pyrefly/lib/alt/class/typed_dict.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,7 @@ impl<'a, Ans: LookupAnswer> AnswersSolver<'a, Ans> {
741741
self.heap.mk_class_type(self.stdlib.object().clone())
742742
}
743743
}
744-
TypedDict::Anonymous(inner) => inner.value_type.clone(),
744+
TypedDict::Anonymous(inner) => inner.compute_value_type(self.heap),
745745
}
746746
}
747747

pyrefly/lib/alt/expr.rs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1213,17 +1213,10 @@ impl<'a, Ans: LookupAnswer> AnswersSolver<'a, Ans> {
12131213
&& !typed_dict_fields_map.is_empty()
12141214
&& typed_dict_fields_map.len() <= ANONYMOUS_TYPED_DICT_MAX_ITEMS
12151215
{
1216-
// Compute the fallback value type from the field mapping, not from value_tys which
1217-
// may contain types from overridden keys
1218-
let final_value_tys: Vec<_> = typed_dict_fields_map
1219-
.values()
1220-
.map(|f| f.ty.clone())
1221-
.collect();
12221216
let typed_dict_fields: Vec<_> = typed_dict_fields_map.into_iter().collect();
12231217
return self.heap.mk_typed_dict(TypedDict::Anonymous(Box::new(
12241218
AnonymousTypedDictInner {
12251219
fields: typed_dict_fields,
1226-
value_type: self.unions(final_value_tys),
12271220
},
12281221
)));
12291222
}

pyrefly/lib/test/dict.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,20 @@ assert_type(f(d), TD) # E: assert_type(dict[str, int], TD)
151151
assert_type(f({"x": 0}), TD) # E: `dict[str, int]` is not assignable to upper bound `TD` # E: assert_type(dict[str, int], TD)
152152
"#,
153153
);
154+
155+
// Regression test: deeply nested dict literals previously caused exponential memory growth
156+
// because AnonymousTypedDictInner stored the value type both in `fields` and a redundant
157+
// `value_type` field, doubling the cloned type tree at each nesting level. The fix removed
158+
// the redundant field and computes the value type on demand from `fields`.
159+
//
160+
// Depth 15 is used for CI speed. The fix was verified at depth 25 (239 MB, down from 7.7 GB)
161+
// and depth 50 (236 MB), confirming linear rather than exponential growth.
162+
testcase!(
163+
test_deeply_nested_dict_literal,
164+
r#"
165+
from typing import assert_type
166+
167+
x = {"a": {"b": {"c": {"d": {"e": {"f": {"g": {"h": {"i": {"j": {"k": {"l": {"m": {"n": {"o": "deep"}}}}}}}}}}}}}}}
168+
assert_type(x, dict[str, dict[str, dict[str, dict[str, dict[str, dict[str, dict[str, dict[str, dict[str, dict[str, dict[str, dict[str, dict[str, dict[str, dict[str, str]]]]]]]]]]]]]]])
169+
"#,
170+
);

pyrefly/lib/test/pysa/types.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -690,11 +690,6 @@ class MyTypedDict(TypedDict):
690690
read_only_reason: None,
691691
},
692692
)],
693-
value_type: context
694-
.answers_context
695-
.answers
696-
.heap()
697-
.mk_class_type(context.answers_context.stdlib.int().clone()),
698693
}))),
699694
&context,
700695
),

0 commit comments

Comments
 (0)