Skip to content

Commit c5489ac

Browse files
authored
perf: Don't traverse deep datasets that we repr as union in CSE (#16096)
1 parent 6ded6f0 commit c5489ac

File tree

1 file changed

+28
-6
lines changed
  • crates/polars-plan/src/logical_plan/optimizer/cse

1 file changed

+28
-6
lines changed

crates/polars-plan/src/logical_plan/optimizer/cse/cse_lp.rs

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -174,21 +174,37 @@ impl LpIdentifierVisitor<'_> {
174174
}
175175
}
176176

177+
/// Returns whether a visitor should skip the children (inputs) of `lp`.
///
/// This is `true` only for an `IR::Union` whose `options.from_partitioned_ds`
/// flag is set and that has more than 20 inputs; every other node yields `false`.
fn skip_children(lp: &IR) -> bool {
178+
match lp {
179+
// Don't visit all the files in a `scan *` operation.
180+
// Use an arbitrary limit of 20 files for now.
181+
IR::Union {
182+
options, inputs, ..
183+
} => options.from_partitioned_ds && inputs.len() > 20,
184+
_ => false,
185+
}
186+
}
187+
177188
impl<'a> Visitor for LpIdentifierVisitor<'a> {
178189
type Node = IRNode;
179190
type Arena = IRNodeArena;
180191

181192
fn pre_visit(
182193
&mut self,
183-
_node: &Self::Node,
184-
_arena: &Self::Arena,
194+
node: &Self::Node,
195+
arena: &Self::Arena,
185196
) -> PolarsResult<VisitRecursion> {
186197
self.visit_stack
187198
.push(VisitRecord::Entered(self.pre_visit_idx));
188199
self.pre_visit_idx += 1;
189200

190201
self.identifier_array.push((0, Identifier::new()));
191-
Ok(VisitRecursion::Continue)
202+
203+
if skip_children(node.to_alp(&arena.0)) {
204+
Ok(VisitRecursion::Skip)
205+
} else {
206+
Ok(VisitRecursion::Continue)
207+
}
192208
}
193209

194210
fn post_visit(
@@ -256,7 +272,7 @@ impl<'a> RewritingVisitor for CommonSubPlanRewriter<'a> {
256272

257273
fn pre_visit(
258274
&mut self,
259-
_lp_node: &Self::Node,
275+
lp_node: &Self::Node,
260276
arena: &mut Self::Arena,
261277
) -> PolarsResult<RewriteRecursion> {
262278
if self.visited_idx >= self.identifier_array.len()
@@ -270,7 +286,7 @@ impl<'a> RewritingVisitor for CommonSubPlanRewriter<'a> {
270286
// Id placeholder not overwritten, so we can skip this sub-expression.
271287
if !id.is_valid() {
272288
self.visited_idx += 1;
273-
return Ok(RewriteRecursion::MutateAndContinue);
289+
return Ok(RewriteRecursion::NoMutateAndContinue);
274290
}
275291

276292
let Some((_, count)) = self.sp_count.get(id, &arena.0, &arena.1) else {
@@ -281,7 +297,13 @@ impl<'a> RewritingVisitor for CommonSubPlanRewriter<'a> {
281297
if *count > 1 {
282298
// Rewrite this sub-plan, don't visit its children
283299
Ok(RewriteRecursion::MutateAndStop)
284-
} else {
300+
}
301+
// Never mutate if count <= 1: the post-visit will search for the node and not be able to find it.
302+
else {
303+
// Don't traverse the children.
304+
if skip_children(lp_node.to_alp(&arena.0)) {
305+
return Ok(RewriteRecursion::Stop);
306+
}
285307
// This is a unique plan
286308
// visit its children to see if they are cse
287309
self.visited_idx += 1;

0 commit comments

Comments
 (0)