Skip to content

Commit e97088a

Browse files
authored
fix(rust, python): block is_null predicate in asof join (#5358)
1 parent 861e3e3 commit e97088a

File tree

3 files changed

+57
-3
lines changed
  • polars
    • polars-core/src/frame/hash_join
    • polars-lazy/polars-plan/src/logical_plan/optimizer/predicate_pushdown
  • py-polars/tests/unit

3 files changed

+57
-3
lines changed

polars/polars-core/src/frame/hash_join/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ pub enum JoinType {
125125
Inner,
126126
Outer,
127127
#[cfg(feature = "asof_join")]
128-
#[cfg_attr(feature = "serde", serde(skip))]
129128
AsOf(AsOfOptions),
130129
Cross,
131130
#[cfg(feature = "semi_anti_join")]

polars/polars-lazy/polars-plan/src/logical_plan/optimizer/predicate_pushdown/mod.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,20 @@ use crate::utils::{aexprs_to_schema, check_input_node, has_aexpr};
1212
#[derive(Default)]
1313
pub struct PredicatePushDown {}
1414

15+
fn join_produces_null(how: &JoinType) -> bool {
16+
#[cfg(feature = "asof_join")]
17+
{
18+
matches!(
19+
how,
20+
JoinType::Left | JoinType::Outer | JoinType::Cross | JoinType::AsOf(_)
21+
)
22+
}
23+
#[cfg(not(feature = "asof_join"))]
24+
{
25+
matches!(how, JoinType::Left | JoinType::Outer | JoinType::Cross)
26+
}
27+
}
28+
1529
impl PredicatePushDown {
1630
fn optional_apply_predicate(
1731
&self,
@@ -427,7 +441,7 @@ impl PredicatePushDown {
427441
// join might create null values.
428442
|| has_aexpr(predicate, expr_arena, checks_nulls)
429443
// only these join types produce null values
430-
&& matches!(&options.how, JoinType::Left | JoinType::Outer | JoinType::Cross){
444+
&& join_produces_null(&options.how) {
431445
local_predicates.push(predicate);
432446
continue;
433447
}

py-polars/tests/unit/test_predicates.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import date, timedelta
1+
from datetime import date, datetime, timedelta
22

33
import polars as pl
44

@@ -38,3 +38,44 @@ def test_when_then_implicit_none() -> None:
3838
"literal": ["Foo", "Foo", "Foo", None, None, None],
3939
"bar": ["Foo", "Foo", "Foo", None, None, None],
4040
}
41+
42+
43+
def test_predicate_null_block_asof_join() -> None:
    """Filter on ``is_not_null`` after an asof join and check the surviving rows.

    The left frame holds ids 1-4 while the right frame only holds ids 1-3, so
    the row with id 4 has no partner when joining ``by="id"``. The expected
    output keeps only the three rows whose ``value`` is not null.
    """
    left_frame = pl.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "timestamp": [
                datetime(2022, 1, 1, 10, 0),
                datetime(2022, 1, 1, 10, 1),
                datetime(2022, 1, 1, 10, 2),
                datetime(2022, 1, 1, 10, 3),
            ],
        }
    )
    right_frame = pl.DataFrame(
        {
            "id": [1, 2, 3] * 2,
            "timestamp": [
                datetime(2022, 1, 1, 9, 59, 50),
                datetime(2022, 1, 1, 10, 0, 50),
                datetime(2022, 1, 1, 10, 1, 50),
                datetime(2022, 1, 1, 8, 0, 0),
                datetime(2022, 1, 1, 8, 0, 0),
                datetime(2022, 1, 1, 8, 0, 0),
            ],
            "value": ["a", "b", "c"] * 2,
        }
    )

    # Build the lazy query: asof join first, then the null-checking predicate.
    joined = left_frame.lazy().join_asof(right_frame.lazy(), by="id", on="timestamp")
    only_matched = joined.filter(pl.col("value").is_not_null())
    result = only_matched.collect().to_dict(False)

    expected = {
        "id": [1, 2, 3],
        "timestamp": [
            datetime(2022, 1, 1, 10, 0),
            datetime(2022, 1, 1, 10, 1),
            datetime(2022, 1, 1, 10, 2),
        ],
        "value": ["a", "b", "c"],
    }
    assert result == expected

0 commit comments

Comments
 (0)