Skip to content

Commit ef0f7a5

Browse files
salazarmcmpadden
authored and committed
[Run Timeline] Don't drop job data if locations within the FutureTicksQuery fails (#27944)
## Summary & Motivation Currently, if the FutureTicksQuery itself fails we recover by returning the ongoing runs and complete runs data that we have. However, if the query doesn't fail then we end up iterating over all of the locations within it and constructing a `jobs` array with all of the rows of the timeline. The problem is that this construction relies on the jobs returned by FutureTicksQuery. If FutureTicksQuery doesn't return a particular job then we drop the data for that job completely. To fix this track which keys we've added via the FutureTicksQuery and then do a second pass where we add data for any jobs that were not in the FutureTicksQuery ## How I Tested These Changes Loaded the Run timeline for a customer with a failing location entry ## Changelog > Insert changelog entry or delete this section.
1 parent 3f396fa commit ef0f7a5

File tree

84 files changed

+1104
-1725
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+1104
-1725
lines changed
Loading

Diff for: examples/docs_beta_snippets/docs_beta_snippets/guides/components/shell-script-component/2-shell-command-empty.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import dagster as dg
2-
1+
from dagster import Definitions
32
from dagster_components import (
43
Component,
54
ComponentLoadContext,
@@ -26,6 +25,6 @@ def get_schema(cls):
2625
def get_scaffolder(cls) -> DefaultComponentScaffolder:
2726
return DefaultComponentScaffolder()
2827

29-
def build_defs(self, load_context: ComponentLoadContext) -> dg.Definitions:
28+
def build_defs(self, load_context: ComponentLoadContext) -> Definitions:
3029
# Add definition construction logic here.
31-
return dg.Definitions()
30+
return Definitions()
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,23 @@
11
import pandas as pd
22

3-
from dagster import (
4-
AssetCheckResult,
5-
AssetCheckSpec,
6-
AssetExecutionContext,
7-
Definitions,
8-
Output,
9-
asset,
10-
)
3+
import dagster as dg
114

125

13-
@asset(check_specs=[AssetCheckSpec(name="orders_id_has_no_nulls", asset="orders")])
14-
def orders(context: AssetExecutionContext):
6+
@dg.asset(
7+
check_specs=[dg.AssetCheckSpec(name="orders_id_has_no_nulls", asset="orders")]
8+
)
9+
def orders(context: dg.AssetExecutionContext):
1510
orders_df = pd.DataFrame({"order_id": [1, 2], "item_id": [432, 878]})
1611

1712
# save the output and indicate that it's been saved
1813
orders_df.to_csv("orders")
19-
yield Output(value=None)
14+
yield dg.Output(value=None)
2015

2116
# check it
2217
num_null_order_ids = orders_df["order_id"].isna().sum()
23-
yield AssetCheckResult(
18+
yield dg.AssetCheckResult(
2419
passed=bool(num_null_order_ids == 0),
2520
)
2621

2722

28-
defs = Definitions(assets=[orders])
23+
defs = dg.Definitions(assets=[orders])

Diff for: examples/docs_snippets/docs_snippets/concepts/assets/asset_checks/factory.py

+12-16
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,46 @@
11
from collections.abc import Mapping
22
from unittest.mock import MagicMock
33

4-
from dagster import (
5-
AssetCheckResult,
6-
AssetChecksDefinition,
7-
Definitions,
8-
asset,
9-
asset_check,
10-
)
4+
import dagster as dg
115

126

13-
@asset
7+
@dg.asset
148
def orders(): ...
159

1610

17-
@asset
11+
@dg.asset
1812
def items(): ...
1913

2014

21-
def make_check(check_blob: Mapping[str, str]) -> AssetChecksDefinition:
22-
@asset_check(
15+
def make_check(check_blob: Mapping[str, str]) -> dg.AssetChecksDefinition:
16+
@dg.asset_check(
2317
name=check_blob["name"],
24-
asset=check_blob["asset"],
18+
asset=check_blob["dg.asset"],
2519
required_resource_keys={"db_connection"},
2620
)
2721
def _check(context):
2822
rows = context.resources.db_connection.execute(check_blob["sql"])
29-
return AssetCheckResult(passed=len(rows) == 0, metadata={"num_rows": len(rows)})
23+
return dg.AssetCheckResult(
24+
passed=len(rows) == 0, metadata={"num_rows": len(rows)}
25+
)
3026

3127
return _check
3228

3329

3430
check_blobs = [
3531
{
3632
"name": "orders_id_has_no_nulls",
37-
"asset": "orders",
33+
"dg.asset": "orders",
3834
"sql": "select * from orders where order_id is null",
3935
},
4036
{
4137
"name": "items_id_has_no_nulls",
42-
"asset": "items",
38+
"dg.asset": "items",
4339
"sql": "select * from items where item_id is null",
4440
},
4541
]
4642

47-
defs = Definitions(
43+
defs = dg.Definitions(
4844
assets=[orders, items],
4945
asset_checks=[make_check(check_blob) for check_blob in check_blobs],
5046
resources={"db_connection": MagicMock()},

Diff for: examples/docs_snippets/docs_snippets/concepts/assets/asset_checks/jobs.py

+14-19
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,42 @@
1-
from dagster import (
2-
AssetSelection,
3-
Definitions,
4-
ScheduleDefinition,
5-
asset,
6-
asset_check,
7-
define_asset_job,
8-
)
1+
import dagster as dg
92

103

11-
@asset
4+
@dg.asset
125
def my_asset(): ...
136

147

15-
@asset_check(asset=my_asset)
8+
@dg.asset_check(asset=my_asset)
169
def check_1(): ...
1710

1811

19-
@asset_check(asset=my_asset)
12+
@dg.asset_check(asset=my_asset)
2013
def check_2(): ...
2114

2215

2316
# includes my_asset and both checks
24-
my_job = define_asset_job("my_job", selection=AssetSelection.assets(my_asset))
17+
my_job = dg.define_asset_job("my_job", selection=dg.AssetSelection.assets(my_asset))
2518

2619

2720
# includes only my_asset
28-
my_asset_only_job = define_asset_job(
21+
my_asset_only_job = dg.define_asset_job(
2922
"my_asset_only_job",
30-
selection=AssetSelection.assets(my_asset).without_checks(),
23+
selection=dg.AssetSelection.assets(my_asset).without_checks(),
3124
)
3225

3326
# includes check_1 and check_2, but not my_asset
34-
checks_only_job = define_asset_job(
35-
"checks_only_job", selection=AssetSelection.checks_for_assets(my_asset)
27+
checks_only_job = dg.define_asset_job(
28+
"checks_only_job", selection=dg.AssetSelection.checks_for_assets(my_asset)
3629
)
3730

3831
# includes only check_1
39-
check_1_job = define_asset_job("check_1_job", selection=AssetSelection.checks(check_1))
32+
check_1_job = dg.define_asset_job(
33+
"check_1_job", selection=dg.AssetSelection.checks(check_1)
34+
)
4035

4136
# schedule my_job to run every day at midnight
42-
basic_schedule = ScheduleDefinition(job=my_job, cron_schedule="0 0 * * *")
37+
basic_schedule = dg.ScheduleDefinition(job=my_job, cron_schedule="0 0 * * *")
4338

44-
defs = Definitions(
39+
defs = dg.Definitions(
4540
assets=[my_asset],
4641
asset_checks=[check_1, check_2],
4742
jobs=[my_job, my_asset_only_job, checks_only_job, check_1_job],

Diff for: examples/docs_snippets/docs_snippets/concepts/assets/asset_checks/materializable_freshness_complete.py

+5-10
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,19 @@
11
from datetime import timedelta
22

3-
from dagster import (
4-
Definitions,
5-
asset,
6-
build_last_update_freshness_checks,
7-
build_sensor_for_freshness_checks,
8-
)
3+
import dagster as dg
94

105

11-
@asset
6+
@dg.asset
127
def my_asset(): ...
138

149

15-
asset1_freshness_checks = build_last_update_freshness_checks(
10+
asset1_freshness_checks = dg.build_last_update_freshness_checks(
1611
assets=[my_asset], lower_bound_delta=timedelta(hours=2)
1712
)
18-
freshness_checks_sensor = build_sensor_for_freshness_checks(
13+
freshness_checks_sensor = dg.build_sensor_for_freshness_checks(
1914
freshness_checks=asset1_freshness_checks
2015
)
21-
defs = Definitions(
16+
defs = dg.Definitions(
2217
assets=[my_asset],
2318
asset_checks=asset1_freshness_checks,
2419
sensors=[freshness_checks_sensor],

Diff for: examples/docs_snippets/docs_snippets/concepts/assets/asset_checks/multi_asset_check.py

+8-14
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,23 @@
11
from collections.abc import Iterable
22

3-
from dagster import (
4-
AssetCheckExecutionContext,
5-
AssetCheckResult,
6-
AssetCheckSeverity,
7-
AssetCheckSpec,
8-
multi_asset_check,
9-
)
3+
import dagster as dg
104

115

12-
@multi_asset_check(
6+
@dg.multi_asset_check(
137
specs=[
14-
AssetCheckSpec(name="asset_check_one", asset="my_asset_one"),
15-
AssetCheckSpec(name="asset_check_two", asset="my_asset_two"),
8+
dg.AssetCheckSpec(name="asset_check_one", asset="my_asset_one"),
9+
dg.AssetCheckSpec(name="asset_check_two", asset="my_asset_two"),
1610
]
1711
)
18-
def the_check(context: AssetCheckExecutionContext) -> Iterable[AssetCheckResult]:
19-
yield AssetCheckResult(
12+
def the_check(context: dg.AssetCheckExecutionContext) -> Iterable[dg.AssetCheckResult]:
13+
yield dg.AssetCheckResult(
2014
passed=False,
21-
severity=AssetCheckSeverity.WARN,
15+
severity=dg.AssetCheckSeverity.WARN,
2216
description="The asset is over 0.5",
2317
asset_key="asset_check_one",
2418
)
2519

26-
yield AssetCheckResult(
20+
yield dg.AssetCheckResult(
2721
passed=True,
2822
description="The asset is fresh.",
2923
asset_key="asset_check_two",
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,19 @@
1-
from dagster import (
2-
AssetCheckResult,
3-
AssetCheckSeverity,
4-
Definitions,
5-
asset,
6-
asset_check,
7-
)
1+
import dagster as dg
82

93

10-
@asset
4+
@dg.asset
115
def my_asset(): ...
126

137

14-
@asset_check(asset=my_asset)
8+
@dg.asset_check(asset=my_asset)
159
def my_check():
1610
is_serious = ...
17-
return AssetCheckResult(
11+
return dg.AssetCheckResult(
1812
passed=False,
19-
severity=AssetCheckSeverity.ERROR if is_serious else AssetCheckSeverity.WARN,
13+
severity=dg.AssetCheckSeverity.ERROR
14+
if is_serious
15+
else dg.AssetCheckSeverity.WARN,
2016
)
2117

2218

23-
defs = Definitions(assets=[my_asset], asset_checks=[my_check])
19+
defs = dg.Definitions(assets=[my_asset], asset_checks=[my_check])

Diff for: examples/docs_snippets/docs_snippets/concepts/assets/asset_checks/source_data_freshness_complete.py

+13-24
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,14 @@
22

33
from dagster_snowflake import SnowflakeResource, fetch_last_updated_timestamps
44

5-
from dagster import (
6-
AssetSelection,
7-
AssetSpec,
8-
Definitions,
9-
EnvVar,
10-
MetadataValue,
11-
ObserveResult,
12-
ScheduleDefinition,
13-
build_last_update_freshness_checks,
14-
define_asset_job,
15-
multi_observable_source_asset,
16-
)
5+
import dagster as dg
176

187
TABLE_SCHEMA = "PUBLIC"
198
table_names = ["charges", "customers"]
20-
asset_specs = [AssetSpec(table_name) for table_name in table_names]
9+
asset_specs = [dg.AssetSpec(table_name) for table_name in table_names]
2110

2211

23-
@multi_observable_source_asset(specs=asset_specs)
12+
@dg.multi_observable_source_asset(specs=asset_specs)
2413
def source_tables(snowflake: SnowflakeResource):
2514
with snowflake.get_connection() as conn:
2615
freshness_results = fetch_last_updated_timestamps(
@@ -29,42 +18,42 @@ def source_tables(snowflake: SnowflakeResource):
2918
schema=TABLE_SCHEMA,
3019
)
3120
for table_name, last_updated in freshness_results.items():
32-
yield ObserveResult(
21+
yield dg.ObserveResult(
3322
asset_key=table_name,
3423
metadata={
35-
"dagster/last_updated_timestamp": MetadataValue.timestamp(
24+
"dagster/last_updated_timestamp": dg.MetadataValue.timestamp(
3625
last_updated
3726
)
3827
},
3928
)
4029

4130

42-
source_tables_observation_schedule = ScheduleDefinition(
43-
job=define_asset_job(
31+
source_tables_observation_schedule = dg.ScheduleDefinition(
32+
job=dg.define_asset_job(
4433
"source_tables_observation_job",
45-
selection=AssetSelection.assets(source_tables),
34+
selection=dg.AssetSelection.assets(source_tables),
4635
),
4736
# Runs every minute. Usually, a much less frequent cadence is necessary,
4837
# but a short cadence makes it easier to play around with this example.
4938
cron_schedule="* * * * *",
5039
)
5140

5241

53-
source_table_freshness_checks = build_last_update_freshness_checks(
42+
source_table_freshness_checks = dg.build_last_update_freshness_checks(
5443
assets=[source_tables],
5544
lower_bound_delta=timedelta(hours=2),
5645
)
5746

5847

59-
defs = Definitions(
48+
defs = dg.Definitions(
6049
assets=[source_tables],
6150
asset_checks=source_table_freshness_checks,
6251
schedules=[source_tables_observation_schedule],
6352
resources={
6453
"snowflake": SnowflakeResource(
65-
user=EnvVar("SNOWFLAKE_USER"),
66-
account=EnvVar("SNOWFLAKE_ACCOUNT"),
67-
password=EnvVar("SNOWFLAKE_PASSWORD"),
54+
user=dg.EnvVar("SNOWFLAKE_USER"),
55+
account=dg.EnvVar("SNOWFLAKE_ACCOUNT"),
56+
password=dg.EnvVar("SNOWFLAKE_PASSWORD"),
6857
)
6958
},
7059
)

Diff for: examples/docs_snippets/docs_snippets/concepts/assets/asset_checks/source_data_freshness_in_pieces.py

+5-10
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,14 @@
11
# start_multi_observable_marker
22
from dagster_snowflake import SnowflakeResource, fetch_last_updated_timestamps
33

4-
from dagster import (
5-
AssetSpec,
6-
MetadataValue,
7-
ObserveResult,
8-
multi_observable_source_asset,
9-
)
4+
import dagster as dg
105

116
TABLE_SCHEMA = "PUBLIC"
127
table_names = ["charges", "customers"]
13-
asset_specs = [AssetSpec(table_name) for table_name in table_names]
8+
asset_specs = [dg.AssetSpec(table_name) for table_name in table_names]
149

1510

16-
@multi_observable_source_asset(specs=asset_specs)
11+
@dg.multi_observable_source_asset(specs=asset_specs)
1712
def source_tables(snowflake: SnowflakeResource):
1813
with snowflake.get_connection() as conn:
1914
freshness_results = fetch_last_updated_timestamps(
@@ -22,10 +17,10 @@ def source_tables(snowflake: SnowflakeResource):
2217
schema=TABLE_SCHEMA,
2318
)
2419
for table_name, last_updated in freshness_results.items():
25-
yield ObserveResult(
20+
yield dg.ObserveResult(
2621
asset_key=table_name,
2722
metadata={
28-
"dagster/last_updated_timestamp": MetadataValue.timestamp(
23+
"dagster/last_updated_timestamp": dg.MetadataValue.timestamp(
2924
last_updated
3025
)
3126
},

0 commit comments

Comments (0)