@@ -1,12 +1,6 @@
 import datetime
-
-from airflow.decorators import dag, task_group
-from airflow.providers.airbyte.operators.airbyte import \
-    AirbyteTriggerSyncOperator
-from fivetran_provider_async.operators import FivetranOperator
-from fivetran_provider_async.sensors import FivetranSensor
-from operators.datacoves.bash import DatacovesBashOperator
-from operators.datacoves.dbt import DatacovesDbtOperator
+from airflow.decorators import dag, task, task_group
+from datahub_airflow_plugin.entities import Dataset
 
 @dag(
     default_args={"start_date": datetime.datetime(2024, 1, 1, 0, 0), "retries": 3},
@@ -16,79 +10,98 @@
     catchup=False,
 )
 def daily_loan_run():
+
     @task_group(group_id="extract_and_load_airbyte", tooltip="Airbyte Extract and Load")
     def extract_and_load_airbyte():
-        country_populations_datacoves_snowflake = AirbyteTriggerSyncOperator(
-            task_id="country_populations_datacoves_snowflake",
-            connection_id="ac02ea96-58a1-4061-be67-78900bb5aaf6",
-            airbyte_conn_id="airbyte_connection",
-        )
+        @task
+        def sync_airbyte():
+            from airflow.providers.airbyte.operators.airbyte import AirbyteTriggerSyncOperator
+            return AirbyteTriggerSyncOperator(
+                task_id="country_populations_datacoves_snowflake",
+                connection_id="b293aaea-6557-4506-8cfb-6b621ec4c6ef",
+                airbyte_conn_id="airbyte_connection",
+            ).execute({})
+
+        sync_airbyte()
 
     tg_extract_and_load_airbyte = extract_and_load_airbyte()
 
-    @task_group(
-        group_id="extract_and_load_fivetran", tooltip="Fivetran Extract and Load"
-    )
+    @task_group(group_id="extract_and_load_fivetran", tooltip="Fivetran Extract and Load")
     def extract_and_load_fivetran():
-        datacoves_snowflake_google_analytics_4_trigger = FivetranOperator(
-            task_id="datacoves_snowflake_google_analytics_4_trigger",
-            fivetran_conn_id="fivetran_connection",
-            connector_id="speak_menial",
-            wait_for_completion=False,
-        )
-        datacoves_snowflake_google_analytics_4_sensor = FivetranSensor(
-            task_id="datacoves_snowflake_google_analytics_4_sensor",
-            fivetran_conn_id="fivetran_connection",
-            connector_id="speak_menial",
-            poke_interval=60,
-        )
-        (
-            datacoves_snowflake_google_analytics_4_trigger
-            >> datacoves_snowflake_google_analytics_4_sensor
-        )
+
+        @task
+        def trigger_fivetran():
+            from fivetran_provider_async.operators import FivetranOperator
+            return FivetranOperator(
+                task_id="datacoves_snowflake_google_analytics_4_trigger",
+                fivetran_conn_id="fivetran_connection",
+                connector_id="speak_menial",
+                wait_for_completion=False,
+            ).execute({})
+
+        @task
+        def sensor_fivetran():
+            from fivetran_provider_async.sensors import FivetranSensor
+            return FivetranSensor(
+                task_id="datacoves_snowflake_google_analytics_4_sensor",
+                fivetran_conn_id="fivetran_connection",
+                connector_id="speak_menial",
+                poke_interval=60,
+            ).poke({})
+
+        trigger = trigger_fivetran()
+        sensor = sensor_fivetran()
+
+        trigger >> sensor
+        return sensor  # Return last task in the group
 
     tg_extract_and_load_fivetran = extract_and_load_fivetran()
 
     @task_group(group_id="extract_and_load_dlt", tooltip="dlt Extract and Load")
     def extract_and_load_dlt():
-        load_us_population = DatacovesBashOperator(
-            task_id="load_loads_data",
-            bash_command="""
-            cd load/dlt \
-            && ./loans_data.py
-            """,
+        @task.datacoves_bash(
+            outlets=[Dataset("snowflake", "raw.loans_data.loans_data")],
             env={
                 "UV_CACHE_DIR": "/tmp/uv_cache",
-                "EXTRACT__NEXT_ITEM_MODE":"fifo",
-                "EXTRACT__MAX_PARALLEL_ITEMS":"1",
-                "EXTRACT__WORKERS":"1",
-                "NORMALIZE__WORKERS":"1",
-                "LOAD__WORKERS":"1",
+                "EXTRACT__NEXT_ITEM_MODE": "fifo",
+                "EXTRACT__MAX_PARALLEL_ITEMS": "1",
+                "EXTRACT__WORKERS": "1",
+                "NORMALIZE__WORKERS": "1",
+                "LOAD__WORKERS": "1",
             },
-            append_env=True,
+            append_env=True
        )
+        def load_loans_data():
+            return "cd load/dlt && ./loans_data.py"
+        load_loans_data()
 
     tg_extract_and_load_dlt = extract_and_load_dlt()
-    transform = DatacovesDbtOperator(
-        task_id="transform",
-        bash_command="dbt build -s 'tag:daily_run_airbyte+ tag:daily_run_fivetran+'",
-    )
-    transform.set_upstream(
-        [
-            tg_extract_and_load_airbyte,
-            tg_extract_and_load_dlt,
-            tg_extract_and_load_fivetran,
+
+    @task.datacoves_dbt(
+        connection_id="main",
+        inlets=[
+            Dataset("snowflake", "raw.loans_data.loans_data"),
+            Dataset("snowflake", "raw.google_analytics_4.engagement_events_report")
         ]
     )
-    marketing_automation = DatacovesBashOperator(
-        task_id="marketing_automation",
-        bash_command="echo 'send data to marketing tool'",
-    )
-    marketing_automation.set_upstream([transform])
-    update_catalog = DatacovesBashOperator(
-        task_id="update_catalog", bash_command="echo 'refresh data catalog'"
-    )
-    update_catalog.set_upstream([transform])
+    def transform():
+        return "dbt build -s 'tag:daily_run_airbyte+ tag:daily_run_fivetran+' -t prd"
+
+    @task.datacoves_bash
+    def marketing_automation():
+        return "echo 'send data to marketing tool'"
+
+    @task.datacoves_bash
+    def update_catalog():
+        return "echo 'refresh data catalog'"
+
+
+    transform_task = transform()
+    marketing_automation_task = marketing_automation()
+    update_catalog_task = update_catalog()
 
+    [tg_extract_and_load_airbyte, tg_extract_and_load_dlt, tg_extract_and_load_fivetran] >> transform_task
+    transform_task >> [marketing_automation_task, update_catalog_task]
 
+# Invoke DAG
 dag = daily_loan_run()