Commit db15f96

[executors] feat: add workflow entity
1 parent: 6692a05

File tree

6 files changed: +235 -3 lines


libs/executors/garf_executors/config.py

Lines changed: 3 additions & 1 deletion

@@ -47,5 +47,7 @@ def from_file(cls, path: str | pathlib.Path | os.PathLike[str]) -> Config:
   def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
     """Saves config to local or remote yaml file."""
     with smart_open.open(path, 'w', encoding='utf-8') as f:
-      yaml.dump(self.model_dump().get('sources'), f, encoding='utf-8')
+      yaml.dump(
+        self.model_dump(exclude_none=True).get('sources'), f, encoding='utf-8'
+      )
     return f'Config is saved to {str(path)}'
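
The only functional change here is exclude_none=True: fields that were never set (None) no longer end up as null entries in the saved YAML. A minimal sketch of the effect, using a stand-in pydantic model rather than the real Config (whose full field set is not part of this diff):

import pydantic
import yaml


class Source(pydantic.BaseModel):
  # Stand-in model: field names are illustrative, not Config's.
  writer: str = 'console'
  writer_parameters: dict | None = None


source = Source()
print(yaml.dump(source.model_dump()))
# writer: console
# writer_parameters: null
print(yaml.dump(source.model_dump(exclude_none=True)))
# writer: console

The same exclude_none=True call is reused by Workflow.save in the new module below.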

libs/executors/garf_executors/entrypoints/cli.py

Lines changed: 33 additions & 2 deletions

@@ -24,9 +24,10 @@
 import sys

 from garf_io import reader
+from opentelemetry import trace

 import garf_executors
-from garf_executors import config, exceptions
+from garf_executors import config, exceptions, workflow
 from garf_executors.entrypoints import utils
 from garf_executors.entrypoints.tracer import initialize_tracer
 from garf_executors.telemetry import tracer

@@ -39,6 +40,7 @@ def main():
   parser = argparse.ArgumentParser()
   parser.add_argument('query', nargs='*')
   parser.add_argument('-c', '--config', dest='config', default=None)
+  parser.add_argument('-w', '--workflow', dest='workflow', default=None)
   parser.add_argument('--source', dest='source', default=None)
   parser.add_argument('--output', dest='output', default='console')
   parser.add_argument('--input', dest='input', default='file')

@@ -70,18 +72,47 @@ def main():
   parser.set_defaults(dry_run=False)
   args, kwargs = parser.parse_known_args()

+  span = trace.get_current_span()
+  command_args = ' '.join(sys.argv[1:])
+  span.set_attribute('cli.command', f'garf {command_args}')
   if args.version:
     print(garf_executors.__version__)
     sys.exit()
   logger = utils.init_logging(
     loglevel=args.loglevel.upper(), logger_type=args.logger, name=args.log_name
   )
+  reader_client = reader.create_reader(args.input)
+  if workflow_file := args.workflow:
+    execution_workflow = workflow.Workflow.from_file(workflow_file)
+    for i, step in enumerate(execution_workflow.steps, 1):
+      with tracer.start_as_current_span(f'{i}-{step.fetcher}'):
+        query_executor = garf_executors.setup_executor(
+          source=step.fetcher,
+          fetcher_parameters=step.fetcher_parameters,
+          enable_cache=args.enable_cache,
+          cache_ttl_seconds=args.cache_ttl_seconds,
+        )
+        batch = {}
+        if not (queries := step.queries):
+          logger.error('Please provide one or more queries to run')
+          raise exceptions.GarfExecutorError(
+            'Please provide one or more queries to run'
+          )
+        for query in queries:
+          if isinstance(query, garf_executors.workflow.QueryPath):
+            batch[query.path] = reader_client.read(query.path)
+          else:
+            batch[query.query.title] = query.query.text
+        query_executor.execute_batch(
+          batch, step.context, args.parallel_threshold
+        )
+    sys.exit()
+
   if not args.query:
     logger.error('Please provide one or more queries to run')
     raise exceptions.GarfExecutorError(
       'Please provide one or more queries to run'
     )
-  reader_client = reader.create_reader(args.input)
   if config_file := args.config:
     execution_config = config.Config.from_file(config_file)
     if not (context := execution_config.sources.get(args.source)):
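
The new --workflow branch is self-contained: it loads the YAML workflow, builds an executor per step, reads file-based queries through the reader client, keys inline queries by their title, runs the batch, and exits, so it never reaches the config/--source path below. Condensed into plain Python, the branch amounts to roughly the following sketch; it assumes the caching and parallelism arguments the CLI forwards (omitted here) have usable defaults:

import garf_executors
from garf_executors import workflow
from garf_io import reader

execution_workflow = workflow.Workflow.from_file('workflow.yaml')
reader_client = reader.create_reader('file')
for step in execution_workflow.steps:
  query_executor = garf_executors.setup_executor(
    source=step.fetcher,
    fetcher_parameters=step.fetcher_parameters,
  )
  batch = {}
  for query in step.queries or []:
    if isinstance(query, workflow.QueryPath):
      # Queries referenced by path are read from disk, keyed by that path.
      batch[query.path] = reader_client.read(query.path)
    else:
      # Inline queries are keyed by their title.
      batch[query.query.title] = query.query.text
  # step.context bundles writer/query/fetcher parameters for the batch.
  query_executor.execute_batch(batch, step.context)

The equivalent CLI invocation, exercised by the end-to-end test further down, is garf -w workflow.yaml.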
libs/executors/garf_executors/workflow.py

Lines changed: 96 additions & 0 deletions

@@ -0,0 +1,96 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import os
+import pathlib
+
+import pydantic
+import smart_open
+import yaml
+
+from garf_executors.execution_context import ExecutionContext
+
+
+class QueryPath(pydantic.BaseModel):
+  """Path to a file with a query."""
+
+  path: str
+
+
+class QueryDefinition(pydantic.BaseModel):
+  """Definition of a query."""
+
+  query: Query
+
+
+class Query(pydantic.BaseModel):
+  """Query elements.
+
+  Attributes:
+    text: Query text.
+    title: Name of the query.
+  """
+
+  text: str
+  title: str
+
+
+class ExecutionStep(ExecutionContext):
+  """Common context for executing one or more queries.
+
+  Attributes:
+    fetcher: Name of a fetcher used to get data from an API.
+    alias: Optional alias to identify the execution step.
+    queries: Queries to run for a particular fetcher.
+    context: Execution context for queries and fetcher.
+  """
+
+  fetcher: str | None = None
+  alias: str | None = None
+  queries: list[QueryPath | QueryDefinition] | None = None
+
+  @property
+  def context(self) -> ExecutionContext:
+    return ExecutionContext(
+      writer=self.writer,
+      writer_parameters=self.writer_parameters,
+      query_parameters=self.query_parameters,
+      fetcher_parameters=self.fetcher_parameters,
+    )
+
+
+class Workflow(pydantic.BaseModel):
+  """Orchestrates execution of queries for multiple fetchers.
+
+  Attributes:
+    steps: Contains one or several fetcher executions.
+  """
+
+  steps: list[ExecutionStep]
+
+  @classmethod
+  def from_file(cls, path: str | pathlib.Path | os.PathLike[str]) -> Workflow:
+    """Builds workflow from local or remote yaml file."""
+    with smart_open.open(path, 'r', encoding='utf-8') as f:
+      data = yaml.safe_load(f)
+    return Workflow(steps=data.get('steps'))
+
+  def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
+    """Saves workflow to local or remote yaml file."""
+    with smart_open.open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(
+        self.model_dump(exclude_none=True).get('steps'), f, encoding='utf-8'
+      )
+    return f'Workflow is saved to {str(path)}'
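
A step's queries accept two shapes: a QueryPath ({'path': ...}) pointing at a query file, and a QueryDefinition ({'query': {'title': ..., 'text': ...}}) carrying the text inline. A small usage sketch of the new entity, with illustrative step values modelled on the unit test added further down:

from garf_executors.workflow import Workflow

wf = Workflow(
  steps=[
    {
      'fetcher': 'api',
      'queries': [
        {'path': 'example.sql'},                             # QueryPath
        {'query': {'title': 'inline_query', 'text': 'SELECT 1'}},  # QueryDefinition
      ],
      'writer': 'csv',
    }
  ]
)
print(wf.save('/tmp/workflow.yaml'))  # Workflow is saved to /tmp/workflow.yaml
restored = Workflow.from_file('/tmp/workflow.yaml')
assert restored == wf  # round-trips through YAML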

libs/executors/tests/end-to-end/test_cli.py

Lines changed: 25 additions & 0 deletions

@@ -123,3 +123,28 @@ def test_fake_source_from_config(self, tmp_path):

     assert result.returncode == 0
     assert json.loads(result.stdout) == self.expected_output
+
+  def test_fake_source_from_workflow(self, tmp_path):
+    workflow_path = _SCRIPT_PATH / 'test_workflow.yaml'
+    with open(workflow_path, 'r', encoding='utf-8') as f:
+      workflow_data = yaml.safe_load(f)
+    original_data_location = workflow_data['steps'][0]['fetcher_parameters'][
+      'data_location'
+    ]
+    workflow_data['steps'][0]['fetcher_parameters']['data_location'] = str(
+      _SCRIPT_PATH / original_data_location
+    )
+    tmp_workflow = tmp_path / 'workflow.yaml'
+    with open(tmp_workflow, 'w', encoding='utf-8') as f:
+      yaml.dump(workflow_data, f, encoding='utf-8')
+    command = f'garf -w {str(tmp_workflow)} --loglevel ERROR'
+    result = subprocess.run(
+      command,
+      shell=True,
+      check=False,
+      capture_output=True,
+      text=True,
+    )
+
+    assert result.returncode == 0
+    assert json.loads(result.stdout) == self.expected_output
libs/executors/tests/end-to-end/test_workflow.yaml

Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+steps:
+- alias: test
+  fetcher: fake
+  queries:
+  - query:
+      title: test_query
+      text: |
+        SELECT
+          resource,
+          dimensions.name AS name,
+          metrics.clicks AS clicks
+        FROM resource
+  writer: console
+  writer_parameters:
+    format: json
+  fetcher_parameters:
+    data_location: test.json
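
Parsed with Workflow.from_file, this file yields a single ExecutionStep whose writer settings are exposed through the context property defined in workflow.py above. A short sketch of the parsed step, assuming it is run from the directory containing test_workflow.yaml; the values in the comments follow from the YAML:

from garf_executors.workflow import Workflow

step = Workflow.from_file('test_workflow.yaml').steps[0]
print(step.alias, step.fetcher)      # test fake
print(step.queries[0].query.title)   # test_query
context = step.context              # ExecutionContext carrying writer='console'
                                    # and writer_parameters={'format': 'json'}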
Lines changed: 61 additions & 0 deletions

@@ -0,0 +1,61 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import yaml
+from garf_executors.workflow import Workflow
+
+
+class TestWorkflow:
+  data = {
+    'steps': [
+      {
+        'fetcher': 'api',
+        'queries': [
+          {'path': 'example.sql'},
+          {'query': {'text': 'SELECT 1', 'title': 'example2'}},
+        ],
+        'query_parameters': {
+          'macro': {
+            'start_date': '2025-01-01',
+          },
+          'template': {
+            'cohorts': 1,
+          },
+        },
+        'fetcher_parameters': {
+          'id': [1, 2, 3],
+        },
+        'writer': 'csv',
+        'writer_parameters': {
+          'destination_folder': '/tmp',
+        },
+      }
+    ]
+  }
+
+  def test_from_file_returns_correct_context_from_data(self, tmp_path):
+    tmp_workflow = tmp_path / 'workflow.yaml'
+    with open(tmp_workflow, 'w', encoding='utf-8') as f:
+      yaml.dump(self.data, f, encoding='utf-8')
+    workflow = Workflow.from_file(tmp_workflow)
+    expected_workflow = Workflow(steps=self.data.get('steps'))
+    assert workflow == expected_workflow
+
+  def test_save_returns_correct_data(self, tmp_path):
+    tmp_workflow = tmp_path / 'workflow.yaml'
+    workflow = Workflow(steps=self.data.get('steps'))
+    workflow.save(tmp_workflow)
+    with open(tmp_workflow, 'r', encoding='utf-8') as f:
+      workflow_data = yaml.safe_load(f)
+    assert workflow_data == self.data.get('steps')