
Commit 1d7128c

feat(pipeline,executor,examples): add hello_world_parallel and improve run/executor logic

- Add example pipeline hello_world_parallel (YAML + Python)
- Improve pipeline execution:
  - initialize setup_logging earlier
  - enable dynamic execution and configure the local/remote executor correctly
  - handle retry exceptions and error logging more robustly
- Adjust ExecutorFactory:
  - use the hamilton.execution.executors MultiThreading/MultiProcessing executor API
  - simplify config normalization and its return value
- Bump versions in pyproject.toml and uv.lock
1 parent 51f0c9f commit 1d7128c

File tree

6 files changed (+145, -55 lines)

Lines changed: 45 additions & 0 deletions

@@ -0,0 +1,45 @@
+adapter:
+  hamilton_tracker:
+    capture_data_statistics: true
+    dag_name: null
+    max_dict_length_capture: 10
+    max_list_length_capture: 50
+    project_id: null
+    tags: !munch.Munch {}
+  mlflow:
+    experiment_description: null
+    experiment_name: null
+    experiment_tags: !munch.Munch {}
+    run_description: null
+    run_id: null
+    run_name: null
+    run_tags: !munch.Munch {}
+params: !munch.Munch {}
+run:
+  adapter: null
+  cache: false
+  config: !munch.Munch {}
+  executor:
+    max_workers: 60
+    num_cpus: 12
+    type: threadpool
+  final_vars: []
+  inputs: {}
+  jitter_factor: 0.1
+  log_level: INFO
+  max_retries: 3
+  on_failure: null
+  on_success: null
+  pipeline_adapter_cfg: null
+  project_adapter_cfg: null
+  reload: false
+  retry_delay: 1
+  retry_exceptions:
+  - <class 'Exception'>
+  with_adapter:
+    future: false
+    hamilton_tracker: false
+    mlflow: false
+    opentelemetry: false
+    progressbar: false
+    ray: false
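
This new YAML is the generated default config for the example pipeline; its run.executor block (type: threadpool, max_workers: 60) is what the reworked ExecutorFactory further down consumes. FlowerPower loads it through Config.load with munch objects, but a minimal sketch of inspecting that block outside FlowerPower with plain PyYAML, treating the !munch.Munch tags as dicts, looks like this (the file path is an assumption):

import yaml

def _as_dict(loader, node):
    # Read !munch.Munch nodes as plain dicts, enough for inspection
    return loader.construct_mapping(node)

yaml.SafeLoader.add_constructor("!munch.Munch", _as_dict)

with open("conf/pipelines/hello_world_parallel.yml") as f:  # assumed location
    cfg = yaml.safe_load(f)

executor = cfg["run"]["executor"]
print(executor["type"], executor["max_workers"], executor["num_cpus"])
# threadpool 60 12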
Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
+# FlowerPower pipeline hello_world_parallel.py
+# Created on 2025-10-14 02:39:22
+
+####################################################################################################
+# Import necessary libraries
+# NOTE: Remove or comment out imports that are not used in the pipeline
+
+from hamilton.function_modifiers import parameterize, dataloader, datasaver
+from hamilton.htypes import Parallelizable, Collect
+
+from pathlib import Path
+
+from flowerpower.cfg import Config
+
+####################################################################################################
+# Load pipeline parameters. Do not modify this section.
+
+PARAMS = Config.load(
+    Path(__file__).parents[1], pipeline_name="hello_world_parallel"
+).pipeline.h_params
+
+
+####################################################################################################
+# Helper functions.
+# These functions have to start with an underscore (_).
+
+
+####################################################################################################
+# Pipeline functions
+
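The generated module imports Parallelizable and Collect but leaves the "Pipeline functions" section empty. A minimal sketch of what that section could contain, using Hamilton's fan-out/fan-in pattern; the function names and inputs are illustrative, not part of the commit:

from hamilton.htypes import Collect, Parallelizable

def greeting_target(targets: list[str]) -> Parallelizable[str]:
    # Fan out: each yielded item becomes an independent task branch
    for target in targets:
        yield target

def greeting(greeting_target: str) -> str:
    # Runs once per branch, on the remote executor
    return f"Hello, {greeting_target}!"

def greetings(greeting: Collect[str]) -> list[str]:
    # Fan in: collect the per-branch results
    return list(greeting)

Executing such a pipeline requires the dynamic-execution driver set up in pipeline.py below.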

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@ description = "A simple workflow framework for building and managing data proces
 authors = [{ name = "Volker L.", email = "[email protected]" }]
 readme = "README.md"
 requires-python = ">= 3.11"
-version = "0.31.3"
+version = "0.31.4"
 keywords = ["hamilton", "workflow", "pipeline", "scheduler", "dask", "ray"]

 dependencies = [

src/flowerpower/pipeline/pipeline.py

Lines changed: 56 additions & 40 deletions

@@ -9,10 +9,11 @@
 import random
 import time
 from typing import TYPE_CHECKING, Any, Callable
-from requests.exceptions import HTTPError, ConnectionError, Timeout # Example exception
+from requests.exceptions import HTTPError, ConnectionError, Timeout  # Example exception

 import humanize
 import msgspec
+from loguru import logger
 from hamilton import driver
 from hamilton.execution import executors
 from hamilton.registry import disable_autoload
@@ -23,6 +24,8 @@
 from .. import settings
 from ..utils.adapter import create_adapter_manager
 from ..utils.executor import create_executor_factory
+from ..utils.logging import setup_logging
+

 if importlib.util.find_spec("opentelemetry"):
     from hamilton.plugins import h_opentelemetry
@@ -37,11 +40,6 @@
 else:
     h_mlflow = None

-from hamilton.plugins import h_rich
-from hamilton.plugins.h_threadpool import FutureAdapter
-from hamilton_sdk.adapters import HamiltonTracker
-from hamilton_sdk.tracking import constants
-from loguru import logger

 if importlib.util.find_spec("distributed"):
     from dask import distributed
@@ -58,7 +56,7 @@
     ray = None
     h_ray = None

-from ..cfg import PipelineConfig, ProjectConfig
+from ..cfg import PipelineConfig
 from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
 from ..cfg.pipeline.run import ExecutorConfig, RunConfig
 from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
@@ -67,6 +65,8 @@
 if TYPE_CHECKING:
     from ..flowerpower import FlowerPowerProject

+setup_logging(level=settings.LOG_LEVEL)
+

 class Pipeline(msgspec.Struct):
     """Active pipeline object that encapsulates its own execution logic.
@@ -100,12 +100,7 @@ def __post_init__(self):
         self._adapter_manager = create_adapter_manager()
         self._executor_factory = create_executor_factory()

-
-    def run(
-        self,
-        run_config: RunConfig | None = None,
-        **kwargs
-    ) -> dict[str, Any]:
+    def run(self, run_config: RunConfig | None = None, **kwargs) -> dict[str, Any]:
         """Execute the pipeline with the given parameters.

         Args:
@@ -120,7 +115,7 @@ def run(

         # Initialize run_config with pipeline defaults if not provided
         run_config = run_config or self.config.run
-
+
         # Merge kwargs into the run_config
         if kwargs:
             run_config = merge_run_config_with_kwargs(run_config, kwargs)
@@ -131,7 +126,10 @@

         # Set up retry configuration
         retry_config = self._setup_retry_config(
-            run_config.max_retries, run_config.retry_delay, run_config.jitter_factor, run_config.retry_exceptions
+            run_config.max_retries,
+            run_config.retry_delay,
+            run_config.jitter_factor,
+            run_config.retry_exceptions,
         )
         max_retries = retry_config["max_retries"]
         retry_delay = retry_config["retry_delay"]
@@ -165,22 +163,22 @@ def _setup_retry_config(
         converted_exceptions = []
         # Safe mapping of exception names to classes
         exception_mapping = {
-            'Exception': Exception,
-            'ValueError': ValueError,
-            'TypeError': TypeError,
-            'RuntimeError': RuntimeError,
-            'FileNotFoundError': FileNotFoundError,
-            'PermissionError': PermissionError,
-            'ConnectionError': ConnectionError,
-            'TimeoutError': TimeoutError,
-            'KeyError': KeyError,
-            'AttributeError': AttributeError,
-            'ImportError': ImportError,
-            'OSError': OSError,
-            'IOError': IOError,
-            'HTTPError': HTTPError,
-            'ConnectionError': ConnectionError,
-            'Timeout': Timeout,
+            "Exception": Exception,
+            "ValueError": ValueError,
+            "TypeError": TypeError,
+            "RuntimeError": RuntimeError,
+            "FileNotFoundError": FileNotFoundError,
+            "PermissionError": PermissionError,
+            "ConnectionError": ConnectionError,
+            "TimeoutError": TimeoutError,
+            "KeyError": KeyError,
+            "AttributeError": AttributeError,
+            "ImportError": ImportError,
+            "OSError": OSError,
+            "IOError": IOError,
+            "HTTPError": HTTPError,
+            "ConnectionError": ConnectionError,
+            "Timeout": Timeout,
         }
         for exc in retry_exceptions:
             if isinstance(exc, str):
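
An aside on the hunk above: the literal mapping lists "ConnectionError" twice; duplicate dict keys are legal Python, so the second entry silently wins, and both resolve to the requests ConnectionError imported at the top of the file, which shadows the builtin of the same name. The mapping exists because retry_exceptions arrives from YAML as strings (see retry_exceptions in the example config). A simplified, hypothetical sketch of the conversion the surrounding loop performs, assuming unknown names fall back to Exception; this is not the literal method body:

def _convert_retry_exceptions(retry_exceptions: list, mapping: dict) -> tuple:
    converted = []
    for exc in retry_exceptions:
        if isinstance(exc, str):
            # Resolve "ValueError" -> ValueError; unknown names fall back to Exception
            converted.append(mapping.get(exc, Exception))
        elif isinstance(exc, type) and issubclass(exc, BaseException):
            converted.append(exc)  # already an exception class, keep as-is
    return tuple(converted)

# e.g. _convert_retry_exceptions(["TimeoutError", ValueError], {"TimeoutError": TimeoutError})
# -> (TimeoutError, ValueError)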
@@ -296,17 +294,33 @@ def _execute_pipeline(
     ) -> dict[str, Any]:
         """Execute the pipeline with Hamilton."""
         # Set up execution context
-        executor, shutdown_func, adapters = self._setup_execution_context(run_config=run_config)
-
+        executor, shutdown_func, adapters = self._setup_execution_context(
+            run_config=run_config
+        )
+        if (
+            run_config.executor.type != "synchronous"
+            or run_config.executor.type == "local"
+        ):
+            allow_experimental_mode = True
+            synchronous_executor = False
+        else:
+            allow_experimental_mode = False
         try:
             # Create Hamilton driver
             dr = (
                 driver.Builder()
-                .with_config(run_config.config)
                 .with_modules(self.module)
+                .with_config(run_config.config)
                 .with_adapters(*adapters)
-                .build()
+                .enable_dynamic_execution(
+                    allow_experimental_mode=allow_experimental_mode
+                )
+                .with_local_executor(executors.SynchronousLocalTaskExecutor())
             )
+            if not synchronous_executor:
+                dr = dr.with_remote_executor(executor)
+
+            dr = dr.build()

             # Execute the pipeline
             result = dr.execute(
@@ -352,11 +366,11 @@ def _get_executor(
         cleanup_fn = None
         if executor_cfg.type == "ray" and h_ray:
             # Handle temporary case where project_context is PipelineManager
-            project_cfg = getattr(
-                self.project_context, "project_cfg", None
-            ) or getattr(self.project_context, "_project_cfg", None)
+            project_cfg = getattr(self.project_context, "project_cfg", None) or getattr(
+                self.project_context, "_project_cfg", None
+            )

-            if project_cfg and hasattr(project_cfg.adapter, 'ray'):
+            if project_cfg and hasattr(project_cfg.adapter, "ray"):
                 cleanup_fn = (
                     ray.shutdown
                     if project_cfg.adapter.ray.shutdown_ray_on_completion
@@ -427,5 +441,7 @@ def _reload_module(self):
             logger.error(f"Failed to reload module for pipeline '{self.name}': {e}")
             raise
         except Exception as e:
-            logger.error(f"Unexpected error reloading module for pipeline '{self.name}': {e}")
+            logger.error(
+                f"Unexpected error reloading module for pipeline '{self.name}': {e}"
+            )
             raise
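
Two details in the _execute_pipeline hunk above are worth flagging: the or clause makes the condition equivalent to run_config.executor.type != "synchronous" (the == "local" test can never change the outcome), and synchronous_executor is assigned only in the if branch, so a run with type "synchronous" would hit a NameError at if not synchronous_executor. A sketch of the driver construction the new code performs, with both flags set explicitly; the module, the hard-coded executor, and the final_vars/inputs (reusing the hypothetical fan-out pipeline sketched earlier) are assumptions:

from hamilton import driver
from hamilton.execution import executors

import my_pipeline_module  # hypothetical stand-in for self.module

executor_type = "threadpool"  # would come from run_config.executor.type
synchronous = executor_type in ("synchronous", "local")

builder = (
    driver.Builder()
    .with_modules(my_pipeline_module)
    .enable_dynamic_execution(allow_experimental_mode=not synchronous)
    .with_local_executor(executors.SynchronousLocalTaskExecutor())
)
if not synchronous:
    # e.g. what the factory returns for type "threadpool" (see executor.py below)
    builder = builder.with_remote_executor(
        executors.MultiThreadingExecutor(max_tasks=60)
    )
dr = builder.build()

result = dr.execute(final_vars=["greetings"], inputs={"targets": ["world"]})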

src/flowerpower/utils/executor.py

Lines changed: 12 additions & 13 deletions

@@ -25,8 +25,7 @@ def __init__(self):
         self._executor_cache: Dict[str, Any] = {}

     def create_executor(
-        self,
-        executor_cfg: Union[str, Dict[str, Any], Any, None]
+        self, executor_cfg: Union[str, Dict[str, Any], Any, None]
     ) -> Any:
         """
         Create an executor instance based on configuration.
@@ -52,8 +51,7 @@ def create_executor(
         return executor

     def _normalize_config(
-        self,
-        executor_cfg: Union[str, Dict[str, Any], Any, None]
+        self, executor_cfg: Union[str, Dict[str, Any], Any, None]
     ) -> Any:
         """Normalize executor configuration to ExecutorConfig instance."""
         from ..cfg.pipeline.run import ExecutorConfig
@@ -95,17 +93,18 @@ def _create_executor_by_type(self, executor_cfg: Any) -> Any:
     def _create_synchronous_executor(self) -> Any:
         """Create synchronous/local executor."""
         from hamilton.execution.executors import SynchronousLocalTaskExecutor
+
         return SynchronousLocalTaskExecutor()

     def _create_threadpool_executor(self, executor_cfg: Any) -> Any:
         """Create thread pool executor."""
         try:
-            from hamilton.plugins.h_threadpool import ThreadPoolExecutor
+            from hamilton.execution.executors import MultiThreadingExecutor

             # Extract max workers from config
             if executor_cfg.max_workers is not None:
-                return ThreadPoolExecutor(max_workers=executor_cfg.max_workers)
-            return ThreadPoolExecutor()
+                return MultiThreadingExecutor(max_tasks=executor_cfg.max_workers)
+            return MultiThreadingExecutor()
         except ImportError:
             logger.warning(
                 "ThreadPool executor dependencies not installed. Using local executor."
@@ -115,12 +114,12 @@ def _create_threadpool_executor(self, executor_cfg: Any) -> Any:
     def _create_processpool_executor(self, executor_cfg: Any) -> Any:
         """Create process pool executor."""
         try:
-            from hamilton.execution.executors import ProcessPoolExecutor
+            from hamilton.execution.executors import MultiProcessingExecutor

             # Extract max workers from config
             if executor_cfg.max_workers is not None:
-                return ProcessPoolExecutor(max_workers=executor_cfg.max_workers)
-            return ProcessPoolExecutor()
+                return MultiProcessingExecutor(max_tasks=executor_cfg.max_workers)
+            return MultiProcessingExecutor()
         except ImportError:
             logger.warning(
                 "ProcessPool executor dependencies not installed. Using local executor."
@@ -135,7 +134,7 @@ def _create_ray_executor(self, executor_cfg: Any) -> Any:
         # Extract configuration
         config = {}
         if executor_cfg.num_cpus is not None:
-            config['num_cpus'] = executor_cfg.num_cpus
+            config["num_cpus"] = executor_cfg.num_cpus
         if config:
             return RayTaskExecutor(**config)
         return RayTaskExecutor()
@@ -153,7 +152,7 @@ def _create_dask_executor(self, executor_cfg: Any) -> Any:
         # Extract configuration
         config = {}
         if executor_cfg.num_cpus is not None:
-            config['num_cpus'] = executor_cfg.num_cpus
+            config["num_cpus"] = executor_cfg.num_cpus
         if config:
             return DaskExecutor(**config)
         return DaskExecutor()
@@ -175,4 +174,4 @@ def create_executor_factory() -> ExecutorFactory:
     Returns:
         ExecutorFactory: Configured factory instance
     """
-    return ExecutorFactory()
+    return ExecutorFactory()
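
Two notes on the executor swap above: hamilton.plugins.h_threadpool exposes a FutureAdapter rather than a ThreadPoolExecutor, so the old import most likely always fell into the ImportError fallback; and the task-based executors in hamilton.execution.executors take max_tasks, not max_workers, hence the renamed keyword. A sketch of what the factory now effectively produces for the executor block in the example YAML; the helper below is illustrative, not the factory's actual code:

from hamilton.execution.executors import (
    MultiProcessingExecutor,
    MultiThreadingExecutor,
    SynchronousLocalTaskExecutor,
)

def build_executor(executor_type: str, max_workers: int | None = None):
    # Illustrative stand-in for ExecutorFactory._create_executor_by_type
    if executor_type == "threadpool":
        return MultiThreadingExecutor(max_tasks=max_workers) if max_workers else MultiThreadingExecutor()
    if executor_type == "processpool":
        return MultiProcessingExecutor(max_tasks=max_workers) if max_workers else MultiProcessingExecutor()
    return SynchronousLocalTaskExecutor()  # "synchronous"/"local" and fallback

remote = build_executor("threadpool", max_workers=60)  # matches max_workers: 60 above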

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.
