From e08389ce9aef61ab6b63b55ba3cee4ecd1c1d3e1 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Wed, 9 Jul 2025 12:25:14 +0000
Subject: [PATCH 01/29] Save progress

---
 .clippy.toml                         |   2 +-
 Cargo.toml                           |   2 +
 src/core/error.rs                    |  27 +-
 src/uniffi/error.rs                  |   8 +
 src/uniffi/mod.rs                    |   7 +-
 src/uniffi/pipeline.rs               |  16 +-
 src/uniffi/pipeline_runner/docker.rs | 458 +++++++++++++++++++++++++++
 src/uniffi/pipeline_runner/mod.rs    |  55 ++++
 8 files changed, 565 insertions(+), 10 deletions(-)
 create mode 100644 src/uniffi/pipeline_runner/docker.rs
 create mode 100644 src/uniffi/pipeline_runner/mod.rs
diff --git a/.clippy.toml b/.clippy.toml
index 8987fce2..5821063e 100644
--- a/.clippy.toml
+++ b/.clippy.toml
@@ -1,3 +1,3 @@
-excessive-nesting-threshold = 4
+excessive-nesting-threshold = 5
 too-many-arguments-threshold = 10
 allowed-idents-below-min-chars = ["..", "k", "v", "f", "re", "id", "Ok", "'_"]
diff --git a/Cargo.toml b/Cargo.toml
index d9b2b6f0..7486a41c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -48,6 +48,7 @@ glob = "0.3.1"
 heck = "0.5.0"
 # hashmaps that preserve insertion order
 indexmap = { version = "2.9.0", features = ["serde"] }
+itertools = "0.14.0"
 layout-rs = "0.1.3"
 # random name generator
 names = "0.14.0"
@@ -66,6 +67,7 @@ sha2 = "0.10.8"
 snafu = { version = "0.8.5", features = ["futures"] }
 # a runtime for async applications
 tokio = { version = "1.41.0", features = ["full"] }
+tokio-stream = "0.1.17"
 # utilities for async calls
 tokio-util = "0.7.13"
 # automated CFFI + bindings in other languages
diff --git a/src/core/error.rs b/src/core/error.rs
index 710e22f8..a43fa08d 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -1,4 +1,7 @@
-use crate::uniffi::error::{Kind, OrcaError};
+use crate::uniffi::{
+    error::{Kind, OrcaError},
+    pipeline_runner::docker::Message,
+};
 use bollard::errors::Error as BollardError;
 use glob;
 use serde_json;
@@ -9,6 +12,7 @@ use std::{
     io,
     path::{self},
 };
+use tokio::{sync::broadcast::error::SendError, task::JoinError};
 
 impl From<BollardError> for OrcaError {
     fn from(error: BollardError) -> Self {
@@ -70,6 +74,26 @@ impl From<serde_yaml::Error> for OrcaError {
         }
     }
 }
+impl From<JoinError> for OrcaError {
+    fn from(error: JoinError) -> Self {
+        Self {
+            kind: Kind::IoError {
+                source: error.into(),
+                backtrace: Some(Backtrace::capture()),
+            },
+        }
+    }
+}
+impl From<SendError<Message>> for OrcaError {
+    fn from(error: SendError<Message>) -> Self {
+        Self {
+            kind: Kind::SendError {
+                source: error,
+                backtrace: Some(Backtrace::capture()),
+            },
+        }
+    }
+}
 impl From<Kind> for OrcaError {
     fn from(error: Kind) -> Self {
         Self { kind: error }
@@ -105,6 +129,7 @@ impl fmt::Debug for OrcaError {
             | Kind::GlobPatternError { backtrace, .. }
             | Kind::IoError { backtrace, .. }
             | Kind::PathPrefixError { backtrace, .. }
+            | Kind::SendError { backtrace, .. }
             | Kind::SerdeJsonError { backtrace, .. }
             | Kind::SerdeYamlError { backtrace, .. } => {
                 write!(f, "{}{}", self.kind, format_stack(backtrace.as_ref()))
diff --git a/src/uniffi/error.rs b/src/uniffi/error.rs
index 0ed5a4d6..661ec775 100644
--- a/src/uniffi/error.rs
+++ b/src/uniffi/error.rs
@@ -14,8 +14,11 @@ use std::{
     path::{self, PathBuf},
     result,
 };
+use tokio::sync::broadcast::error::SendError;
 use uniffi;
 
+use crate::uniffi::pipeline_runner::docker::Message;
+
 /// Shorthand for a Result that returns an `OrcaError`.
 pub type Result<T, E = OrcaError> = result::Result<T, E>;
 /// Possible errors you may encounter.
@@ -107,6 +110,11 @@ pub(crate) enum Kind {
         backtrace: Option<Backtrace>,
     },
     #[snafu(transparent)]
+    SendError {
+        source: SendError<Message>,
+        backtrace: Option<Backtrace>,
+    },
+    #[snafu(transparent)]
     SerdeJsonError {
         source: serde_json::Error,
         backtrace: Option<Backtrace>,
diff --git a/src/uniffi/mod.rs b/src/uniffi/mod.rs
index dadf9d5e..f0a20412 100644
--- a/src/uniffi/mod.rs
+++ b/src/uniffi/mod.rs
@@ -4,8 +4,9 @@ pub mod error;
 pub mod model;
 /// Interface into container orchestration engine.
 pub mod orchestrator;
-/// Data persistence provided by a store backend.
-pub mod store;
-
 /// Pipeline management and execution.
 pub mod pipeline;
+/// Pipeline runner interface.
+pub mod pipeline_runner;
+/// Data persistence provided by a store backend.
+pub mod store;
diff --git a/src/uniffi/pipeline.rs b/src/uniffi/pipeline.rs
index fdb1c316..97923ff0 100644
--- a/src/uniffi/pipeline.rs
+++ b/src/uniffi/pipeline.rs
@@ -6,7 +6,7 @@ use crate::{
     },
     uniffi::{
         error::{Kind, OrcaError, Result},
-        model::{Annotation, PathSet, Pod},
+        model::{Annotation, PathSet, Pod, URI},
     },
 };
 use derive_more::Display;
@@ -126,8 +126,8 @@ impl From<Mapper> for Kernel {
 #[uniffi::export(Display)]
 /// Struct to represent a node in the pipeline graph
 pub struct Node {
-    /// This is name for now till hashing feature get merged
-    pub name: String,
+    /// This is id for now till hashing feature get merged
+    pub id: String,
     /// Hash of the kernel to use in `kernel_lut`
     pub kernel_hash: String,
 }
@@ -136,7 +136,7 @@ impl Node {
     /// Creates a new `Node` instance and computes its hash based on the kernel hash and parent hashes.
     pub fn new(kernel_hash: &str, parent_hashes: Vec<&str>) -> Self {
         Self {
-            name: Self::compute_hash(kernel_hash, parent_hashes),
+            id: Self::compute_hash(kernel_hash, parent_hashes),
             kernel_hash: kernel_hash.to_owned(),
         }
     }
@@ -222,7 +222,7 @@ impl Pipeline {
             // Create the node, insert into graph and store the idx
             for node_name in node_names {
                 let node = Node {
-                    name: (*node_name).clone(),
+                    id: (*node_name).clone(),
                     kernel_hash: kernel.get_hash(),
                 };
                 let node_idx = graph.add_node(node);
@@ -349,6 +349,7 @@ pub struct PipelineJob {
     pub pipeline: Pipeline,
     /// Mapping of outside input to keys to be match with the pipeline `input_map`
     pub input_map: HashMap<String, PathSet>,
+    pub output_dir: URI,
     /// Annotation for the pipeline job
     pub annotation: Option<Annotation>,
 }
@@ -360,6 +361,7 @@ impl PipelineJob {
     pub fn new(
         pipeline: Pipeline,
         input_packet: HashMap<String, PathSet>,
+        output_dir: URI,
         annotation: Option<Annotation>,
     ) -> Result<Self> {
         // Check if input_map has all the requires keys
@@ -390,6 +392,10 @@ impl PipelineJob {
             pipeline,
             input_map: input_packet,
             annotation,
+            output_dir,
         })
     }
 }
+pub struct PipelineResult {
+    pub pipeline_job: PipelineJob,
+}
diff --git a/src/uniffi/pipeline_runner/docker.rs b/src/uniffi/pipeline_runner/docker.rs
new file mode 100644
index 00000000..fb760c49
--- /dev/null
+++ b/src/uniffi/pipeline_runner/docker.rs
@@ -0,0 +1,458 @@
+use super::PipelineRun;
+use crate::{
+    core::{
+        crypto::{hash_buffer, hash_stream},
+        model::serialize_hashmap,
+        util::get,
+    },
+    uniffi::{
+        error::{OrcaError, Result, selector},
+        model::{PathSet, Pod, PodJob, URI},
+        pipeline::{Kernel, Node, PipelineJob, PipelineResult},
+    },
+};
+use futures_util::stream::FuturesUnordered;
+use itertools::Itertools;
+use serde_yaml::Serializer;
+use snafu::OptionExt as _;
+use std::{
+    clone,
+    collections::HashMap,
+    mem,
+    path::{Path, PathBuf},
+    sync::Arc,
+};
+use tokio::{
+    sync::broadcast::{self, Receiver, Sender},
+    task::JoinSet,
+};
+use tokio_stream::StreamExt as _;
+
+#[derive(Clone, Debug)]
+pub(crate) enum Message {
+    NodeOutput(String, HashMap<String, PathSet>), // String is the parent_node_name, while HashMap is output of the parent node
+    Stop,                                         // Message to halt all operations
+}
+
+struct PipelineRunInfo {
+    node_task_join_set: JoinSet<Result<()>>, // Join set to track the tasks for this pipeline run
+    job_manager_ch_tx: Sender<Message>,
+    node_tx: HashMap<String, Sender<Message>>,
+    outputs: HashMap<String, HashMap<String, PathSet>>, // String is the node key, while hash
+    namespace_lookup: HashMap<String, String>,          // Namespace to operate as storage
+}
+
+/// Docker based pipeline runner meant to execute on a single machine
+#[derive(Default)]
+pub struct DockerPipelineRunner {
+    pipeline_runs: HashMap<PipelineRun, PipelineRunInfo>, // For each pipeline run, we have a join set to track the tasks and wait on them
+}
+
+impl DockerPipelineRunner {
+    /// Create a new Docker pipeline runner
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Start the `pipeline_job` returning `pipeline_run`un
+    ///
+    /// # Errors
+    /// Will error out if the pipeline job fails to start
+    pub fn start(
+        &mut self,
+        pipeline_job: PipelineJob,
+        namespace_lookup: HashMap<String, String>,
+    ) -> Result<PipelineRun> {
+        // Create a new pipeline run
+        let pipeline_run = PipelineRun { pipeline_job };
+        let pipeline_run_arc = Arc::new(pipeline_run.clone());
+
+        // Insert into the list of pipeline runs
+        self.pipeline_runs.insert(
+            (*pipeline_run_arc).clone(),
+            PipelineRunInfo {
+                job_manager_ch_tx: broadcast::channel::<Message>(1).0,
+                node_tx: HashMap::new(),
+                node_task_join_set: JoinSet::new(),
+                outputs: HashMap::new(),
+                namespace_lookup,
+            },
+        );
+
+        // Create the source channel for the pipeline
+        // This channel will be used to send inputs to the pipeline
+        let (source_tx, _) = broadcast::channel::<Message>(1);
+
+        // Get reference to the pipeline
+        let pipeline = &pipeline_run_arc.pipeline_job.pipeline;
+
+        // Get all the leaf nodes and call the create_task_for_node function for each leaf node
+        // This will recursively create all the tasks and channels for the pipeline
+        pipeline.get_leaf_nodes().try_for_each(|node| {
+            self.create_task_for_node(node, &pipeline_run_arc, &source_tx)?;
+            Ok::<(), OrcaError>(())
+        })?;
+
+        for node_key in pipeline.get_leaf_nodes() {
+            self.create_task_for_node(node_key, &pipeline_run_arc, &source_tx)?;
+        }
+
+        // Create a task to handle outputs of output nodes in pipeline
+        // for node_key in pipeline.output_nodes {}
+
+        Ok(pipeline_run)
+    }
+
+    /// Given a pipeline run, wait for all its tasks to complete and return the `PipelineResult`
+    ///
+    /// # Errors
+    /// Will error out if any of the pipeline tasks failed to join
+    pub async fn get_result(&mut self, pipeline_run: &PipelineRun) -> Result<PipelineResult> {
+        // Call join on the join set for the pipeline run
+        let pipeline_run_info =
+            self.pipeline_runs
+                .get_mut(pipeline_run)
+                .context(selector::KeyMissing {
+                    key: pipeline_run.to_string(),
+                })?;
+
+        // Wait for all the tasks to complete
+        while let Some(result) = pipeline_run_info.node_task_join_set.join_next().await {
+            match result {
+                Ok(Ok(())) => {} // Task completed successfully
+                Ok(Err(err)) => {
+                    eprintln!("Task failed: {err}");
+                    return Err(err);
+                }
+                Err(err) => {
+                    eprintln!("Join set error: {err}");
+                    return Err(err.into());
+                }
+            }
+        }
+
+        Ok(PipelineResult {
+            pipeline_job: pipeline_run.pipeline_job.clone(),
+        })
+    }
+
+    fn create_task_for_node(
+        &mut self,
+        node: &Node,
+        pipeline_run: &Arc<PipelineRun>,
+        source_tx: &Sender<Message>,
+    ) -> Result<Sender<Message>> {
+        // Get the input channels for this node which should be it's parents
+        let mut input_ch_rxs = pipeline_run
+            .pipeline_job
+            .pipeline
+            .get_parents_for_node(node)
+            .map(|parent_node| {
+                // Check if it exists in the pipeline_runs hashmap
+                match get(&self.pipeline_runs, pipeline_run)?
+                    .node_tx
+                    .get(&parent_node.id)
+                {
+                    Some(rx) => Ok(rx.subscribe()),
+                    None => {
+                        // Missing parent node, thus call create_task for the parent node parent node first
+                        Ok(self
+                            .create_task_for_node(parent_node, pipeline_run, source_tx)?
+                            .subscribe())
+                    }
+                }
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        // Check if input_ch_rxs is empty, meaning this node has no parents and is a root node
+        // In this case, we will use the source channel as the input channel
+        // TODO: This will be replaced by input_node logic once that is merged
+        if input_ch_rxs.is_empty() {
+            // No parents, thus this is root node
+            // The parent rx will be the source channel rx
+            input_ch_rxs.push(source_tx.subscribe());
+        }
+
+        // Get the job manager ch and subscribe to it (mainly for receiving shutdown signal)
+        let job_manager_ch_rx = get(&self.pipeline_runs, pipeline_run)?
+            .job_manager_ch_tx
+            .subscribe();
+
+        // Create the output_channel for this node
+        let (tx, _) = broadcast::channel::<Message>(128);
+
+        // Spawn the node_manager for this node
+        self.pipeline_runs
+            .get_mut(pipeline_run)
+            .context(selector::KeyMissing {
+                key: pipeline_run.to_string(),
+            })?
+            .node_task_join_set
+            .spawn(Self::start_node_manager(
+                node.clone(),
+                Arc::clone(pipeline_run),
+                input_ch_rxs,
+                job_manager_ch_rx,
+                tx.clone(),
+            ));
+
+        // Insert it into the the tx into the pipeline_runs hashmap
+        self.pipeline_runs
+            .get_mut(pipeline_run)
+            .context(selector::KeyMissing {
+                key: pipeline_run.to_string(),
+            })?
+            .node_tx
+            .insert(node.id.clone(), tx.clone());
+
+        // Return tx
+        Ok(tx)
+    }
+
+    /// For tx: Sender<Message>, we only want to send successfully completed results to the next node
+    async fn start_node_manager(
+        node: Node,
+        pipeline_run: Arc<PipelineRun>,
+        parent_channel_rxs: Vec<Receiver<Message>>,
+        mut job_manager_channel: Receiver<Message>,
+        tx: Sender<Message>,
+        namespace_lookup: &HashMap<String, PathBuf>,
+    ) -> Result<()> {
+        // Create a futures unordered set to dynamically listen to N number of receivers
+        let mut futures = FuturesUnordered::new();
+
+        // Add all the parent channel receivers to the futures unordered set
+        for mut rx in parent_channel_rxs {
+            futures.push(tokio::spawn(async move { rx.recv().await }));
+        }
+
+        // Add the job manager channel to the futures unordered set
+        futures.push(tokio::spawn(
+            async move { job_manager_channel.recv().await },
+        ));
+
+        // Get the kernel for this node
+        let kernel = get(
+            &pipeline_run.pipeline_job.pipeline.kernel_lut,
+            &node.kernel_hash,
+        )?;
+
+        // Set up a join_set to track the tasks ()
+        let mut task_join_set = JoinSet::new();
+
+        // Listen to the MPSC channel and handle messages
+        while let Some(result) = futures.next().await {
+            let rx_result = match result {
+                Ok(rx_result) => rx_result,
+                Err(err) => {
+                    // Record into pipeilne_error log
+                    if err.is_panic() {
+                        eprintln!("Task panicked: {err}");
+                    } else {
+                        eprintln!("Error receiving message: {err}");
+                    }
+                    continue;
+                }
+            };
+
+            let Ok(msg) = rx_result else {
+                eprintln!("Failed to receive message from parent channel");
+                continue;
+            };
+
+            match msg {
+                Message::NodeOutput(_, input_packet) => {
+                    // Inputs from parents are ready, thus we need to process them if they are already computed and cached
+                    // NOTE: Cache is TODO
+                    match kernel {
+                        Kernel::Pod(pod) => {
+                            Self::process_packet_pod(
+                                &node,
+                                pod.clone(),
+                                tx.clone(),
+                                tx.clone(),
+                                input_packet,
+                                Arc::clone(&pipeline_run),
+                                namespace_lookup,
+                            )?;
+                        }
+                        Kernel::Mapper(mapper) => {
+                            // For mapper, we just apply it directly
+                            let output_map = mapper
+                                .mapping
+                                .iter()
+                                .map(|(input_key, output_key)| {
+                                    let input = get(&input_packet, input_key)?.clone();
+                                    Ok((output_key.to_owned(), input))
+                                })
+                                .collect::<Result<HashMap<_, _>>>()?;
+
+                            // Send the output via the channel
+                            tx.send(Message::NodeOutput(node_key.clone(), output_map))?;
+                        }
+                        Kernel::Joiner(joiner) => todo!(),
+                    }
+                }
+                Message::Stop => {
+                    // Stop all pod_job tasks abruptly
+                    task_join_set.shutdown().await;
+                    break;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn process_packet_pod(
+        node: &Node,
+        pod: Arc<Pod>,
+        success_ch_tx: Sender<Message>,
+        failure_ch_tx: Sender<Message>,
+        input_packet: HashMap<String, PathSet>,
+        pipeline_run: Arc<PipelineRun>,
+        namespace_lookup: &HashMap<String, PathBuf>,
+    ) -> Result<()> {
+        // Output directory is pod_runs/pod_run_id/node_id/hash_of_input_packet
+
+        // Compute the hash of the input_packet
+        let mut buf = Vec::new();
+        let mut serializer = Serializer::new(&mut buf);
+        serialize_hashmap(&input_packet, &mut serializer)?;
+        let input_packet_hash = hash_buffer(buf);
+        let output_dir = URI {
+            namespace: pipeline_run.pipeline_job.output_dir.namespace.clone(),
+            path: PathBuf::from(format!("pod_runs/{}/{}", pod.hash, input_packet_hash)),
+        };
+
+        let cpu_limit = pod.recommended_cpus;
+        let memory_limit = pod.recommended_memory;
+
+        // Create the pod job
+        let pod_job = PodJob::new(
+            None,
+            pod,
+            input_packet.clone(),
+            output_dir,
+            cpu_limit,
+            memory_limit,
+            None,
+            namespace_lookup,
+        )?;
+
+        // Simulate pod execution by just printing out pod_job_hash and pod hash
+        // This will be replaced by sending the pod_job to the orchestrator via the agent
+        println!(
+            "Executing pod job: {} with pod hash: {}",
+            pod_job.hash, pod_job.pod.hash
+        );
+
+        // For now we will just send the input_packet to the success channel
+        success_ch_tx.send(Message::NodeOutput(node.id.clone(), input_packet.clone()))?;
+
+        Ok(())
+    }
+}
+
+trait ProcessPacket {
+    fn process_packet(
+        &mut self,
+        sender_node_id: String,
+        packet: HashMap<String, PathSet>,
+        success_ch_tx: Sender<Message>,
+        failure_ch_tx: Sender<Message>,
+    ) -> Result<()>;
+}
+
+struct PodNodeProcessor {}
+
+struct MapperProcessor {}
+
+struct JoinNodeProcessor {
+    /// Cache for all packets received by the node
+    input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
+}
+
+impl JoinNodeProcessor {
+    fn new(self, parents_node_id: Vec<String>) -> Self {
+        let input_packet_cache = parents_node_id
+            .into_iter()
+            .map(|id| (id, Vec::new()))
+            .collect();
+        Self { input_packet_cache }
+    }
+
+    fn compute_new_packet_combination(
+        &self,
+        sender_node_id: String,
+        new_packet: &HashMap<String, PathSet>,
+    ) -> Result<Vec<HashMap<String, PathSet>>> {
+        // Combine the new packet with the existing packets in the cache
+        // Get all the cached packets from other parents
+        let other_parent_ids = self
+            .input_packet_cache
+            .keys()
+            .filter(|key| *key != &sender_node_id);
+        let mut factors = other_parent_ids
+            .map(|id| get(&self.input_packet_cache, id))
+            .collect::<Result<Vec<_>>>()?;
+
+        // Add the new incoming packet as a factor
+        let incoming_packet = vec![new_packet.clone()];
+        factors.push(&incoming_packet);
+
+        let result = factors
+            .into_iter()
+            .multi_cartesian_product()
+            .map(|packets_to_combined| {
+                packets_to_combined
+                    .into_iter()
+                    .fold(HashMap::new(), |mut acc, packet| {
+                        acc.extend(packet.clone());
+                        acc
+                    })
+            })
+            .collect::<Vec<_>>();
+
+        Ok(result)
+    }
+}
+
+impl ProcessPacket for JoinNodeProcessor {
+    fn process_packet(
+        &mut self,
+        sender_node_id: String,
+        packet: HashMap<String, PathSet>,
+        success_ch_tx: Sender<Message>,
+        failure_ch_tx: Sender<Message>,
+    ) -> Result<()> {
+        match {
+            get(&self.input_packet_cache, &sender_node_id)?.push(packet);
+
+            // Compute the new packet combination based on the sender node id and the packet
+            let new_packets_to_send =
+                self.compute_new_packet_combination(sender_node_id, &packet)?;
+
+            Ok::<Vec<HashMap<String, PathSet>>, OrcaError>(new_packets_to_send)
+        } {
+            Ok(output_packets) => {
+                // Send the output packets to the success channel
+                for output_packet in output_packets {
+                    success_ch_tx
+                        .send(Message::NodeOutput(sender_node_id.clone(), output_packet))?;
+                }
+            }
+            Err(err) => {
+                // Send the error to the failure channel
+                failure_ch_tx.send(Message::NodeOutput(
+                    sender_node_id.clone(),
+                    HashMap::new(), // Empty packet on failure
+                ))?;
+                return Err(err);
+            }
+        }
+        // Add the new packet into the cache
+
+        Ok(())
+    }
+}
diff --git a/src/uniffi/pipeline_runner/mod.rs b/src/uniffi/pipeline_runner/mod.rs
new file mode 100644
index 00000000..67fa0663
--- /dev/null
+++ b/src/uniffi/pipeline_runner/mod.rs
@@ -0,0 +1,55 @@
+use crate::uniffi::error::Result;
+
+use super::pipeline::PipelineJob;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+
+/// # Errors:
+/// Error out if fail to start the pipeline job
+pub trait PipelineRunner {
+    /// Starts the given pipeline job.
+    ///
+    /// # Errors
+    /// Returns an error if the pipeline job fails to start.
+    fn start(&self, pipeline_job: PipelineJob) -> Result<()>;
+}
+
+#[derive(Debug, Clone)]
+/// Struct to store the active pipeline run.
+pub struct PipelineRun {
+    pipeline_job: PipelineJob,
+}
+
+impl fmt::Display for PipelineRun {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "PipelineRun {{ pipeline_job: {} }}",
+            self.pipeline_job.hash
+        )
+    }
+}
+
+impl PipelineRun {
+    /// New function to initialize the pipeline run
+    pub const fn new(pipeline_job: PipelineJob) -> Self {
+        Self { pipeline_job }
+    }
+}
+
+impl PartialEq for PipelineRun {
+    fn eq(&self, other: &Self) -> bool {
+        self.pipeline_job.hash == other.pipeline_job.hash
+    }
+}
+
+impl Eq for PipelineRun {}
+
+impl Hash for PipelineRun {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.pipeline_job.hash.hash(state);
+    }
+}
+
+/// Docker pipeline runner
+pub mod docker;

From 6b7913d293ccce137708a7b3dc919d7b3fb7b2be Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Wed, 9 Jul 2025 14:16:28 +0000
Subject: [PATCH 02/29] Save progress

---
 src/core/orchestrator/docker.rs      |   2 +-
 src/uniffi/pipeline.rs               |  16 +-
 src/uniffi/pipeline_runner/docker.rs | 240 ++++++++++++++++++---------
 3 files changed, 178 insertions(+), 80 deletions(-)

diff --git a/src/core/orchestrator/docker.rs b/src/core/orchestrator/docker.rs
index 999f01a5..b6d22f9d 100644
--- a/src/core/orchestrator/docker.rs
+++ b/src/core/orchestrator/docker.rs
@@ -38,7 +38,7 @@ impl LocalDockerOrchestrator {
     fn prepare_mount_binds(
         namespace_lookup: &HashMap<String, PathBuf>,
         pod_job: &PodJob,
-    ) -> Result<(Vec<String>, [String; 1])> {
+    ) -> Result<(Vec<String>)> {
         // all host mounted paths need to be absolute
         let host_output_directory = path::absolute(
             namespace_lookup[&pod_job.output_dir.namespace].join(&pod_job.output_dir.path),
diff --git a/src/uniffi/pipeline.rs b/src/uniffi/pipeline.rs
index 97923ff0..d90fac87 100644
--- a/src/uniffi/pipeline.rs
+++ b/src/uniffi/pipeline.rs
@@ -339,13 +339,16 @@ impl Pipeline {
     }
 }
 
-#[derive(uniffi::Object, Display, Debug, Clone)]
+#[derive(uniffi::Object, Display, Debug, Clone, Serialize)]
 #[display("{self:#?}")]
 #[uniffi::export(Display)]
 /// `PipelineJob` struct
 /// This struct is used to store the pipeline and the input map
 pub struct PipelineJob {
-    /// Pipeline struct
+    /// Used to unique identify the pipeline job
+    pub hash: String,
+    /// Pipeline struct (Note: Due to the removal of the hash system to be deferred, this has no guarantee of being unique)
+    #[serde(skip)]
     pub pipeline: Pipeline,
     /// Mapping of outside input to keys to be match with the pipeline `input_map`
     pub input_map: HashMap<String, PathSet>,
@@ -388,11 +391,18 @@ impl PipelineJob {
             });
         }
 
-        Ok(Self {
+        // Create the job without_hash
+        let no_hash = Self {
+            hash: String::new(),
             pipeline,
             input_map: input_packet,
             annotation,
             output_dir,
+        };
+
+        Ok(Self {
+            hash: hash_buffer(to_yaml(&no_hash)?),
+            ..no_hash
         })
     }
 }
diff --git a/src/uniffi/pipeline_runner/docker.rs b/src/uniffi/pipeline_runner/docker.rs
index fb760c49..7ebad3e7 100644
--- a/src/uniffi/pipeline_runner/docker.rs
+++ b/src/uniffi/pipeline_runner/docker.rs
@@ -1,27 +1,17 @@
 use super::PipelineRun;
 use crate::{
-    core::{
-        crypto::{hash_buffer, hash_stream},
-        model::serialize_hashmap,
-        util::get,
-    },
+    core::{crypto::hash_buffer, model::serialize_hashmap, util::get},
     uniffi::{
         error::{OrcaError, Result, selector},
         model::{PathSet, Pod, PodJob, URI},
-        pipeline::{Kernel, Node, PipelineJob, PipelineResult},
+        pipeline::{Kernel, Mapper, Node, PipelineJob, PipelineResult},
     },
 };
 use futures_util::stream::FuturesUnordered;
 use itertools::Itertools;
 use serde_yaml::Serializer;
 use snafu::OptionExt as _;
-use std::{
-    clone,
-    collections::HashMap,
-    mem,
-    path::{Path, PathBuf},
-    sync::Arc,
-};
+use std::{collections::HashMap, path::PathBuf, sync::Arc};
 use tokio::{
     sync::broadcast::{self, Receiver, Sender},
     task::JoinSet,
@@ -30,8 +20,9 @@ use tokio_stream::StreamExt as _;
 
 #[derive(Clone, Debug)]
 pub(crate) enum Message {
-    NodeOutput(String, HashMap<String, PathSet>), // String is the parent_node_name, while HashMap is output of the parent node
-    Stop,                                         // Message to halt all operations
+    NodeOutput(String, HashMap<String, PathSet>), // String is the parent_node_id, while HashMap is output of the parent node
+    ProcessingFailed(String, Arc<OrcaError>), // String is the node_id, while OrcaError is the error that occurred
+    Stop,                                     // Message to halt all operations
 }
 
 struct PipelineRunInfo {
@@ -39,7 +30,7 @@ struct PipelineRunInfo {
     job_manager_ch_tx: Sender<Message>,
     node_tx: HashMap<String, Sender<Message>>,
     outputs: HashMap<String, HashMap<String, PathSet>>, // String is the node key, while hash
-    namespace_lookup: HashMap<String, String>,          // Namespace to operate as storage
+    namespace_lookup: HashMap<String, PathBuf>,
 }
 
 /// Docker based pipeline runner meant to execute on a single machine
@@ -61,7 +52,7 @@ impl DockerPipelineRunner {
     pub fn start(
         &mut self,
         pipeline_job: PipelineJob,
-        namespace_lookup: HashMap<String, String>,
+        namespace_lookup: HashMap<String, PathBuf>,
     ) -> Result<PipelineRun> {
         // Create a new pipeline run
         let pipeline_run = PipelineRun { pipeline_job };
@@ -75,7 +66,7 @@ impl DockerPipelineRunner {
                 node_tx: HashMap::new(),
                 node_task_join_set: JoinSet::new(),
                 outputs: HashMap::new(),
-                namespace_lookup,
+                namespace_lookup: namespace_lookup.clone(),
             },
         );
 
@@ -89,14 +80,10 @@ impl DockerPipelineRunner {
         // Get all the leaf nodes and call the create_task_for_node function for each leaf node
         // This will recursively create all the tasks and channels for the pipeline
         pipeline.get_leaf_nodes().try_for_each(|node| {
-            self.create_task_for_node(node, &pipeline_run_arc, &source_tx)?;
+            self.create_task_for_node(node, &pipeline_run_arc, &source_tx, &namespace_lookup)?;
             Ok::<(), OrcaError>(())
         })?;
 
-        for node_key in pipeline.get_leaf_nodes() {
-            self.create_task_for_node(node_key, &pipeline_run_arc, &source_tx)?;
-        }
-
         // Create a task to handle outputs of output nodes in pipeline
         // for node_key in pipeline.output_nodes {}
 
@@ -141,6 +128,7 @@ impl DockerPipelineRunner {
         node: &Node,
         pipeline_run: &Arc<PipelineRun>,
         source_tx: &Sender<Message>,
+        namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<Sender<Message>> {
         // Get the input channels for this node which should be it's parents
         let mut input_ch_rxs = pipeline_run
@@ -157,7 +145,12 @@ impl DockerPipelineRunner {
                     None => {
                         // Missing parent node, thus call create_task for the parent node parent node first
                         Ok(self
-                            .create_task_for_node(parent_node, pipeline_run, source_tx)?
+                            .create_task_for_node(
+                                parent_node,
+                                pipeline_run,
+                                source_tx,
+                                namespace_lookup,
+                            )?
                             .subscribe())
                     }
                 }
@@ -194,6 +187,7 @@ impl DockerPipelineRunner {
                 input_ch_rxs,
                 job_manager_ch_rx,
                 tx.clone(),
+                namespace_lookup.clone(),
             ));
 
         // Insert it into the the tx into the pipeline_runs hashmap
@@ -216,7 +210,7 @@ impl DockerPipelineRunner {
         parent_channel_rxs: Vec<Receiver<Message>>,
         mut job_manager_channel: Receiver<Message>,
         tx: Sender<Message>,
-        namespace_lookup: &HashMap<String, PathBuf>,
+        namespace_lookup: HashMap<String, PathBuf>,
     ) -> Result<()> {
         // Create a futures unordered set to dynamically listen to N number of receivers
         let mut futures = FuturesUnordered::new();
@@ -231,14 +225,37 @@ impl DockerPipelineRunner {
             async move { job_manager_channel.recv().await },
         ));
 
-        // Get the kernel for this node
-        let kernel = get(
+        // Get the kernel for this node and build the correct processor
+        let mut processor: Box<dyn NodeProcessor> = match get(
             &pipeline_run.pipeline_job.pipeline.kernel_lut,
             &node.kernel_hash,
-        )?;
-
-        // Set up a join_set to track the tasks ()
-        let mut task_join_set = JoinSet::new();
+        )? {
+            Kernel::Pod(pod) => {
+                // Create a processor for the pod node
+                Box::new(PodNodeProcessor::new(
+                    pod.clone(),
+                    pipeline_run.pipeline_job.output_dir.namespace.clone(),
+                    namespace_lookup.clone(),
+                ))
+            }
+            Kernel::Mapper(mapper) => {
+                // Create a processor for the mapper node
+                Box::new(MapperProcessor {
+                    mapper: mapper.clone(),
+                })
+            }
+            Kernel::Joiner => {
+                // Get the parents of the join node
+                let parent_nodes_id = pipeline_run
+                    .pipeline_job
+                    .pipeline
+                    .get_parents_for_node(&node)
+                    .map(|parent_node| parent_node.id.clone())
+                    .collect::<Vec<_>>();
+                // Create a processor for the join node
+                Box::new(JoinerNodeProcessor::new(parent_nodes_id))
+            }
+        };
 
         // Listen to the MPSC channel and handle messages
         while let Some(result) = futures.next().await {
@@ -261,43 +278,13 @@ impl DockerPipelineRunner {
             };
 
             match msg {
-                Message::NodeOutput(_, input_packet) => {
+                Message::NodeOutput(sender_node_id, input_packet) => {
                     // Inputs from parents are ready, thus we need to process them if they are already computed and cached
-                    // NOTE: Cache is TODO
-                    match kernel {
-                        Kernel::Pod(pod) => {
-                            Self::process_packet_pod(
-                                &node,
-                                pod.clone(),
-                                tx.clone(),
-                                tx.clone(),
-                                input_packet,
-                                Arc::clone(&pipeline_run),
-                                namespace_lookup,
-                            )?;
-                        }
-                        Kernel::Mapper(mapper) => {
-                            // For mapper, we just apply it directly
-                            let output_map = mapper
-                                .mapping
-                                .iter()
-                                .map(|(input_key, output_key)| {
-                                    let input = get(&input_packet, input_key)?.clone();
-                                    Ok((output_key.to_owned(), input))
-                                })
-                                .collect::<Result<HashMap<_, _>>>()?;
-
-                            // Send the output via the channel
-                            tx.send(Message::NodeOutput(node_key.clone(), output_map))?;
-                        }
-                        Kernel::Joiner(joiner) => todo!(),
-                    }
                 }
                 Message::Stop => {
-                    // Stop all pod_job tasks abruptly
-                    task_join_set.shutdown().await;
                     break;
                 }
+                Message::ProcessingFailed(_, orca_error) => todo!(),
             }
         }
 
@@ -354,27 +341,121 @@ impl DockerPipelineRunner {
     }
 }
 
-trait ProcessPacket {
+trait NodeProcessor: Send {
     fn process_packet(
         &mut self,
         sender_node_id: String,
+        current_node_id: String,
         packet: HashMap<String, PathSet>,
         success_ch_tx: Sender<Message>,
         failure_ch_tx: Sender<Message>,
     ) -> Result<()>;
 }
 
-struct PodNodeProcessor {}
+struct PodNodeProcessor {
+    pod: Arc<Pod>,
+    namespace: String,
+    namespace_lookup: HashMap<String, PathBuf>, // Copy of the look up table
+    processing_tasks: JoinSet<()>,
+}
+
+impl PodNodeProcessor {
+    fn new(pod: Arc<Pod>, namespace: String, namespace_lookup: HashMap<String, PathBuf>) -> Self {
+        Self {
+            pod,
+            namespace,
+            namespace_lookup,
+            processing_tasks: JoinSet::new(),
+        }
+    }
+}
+
+impl NodeProcessor for PodNodeProcessor {
+    fn process_packet(
+        &mut self,
+        sender_node_id: String,
+        current_node_id: String,
+        packet: HashMap<String, PathSet>,
+        success_ch_tx: Sender<Message>,
+        failure_ch_tx: Sender<Message>,
+    ) -> Result<()> {
+        // Process the packet using the pod
 
-struct MapperProcessor {}
+        // Create the pod_job
+        let mut buf = Vec::new();
+        let mut serializer = Serializer::new(&mut buf);
+        serialize_hashmap(&packet, &mut serializer)?;
+        let input_packet_hash = hash_buffer(buf);
+        let output_dir = URI {
+            namespace: self.namespace.clone(),
+            path: PathBuf::from(format!("pod_runs/{}/{}", self.pod.hash, input_packet_hash)),
+        };
+
+        let cpu_limit = self.pod.recommended_cpus;
+        let memory_limit = self.pod.recommended_memory;
 
-struct JoinNodeProcessor {
+        // Create the pod job
+        let pod_job = PodJob::new(
+            None,
+            self.pod.clone(),
+            packet.clone(),
+            output_dir,
+            cpu_limit,
+            memory_limit,
+            None,
+            &self.namespace_lookup,
+        )?;
+
+        // Simulate pod execution by just printing out pod_job_hash and pod hash
+        // This will be replaced by sending the pod_job to the orchestrator via the agent
+        println!(
+            "Executing pod job: {} with pod hash: {}",
+            pod_job.hash, pod_job.pod.hash
+        );
+
+        // For now we will just send the input_packet to the success channel
+        success_ch_tx.send(Message::NodeOutput(current_node_id, packet.clone()))?;
+
+        Ok(())
+    }
+}
+struct MapperProcessor {
+    mapper: Arc<Mapper>,
+}
+
+impl NodeProcessor for MapperProcessor {
+    fn process_packet(
+        &mut self,
+        sender_node_id: String,
+        current_node_id: String,
+        packet: HashMap<String, PathSet>,
+        success_ch_tx: Sender<Message>,
+        _failure_ch_tx: Sender<Message>,
+    ) -> Result<()> {
+        // Apply the mapping to the input packet
+        let output_map = self
+            .mapper
+            .mapping
+            .iter()
+            .map(|(input_key, output_key)| {
+                let input = get(&packet, input_key)?.clone();
+                Ok((output_key.to_owned(), input))
+            })
+            .collect::<Result<HashMap<_, _>>>()?;
+
+        // Send the output via the channel
+        success_ch_tx.send(Message::NodeOutput(sender_node_id, output_map))?;
+        Ok(())
+    }
+}
+
+struct JoinerNodeProcessor {
     /// Cache for all packets received by the node
     input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
 }
 
-impl JoinNodeProcessor {
-    fn new(self, parents_node_id: Vec<String>) -> Self {
+impl JoinerNodeProcessor {
+    fn new(parents_node_id: Vec<String>) -> Self {
         let input_packet_cache = parents_node_id
             .into_iter()
             .map(|id| (id, Vec::new()))
@@ -384,7 +465,7 @@ impl JoinNodeProcessor {
 
     fn compute_new_packet_combination(
         &self,
-        sender_node_id: String,
+        sender_node_id: &str,
         new_packet: &HashMap<String, PathSet>,
     ) -> Result<Vec<HashMap<String, PathSet>>> {
         // Combine the new packet with the existing packets in the cache
@@ -392,7 +473,7 @@ impl JoinNodeProcessor {
         let other_parent_ids = self
             .input_packet_cache
             .keys()
-            .filter(|key| *key != &sender_node_id);
+            .filter(|key| *key != sender_node_id);
         let mut factors = other_parent_ids
             .map(|id| get(&self.input_packet_cache, id))
             .collect::<Result<Vec<_>>>()?;
@@ -418,20 +499,27 @@ impl JoinNodeProcessor {
     }
 }
 
-impl ProcessPacket for JoinNodeProcessor {
+impl NodeProcessor for JoinerNodeProcessor {
     fn process_packet(
         &mut self,
         sender_node_id: String,
+        current_node_id: String,
         packet: HashMap<String, PathSet>,
         success_ch_tx: Sender<Message>,
         failure_ch_tx: Sender<Message>,
     ) -> Result<()> {
         match {
-            get(&self.input_packet_cache, &sender_node_id)?.push(packet);
-
             // Compute the new packet combination based on the sender node id and the packet
             let new_packets_to_send =
-                self.compute_new_packet_combination(sender_node_id, &packet)?;
+                self.compute_new_packet_combination(&sender_node_id, &packet)?;
+
+            // Record the packet into the cache
+            self.input_packet_cache
+                .get_mut(&sender_node_id)
+                .context(selector::KeyMissing {
+                    key: sender_node_id.clone(),
+                })?
+                .push(packet);
 
             Ok::<Vec<HashMap<String, PathSet>>, OrcaError>(new_packets_to_send)
         } {
@@ -439,7 +527,7 @@ impl ProcessPacket for JoinNodeProcessor {
                 // Send the output packets to the success channel
                 for output_packet in output_packets {
                     success_ch_tx
-                        .send(Message::NodeOutput(sender_node_id.clone(), output_packet))?;
+                        .send(Message::NodeOutput(current_node_id.clone(), output_packet))?;
                 }
             }
             Err(err) => {

From 5cd16d2ebf7510e6613de69b9c076de275c5ba86 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Wed, 9 Jul 2025 14:28:36 +0000
Subject: [PATCH 03/29] Save progress

---
 cspell.json                          |  3 ++-
 src/core/error.rs                    |  2 +-
 src/core/orchestrator/docker.rs      |  2 +-
 src/uniffi/error.rs                  | 13 +++++------
 src/uniffi/pipeline_runner/docker.rs | 32 +++++++++++++++-------------
 tests/fixture/mod.rs                 |  4 ++++
 tests/pipeline.rs                    |  4 ++++
 7 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/cspell.json b/cspell.json
index c32f7e17..e3315aad 100644
--- a/cspell.json
+++ b/cspell.json
@@ -75,7 +75,8 @@
         "getrandom",
         "wasi",
         "petgraph",
-        "rfind"
+        "rfind",
+        "itertools"
     ],
     "useGitignore": false,
     "ignorePaths": [
diff --git a/src/core/error.rs b/src/core/error.rs
index a43fa08d..af0eb7aa 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -88,7 +88,7 @@ impl From<SendError<Message>> for OrcaError {
     fn from(error: SendError<Message>) -> Self {
         Self {
             kind: Kind::SendError {
-                source: error,
+                reason: error.to_string(),
                 backtrace: Some(Backtrace::capture()),
             },
         }
diff --git a/src/core/orchestrator/docker.rs b/src/core/orchestrator/docker.rs
index b6d22f9d..999f01a5 100644
--- a/src/core/orchestrator/docker.rs
+++ b/src/core/orchestrator/docker.rs
@@ -38,7 +38,7 @@ impl LocalDockerOrchestrator {
     fn prepare_mount_binds(
         namespace_lookup: &HashMap<String, PathBuf>,
         pod_job: &PodJob,
-    ) -> Result<(Vec<String>)> {
+    ) -> Result<(Vec<String>, [String; 1])> {
         // all host mounted paths need to be absolute
         let host_output_directory = path::absolute(
             namespace_lookup[&pod_job.output_dir.namespace].join(&pod_job.output_dir.path),
diff --git a/src/uniffi/error.rs b/src/uniffi/error.rs
index 661ec775..85eb7103 100644
--- a/src/uniffi/error.rs
+++ b/src/uniffi/error.rs
@@ -14,11 +14,8 @@ use std::{
     path::{self, PathBuf},
     result,
 };
-use tokio::sync::broadcast::error::SendError;
 use uniffi;
 
-use crate::uniffi::pipeline_runner::docker::Message;
-
 /// Shorthand for a Result that returns an `OrcaError`.
 pub type Result<T, E = OrcaError> = result::Result<T, E>;
 /// Possible errors you may encounter.
@@ -89,6 +86,11 @@ pub(crate) enum Kind {
         missing_keys: Vec<String>,
         backtrace: Option<Backtrace>,
     },
+    #[snafu(display("Failed to send message because: {reason}"))]
+    SendError {
+        reason: String,
+        backtrace: Option<Backtrace>,
+    },
     #[snafu(transparent)]
     BollardError {
         source: BollardError,
@@ -110,11 +112,6 @@ pub(crate) enum Kind {
         backtrace: Option<Backtrace>,
     },
     #[snafu(transparent)]
-    SendError {
-        source: SendError<Message>,
-        backtrace: Option<Backtrace>,
-    },
-    #[snafu(transparent)]
     SerdeJsonError {
         source: serde_json::Error,
         backtrace: Option<Backtrace>,
diff --git a/src/uniffi/pipeline_runner/docker.rs b/src/uniffi/pipeline_runner/docker.rs
index 7ebad3e7..a8c6782b 100644
--- a/src/uniffi/pipeline_runner/docker.rs
+++ b/src/uniffi/pipeline_runner/docker.rs
@@ -8,7 +8,7 @@ use crate::{
     },
 };
 use futures_util::stream::FuturesUnordered;
-use itertools::Itertools;
+use itertools::Itertools as _;
 use serde_yaml::Serializer;
 use snafu::OptionExt as _;
 use std::{collections::HashMap, path::PathBuf, sync::Arc};
@@ -52,7 +52,7 @@ impl DockerPipelineRunner {
     pub fn start(
         &mut self,
         pipeline_job: PipelineJob,
-        namespace_lookup: HashMap<String, PathBuf>,
+        namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<PipelineRun> {
         // Create a new pipeline run
         let pipeline_run = PipelineRun { pipeline_job };
@@ -80,7 +80,7 @@ impl DockerPipelineRunner {
         // Get all the leaf nodes and call the create_task_for_node function for each leaf node
         // This will recursively create all the tasks and channels for the pipeline
         pipeline.get_leaf_nodes().try_for_each(|node| {
-            self.create_task_for_node(node, &pipeline_run_arc, &source_tx, &namespace_lookup)?;
+            self.create_task_for_node(node, &pipeline_run_arc, &source_tx, namespace_lookup)?;
             Ok::<(), OrcaError>(())
         })?;
 
@@ -209,7 +209,7 @@ impl DockerPipelineRunner {
         pipeline_run: Arc<PipelineRun>,
         parent_channel_rxs: Vec<Receiver<Message>>,
         mut job_manager_channel: Receiver<Message>,
-        tx: Sender<Message>,
+        success_ch_tx: Sender<Message>,
         namespace_lookup: HashMap<String, PathBuf>,
     ) -> Result<()> {
         // Create a futures unordered set to dynamically listen to N number of receivers
@@ -233,7 +233,7 @@ impl DockerPipelineRunner {
             Kernel::Pod(pod) => {
                 // Create a processor for the pod node
                 Box::new(PodNodeProcessor::new(
-                    pod.clone(),
+                    Arc::clone(pod),
                     pipeline_run.pipeline_job.output_dir.namespace.clone(),
                     namespace_lookup.clone(),
                 ))
@@ -241,7 +241,7 @@ impl DockerPipelineRunner {
             Kernel::Mapper(mapper) => {
                 // Create a processor for the mapper node
                 Box::new(MapperProcessor {
-                    mapper: mapper.clone(),
+                    mapper: Arc::clone(mapper),
                 })
             }
             Kernel::Joiner => {
@@ -262,7 +262,7 @@ impl DockerPipelineRunner {
             let rx_result = match result {
                 Ok(rx_result) => rx_result,
                 Err(err) => {
-                    // Record into pipeilne_error log
+                    // Record into pipeline_error log
                     if err.is_panic() {
                         eprintln!("Task panicked: {err}");
                     } else {
@@ -294,10 +294,10 @@ impl DockerPipelineRunner {
     fn process_packet_pod(
         node: &Node,
         pod: Arc<Pod>,
-        success_ch_tx: Sender<Message>,
-        failure_ch_tx: Sender<Message>,
-        input_packet: HashMap<String, PathSet>,
-        pipeline_run: Arc<PipelineRun>,
+        success_ch_tx: &Sender<Message>,
+        failure_ch_tx: &Sender<Message>,
+        input_packet: &HashMap<String, PathSet>,
+        pipeline_run: &Arc<PipelineRun>,
         namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()> {
         // Output directory is pod_runs/pod_run_id/node_id/hash_of_input_packet
@@ -305,7 +305,7 @@ impl DockerPipelineRunner {
         // Compute the hash of the input_packet
         let mut buf = Vec::new();
         let mut serializer = Serializer::new(&mut buf);
-        serialize_hashmap(&input_packet, &mut serializer)?;
+        serialize_hashmap(input_packet, &mut serializer)?;
         let input_packet_hash = hash_buffer(buf);
         let output_dir = URI {
             namespace: pipeline_run.pipeline_job.output_dir.namespace.clone(),
@@ -397,7 +397,7 @@ impl NodeProcessor for PodNodeProcessor {
         // Create the pod job
         let pod_job = PodJob::new(
             None,
-            self.pod.clone(),
+            Arc::clone(&self.pod),
             packet.clone(),
             output_dir,
             cpu_limit,
@@ -508,7 +508,7 @@ impl NodeProcessor for JoinerNodeProcessor {
         success_ch_tx: Sender<Message>,
         failure_ch_tx: Sender<Message>,
     ) -> Result<()> {
-        match {
+        let process_result = {
             // Compute the new packet combination based on the sender node id and the packet
             let new_packets_to_send =
                 self.compute_new_packet_combination(&sender_node_id, &packet)?;
@@ -522,7 +522,9 @@ impl NodeProcessor for JoinerNodeProcessor {
                 .push(packet);
 
             Ok::<Vec<HashMap<String, PathSet>>, OrcaError>(new_packets_to_send)
-        } {
+        };
+
+        match process_result {
             Ok(output_packets) => {
                 // Send the output packets to the success channel
                 for output_packet in output_packets {
diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs
index f3dc49ea..c787dc93 100644
--- a/tests/fixture/mod.rs
+++ b/tests/fixture/mod.rs
@@ -292,6 +292,10 @@ pub fn pipeline_job() -> Result<PipelineJob> {
                 ..Default::default()
             }),
         )]),
+        URI {
+            namespace: "default".to_owned(),
+            path: PathBuf::from("output"),
+        },
         Some(Annotation {
             name: "Example Pipeline Job".to_owned(),
             description: "This is an example pipeline job.".to_owned(),
diff --git a/tests/pipeline.rs b/tests/pipeline.rs
index 25e03296..71a0f069 100644
--- a/tests/pipeline.rs
+++ b/tests/pipeline.rs
@@ -117,6 +117,10 @@ fn incorrect_input_packet() -> Result<()> {
                     ..Default::default()
                 }),
             )]),
+            URI {
+                namespace: "default".to_owned(),
+                path: PathBuf::from("output"),
+            },
             None
         )
         .is_err()

From 81b8a49f8f2a9ef06cafab9d2d1455b5aa58098e Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Wed, 9 Jul 2025 18:10:50 +0000
Subject: [PATCH 04/29] Save redesign

---
 cspell.json                          |   3 +-
 src/uniffi/pipeline_runner/docker.rs | 522 ++++++++++++++-------------
 tests/pipeline_runner.rs             |  33 ++
 3 files changed, 304 insertions(+), 254 deletions(-)
 create mode 100644 tests/pipeline_runner.rs

diff --git a/cspell.json b/cspell.json
index e3315aad..6f799bdc 100644
--- a/cspell.json
+++ b/cspell.json
@@ -76,7 +76,8 @@
         "wasi",
         "petgraph",
         "rfind",
-        "itertools"
+        "itertools",
+        "oneshot"
     ],
     "useGitignore": false,
     "ignorePaths": [
diff --git a/src/uniffi/pipeline_runner/docker.rs b/src/uniffi/pipeline_runner/docker.rs
index a8c6782b..523d5120 100644
--- a/src/uniffi/pipeline_runner/docker.rs
+++ b/src/uniffi/pipeline_runner/docker.rs
@@ -13,14 +13,20 @@ use serde_yaml::Serializer;
 use snafu::OptionExt as _;
 use std::{collections::HashMap, path::PathBuf, sync::Arc};
 use tokio::{
-    sync::broadcast::{self, Receiver, Sender},
-    task::JoinSet,
+    sync::{
+        broadcast::{self, Receiver, Sender, error::RecvError},
+        oneshot,
+    },
+    task::{JoinHandle, JoinSet},
 };
 use tokio_stream::StreamExt as _;
 
 #[derive(Clone, Debug)]
 pub(crate) enum Message {
-    NodeOutput(String, HashMap<String, PathSet>), // String is the parent_node_id, while HashMap is output of the parent node
+    /// String is the `parent_node_id`, while `HashMap` is output of the parent node
+    NodeOutput(String, HashMap<String, PathSet>),
+    /// String is the `node_id` that has completed processing
+    NodeProcessingComplete(String),
     ProcessingFailed(String, Arc<OrcaError>), // String is the node_id, while OrcaError is the error that occurred
     Stop,                                     // Message to halt all operations
 }
@@ -130,6 +136,7 @@ impl DockerPipelineRunner {
         source_tx: &Sender<Message>,
         namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<Sender<Message>> {
+        println!("Creating task for node: {}", node.id);
         // Get the input channels for this node which should be it's parents
         let mut input_ch_rxs = pipeline_run
             .pipeline_job
@@ -212,53 +219,127 @@ impl DockerPipelineRunner {
         success_ch_tx: Sender<Message>,
         namespace_lookup: HashMap<String, PathBuf>,
     ) -> Result<()> {
+        // Create a channel to for waiting when the node processing is complete
+        let (node_complete_tx, node_complete_rx) = oneshot::channel::<()>();
+
         // Create a futures unordered set to dynamically listen to N number of receivers
-        let mut futures = FuturesUnordered::new();
+        let chs_to_listen_to = FuturesUnordered::new();
 
         // Add all the parent channel receivers to the futures unordered set
         for mut rx in parent_channel_rxs {
-            futures.push(tokio::spawn(async move { rx.recv().await }));
+            chs_to_listen_to.push(tokio::spawn(async move { rx.recv().await }));
         }
 
         // Add the job manager channel to the futures unordered set
-        futures.push(tokio::spawn(
+        chs_to_listen_to.push(tokio::spawn(
             async move { job_manager_channel.recv().await },
         ));
 
         // Get the kernel for this node and build the correct processor
-        let mut processor: Box<dyn NodeProcessor> = match get(
+        match get(
             &pipeline_run.pipeline_job.pipeline.kernel_lut,
             &node.kernel_hash,
         )? {
-            Kernel::Pod(pod) => {
-                // Create a processor for the pod node
-                Box::new(PodNodeProcessor::new(
-                    Arc::clone(pod),
-                    pipeline_run.pipeline_job.output_dir.namespace.clone(),
-                    namespace_lookup.clone(),
-                ))
-            }
+            Kernel::Pod(pod) => PodNodeProcessor::new(
+                Arc::clone(pod),
+                node.id.clone(),
+                chs_to_listen_to,
+                success_ch_tx,
+                pipeline_run.pipeline_job.output_dir.namespace.clone(),
+                namespace_lookup,
+                node_complete_tx,
+            ),
             Kernel::Mapper(mapper) => {
-                // Create a processor for the mapper node
-                Box::new(MapperProcessor {
-                    mapper: Arc::clone(mapper),
-                })
+                todo!()
             }
             Kernel::Joiner => {
-                // Get the parents of the join node
-                let parent_nodes_id = pipeline_run
-                    .pipeline_job
-                    .pipeline
-                    .get_parents_for_node(&node)
-                    .map(|parent_node| parent_node.id.clone())
-                    .collect::<Vec<_>>();
-                // Create a processor for the join node
-                Box::new(JoinerNodeProcessor::new(parent_nodes_id))
+                todo!()
             }
         };
 
+        node_complete_rx.await;
+
+        // // Listen to the MPSC channel and handle messages
+        // while let Some(result) = chs_to_listen_to.next().await {
+        //     let rx_result = match result {
+        //         Ok(rx_result) => rx_result,
+        //         Err(err) => {
+        //             // Record into pipeline_error log
+        //             if err.is_panic() {
+        //                 eprintln!("Task panicked: {err}");
+        //             } else {
+        //                 eprintln!("Error receiving message: {err}");
+        //             }
+        //             continue;
+        //         }
+        //     };
+
+        //     let Ok(msg) = rx_result else {
+        //         eprintln!("Failed to receive message from parent channel");
+        //         continue;
+        //     };
+
+        //     match msg {
+        //         Message::NodeOutput(sender_node_id, packet) => {
+        //             // Inputs from parents are ready, thus we need to process them if they are already computed and cached
+        //             processor.process_packet(
+        //                 &sender_node_id,
+        //                 &node.id,
+        //                 packet,
+        //                 success_ch_tx.clone(),
+        //                 failure_ch_tx.clone(),
+        //             )?;
+        //         }
+        //         Message::Stop => {
+        //             todo!()
+        //         }
+        //         Message::ProcessingFailed(_, orca_error) => todo!(),
+        //         Message::NodeProcessingComplete(node_id) => ,
+        //     }
+        // }
+
+        Ok(())
+    }
+}
+
+struct PodNodeProcessor {
+    pod: Arc<Pod>,
+    node_id: String,
+    ch_to_listen_to: FuturesUnordered<JoinHandle<Result<Message, RecvError>>>,
+    success_ch_tx: Sender<Message>, // Channel to send successful outputs to the next node
+    namespace: String,
+    namespace_lookup: HashMap<String, PathBuf>, // Copy of the look up table
+    node_complete_tx: oneshot::Sender<()>,
+    processing_tasks: JoinSet<Result<(), OrcaError>>,
+}
+
+impl PodNodeProcessor {
+    fn new(
+        pod: Arc<Pod>,
+        node_id: String,
+        ch_to_listen_to: FuturesUnordered<JoinHandle<Result<Message, RecvError>>>,
+        success_ch_tx: Sender<Message>,
+        namespace: String,
+        namespace_lookup: HashMap<String, PathBuf>,
+        node_complete_tx: oneshot::Sender<()>,
+    ) -> Self {
+        Self {
+            pod,
+            node_id,
+            ch_to_listen_to,
+            success_ch_tx,
+            namespace,
+            namespace_lookup,
+            node_complete_tx,
+            processing_tasks: JoinSet::new(),
+        }
+    }
+
+    async fn start(&mut self) {
+        // Start to listen to the channels
         // Listen to the MPSC channel and handle messages
-        while let Some(result) = futures.next().await {
+
+        while let Some(result) = self.ch_to_listen_to.next().await {
             let rx_result = match result {
                 Ok(rx_result) => rx_result,
                 Err(err) => {
@@ -278,37 +359,50 @@ impl DockerPipelineRunner {
             };
 
             match msg {
-                Message::NodeOutput(sender_node_id, input_packet) => {
-                    // Inputs from parents are ready, thus we need to process them if they are already computed and cached
+                Message::NodeOutput(sender_node_id, packet) => {
+                    let pod_ref = Arc::clone(&self.pod);
+                    let node_id = self.node_id.clone();
+                    let namespace = self.namespace.clone();
+                    let namespace_lookup = self.namespace_lookup.clone();
+                    let success_ch_tx = self.success_ch_tx.clone();
+                    // Forward it into a processing task
+                    self.processing_tasks.spawn(async move {
+                        Self::process_packet(
+                            &node_id,
+                            &pod_ref,
+                            &namespace,
+                            &namespace_lookup,
+                            &packet,
+                            &success_ch_tx,
+                        )
+                    });
                 }
                 Message::Stop => {
-                    break;
+                    todo!()
                 }
                 Message::ProcessingFailed(_, orca_error) => todo!(),
+                Message::NodeProcessingComplete(node_id) => todo!(),
             }
         }
-
-        Ok(())
     }
 
-    fn process_packet_pod(
-        node: &Node,
-        pod: Arc<Pod>,
-        success_ch_tx: &Sender<Message>,
-        failure_ch_tx: &Sender<Message>,
-        input_packet: &HashMap<String, PathSet>,
-        pipeline_run: &Arc<PipelineRun>,
+    fn process_packet(
+        node_id: &str,
+        pod: &Arc<Pod>,
+        namespace: &str,
         namespace_lookup: &HashMap<String, PathBuf>,
+        packet: &HashMap<String, PathSet>,
+        success_ch_tx: &Sender<Message>,
     ) -> Result<()> {
-        // Output directory is pod_runs/pod_run_id/node_id/hash_of_input_packet
+        // Process the packet using the pod
 
-        // Compute the hash of the input_packet
+        // Create the pod_job
         let mut buf = Vec::new();
         let mut serializer = Serializer::new(&mut buf);
-        serialize_hashmap(input_packet, &mut serializer)?;
+        serialize_hashmap(packet, &mut serializer)?;
         let input_packet_hash = hash_buffer(buf);
         let output_dir = URI {
-            namespace: pipeline_run.pipeline_job.output_dir.namespace.clone(),
+            namespace: namespace.to_owned(),
             path: PathBuf::from(format!("pod_runs/{}/{}", pod.hash, input_packet_hash)),
         };
 
@@ -318,92 +412,13 @@ impl DockerPipelineRunner {
         // Create the pod job
         let pod_job = PodJob::new(
             None,
-            pod,
-            input_packet.clone(),
-            output_dir,
-            cpu_limit,
-            memory_limit,
-            None,
-            namespace_lookup,
-        )?;
-
-        // Simulate pod execution by just printing out pod_job_hash and pod hash
-        // This will be replaced by sending the pod_job to the orchestrator via the agent
-        println!(
-            "Executing pod job: {} with pod hash: {}",
-            pod_job.hash, pod_job.pod.hash
-        );
-
-        // For now we will just send the input_packet to the success channel
-        success_ch_tx.send(Message::NodeOutput(node.id.clone(), input_packet.clone()))?;
-
-        Ok(())
-    }
-}
-
-trait NodeProcessor: Send {
-    fn process_packet(
-        &mut self,
-        sender_node_id: String,
-        current_node_id: String,
-        packet: HashMap<String, PathSet>,
-        success_ch_tx: Sender<Message>,
-        failure_ch_tx: Sender<Message>,
-    ) -> Result<()>;
-}
-
-struct PodNodeProcessor {
-    pod: Arc<Pod>,
-    namespace: String,
-    namespace_lookup: HashMap<String, PathBuf>, // Copy of the look up table
-    processing_tasks: JoinSet<()>,
-}
-
-impl PodNodeProcessor {
-    fn new(pod: Arc<Pod>, namespace: String, namespace_lookup: HashMap<String, PathBuf>) -> Self {
-        Self {
-            pod,
-            namespace,
-            namespace_lookup,
-            processing_tasks: JoinSet::new(),
-        }
-    }
-}
-
-impl NodeProcessor for PodNodeProcessor {
-    fn process_packet(
-        &mut self,
-        sender_node_id: String,
-        current_node_id: String,
-        packet: HashMap<String, PathSet>,
-        success_ch_tx: Sender<Message>,
-        failure_ch_tx: Sender<Message>,
-    ) -> Result<()> {
-        // Process the packet using the pod
-
-        // Create the pod_job
-        let mut buf = Vec::new();
-        let mut serializer = Serializer::new(&mut buf);
-        serialize_hashmap(&packet, &mut serializer)?;
-        let input_packet_hash = hash_buffer(buf);
-        let output_dir = URI {
-            namespace: self.namespace.clone(),
-            path: PathBuf::from(format!("pod_runs/{}/{}", self.pod.hash, input_packet_hash)),
-        };
-
-        let cpu_limit = self.pod.recommended_cpus;
-        let memory_limit = self.pod.recommended_memory;
-
-        // Create the pod job
-        let pod_job = PodJob::new(
-            None,
-            Arc::clone(&self.pod),
+            Arc::clone(pod),
             packet.clone(),
             output_dir,
             cpu_limit,
             memory_limit,
             None,
-            &self.namespace_lookup,
+            namespace_lookup,
         )?;
 
         // Simulate pod execution by just printing out pod_job_hash and pod hash
@@ -414,135 +429,136 @@ impl NodeProcessor for PodNodeProcessor {
         );
 
         // For now we will just send the input_packet to the success channel
-        success_ch_tx.send(Message::NodeOutput(current_node_id, packet.clone()))?;
+        success_ch_tx.send(Message::NodeOutput(node_id.to_owned(), packet.clone()))?;
 
         Ok(())
     }
 }
-struct MapperProcessor {
-    mapper: Arc<Mapper>,
-}
 
-impl NodeProcessor for MapperProcessor {
-    fn process_packet(
-        &mut self,
-        sender_node_id: String,
-        current_node_id: String,
-        packet: HashMap<String, PathSet>,
-        success_ch_tx: Sender<Message>,
-        _failure_ch_tx: Sender<Message>,
-    ) -> Result<()> {
-        // Apply the mapping to the input packet
-        let output_map = self
-            .mapper
-            .mapping
-            .iter()
-            .map(|(input_key, output_key)| {
-                let input = get(&packet, input_key)?.clone();
-                Ok((output_key.to_owned(), input))
-            })
-            .collect::<Result<HashMap<_, _>>>()?;
-
-        // Send the output via the channel
-        success_ch_tx.send(Message::NodeOutput(sender_node_id, output_map))?;
-        Ok(())
-    }
-}
-
-struct JoinerNodeProcessor {
-    /// Cache for all packets received by the node
-    input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
-}
-
-impl JoinerNodeProcessor {
-    fn new(parents_node_id: Vec<String>) -> Self {
-        let input_packet_cache = parents_node_id
-            .into_iter()
-            .map(|id| (id, Vec::new()))
-            .collect();
-        Self { input_packet_cache }
-    }
-
-    fn compute_new_packet_combination(
-        &self,
-        sender_node_id: &str,
-        new_packet: &HashMap<String, PathSet>,
-    ) -> Result<Vec<HashMap<String, PathSet>>> {
-        // Combine the new packet with the existing packets in the cache
-        // Get all the cached packets from other parents
-        let other_parent_ids = self
-            .input_packet_cache
-            .keys()
-            .filter(|key| *key != sender_node_id);
-        let mut factors = other_parent_ids
-            .map(|id| get(&self.input_packet_cache, id))
-            .collect::<Result<Vec<_>>>()?;
-
-        // Add the new incoming packet as a factor
-        let incoming_packet = vec![new_packet.clone()];
-        factors.push(&incoming_packet);
-
-        let result = factors
-            .into_iter()
-            .multi_cartesian_product()
-            .map(|packets_to_combined| {
-                packets_to_combined
-                    .into_iter()
-                    .fold(HashMap::new(), |mut acc, packet| {
-                        acc.extend(packet.clone());
-                        acc
-                    })
-            })
-            .collect::<Vec<_>>();
-
-        Ok(result)
-    }
-}
-
-impl NodeProcessor for JoinerNodeProcessor {
-    fn process_packet(
-        &mut self,
-        sender_node_id: String,
-        current_node_id: String,
-        packet: HashMap<String, PathSet>,
-        success_ch_tx: Sender<Message>,
-        failure_ch_tx: Sender<Message>,
-    ) -> Result<()> {
-        let process_result = {
-            // Compute the new packet combination based on the sender node id and the packet
-            let new_packets_to_send =
-                self.compute_new_packet_combination(&sender_node_id, &packet)?;
-
-            // Record the packet into the cache
-            self.input_packet_cache
-                .get_mut(&sender_node_id)
-                .context(selector::KeyMissing {
-                    key: sender_node_id.clone(),
-                })?
-                .push(packet);
-
-            Ok::<Vec<HashMap<String, PathSet>>, OrcaError>(new_packets_to_send)
-        };
-
-        match process_result {
-            Ok(output_packets) => {
-                // Send the output packets to the success channel
-                for output_packet in output_packets {
-                    success_ch_tx
-                        .send(Message::NodeOutput(current_node_id.clone(), output_packet))?;
-                }
-            }
-            Err(err) => {
-                // Send the error to the failure channel
-                failure_ch_tx.send(Message::NodeOutput(
-                    sender_node_id.clone(),
-                    HashMap::new(), // Empty packet on failure
-                ))?;
-                return Err(err);
-            }
-        }
-        // Add the new packet into the cache
-
-        Ok(())
-    }
-}
+// struct MapperProcessor {
+//     mapper: Arc<Mapper>,
+// }
+
+// impl NodeProcessor for MapperProcessor {
+//     fn process_packet(
+//         &mut self,
+//         sender_node_id: String,
+//         current_node_id: String,
+//         packet: HashMap<String, PathSet>,
+//         success_ch_tx: Sender<Message>,
+//         _failure_ch_tx: Sender<Message>,
+//     ) -> Result<()> {
+//         // Apply the mapping to the input packet
+//         let output_map = self
+//             .mapper
+//             .mapping
+//             .iter()
+//             .map(|(input_key, output_key)| {
+//                 let input = get(&packet, input_key)?.clone();
+//                 Ok((output_key.to_owned(), input))
+//             })
+//             .collect::<Result<HashMap<_, _>>>()?;
+
+//         // Send the output via the channel
+//         success_ch_tx.send(Message::NodeOutput(sender_node_id, output_map))?;
+//         Ok(())
+//     }
+// }
+
+// struct JoinerNodeProcessor {
+//     /// Cache for all packets received by the node
+//     input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
+// }
+
+// impl JoinerNodeProcessor {
+//     fn new(parents_node_id: Vec<String>) -> Self {
+//         let input_packet_cache = parents_node_id
+//             .into_iter()
+//             .map(|id| (id, Vec::new()))
+//             .collect();
+//         Self { input_packet_cache }
+//     }
+
+//     fn compute_new_packet_combination(
+//         &self,
+//         sender_node_id: &str,
+//         new_packet: &HashMap<String, PathSet>,
+//     ) -> Result<Vec<HashMap<String, PathSet>>> {
+//         // Combine the new packet with the existing packets in the cache
+//         // Get all the cached packets from other parents
+//         let other_parent_ids = self
+//             .input_packet_cache
+//             .keys()
+//             .filter(|key| *key != sender_node_id);
+//         let mut factors = other_parent_ids
+//             .map(|id| get(&self.input_packet_cache, id))
+//             .collect::<Result<Vec<_>>>()?;
+
+//         // Add the new incoming packet as a factor
+//         let incoming_packet = vec![new_packet.clone()];
+//         factors.push(&incoming_packet);
+
+//         let result = factors
+//             .into_iter()
+//             .multi_cartesian_product()
+//             .map(|packets_to_combined| {
+//                 packets_to_combined
+//                     .into_iter()
+//                     .fold(HashMap::new(), |mut acc, packet| {
+//                         acc.extend(packet.clone());
+//                         acc
+//                     })
+//             })
+//             .collect::<Vec<_>>();
+
+//         Ok(result)
+//     }
+// }
+
+// impl NodeProcessor for JoinerNodeProcessor {
+//     fn process_packet(
+//         &mut self,
+//         sender_node_id: String,
+//         current_node_id: String,
+//         packet: HashMap<String, PathSet>,
+//         success_ch_tx: Sender<Message>,
+//         failure_ch_tx: Sender<Message>,
+//     ) -> Result<()> {
+//         let process_result = {
+//             // Compute the new packet combination based on the sender node id and the packet
+//             let new_packets_to_send =
+//                 self.compute_new_packet_combination(&sender_node_id, &packet)?;
+
+//             // Record the packet into the cache
+//             self.input_packet_cache
+//                 .get_mut(&sender_node_id)
+//                 .context(selector::KeyMissing {
+//                     key: sender_node_id.clone(),
+//                 })?
+//                 .push(packet);
+
+//             Ok::<Vec<HashMap<String, PathSet>>, OrcaError>(new_packets_to_send)
+//         };
+
+//         match process_result {
+//             Ok(output_packets) => {
+//                 // Send the output packets to the success channel
+//                 for output_packet in output_packets {
+//                     success_ch_tx
+//                         .send(Message::NodeOutput(current_node_id.clone(), output_packet))?;
+//                 }
+//             }
+//             Err(err) => {
+//                 // Send the error to the failure channel
+//                 failure_ch_tx.send(Message::NodeOutput(
+//                     sender_node_id.clone(),
+//                     HashMap::new(), // Empty packet on failure
+//                 ))?;
+//                 return Err(err);
+//             }
+//         }
+//         // Add the new packet into the cache
+
+//         Ok(())
+//     }
+// }
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
new file mode 100644
index 00000000..7959088b
--- /dev/null
+++ b/tests/pipeline_runner.rs
@@ -0,0 +1,33 @@
+#![expect(missing_docs, reason = "OK in tests.")]
+// If 'fixture' is a local module, ensure there is a 'mod fixture;' statement or a 'fixture.rs' file in the same directory or in 'tests/'.
+// If 'fixture' is an external crate, add it to Cargo.toml and import as shown below.
+// use fixture::pipeline_job;
+pub mod fixture;
+
+// Example for a local module:
+use std::collections::HashMap;
+
+use orcapod::uniffi::{error::Result, pipeline_runner::docker::DockerPipelineRunner};
+
+use crate::fixture::TestDirs;
+use fixture::pipeline_job;
+
+#[tokio::test]
+async fn basic_run() -> Result<()> {
+    let pipeline_job = pipeline_job()?;
+
+    // Create the runner
+    let mut runner = DockerPipelineRunner::new();
+
+    let test_dirs = TestDirs::new(&HashMap::from([(
+        "default".to_owned(),
+        Some("./tests/extra/data/"),
+    )]))?;
+    let namespace_lookup = test_dirs.namespace_lookup();
+
+    let pipeline_run = runner.start(pipeline_job, &namespace_lookup)?;
+
+    // Wait for the pipeline run to complete
+    let result = runner.get_result(&pipeline_run).await?;
+    Ok(())
+}

From 54d38be74664a3975a558d44cc34182b27b9d492 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Thu, 10 Jul 2025 10:57:57 +0000
Subject: [PATCH 05/29] Fix bugs and implemented missing parts

---
 .clippy.toml                         |   2 +-
 src/core/error.rs                    |  20 +-
 src/core/util.rs                     |  20 +-
 src/uniffi/error.rs                  |  15 +-
 src/uniffi/pipeline.rs               |  39 +-
 src/uniffi/pipeline_runner/docker.rs | 665 +++++++++++++++++----------
 tests/fixture/mod.rs                 |   4 +-
 tests/pipeline.rs                    |  35 +-
 tests/pipeline_runner.rs             |   1 +
 9 files changed, 463 insertions(+), 338 deletions(-)

diff --git a/.clippy.toml b/.clippy.toml
index 5821063e..6b3b5fee 100644
--- a/.clippy.toml
+++ b/.clippy.toml
@@ -1,3 +1,3 @@
-excessive-nesting-threshold = 5
+excessive-nesting-threshold = 6
 too-many-arguments-threshold = 10
 allowed-idents-below-min-chars = ["..", "k", "v", "f", "re", "id", "Ok", "'_"]
diff --git a/src/core/error.rs b/src/core/error.rs
index af0eb7aa..06e3adef 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -12,7 +12,10 @@ use std::{
     io,
     path::{self},
 };
-use tokio::{sync::broadcast::error::SendError, task::JoinError};
+use tokio::{
+    sync::{broadcast::error::SendError, oneshot},
+    task::JoinError,
+};
 
 impl From<BollardError> for OrcaError {
     fn from(error: BollardError) -> Self {
@@ -24,6 +27,16 @@ impl From<BollardError> for OrcaError {
         }
     }
 }
+impl From<oneshot::error::RecvError> for OrcaError {
+    fn from(error: oneshot::error::RecvError) -> Self {
+        Self {
+            kind: Kind::ChannelReceiveError {
+                source: error,
+                backtrace: Some(Backtrace::capture()),
+            },
+        }
+    }
+}
 impl From<glob::PatternError> for OrcaError {
     fn from(error: glob::PatternError) -> Self {
         Self {
@@ -113,7 +126,8 @@ fn format_stack(backtrace: Option<&Backtrace>) -> String {
 impl fmt::Debug for OrcaError {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
         match &self.kind {
-            Kind::EmptyResponseWhenLoadingContainerAltImage { backtrace, .. }
+            Kind::ReceiverDroppedBeforeSender { backtrace, .. }
+            | Kind::EmptyResponseWhenLoadingContainerAltImage { backtrace, .. }
             | Kind::FailedToParseDot { backtrace, .. }
             | Kind::GeneratedNamesOverflow { backtrace, .. }
             | Kind::InvalidFilepath { backtrace, .. }
@@ -124,8 +138,8 @@ impl fmt::Debug for OrcaError {
             | Kind::NoFileName { backtrace, .. }
             | Kind::NoMatchingPodRun { backtrace, .. }
             | Kind::NoTagFoundInContainerAltImage { backtrace, .. }
-            | Kind::MissingInputSpecKey { backtrace, .. }
             | Kind::BollardError { backtrace, .. }
+            | Kind::ChannelReceiveError { backtrace, .. }
             | Kind::GlobPatternError { backtrace, .. }
             | Kind::IoError { backtrace, .. }
             | Kind::PathPrefixError { backtrace, .. }
diff --git a/src/core/util.rs b/src/core/util.rs
index d41ed4ac..2b7bf6b4 100644
--- a/src/core/util.rs
+++ b/src/core/util.rs
@@ -1,7 +1,4 @@
-use crate::uniffi::{
-    error::{Result, selector},
-    model::PathSet,
-};
+use crate::uniffi::error::{Result, selector};
 use snafu::OptionExt as _;
 use std::{
     any::type_name,
@@ -43,18 +40,3 @@ where
     })?;
     Ok(temp)
 }
-
-pub fn find_missing_keys<'a>(
-    input_map: &HashMap<String, PathSet>,
-    keys_to_check: impl Iterator<Item = &'a String>,
-) -> Vec<String> {
-    keys_to_check
-        .filter_map(|key| {
-            if input_map.contains_key(key) {
-                None
-            } else {
-                Some(key.clone())
-            }
-        })
-        .collect()
-}
diff --git a/src/uniffi/error.rs b/src/uniffi/error.rs
index 85eb7103..72cae77f 100644
--- a/src/uniffi/error.rs
+++ b/src/uniffi/error.rs
@@ -14,6 +14,7 @@ use std::{
     path::{self, PathBuf},
     result,
 };
+use tokio::sync::oneshot;
 use uniffi;
 
 /// Shorthand for a Result that returns an `OrcaError`.
@@ -23,6 +24,10 @@ pub type Result<T, E = OrcaError> = result::Result<T, E>;
 #[snafu(module(selector), visibility(pub(crate)), context(suffix(false)))]
 #[uniffi(flat_error)]
 pub(crate) enum Kind {
+    #[snafu(display(
+        "Receiver was dropped before sender could send a message for oneshot channel"
+    ))]
+    ReceiverDroppedBeforeSender { backtrace: Option<Backtrace> },
     #[snafu(display(
         "Received an empty response when attempting to load the alternate container image file: {path:?}."
     ))]
@@ -81,11 +86,6 @@ pub(crate) enum Kind {
         path: PathBuf,
         backtrace: Option<Backtrace>,
     },
-    #[snafu(display("Input map missing required packet keys: {missing_keys:?}"))]
-    MissingInputSpecKey {
-        missing_keys: Vec<String>,
-        backtrace: Option<Backtrace>,
-    },
     #[snafu(display("Failed to send message because: {reason}"))]
     SendError {
         reason: String,
@@ -97,6 +97,11 @@ pub(crate) enum Kind {
         backtrace: Option<Backtrace>,
     },
     #[snafu(transparent)]
+    ChannelReceiveError {
+        source: oneshot::error::RecvError,
+        backtrace: Option<Backtrace>,
+    },
+    #[snafu(transparent)]
     GlobPatternError {
         source: glob::PatternError,
         backtrace: Option<Backtrace>,
diff --git a/src/uniffi/pipeline.rs b/src/uniffi/pipeline.rs
index d90fac87..27d84dbd 100644
--- a/src/uniffi/pipeline.rs
+++ b/src/uniffi/pipeline.rs
@@ -1,9 +1,5 @@
 use crate::{
-    core::{
-        crypto::hash_buffer,
-        model::to_yaml,
-        util::{find_missing_keys, get},
-    },
+    core::{crypto::hash_buffer, model::to_yaml, util::get},
     uniffi::{
         error::{Kind, OrcaError, Result},
         model::{Annotation, PathSet, Pod, URI},
@@ -351,7 +347,8 @@ pub struct PipelineJob {
     #[serde(skip)]
     pub pipeline: Pipeline,
     /// Mapping of outside input to keys to be match with the pipeline `input_map`
-    pub input_map: HashMap<String, PathSet>,
+    pub input_packets: Vec<HashMap<String, PathSet>>,
+    /// Directory where to store the outputs of the pipeline
     pub output_dir: URI,
     /// Annotation for the pipeline job
     pub annotation: Option<Annotation>,
@@ -363,39 +360,15 @@ impl PipelineJob {
     /// Error out if there are missing keys or failed to convert to yaml
     pub fn new(
         pipeline: Pipeline,
-        input_packet: HashMap<String, PathSet>,
+        input_packets: Vec<HashMap<String, PathSet>>,
         output_dir: URI,
         annotation: Option<Annotation>,
     ) -> Result<Self> {
-        // Check if input_map has all the requires keys
-        let missing_keys = pipeline
-            .get_root_nodes()
-            .map(|node| match pipeline.get_kernel(&node.kernel_hash)? {
-                Kernel::Pod(pod) => Ok(find_missing_keys(&input_packet, pod.input_spec.keys())),
-                Kernel::Mapper(mapper) => {
-                    Ok(find_missing_keys(&input_packet, mapper.mapping.keys()))
-                }
-                Kernel::Joiner => Ok(Vec::<String>::new()), // Should probably error out because joiner should not be a root node
-            })
-            .collect::<Result<Vec<Vec<String>>>>()?
-            .into_iter()
-            .flatten()
-            .collect::<Vec<String>>();
-
-        if !missing_keys.is_empty() {
-            return Err(OrcaError {
-                kind: Kind::MissingInputSpecKey {
-                    missing_keys,
-                    backtrace: Some(Backtrace::capture()),
-                },
-            });
-        }
-
         // Create the job without_hash
         let no_hash = Self {
             hash: String::new(),
             pipeline,
-            input_map: input_packet,
+            input_packets,
             annotation,
             output_dir,
         };
@@ -406,6 +379,8 @@ impl PipelineJob {
         })
     }
 }
+
+#[derive(uniffi::Object, Display, Debug, Clone, Serialize)]
 pub struct PipelineResult {
     pub pipeline_job: PipelineJob,
 }
diff --git a/src/uniffi/pipeline_runner/docker.rs b/src/uniffi/pipeline_runner/docker.rs
index 523d5120..1b279bb0 100644
--- a/src/uniffi/pipeline_runner/docker.rs
+++ b/src/uniffi/pipeline_runner/docker.rs
@@ -2,7 +2,7 @@ use super::PipelineRun;
 use crate::{
     core::{crypto::hash_buffer, model::serialize_hashmap, util::get},
     uniffi::{
-        error::{OrcaError, Result, selector},
+        error::{Kind, OrcaError, Result, selector},
         model::{PathSet, Pod, PodJob, URI},
         pipeline::{Kernel, Mapper, Node, PipelineJob, PipelineResult},
     },
@@ -11,9 +11,10 @@ use futures_util::stream::FuturesUnordered;
 use itertools::Itertools as _;
 use serde_yaml::Serializer;
 use snafu::OptionExt as _;
-use std::{collections::HashMap, path::PathBuf, sync::Arc};
+use std::{backtrace::Backtrace, collections::HashMap, path::PathBuf, sync::Arc};
 use tokio::{
     sync::{
+        RwLock,
         broadcast::{self, Receiver, Sender, error::RecvError},
         oneshot,
     },
@@ -27,16 +28,18 @@ pub(crate) enum Message {
     NodeOutput(String, HashMap<String, PathSet>),
     /// String is the `node_id` that has completed processing
     NodeProcessingComplete(String),
-    ProcessingFailed(String, Arc<OrcaError>), // String is the node_id, while OrcaError is the error that occurred
-    Stop,                                     // Message to halt all operations
+    Stop, // Message to halt all operations
 }
 
+#[expect(
+    clippy::type_complexity,
+    reason = "too complex, but necessary for async handling"
+)]
 struct PipelineRunInfo {
     node_task_join_set: JoinSet<Result<()>>, // Join set to track the tasks for this pipeline run
     job_manager_ch_tx: Sender<Message>,
     node_tx: HashMap<String, Sender<Message>>,
-    outputs: HashMap<String, HashMap<String, PathSet>>, // String is the node key, while hash
-    namespace_lookup: HashMap<String, PathBuf>,
+    outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>, // String is the node key, while hash
 }
 
 /// Docker based pipeline runner meant to execute on a single machine
@@ -71,8 +74,7 @@ impl DockerPipelineRunner {
                 job_manager_ch_tx: broadcast::channel::<Message>(1).0,
                 node_tx: HashMap::new(),
                 node_task_join_set: JoinSet::new(),
-                outputs: HashMap::new(),
-                namespace_lookup: namespace_lookup.clone(),
+                outputs: Arc::new(RwLock::new(HashMap::new())),
             },
         );
 
@@ -87,11 +89,25 @@ impl DockerPipelineRunner {
         // This will recursively create all the tasks and channels for the pipeline
         pipeline.get_leaf_nodes().try_for_each(|node| {
             self.create_task_for_node(node, &pipeline_run_arc, &source_tx, namespace_lookup)?;
+
+            // Since we don't have output nodes implemented, and currently it is set as leaf nodes,
+            // we can do the output handling logic here too
+
             Ok::<(), OrcaError>(())
         })?;
 
-        // Create a task to handle outputs of output nodes in pipeline
-        // for node_key in pipeline.output_nodes {}
+        // All pipeline tasks have been created, now we need to feed the inputs to the pipeline
+        pipeline_run
+            .pipeline_job
+            .input_packets
+            .iter()
+            .try_for_each(|input_map| {
+                source_tx.send(Message::NodeOutput("input".to_owned(), input_map.clone()))?;
+                Ok::<(), OrcaError>(())
+            })?;
+
+        // Send a message that all job inputs have been sent
+        source_tx.send(Message::NodeProcessingComplete("input".to_owned()))?;
 
         Ok(pipeline_run)
     }
@@ -210,6 +226,75 @@ impl DockerPipelineRunner {
         Ok(tx)
     }
 
+    fn create_task_to_capture_output_of_node(
+        &mut self,
+        node: &Node,
+        pipeline_run: &Arc<PipelineRun>,
+    ) -> Result<()> {
+        let pipeline_run_info =
+            self.pipeline_runs
+                .get_mut(pipeline_run)
+                .context(selector::KeyMissing {
+                    key: pipeline_run.to_string(),
+                })?;
+        // Get the output ch rx for the node
+        let node_rx = get(&pipeline_run_info.node_tx, &node.id)?.subscribe();
+        // Create a new ref copy of pipeline_run_output
+        let outputs_ref = Arc::clone(&pipeline_run_info.outputs);
+        // Create a task to listen to it and record the outputs
+        pipeline_run_info
+            .node_task_join_set
+            .spawn(Self::capture_node_output(node_rx, outputs_ref));
+
+        Ok(())
+    }
+
+    #[expect(
+        clippy::type_complexity,
+        reason = "too complex, but necessary for async handling"
+    )]
+    async fn capture_node_output(
+        mut node_rx: Receiver<Message>,
+        outputs_ref: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>,
+    ) -> Result<()> {
+        loop {
+            let message = match node_rx.recv().await {
+                Ok(message) => message,
+                Err(err) => {
+                    match err {
+                        RecvError::Closed => {
+                            // No more message will be received, thus we can exit the loop
+                            // Only case where this will occur is when the channel is closed due to abort
+                            break;
+                        }
+                        RecvError::Lagged(_) => {
+                            print!("Warning: Channel lagged, skipping message");
+                        }
+                    }
+                    continue;
+                }
+            };
+            match message {
+                Message::NodeOutput(node_id, hash_map) => {
+                    // Record the output
+
+                    outputs_ref
+                        .write()
+                        .await
+                        .entry(node_id)
+                        .or_default()
+                        .push(hash_map);
+                }
+                Message::NodeProcessingComplete(_) | Message::Stop => {
+                    // Node processing is complete, we can stop listening to this channel
+                    break;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
     /// For tx: Sender<Message>, we only want to send successfully completed results to the next node
     async fn start_node_manager(
         node: Node,
@@ -220,7 +305,7 @@ impl DockerPipelineRunner {
         namespace_lookup: HashMap<String, PathBuf>,
     ) -> Result<()> {
         // Create a channel to for waiting when the node processing is complete
-        let (node_complete_tx, node_complete_rx) = oneshot::channel::<()>();
+        let (processing_complete_ch_tx, processing_complete_ch_rx) = oneshot::channel::<()>();
 
         // Create a futures unordered set to dynamically listen to N number of receivers
         let chs_to_listen_to = FuturesUnordered::new();
@@ -235,111 +320,66 @@ impl DockerPipelineRunner {
             async move { job_manager_channel.recv().await },
         ));
 
+        // Create a metadata struct for this node
+        let node_metadata = NodeMetaData {
+            node_id: node.id.clone(),
+            ch_to_listen_to: chs_to_listen_to,
+            success_ch_tx: success_ch_tx.clone(),
+            namespace: pipeline_run.pipeline_job.output_dir.namespace.clone(),
+            namespace_lookup: namespace_lookup.clone(),
+        };
+
         // Get the kernel for this node and build the correct processor
         match get(
             &pipeline_run.pipeline_job.pipeline.kernel_lut,
             &node.kernel_hash,
         )? {
-            Kernel::Pod(pod) => PodNodeProcessor::new(
-                Arc::clone(pod),
-                node.id.clone(),
-                chs_to_listen_to,
-                success_ch_tx,
-                pipeline_run.pipeline_job.output_dir.namespace.clone(),
-                namespace_lookup,
-                node_complete_tx,
-            ),
+            Kernel::Pod(pod) => {
+                let mut processor = PodProcessor::new(Arc::clone(pod), node_metadata);
+                processor.start(processing_complete_ch_tx).await?;
+                processing_complete_ch_rx.await?;
+            }
             Kernel::Mapper(mapper) => {
-                todo!()
+                let mut processor = MapperProcessor::new(Arc::clone(mapper), node_metadata);
+                processor.start(processing_complete_ch_tx).await?;
+                processing_complete_ch_rx.await?;
             }
             Kernel::Joiner => {
-                todo!()
+                let parent_nodes_id = pipeline_run
+                    .pipeline_job
+                    .pipeline
+                    .get_parents_for_node(&node)
+                    .map(|parent_node| parent_node.id.clone())
+                    .collect::<Vec<_>>();
+                let mut processor = JoinerProcessor::new(parent_nodes_id, node_metadata);
+                processor.start(processing_complete_ch_tx).await?;
+                processing_complete_ch_rx.await?;
             }
-        };
-
-        node_complete_rx.await;
-
-        // // Listen to the MPSC channel and handle messages
-        // while let Some(result) = chs_to_listen_to.next().await {
-        //     let rx_result = match result {
-        //         Ok(rx_result) => rx_result,
-        //         Err(err) => {
-        //             // Record into pipeline_error log
-        //             if err.is_panic() {
-        //                 eprintln!("Task panicked: {err}");
-        //             } else {
-        //                 eprintln!("Error receiving message: {err}");
-        //             }
-        //             continue;
-        //         }
-        //     };
-
-        //     let Ok(msg) = rx_result else {
-        //         eprintln!("Failed to receive message from parent channel");
-        //         continue;
-        //     };
-
-        //     match msg {
-        //         Message::NodeOutput(sender_node_id, packet) => {
-        //             // Inputs from parents are ready, thus we need to process them if they are already computed and cached
-        //             processor.process_packet(
-        //                 &sender_node_id,
-        //                 &node.id,
-        //                 packet,
-        //                 success_ch_tx.clone(),
-        //                 failure_ch_tx.clone(),
-        //             )?;
-        //         }
-        //         Message::Stop => {
-        //             todo!()
-        //         }
-        //         Message::ProcessingFailed(_, orca_error) => todo!(),
-        //         Message::NodeProcessingComplete(node_id) => ,
-        //     }
-        // }
+        }
 
         Ok(())
     }
 }
 
-struct PodNodeProcessor {
-    pod: Arc<Pod>,
+struct NodeMetaData {
     node_id: String,
     ch_to_listen_to: FuturesUnordered<JoinHandle<Result<Message, RecvError>>>,
     success_ch_tx: Sender<Message>, // Channel to send successful outputs to the next node
     namespace: String,
     namespace_lookup: HashMap<String, PathBuf>, // Copy of the look up table
-    node_complete_tx: oneshot::Sender<()>,
-    processing_tasks: JoinSet<Result<(), OrcaError>>,
 }
 
-impl PodNodeProcessor {
-    fn new(
-        pod: Arc<Pod>,
-        node_id: String,
-        ch_to_listen_to: FuturesUnordered<JoinHandle<Result<Message, RecvError>>>,
-        success_ch_tx: Sender<Message>,
-        namespace: String,
-        namespace_lookup: HashMap<String, PathBuf>,
-        node_complete_tx: oneshot::Sender<()>,
-    ) -> Self {
-        Self {
-            pod,
-            node_id,
-            ch_to_listen_to,
-            success_ch_tx,
-            namespace,
-            namespace_lookup,
-            node_complete_tx,
-            processing_tasks: JoinSet::new(),
-        }
-    }
+trait NodeProcessor {
+    fn get_ch_to_listen_to(
+        &mut self,
+    ) -> &mut FuturesUnordered<JoinHandle<Result<Message, RecvError>>>;
 
-    async fn start(&mut self) {
+    async fn wait_for_node_task_completion(&mut self);
+
+    async fn start(&mut self, process_complete_ch_tx: oneshot::Sender<()>) -> Result<()> {
         // Start to listen to the channels
         // Listen to the MPSC channel and handle messages
-
-        while let Some(result) = self.ch_to_listen_to.next().await {
+        while let Some(result) = self.get_ch_to_listen_to().next().await {
             let rx_result = match result {
                 Ok(rx_result) => rx_result,
                 Err(err) => {
@@ -358,32 +398,42 @@ impl PodNodeProcessor {
                 continue;
             };
 
-            match msg {
-                Message::NodeOutput(sender_node_id, packet) => {
-                    let pod_ref = Arc::clone(&self.pod);
-                    let node_id = self.node_id.clone();
-                    let namespace = self.namespace.clone();
-                    let namespace_lookup = self.namespace_lookup.clone();
-                    let success_ch_tx = self.success_ch_tx.clone();
-                    // Forward it into a processing task
-                    self.processing_tasks.spawn(async move {
-                        Self::process_packet(
-                            &node_id,
-                            &pod_ref,
-                            &namespace,
-                            &namespace_lookup,
-                            &packet,
-                            &success_ch_tx,
-                        )
-                    });
-                }
-                Message::Stop => {
-                    todo!()
-                }
-                Message::ProcessingFailed(_, orca_error) => todo!(),
-                Message::NodeProcessingComplete(node_id) => todo!(),
+            // Process the message
+            if self.process_msg(msg).await? {
+                // If the message indicates that processing is complete, we can exit the loop
+                // Wait for all processing tasks to complete before sending the completion message
+
+                self.wait_for_node_task_completion().await;
+
+                // Send the node processing complete message
+                process_complete_ch_tx.send(()).map_err(|()| OrcaError {
+                    kind: Kind::ReceiverDroppedBeforeSender {
+                        backtrace: Some(Backtrace::capture()),
+                    },
+                })?;
+                break;
             }
         }
+
+        Ok(())
+    }
+
+    async fn process_msg(&mut self, msg: Message) -> Result<bool>;
+}
+
+struct PodProcessor {
+    pod: Arc<Pod>,
+    node_metadata: NodeMetaData,
+    processing_tasks: JoinSet<Result<(), OrcaError>>,
+}
+
+impl PodProcessor {
+    fn new(pod: Arc<Pod>, node_metadata: NodeMetaData) -> Self {
+        Self {
+            pod,
+            node_metadata,
+            processing_tasks: JoinSet::new(),
+        }
     }
 
     fn process_packet(
@@ -435,130 +485,259 @@ impl PodNodeProcessor {
     }
 }
 
-// struct MapperProcessor {
-//     mapper: Arc<Mapper>,
-// }
-
-// impl NodeProcessor for MapperProcessor {
-//     fn process_packet(
-//         &mut self,
-//         sender_node_id: String,
-//         current_node_id: String,
-//         packet: HashMap<String, PathSet>,
-//         success_ch_tx: Sender<Message>,
-//         _failure_ch_tx: Sender<Message>,
-//     ) -> Result<()> {
-//         // Apply the mapping to the input packet
-//         let output_map = self
-//             .mapper
-//             .mapping
-//             .iter()
-//             .map(|(input_key, output_key)| {
-//                 let input = get(&packet, input_key)?.clone();
-//                 Ok((output_key.to_owned(), input))
-//             })
-//             .collect::<Result<HashMap<_, _>>>()?;
-
-//         // Send the output via the channel
-//         success_ch_tx.send(Message::NodeOutput(sender_node_id, output_map))?;
-//         Ok(())
-//     }
-// }
-
-// struct JoinerNodeProcessor {
-//     /// Cache for all packets received by the node
-//     input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
-// }
-
-// impl JoinerNodeProcessor {
-//     fn new(parents_node_id: Vec<String>) -> Self {
-//         let input_packet_cache = parents_node_id
-//             .into_iter()
-//             .map(|id| (id, Vec::new()))
-//             .collect();
-//         Self { input_packet_cache }
-//     }
-
-//     fn compute_new_packet_combination(
-//         &self,
-//         sender_node_id: &str,
-//         new_packet: &HashMap<String, PathSet>,
-//     ) -> Result<Vec<HashMap<String, PathSet>>> {
-//         // Combine the new packet with the existing packets in the cache
-//         // Get all the cached packets from other parents
-//         let other_parent_ids = self
-//             .input_packet_cache
-//             .keys()
-//             .filter(|key| *key != sender_node_id);
-//         let mut factors = other_parent_ids
-//             .map(|id| get(&self.input_packet_cache, id))
-//             .collect::<Result<Vec<_>>>()?;
-
-//         // Add the new incoming packet as a factor
-//         let incoming_packet = vec![new_packet.clone()];
-//         factors.push(&incoming_packet);
-
-//         let result = factors
-//             .into_iter()
-//             .multi_cartesian_product()
-//             .map(|packets_to_combined| {
-//                 packets_to_combined
-//                     .into_iter()
-//                     .fold(HashMap::new(), |mut acc, packet| {
-//                         acc.extend(packet.clone());
-//                         acc
-//                     })
-//             })
-//             .collect::<Vec<_>>();
-
-//         Ok(result)
-//     }
-// }
-
-// impl NodeProcessor for JoinerNodeProcessor {
-//     fn process_packet(
-//         &mut self,
-//         sender_node_id: String,
-//         current_node_id: String,
-//         packet: HashMap<String, PathSet>,
-//         success_ch_tx: Sender<Message>,
-//         failure_ch_tx: Sender<Message>,
-//     ) -> Result<()> {
-//         let process_result = {
-//             // Compute the new packet combination based on the sender node id and the packet
-//             let new_packets_to_send =
-//                 self.compute_new_packet_combination(&sender_node_id, &packet)?;
-
-//             // Record the packet into the cache
-//             self.input_packet_cache
-//                 .get_mut(&sender_node_id)
-//                 .context(selector::KeyMissing {
-//                     key: sender_node_id.clone(),
-//                 })?
-//                 .push(packet);
-
-//             Ok::<Vec<HashMap<String, PathSet>>, OrcaError>(new_packets_to_send)
-//         };
-
-//         match process_result {
-//             Ok(output_packets) => {
-//                 // Send the output packets to the success channel
-//                 for output_packet in output_packets {
-//                     success_ch_tx
-//                         .send(Message::NodeOutput(current_node_id.clone(), output_packet))?;
-//                 }
-//             }
-//             Err(err) => {
-//                 // Send the error to the failure channel
-//                 failure_ch_tx.send(Message::NodeOutput(
-//                     sender_node_id.clone(),
-//                     HashMap::new(), // Empty packet on failure
-//                 ))?;
-//                 return Err(err);
-//             }
-//         }
-//         // Add the new packet into the cache
-
-//         Ok(())
-//     }
-// }
+impl NodeProcessor for PodProcessor {
+    async fn process_msg(&mut self, msg: Message) -> Result<bool> {
+        match msg {
+            Message::NodeOutput(sender_node_id, packet) => {
+                let pod_ref = Arc::clone(&self.pod);
+                let node_id = self.node_metadata.node_id.clone();
+                let namespace = self.node_metadata.namespace.clone();
+                let namespace_lookup = self.node_metadata.namespace_lookup.clone();
+                let success_ch_tx = self.node_metadata.success_ch_tx.clone();
+                // Forward it into a processing task
+                self.processing_tasks.spawn(async move {
+                    Self::process_packet(
+                        &node_id,
+                        &pod_ref,
+                        &namespace,
+                        &namespace_lookup,
+                        &packet,
+                        &success_ch_tx,
+                    )
+                });
+            }
+            Message::Stop => {
+                // Stop message received, we will stop processing
+                self.processing_tasks.abort_all();
+                return Ok(true);
+            }
+            Message::NodeProcessingComplete(_) => {
+                // Since pod only have one parent, we can expect that there will be no more incoming packet
+                // thus, we need to wait for everything to finish processing and send completion message
+                // Return true to notify caller that processing is complete
+                self.wait_for_node_task_completion().await;
+                return Ok(true);
+            }
+        }
+        Ok(false)
+    }
+
+    fn get_ch_to_listen_to(
+        &mut self,
+    ) -> &mut FuturesUnordered<JoinHandle<Result<Message, RecvError>>> {
+        &mut self.node_metadata.ch_to_listen_to
+    }
+
+    async fn wait_for_node_task_completion(&mut self) {
+        while self.processing_tasks.join_next().await.is_some() {
+            // Wait for all processing tasks to complete
+        }
+    }
+}
+
+struct MapperProcessor {
+    mapper: Arc<Mapper>,
+    node_metadata: NodeMetaData,
+}
+
+impl MapperProcessor {
+    const fn new(mapper: Arc<Mapper>, node_metadata: NodeMetaData) -> Self {
+        Self {
+            mapper,
+            node_metadata,
+        }
+    }
+
+    fn process_packet(&self, packet: &HashMap<String, PathSet>) -> Result<()> {
+        // Apply the mapping to the input packet
+        let output_map = self
+            .mapper
+            .mapping
+            .iter()
+            .map(|(input_key, output_key)| {
+                let input = get(packet, input_key)?.clone();
+                Ok((output_key.to_owned(), input))
+            })
+            .collect::<Result<HashMap<_, _>>>()?;
+
+        // Send the output via the channel
+        self.node_metadata.success_ch_tx.send(Message::NodeOutput(
+            self.node_metadata.node_id.clone(),
+            output_map,
+        ))?;
+        Ok(())
+    }
+}
+
+impl NodeProcessor for MapperProcessor {
+    fn get_ch_to_listen_to(
+        &mut self,
+    ) -> &mut FuturesUnordered<JoinHandle<Result<Message, RecvError>>> {
+        &mut self.node_metadata.ch_to_listen_to
+    }
+
+    async fn wait_for_node_task_completion(&mut self) {
+        // Mapper doesn't spawn additional tasks, so this is a no-op
+    }
+
+    async fn process_msg(&mut self, msg: Message) -> Result<bool> {
+        match msg {
+            Message::NodeOutput(_, hash_map) => {
+                let output_map = self
+                    .mapper
+                    .mapping
+                    .iter()
+                    .map(|(input_key, output_key)| {
+                        let input = get(&hash_map, input_key)?.clone();
+                        Ok((output_key.to_owned(), input))
+                    })
+                    .collect::<Result<HashMap<_, _>>>()?;
+
+                // For now we will just send the input_packet to the success channel
+                self.node_metadata.success_ch_tx.send(Message::NodeOutput(
+                    self.node_metadata.node_id.clone(),
+                    output_map,
+                ))?;
+            }
+            Message::NodeProcessingComplete(_) => return Ok(true),
+            Message::Stop => todo!(),
+        }
+
+        Ok(false)
+    }
+}
+
+struct JoinerProcessor {
+    /// Cache for all packets received by the node
+    input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
+    completed_parents: Vec<String>,
+    node_metadata: NodeMetaData,
+}
+
+impl JoinerProcessor {
+    fn new(parents_node_id: Vec<String>, node_metadata: NodeMetaData) -> Self {
+        let input_packet_cache = parents_node_id
+            .into_iter()
+            .map(|id| (id, Vec::new()))
+            .collect();
+        Self {
+            input_packet_cache,
+            node_metadata,
+            completed_parents: Vec::new(),
+        }
+    }
+
+    fn compute_new_packet_combination(
+        &self,
+        sender_node_id: &str,
+        new_packet: &HashMap<String, PathSet>,
+    ) -> Result<Vec<HashMap<String, PathSet>>> {
+        // Combine the new packet with the existing packets in the cache
+        // Get all the cached packets from other parents
+        let other_parent_ids = self
+            .input_packet_cache
+            .keys()
+            .filter(|key| *key != sender_node_id);
+        let mut factors = other_parent_ids
+            .map(|id| get(&self.input_packet_cache, id))
+            .collect::<Result<Vec<_>>>()?;
+
+        // Add the new incoming packet as a factor
+        let incoming_packet = vec![new_packet.clone()];
+        factors.push(&incoming_packet);
+
+        let result = factors
+            .into_iter()
+            .multi_cartesian_product()
+            .map(|packets_to_combined| {
+                packets_to_combined
+                    .into_iter()
+                    .fold(HashMap::new(), |mut acc, packet| {
+                        acc.extend(packet.clone());
+                        acc
+                    })
+            })
+            .collect::<Vec<_>>();
+
+        Ok(result)
+    }
+
+    fn process_packet(
+        &mut self,
+        sender_node_id: &str,
+        packet: HashMap<String, PathSet>,
+    ) -> Result<()> {
+        let process_result = {
+            // Compute the new packet combination based on the sender node id and the packet
+            let new_packets_to_send =
+                self.compute_new_packet_combination(sender_node_id, &packet)?;
+
+            // Record the packet into the cache
+            self.input_packet_cache
+                .get_mut(sender_node_id)
+                .context(selector::KeyMissing {
+                    key: sender_node_id.to_owned(),
+                })?
+                .push(packet);
+
+            Ok::<Vec<HashMap<String, PathSet>>, OrcaError>(new_packets_to_send)
+        };
+
+        match process_result {
+            Ok(output_packets) => {
+                // Send the output packets to the success channel
+                for output_packet in output_packets {
+                    self.node_metadata.success_ch_tx.send(Message::NodeOutput(
+                        self.node_metadata.node_id.clone(),
+                        output_packet,
+                    ))?;
+                }
+            }
+            Err(err) => {
+                // Send the error to the failure channel
+                todo!();
+            }
+        }
+        // Add the new packet into the cache
+
+        Ok(())
+    }
+}
+
+impl NodeProcessor for JoinerProcessor {
+    fn get_ch_to_listen_to(
+        &mut self,
+    ) -> &mut FuturesUnordered<JoinHandle<Result<Message, RecvError>>> {
+        &mut self.node_metadata.ch_to_listen_to
+    }
+
+    async fn wait_for_node_task_completion(&mut self) {
+        // Joiner doesn't spawn additional tasks, so this is a no-op
+    }
+
+    async fn process_msg(&mut self, msg: Message) -> Result<bool> {
+        match msg {
+            Message::NodeOutput(sender_node_id, packet) => {
+                // Process the packet and send the output to the success channel
+                self.process_packet(&sender_node_id, packet)?;
+            }
+            Message::NodeProcessingComplete(sender_node_id) => {
+                // Record that this parent node has completed processing
+                self.completed_parents.push(sender_node_id);
+
+                // Check if all parents have completed processing
+                if self.completed_parents.len() == self.input_packet_cache.len() {
+                    // All parents have completed processing, we can send the output
+                    // Wait for all packets to be processed and send the output
+                    return Ok(true);
+                }
+            }
+            Message::Stop => {
+                // We don't have anything to clean up, so we can just return
+                return Ok(true);
+            }
+        }
+
+        Ok(false)
+    }
+}
diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs
index c787dc93..150babaf 100644
--- a/tests/fixture/mod.rs
+++ b/tests/fixture/mod.rs
@@ -281,7 +281,7 @@ pub fn pipeline_job() -> Result<PipelineJob> {
     // Create a simple pipeline_job
     PipelineJob::new(
         pipeline()?,
-        HashMap::from([(
+        vec![HashMap::from([(
             "input_text".to_owned(),
             PathSet::Unary(Blob {
                 kind: BlobKind::File,
@@ -291,7 +291,7 @@ pub fn pipeline_job() -> Result<PipelineJob> {
                 },
                 ..Default::default()
             }),
-        )]),
+        )])],
         URI {
             namespace: "default".to_owned(),
             path: PathBuf::from("output"),
diff --git a/tests/pipeline.rs b/tests/pipeline.rs
index 71a0f069..4150deeb 100644
--- a/tests/pipeline.rs
+++ b/tests/pipeline.rs
@@ -6,14 +6,10 @@
 //! process completes successfully and outputs the expected results.
 
 pub mod fixture;
-use std::{collections::HashMap, path::PathBuf, vec};
+use std::vec;
 
 use fixture::pipeline;
-use orcapod::uniffi::{
-    error::Result,
-    model::{Annotation, Blob, BlobKind, PathSet, URI},
-    pipeline::PipelineJob,
-};
+use orcapod::uniffi::{error::Result, model::Annotation};
 
 use crate::fixture::pipeline_job;
 
@@ -100,30 +96,3 @@ fn pipeline_job_creation() -> Result<()> {
 
     Ok(())
 }
-
-#[test]
-fn incorrect_input_packet() -> Result<()> {
-    assert!(
-        PipelineJob::new(
-            pipeline()?,
-            HashMap::from([(
-                "wrong_key".to_owned(),
-                PathSet::Unary(Blob {
-                    kind: BlobKind::File,
-                    location: URI {
-                        namespace: "default".to_owned(),
-                        path: PathBuf::from("data/input.txt"),
-                    },
-                    ..Default::default()
-                }),
-            )]),
-            URI {
-                namespace: "default".to_owned(),
-                path: PathBuf::from("output"),
-            },
-            None
-        )
-        .is_err()
-    );
-    Ok(())
-}
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 7959088b..a2889480 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -29,5 +29,6 @@ async fn basic_run() -> Result<()> {
 
     // Wait for the pipeline run to complete
     let result = runner.get_result(&pipeline_run).await?;
+    println!("Pipeline run result: {result:?}");
     Ok(())
 }

From 3058eb75667a1831b514082bcc237791f6a01674 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Thu, 10 Jul 2025 11:36:22 +0000
Subject: [PATCH 06/29] Add basic design struct

---
 src/uniffi/pipeline_runner/docker.rs | 75 +++++++++++++++++++++++-----
 tests/pipeline_runner.rs             |  2 +-
 2 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/src/uniffi/pipeline_runner/docker.rs b/src/uniffi/pipeline_runner/docker.rs
index 1b279bb0..2d815726 100644
--- a/src/uniffi/pipeline_runner/docker.rs
+++ b/src/uniffi/pipeline_runner/docker.rs
@@ -11,7 +11,10 @@ use futures_util::stream::FuturesUnordered;
 use itertools::Itertools as _;
 use serde_yaml::Serializer;
 use snafu::OptionExt as _;
-use std::{backtrace::Backtrace, collections::HashMap, path::PathBuf, sync::Arc};
+use std::{
+    backtrace::Backtrace, collections::HashMap, path::PathBuf, sync::Arc, thread::sleep,
+    time::Duration,
+};
 use tokio::{
     sync::{
         RwLock,
@@ -80,7 +83,7 @@ impl DockerPipelineRunner {
 
         // Create the source channel for the pipeline
         // This channel will be used to send inputs to the pipeline
-        let (source_tx, _) = broadcast::channel::<Message>(1);
+        let (source_tx, _) = broadcast::channel::<Message>(128);
 
         // Get reference to the pipeline
         let pipeline = &pipeline_run_arc.pipeline_job.pipeline;
@@ -109,6 +112,8 @@ impl DockerPipelineRunner {
         // Send a message that all job inputs have been sent
         source_tx.send(Message::NodeProcessingComplete("input".to_owned()))?;
 
+        sleep(Duration::from_secs(1)); // Give some time for the tasks to start
+        panic!();
         Ok(pipeline_run)
     }
 
@@ -304,6 +309,7 @@ impl DockerPipelineRunner {
         success_ch_tx: Sender<Message>,
         namespace_lookup: HashMap<String, PathBuf>,
     ) -> Result<()> {
+        println!("Starting node manager for node: {}", node.id);
         // Create a channel to for waiting when the node processing is complete
         let (processing_complete_ch_tx, processing_complete_ch_rx) = oneshot::channel::<()>();
 
@@ -329,6 +335,8 @@ impl DockerPipelineRunner {
             namespace_lookup: namespace_lookup.clone(),
         };
 
+        println!("Setting up node processor for node: {}", node.id);
+
         // Get the kernel for this node and build the correct processor
         match get(
             &pipeline_run.pipeline_job.pipeline.kernel_lut,
@@ -380,7 +388,7 @@ trait NodeProcessor {
         // Start to listen to the channels
         // Listen to the MPSC channel and handle messages
         while let Some(result) = self.get_ch_to_listen_to().next().await {
-            let rx_result = match result {
+            let repeater_result = match result {
                 Ok(rx_result) => rx_result,
                 Err(err) => {
                     // Record into pipeline_error log
@@ -393,9 +401,18 @@ trait NodeProcessor {
                 }
             };
 
-            let Ok(msg) = rx_result else {
-                eprintln!("Failed to receive message from parent channel");
-                continue;
+            let msg = match repeater_result {
+                Ok(msg) => msg,
+                Err(RecvError::Closed) => {
+                    // Channel is closed, we can exit the loop
+                    eprintln!("Channel closed, exiting node processor");
+                    break;
+                }
+                Err(RecvError::Lagged(_)) => {
+                    // Channel lagged, skip this message
+                    eprintln!("Channel lagged, skipping message");
+                    continue;
+                }
             };
 
             // Process the message
@@ -445,11 +462,22 @@ impl PodProcessor {
         success_ch_tx: &Sender<Message>,
     ) -> Result<()> {
         // Process the packet using the pod
+        println!(
+            "Processing packet in pod: {} with node_id: {}",
+            pod.hash, node_id
+        );
 
         // Create the pod_job
         let mut buf = Vec::new();
         let mut serializer = Serializer::new(&mut buf);
-        serialize_hashmap(packet, &mut serializer)?;
+        match serialize_hashmap(packet, &mut serializer) {
+            Ok(_) => {}
+            Err(err) => {
+                println!("Failed to serialize packet: {err}");
+            }
+        }
+
+        println!("managed to serialize packet: {:?}", buf);
         let input_packet_hash = hash_buffer(buf);
         let output_dir = URI {
             namespace: namespace.to_owned(),
@@ -460,7 +488,7 @@ impl PodProcessor {
         let memory_limit = pod.recommended_memory;
 
         // Create the pod job
-        let pod_job = PodJob::new(
+        let pod_job = match PodJob::new(
             None,
             Arc::clone(pod),
             packet.clone(),
@@ -469,7 +497,13 @@ impl PodProcessor {
             memory_limit,
             None,
             namespace_lookup,
-        )?;
+        ) {
+            Ok(job) => job,
+            Err(err) => {
+                println!("Failed to create pod job: {err}");
+                panic!("Failed to create pod job: {err}");
+            }
+        };
 
         // Simulate pod execution by just printing out pod_job_hash and pod hash
         // This will be replaced by sending the pod_job to the orchestrator via the agent
@@ -479,7 +513,12 @@ impl PodProcessor {
         );
 
         // For now we will just send the input_packet to the success channel
-        success_ch_tx.send(Message::NodeOutput(node_id.to_owned(), packet.clone()))?;
+        match success_ch_tx.send(Message::NodeOutput(node_id.to_owned(), packet.clone())) {
+            Ok(_) => {}
+            Err(err) => {
+                println!("Failed to send message to success channel: {err}");
+            }
+        }
 
         Ok(())
     }
@@ -489,6 +528,10 @@ impl NodeProcessor for PodProcessor {
     async fn process_msg(&mut self, msg: Message) -> Result<bool> {
         match msg {
             Message::NodeOutput(sender_node_id, packet) => {
+                println!(
+                    "Node {} received packet: {:?} from {}",
+                    self.node_metadata.node_id, packet, sender_node_id
+                );
                 let pod_ref = Arc::clone(&self.pod);
                 let node_id = self.node_metadata.node_id.clone();
                 let namespace = self.node_metadata.namespace.clone();
@@ -582,13 +625,17 @@ impl NodeProcessor for MapperProcessor {
 
     async fn process_msg(&mut self, msg: Message) -> Result<bool> {
         match msg {
-            Message::NodeOutput(_, hash_map) => {
+            Message::NodeOutput(sender_node_id, packet) => {
+                println!(
+                    "Node {} received packet: {:?} from {}",
+                    self.node_metadata.node_id, packet, sender_node_id
+                );
                 let output_map = self
                     .mapper
                     .mapping
                     .iter()
                     .map(|(input_key, output_key)| {
-                        let input = get(&hash_map, input_key)?.clone();
+                        let input = get(&packet, input_key)?.clone();
                         Ok((output_key.to_owned(), input))
                     })
                     .collect::<Result<HashMap<_, _>>>()?;
@@ -718,6 +765,10 @@ impl NodeProcessor for JoinerProcessor {
     async fn process_msg(&mut self, msg: Message) -> Result<bool> {
         match msg {
             Message::NodeOutput(sender_node_id, packet) => {
+                println!(
+                    "Node {} received packet: {:?} from {}",
+                    self.node_metadata.node_id, packet, sender_node_id
+                );
                 // Process the packet and send the output to the success channel
                 self.process_packet(&sender_node_id, packet)?;
             }
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index a2889480..a349eb24 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -12,7 +12,7 @@ use orcapod::uniffi::{error::Result, pipeline_runner::docker::DockerPipelineRunn
 use crate::fixture::TestDirs;
 use fixture::pipeline_job;
 
-#[tokio::test]
+#[tokio::test(flavor = "multi_thread", worker_threads = 16)]
 async fn basic_run() -> Result<()> {
     let pipeline_job = pipeline_job()?;
 

From c45df99cf4f18440dd38458a3837980946a17673 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Thu, 10 Jul 2025 16:21:21 +0000
Subject: [PATCH 07/29] Replace broadcast arch with MPSC

---
 src/core/error.rs                    |   2 +-
 src/uniffi/pipeline.rs               |  13 +
 src/uniffi/pipeline_runner/docker.rs | 495 ++++++++++-----------------
 3 files changed, 186 insertions(+), 324 deletions(-)

diff --git a/src/core/error.rs b/src/core/error.rs
index 06e3adef..d2404663 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -13,7 +13,7 @@ use std::{
     path::{self},
 };
 use tokio::{
-    sync::{broadcast::error::SendError, oneshot},
+    sync::{mpsc::error::SendError, oneshot},
     task::JoinError,
 };
 
diff --git a/src/uniffi/pipeline.rs b/src/uniffi/pipeline.rs
index 27d84dbd..2bb205a7 100644
--- a/src/uniffi/pipeline.rs
+++ b/src/uniffi/pipeline.rs
@@ -333,6 +333,19 @@ impl Pipeline {
                 .map(move |parent_idx| &self.graph[parent_idx])
         })
     }
+
+    pub fn get_children_for_node(&self, node: &Node) -> impl Iterator<Item = &Node> {
+        // Find the NodeIndex for the given node_key
+        let node_index = self
+            .graph
+            .node_indices()
+            .find(|&idx| self.graph[idx] == *node);
+        node_index.into_iter().flat_map(move |idx| {
+            self.graph
+                .neighbors_directed(idx, Outgoing)
+                .map(move |child_idx| &self.graph[child_idx])
+        })
+    }
 }
 
 #[derive(uniffi::Object, Display, Debug, Clone, Serialize)]
diff --git a/src/uniffi/pipeline_runner/docker.rs b/src/uniffi/pipeline_runner/docker.rs
index 2d815726..461edc8c 100644
--- a/src/uniffi/pipeline_runner/docker.rs
+++ b/src/uniffi/pipeline_runner/docker.rs
@@ -2,28 +2,20 @@ use super::PipelineRun;
 use crate::{
     core::{crypto::hash_buffer, model::serialize_hashmap, util::get},
     uniffi::{
-        error::{Kind, OrcaError, Result, selector},
+        error::{OrcaError, Result, selector},
         model::{PathSet, Pod, PodJob, URI},
         pipeline::{Kernel, Mapper, Node, PipelineJob, PipelineResult},
     },
 };
-use futures_util::stream::FuturesUnordered;
+use futures_util::future::try_join_all;
 use itertools::Itertools as _;
 use serde_yaml::Serializer;
 use snafu::OptionExt as _;
-use std::{
-    backtrace::Backtrace, collections::HashMap, path::PathBuf, sync::Arc, thread::sleep,
-    time::Duration,
-};
+use std::{collections::HashMap, path::PathBuf, sync::Arc, thread::sleep, time::Duration};
 use tokio::{
-    sync::{
-        RwLock,
-        broadcast::{self, Receiver, Sender, error::RecvError},
-        oneshot,
-    },
-    task::{JoinHandle, JoinSet},
+    sync::{RwLock, mpsc},
+    task::{JoinSet, spawn_blocking},
 };
-use tokio_stream::StreamExt as _;
 
 #[derive(Clone, Debug)]
 pub(crate) enum Message {
@@ -40,8 +32,7 @@ pub(crate) enum Message {
 )]
 struct PipelineRunInfo {
     node_task_join_set: JoinSet<Result<()>>, // Join set to track the tasks for this pipeline run
-    job_manager_ch_tx: Sender<Message>,
-    node_tx: HashMap<String, Sender<Message>>,
+    node_tx: HashMap<String, mpsc::Sender<Message>>,
     outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>, // String is the node key, while hash
 }
 
@@ -61,7 +52,7 @@ impl DockerPipelineRunner {
     ///
     /// # Errors
     /// Will error out if the pipeline job fails to start
-    pub fn start(
+    pub async fn start(
         &mut self,
         pipeline_job: PipelineJob,
         namespace_lookup: &HashMap<String, PathBuf>,
@@ -74,45 +65,40 @@ impl DockerPipelineRunner {
         self.pipeline_runs.insert(
             (*pipeline_run_arc).clone(),
             PipelineRunInfo {
-                job_manager_ch_tx: broadcast::channel::<Message>(1).0,
                 node_tx: HashMap::new(),
                 node_task_join_set: JoinSet::new(),
                 outputs: Arc::new(RwLock::new(HashMap::new())),
             },
         );
 
-        // Create the source channel for the pipeline
-        // This channel will be used to send inputs to the pipeline
-        let (source_tx, _) = broadcast::channel::<Message>(128);
-
         // Get reference to the pipeline
         let pipeline = &pipeline_run_arc.pipeline_job.pipeline;
 
-        // Get all the leaf nodes and call the create_task_for_node function for each leaf node
+        // Get all the root nodes and call the create_task_for_node function for each root node
         // This will recursively create all the tasks and channels for the pipeline
-        pipeline.get_leaf_nodes().try_for_each(|node| {
-            self.create_task_for_node(node, &pipeline_run_arc, &source_tx, namespace_lookup)?;
-
-            // Since we don't have output nodes implemented, and currently it is set as leaf nodes,
-            // we can do the output handling logic here too
-
-            Ok::<(), OrcaError>(())
-        })?;
+        let root_nodes_tx = pipeline
+            .get_root_nodes()
+            .map(|node| self.create_task_for_node(node, &pipeline_run_arc, namespace_lookup))
+            .collect::<Result<Vec<_>>>()?;
 
         // All pipeline tasks have been created, now we need to feed the inputs to the pipeline
-        pipeline_run
-            .pipeline_job
-            .input_packets
-            .iter()
-            .try_for_each(|input_map| {
-                source_tx.send(Message::NodeOutput("input".to_owned(), input_map.clone()))?;
-                Ok::<(), OrcaError>(())
-            })?;
+        for tx in &root_nodes_tx {
+            for input_packet in &pipeline_run.pipeline_job.input_packets {
+                tx.send(Message::NodeOutput(
+                    "input".to_owned(),
+                    input_packet.clone(),
+                ))
+                .await?;
+            }
+        }
 
         // Send a message that all job inputs have been sent
-        source_tx.send(Message::NodeProcessingComplete("input".to_owned()))?;
+        for tx in &root_nodes_tx {
+            tx.send(Message::NodeProcessingComplete("input".to_owned()))
+                .await?;
+        }
 
-        sleep(Duration::from_secs(1)); // Give some time for the tasks to start
+        sleep(Duration::from_secs(5)); // Give some time for the tasks to start
         panic!();
         Ok(pipeline_run)
     }
@@ -154,55 +140,40 @@ impl DockerPipelineRunner {
         &mut self,
         node: &Node,
         pipeline_run: &Arc<PipelineRun>,
-        source_tx: &Sender<Message>,
         namespace_lookup: &HashMap<String, PathBuf>,
-    ) -> Result<Sender<Message>> {
+    ) -> Result<mpsc::Sender<Message>> {
         println!("Creating task for node: {}", node.id);
-        // Get the input channels for this node which should be it's parents
-        let mut input_ch_rxs = pipeline_run
-            .pipeline_job
-            .pipeline
-            .get_parents_for_node(node)
-            .map(|parent_node| {
-                // Check if it exists in the pipeline_runs hashmap
-                match get(&self.pipeline_runs, pipeline_run)?
-                    .node_tx
-                    .get(&parent_node.id)
-                {
-                    Some(rx) => Ok(rx.subscribe()),
-                    None => {
-                        // Missing parent node, thus call create_task for the parent node parent node first
-                        Ok(self
-                            .create_task_for_node(
-                                parent_node,
-                                pipeline_run,
-                                source_tx,
-                                namespace_lookup,
-                            )?
-                            .subscribe())
-                    }
-                }
-            })
-            .collect::<Result<Vec<_>>>()?;
+        // Create a channel for the node
+        // This channel will be used to send messages to the node processor
+        let (tx, rx) = mpsc::channel::<Message>(128);
+
+        // Use closer to limit the scope of the borrow
+        {
+            let pipeline_info =
+                self.pipeline_runs
+                    .get_mut(pipeline_run)
+                    .context(selector::KeyMissing {
+                        key: pipeline_run.to_string(),
+                    })?;
+            // Check if the node is already inside the node_tx
+            if pipeline_info.node_tx.contains_key(&node.id) {
+                // Node already exists, thus we can return the existing tx
+                return Ok(pipeline_info.node_tx.get(&node.id).unwrap().clone());
+            }
 
-        // Check if input_ch_rxs is empty, meaning this node has no parents and is a root node
-        // In this case, we will use the source channel as the input channel
-        // TODO: This will be replaced by input_node logic once that is merged
-        if input_ch_rxs.is_empty() {
-            // No parents, thus this is root node
-            // The parent rx will be the source channel rx
-            input_ch_rxs.push(source_tx.subscribe());
+            // Record the tx into the pipeline_info tx_hashmap
+            pipeline_info.node_tx.insert(node.id.clone(), tx.clone());
         }
 
-        // Get the job manager ch and subscribe to it (mainly for receiving shutdown signal)
-        let job_manager_ch_rx = get(&self.pipeline_runs, pipeline_run)?
-            .job_manager_ch_tx
-            .subscribe();
-
-        // Create the output_channel for this node
-        let (tx, _) = broadcast::channel::<Message>(128);
+        // Call this function for each of the child nodes to get their Sender_tx
+        let children_node_tx = pipeline_run
+            .pipeline_job
+            .pipeline
+            .get_children_for_node(node)
+            .map(|child_node| self.create_task_for_node(child_node, pipeline_run, namespace_lookup))
+            .collect::<Result<Vec<_>>>()?;
 
-        // Spawn the node_manager for this node
+        // Start the task_manager
         self.pipeline_runs
             .get_mut(pipeline_run)
             .context(selector::KeyMissing {
@@ -212,77 +183,26 @@ impl DockerPipelineRunner {
             .spawn(Self::start_node_manager(
                 node.clone(),
                 Arc::clone(pipeline_run),
-                input_ch_rxs,
-                job_manager_ch_rx,
-                tx.clone(),
+                rx,
+                children_node_tx,
                 namespace_lookup.clone(),
             ));
 
-        // Insert it into the the tx into the pipeline_runs hashmap
-        self.pipeline_runs
-            .get_mut(pipeline_run)
-            .context(selector::KeyMissing {
-                key: pipeline_run.to_string(),
-            })?
-            .node_tx
-            .insert(node.id.clone(), tx.clone());
-
-        // Return tx
         Ok(tx)
     }
 
-    fn create_task_to_capture_output_of_node(
-        &mut self,
-        node: &Node,
-        pipeline_run: &Arc<PipelineRun>,
-    ) -> Result<()> {
-        let pipeline_run_info =
-            self.pipeline_runs
-                .get_mut(pipeline_run)
-                .context(selector::KeyMissing {
-                    key: pipeline_run.to_string(),
-                })?;
-        // Get the output ch rx for the node
-        let node_rx = get(&pipeline_run_info.node_tx, &node.id)?.subscribe();
-        // Create a new ref copy of pipeline_run_output
-        let outputs_ref = Arc::clone(&pipeline_run_info.outputs);
-        // Create a task to listen to it and record the outputs
-        pipeline_run_info
-            .node_task_join_set
-            .spawn(Self::capture_node_output(node_rx, outputs_ref));
-
-        Ok(())
-    }
-
     #[expect(
         clippy::type_complexity,
         reason = "too complex, but necessary for async handling"
     )]
     async fn capture_node_output(
-        mut node_rx: Receiver<Message>,
+        mut output_rx: mpsc::Receiver<Message>,
         outputs_ref: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>,
     ) -> Result<()> {
-        loop {
-            let message = match node_rx.recv().await {
-                Ok(message) => message,
-                Err(err) => {
-                    match err {
-                        RecvError::Closed => {
-                            // No more message will be received, thus we can exit the loop
-                            // Only case where this will occur is when the channel is closed due to abort
-                            break;
-                        }
-                        RecvError::Lagged(_) => {
-                            print!("Warning: Channel lagged, skipping message");
-                        }
-                    }
-                    continue;
-                }
-            };
-            match message {
+        while let Some(msg) = output_rx.recv().await {
+            match msg {
                 Message::NodeOutput(node_id, hash_map) => {
                     // Record the output
-
                     outputs_ref
                         .write()
                         .await
@@ -296,7 +216,6 @@ impl DockerPipelineRunner {
                 }
             }
         }
-
         Ok(())
     }
 
@@ -304,33 +223,16 @@ impl DockerPipelineRunner {
     async fn start_node_manager(
         node: Node,
         pipeline_run: Arc<PipelineRun>,
-        parent_channel_rxs: Vec<Receiver<Message>>,
-        mut job_manager_channel: Receiver<Message>,
-        success_ch_tx: Sender<Message>,
+        node_rx: mpsc::Receiver<Message>,
+        success_chs_tx: Vec<mpsc::Sender<Message>>,
         namespace_lookup: HashMap<String, PathBuf>,
     ) -> Result<()> {
         println!("Starting node manager for node: {}", node.id);
-        // Create a channel to for waiting when the node processing is complete
-        let (processing_complete_ch_tx, processing_complete_ch_rx) = oneshot::channel::<()>();
-
-        // Create a futures unordered set to dynamically listen to N number of receivers
-        let chs_to_listen_to = FuturesUnordered::new();
-
-        // Add all the parent channel receivers to the futures unordered set
-        for mut rx in parent_channel_rxs {
-            chs_to_listen_to.push(tokio::spawn(async move { rx.recv().await }));
-        }
-
-        // Add the job manager channel to the futures unordered set
-        chs_to_listen_to.push(tokio::spawn(
-            async move { job_manager_channel.recv().await },
-        ));
-
         // Create a metadata struct for this node
         let node_metadata = NodeMetaData {
             node_id: node.id.clone(),
-            ch_to_listen_to: chs_to_listen_to,
-            success_ch_tx: success_ch_tx.clone(),
+            node_rx,
+            child_nodes_txs: success_chs_tx.clone(),
             namespace: pipeline_run.pipeline_job.output_dir.namespace.clone(),
             namespace_lookup: namespace_lookup.clone(),
         };
@@ -344,13 +246,11 @@ impl DockerPipelineRunner {
         )? {
             Kernel::Pod(pod) => {
                 let mut processor = PodProcessor::new(Arc::clone(pod), node_metadata);
-                processor.start(processing_complete_ch_tx).await?;
-                processing_complete_ch_rx.await?;
+                processor.start().await;
             }
             Kernel::Mapper(mapper) => {
                 let mut processor = MapperProcessor::new(Arc::clone(mapper), node_metadata);
-                processor.start(processing_complete_ch_tx).await?;
-                processing_complete_ch_rx.await?;
+                processor.start().await;
             }
             Kernel::Joiner => {
                 let parent_nodes_id = pipeline_run
@@ -360,82 +260,49 @@ impl DockerPipelineRunner {
                     .map(|parent_node| parent_node.id.clone())
                     .collect::<Vec<_>>();
                 let mut processor = JoinerProcessor::new(parent_nodes_id, node_metadata);
-                processor.start(processing_complete_ch_tx).await?;
-                processing_complete_ch_rx.await?;
+                processor.start().await;
             }
         }
 
+        // Notify that node is finish processing
+        println!("Node {} processing complete", node.id);
+        for success_ch_tx in &success_chs_tx {
+            success_ch_tx
+                .send(Message::NodeProcessingComplete(node.id.clone()))
+                .await?;
+        }
+
         Ok(())
     }
 }
 
 struct NodeMetaData {
     node_id: String,
-    ch_to_listen_to: FuturesUnordered<JoinHandle<Result<Message, RecvError>>>,
-    success_ch_tx: Sender<Message>, // Channel to send successful outputs to the next node
+    node_rx: mpsc::Receiver<Message>, // Channel to listen to messages from parent nodes
+    child_nodes_txs: Vec<mpsc::Sender<Message>>, // Channel to send successful outputs to the next node
     namespace: String,
     namespace_lookup: HashMap<String, PathBuf>, // Copy of the look up table
 }
 
 trait NodeProcessor {
-    fn get_ch_to_listen_to(
-        &mut self,
-    ) -> &mut FuturesUnordered<JoinHandle<Result<Message, RecvError>>>;
-
-    async fn wait_for_node_task_completion(&mut self);
+    fn get_node_rx(&mut self) -> &mut mpsc::Receiver<Message>;
 
-    async fn start(&mut self, process_complete_ch_tx: oneshot::Sender<()>) -> Result<()> {
+    async fn start(&mut self) {
         // Start to listen to the channels
         // Listen to the MPSC channel and handle messages
-        while let Some(result) = self.get_ch_to_listen_to().next().await {
-            let repeater_result = match result {
-                Ok(rx_result) => rx_result,
-                Err(err) => {
-                    // Record into pipeline_error log
-                    if err.is_panic() {
-                        eprintln!("Task panicked: {err}");
-                    } else {
-                        eprintln!("Error receiving message: {err}");
-                    }
-                    continue;
-                }
-            };
-
-            let msg = match repeater_result {
-                Ok(msg) => msg,
-                Err(RecvError::Closed) => {
-                    // Channel is closed, we can exit the loop
-                    eprintln!("Channel closed, exiting node processor");
-                    break;
-                }
-                Err(RecvError::Lagged(_)) => {
-                    // Channel lagged, skip this message
-                    eprintln!("Channel lagged, skipping message");
-                    continue;
-                }
-            };
-
-            // Process the message
-            if self.process_msg(msg).await? {
+        while let Some(msg) = self.get_node_rx().recv().await {
+            if self.process_msg(msg).await {
                 // If the message indicates that processing is complete, we can exit the loop
-                // Wait for all processing tasks to complete before sending the completion message
-
+                // Wait for all processing tasks to complete before returning
                 self.wait_for_node_task_completion().await;
-
-                // Send the node processing complete message
-                process_complete_ch_tx.send(()).map_err(|()| OrcaError {
-                    kind: Kind::ReceiverDroppedBeforeSender {
-                        backtrace: Some(Backtrace::capture()),
-                    },
-                })?;
                 break;
             }
         }
-
-        Ok(())
     }
 
-    async fn process_msg(&mut self, msg: Message) -> Result<bool>;
+    async fn process_msg(&mut self, msg: Message) -> bool;
+
+    async fn wait_for_node_task_completion(&mut self);
 }
 
 struct PodProcessor {
@@ -453,32 +320,28 @@ impl PodProcessor {
         }
     }
 
-    fn process_packet(
-        node_id: &str,
-        pod: &Arc<Pod>,
-        namespace: &str,
-        namespace_lookup: &HashMap<String, PathBuf>,
-        packet: &HashMap<String, PathSet>,
-        success_ch_tx: &Sender<Message>,
+    async fn process_packet(
+        node_id: String,
+        pod: Arc<Pod>,
+        namespace: String,
+        namespace_lookup: HashMap<String, PathBuf>,
+        packet: HashMap<String, PathSet>,
+        success_chs_tx: Vec<mpsc::Sender<Message>>,
     ) -> Result<()> {
         // Process the packet using the pod
-        println!(
-            "Processing packet in pod: {} with node_id: {}",
-            pod.hash, node_id
-        );
-
         // Create the pod_job
-        let mut buf = Vec::new();
-        let mut serializer = Serializer::new(&mut buf);
-        match serialize_hashmap(packet, &mut serializer) {
-            Ok(_) => {}
-            Err(err) => {
-                println!("Failed to serialize packet: {err}");
-            }
-        }
 
-        println!("managed to serialize packet: {:?}", buf);
-        let input_packet_hash = hash_buffer(buf);
+        // We need a unique hash for this given input packet process by the node
+        // therefore we need to generate a hash that has the pod_id + input_packet
+        let node_id_bytes = node_id.as_bytes().to_vec();
+        let packet_copy = packet.clone();
+        let input_packet_hash = spawn_blocking(move || {
+            let mut buf = node_id_bytes;
+            let mut serializer = Serializer::new(&mut buf);
+            serialize_hashmap(&packet_copy, &mut serializer)?;
+            Ok::<_, OrcaError>(hash_buffer(buf))
+        })
+        .await??;
         let output_dir = URI {
             namespace: namespace.to_owned(),
             path: PathBuf::from(format!("pod_runs/{}/{}", pod.hash, input_packet_hash)),
@@ -488,22 +351,16 @@ impl PodProcessor {
         let memory_limit = pod.recommended_memory;
 
         // Create the pod job
-        let pod_job = match PodJob::new(
+        let pod_job = PodJob::new(
             None,
-            Arc::clone(pod),
+            Arc::clone(&pod),
             packet.clone(),
             output_dir,
             cpu_limit,
             memory_limit,
             None,
-            namespace_lookup,
-        ) {
-            Ok(job) => job,
-            Err(err) => {
-                println!("Failed to create pod job: {err}");
-                panic!("Failed to create pod job: {err}");
-            }
-        };
+            &namespace_lookup,
+        )?;
 
         // Simulate pod execution by just printing out pod_job_hash and pod hash
         // This will be replaced by sending the pod_job to the orchestrator via the agent
@@ -513,19 +370,21 @@ impl PodProcessor {
         );
 
         // For now we will just send the input_packet to the success channel
-        match success_ch_tx.send(Message::NodeOutput(node_id.to_owned(), packet.clone())) {
-            Ok(_) => {}
-            Err(err) => {
-                println!("Failed to send message to success channel: {err}");
-            }
-        }
+        try_join_all(success_chs_tx.iter().map(|success_ch_tx| {
+            success_ch_tx.send(Message::NodeOutput(node_id.to_owned(), packet.clone()))
+        }))
+        .await?;
 
         Ok(())
     }
 }
 
 impl NodeProcessor for PodProcessor {
-    async fn process_msg(&mut self, msg: Message) -> Result<bool> {
+    fn get_node_rx(&mut self) -> &mut mpsc::Receiver<Message> {
+        &mut self.node_metadata.node_rx
+    }
+
+    async fn process_msg(&mut self, msg: Message) -> bool {
         match msg {
             Message::NodeOutput(sender_node_id, packet) => {
                 println!(
@@ -536,39 +395,33 @@ impl NodeProcessor for PodProcessor {
                 let node_id = self.node_metadata.node_id.clone();
                 let namespace = self.node_metadata.namespace.clone();
                 let namespace_lookup = self.node_metadata.namespace_lookup.clone();
-                let success_ch_tx = self.node_metadata.success_ch_tx.clone();
+                let child_nodes_txs = self.node_metadata.child_nodes_txs.clone();
                 // Forward it into a processing task
-                self.processing_tasks.spawn(async move {
-                    Self::process_packet(
-                        &node_id,
-                        &pod_ref,
-                        &namespace,
-                        &namespace_lookup,
-                        &packet,
-                        &success_ch_tx,
-                    )
-                });
+                self.processing_tasks.spawn(Self::process_packet(
+                    node_id,
+                    pod_ref,
+                    namespace,
+                    namespace_lookup,
+                    packet,
+                    child_nodes_txs,
+                ));
             }
             Message::Stop => {
                 // Stop message received, we will stop processing
                 self.processing_tasks.abort_all();
-                return Ok(true);
+                return true;
             }
             Message::NodeProcessingComplete(_) => {
+                println!("Node processing complete");
                 // Since pod only have one parent, we can expect that there will be no more incoming packet
                 // thus, we need to wait for everything to finish processing and send completion message
                 // Return true to notify caller that processing is complete
                 self.wait_for_node_task_completion().await;
-                return Ok(true);
+                return true;
             }
         }
-        Ok(false)
-    }
-
-    fn get_ch_to_listen_to(
-        &mut self,
-    ) -> &mut FuturesUnordered<JoinHandle<Result<Message, RecvError>>> {
-        &mut self.node_metadata.ch_to_listen_to
+        println!("returning false");
+        false
     }
 
     async fn wait_for_node_task_completion(&mut self) {
@@ -591,7 +444,7 @@ impl MapperProcessor {
         }
     }
 
-    fn process_packet(&self, packet: &HashMap<String, PathSet>) -> Result<()> {
+    async fn process_packet(&self, packet: &HashMap<String, PathSet>) -> Result<()> {
         // Apply the mapping to the input packet
         let output_map = self
             .mapper
@@ -604,53 +457,42 @@ impl MapperProcessor {
             .collect::<Result<HashMap<_, _>>>()?;
 
         // Send the output via the channel
-        self.node_metadata.success_ch_tx.send(Message::NodeOutput(
-            self.node_metadata.node_id.clone(),
-            output_map,
-        ))?;
+        try_join_all(self.node_metadata.child_nodes_txs.iter().map(|ch| {
+            ch.send(Message::NodeOutput(
+                self.node_metadata.node_id.clone(),
+                output_map.clone(),
+            ))
+        }))
+        .await?;
         Ok(())
     }
 }
 
 impl NodeProcessor for MapperProcessor {
-    fn get_ch_to_listen_to(
-        &mut self,
-    ) -> &mut FuturesUnordered<JoinHandle<Result<Message, RecvError>>> {
-        &mut self.node_metadata.ch_to_listen_to
+    fn get_node_rx(&mut self) -> &mut mpsc::Receiver<Message> {
+        &mut self.node_metadata.node_rx
     }
 
     async fn wait_for_node_task_completion(&mut self) {
         // Mapper doesn't spawn additional tasks, so this is a no-op
     }
 
-    async fn process_msg(&mut self, msg: Message) -> Result<bool> {
+    async fn process_msg(&mut self, msg: Message) -> bool {
         match msg {
-            Message::NodeOutput(sender_node_id, packet) => {
-                println!(
-                    "Node {} received packet: {:?} from {}",
-                    self.node_metadata.node_id, packet, sender_node_id
-                );
-                let output_map = self
-                    .mapper
-                    .mapping
-                    .iter()
-                    .map(|(input_key, output_key)| {
-                        let input = get(&packet, input_key)?.clone();
-                        Ok((output_key.to_owned(), input))
-                    })
-                    .collect::<Result<HashMap<_, _>>>()?;
-
-                // For now we will just send the input_packet to the success channel
-                self.node_metadata.success_ch_tx.send(Message::NodeOutput(
-                    self.node_metadata.node_id.clone(),
-                    output_map,
-                ))?;
+            Message::NodeOutput(_, packet) => {
+                match self.process_packet(&packet).await {
+                    Ok(_) => {}
+                    Err(err) => {
+                        // Send the error to the failure channel
+                        // For now just print it out
+                        println!("Failed to process packet with error: {}", err);
+                    }
+                }
             }
-            Message::NodeProcessingComplete(_) => return Ok(true),
-            Message::Stop => todo!(),
+            Message::NodeProcessingComplete(_) | Message::Stop => return true,
         }
 
-        Ok(false)
+        false
     }
 }
 
@@ -709,7 +551,7 @@ impl JoinerProcessor {
         Ok(result)
     }
 
-    fn process_packet(
+    async fn process_packet(
         &mut self,
         sender_node_id: &str,
         packet: HashMap<String, PathSet>,
@@ -734,10 +576,13 @@ impl JoinerProcessor {
             Ok(output_packets) => {
                 // Send the output packets to the success channel
                 for output_packet in output_packets {
-                    self.node_metadata.success_ch_tx.send(Message::NodeOutput(
-                        self.node_metadata.node_id.clone(),
-                        output_packet,
-                    ))?;
+                    try_join_all(self.node_metadata.child_nodes_txs.iter().map(|ch| {
+                        ch.send(Message::NodeOutput(
+                            self.node_metadata.node_id.clone(),
+                            output_packet.clone(),
+                        ))
+                    }))
+                    .await?;
                 }
             }
             Err(err) => {
@@ -752,17 +597,15 @@ impl JoinerProcessor {
 }
 
 impl NodeProcessor for JoinerProcessor {
-    fn get_ch_to_listen_to(
-        &mut self,
-    ) -> &mut FuturesUnordered<JoinHandle<Result<Message, RecvError>>> {
-        &mut self.node_metadata.ch_to_listen_to
+    fn get_node_rx(&mut self) -> &mut mpsc::Receiver<Message> {
+        &mut self.node_metadata.node_rx
     }
 
     async fn wait_for_node_task_completion(&mut self) {
         // Joiner doesn't spawn additional tasks, so this is a no-op
     }
 
-    async fn process_msg(&mut self, msg: Message) -> Result<bool> {
+    async fn process_msg(&mut self, msg: Message) -> bool {
         match msg {
             Message::NodeOutput(sender_node_id, packet) => {
                 println!(
@@ -770,7 +613,13 @@ impl NodeProcessor for JoinerProcessor {
                     self.node_metadata.node_id, packet, sender_node_id
                 );
                 // Process the packet and send the output to the success channel
-                self.process_packet(&sender_node_id, packet)?;
+                match self.process_packet(&sender_node_id, packet).await {
+                    Ok(_) => {}
+                    Err(err) => {
+                        // Send the error to the failure channel
+                        println!("Failed to process packet with error: {}", err);
+                    }
+                }
             }
             Message::NodeProcessingComplete(sender_node_id) => {
                 // Record that this parent node has completed processing
@@ -780,15 +629,15 @@ impl NodeProcessor for JoinerProcessor {
                 if self.completed_parents.len() == self.input_packet_cache.len() {
                     // All parents have completed processing, we can send the output
                     // Wait for all packets to be processed and send the output
-                    return Ok(true);
+                    return true;
                 }
             }
             Message::Stop => {
                 // We don't have anything to clean up, so we can just return
-                return Ok(true);
+                return true;
             }
         }
 
-        Ok(false)
+        false
     }
 }

From 415eb054a800b7772cd535e7a738a63f983241f1 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Thu, 10 Jul 2025 16:46:01 +0000
Subject: [PATCH 08/29] Added output handling

---
 src/uniffi/pipeline_runner/docker.rs | 75 ++++++++++++++++++++++++----
 tests/pipeline_runner.rs             |  2 +-
 2 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/src/uniffi/pipeline_runner/docker.rs b/src/uniffi/pipeline_runner/docker.rs
index 461edc8c..2f6cbf3f 100644
--- a/src/uniffi/pipeline_runner/docker.rs
+++ b/src/uniffi/pipeline_runner/docker.rs
@@ -11,8 +11,15 @@ use futures_util::future::try_join_all;
 use itertools::Itertools as _;
 use serde_yaml::Serializer;
 use snafu::OptionExt as _;
-use std::{collections::HashMap, path::PathBuf, sync::Arc, thread::sleep, time::Duration};
+use std::{
+    collections::{HashMap, HashSet},
+    path::PathBuf,
+    sync::Arc,
+    thread::sleep,
+    time::Duration,
+};
 use tokio::{
+    net::unix::pipe,
     sync::{RwLock, mpsc},
     task::{JoinSet, spawn_blocking},
 };
@@ -74,6 +81,55 @@ impl DockerPipelineRunner {
         // Get reference to the pipeline
         let pipeline = &pipeline_run_arc.pipeline_job.pipeline;
 
+        // Create the output channel to capture the outputs of the outputs nodes (Currently only leaf nodes)
+        let (output_tx, mut output_rx) = mpsc::channel::<Message>(128); // Channel to capture outputs from nodes
+
+        // Get the output_nodes (leaf nodes for now) so the output task can keep track when parents are done
+        let output_nodes_ids = pipeline
+            .get_leaf_nodes()
+            .map(|node| node.id.clone())
+            .collect::<HashSet<_>>();
+        let outputs = get(&self.pipeline_runs, &pipeline_run_arc)?.outputs.clone();
+
+        // Create the task that captures the output from the nodes and stores them in the outputs map
+        self.pipeline_runs
+            .get_mut(&pipeline_run_arc)
+            .context(selector::KeyMissing {
+                key: pipeline_run_arc.to_string(),
+            })?
+            .node_task_join_set
+            .spawn(async move {
+                let mut complete_parent_nodes = HashSet::new();
+                while let Some(message) = output_rx.recv().await {
+                    match message {
+                        Message::NodeOutput(sender_node_id, hash_map) => {
+                            // Store the output in the outputs map
+                            outputs
+                                .write()
+                                .await
+                                .entry(sender_node_id)
+                                .or_default()
+                                .push(hash_map);
+                        }
+                        Message::NodeProcessingComplete(sender_node_id) => {
+                            // Add the sender node id to the complete parent nodes
+                            complete_parent_nodes.insert(sender_node_id.clone());
+
+                            // Check if all parent nodes are complete
+                            if complete_parent_nodes.is_superset(&output_nodes_ids) {
+                                // All parents are complete, we can exit this task
+                                println!(
+                                    "All parent nodes are complete, stopping output capture task."
+                                );
+                                return Ok(());
+                            }
+                        }
+                        Message::Stop => todo!(),
+                    }
+                }
+                Ok(())
+            });
+
         // Get all the root nodes and call the create_task_for_node function for each root node
         // This will recursively create all the tasks and channels for the pipeline
         let root_nodes_tx = pipeline
@@ -158,12 +214,12 @@ impl DockerPipelineRunner {
             // Check if the node is already inside the node_tx
             if pipeline_info.node_tx.contains_key(&node.id) {
                 // Node already exists, thus we can return the existing tx
-                return Ok(pipeline_info.node_tx.get(&node.id).unwrap().clone());
+                return Ok(get(&pipeline_info.node_tx, &node.id)?.clone());
             }
 
             // Record the tx into the pipeline_info tx_hashmap
-            pipeline_info.node_tx.insert(node.id.clone(), tx.clone());
-        }
+            pipeline_info.node_tx.insert(node.id.clone(), tx.clone())
+        };
 
         // Call this function for each of the child nodes to get their Sender_tx
         let children_node_tx = pipeline_run
@@ -264,8 +320,7 @@ impl DockerPipelineRunner {
             }
         }
 
-        // Notify that node is finish processing
-        println!("Node {} processing complete", node.id);
+        // Since all inputs are sent, we can send a message that the "input node" processing is complete
         for success_ch_tx in &success_chs_tx {
             success_ch_tx
                 .send(Message::NodeProcessingComplete(node.id.clone()))
@@ -343,7 +398,7 @@ impl PodProcessor {
         })
         .await??;
         let output_dir = URI {
-            namespace: namespace.to_owned(),
+            namespace: namespace.clone(),
             path: PathBuf::from(format!("pod_runs/{}/{}", pod.hash, input_packet_hash)),
         };
 
@@ -371,7 +426,7 @@ impl PodProcessor {
 
         // For now we will just send the input_packet to the success channel
         try_join_all(success_chs_tx.iter().map(|success_ch_tx| {
-            success_ch_tx.send(Message::NodeOutput(node_id.to_owned(), packet.clone()))
+            success_ch_tx.send(Message::NodeOutput(node_id.clone(), packet.clone()))
         }))
         .await?;
 
@@ -481,7 +536,7 @@ impl NodeProcessor for MapperProcessor {
         match msg {
             Message::NodeOutput(_, packet) => {
                 match self.process_packet(&packet).await {
-                    Ok(_) => {}
+                    Ok(()) => {}
                     Err(err) => {
                         // Send the error to the failure channel
                         // For now just print it out
@@ -614,7 +669,7 @@ impl NodeProcessor for JoinerProcessor {
                 );
                 // Process the packet and send the output to the success channel
                 match self.process_packet(&sender_node_id, packet).await {
-                    Ok(_) => {}
+                    Ok(()) => {}
                     Err(err) => {
                         // Send the error to the failure channel
                         println!("Failed to process packet with error: {}", err);
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index a349eb24..920e95da 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -25,7 +25,7 @@ async fn basic_run() -> Result<()> {
     )]))?;
     let namespace_lookup = test_dirs.namespace_lookup();
 
-    let pipeline_run = runner.start(pipeline_job, &namespace_lookup)?;
+    let pipeline_run = runner.start(pipeline_job, &namespace_lookup).await?;
 
     // Wait for the pipeline run to complete
     let result = runner.get_result(&pipeline_run).await?;

From fb8ace1dec0f03a6f1d31a2f961a0adcd97066ac Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Thu, 10 Jul 2025 17:01:58 +0000
Subject: [PATCH 09/29] Fix bugs and clippy suggestions

---
 src/core/error.rs                             |  5 +-
 src/uniffi/error.rs                           |  4 -
 src/uniffi/pipeline.rs                        |  8 +-
 src/uniffi/pipeline_runner/mod.rs             |  2 +-
 .../pipeline_runner/{docker.rs => runner.rs}  | 75 +++++++------------
 tests/pipeline_runner.rs                      |  4 +-
 6 files changed, 37 insertions(+), 61 deletions(-)
 rename src/uniffi/pipeline_runner/{docker.rs => runner.rs} (92%)

diff --git a/src/core/error.rs b/src/core/error.rs
index d2404663..614cd593 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -1,6 +1,6 @@
 use crate::uniffi::{
     error::{Kind, OrcaError},
-    pipeline_runner::docker::Message,
+    pipeline_runner::runner::Message,
 };
 use bollard::errors::Error as BollardError;
 use glob;
@@ -126,8 +126,7 @@ fn format_stack(backtrace: Option<&Backtrace>) -> String {
 impl fmt::Debug for OrcaError {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
         match &self.kind {
-            Kind::ReceiverDroppedBeforeSender { backtrace, .. }
-            | Kind::EmptyResponseWhenLoadingContainerAltImage { backtrace, .. }
+            Kind::EmptyResponseWhenLoadingContainerAltImage { backtrace, .. }
             | Kind::FailedToParseDot { backtrace, .. }
             | Kind::GeneratedNamesOverflow { backtrace, .. }
             | Kind::InvalidFilepath { backtrace, .. }
diff --git a/src/uniffi/error.rs b/src/uniffi/error.rs
index 72cae77f..c52da3b7 100644
--- a/src/uniffi/error.rs
+++ b/src/uniffi/error.rs
@@ -24,10 +24,6 @@ pub type Result<T, E = OrcaError> = result::Result<T, E>;
 #[snafu(module(selector), visibility(pub(crate)), context(suffix(false)))]
 #[uniffi(flat_error)]
 pub(crate) enum Kind {
-    #[snafu(display(
-        "Receiver was dropped before sender could send a message for oneshot channel"
-    ))]
-    ReceiverDroppedBeforeSender { backtrace: Option<Backtrace> },
     #[snafu(display(
         "Received an empty response when attempting to load the alternate container image file: {path:?}."
     ))]
diff --git a/src/uniffi/pipeline.rs b/src/uniffi/pipeline.rs
index 2bb205a7..e634ae36 100644
--- a/src/uniffi/pipeline.rs
+++ b/src/uniffi/pipeline.rs
@@ -334,6 +334,7 @@ impl Pipeline {
         })
     }
 
+    /// Function to get the children of a node
     pub fn get_children_for_node(&self, node: &Node) -> impl Iterator<Item = &Node> {
         // Find the NodeIndex for the given node_key
         let node_index = self
@@ -393,7 +394,12 @@ impl PipelineJob {
     }
 }
 
-#[derive(uniffi::Object, Display, Debug, Clone, Serialize)]
+#[derive(uniffi::Object, Debug, Clone, Serialize)]
+/// `PipelineResult` struct
+/// This struct is used to return the result of a pipeline job
 pub struct PipelineResult {
+    /// Ref to the pipeline job that was executed
     pub pipeline_job: PipelineJob,
+    /// Output packets produced by the pipeline job
+    pub output_packets: HashMap<String, Vec<HashMap<String, PathSet>>>,
 }
diff --git a/src/uniffi/pipeline_runner/mod.rs b/src/uniffi/pipeline_runner/mod.rs
index 67fa0663..a0d4812b 100644
--- a/src/uniffi/pipeline_runner/mod.rs
+++ b/src/uniffi/pipeline_runner/mod.rs
@@ -52,4 +52,4 @@ impl Hash for PipelineRun {
 }
 
 /// Docker pipeline runner
-pub mod docker;
+pub mod runner;
diff --git a/src/uniffi/pipeline_runner/docker.rs b/src/uniffi/pipeline_runner/runner.rs
similarity index 92%
rename from src/uniffi/pipeline_runner/docker.rs
rename to src/uniffi/pipeline_runner/runner.rs
index 2f6cbf3f..74f440fa 100644
--- a/src/uniffi/pipeline_runner/docker.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -15,11 +15,8 @@ use std::{
     collections::{HashMap, HashSet},
     path::PathBuf,
     sync::Arc,
-    thread::sleep,
-    time::Duration,
 };
 use tokio::{
-    net::unix::pipe,
     sync::{RwLock, mpsc},
     task::{JoinSet, spawn_blocking},
 };
@@ -89,7 +86,7 @@ impl DockerPipelineRunner {
             .get_leaf_nodes()
             .map(|node| node.id.clone())
             .collect::<HashSet<_>>();
-        let outputs = get(&self.pipeline_runs, &pipeline_run_arc)?.outputs.clone();
+        let outputs = Arc::clone(&get(&self.pipeline_runs, &pipeline_run_arc)?.outputs);
 
         // Create the task that captures the output from the nodes and stores them in the outputs map
         self.pipeline_runs
@@ -134,7 +131,9 @@ impl DockerPipelineRunner {
         // This will recursively create all the tasks and channels for the pipeline
         let root_nodes_tx = pipeline
             .get_root_nodes()
-            .map(|node| self.create_task_for_node(node, &pipeline_run_arc, namespace_lookup))
+            .map(|node| {
+                self.create_task_for_node(node, &pipeline_run_arc, &output_tx, namespace_lookup)
+            })
             .collect::<Result<Vec<_>>>()?;
 
         // All pipeline tasks have been created, now we need to feed the inputs to the pipeline
@@ -154,8 +153,6 @@ impl DockerPipelineRunner {
                 .await?;
         }
 
-        sleep(Duration::from_secs(5)); // Give some time for the tasks to start
-        panic!();
         Ok(pipeline_run)
     }
 
@@ -189,6 +186,7 @@ impl DockerPipelineRunner {
 
         Ok(PipelineResult {
             pipeline_job: pipeline_run.pipeline_job.clone(),
+            output_packets: pipeline_run_info.outputs.read().await.clone(),
         })
     }
 
@@ -196,6 +194,7 @@ impl DockerPipelineRunner {
         &mut self,
         node: &Node,
         pipeline_run: &Arc<PipelineRun>,
+        output_tx: &mpsc::Sender<Message>,
         namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<mpsc::Sender<Message>> {
         println!("Creating task for node: {}", node.id);
@@ -222,13 +221,22 @@ impl DockerPipelineRunner {
         };
 
         // Call this function for each of the child nodes to get their Sender_tx
-        let children_node_tx = pipeline_run
+        let mut children_node_tx = pipeline_run
             .pipeline_job
             .pipeline
             .get_children_for_node(node)
-            .map(|child_node| self.create_task_for_node(child_node, pipeline_run, namespace_lookup))
+            .map(|child_node| {
+                self.create_task_for_node(child_node, pipeline_run, output_tx, namespace_lookup)
+            })
             .collect::<Result<Vec<_>>>()?;
 
+        // Check if children_node_tx is empty, if so, this is a leaf node thus we need to attach the output_tx
+        if children_node_tx.is_empty() {
+            // This is a leaf node, thus we need to attach the output_tx to the tx
+            // This will allow the node to send its output to the output channel
+            children_node_tx.push(output_tx.clone());
+        }
+
         // Start the task_manager
         self.pipeline_runs
             .get_mut(pipeline_run)
@@ -247,34 +255,6 @@ impl DockerPipelineRunner {
         Ok(tx)
     }
 
-    #[expect(
-        clippy::type_complexity,
-        reason = "too complex, but necessary for async handling"
-    )]
-    async fn capture_node_output(
-        mut output_rx: mpsc::Receiver<Message>,
-        outputs_ref: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>,
-    ) -> Result<()> {
-        while let Some(msg) = output_rx.recv().await {
-            match msg {
-                Message::NodeOutput(node_id, hash_map) => {
-                    // Record the output
-                    outputs_ref
-                        .write()
-                        .await
-                        .entry(node_id)
-                        .or_default()
-                        .push(hash_map);
-                }
-                Message::NodeProcessingComplete(_) | Message::Stop => {
-                    // Node processing is complete, we can stop listening to this channel
-                    break;
-                }
-            }
-        }
-        Ok(())
-    }
-
     /// For tx: Sender<Message>, we only want to send successfully completed results to the next node
     async fn start_node_manager(
         node: Node,
@@ -327,6 +307,8 @@ impl DockerPipelineRunner {
                 .await?;
         }
 
+        println!("Node manager for node: {} has completed", node.id);
+
         Ok(())
     }
 }
@@ -441,11 +423,7 @@ impl NodeProcessor for PodProcessor {
 
     async fn process_msg(&mut self, msg: Message) -> bool {
         match msg {
-            Message::NodeOutput(sender_node_id, packet) => {
-                println!(
-                    "Node {} received packet: {:?} from {}",
-                    self.node_metadata.node_id, packet, sender_node_id
-                );
+            Message::NodeOutput(_, packet) => {
                 let pod_ref = Arc::clone(&self.pod);
                 let node_id = self.node_metadata.node_id.clone();
                 let namespace = self.node_metadata.namespace.clone();
@@ -467,7 +445,6 @@ impl NodeProcessor for PodProcessor {
                 return true;
             }
             Message::NodeProcessingComplete(_) => {
-                println!("Node processing complete");
                 // Since pod only have one parent, we can expect that there will be no more incoming packet
                 // thus, we need to wait for everything to finish processing and send completion message
                 // Return true to notify caller that processing is complete
@@ -540,7 +517,7 @@ impl NodeProcessor for MapperProcessor {
                     Err(err) => {
                         // Send the error to the failure channel
                         // For now just print it out
-                        println!("Failed to process packet with error: {}", err);
+                        println!("Failed to process packet with error: {err}");
                     }
                 }
             }
@@ -642,7 +619,9 @@ impl JoinerProcessor {
             }
             Err(err) => {
                 // Send the error to the failure channel
-                todo!();
+                println!(
+                    "Failed to process packet from {sender_node_id} for joiner node with error: {err}"
+                );
             }
         }
         // Add the new packet into the cache
@@ -663,16 +642,12 @@ impl NodeProcessor for JoinerProcessor {
     async fn process_msg(&mut self, msg: Message) -> bool {
         match msg {
             Message::NodeOutput(sender_node_id, packet) => {
-                println!(
-                    "Node {} received packet: {:?} from {}",
-                    self.node_metadata.node_id, packet, sender_node_id
-                );
                 // Process the packet and send the output to the success channel
                 match self.process_packet(&sender_node_id, packet).await {
                     Ok(()) => {}
                     Err(err) => {
                         // Send the error to the failure channel
-                        println!("Failed to process packet with error: {}", err);
+                        println!("Failed to process packet with error: {err}");
                     }
                 }
             }
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 920e95da..afd58eb2 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -7,7 +7,7 @@ pub mod fixture;
 // Example for a local module:
 use std::collections::HashMap;
 
-use orcapod::uniffi::{error::Result, pipeline_runner::docker::DockerPipelineRunner};
+use orcapod::uniffi::{error::Result, pipeline_runner::runner::DockerPipelineRunner};
 
 use crate::fixture::TestDirs;
 use fixture::pipeline_job;
@@ -29,6 +29,6 @@ async fn basic_run() -> Result<()> {
 
     // Wait for the pipeline run to complete
     let result = runner.get_result(&pipeline_run).await?;
-    println!("Pipeline run result: {result:?}");
+    println!("Pipeline run result: {:?}", result.output_packets);
     Ok(())
 }

From 08453b3bc94fd503dd074026ddbf756c1e32f324 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Thu, 10 Jul 2025 18:02:54 +0000
Subject: [PATCH 10/29] Added a lot of docs and fix input.txt issue for test

---
 src/uniffi/pipeline_runner/runner.rs | 205 ++++++++++++++++++++++-----
 tests/extra/data/input.txt           |   0
 tests/fixture/mod.rs                 |   2 +-
 tests/pipeline_runner.rs             |  26 +++-
 4 files changed, 194 insertions(+), 39 deletions(-)
 create mode 100644 tests/extra/data/input.txt

diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 74f440fa..af72e5d1 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -40,7 +40,16 @@ struct PipelineRunInfo {
     outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>, // String is the node key, while hash
 }
 
-/// Docker based pipeline runner meant to execute on a single machine
+/**
+ * Runner for pipelines
+ *
+ * General Algorithm:
+ * 1. All nodes receive inputs via a MPSC channel, where parents nodes will send their output packets
+ * 2. There are two "functional nodes processor" in the pipeline,
+ *    which is the `input_node` and `output_node`
+ * 3. Each node will process the inputs its receives and will only send it children input channels
+ *    if they are successfully processed. Failures are just printed for now (Will be replaced by logging)
+ */
 #[derive(Default)]
 pub struct DockerPipelineRunner {
     pipeline_runs: HashMap<PipelineRun, PipelineRunInfo>, // For each pipeline run, we have a join set to track the tasks and wait on them
@@ -52,10 +61,28 @@ impl DockerPipelineRunner {
         Self::default()
     }
 
-    /// Start the `pipeline_job` returning `pipeline_run`un
-    ///
-    /// # Errors
-    /// Will error out if the pipeline job fails to start
+    /**
+    Start the `pipeline_job` returning `pipeline_run`un
+
+    Algorithm:
+    1. Create a new `PipelineRun` from the `pipeline_job`
+    2. Insert the `PipelineRun` into the `pipeline_runs` map
+    3. Create an output channel to capture the outputs of the nodes
+       (This will be given to the output capture task)
+    4. Create a task that captures the outputs form nodes and stores them in the `outputs` map
+       This is done via listening the channel and acting like a final node in the pipeline
+    5. Get the root nodes of the pipeline and call `create_task_for_node` for each root node
+       This will recursively BFS through the pipeline and create tasks for each node
+       (More detail in that function)
+    6. Using the `root_nodes` txs, we will send all inputs to that channel.
+       This will start the pipeline execution
+    7. Upon sending all the inputs, we will send node complete message
+       signifying that the `input_node` is done
+    8. Return the `PipelineRun` which can be used to get the results later
+
+    # Errors
+    Will error out if the pipeline job fails to start
+    */
     pub async fn start(
         &mut self,
         pipeline_job: PipelineJob,
@@ -81,6 +108,15 @@ impl DockerPipelineRunner {
         // Create the output channel to capture the outputs of the outputs nodes (Currently only leaf nodes)
         let (output_tx, mut output_rx) = mpsc::channel::<Message>(128); // Channel to capture outputs from nodes
 
+        // Insert the output channel into the pipeline run info
+        self.pipeline_runs
+            .get_mut(&pipeline_run_arc)
+            .context(selector::KeyMissing {
+                key: pipeline_run_arc.to_string(),
+            })?
+            .node_tx
+            .insert("output".to_owned(), output_tx.clone());
+
         // Get the output_nodes (leaf nodes for now) so the output task can keep track when parents are done
         let output_nodes_ids = pipeline
             .get_leaf_nodes()
@@ -115,13 +151,13 @@ impl DockerPipelineRunner {
                             // Check if all parent nodes are complete
                             if complete_parent_nodes.is_superset(&output_nodes_ids) {
                                 // All parents are complete, we can exit this task
-                                println!(
-                                    "All parent nodes are complete, stopping output capture task."
-                                );
                                 return Ok(());
                             }
                         }
-                        Message::Stop => todo!(),
+                        Message::Stop => {
+                            // No clear action needed, just exit the task
+                            return Ok(());
+                        }
                     }
                 }
                 Ok(())
@@ -190,6 +226,50 @@ impl DockerPipelineRunner {
         })
     }
 
+    /// Stop the pipeline run and all its tasks
+    /// # Errors
+    /// Will error out if the pipeline run is not found or if any of the tasks fail to stop correctly
+    pub async fn stop(&mut self, pipeline_run: &PipelineRun) -> Result<()> {
+        // Get the pipeline run info
+        let pipeline_run_info =
+            self.pipeline_runs
+                .get_mut(pipeline_run)
+                .context(selector::KeyMissing {
+                    key: pipeline_run.to_string(),
+                })?;
+
+        // Send a stop message to all the node txs
+        for tx in pipeline_run_info.node_tx.values() {
+            tx.send(Message::Stop).await?;
+        }
+
+        // Wait for all tasks to complete
+        while let Some(result) = pipeline_run_info.node_task_join_set.join_next().await {
+            match result {
+                Ok(Ok(())) => {} // Task completed successfully
+                Ok(Err(err)) => {
+                    eprintln!("Task failed: {err}");
+                    return Err(err);
+                }
+                Err(err) => {
+                    eprintln!("Join set error: {err}");
+                    return Err(err.into());
+                }
+            }
+        }
+
+        // Remove the pipeline run from the list of pipeline runs
+        self.pipeline_runs.remove(pipeline_run);
+
+        Ok(())
+    }
+
+    /// Helper function to create a task for each node, while recursively BFS through the pipeline
+    /// Summary:
+    /// 1. Check if their is already a channel created for the node, if not create one and insert it
+    /// 2. Call this function for each of the child nodes to get their `Sender_tx`
+    /// 3. If the node is a leaf node, attach the `output_tx` to the tx (Will be replaced by `output_nodes`)
+    /// 4. Start the task manager for the node, which will act as the node's processor
     fn create_task_for_node(
         &mut self,
         node: &Node,
@@ -199,11 +279,9 @@ impl DockerPipelineRunner {
     ) -> Result<mpsc::Sender<Message>> {
         println!("Creating task for node: {}", node.id);
         // Create a channel for the node
-        // This channel will be used to send messages to the node processor
-        let (tx, rx) = mpsc::channel::<Message>(128);
 
         // Use closer to limit the scope of the borrow
-        {
+        let (tx, rx) = {
             let pipeline_info =
                 self.pipeline_runs
                     .get_mut(pipeline_run)
@@ -216,8 +294,12 @@ impl DockerPipelineRunner {
                 return Ok(get(&pipeline_info.node_tx, &node.id)?.clone());
             }
 
+            // This channel will be used to send messages to the node processor
+            let (tx, rx) = mpsc::channel::<Message>(128);
+
             // Record the tx into the pipeline_info tx_hashmap
-            pipeline_info.node_tx.insert(node.id.clone(), tx.clone())
+            pipeline_info.node_tx.insert(node.id.clone(), tx.clone());
+            (tx, rx)
         };
 
         // Call this function for each of the child nodes to get their Sender_tx
@@ -255,7 +337,14 @@ impl DockerPipelineRunner {
         Ok(tx)
     }
 
-    /// For tx: Sender<Message>, we only want to send successfully completed results to the next node
+    /// Act as the processor of the node by:
+    /// 1. Creating a metadata struct for the node to be passed to the appropriate processor
+    /// 2. Get the kernel for the node and build the correct processor for this node
+    /// 3. Start the processor and wait till it completes
+    /// 4. Send a message that the node processing is complete
+    ///
+    /// # Errors
+    /// Will error out if the kernel for the node is not found or if the
     async fn start_node_manager(
         node: Node,
         pipeline_run: Arc<PipelineRun>,
@@ -263,7 +352,6 @@ impl DockerPipelineRunner {
         success_chs_tx: Vec<mpsc::Sender<Message>>,
         namespace_lookup: HashMap<String, PathBuf>,
     ) -> Result<()> {
-        println!("Starting node manager for node: {}", node.id);
         // Create a metadata struct for this node
         let node_metadata = NodeMetaData {
             node_id: node.id.clone(),
@@ -273,8 +361,6 @@ impl DockerPipelineRunner {
             namespace_lookup: namespace_lookup.clone(),
         };
 
-        println!("Setting up node processor for node: {}", node.id);
-
         // Get the kernel for this node and build the correct processor
         match get(
             &pipeline_run.pipeline_job.pipeline.kernel_lut,
@@ -302,17 +388,31 @@ impl DockerPipelineRunner {
 
         // Since all inputs are sent, we can send a message that the "input node" processing is complete
         for success_ch_tx in &success_chs_tx {
-            success_ch_tx
+            match success_ch_tx
                 .send(Message::NodeProcessingComplete(node.id.clone()))
-                .await?;
+                .await
+            {
+                Ok(()) => {}
+                Err(err) => {
+                    match err {
+                        mpsc::error::SendError(Message::NodeProcessingComplete(_)) => {
+                            // The channel is closed, we can ignore this error, this happens when stop it called
+                            eprintln!("Failed to send processing complete message, channel closed");
+                        }
+                        _ => {
+                            eprintln!("Failed to send processing complete message: {err}");
+                        }
+                    }
+                }
+            }
         }
 
-        println!("Node manager for node: {} has completed", node.id);
-
         Ok(())
     }
 }
 
+/// Metadata for the node processor
+/// Contains fields that is normally needed to process incoming packets
 struct NodeMetaData {
     node_id: String,
     node_rx: mpsc::Receiver<Message>, // Channel to listen to messages from parent nodes
@@ -321,6 +421,11 @@ struct NodeMetaData {
     namespace_lookup: HashMap<String, PathBuf>, // Copy of the look up table
 }
 
+/// Unify the interface for node processors and provide a common way to handle processing of incoming messages
+/// This trait defines the methods that all node processors should implement
+///
+/// Main purpose was to reduce the amount of code duplication between different node processors
+/// As a result, each processor only needs to worry about writing their own function to process the msg.
 trait NodeProcessor {
     fn get_node_rx(&mut self) -> &mut mpsc::Receiver<Message>;
 
@@ -342,6 +447,8 @@ trait NodeProcessor {
     async fn wait_for_node_task_completion(&mut self);
 }
 
+/// Processor for Pods
+/// Currently missing implementation to call agents for actual pod processing
 struct PodProcessor {
     pod: Arc<Pod>,
     node_metadata: NodeMetaData,
@@ -357,6 +464,8 @@ impl PodProcessor {
         }
     }
 
+    /// Actual logic of processing a packet using the pod
+    /// At the moment it does a simulation of pod execution
     async fn process_packet(
         node_id: String,
         pod: Arc<Pod>,
@@ -402,13 +511,24 @@ impl PodProcessor {
         // Simulate pod execution by just printing out pod_job_hash and pod hash
         // This will be replaced by sending the pod_job to the orchestrator via the agent
         println!(
-            "Executing pod job: {} with pod hash: {}",
+            "Simulating Executing pod job: {} with pod hash: {}",
             pod_job.hash, pod_job.pod.hash
         );
 
+        #[expect(
+            clippy::unwrap_used,
+            reason = "Hard code for now, will be replaced by agent"
+        )]
+        // Build the output_packet
+        let output_packet = pod
+            .output_spec
+            .keys()
+            .map(|output_key| (output_key.clone(), packet.values().next().cloned().unwrap()))
+            .collect::<HashMap<_, _>>();
+
         // For now we will just send the input_packet to the success channel
         try_join_all(success_chs_tx.iter().map(|success_ch_tx| {
-            success_ch_tx.send(Message::NodeOutput(node_id.clone(), packet.clone()))
+            success_ch_tx.send(Message::NodeOutput(node_id.clone(), output_packet.clone()))
         }))
         .await?;
 
@@ -430,14 +550,25 @@ impl NodeProcessor for PodProcessor {
                 let namespace_lookup = self.node_metadata.namespace_lookup.clone();
                 let child_nodes_txs = self.node_metadata.child_nodes_txs.clone();
                 // Forward it into a processing task
-                self.processing_tasks.spawn(Self::process_packet(
-                    node_id,
-                    pod_ref,
-                    namespace,
-                    namespace_lookup,
-                    packet,
-                    child_nodes_txs,
-                ));
+                self.processing_tasks.spawn(async move {
+                    // Process the packet using the pod
+                    // This will execute the pod and send the output to the next node
+                    if let Err(err) = Self::process_packet(
+                        node_id,
+                        pod_ref,
+                        namespace,
+                        namespace_lookup,
+                        packet,
+                        child_nodes_txs,
+                    )
+                    .await
+                    {
+                        // Send the error to the failure channel
+                        // For now just print it out
+                        eprintln!("Failed to process packet with error: {err}");
+                    }
+                    Ok(())
+                });
             }
             Message::Stop => {
                 // Stop message received, we will stop processing
@@ -452,7 +583,6 @@ impl NodeProcessor for PodProcessor {
                 return true;
             }
         }
-        println!("returning false");
         false
     }
 
@@ -463,6 +593,8 @@ impl NodeProcessor for PodProcessor {
     }
 }
 
+/// Processor for Mapper nodes
+/// This processor renames the `input_keys` from the input packet to the `output_keys` defined by the map
 struct MapperProcessor {
     mapper: Arc<Mapper>,
     node_metadata: NodeMetaData,
@@ -517,7 +649,7 @@ impl NodeProcessor for MapperProcessor {
                     Err(err) => {
                         // Send the error to the failure channel
                         // For now just print it out
-                        println!("Failed to process packet with error: {err}");
+                        eprintln!("Failed to process packet with error: {err}");
                     }
                 }
             }
@@ -528,6 +660,9 @@ impl NodeProcessor for MapperProcessor {
     }
 }
 
+/// Processor for Joiner nodes
+/// This processor combines packets from multiple parent nodes into a single output packet
+/// It uses a cartesian product to combine packets from different parents
 struct JoinerProcessor {
     /// Cache for all packets received by the node
     input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
@@ -619,7 +754,7 @@ impl JoinerProcessor {
             }
             Err(err) => {
                 // Send the error to the failure channel
-                println!(
+                eprintln!(
                     "Failed to process packet from {sender_node_id} for joiner node with error: {err}"
                 );
             }
@@ -647,7 +782,7 @@ impl NodeProcessor for JoinerProcessor {
                     Ok(()) => {}
                     Err(err) => {
                         // Send the error to the failure channel
-                        println!("Failed to process packet with error: {err}");
+                        eprintln!("Failed to process packet with error: {err}");
                     }
                 }
             }
diff --git a/tests/extra/data/input.txt b/tests/extra/data/input.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs
index 150babaf..0e577e4f 100644
--- a/tests/fixture/mod.rs
+++ b/tests/fixture/mod.rs
@@ -287,7 +287,7 @@ pub fn pipeline_job() -> Result<PipelineJob> {
                 kind: BlobKind::File,
                 location: URI {
                     namespace: "default".to_owned(),
-                    path: PathBuf::from("data/input.txt"),
+                    path: PathBuf::from("input.txt"),
                 },
                 ..Default::default()
             }),
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index afd58eb2..37fcdecf 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -12,7 +12,7 @@ use orcapod::uniffi::{error::Result, pipeline_runner::runner::DockerPipelineRunn
 use crate::fixture::TestDirs;
 use fixture::pipeline_job;
 
-#[tokio::test(flavor = "multi_thread", worker_threads = 16)]
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn basic_run() -> Result<()> {
     let pipeline_job = pipeline_job()?;
 
@@ -28,7 +28,27 @@ async fn basic_run() -> Result<()> {
     let pipeline_run = runner.start(pipeline_job, &namespace_lookup).await?;
 
     // Wait for the pipeline run to complete
-    let result = runner.get_result(&pipeline_run).await?;
-    println!("Pipeline run result: {:?}", result.output_packets);
+    runner.get_result(&pipeline_run).await?;
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn stop() -> Result<()> {
+    let pipeline_job = pipeline_job()?;
+
+    // Create the runner
+    let mut runner = DockerPipelineRunner::new();
+
+    let test_dirs = TestDirs::new(&HashMap::from([(
+        "default".to_owned(),
+        Some("./tests/extra/data/"),
+    )]))?;
+    let namespace_lookup = test_dirs.namespace_lookup();
+
+    let pipeline_run = runner.start(pipeline_job, &namespace_lookup).await?;
+
+    // Abort the pipeline run
+    runner.stop(&pipeline_run).await?;
+
     Ok(())
 }

From 713e4710ee4af95cd417bab0c617658eabac931b Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Thu, 10 Jul 2025 18:19:25 +0000
Subject: [PATCH 11/29] fix mistake in docs

---
 src/uniffi/pipeline_runner/runner.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index af72e5d1..87aaa649 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -62,7 +62,7 @@ impl DockerPipelineRunner {
     }
 
     /**
-    Start the `pipeline_job` returning `pipeline_run`un
+    Start the `pipeline_job` returning `pipeline_run`
 
     Algorithm:
     1. Create a new `PipelineRun` from the `pipeline_job`

From d3701c92900e9644cc8d52defe30c2dd750a3095 Mon Sep 17 00:00:00 2001
From: synicix <synicix@gmail.com>
Date: Tue, 15 Jul 2025 05:57:27 +0000
Subject: [PATCH 12/29] Added joiner processor unit test and fix bug for case
 where we don't have all the parents yet

---
 src/core/pipeline.rs                 |   0
 src/uniffi/pipeline.rs               |   9 +-
 src/uniffi/pipeline_runner/runner.rs | 132 ++++++++++++++++++++++++++-
 tests/fixture/mod.rs                 |  53 ++++++++---
 tests/pipeline.rs                    |  15 ++-
 tests/pipeline_runner.rs             |   1 +
 6 files changed, 186 insertions(+), 24 deletions(-)
 create mode 100644 src/core/pipeline.rs

diff --git a/src/core/pipeline.rs b/src/core/pipeline.rs
new file mode 100644
index 00000000..e69de29b
diff --git a/src/uniffi/pipeline.rs b/src/uniffi/pipeline.rs
index e634ae36..3476ce07 100644
--- a/src/uniffi/pipeline.rs
+++ b/src/uniffi/pipeline.rs
@@ -12,7 +12,7 @@ use petgraph::{graph::DiGraph, prelude::NodeIndex};
 use serde::Serialize;
 use std::{
     backtrace::Backtrace,
-    collections::HashMap,
+    collections::{HashMap, HashSet},
     hash::{Hash, Hasher},
     string::String,
     sync::Arc,
@@ -243,8 +243,9 @@ impl Pipeline {
         })
     }
 
-    /// Returns the input specification for the pipeline.
-    /// This is currently a combination of all the root nodes' input specifications.
+    /// Returns the input specification for the pipeline, where the specification is a list of unique
+    /// keys that are required as input to the pipeline.
+    ///
     /// # Errors
     /// Will error out if it fails to get the kernel from the kernel lookup table
     pub fn get_input_spec(&self) -> Result<Vec<&String>> {
@@ -254,7 +255,7 @@ impl Pipeline {
         Ok(self
             .get_root_nodes()
             .map(|node| Ok(get(&self.kernel_lut, &node.kernel_hash)?.get_input_keys()))
-            .collect::<Result<Vec<_>>>()?
+            .collect::<Result<HashSet<_>>>()?
             .into_iter()
             .flatten()
             .collect())
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 87aaa649..4085ffac 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -426,7 +426,7 @@ struct NodeMetaData {
 ///
 /// Main purpose was to reduce the amount of code duplication between different node processors
 /// As a result, each processor only needs to worry about writing their own function to process the msg.
-trait NodeProcessor {
+pub(crate) trait NodeProcessor {
     fn get_node_rx(&mut self) -> &mut mpsc::Receiver<Message>;
 
     async fn start(&mut self) {
@@ -668,6 +668,7 @@ struct JoinerProcessor {
     input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
     completed_parents: Vec<String>,
     node_metadata: NodeMetaData,
+    initial_computation_completed: bool,
 }
 
 impl JoinerProcessor {
@@ -680,11 +681,12 @@ impl JoinerProcessor {
             input_packet_cache,
             node_metadata,
             completed_parents: Vec::new(),
+            initial_computation_completed: false,
         }
     }
 
     fn compute_new_packet_combination(
-        &self,
+        &mut self,
         sender_node_id: &str,
         new_packet: &HashMap<String, PathSet>,
     ) -> Result<Vec<HashMap<String, PathSet>>> {
@@ -694,12 +696,36 @@ impl JoinerProcessor {
             .input_packet_cache
             .keys()
             .filter(|key| *key != sender_node_id);
+
+        // Create a vector to hold the incoming packet
+        // This will be used to compute the cartesian product and will be modified if the initial computation is not completed
+        let mut incoming_packet = vec![new_packet.clone()];
+
+        // Determine if the initial computation has been computed
+        if !self.initial_computation_completed {
+            // Check if we at least have one cached packet for each of the other parents
+            for parent_id in other_parent_ids.clone() {
+                if get(&self.input_packet_cache, parent_id)?.is_empty() {
+                    // We are still missing other parents, so we can't compute the new packet combination yet
+                    return Ok(Vec::new());
+                }
+            }
+
+            // We have at least one packet for each of the other parents, thus we can compute the cartesian product
+            // For the initial computation, we will add all of the add all previous packets for this sender
+            get(&self.input_packet_cache, &sender_node_id.to_owned())?
+                .iter()
+                .for_each(|packet| incoming_packet.push(packet.clone()));
+
+            self.initial_computation_completed = true;
+        }
+
         let mut factors = other_parent_ids
             .map(|id| get(&self.input_packet_cache, id))
             .collect::<Result<Vec<_>>>()?;
 
         // Add the new incoming packet as a factor
-        let incoming_packet = vec![new_packet.clone()];
+
         factors.push(&incoming_packet);
 
         let result = factors
@@ -806,3 +832,103 @@ impl NodeProcessor for JoinerProcessor {
         false
     }
 }
+
+#[cfg(test)]
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[expect(clippy::panic_in_result_fn, reason = "Unit test")]
+async fn joiner() -> Result<()> {
+    // Create a fake mpsc channel for the node
+    let (_, node_rx) = mpsc::channel::<Message>(128);
+
+    // Create a child mpsc
+    let (child_tx, mut child_rx) = mpsc::channel::<Message>(128);
+
+    let node_metadata = NodeMetaData {
+        node_id: "joiner_node".to_owned(),
+        node_rx,
+        child_nodes_txs: vec![child_tx],
+        namespace: "test".to_owned(),
+        namespace_lookup: HashMap::new(),
+    };
+
+    let mut joiner_process = JoinerProcessor::new(
+        vec!["0".to_owned(), "1".to_owned(), "2".to_owned()],
+        node_metadata,
+    );
+
+    // Make each parent has 1 packet
+    for idx in 0..2 {
+        joiner_process
+            .process_packet(
+                &format!("{idx}"),
+                make_test_packet("data_1.txt".to_owned().into()),
+            )
+            .await?;
+    }
+
+    // Confirm that there should be no output yet
+
+    // Now we send the missing parent package
+    // This will yield one unique combination
+    joiner_process
+        .process_packet("2", make_test_packet("data_1.txt".to_owned().into()))
+        .await?;
+
+    // Confirm that the output is sent to the child channel
+    assert!(
+        child_rx.len() == 1,
+        "Should have only one message in the channel",
+    );
+    assert!(
+        child_rx.recv().await.is_some(),
+        "Should have received a message"
+    );
+
+    // Insert another one
+    joiner_process
+        .process_packet("2", make_test_packet("data_2.txt".to_owned().into()))
+        .await?;
+
+    // The joiner node should send another one
+    assert!(
+        child_rx.len() == 1,
+        "Should have only one message in the channel",
+    );
+    assert!(
+        child_rx.recv().await.is_some(),
+        "Should have received a message"
+    );
+
+    // Now insert to packet for parent 0, which should yield 2 packets in total
+    // This is because of the cartesian product
+    joiner_process
+        .process_packet("0", make_test_packet("data_2.txt".to_owned().into()))
+        .await?;
+
+    assert!(
+        child_rx.len() == 2,
+        "Should have only two messages in the channel",
+    );
+    assert!(
+        child_rx.recv().await.is_some(),
+        "Should have received a message"
+    );
+
+    Ok(())
+}
+
+#[cfg(test)]
+fn make_test_packet(path: PathBuf) -> HashMap<String, PathSet> {
+    use crate::uniffi::model::{Blob, BlobKind};
+
+    let path_set = PathSet::Unary(Blob {
+        kind: BlobKind::File,
+        location: URI {
+            namespace: "test".to_owned(),
+            path,
+        },
+        checksum: String::new(),
+    });
+
+    HashMap::from([("key".to_owned(), path_set)])
+}
diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs
index 0e577e4f..cf252ffc 100644
--- a/tests/fixture/mod.rs
+++ b/tests/fixture/mod.rs
@@ -190,7 +190,7 @@ pub fn container_image_style(binary_location: impl AsRef<Path>) -> Result<TestCo
 
 // Pipeline stuff
 
-pub fn pod_append_name(pod_name: &str) -> Result<Pod> {
+pub fn append_name_pod(pod_name: &str) -> Result<Pod> {
     Pod::new(
         Some(Annotation {
             name: pod_name.to_owned(),
@@ -225,21 +225,30 @@ pub fn pod_append_name(pod_name: &str) -> Result<Pod> {
 
 pub fn pipeline() -> Result<Pipeline> {
     // Create a simple pipeline where the functions job is to add append their name into the input file
-    // Structure: A -> B -> C
+    // Structure: A -> Mapper -> Joiner -> B -> Mapper -> C, D -> Mapper -> Joiner
 
     // Create the components of the pipeline
-    let pod_a = pod_append_name("A")?;
-    let pod_b = pod_append_name("B")?;
-    let pod_c = pod_append_name("C")?;
+    let pod_a = append_name_pod("A")?;
+    let pod_b = append_name_pod("B")?;
+    let pod_c = append_name_pod("C")?;
+    let pod_d = append_name_pod("D")?;
 
+    // Create the file mapper that will be used to map the output of one pod to the input of another
     let file_mapper = Mapper::new(HashMap::from([(
         "output_text".to_owned(),
         "input_text".to_owned(),
     )]))?;
+
+    // Create the file mapper that will be used to map the output of one pod to the input of another
+    let file_mapper_for_pod_d = Mapper::new(HashMap::from([(
+        "output_text".to_owned(),
+        "input2_text".to_owned(),
+    )]))?;
+
     let mut kernel_to_node_name = HashMap::<Kernel, Vec<String>>::new();
 
     // Insert the pods into the kernel_to_node_name mapping
-    for pod in [&pod_a, &pod_b, &pod_c] {
+    for pod in [&pod_a, &pod_b, &pod_c, &pod_d] {
         kernel_to_node_name
             .entry(pod.clone().into())
             .or_default()
@@ -252,18 +261,34 @@ pub fn pipeline() -> Result<Pipeline> {
             );
     }
 
-    // Insert the mapping next
-    for idx in 0..2 {
-        kernel_to_node_name
-            .entry(file_mapper.clone().into())
-            .or_default()
-            .push("file_mapper_".to_owned() + &idx.to_string());
-    }
+    // Add mapper to end of pod_a and pod_b
+    kernel_to_node_name
+        .entry(file_mapper.clone().into())
+        .or_default()
+        .push("pod_a_mapper".to_owned());
+
+    kernel_to_node_name
+        .entry(file_mapper.into())
+        .or_default()
+        .push("pod_b_mapper".to_owned());
+
+    // Insert mapper for pod_d
+    kernel_to_node_name
+        .entry(file_mapper_for_pod_d.into())
+        .or_default()
+        .push("pod_d_mapper".to_owned());
+
+    // Add the joiner
+    kernel_to_node_name
+        .entry(Kernel::Joiner)
+        .or_default()
+        .push("pod_b_joiner".to_owned());
 
     // Write all the edges in DOT format
     let dot = "
         digraph {
-        A -> file_mapper_0 -> B -> file_mapper_1 -> C;
+        A -> pod_a_mapper -> pod_b_joiner -> B -> pod_b_mapper -> C;
+        D -> pod_d_mapper -> pod_b_joiner;
         }
     ";
 
diff --git a/tests/pipeline.rs b/tests/pipeline.rs
index 4150deeb..70de9487 100644
--- a/tests/pipeline.rs
+++ b/tests/pipeline.rs
@@ -33,8 +33,8 @@ fn creation() -> Result<()> {
     // so graph of 5, and 4 kernels due to the mapping being repeated
     assert_eq!(
         pipeline.kernel_lut.len(),
-        4,
-        "Kernel LUT should have exactly 4 entries."
+        7,
+        "Kernel LUT should have exactly 7 entries."
     );
 
     Ok(())
@@ -44,7 +44,7 @@ fn creation() -> Result<()> {
 fn root_nodes() -> Result<()> {
     let pipeline = pipeline()?;
 
-    assert_eq!(pipeline.get_root_nodes().count(), 1);
+    assert_eq!(pipeline.get_root_nodes().count(), 2);
     Ok(())
 }
 
@@ -65,6 +65,15 @@ fn get_parents_key_for_node() -> Result<()> {
     Ok(())
 }
 
+#[test]
+fn get_childen_for_node() -> Result<()> {
+    let pipeline = pipeline()?;
+    let node_key = pipeline.get_root_nodes().next().unwrap();
+
+    assert_eq!(pipeline.get_children_for_node(node_key).count(), 1);
+    Ok(())
+}
+
 #[test]
 fn get_input_spec() -> Result<()> {
     let pipeline = pipeline()?;
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 37fcdecf..d3b816af 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -1,4 +1,5 @@
 #![expect(missing_docs, reason = "OK in tests.")]
+
 // If 'fixture' is a local module, ensure there is a 'mod fixture;' statement or a 'fixture.rs' file in the same directory or in 'tests/'.
 // If 'fixture' is an external crate, add it to Cargo.toml and import as shown below.
 // use fixture::pipeline_job;

From b911f7972ea4a0fb0031e7bc31f6e77bd5808e34 Mon Sep 17 00:00:00 2001
From: synicix <synicix@gmail.com>
Date: Fri, 18 Jul 2025 03:47:16 +0000
Subject: [PATCH 13/29] Save progress

---
 Cargo.toml                           |   1 +
 src/core/orchestrator/agent.rs       |   1 +
 src/uniffi/orchestrator/agent.rs     |   1 -
 src/uniffi/pipeline_runner/mod.rs    |  40 ---
 src/uniffi/pipeline_runner/runner.rs | 382 ++++++++-------------------
 tests/agent.rs                       |   1 -
 tests/pipeline_runner.rs             |   2 +-
 7 files changed, 106 insertions(+), 322 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 1340c5d7..4b1ff057 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,6 +29,7 @@ crate-type = ["rlib", "cdylib"]
 [dependencies]
 # make async fn in traits work with dyn traits
 async-trait = "0.1.88"
+bitcode = "0.6.6"
 # docker API in orchestrator
 bollard = "0.17.1"
 # datetime utilities
diff --git a/src/core/orchestrator/agent.rs b/src/core/orchestrator/agent.rs
index a739b90f..bd93077c 100644
--- a/src/core/orchestrator/agent.rs
+++ b/src/core/orchestrator/agent.rs
@@ -102,6 +102,7 @@ impl AgentClient {
 
 #[expect(
     clippy::let_underscore_must_use,
+    clippy::excessive_nesting,
     reason = "`result::Result<(), SendError<_>>` is the only uncaptured result since it would mean we can't transmit results over mpsc."
 )]
 pub async fn start_service<
diff --git a/src/uniffi/orchestrator/agent.rs b/src/uniffi/orchestrator/agent.rs
index 8a55f795..bcf48fcc 100644
--- a/src/uniffi/orchestrator/agent.rs
+++ b/src/uniffi/orchestrator/agent.rs
@@ -140,7 +140,6 @@ impl Agent {
     /// # Errors
     ///
     /// Will stop and return an error if encounters an error while processing any pod job request.
-    #[expect(clippy::excessive_nesting, reason = "Nesting manageable.")]
     pub async fn start(
         &self,
         namespace_lookup: &HashMap<String, PathBuf>,
diff --git a/src/uniffi/pipeline_runner/mod.rs b/src/uniffi/pipeline_runner/mod.rs
index a0d4812b..c4a8e880 100644
--- a/src/uniffi/pipeline_runner/mod.rs
+++ b/src/uniffi/pipeline_runner/mod.rs
@@ -1,8 +1,6 @@
 use crate::uniffi::error::Result;
 
 use super::pipeline::PipelineJob;
-use std::fmt;
-use std::hash::{Hash, Hasher};
 
 /// # Errors:
 /// Error out if fail to start the pipeline job
@@ -13,43 +11,5 @@ pub trait PipelineRunner {
     /// Returns an error if the pipeline job fails to start.
     fn start(&self, pipeline_job: PipelineJob) -> Result<()>;
 }
-
-#[derive(Debug, Clone)]
-/// Struct to store the active pipeline run.
-pub struct PipelineRun {
-    pipeline_job: PipelineJob,
-}
-
-impl fmt::Display for PipelineRun {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(
-            f,
-            "PipelineRun {{ pipeline_job: {} }}",
-            self.pipeline_job.hash
-        )
-    }
-}
-
-impl PipelineRun {
-    /// New function to initialize the pipeline run
-    pub const fn new(pipeline_job: PipelineJob) -> Self {
-        Self { pipeline_job }
-    }
-}
-
-impl PartialEq for PipelineRun {
-    fn eq(&self, other: &Self) -> bool {
-        self.pipeline_job.hash == other.pipeline_job.hash
-    }
-}
-
-impl Eq for PipelineRun {}
-
-impl Hash for PipelineRun {
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        self.pipeline_job.hash.hash(state);
-    }
-}
-
 /// Docker pipeline runner
 pub mod runner;
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 4085ffac..8a1475be 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -1,4 +1,3 @@
-use super::PipelineRun;
 use crate::{
     core::{crypto::hash_buffer, model::serialize_hashmap, util::get},
     uniffi::{
@@ -7,12 +6,14 @@ use crate::{
         pipeline::{Kernel, Mapper, Node, PipelineJob, PipelineResult},
     },
 };
+use bitcode::{Decode, Encode};
 use futures_util::future::try_join_all;
 use itertools::Itertools as _;
 use serde_yaml::Serializer;
 use snafu::OptionExt as _;
 use std::{
     collections::{HashMap, HashSet},
+    hash::{Hash, Hasher},
     path::PathBuf,
     sync::Arc,
 };
@@ -21,7 +22,7 @@ use tokio::{
     task::{JoinSet, spawn_blocking},
 };
 
-#[derive(Clone, Debug)]
+#[derive(Encode, Decode, Clone, Debug)]
 pub(crate) enum Message {
     /// String is the `parent_node_id`, while `HashMap` is output of the parent node
     NodeOutput(String, HashMap<String, PathSet>),
@@ -34,12 +35,27 @@ pub(crate) enum Message {
     clippy::type_complexity,
     reason = "too complex, but necessary for async handling"
 )]
-struct PipelineRunInfo {
-    node_task_join_set: JoinSet<Result<()>>, // Join set to track the tasks for this pipeline run
-    node_tx: HashMap<String, mpsc::Sender<Message>>,
+#[derive(Debug, Clone)]
+pub struct PipelineRun {
+    /// PipelineJob that this run is associated with
+    pub pipeline_job: PipelineJob, // The pipeline job that this run is associated with
     outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>, // String is the node key, while hash
 }
 
+impl PartialEq for PipelineRun {
+    fn eq(&self, other: &Self) -> bool {
+        self.pipeline_job.hash == other.pipeline_job.hash
+    }
+}
+
+impl Eq for PipelineRun {}
+
+impl Hash for PipelineRun {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.pipeline_job.hash.hash(state);
+    }
+}
+
 /**
  * Runner for pipelines
  *
@@ -52,9 +68,17 @@ struct PipelineRunInfo {
  */
 #[derive(Default)]
 pub struct DockerPipelineRunner {
-    pipeline_runs: HashMap<PipelineRun, PipelineRunInfo>, // For each pipeline run, we have a join set to track the tasks and wait on them
+    pipeline_runs: HashSet<Arc<PipelineRun>>,
 }
 
+/**
+ * This is an implementation of a pipeline runner that uses Zenoh to communicate between the tasks
+ * The runtime is tokio
+ *
+ * These are the key expressions of the components of the pipeline:
+ * - Input Node: pipeline_job_hash/input_node/outputs (This is where the pipeline_job packets get fed to)
+ * - Nodes: pipeline_job_hash/node_id/outputs/(success|failure) (This is where the node outputs are sent to)
+*/
 impl DockerPipelineRunner {
     /// Create a new Docker pipeline runner
     pub fn new() -> Self {
@@ -87,90 +111,17 @@ impl DockerPipelineRunner {
         &mut self,
         pipeline_job: PipelineJob,
         namespace_lookup: &HashMap<String, PathBuf>,
-    ) -> Result<PipelineRun> {
+    ) -> Result<&PipelineRun> {
         // Create a new pipeline run
-        let pipeline_run = PipelineRun { pipeline_job };
-        let pipeline_run_arc = Arc::new(pipeline_run.clone());
-
-        // Insert into the list of pipeline runs
-        self.pipeline_runs.insert(
-            (*pipeline_run_arc).clone(),
-            PipelineRunInfo {
-                node_tx: HashMap::new(),
-                node_task_join_set: JoinSet::new(),
-                outputs: Arc::new(RwLock::new(HashMap::new())),
-            },
-        );
+        let pipeline_run = Arc::new(PipelineRun {
+            pipeline_job,
+            outputs: Arc::new(RwLock::new(HashMap::new())),
+        });
 
         // Get reference to the pipeline
-        let pipeline = &pipeline_run_arc.pipeline_job.pipeline;
-
-        // Create the output channel to capture the outputs of the outputs nodes (Currently only leaf nodes)
-        let (output_tx, mut output_rx) = mpsc::channel::<Message>(128); // Channel to capture outputs from nodes
+        let pipeline = &pipeline_run.pipeline_job.pipeline;
 
-        // Insert the output channel into the pipeline run info
-        self.pipeline_runs
-            .get_mut(&pipeline_run_arc)
-            .context(selector::KeyMissing {
-                key: pipeline_run_arc.to_string(),
-            })?
-            .node_tx
-            .insert("output".to_owned(), output_tx.clone());
-
-        // Get the output_nodes (leaf nodes for now) so the output task can keep track when parents are done
-        let output_nodes_ids = pipeline
-            .get_leaf_nodes()
-            .map(|node| node.id.clone())
-            .collect::<HashSet<_>>();
-        let outputs = Arc::clone(&get(&self.pipeline_runs, &pipeline_run_arc)?.outputs);
-
-        // Create the task that captures the output from the nodes and stores them in the outputs map
-        self.pipeline_runs
-            .get_mut(&pipeline_run_arc)
-            .context(selector::KeyMissing {
-                key: pipeline_run_arc.to_string(),
-            })?
-            .node_task_join_set
-            .spawn(async move {
-                let mut complete_parent_nodes = HashSet::new();
-                while let Some(message) = output_rx.recv().await {
-                    match message {
-                        Message::NodeOutput(sender_node_id, hash_map) => {
-                            // Store the output in the outputs map
-                            outputs
-                                .write()
-                                .await
-                                .entry(sender_node_id)
-                                .or_default()
-                                .push(hash_map);
-                        }
-                        Message::NodeProcessingComplete(sender_node_id) => {
-                            // Add the sender node id to the complete parent nodes
-                            complete_parent_nodes.insert(sender_node_id.clone());
-
-                            // Check if all parent nodes are complete
-                            if complete_parent_nodes.is_superset(&output_nodes_ids) {
-                                // All parents are complete, we can exit this task
-                                return Ok(());
-                            }
-                        }
-                        Message::Stop => {
-                            // No clear action needed, just exit the task
-                            return Ok(());
-                        }
-                    }
-                }
-                Ok(())
-            });
-
-        // Get all the root nodes and call the create_task_for_node function for each root node
-        // This will recursively create all the tasks and channels for the pipeline
-        let root_nodes_tx = pipeline
-            .get_root_nodes()
-            .map(|node| {
-                self.create_task_for_node(node, &pipeline_run_arc, &output_tx, namespace_lookup)
-            })
-            .collect::<Result<Vec<_>>>()?;
+        // Create a task for each node
 
         // All pipeline tasks have been created, now we need to feed the inputs to the pipeline
         for tx in &root_nodes_tx {
@@ -189,7 +140,15 @@ impl DockerPipelineRunner {
                 .await?;
         }
 
-        Ok(pipeline_run)
+        // Insert into the list of pipeline runs
+        self.pipeline_runs.insert(pipeline_run);
+
+        Ok(self
+            .pipeline_runs
+            .get(&pipeline_run_arc)
+            .context(selector::KeyMissing {
+                key: pipeline_run.to_string(),
+            })?)
     }
 
     /// Given a pipeline run, wait for all its tasks to complete and return the `PipelineResult`
@@ -264,79 +223,6 @@ impl DockerPipelineRunner {
         Ok(())
     }
 
-    /// Helper function to create a task for each node, while recursively BFS through the pipeline
-    /// Summary:
-    /// 1. Check if their is already a channel created for the node, if not create one and insert it
-    /// 2. Call this function for each of the child nodes to get their `Sender_tx`
-    /// 3. If the node is a leaf node, attach the `output_tx` to the tx (Will be replaced by `output_nodes`)
-    /// 4. Start the task manager for the node, which will act as the node's processor
-    fn create_task_for_node(
-        &mut self,
-        node: &Node,
-        pipeline_run: &Arc<PipelineRun>,
-        output_tx: &mpsc::Sender<Message>,
-        namespace_lookup: &HashMap<String, PathBuf>,
-    ) -> Result<mpsc::Sender<Message>> {
-        println!("Creating task for node: {}", node.id);
-        // Create a channel for the node
-
-        // Use closer to limit the scope of the borrow
-        let (tx, rx) = {
-            let pipeline_info =
-                self.pipeline_runs
-                    .get_mut(pipeline_run)
-                    .context(selector::KeyMissing {
-                        key: pipeline_run.to_string(),
-                    })?;
-            // Check if the node is already inside the node_tx
-            if pipeline_info.node_tx.contains_key(&node.id) {
-                // Node already exists, thus we can return the existing tx
-                return Ok(get(&pipeline_info.node_tx, &node.id)?.clone());
-            }
-
-            // This channel will be used to send messages to the node processor
-            let (tx, rx) = mpsc::channel::<Message>(128);
-
-            // Record the tx into the pipeline_info tx_hashmap
-            pipeline_info.node_tx.insert(node.id.clone(), tx.clone());
-            (tx, rx)
-        };
-
-        // Call this function for each of the child nodes to get their Sender_tx
-        let mut children_node_tx = pipeline_run
-            .pipeline_job
-            .pipeline
-            .get_children_for_node(node)
-            .map(|child_node| {
-                self.create_task_for_node(child_node, pipeline_run, output_tx, namespace_lookup)
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        // Check if children_node_tx is empty, if so, this is a leaf node thus we need to attach the output_tx
-        if children_node_tx.is_empty() {
-            // This is a leaf node, thus we need to attach the output_tx to the tx
-            // This will allow the node to send its output to the output channel
-            children_node_tx.push(output_tx.clone());
-        }
-
-        // Start the task_manager
-        self.pipeline_runs
-            .get_mut(pipeline_run)
-            .context(selector::KeyMissing {
-                key: pipeline_run.to_string(),
-            })?
-            .node_task_join_set
-            .spawn(Self::start_node_manager(
-                node.clone(),
-                Arc::clone(pipeline_run),
-                rx,
-                children_node_tx,
-                namespace_lookup.clone(),
-            ));
-
-        Ok(tx)
-    }
-
     /// Act as the processor of the node by:
     /// 1. Creating a metadata struct for the node to be passed to the appropriate processor
     /// 2. Get the kernel for the node and build the correct processor for this node
@@ -345,27 +231,13 @@ impl DockerPipelineRunner {
     ///
     /// # Errors
     /// Will error out if the kernel for the node is not found or if the
-    async fn start_node_manager(
-        node: Node,
-        pipeline_run: Arc<PipelineRun>,
-        node_rx: mpsc::Receiver<Message>,
-        success_chs_tx: Vec<mpsc::Sender<Message>>,
-        namespace_lookup: HashMap<String, PathBuf>,
+    async fn start_node_task(
+        kernel: Kernel,
+        output_key_expression: String,
+        namespace_path: PathBuf,
     ) -> Result<()> {
-        // Create a metadata struct for this node
-        let node_metadata = NodeMetaData {
-            node_id: node.id.clone(),
-            node_rx,
-            child_nodes_txs: success_chs_tx.clone(),
-            namespace: pipeline_run.pipeline_job.output_dir.namespace.clone(),
-            namespace_lookup: namespace_lookup.clone(),
-        };
-
         // Get the kernel for this node and build the correct processor
-        match get(
-            &pipeline_run.pipeline_job.pipeline.kernel_lut,
-            &node.kernel_hash,
-        )? {
+        match kernel {
             Kernel::Pod(pod) => {
                 let mut processor = PodProcessor::new(Arc::clone(pod), node_metadata);
                 processor.start().await;
@@ -411,55 +283,37 @@ impl DockerPipelineRunner {
     }
 }
 
-/// Metadata for the node processor
-/// Contains fields that is normally needed to process incoming packets
-struct NodeMetaData {
-    node_id: String,
-    node_rx: mpsc::Receiver<Message>, // Channel to listen to messages from parent nodes
-    child_nodes_txs: Vec<mpsc::Sender<Message>>, // Channel to send successful outputs to the next node
-    namespace: String,
-    namespace_lookup: HashMap<String, PathBuf>, // Copy of the look up table
-}
-
 /// Unify the interface for node processors and provide a common way to handle processing of incoming messages
 /// This trait defines the methods that all node processors should implement
 ///
 /// Main purpose was to reduce the amount of code duplication between different node processors
 /// As a result, each processor only needs to worry about writing their own function to process the msg.
 pub(crate) trait NodeProcessor {
-    fn get_node_rx(&mut self) -> &mut mpsc::Receiver<Message>;
-
-    async fn start(&mut self) {
-        // Start to listen to the channels
-        // Listen to the MPSC channel and handle messages
-        while let Some(msg) = self.get_node_rx().recv().await {
-            if self.process_msg(msg).await {
-                // If the message indicates that processing is complete, we can exit the loop
-                // Wait for all processing tasks to complete before returning
-                self.wait_for_node_task_completion().await;
-                break;
-            }
-        }
-    }
+    async fn process_packet(
+        &mut self,
+        packet: HashMap<String, PathSet>,
+        session: Arc<zenoh::Session>,
+        output_key_exp: &str,
+    ) -> Result<()>;
 
-    async fn process_msg(&mut self, msg: Message) -> bool;
+    async fn wait_for_node_task_completion(&mut self) -> Result<()>;
 
-    async fn wait_for_node_task_completion(&mut self);
+    fn stop(&mut self) -> Result<()>;
 }
 
 /// Processor for Pods
 /// Currently missing implementation to call agents for actual pod processing
 struct PodProcessor {
+    session: zenoh::Session,
     pod: Arc<Pod>,
-    node_metadata: NodeMetaData,
     processing_tasks: JoinSet<Result<(), OrcaError>>,
 }
 
 impl PodProcessor {
-    fn new(pod: Arc<Pod>, node_metadata: NodeMetaData) -> Self {
+    fn new(pod: Arc<Pod>) -> Self {
         Self {
+            session: zenoh::Session::default(),
             pod,
-            node_metadata,
             processing_tasks: JoinSet::new(),
         }
     }
@@ -537,59 +391,19 @@ impl PodProcessor {
 }
 
 impl NodeProcessor for PodProcessor {
-    fn get_node_rx(&mut self) -> &mut mpsc::Receiver<Message> {
-        &mut self.node_metadata.node_rx
-    }
+    async fn process_packet(
+        &mut self,
+        packet: HashMap<String, PathSet>,
+        session: Arc<zenoh::Session>,
+        output_key_exp: &str,
+    ) -> Result<()>;
 
-    async fn process_msg(&mut self, msg: Message) -> bool {
-        match msg {
-            Message::NodeOutput(_, packet) => {
-                let pod_ref = Arc::clone(&self.pod);
-                let node_id = self.node_metadata.node_id.clone();
-                let namespace = self.node_metadata.namespace.clone();
-                let namespace_lookup = self.node_metadata.namespace_lookup.clone();
-                let child_nodes_txs = self.node_metadata.child_nodes_txs.clone();
-                // Forward it into a processing task
-                self.processing_tasks.spawn(async move {
-                    // Process the packet using the pod
-                    // This will execute the pod and send the output to the next node
-                    if let Err(err) = Self::process_packet(
-                        node_id,
-                        pod_ref,
-                        namespace,
-                        namespace_lookup,
-                        packet,
-                        child_nodes_txs,
-                    )
-                    .await
-                    {
-                        // Send the error to the failure channel
-                        // For now just print it out
-                        eprintln!("Failed to process packet with error: {err}");
-                    }
-                    Ok(())
-                });
-            }
-            Message::Stop => {
-                // Stop message received, we will stop processing
-                self.processing_tasks.abort_all();
-                return true;
-            }
-            Message::NodeProcessingComplete(_) => {
-                // Since pod only have one parent, we can expect that there will be no more incoming packet
-                // thus, we need to wait for everything to finish processing and send completion message
-                // Return true to notify caller that processing is complete
-                self.wait_for_node_task_completion().await;
-                return true;
-            }
-        }
-        false
+    async fn wait_for_node_task_completion(&mut self) -> Result<()> {
+        todo!()
     }
 
-    async fn wait_for_node_task_completion(&mut self) {
-        while self.processing_tasks.join_next().await.is_some() {
-            // Wait for all processing tasks to complete
-        }
+    fn stop(&mut self) -> Result<()> {
+        todo!()
     }
 }
 
@@ -597,7 +411,6 @@ impl NodeProcessor for PodProcessor {
 /// This processor renames the `input_keys` from the input packet to the `output_keys` defined by the map
 struct MapperProcessor {
     mapper: Arc<Mapper>,
-    node_metadata: NodeMetaData,
 }
 
 impl MapperProcessor {
@@ -633,30 +446,41 @@ impl MapperProcessor {
 }
 
 impl NodeProcessor for MapperProcessor {
-    fn get_node_rx(&mut self) -> &mut mpsc::Receiver<Message> {
-        &mut self.node_metadata.node_rx
-    }
+    async fn process_packet(
+        &mut self,
+        packet: HashMap<String, PathSet>,
+        session: Arc<zenoh::Session>,
+        output_key_exp: &str,
+    ) -> Result<()> {
+        // Apply the mapping to the input packet
+        let output_map = self
+            .mapper
+            .mapping
+            .iter()
+            .map(|(input_key, output_key)| {
+                let input = get(&packet, input_key)?.clone();
+                Ok((output_key.to_owned(), input))
+            })
+            .collect::<Result<HashMap<_, _>>>()?;
 
-    async fn wait_for_node_task_completion(&mut self) {
-        // Mapper doesn't spawn additional tasks, so this is a no-op
+        // Send the packet outwards
+        session
+            .put(
+                output_key_exp,
+                bitcode::encode(&Message::NodeOutput((), ())),
+            )
+            .await
+            .unwrap();
+
+        Ok(())
     }
 
-    async fn process_msg(&mut self, msg: Message) -> bool {
-        match msg {
-            Message::NodeOutput(_, packet) => {
-                match self.process_packet(&packet).await {
-                    Ok(()) => {}
-                    Err(err) => {
-                        // Send the error to the failure channel
-                        // For now just print it out
-                        eprintln!("Failed to process packet with error: {err}");
-                    }
-                }
-            }
-            Message::NodeProcessingComplete(_) | Message::Stop => return true,
-        }
+    async fn wait_for_node_task_completion(&mut self) -> Result<()> {
+        todo!()
+    }
 
-        false
+    fn stop(&mut self) -> Result<()> {
+        todo!()
     }
 }
 
diff --git a/tests/agent.rs b/tests/agent.rs
index 781c8086..2935e213 100644
--- a/tests/agent.rs
+++ b/tests/agent.rs
@@ -39,7 +39,6 @@ fn simple() -> Result<()> {
     Ok(())
 }
 
-#[expect(clippy::excessive_nesting, reason = "Nesting is manageable")]
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn parallel_four_cores() -> Result<()> {
     let test_dirs = TestDirs::new(&HashMap::from([("default".to_owned(), None::<String>)]))?;
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index d3b816af..054a8f2e 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -33,7 +33,7 @@ async fn basic_run() -> Result<()> {
     Ok(())
 }
 
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn stop() -> Result<()> {
     let pipeline_job = pipeline_job()?;
 

From b77d3f27a26d895ec87a7cdf5123e17e8fdc753a Mon Sep 17 00:00:00 2001
From: synicix <synicix@gmail.com>
Date: Fri, 18 Jul 2025 17:48:47 +0000
Subject: [PATCH 14/29] dsave progres

---
 .devcontainer/gpu/devcontainer.json  |   1 -
 Cargo.toml                           |   2 +-
 cspell.json                          |   3 +-
 src/uniffi/pipeline_runner/runner.rs | 284 ++++++++++++++++-----------
 4 files changed, 170 insertions(+), 120 deletions(-)

diff --git a/.devcontainer/gpu/devcontainer.json b/.devcontainer/gpu/devcontainer.json
index ebbaba51..f663fc73 100644
--- a/.devcontainer/gpu/devcontainer.json
+++ b/.devcontainer/gpu/devcontainer.json
@@ -21,7 +21,6 @@
 	},
 	"runArgs": [
 		"--name=${localWorkspaceFolderBasename}_devcontainer",
-		"--gpus=all",
 		"--privileged",
 		"--cgroupns=host"
 	],
diff --git a/Cargo.toml b/Cargo.toml
index 4b1ff057..d70b1059 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,7 +29,7 @@ crate-type = ["rlib", "cdylib"]
 [dependencies]
 # make async fn in traits work with dyn traits
 async-trait = "0.1.88"
-bitcode = "0.6.6"
+bincode = { version = "2.0.1", features = ["serde"] }
 # docker API in orchestrator
 bollard = "0.17.1"
 # datetime utilities
diff --git a/cspell.json b/cspell.json
index 4211ef6e..e1526ca7 100644
--- a/cspell.json
+++ b/cspell.json
@@ -82,7 +82,8 @@
         "itertools",
         "oneshot",
         "patchelf",
-        "colinianking"
+        "colinianking",
+        "bitcode"
     ],
     "useGitignore": false,
     "ignorePaths": [
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 8a1475be..94abcb4e 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -6,11 +6,12 @@ use crate::{
         pipeline::{Kernel, Mapper, Node, PipelineJob, PipelineResult},
     },
 };
-use bitcode::{Decode, Encode};
+use bincode::{Decode, Encode};
 use futures_util::future::try_join_all;
 use itertools::Itertools as _;
+use serde::{Deserialize, Serialize};
 use serde_yaml::Serializer;
-use snafu::OptionExt as _;
+use snafu::{OptionExt as _, ResultExt};
 use std::{
     collections::{HashMap, HashSet},
     hash::{Hash, Hasher},
@@ -22,10 +23,14 @@ use tokio::{
     task::{JoinSet, spawn_blocking},
 };
 
-#[derive(Encode, Decode, Clone, Debug)]
+static SUCCESS_KEY_EXP: &str = "/success";
+static FAILURE_KEY_EXP: &str = "/failure";
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
 pub(crate) enum Message {
     /// String is the `parent_node_id`, while `HashMap` is output of the parent node
     NodeOutput(String, HashMap<String, PathSet>),
+    NodeProcessingFailure(String, String), // String is the `node_id` that has failed processing
     /// String is the `node_id` that has completed processing
     NodeProcessingComplete(String),
     Stop, // Message to halt all operations
@@ -288,12 +293,16 @@ impl DockerPipelineRunner {
 ///
 /// Main purpose was to reduce the amount of code duplication between different node processors
 /// As a result, each processor only needs to worry about writing their own function to process the msg.
-pub(crate) trait NodeProcessor {
+trait NodeProcessor {
     async fn process_packet(
         &mut self,
-        packet: HashMap<String, PathSet>,
+        sender_node_id: &str,
+        node_id: &str,
+        packet: &HashMap<String, PathSet>,
         session: Arc<zenoh::Session>,
         output_key_exp: &str,
+        namespace: &str,
+        namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()>;
 
     async fn wait_for_node_task_completion(&mut self) -> Result<()>;
@@ -321,6 +330,7 @@ impl PodProcessor {
     /// Actual logic of processing a packet using the pod
     /// At the moment it does a simulation of pod execution
     async fn process_packet(
+        _sender_node_id: &str,
         node_id: String,
         pod: Arc<Pod>,
         namespace: String,
@@ -393,10 +403,80 @@ impl PodProcessor {
 impl NodeProcessor for PodProcessor {
     async fn process_packet(
         &mut self,
-        packet: HashMap<String, PathSet>,
+        _sender_node_id: &str,
+        node_id: &str,
+        packet: &HashMap<String, PathSet>,
         session: Arc<zenoh::Session>,
         output_key_exp: &str,
-    ) -> Result<()>;
+        namespace: &str,
+        namespace_lookup: &HashMap<String, PathBuf>,
+    ) -> Result<()> {
+        // Process the packet using the pod
+        // Create the pod_job
+
+        // We need a unique hash for this given input packet process by the node
+        // therefore we need to generate a hash that has the pod_id + input_packet
+        let node_id_bytes = node_id.as_bytes().to_vec();
+        let packet_copy = packet.clone();
+        let input_packet_hash = {
+            let mut buf = node_id_bytes;
+            let mut serializer = Serializer::new(&mut buf);
+            serialize_hashmap(&packet_copy, &mut serializer)?;
+            hash_buffer(buf)
+        };
+        let output_dir = URI {
+            namespace: namespace.to_owned(),
+            path: PathBuf::from(format!("pod_runs/{}/{}", self.pod.hash, input_packet_hash)),
+        };
+
+        let cpu_limit = self.pod.recommended_cpus;
+        let memory_limit = self.pod.recommended_memory;
+
+        // Create the pod job
+        let pod_job = PodJob::new(
+            None,
+            Arc::clone(&self.pod),
+            packet.clone(),
+            output_dir,
+            cpu_limit,
+            memory_limit,
+            None,
+            &namespace_lookup,
+        )?;
+
+        // Simulate pod execution by just printing out pod_job_hash and pod hash
+        // This will be replaced by sending the pod_job to the orchestrator via the agent
+        self.processing_tasks.spawn(async move {
+            println!(
+                "Simulating Executing pod job: {} with pod hash: {}",
+                pod_job.hash, pod_job.pod.hash
+            );
+            Ok(())
+        });
+
+        #[expect(
+            clippy::unwrap_used,
+            reason = "Hard code for now, will be replaced by agent"
+        )]
+        // Build the output_packet, in reality, this will be extracted from the pod_result
+        let output_packet = self
+            .pod
+            .output_spec
+            .keys()
+            .map(|output_key| (output_key.clone(), packet.values().next().cloned().unwrap()))
+            .collect::<HashMap<_, _>>();
+
+        // For now we will just send the input_packet to the success channel
+        session
+            .put(
+                output_key_exp,
+                bitcode::encode(&Message::NodeOutput(node_id.to_owned(), output_packet)),
+            )
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+
+        Ok(())
+    }
 
     async fn wait_for_node_task_completion(&mut self) -> Result<()> {
         todo!()
@@ -414,43 +494,21 @@ struct MapperProcessor {
 }
 
 impl MapperProcessor {
-    const fn new(mapper: Arc<Mapper>, node_metadata: NodeMetaData) -> Self {
-        Self {
-            mapper,
-            node_metadata,
-        }
-    }
-
-    async fn process_packet(&self, packet: &HashMap<String, PathSet>) -> Result<()> {
-        // Apply the mapping to the input packet
-        let output_map = self
-            .mapper
-            .mapping
-            .iter()
-            .map(|(input_key, output_key)| {
-                let input = get(packet, input_key)?.clone();
-                Ok((output_key.to_owned(), input))
-            })
-            .collect::<Result<HashMap<_, _>>>()?;
-
-        // Send the output via the channel
-        try_join_all(self.node_metadata.child_nodes_txs.iter().map(|ch| {
-            ch.send(Message::NodeOutput(
-                self.node_metadata.node_id.clone(),
-                output_map.clone(),
-            ))
-        }))
-        .await?;
-        Ok(())
+    const fn new(mapper: Arc<Mapper>) -> Self {
+        Self { mapper }
     }
 }
 
 impl NodeProcessor for MapperProcessor {
     async fn process_packet(
         &mut self,
-        packet: HashMap<String, PathSet>,
+        _sender_node_id: &str,
+        node_id: &str,
+        packet: &HashMap<String, PathSet>,
         session: Arc<zenoh::Session>,
         output_key_exp: &str,
+        _namespace: &str,
+        _namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()> {
         // Apply the mapping to the input packet
         let output_map = self
@@ -467,7 +525,7 @@ impl NodeProcessor for MapperProcessor {
         session
             .put(
                 output_key_exp,
-                bitcode::encode(&Message::NodeOutput((), ())),
+                bitcode::encode(&Message::NodeOutput(node_id.to_owned(), output_map)),
             )
             .await
             .unwrap();
@@ -491,35 +549,32 @@ struct JoinerProcessor {
     /// Cache for all packets received by the node
     input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
     completed_parents: Vec<String>,
-    node_metadata: NodeMetaData,
     initial_computation_completed: bool,
+    processing_tasks: JoinSet<Result<(), OrcaError>>,
 }
 
 impl JoinerProcessor {
-    fn new(parents_node_id: Vec<String>, node_metadata: NodeMetaData) -> Self {
+    fn new(parents_node_id: Vec<String>) -> Self {
         let input_packet_cache = parents_node_id
             .into_iter()
             .map(|id| (id, Vec::new()))
             .collect();
         Self {
             input_packet_cache,
-            node_metadata,
             completed_parents: Vec::new(),
             initial_computation_completed: false,
+            processing_tasks: JoinSet::new(),
         }
     }
 
     fn compute_new_packet_combination(
-        &mut self,
         sender_node_id: &str,
         new_packet: &HashMap<String, PathSet>,
+        packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
     ) -> Result<Vec<HashMap<String, PathSet>>> {
         // Combine the new packet with the existing packets in the cache
         // Get all the cached packets from other parents
-        let other_parent_ids = self
-            .input_packet_cache
-            .keys()
-            .filter(|key| *key != sender_node_id);
+        let other_parent_ids = packet_cache.keys().filter(|key| *key != sender_node_id);
 
         // Create a vector to hold the incoming packet
         // This will be used to compute the cartesian product and will be modified if the initial computation is not completed
@@ -567,94 +622,89 @@ impl JoinerProcessor {
 
         Ok(result)
     }
+}
 
+impl NodeProcessor for JoinerProcessor {
     async fn process_packet(
         &mut self,
         sender_node_id: &str,
-        packet: HashMap<String, PathSet>,
+        node_id: &str,
+        packet: &HashMap<String, PathSet>,
+        session: Arc<zenoh::Session>,
+        output_key_exp: &str,
+        namespace: &str,
+        namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()> {
-        let process_result = {
-            // Compute the new packet combination based on the sender node id and the packet
-            let new_packets_to_send =
-                self.compute_new_packet_combination(sender_node_id, &packet)?;
-
-            // Record the packet into the cache
-            self.input_packet_cache
-                .get_mut(sender_node_id)
-                .context(selector::KeyMissing {
-                    key: sender_node_id.to_owned(),
-                })?
-                .push(packet);
-
-            Ok::<Vec<HashMap<String, PathSet>>, OrcaError>(new_packets_to_send)
-        };
+        self.input_packet_cache
+            .get_mut(sender_node_id)
+            .context(selector::KeyMissing {
+                key: sender_node_id.to_owned(),
+            })?
+            .push(packet.clone());
+
+        self.processing_tasks.spawn(async move {
+            let process_result = {
+                for packet in self.compute_new_packet_combination(sender_node_id, &packet)? {
+                    session
+                        .put(
+                            output_key_exp.to_owned() + SUCCESS_KEY_EXP,
+                            bincode::serde::encode_to_vec(
+                                &Message::NodeOutput(node_id.to_owned(), packet),
+                                bincode::config::standard(),
+                            )
+                            .unwrap(),
+                        )
+                        .await
+                        .context(selector::AgentCommunicationFailure {})?;
+                }
+                Ok::<(), OrcaError>(())
+            };
 
-        match process_result {
-            Ok(output_packets) => {
-                // Send the output packets to the success channel
-                for output_packet in output_packets {
-                    try_join_all(self.node_metadata.child_nodes_txs.iter().map(|ch| {
-                        ch.send(Message::NodeOutput(
-                            self.node_metadata.node_id.clone(),
-                            output_packet.clone(),
-                        ))
-                    }))
-                    .await?;
+            match process_result {
+                Ok(_) => {}
+                Err(err) => {
+                    // Something failed thus we should output to the failed channel
+                    session
+                        .put(
+                            output_key_exp.to_owned() + FAILURE_KEY_EXP,
+                            bincode::serde::encode_to_vec(
+                                &Message::NodeProcessingFailure(
+                                    node_id.to_owned(),
+                                    err.to_string(),
+                                ),
+                                bincode::config::standard(),
+                            )
+                            .unwrap(),
+                        )
+                        .await
+                        .context(selector::AgentCommunicationFailure {})?;
                 }
             }
-            Err(err) => {
-                // Send the error to the failure channel
-                eprintln!(
-                    "Failed to process packet from {sender_node_id} for joiner node with error: {err}"
-                );
-            }
-        }
-        // Add the new packet into the cache
+            // For each new packet, we
+            Ok(())
+        });
 
         Ok(())
     }
-}
 
-impl NodeProcessor for JoinerProcessor {
-    fn get_node_rx(&mut self) -> &mut mpsc::Receiver<Message> {
-        &mut self.node_metadata.node_rx
+    async fn wait_for_node_task_completion(&mut self) -> Result<()> {
+        todo!()
     }
 
-    async fn wait_for_node_task_completion(&mut self) {
-        // Joiner doesn't spawn additional tasks, so this is a no-op
+    fn stop(&mut self) -> Result<()> {
+        todo!()
     }
+}
 
-    async fn process_msg(&mut self, msg: Message) -> bool {
-        match msg {
-            Message::NodeOutput(sender_node_id, packet) => {
-                // Process the packet and send the output to the success channel
-                match self.process_packet(&sender_node_id, packet).await {
-                    Ok(()) => {}
-                    Err(err) => {
-                        // Send the error to the failure channel
-                        eprintln!("Failed to process packet with error: {err}");
-                    }
-                }
-            }
-            Message::NodeProcessingComplete(sender_node_id) => {
-                // Record that this parent node has completed processing
-                self.completed_parents.push(sender_node_id);
-
-                // Check if all parents have completed processing
-                if self.completed_parents.len() == self.input_packet_cache.len() {
-                    // All parents have completed processing, we can send the output
-                    // Wait for all packets to be processed and send the output
-                    return true;
-                }
-            }
-            Message::Stop => {
-                // We don't have anything to clean up, so we can just return
-                return true;
-            }
-        }
-
-        false
-    }
+// Utils functions
+fn get_node_id(output_key_exp: &str) -> String {
+    // Extract the node id from the output key expression
+    // The output key expression is in the format of "pipeline_job_hash/node_id/outputs"
+    output_key_exp
+        .split('/')
+        .nth(1)
+        .map(|s| s.to_owned())
+        .unwrap_or_else(|| "unknown_node".to_owned())
 }
 
 #[cfg(test)]

From 75d1e79465223a3890c35a9ec51601079466a804 Mon Sep 17 00:00:00 2001
From: synicix <synicix@gmail.com>
Date: Fri, 18 Jul 2025 21:15:55 +0000
Subject: [PATCH 15/29] Save progress

---
 src/core/error.rs                    |  10 +++
 src/uniffi/error.rs                  |   5 ++
 src/uniffi/pipeline_runner/runner.rs | 111 ++++++++++++++++-----------
 3 files changed, 80 insertions(+), 46 deletions(-)

diff --git a/src/core/error.rs b/src/core/error.rs
index f4263009..12918fe9 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -25,6 +25,16 @@ impl From<BollardError> for OrcaError {
         }
     }
 }
+impl From<EncodingError> for OrcaError {
+    fn from(error: EncodingError) -> Self {
+        Self {
+            kind: Kind::FailedToParseDot {
+                source: error,
+                backtrace: Some(Backtrace::capture()),
+            },
+        }
+    }
+}
 impl From<oneshot::error::RecvError> for OrcaError {
     fn from(error: oneshot::error::RecvError) -> Self {
         Self {
diff --git a/src/uniffi/error.rs b/src/uniffi/error.rs
index 89ac49ea..95f1d1e1 100644
--- a/src/uniffi/error.rs
+++ b/src/uniffi/error.rs
@@ -107,6 +107,11 @@ pub(crate) enum Kind {
         backtrace: Option<Backtrace>,
     },
     #[snafu(transparent)]
+    EncodingError {
+        source: EncodingError,
+        backtrace: Option<Backtrace>,
+    },
+    #[snafu(transparent)]
     GlobPatternError {
         source: glob::PatternError,
         backtrace: Option<Backtrace>,
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 94abcb4e..d8bcc3a0 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -6,7 +6,7 @@ use crate::{
         pipeline::{Kernel, Mapper, Node, PipelineJob, PipelineResult},
     },
 };
-use bincode::{Decode, Encode};
+use bincode::{Decode, Encode, config, serde::encode_to_vec};
 use futures_util::future::try_join_all;
 use itertools::Itertools as _;
 use serde::{Deserialize, Serialize};
@@ -525,7 +525,11 @@ impl NodeProcessor for MapperProcessor {
         session
             .put(
                 output_key_exp,
-                bitcode::encode(&Message::NodeOutput(node_id.to_owned(), output_map)),
+                bincode::serde::encode_to_vec(
+                    &Message::NodeOutput(node_id.to_owned(), output_map),
+                    bincode::config::standard(),
+                )
+                .unwrap(),
             )
             .await
             .unwrap();
@@ -567,47 +571,10 @@ impl JoinerProcessor {
         }
     }
 
-    fn compute_new_packet_combination(
-        sender_node_id: &str,
-        new_packet: &HashMap<String, PathSet>,
-        packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
-    ) -> Result<Vec<HashMap<String, PathSet>>> {
-        // Combine the new packet with the existing packets in the cache
-        // Get all the cached packets from other parents
-        let other_parent_ids = packet_cache.keys().filter(|key| *key != sender_node_id);
-
-        // Create a vector to hold the incoming packet
-        // This will be used to compute the cartesian product and will be modified if the initial computation is not completed
-        let mut incoming_packet = vec![new_packet.clone()];
-
-        // Determine if the initial computation has been computed
-        if !self.initial_computation_completed {
-            // Check if we at least have one cached packet for each of the other parents
-            for parent_id in other_parent_ids.clone() {
-                if get(&self.input_packet_cache, parent_id)?.is_empty() {
-                    // We are still missing other parents, so we can't compute the new packet combination yet
-                    return Ok(Vec::new());
-                }
-            }
-
-            // We have at least one packet for each of the other parents, thus we can compute the cartesian product
-            // For the initial computation, we will add all of the add all previous packets for this sender
-            get(&self.input_packet_cache, &sender_node_id.to_owned())?
-                .iter()
-                .for_each(|packet| incoming_packet.push(packet.clone()));
-
-            self.initial_computation_completed = true;
-        }
-
-        let mut factors = other_parent_ids
-            .map(|id| get(&self.input_packet_cache, id))
-            .collect::<Result<Vec<_>>>()?;
-
-        // Add the new incoming packet as a factor
-
-        factors.push(&incoming_packet);
-
-        let result = factors
+    fn compute_cartesian_product(
+        factors: &Vec<&Vec<HashMap<String, PathSet>>>,
+    ) -> Vec<HashMap<String, PathSet>> {
+        factors
             .into_iter()
             .multi_cartesian_product()
             .map(|packets_to_combined| {
@@ -618,9 +585,7 @@ impl JoinerProcessor {
                         acc
                     })
             })
-            .collect::<Vec<_>>();
-
-        Ok(result)
+            .collect::<Vec<_>>()
     }
 }
 
@@ -642,6 +607,60 @@ impl NodeProcessor for JoinerProcessor {
             })?
             .push(packet.clone());
 
+        // Check if we have all the other parents needed to compute the cartesian product
+        if self.input_packet_cache.values().all(|v| !v.is_empty()) {
+            // Get all the cached packets from other parents
+            let other_parent_ids = self
+                .input_packet_cache
+                .keys()
+                .filter(|key| *key != sender_node_id);
+
+            // Build the factors of the product
+            let mut factors = other_parent_ids
+                .map(|id| get(&self.input_packet_cache, id))
+                .collect::<Result<Vec<_>>>()?;
+
+            // Add the new packet as a factor
+            factors.push(&vec![packet.clone()]);
+
+            // Compute the cartesian product of the factors
+            self.processing_tasks.spawn(async move {
+                let cartesian_product = Self::compute_cartesian_product(&factors);
+
+                // Post all products to the output channel
+                for output_packet in cartesian_product {
+                    let result = session.put(
+                        output_key_exp.to_owned() + SUCCESS_KEY_EXP,
+                        encode_to_vec(
+                            &Message::NodeOutput(node_id.to_owned(), output_packet),
+                            config::standard(),
+                        )?,
+                    );
+                }
+
+                Ok(())
+            });
+        }
+
+        // Check if this packet is the first packet from this parent node
+        if get(&self.input_packet_cache, sender_node_id)?.len() == 1 {}
+
+        // Determine if the initial computation is completed
+        if !self.initial_computation_completed {
+            // Check if we have at least one packet for each parent node
+            if self.input_packet_cache.values().all(|v| !v.is_empty()) {
+                self.initial_computation_completed = true;
+            } else {
+                // If not, we cannot compute the new packet combination yet
+                return Ok(());
+            }
+        }
+        if self.input_packet_cache.values().all(|v| !v.is_empty())
+            | self.input_packet_cache.values().any(|v| v.len() == 1)
+        {
+            // Initial case where we first have at least one packet for each parent node met
+        }
+
         self.processing_tasks.spawn(async move {
             let process_result = {
                 for packet in self.compute_new_packet_combination(sender_node_id, &packet)? {

From 99b2d1ee0f346c69449eb11c465858be811cd4ce Mon Sep 17 00:00:00 2001
From: synicix <synicix@gmail.com>
Date: Sat, 19 Jul 2025 00:33:32 +0000
Subject: [PATCH 16/29] Save progress

---
 src/core/error.rs                    |   8 +-
 src/uniffi/error.rs                  |   3 +-
 src/uniffi/pipeline_runner/runner.rs | 208 ++++++++++++---------------
 3 files changed, 96 insertions(+), 123 deletions(-)

diff --git a/src/core/error.rs b/src/core/error.rs
index 12918fe9..3ca897c7 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -2,6 +2,7 @@ use crate::uniffi::{
     error::{Kind, OrcaError},
     pipeline_runner::runner::Message,
 };
+use bincode::error::EncodeError;
 use bollard::errors::Error as BollardError;
 use glob;
 use serde_json;
@@ -25,10 +26,10 @@ impl From<BollardError> for OrcaError {
         }
     }
 }
-impl From<EncodingError> for OrcaError {
-    fn from(error: EncodingError) -> Self {
+impl From<EncodeError> for OrcaError {
+    fn from(error: EncodeError) -> Self {
         Self {
-            kind: Kind::FailedToParseDot {
+            kind: Kind::EncodingError {
                 source: error,
                 backtrace: Some(Backtrace::capture()),
             },
@@ -150,6 +151,7 @@ impl fmt::Debug for OrcaError {
             | Kind::NoTagFoundInContainerAltImage { backtrace, .. }
             | Kind::BollardError { backtrace, .. }
             | Kind::ChannelReceiveError { backtrace, .. }
+            | Kind::EncodingError { backtrace, .. }
             | Kind::GlobPatternError { backtrace, .. }
             | Kind::IoError { backtrace, .. }
             | Kind::PathPrefixError { backtrace, .. }
diff --git a/src/uniffi/error.rs b/src/uniffi/error.rs
index 95f1d1e1..4a8e9c0d 100644
--- a/src/uniffi/error.rs
+++ b/src/uniffi/error.rs
@@ -3,6 +3,7 @@
     reason = "Needed since SNAFU dynamically generating selectors."
 )]
 
+use bincode::error::EncodeError;
 use bollard::errors::Error as BollardError;
 use glob;
 use serde_json;
@@ -108,7 +109,7 @@ pub(crate) enum Kind {
     },
     #[snafu(transparent)]
     EncodingError {
-        source: EncodingError,
+        source: EncodeError,
         backtrace: Option<Backtrace>,
     },
     #[snafu(transparent)]
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index d8bcc3a0..63f12b4e 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -228,14 +228,16 @@ impl DockerPipelineRunner {
         Ok(())
     }
 
-    /// Act as the processor of the node by:
-    /// 1. Creating a metadata struct for the node to be passed to the appropriate processor
-    /// 2. Get the kernel for the node and build the correct processor for this node
-    /// 3. Start the processor and wait till it completes
-    /// 4. Send a message that the node processing is complete
-    ///
-    /// # Errors
-    /// Will error out if the kernel for the node is not found or if the
+    /**
+     * Act as the processor of the node by:
+     * 1. Creating a metadata struct for the node to be passed to the appropriate processor
+     * 2. Get the kernel for the node and build the correct processor for this node
+     * 3. Start the processor and wait till it completes
+     * 4. Send a message that the node processing is complete
+     *
+     * # Errors
+     * Will error out if the kernel for the node is not found or if the
+     */
     async fn start_node_task(
         kernel: Kernel,
         output_key_expression: String,
@@ -446,19 +448,12 @@ impl NodeProcessor for PodProcessor {
 
         // Simulate pod execution by just printing out pod_job_hash and pod hash
         // This will be replaced by sending the pod_job to the orchestrator via the agent
-        self.processing_tasks.spawn(async move {
-            println!(
-                "Simulating Executing pod job: {} with pod hash: {}",
-                pod_job.hash, pod_job.pod.hash
-            );
-            Ok(())
-        });
 
+        // Build the output_packet, in reality, this will be extracted from the pod_result
         #[expect(
             clippy::unwrap_used,
             reason = "Hard code for now, will be replaced by agent"
         )]
-        // Build the output_packet, in reality, this will be extracted from the pod_result
         let output_packet = self
             .pod
             .output_spec
@@ -466,24 +461,40 @@ impl NodeProcessor for PodProcessor {
             .map(|output_key| (output_key.clone(), packet.values().next().cloned().unwrap()))
             .collect::<HashMap<_, _>>();
 
-        // For now we will just send the input_packet to the success channel
-        session
-            .put(
-                output_key_exp,
-                bitcode::encode(&Message::NodeOutput(node_id.to_owned(), output_packet)),
-            )
-            .await
-            .context(selector::AgentCommunicationFailure {})?;
+        let node_id_clone = node_id.to_owned();
+        let output_key_exp_clone = output_key_exp.to_owned();
+        self.processing_tasks.spawn(async move {
+            println!(
+                "Simulating Executing pod job: {} with pod hash: {}",
+                pod_job.hash, pod_job.pod.hash
+            );
+
+            // For now we will just send the input_packet to the success channel
+            session
+                .put(
+                    output_key_exp_clone + SUCCESS_KEY_EXP,
+                    bincode::serde::encode_to_vec(
+                        &Message::NodeOutput(node_id_clone, output_packet),
+                        bincode::config::standard(),
+                    )?,
+                )
+                .await
+                .context(selector::AgentCommunicationFailure {})?;
+
+            Ok(())
+        });
 
         Ok(())
     }
 
     async fn wait_for_node_task_completion(&mut self) -> Result<()> {
-        todo!()
+        while self.processing_tasks.join_next().await.is_some() {}
+        Ok(())
     }
 
     fn stop(&mut self) -> Result<()> {
-        todo!()
+        self.processing_tasks.abort_all();
+        Ok(())
     }
 }
 
@@ -528,21 +539,22 @@ impl NodeProcessor for MapperProcessor {
                 bincode::serde::encode_to_vec(
                     &Message::NodeOutput(node_id.to_owned(), output_map),
                     bincode::config::standard(),
-                )
-                .unwrap(),
+                )?,
             )
             .await
-            .unwrap();
+            .context(selector::AgentCommunicationFailure {})?;
 
         Ok(())
     }
 
     async fn wait_for_node_task_completion(&mut self) -> Result<()> {
-        todo!()
+        // All mappers tasks are synchronous, so we don't need to wait for anything
+        Ok(())
     }
 
     fn stop(&mut self) -> Result<()> {
-        todo!()
+        // Mappers do not have any state to stop, so we can just return Ok
+        Ok(())
     }
 }
 
@@ -572,7 +584,7 @@ impl JoinerProcessor {
     }
 
     fn compute_cartesian_product(
-        factors: &Vec<&Vec<HashMap<String, PathSet>>>,
+        factors: &Vec<Vec<HashMap<String, PathSet>>>,
     ) -> Vec<HashMap<String, PathSet>> {
         factors
             .into_iter()
@@ -597,8 +609,8 @@ impl NodeProcessor for JoinerProcessor {
         packet: &HashMap<String, PathSet>,
         session: Arc<zenoh::Session>,
         output_key_exp: &str,
-        namespace: &str,
-        namespace_lookup: &HashMap<String, PathBuf>,
+        _namespace: &str,
+        _namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()> {
         self.input_packet_cache
             .get_mut(sender_node_id)
@@ -615,117 +627,75 @@ impl NodeProcessor for JoinerProcessor {
                 .keys()
                 .filter(|key| *key != sender_node_id);
 
-            // Build the factors of the product
+            // Build the factors of the product as owned values to avoid lifetime issues
             let mut factors = other_parent_ids
-                .map(|id| get(&self.input_packet_cache, id))
+                .map(|id| get(&self.input_packet_cache, id).map(|v| v.clone()))
                 .collect::<Result<Vec<_>>>()?;
 
             // Add the new packet as a factor
-            factors.push(&vec![packet.clone()]);
+            factors.push(vec![packet.clone()]);
 
             // Compute the cartesian product of the factors
+            let node_id_clone = node_id.to_owned();
+            let output_key_exp_clone = output_key_exp.to_owned();
+
             self.processing_tasks.spawn(async move {
+                // Convert Vec<Vec<HashMap<...>>> to Vec<&Vec<HashMap<...>>> for compute_cartesian_product
                 let cartesian_product = Self::compute_cartesian_product(&factors);
 
                 // Post all products to the output channel
                 for output_packet in cartesian_product {
-                    let result = session.put(
-                        output_key_exp.to_owned() + SUCCESS_KEY_EXP,
-                        encode_to_vec(
-                            &Message::NodeOutput(node_id.to_owned(), output_packet),
-                            config::standard(),
-                        )?,
-                    );
+                    let result = {
+                        session
+                            .put(
+                                output_key_exp_clone.clone() + SUCCESS_KEY_EXP,
+                                encode_to_vec(
+                                    &Message::NodeOutput(node_id_clone.clone(), output_packet),
+                                    config::standard(),
+                                )?,
+                            )
+                            .await
+                            .context(selector::AgentCommunicationFailure {})?;
+                        Ok::<(), OrcaError>(())
+                    };
+
+                    // If the result is an error, we will just send it to the error channel
+                    if let Err(err) = result {
+                        session
+                            .put(
+                                output_key_exp_clone.clone() + FAILURE_KEY_EXP,
+                                encode_to_vec(
+                                    &Message::NodeProcessingFailure(
+                                        node_id_clone.clone(),
+                                        err.to_string(),
+                                    ),
+                                    config::standard(),
+                                )?,
+                            )
+                            .await
+                            .context(selector::AgentCommunicationFailure {})?;
+                    }
                 }
 
                 Ok(())
             });
         }
-
-        // Check if this packet is the first packet from this parent node
-        if get(&self.input_packet_cache, sender_node_id)?.len() == 1 {}
-
-        // Determine if the initial computation is completed
-        if !self.initial_computation_completed {
-            // Check if we have at least one packet for each parent node
-            if self.input_packet_cache.values().all(|v| !v.is_empty()) {
-                self.initial_computation_completed = true;
-            } else {
-                // If not, we cannot compute the new packet combination yet
-                return Ok(());
-            }
-        }
-        if self.input_packet_cache.values().all(|v| !v.is_empty())
-            | self.input_packet_cache.values().any(|v| v.len() == 1)
-        {
-            // Initial case where we first have at least one packet for each parent node met
-        }
-
-        self.processing_tasks.spawn(async move {
-            let process_result = {
-                for packet in self.compute_new_packet_combination(sender_node_id, &packet)? {
-                    session
-                        .put(
-                            output_key_exp.to_owned() + SUCCESS_KEY_EXP,
-                            bincode::serde::encode_to_vec(
-                                &Message::NodeOutput(node_id.to_owned(), packet),
-                                bincode::config::standard(),
-                            )
-                            .unwrap(),
-                        )
-                        .await
-                        .context(selector::AgentCommunicationFailure {})?;
-                }
-                Ok::<(), OrcaError>(())
-            };
-
-            match process_result {
-                Ok(_) => {}
-                Err(err) => {
-                    // Something failed thus we should output to the failed channel
-                    session
-                        .put(
-                            output_key_exp.to_owned() + FAILURE_KEY_EXP,
-                            bincode::serde::encode_to_vec(
-                                &Message::NodeProcessingFailure(
-                                    node_id.to_owned(),
-                                    err.to_string(),
-                                ),
-                                bincode::config::standard(),
-                            )
-                            .unwrap(),
-                        )
-                        .await
-                        .context(selector::AgentCommunicationFailure {})?;
-                }
-            }
-            // For each new packet, we
-            Ok(())
-        });
-
         Ok(())
     }
 
     async fn wait_for_node_task_completion(&mut self) -> Result<()> {
-        todo!()
+        // We must wait for all joiner processing task to complete
+        while self.processing_tasks.join_next().await.is_some() {}
+        Ok(())
     }
 
     fn stop(&mut self) -> Result<()> {
-        todo!()
+        // We want to abort any computation
+        self.processing_tasks.abort_all();
+        Ok(())
     }
 }
 
-// Utils functions
-fn get_node_id(output_key_exp: &str) -> String {
-    // Extract the node id from the output key expression
-    // The output key expression is in the format of "pipeline_job_hash/node_id/outputs"
-    output_key_exp
-        .split('/')
-        .nth(1)
-        .map(|s| s.to_owned())
-        .unwrap_or_else(|| "unknown_node".to_owned())
-}
-
 #[cfg(test)]
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 #[expect(clippy::panic_in_result_fn, reason = "Unit test")]

From 630e27ed4fff054f020969ce9af9d2a9c96067d0 Mon Sep 17 00:00:00 2001
From: synicix <synicix@gmail.com>
Date: Sun, 20 Jul 2025 09:15:33 +0000
Subject: [PATCH 17/29] Save progress

---
 src/core/error.rs                    |  31 +-
 src/uniffi/error.rs                  |   7 +-
 src/uniffi/pipeline_runner/runner.rs | 884 ++++++++++++++++-----------
 tests/pipeline_runner.rs             |  72 ++-
 4 files changed, 615 insertions(+), 379 deletions(-)

diff --git a/src/core/error.rs b/src/core/error.rs
index 3ca897c7..ada0b0e2 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -1,8 +1,5 @@
-use crate::uniffi::{
-    error::{Kind, OrcaError},
-    pipeline_runner::runner::Message,
-};
-use bincode::error::EncodeError;
+use crate::uniffi::error::{Kind, OrcaError};
+use bincode::error::{DecodeError, EncodeError};
 use bollard::errors::Error as BollardError;
 use glob;
 use serde_json;
@@ -13,7 +10,7 @@ use std::{
     io,
     path::{self},
 };
-use tokio::sync::{mpsc::error::SendError, oneshot};
+use tokio::sync::oneshot;
 use tokio::task;
 
 impl From<BollardError> for OrcaError {
@@ -26,6 +23,16 @@ impl From<BollardError> for OrcaError {
         }
     }
 }
+impl From<DecodeError> for OrcaError {
+    fn from(error: DecodeError) -> Self {
+        Self {
+            kind: Kind::DecodeError {
+                source: error,
+                backtrace: Some(Backtrace::capture()),
+            },
+        }
+    }
+}
 impl From<EncodeError> for OrcaError {
     fn from(error: EncodeError) -> Self {
         Self {
@@ -106,17 +113,6 @@ impl From<task::JoinError> for OrcaError {
         }
     }
 }
-
-impl From<SendError<Message>> for OrcaError {
-    fn from(error: SendError<Message>) -> Self {
-        Self {
-            kind: Kind::SendError {
-                reason: error.to_string(),
-                backtrace: Some(Backtrace::capture()),
-            },
-        }
-    }
-}
 impl From<Kind> for OrcaError {
     fn from(error: Kind) -> Self {
         Self { kind: error }
@@ -151,6 +147,7 @@ impl fmt::Debug for OrcaError {
             | Kind::NoTagFoundInContainerAltImage { backtrace, .. }
             | Kind::BollardError { backtrace, .. }
             | Kind::ChannelReceiveError { backtrace, .. }
+            | Kind::DecodeError { backtrace, .. }
             | Kind::EncodingError { backtrace, .. }
             | Kind::GlobPatternError { backtrace, .. }
             | Kind::IoError { backtrace, .. }
diff --git a/src/uniffi/error.rs b/src/uniffi/error.rs
index 4a8e9c0d..c932570e 100644
--- a/src/uniffi/error.rs
+++ b/src/uniffi/error.rs
@@ -3,7 +3,7 @@
     reason = "Needed since SNAFU dynamically generating selectors."
 )]
 
-use bincode::error::EncodeError;
+use bincode::error::{DecodeError, EncodeError};
 use bollard::errors::Error as BollardError;
 use glob;
 use serde_json;
@@ -108,6 +108,11 @@ pub(crate) enum Kind {
         backtrace: Option<Backtrace>,
     },
     #[snafu(transparent)]
+    DecodeError {
+        source: DecodeError,
+        backtrace: Option<Backtrace>,
+    },
+    #[snafu(transparent)]
     EncodingError {
         source: EncodeError,
         backtrace: Option<Backtrace>,
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 63f12b4e..a1ddd3f9 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -3,47 +3,56 @@ use crate::{
     uniffi::{
         error::{OrcaError, Result, selector},
         model::{PathSet, Pod, PodJob, URI},
-        pipeline::{Kernel, Mapper, Node, PipelineJob, PipelineResult},
+        pipeline::{Kernel, Mapper, Node, Pipeline, PipelineJob, PipelineResult},
     },
 };
-use bincode::{Decode, Encode, config, serde::encode_to_vec};
-use futures_util::future::try_join_all;
+use async_trait::async_trait;
+use bincode::{
+    config,
+    serde::{decode_from_slice, encode_to_vec},
+};
 use itertools::Itertools as _;
 use serde::{Deserialize, Serialize};
 use serde_yaml::Serializer;
-use snafu::{OptionExt as _, ResultExt};
+use snafu::{OptionExt as _, ResultExt as _};
 use std::{
-    collections::{HashMap, HashSet},
+    collections::HashMap,
+    fmt::{Display, Formatter, Result as FmtResult},
     hash::{Hash, Hasher},
     path::PathBuf,
     sync::Arc,
 };
 use tokio::{
-    sync::{RwLock, mpsc},
-    task::{JoinSet, spawn_blocking},
+    sync::{Mutex, RwLock},
+    task::JoinSet,
 };
+use zenoh::{handlers::FifoChannelHandler, pubsub::Subscriber, sample::Sample};
+
+static SUCCESS_KEY_EXP: &str = "success";
+static FAILURE_KEY_EXP: &str = "failure";
+static INPUT_KEY_EXP: &str = "input_node/outputs";
 
-static SUCCESS_KEY_EXP: &str = "/success";
-static FAILURE_KEY_EXP: &str = "/failure";
+#[derive(Serialize, Deserialize, Clone, Debug)]
+enum NodeOutput {
+    Packet(String, HashMap<String, PathSet>),
+    ProcessingCompleted(String),
+}
 
 #[derive(Serialize, Deserialize, Clone, Debug)]
-pub(crate) enum Message {
-    /// String is the `parent_node_id`, while `HashMap` is output of the parent node
-    NodeOutput(String, HashMap<String, PathSet>),
-    NodeProcessingFailure(String, String), // String is the `node_id` that has failed processing
-    /// String is the `node_id` that has completed processing
-    NodeProcessingComplete(String),
-    Stop, // Message to halt all operations
+struct ProcessingFailure {
+    node_id: String,
+    error: String,
 }
 
 #[expect(
     clippy::type_complexity,
     reason = "too complex, but necessary for async handling"
 )]
-#[derive(Debug, Clone)]
+#[derive(Debug)]
 pub struct PipelineRun {
-    /// PipelineJob that this run is associated with
+    /// `PipelineJob` that this run is associated with
     pub pipeline_job: PipelineJob, // The pipeline job that this run is associated with
+    node_tasks: JoinSet<Result<()>>, // JoinSet of tasks for each node in the pipeline
     outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>, // String is the node key, while hash
 }
 
@@ -61,6 +70,11 @@ impl Hash for PipelineRun {
     }
 }
 
+impl Display for PipelineRun {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        write!(f, "PipelineRun({})", self.pipeline_job.hash)
+    }
+}
 /**
  * Runner for pipelines
  *
@@ -73,7 +87,7 @@ impl Hash for PipelineRun {
  */
 #[derive(Default)]
 pub struct DockerPipelineRunner {
-    pipeline_runs: HashSet<Arc<PipelineRun>>,
+    pipeline_runs: HashMap<String, PipelineRun>,
 }
 
 /**
@@ -81,8 +95,8 @@ pub struct DockerPipelineRunner {
  * The runtime is tokio
  *
  * These are the key expressions of the components of the pipeline:
- * - Input Node: pipeline_job_hash/input_node/outputs (This is where the pipeline_job packets get fed to)
- * - Nodes: pipeline_job_hash/node_id/outputs/(success|failure) (This is where the node outputs are sent to)
+ * - Input Node: `pipeline_job_hash/input_node/outputs` (This is where the `pipeline_job` packets get fed to)
+ * - Nodes: `pipeline_job_hash/node_id/outputs/(success|failure)` (This is where the node outputs are sent to)
 */
 impl DockerPipelineRunner {
     /// Create a new Docker pipeline runner
@@ -115,66 +129,119 @@ impl DockerPipelineRunner {
     pub async fn start(
         &mut self,
         pipeline_job: PipelineJob,
+        namespace: &str, // Name space to save pod_results to
         namespace_lookup: &HashMap<String, PathBuf>,
-    ) -> Result<&PipelineRun> {
+    ) -> Result<String> {
         // Create a new pipeline run
-        let pipeline_run = Arc::new(PipelineRun {
+        let mut pipeline_run = PipelineRun {
             pipeline_job,
             outputs: Arc::new(RwLock::new(HashMap::new())),
-        });
+            node_tasks: JoinSet::new(),
+        };
 
-        // Get reference to the pipeline
-        let pipeline = &pipeline_run.pipeline_job.pipeline;
+        // Get the pipeline_job_hash which will be use to identify the pipeline run
+        let pipeline_job_hash = pipeline_run.pipeline_job.hash.clone();
 
-        // Create a task for each node
+        let graph = &pipeline_run.pipeline_job.pipeline.graph;
 
-        // All pipeline tasks have been created, now we need to feed the inputs to the pipeline
-        for tx in &root_nodes_tx {
-            for input_packet in &pipeline_run.pipeline_job.input_packets {
-                tx.send(Message::NodeOutput(
-                    "input".to_owned(),
-                    input_packet.clone(),
-                ))
-                .await?;
-            }
+        // Create the subscriber to listen to node ready status before sending inputs
+        let session = zenoh::open(zenoh::Config::default())
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+
+        let subscriber = session
+            .declare_subscriber(format!("{pipeline_job_hash}/*/status/ready"))
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+
+        // For each node, we will create call create_node_processing_task
+        for node_idx in graph.node_indices() {
+            let node = &graph[node_idx];
+
+            // Spawn the task
+            pipeline_run
+                .node_tasks
+                .spawn(Self::create_node_processing_task(
+                    node.clone(),
+                    pipeline_run.pipeline_job.pipeline.clone(),
+                    pipeline_job_hash.clone(),
+                    namespace.to_owned(),
+                    namespace_lookup.clone(),
+                ));
+        }
+
+        // Spawn the task that captures the outputs from the output_nodes
+        // For now the output nodes are hardcoded to be the leaf nodes of the pipeline
+        for node in pipeline_run.pipeline_job.pipeline.get_leaf_nodes() {
+            pipeline_run
+                .node_tasks
+                .spawn(Self::create_capture_task_for_node(
+                    node.id.clone(),
+                    pipeline_run.pipeline_job.hash.clone(),
+                    Arc::clone(&pipeline_run.outputs),
+                ));
         }
 
-        // Send a message that all job inputs have been sent
-        for tx in &root_nodes_tx {
-            tx.send(Message::NodeProcessingComplete("input".to_owned()))
-                .await?;
+        let num_of_nodes = graph.node_count();
+        println!("Waiting for {num_of_nodes} nodes to be ready");
+        let mut ready_nodes = 0;
+
+        // Wait for all nodes to be ready before sending inputs
+        while (subscriber.recv_async().await).is_ok() {
+            // Message is empty, just increment the counter
+            ready_nodes += 1;
+            println!("number of ready nodes: {ready_nodes}");
+
+            if ready_nodes == num_of_nodes {
+                break; // All nodes are ready, we can start sending inputs
+            }
         }
 
+        println!(
+            "All nodes are ready, starting pipeline run: {}",
+            pipeline_job_hash
+        );
+
+        // // Submit the input_packets to the correct key_exp
+        // for packet in &pipeline_run.pipeline_job.input_packets {
+        //     println!("Sending packet");
+        //     // Send the packet to the input node key_exp
+        //     session
+        //         .put(
+        //             format!("{pipeline_job_hash}/{INPUT_KEY_EXP}"),
+        //             encode_to_vec(packet, config::standard())?,
+        //         )
+        //         .await
+        //         .context(selector::AgentCommunicationFailure {})?;
+        // }
+
         // Insert into the list of pipeline runs
-        self.pipeline_runs.insert(pipeline_run);
+        self.pipeline_runs
+            .insert(pipeline_job_hash.clone(), pipeline_run);
 
-        Ok(self
-            .pipeline_runs
-            .get(&pipeline_run_arc)
-            .context(selector::KeyMissing {
-                key: pipeline_run.to_string(),
-            })?)
+        Ok(pipeline_job_hash)
     }
 
     /// Given a pipeline run, wait for all its tasks to complete and return the `PipelineResult`
     ///
     /// # Errors
     /// Will error out if any of the pipeline tasks failed to join
-    pub async fn get_result(&mut self, pipeline_run: &PipelineRun) -> Result<PipelineResult> {
-        // Call join on the join set for the pipeline run
-        let pipeline_run_info =
+    pub async fn get_result(&mut self, pipeline_run_id: &str) -> Result<PipelineResult> {
+        // To get the result, the pipeline execution must be complete, so we need to await on the tasks
+
+        let pipeline_run =
             self.pipeline_runs
-                .get_mut(pipeline_run)
+                .get_mut(pipeline_run_id)
                 .context(selector::KeyMissing {
-                    key: pipeline_run.to_string(),
+                    key: pipeline_run_id.to_owned(),
                 })?;
 
         // Wait for all the tasks to complete
-        while let Some(result) = pipeline_run_info.node_task_join_set.join_next().await {
+        while let Some(result) = pipeline_run.node_tasks.join_next().await {
             match result {
                 Ok(Ok(())) => {} // Task completed successfully
                 Ok(Err(err)) => {
-                    eprintln!("Task failed: {err}");
+                    eprintln!("Task failed with err: {err}");
                     return Err(err);
                 }
                 Err(err) => {
@@ -186,107 +253,266 @@ impl DockerPipelineRunner {
 
         Ok(PipelineResult {
             pipeline_job: pipeline_run.pipeline_job.clone(),
-            output_packets: pipeline_run_info.outputs.read().await.clone(),
+            output_packets: pipeline_run.outputs.read().await.clone(),
         })
     }
 
     /// Stop the pipeline run and all its tasks
     /// # Errors
     /// Will error out if the pipeline run is not found or if any of the tasks fail to stop correctly
-    pub async fn stop(&mut self, pipeline_run: &PipelineRun) -> Result<()> {
-        // Get the pipeline run info
-        let pipeline_run_info =
+    pub async fn stop(&mut self, pipeline_run_id: &str) -> Result<()> {
+        // To stop the pipeline run, we need to send a stop message to all the tasks
+
+        // Get the pipeline run first
+        let pipeline_run =
             self.pipeline_runs
-                .get_mut(pipeline_run)
+                .get_mut(pipeline_run_id)
                 .context(selector::KeyMissing {
-                    key: pipeline_run.to_string(),
+                    key: pipeline_run_id.to_owned(),
                 })?;
 
-        // Send a stop message to all the node txs
-        for tx in pipeline_run_info.node_tx.values() {
-            tx.send(Message::Stop).await?;
-        }
+        let session = zenoh::open(zenoh::Config::default())
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
 
-        // Wait for all tasks to complete
-        while let Some(result) = pipeline_run_info.node_task_join_set.join_next().await {
-            match result {
-                Ok(Ok(())) => {} // Task completed successfully
-                Ok(Err(err)) => {
-                    eprintln!("Task failed: {err}");
-                    return Err(err);
+        // Send the stop message into the stop key_exp, the msg is just an empty vector
+        session
+            .put(
+                format!("{}/stop", pipeline_run.pipeline_job.hash),
+                Vec::new(),
+            )
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+
+        while pipeline_run.node_tasks.join_next().await.is_some() {}
+        Ok(())
+    }
+
+    #[expect(clippy::type_complexity, reason = "Needed for async")]
+    async fn create_capture_task_for_node(
+        node_id: String,
+        pipeline_run_id: String,
+        outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>,
+    ) -> Result<()> {
+        // Create a zenoh session
+        let session = zenoh::open(zenoh::Config::default())
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+        let subscriber = session
+            .declare_subscriber(format!(
+                "{pipeline_run_id}/{node_id}/outputs/{SUCCESS_KEY_EXP}"
+            ))
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+
+        while let Ok(payload) = subscriber.recv_async().await {
+            // Extract the message from the payload
+            let (msg, _): (NodeOutput, usize) =
+                decode_from_slice(&payload.payload().to_bytes(), config::standard())?;
+
+            match msg {
+                NodeOutput::Packet(sender_id, hash_map) => {
+                    // Optionally, you can log or print the output packet
+                    println!("Captured output from node {}: {:?}", sender_id, hash_map);
+
+                    // Store the output packet in the outputs map
+                    let mut outputs_lock = outputs.write().await;
+                    outputs_lock
+                        .entry(node_id.clone())
+                        .or_default()
+                        .push(hash_map);
                 }
-                Err(err) => {
-                    eprintln!("Join set error: {err}");
-                    return Err(err.into());
+                NodeOutput::ProcessingCompleted(_) => {
+                    // Handle processing completed message if needed
                 }
             }
         }
 
-        // Remove the pipeline run from the list of pipeline runs
-        self.pipeline_runs.remove(pipeline_run);
-
         Ok(())
     }
 
-    /**
-     * Act as the processor of the node by:
-     * 1. Creating a metadata struct for the node to be passed to the appropriate processor
-     * 2. Get the kernel for the node and build the correct processor for this node
-     * 3. Start the processor and wait till it completes
-     * 4. Send a message that the node processing is complete
-     *
-     * # Errors
-     * Will error out if the kernel for the node is not found or if the
-     */
-    async fn start_node_task(
-        kernel: Kernel,
-        output_key_expression: String,
-        namespace_path: PathBuf,
+    /// Function to start tasks associated with the node
+    /// Steps:
+    /// - Create the node processor based on the kernel type
+    /// - Create the zenoh session
+    /// - Create a join set to spawn and handle incoming messages tasks
+    /// - Create a subscriber for each of the parent nodes (Should only be 1, unless it is a joiner node)
+    /// - For each subscriber, handle the incoming message appropriately
+    ///
+    /// # Errors
+    /// Will error out if the kernel for the node is not found or if the
+    async fn create_node_processing_task(
+        node: Node,
+        pipeline: Pipeline,
+        pipeline_job_id: String,
+        namespace: String,
+        namespace_lookup: HashMap<String, PathBuf>,
     ) -> Result<()> {
-        // Get the kernel for this node and build the correct processor
-        match kernel {
-            Kernel::Pod(pod) => {
-                let mut processor = PodProcessor::new(Arc::clone(pod), node_metadata);
-                processor.start().await;
-            }
-            Kernel::Mapper(mapper) => {
-                let mut processor = MapperProcessor::new(Arc::clone(mapper), node_metadata);
-                processor.start().await;
-            }
-            Kernel::Joiner => {
-                let parent_nodes_id = pipeline_run
-                    .pipeline_job
-                    .pipeline
-                    .get_parents_for_node(&node)
-                    .map(|parent_node| parent_node.id.clone())
-                    .collect::<Vec<_>>();
-                let mut processor = JoinerProcessor::new(parent_nodes_id, node_metadata);
-                processor.start().await;
-            }
+        // Create the correct processor for the node based on the kernel type
+        let node_processor: Arc<Mutex<Box<dyn NodeProcessor>>> = Arc::new(Mutex::new(
+            match get(&pipeline.kernel_lut, &node.kernel_hash)? {
+                Kernel::Pod(pod) => Box::new(PodProcessor::new(Arc::clone(pod))),
+                Kernel::Mapper(mapper) => Box::new(MapperProcessor::new(Arc::clone(mapper))),
+                Kernel::Joiner => {
+                    // Need to get the parent node id for this joiner node
+                    let parent_nodes_id = pipeline
+                        .get_parents_for_node(&node)
+                        .map(|parent_node| parent_node.id.clone())
+                        .collect::<Vec<_>>();
+                    Box::new(JoinerProcessor::new(parent_nodes_id))
+                }
+            },
+        ));
+
+        // Create the zenoh session
+        let session = Arc::new(
+            zenoh::open(zenoh::Config::default())
+                .await
+                .context(selector::AgentCommunicationFailure {})?,
+        );
+
+        // Create a joinset to spawn and handle incoming messages tasks
+        let mut listener_tasks = JoinSet::new();
+
+        // Create the list of key_expressions to subscribe to
+        let mut key_exps_to_subscribe_to = pipeline
+            .get_parents_for_node(&node)
+            .map(|parent_node| {
+                format!(
+                    "{pipeline_job_id}/{}/outputs/{SUCCESS_KEY_EXP}",
+                    parent_node.id
+                )
+            })
+            .collect::<Vec<_>>();
+
+        // If there was no parent node, then this is root node, therefore we need to subscribe to the input node
+        if key_exps_to_subscribe_to.is_empty() {
+            key_exps_to_subscribe_to.push(format!("{pipeline_job_id}/{INPUT_KEY_EXP}"));
         }
 
-        // Since all inputs are sent, we can send a message that the "input node" processing is complete
-        for success_ch_tx in &success_chs_tx {
-            match success_ch_tx
-                .send(Message::NodeProcessingComplete(node.id.clone()))
+        // Create a subscriber for each of the parent nodes (Should only be 1, unless it is a joiner node)
+        for key_exp in key_exps_to_subscribe_to {
+            let subscriber = session
+                .declare_subscriber(key_exp)
                 .await
-            {
-                Ok(()) => {}
-                Err(err) => {
-                    match err {
-                        mpsc::error::SendError(Message::NodeProcessingComplete(_)) => {
-                            // The channel is closed, we can ignore this error, this happens when stop it called
-                            eprintln!("Failed to send processing complete message, channel closed");
-                        }
-                        _ => {
-                            eprintln!("Failed to send processing complete message: {err}");
-                        }
+                .context(selector::AgentCommunicationFailure {})?;
+
+            listener_tasks.spawn(Self::start_async_processor_task(
+                subscriber,
+                Arc::clone(&node_processor),
+                node.id.clone(),
+                pipeline_job_id.clone(),
+                namespace.clone(),
+                namespace_lookup.clone(),
+                Arc::clone(&session),
+            ));
+        }
+
+        // Create the task to handle stop request
+        listener_tasks.spawn(Self::start_stop_request_task(
+            Arc::clone(&node_processor),
+            pipeline_job_id.clone(),
+            Arc::clone(&session),
+        ));
+
+        // Wait for all task to complete
+        listener_tasks.join_all().await;
+
+        Ok(())
+    }
+
+    async fn start_async_processor_task(
+        subscriber: Subscriber<FifoChannelHandler<Sample>>,
+        node_processor: Arc<Mutex<Box<dyn NodeProcessor>>>,
+        node_id: String,
+        pipeline_job_id: String,
+        namespace: String,
+        namespace_lookup: HashMap<String, PathBuf>,
+        session: Arc<zenoh::Session>,
+    ) -> Result<()> {
+        // Send a ready message so the pipeline knows when to start sending inputs
+        let result = session
+            .put(
+                format!("{pipeline_job_id}/{node_id}/status/ready"),
+                &node_id,
+            )
+            .await
+            .context(selector::AgentCommunicationFailure {});
+
+        // Print out if the ready message was sent successfully
+        if let Err(err) = result {
+            eprintln!("Failed to send ready message for node {}: {}", node_id, err);
+        } else {
+            println!("Ready message sent for node {}", node_id);
+        }
+
+        while let Ok(payload) = subscriber.recv_async().await {
+            // Extract the message from the payload
+
+            let (msg, _): (NodeOutput, usize) =
+                decode_from_slice(&payload.payload().to_bytes(), config::standard())?;
+            println!("Received message for node {}: {:?}", node_id, msg);
+            match msg {
+                NodeOutput::Packet(sender_id, hash_map) => {
+                    println!(
+                        "Received packet from {} for node {}: {:?}",
+                        sender_id, node_id, hash_map
+                    );
+                    // Process the packet using the node processor
+                    node_processor.lock().await.process_packet(
+                        &sender_id,
+                        &node_id,
+                        &hash_map,
+                        Arc::clone(&session),
+                        &format!("{}/{}/outputs", pipeline_job_id, node_id.clone()),
+                        &namespace,
+                        &namespace_lookup,
+                    )?;
+                }
+                NodeOutput::ProcessingCompleted(sender_id) => {
+                    // Notify the processor that the parent node has completed processing
+                    if node_processor
+                        .lock()
+                        .await
+                        .mark_parent_as_complete(&sender_id)
+                        .await
+                    {
+                        // This was the last parent, thus we need to send the processing complete message
+                        let output_key_exp =
+                            format!("{pipeline_job_id}/{node_id}/outputs/{SUCCESS_KEY_EXP}");
+                        session
+                            .put(
+                                output_key_exp,
+                                encode_to_vec(
+                                    NodeOutput::ProcessingCompleted(node_id.clone()),
+                                    config::standard(),
+                                )?,
+                            )
+                            .await
+                            .context(selector::AgentCommunicationFailure {})?;
                     }
                 }
             }
+
+            // Process the message based on its type
         }
+        Ok::<(), OrcaError>(())
+    }
 
-        Ok(())
+    async fn start_stop_request_task(
+        node_processor: Arc<Mutex<Box<dyn NodeProcessor>>>,
+        pipeline_run_id: String,
+        session: Arc<zenoh::Session>,
+    ) -> Result<()> {
+        let subscriber = session
+            .declare_subscriber(pipeline_run_id.clone() + "/stop")
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+        while subscriber.recv_async().await.is_ok() {
+            // Received a requst to stop, therefore we need to tell the node_processor to shutdown
+            node_processor.lock().await.stop();
+        }
+        Ok::<(), OrcaError>(())
     }
 }
 
@@ -294,9 +520,10 @@ impl DockerPipelineRunner {
 /// This trait defines the methods that all node processors should implement
 ///
 /// Main purpose was to reduce the amount of code duplication between different node processors
-/// As a result, each processor only needs to worry about writing their own function to process the msg.
-trait NodeProcessor {
-    async fn process_packet(
+/// As a result, each processor only needs to worry about writing their own function to process the msg
+#[async_trait]
+trait NodeProcessor: Send + Sync {
+    fn process_packet(
         &mut self,
         sender_node_id: &str,
         node_id: &str,
@@ -307,15 +534,25 @@ trait NodeProcessor {
         namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()>;
 
-    async fn wait_for_node_task_completion(&mut self) -> Result<()>;
+    /// Notifies the processor that the parent node has completed processing
+    /// If the parent node was the last one to complete, this function will wait till all task are done
+    /// and send the node processing complete message then return.
+    ///
+    /// Otherwise it will return immediately
+    ///
+    /// # Returns
+    /// true if the parent node was the last one to complete processing, user send
+    /// the processing completion message to the output
+    ///
+    /// false if there are still other parent nodes that need to complete processing
+    async fn mark_parent_as_complete(&mut self, parent_node_id: &str) -> bool;
 
-    fn stop(&mut self) -> Result<()>;
+    fn stop(&mut self);
 }
 
 /// Processor for Pods
 /// Currently missing implementation to call agents for actual pod processing
 struct PodProcessor {
-    session: zenoh::Session,
     pod: Arc<Pod>,
     processing_tasks: JoinSet<Result<(), OrcaError>>,
 }
@@ -323,87 +560,20 @@ struct PodProcessor {
 impl PodProcessor {
     fn new(pod: Arc<Pod>) -> Self {
         Self {
-            session: zenoh::Session::default(),
             pod,
             processing_tasks: JoinSet::new(),
         }
     }
-
-    /// Actual logic of processing a packet using the pod
-    /// At the moment it does a simulation of pod execution
-    async fn process_packet(
-        _sender_node_id: &str,
-        node_id: String,
-        pod: Arc<Pod>,
-        namespace: String,
-        namespace_lookup: HashMap<String, PathBuf>,
-        packet: HashMap<String, PathSet>,
-        success_chs_tx: Vec<mpsc::Sender<Message>>,
-    ) -> Result<()> {
-        // Process the packet using the pod
-        // Create the pod_job
-
-        // We need a unique hash for this given input packet process by the node
-        // therefore we need to generate a hash that has the pod_id + input_packet
-        let node_id_bytes = node_id.as_bytes().to_vec();
-        let packet_copy = packet.clone();
-        let input_packet_hash = spawn_blocking(move || {
-            let mut buf = node_id_bytes;
-            let mut serializer = Serializer::new(&mut buf);
-            serialize_hashmap(&packet_copy, &mut serializer)?;
-            Ok::<_, OrcaError>(hash_buffer(buf))
-        })
-        .await??;
-        let output_dir = URI {
-            namespace: namespace.clone(),
-            path: PathBuf::from(format!("pod_runs/{}/{}", pod.hash, input_packet_hash)),
-        };
-
-        let cpu_limit = pod.recommended_cpus;
-        let memory_limit = pod.recommended_memory;
-
-        // Create the pod job
-        let pod_job = PodJob::new(
-            None,
-            Arc::clone(&pod),
-            packet.clone(),
-            output_dir,
-            cpu_limit,
-            memory_limit,
-            None,
-            &namespace_lookup,
-        )?;
-
-        // Simulate pod execution by just printing out pod_job_hash and pod hash
-        // This will be replaced by sending the pod_job to the orchestrator via the agent
-        println!(
-            "Simulating Executing pod job: {} with pod hash: {}",
-            pod_job.hash, pod_job.pod.hash
-        );
-
-        #[expect(
-            clippy::unwrap_used,
-            reason = "Hard code for now, will be replaced by agent"
-        )]
-        // Build the output_packet
-        let output_packet = pod
-            .output_spec
-            .keys()
-            .map(|output_key| (output_key.clone(), packet.values().next().cloned().unwrap()))
-            .collect::<HashMap<_, _>>();
-
-        // For now we will just send the input_packet to the success channel
-        try_join_all(success_chs_tx.iter().map(|success_ch_tx| {
-            success_ch_tx.send(Message::NodeOutput(node_id.clone(), output_packet.clone()))
-        }))
-        .await?;
-
-        Ok(())
-    }
 }
 
+#[async_trait]
 impl NodeProcessor for PodProcessor {
-    async fn process_packet(
+    #[expect(
+        clippy::unwrap_used,
+        clippy::unwrap_in_result,
+        reason = "Hard code for now, will be replaced by agent"
+    )]
+    fn process_packet(
         &mut self,
         _sender_node_id: &str,
         node_id: &str,
@@ -443,17 +613,14 @@ impl NodeProcessor for PodProcessor {
             cpu_limit,
             memory_limit,
             None,
-            &namespace_lookup,
+            namespace_lookup,
         )?;
 
         // Simulate pod execution by just printing out pod_job_hash and pod hash
         // This will be replaced by sending the pod_job to the orchestrator via the agent
 
         // Build the output_packet, in reality, this will be extracted from the pod_result
-        #[expect(
-            clippy::unwrap_used,
-            reason = "Hard code for now, will be replaced by agent"
-        )]
+
         let output_packet = self
             .pod
             .output_spec
@@ -473,9 +640,9 @@ impl NodeProcessor for PodProcessor {
             session
                 .put(
                     output_key_exp_clone + SUCCESS_KEY_EXP,
-                    bincode::serde::encode_to_vec(
-                        &Message::NodeOutput(node_id_clone, output_packet),
-                        bincode::config::standard(),
+                    encode_to_vec(
+                        NodeOutput::Packet(node_id_clone, output_packet),
+                        config::standard(),
                     )?,
                 )
                 .await
@@ -484,17 +651,21 @@ impl NodeProcessor for PodProcessor {
             Ok(())
         });
 
+        println!("Successfully started processor for node: {}", node_id);
         Ok(())
     }
 
-    async fn wait_for_node_task_completion(&mut self) -> Result<()> {
-        while self.processing_tasks.join_next().await.is_some() {}
-        Ok(())
+    async fn mark_parent_as_complete(&mut self, _parent_node_id: &str) -> bool {
+        // For pod we only have one parent, thus execute the exit case
+        while (self.processing_tasks.join_next().await).is_some() {
+            // Wait for all tasks to complete
+        }
+
+        true
     }
 
-    fn stop(&mut self) -> Result<()> {
+    fn stop(&mut self) {
         self.processing_tasks.abort_all();
-        Ok(())
     }
 }
 
@@ -502,16 +673,21 @@ impl NodeProcessor for PodProcessor {
 /// This processor renames the `input_keys` from the input packet to the `output_keys` defined by the map
 struct MapperProcessor {
     mapper: Arc<Mapper>,
+    processing_tasks: JoinSet<Result<(), OrcaError>>,
 }
 
 impl MapperProcessor {
-    const fn new(mapper: Arc<Mapper>) -> Self {
-        Self { mapper }
+    fn new(mapper: Arc<Mapper>) -> Self {
+        Self {
+            mapper,
+            processing_tasks: JoinSet::new(),
+        }
     }
 }
 
+#[async_trait]
 impl NodeProcessor for MapperProcessor {
-    async fn process_packet(
+    fn process_packet(
         &mut self,
         _sender_node_id: &str,
         node_id: &str,
@@ -521,40 +697,71 @@ impl NodeProcessor for MapperProcessor {
         _namespace: &str,
         _namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()> {
-        // Apply the mapping to the input packet
-        let output_map = self
-            .mapper
-            .mapping
-            .iter()
-            .map(|(input_key, output_key)| {
-                let input = get(&packet, input_key)?.clone();
-                Ok((output_key.to_owned(), input))
-            })
-            .collect::<Result<HashMap<_, _>>>()?;
+        let mapping = self.mapper.mapping.clone();
+        let packet_clone = packet.clone();
+        let node_id_clone = node_id.to_owned();
+        let output_key_exp_clone = output_key_exp.to_owned();
 
-        // Send the packet outwards
-        session
-            .put(
-                output_key_exp,
-                bincode::serde::encode_to_vec(
-                    &Message::NodeOutput(node_id.to_owned(), output_map),
-                    bincode::config::standard(),
-                )?,
-            )
-            .await
-            .context(selector::AgentCommunicationFailure {})?;
+        self.processing_tasks.spawn(async move {
+            let result = {
+                // Apply the mapping to the input packet
+                let output_map = mapping
+                    .iter()
+                    .map(|(input_key, output_key)| {
+                        let input = get(&packet_clone, input_key)?.clone();
+                        Ok((output_key.to_owned(), input))
+                    })
+                    .collect::<Result<HashMap<_, _>>>()?;
+
+                // Send the packet outwards
+                session
+                    .put(
+                        output_key_exp_clone.clone() + SUCCESS_KEY_EXP,
+                        encode_to_vec(
+                            NodeOutput::Packet(node_id_clone.clone(), output_map),
+                            config::standard(),
+                        )?,
+                    )
+                    .await
+                    .context(selector::AgentCommunicationFailure {})?;
+                Ok::<(), OrcaError>(())
+            };
+
+            if let Err(err) = result {
+                // If there was an error, we send it to the failure channel
+                session
+                    .put(
+                        output_key_exp_clone + FAILURE_KEY_EXP,
+                        encode_to_vec(
+                            &ProcessingFailure {
+                                node_id: node_id_clone,
+                                error: err.to_string(),
+                            },
+                            config::standard(),
+                        )?,
+                    )
+                    .await
+                    .context(selector::AgentCommunicationFailure {})?;
+            }
+
+            Ok(())
+        });
 
+        println!("Successfully started processor for node: {}", node_id);
         Ok(())
     }
 
-    async fn wait_for_node_task_completion(&mut self) -> Result<()> {
-        // All mappers tasks are synchronous, so we don't need to wait for anything
-        Ok(())
+    async fn mark_parent_as_complete(&mut self, _parent_node_id: &str) -> bool {
+        // For mapper we only have one parent, thus execute the exit case
+        while (self.processing_tasks.join_next().await).is_some() {
+            // Wait for all tasks to complete
+        }
+
+        true
     }
 
-    fn stop(&mut self) -> Result<()> {
-        // Mappers do not have any state to stop, so we can just return Ok
-        Ok(())
+    fn stop(&mut self) {
+        self.processing_tasks.abort_all();
     }
 }
 
@@ -565,7 +772,6 @@ struct JoinerProcessor {
     /// Cache for all packets received by the node
     input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
     completed_parents: Vec<String>,
-    initial_computation_completed: bool,
     processing_tasks: JoinSet<Result<(), OrcaError>>,
 }
 
@@ -578,16 +784,15 @@ impl JoinerProcessor {
         Self {
             input_packet_cache,
             completed_parents: Vec::new(),
-            initial_computation_completed: false,
             processing_tasks: JoinSet::new(),
         }
     }
 
     fn compute_cartesian_product(
-        factors: &Vec<Vec<HashMap<String, PathSet>>>,
+        factors: &[Vec<HashMap<String, PathSet>>],
     ) -> Vec<HashMap<String, PathSet>> {
         factors
-            .into_iter()
+            .iter()
             .multi_cartesian_product()
             .map(|packets_to_combined| {
                 packets_to_combined
@@ -601,8 +806,9 @@ impl JoinerProcessor {
     }
 }
 
+#[async_trait]
 impl NodeProcessor for JoinerProcessor {
-    async fn process_packet(
+    fn process_packet(
         &mut self,
         sender_node_id: &str,
         node_id: &str,
@@ -629,7 +835,7 @@ impl NodeProcessor for JoinerProcessor {
 
             // Build the factors of the product as owned values to avoid lifetime issues
             let mut factors = other_parent_ids
-                .map(|id| get(&self.input_packet_cache, id).map(|v| v.clone()))
+                .map(|id| get(&self.input_packet_cache, id).cloned())
                 .collect::<Result<Vec<_>>>()?;
 
             // Add the new packet as a factor
@@ -650,7 +856,7 @@ impl NodeProcessor for JoinerProcessor {
                             .put(
                                 output_key_exp_clone.clone() + SUCCESS_KEY_EXP,
                                 encode_to_vec(
-                                    &Message::NodeOutput(node_id_clone.clone(), output_packet),
+                                    NodeOutput::Packet(node_id_clone.clone(), output_packet),
                                     config::standard(),
                                 )?,
                             )
@@ -665,10 +871,10 @@ impl NodeProcessor for JoinerProcessor {
                             .put(
                                 output_key_exp_clone.clone() + FAILURE_KEY_EXP,
                                 encode_to_vec(
-                                    &Message::NodeProcessingFailure(
-                                        node_id_clone.clone(),
-                                        err.to_string(),
-                                    ),
+                                    &ProcessingFailure {
+                                        node_id: node_id_clone.clone(),
+                                        error: err.to_string(),
+                                    },
                                     config::standard(),
                                 )?,
                             )
@@ -680,19 +886,30 @@ impl NodeProcessor for JoinerProcessor {
                 Ok(())
             });
         }
+        println!("Successfully started processor for node: {}", node_id);
         Ok(())
     }
 
-    async fn wait_for_node_task_completion(&mut self) -> Result<()> {
-        // We must wait for all joiner processing task to complete
-        while self.processing_tasks.join_next().await.is_some() {}
-        Ok(())
+    async fn mark_parent_as_complete(&mut self, _parent_node_id: &str) -> bool {
+        // For Joiner, we need to determine if all parents are complete, if so then wait for task to complete
+        // before returning true
+        self.completed_parents.push(_parent_node_id.to_owned());
+
+        // If we have all parents completed, we can wait for the tasks to complete
+        if self.completed_parents.len() == self.input_packet_cache.len() {
+            while (self.processing_tasks.join_next().await).is_some() {
+                // Wait for all tasks to complete
+            }
+            return true;
+        }
+
+        // If not all parents are completed, we return false
+        false
     }
 
-    fn stop(&mut self) -> Result<()> {
+    fn stop(&mut self) {
         // We want to abort any computation
         self.processing_tasks.abort_all();
-        Ok(())
     }
 }
 
@@ -700,82 +917,63 @@ impl NodeProcessor for JoinerProcessor {
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 #[expect(clippy::panic_in_result_fn, reason = "Unit test")]
 async fn joiner() -> Result<()> {
-    // Create a fake mpsc channel for the node
-    let (_, node_rx) = mpsc::channel::<Message>(128);
-
-    // Create a child mpsc
-    let (child_tx, mut child_rx) = mpsc::channel::<Message>(128);
-
-    let node_metadata = NodeMetaData {
-        node_id: "joiner_node".to_owned(),
-        node_rx,
-        child_nodes_txs: vec![child_tx],
-        namespace: "test".to_owned(),
-        namespace_lookup: HashMap::new(),
-    };
-
-    let mut joiner_process = JoinerProcessor::new(
-        vec!["0".to_owned(), "1".to_owned(), "2".to_owned()],
-        node_metadata,
-    );
-
-    // Make each parent has 1 packet
-    for idx in 0..2 {
-        joiner_process
-            .process_packet(
-                &format!("{idx}"),
-                make_test_packet("data_1.txt".to_owned().into()),
-            )
-            .await?;
-    }
-
-    // Confirm that there should be no output yet
-
-    // Now we send the missing parent package
-    // This will yield one unique combination
-    joiner_process
-        .process_packet("2", make_test_packet("data_1.txt".to_owned().into()))
-        .await?;
-
-    // Confirm that the output is sent to the child channel
-    assert!(
-        child_rx.len() == 1,
-        "Should have only one message in the channel",
-    );
-    assert!(
-        child_rx.recv().await.is_some(),
-        "Should have received a message"
-    );
-
-    // Insert another one
-    joiner_process
-        .process_packet("2", make_test_packet("data_2.txt".to_owned().into()))
-        .await?;
-
-    // The joiner node should send another one
-    assert!(
-        child_rx.len() == 1,
-        "Should have only one message in the channel",
-    );
-    assert!(
-        child_rx.recv().await.is_some(),
-        "Should have received a message"
-    );
-
-    // Now insert to packet for parent 0, which should yield 2 packets in total
-    // This is because of the cartesian product
-    joiner_process
-        .process_packet("0", make_test_packet("data_2.txt".to_owned().into()))
-        .await?;
-
-    assert!(
-        child_rx.len() == 2,
-        "Should have only two messages in the channel",
-    );
-    assert!(
-        child_rx.recv().await.is_some(),
-        "Should have received a message"
-    );
+    // let parent_ids = vec!["0".to_owned(), "1".to_owned(), "2".to_owned()];
+
+    // let mut joiner_process = JoinerProcessor::new(parent_ids);
+
+    // // Make each parent has 1 packet
+    // for idx in 0..2 {
+    //     let packet = make_test_packet(format!("data_{idx}.txt").into());
+    //     joiner_process.process_packet(idx, "joiner", packet, session, output_key_exp, namespace, namespace_lookup);
+    // }
+
+    // // Confirm that there should be no output yet
+
+    // // Now we send the missing parent package
+    // // This will yield one unique combination
+    // joiner_process
+    //     .process_packet("2", make_test_packet("data_1.txt".to_owned().into()))
+    //     .await?;
+
+    // // Confirm that the output is sent to the child channel
+    // assert!(
+    //     child_rx.len() == 1,
+    //     "Should have only one message in the channel",
+    // );
+    // assert!(
+    //     child_rx.recv().await.is_some(),
+    //     "Should have received a message"
+    // );
+
+    // // Insert another one
+    // joiner_process
+    //     .process_packet("2", make_test_packet("data_2.txt".to_owned().into()))
+    //     .await?;
+
+    // // The joiner node should send another one
+    // assert!(
+    //     child_rx.len() == 1,
+    //     "Should have only one message in the channel",
+    // );
+    // assert!(
+    //     child_rx.recv().await.is_some(),
+    //     "Should have received a message"
+    // );
+
+    // // Now insert to packet for parent 0, which should yield 2 packets in total
+    // // This is because of the cartesian product
+    // joiner_process
+    //     .process_packet("0", make_test_packet("data_2.txt".to_owned().into()))
+    //     .await?;
+
+    // assert!(
+    //     child_rx.len() == 2,
+    //     "Should have only two messages in the channel",
+    // );
+    // assert!(
+    //     child_rx.recv().await.is_some(),
+    //     "Should have received a message"
+    // );
 
     Ok(())
 }
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 054a8f2e..7354a542 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -9,14 +9,42 @@ pub mod fixture;
 use std::collections::HashMap;
 
 use orcapod::uniffi::{error::Result, pipeline_runner::runner::DockerPipelineRunner};
+use snafu::ResultExt;
+use tokio::time::sleep;
 
 use crate::fixture::TestDirs;
 use fixture::pipeline_job;
 
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[tokio::test(flavor = "multi_thread", worker_threads = 32)]
 async fn basic_run() -> Result<()> {
     let pipeline_job = pipeline_job()?;
 
+    // Create zenoh to monitor the node ready message
+    let zenoh = zenoh::open(zenoh::Config::default()).await.unwrap(); // Replace with the correct error variant if needed
+    let sub = zenoh.declare_subscriber("*/*/status/ready").await.unwrap();
+
+    tokio::spawn({
+        async move {
+            // Receive loop ready, publish ready message
+            zenoh.put("ready", vec![]).await.unwrap();
+            println!("Listening for messages...");
+            loop {
+                match sub.recv_async().await {
+                    Ok(msg) => {
+                        println!(
+                            "Received message: {:?}",
+                            msg.payload().try_to_string().unwrap()
+                        );
+                    }
+                    Err(_) => todo!(),
+                }
+            }
+        }
+    });
+
+    // Wait for the zenoh subscriber to be ready
+    sleep(std::time::Duration::from_secs(5)).await;
+
     // Create the runner
     let mut runner = DockerPipelineRunner::new();
 
@@ -26,30 +54,38 @@ async fn basic_run() -> Result<()> {
     )]))?;
     let namespace_lookup = test_dirs.namespace_lookup();
 
-    let pipeline_run = runner.start(pipeline_job, &namespace_lookup).await?;
+    let pipeline_run = runner
+        .start(pipeline_job, "default", &namespace_lookup)
+        .await?;
 
+    sleep(std::time::Duration::from_secs(5)).await;
+    panic!();
     // Wait for the pipeline run to complete
-    runner.get_result(&pipeline_run).await?;
+    let pipeline_result = runner.get_result(&pipeline_run).await?;
+    println!("{:?}", pipeline_result.output_packets);
+
     Ok(())
 }
 
-#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn stop() -> Result<()> {
-    let pipeline_job = pipeline_job()?;
+// #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+// async fn stop() -> Result<()> {
+//     let pipeline_job = pipeline_job()?;
 
-    // Create the runner
-    let mut runner = DockerPipelineRunner::new();
+//     // Create the runner
+//     let mut runner = DockerPipelineRunner::new();
 
-    let test_dirs = TestDirs::new(&HashMap::from([(
-        "default".to_owned(),
-        Some("./tests/extra/data/"),
-    )]))?;
-    let namespace_lookup = test_dirs.namespace_lookup();
+//     let test_dirs = TestDirs::new(&HashMap::from([(
+//         "default".to_owned(),
+//         Some("./tests/extra/data/"),
+//     )]))?;
+//     let namespace_lookup = test_dirs.namespace_lookup();
 
-    let pipeline_run = runner.start(pipeline_job, &namespace_lookup).await?;
+//     let pipeline_run = runner
+//         .start(pipeline_job, "default", &namespace_lookup)
+//         .await?;
 
-    // Abort the pipeline run
-    runner.stop(&pipeline_run).await?;
+//     // Abort the pipeline run
+//     runner.stop(&pipeline_run).await?;
 
-    Ok(())
-}
+//     Ok(())
+// }

From 31f925e9ab8851448db9851b97c3f8cb17854b0d Mon Sep 17 00:00:00 2001
From: synicix <synicix@gmail.com>
Date: Sun, 20 Jul 2025 16:27:42 +0000
Subject: [PATCH 18/29] Save progress

---
 src/uniffi/pipeline_runner/runner.rs | 12 +++++-
 tests/pipeline_runner.rs             | 57 +++++++++++++++++-----------
 2 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index a1ddd3f9..a73abdeb 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -155,6 +155,7 @@ impl DockerPipelineRunner {
             .context(selector::AgentCommunicationFailure {})?;
 
         // For each node, we will create call create_node_processing_task
+        println!("Num of node indices: {}", graph.node_count());
         for node_idx in graph.node_indices() {
             let node = &graph[node_idx];
 
@@ -348,6 +349,8 @@ impl DockerPipelineRunner {
         namespace: String,
         namespace_lookup: HashMap<String, PathBuf>,
     ) -> Result<()> {
+        // Print out node id for debugging
+        println!("Creating processing task for node: {}", node.id);
         // Create the correct processor for the node based on the kernel type
         let node_processor: Arc<Mutex<Box<dyn NodeProcessor>>> = Arc::new(Mutex::new(
             match get(&pipeline.kernel_lut, &node.kernel_hash)? {
@@ -378,6 +381,7 @@ impl DockerPipelineRunner {
         let mut key_exps_to_subscribe_to = pipeline
             .get_parents_for_node(&node)
             .map(|parent_node| {
+                println!("Setting up listener for parent node: {}", parent_node.id);
                 format!(
                     "{pipeline_job_id}/{}/outputs/{SUCCESS_KEY_EXP}",
                     parent_node.id
@@ -443,9 +447,15 @@ impl DockerPipelineRunner {
         if let Err(err) = result {
             eprintln!("Failed to send ready message for node {}: {}", node_id, err);
         } else {
-            println!("Ready message sent for node {}", node_id);
+            println!(
+                "Ready message sent for node {}, with key exp {}",
+                node_id,
+                format!("{pipeline_job_id}/{node_id}/status/ready")
+            );
         }
 
+        println!("Listening for messages on node: {}", node_id);
+
         while let Ok(payload) = subscriber.recv_async().await {
             // Extract the message from the payload
 
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 7354a542..a9f0238c 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -21,12 +21,11 @@ async fn basic_run() -> Result<()> {
 
     // Create zenoh to monitor the node ready message
     let zenoh = zenoh::open(zenoh::Config::default()).await.unwrap(); // Replace with the correct error variant if needed
-    let sub = zenoh.declare_subscriber("*/*/status/ready").await.unwrap();
+    let sub = zenoh.declare_subscriber("**").await.unwrap();
 
     tokio::spawn({
         async move {
             // Receive loop ready, publish ready message
-            zenoh.put("ready", vec![]).await.unwrap();
             println!("Listening for messages...");
             loop {
                 match sub.recv_async().await {
@@ -36,33 +35,45 @@ async fn basic_run() -> Result<()> {
                             msg.payload().try_to_string().unwrap()
                         );
                     }
-                    Err(_) => todo!(),
+                    Err(err) => println!("Error receiving message: {}", err),
                 }
             }
         }
     });
 
-    // Wait for the zenoh subscriber to be ready
-    sleep(std::time::Duration::from_secs(5)).await;
-
-    // Create the runner
-    let mut runner = DockerPipelineRunner::new();
-
-    let test_dirs = TestDirs::new(&HashMap::from([(
-        "default".to_owned(),
-        Some("./tests/extra/data/"),
-    )]))?;
-    let namespace_lookup = test_dirs.namespace_lookup();
-
-    let pipeline_run = runner
-        .start(pipeline_job, "default", &namespace_lookup)
-        .await?;
+    let zenoh2 = zenoh::open(zenoh::Config::default()).await.unwrap();
+
+    let joiner = tokio::spawn(async move {
+        sleep(tokio::time::Duration::from_secs(2)).await;
+        // Send a bunch of messsage to the channel
+        for i in 0..10 {
+            zenoh2
+                .put(format!("test/{}", i), format!("message {}", i).as_bytes())
+                .await
+                .unwrap();
+            println!("Sent message {}", i);
+        }
+    });
 
-    sleep(std::time::Duration::from_secs(5)).await;
-    panic!();
-    // Wait for the pipeline run to complete
-    let pipeline_result = runner.get_result(&pipeline_run).await?;
-    println!("{:?}", pipeline_result.output_packets);
+    joiner.await.unwrap();
+    // // Create the runner
+    // let mut runner = DockerPipelineRunner::new();
+
+    // let test_dirs = TestDirs::new(&HashMap::from([(
+    //     "default".to_owned(),
+    //     Some("./tests/extra/data/"),
+    // )]))?;
+    // let namespace_lookup = test_dirs.namespace_lookup();
+
+    // let pipeline_run = runner
+    //     .start(pipeline_job, "default", &namespace_lookup)
+    //     .await?;
+
+    // sleep(std::time::Duration::from_secs(5)).await;
+    // panic!();
+    // // Wait for the pipeline run to complete
+    // let pipeline_result = runner.get_result(&pipeline_run).await?;
+    // println!("{:?}", pipeline_result.output_packets);
 
     Ok(())
 }

From b369549fdd2d35bf25622224c2880c88e656a953 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Mon, 21 Jul 2025 01:56:49 +0000
Subject: [PATCH 19/29] Save progress

---
 src/uniffi/pipeline_runner/runner.rs | 223 ++++++++++++++++-----------
 tests/pipeline_runner.rs             |  58 +++----
 2 files changed, 157 insertions(+), 124 deletions(-)

diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index a73abdeb..c43a5799 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -2,13 +2,13 @@ use crate::{
     core::{crypto::hash_buffer, model::serialize_hashmap, util::get},
     uniffi::{
         error::{OrcaError, Result, selector},
-        model::{PathSet, Pod, PodJob, URI},
+        model::{Blob, BlobKind, PathSet, Pod, PodJob, URI},
         pipeline::{Kernel, Mapper, Node, Pipeline, PipelineJob, PipelineResult},
     },
 };
 use async_trait::async_trait;
 use bincode::{
-    config,
+    config, de,
     serde::{decode_from_slice, encode_to_vec},
 };
 use itertools::Itertools as _;
@@ -49,9 +49,9 @@ struct ProcessingFailure {
     reason = "too complex, but necessary for async handling"
 )]
 #[derive(Debug)]
-pub struct PipelineRun {
+struct PipelineRun {
     /// `PipelineJob` that this run is associated with
-    pub pipeline_job: PipelineJob, // The pipeline job that this run is associated with
+    pipeline_job: PipelineJob, // The pipeline job that this run is associated with
     node_tasks: JoinSet<Result<()>>, // JoinSet of tasks for each node in the pipeline
     outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>, // String is the node key, while hash
 }
@@ -104,28 +104,8 @@ impl DockerPipelineRunner {
         Self::default()
     }
 
-    /**
-    Start the `pipeline_job` returning `pipeline_run`
-
-    Algorithm:
-    1. Create a new `PipelineRun` from the `pipeline_job`
-    2. Insert the `PipelineRun` into the `pipeline_runs` map
-    3. Create an output channel to capture the outputs of the nodes
-       (This will be given to the output capture task)
-    4. Create a task that captures the outputs form nodes and stores them in the `outputs` map
-       This is done via listening the channel and acting like a final node in the pipeline
-    5. Get the root nodes of the pipeline and call `create_task_for_node` for each root node
-       This will recursively BFS through the pipeline and create tasks for each node
-       (More detail in that function)
-    6. Using the `root_nodes` txs, we will send all inputs to that channel.
-       This will start the pipeline execution
-    7. Upon sending all the inputs, we will send node complete message
-       signifying that the `input_node` is done
-    8. Return the `PipelineRun` which can be used to get the results later
-
-    # Errors
-    Will error out if the pipeline job fails to start
-    */
+    /// # Errors
+    /// Will error out if the pipeline job fails to start
     pub async fn start(
         &mut self,
         pipeline_job: PipelineJob,
@@ -145,9 +125,11 @@ impl DockerPipelineRunner {
         let graph = &pipeline_run.pipeline_job.pipeline.graph;
 
         // Create the subscriber to listen to node ready status before sending inputs
-        let session = zenoh::open(zenoh::Config::default())
-            .await
-            .context(selector::AgentCommunicationFailure {})?;
+        let session = Arc::new(
+            zenoh::open(zenoh::Config::default())
+                .await
+                .context(selector::AgentCommunicationFailure {})?,
+        );
 
         let subscriber = session
             .declare_subscriber(format!("{pipeline_job_hash}/*/status/ready"))
@@ -155,19 +137,19 @@ impl DockerPipelineRunner {
             .context(selector::AgentCommunicationFailure {})?;
 
         // For each node, we will create call create_node_processing_task
-        println!("Num of node indices: {}", graph.node_count());
         for node_idx in graph.node_indices() {
             let node = &graph[node_idx];
 
             // Spawn the task
             pipeline_run
                 .node_tasks
-                .spawn(Self::create_node_processing_task(
+                .spawn(Self::spawn_node_processing_task(
                     node.clone(),
                     pipeline_run.pipeline_job.pipeline.clone(),
                     pipeline_job_hash.clone(),
                     namespace.to_owned(),
                     namespace_lookup.clone(),
+                    Arc::clone(&session),
                 ));
         }
 
@@ -180,46 +162,82 @@ impl DockerPipelineRunner {
                     node.id.clone(),
                     pipeline_run.pipeline_job.hash.clone(),
                     Arc::clone(&pipeline_run.outputs),
+                    Arc::clone(&session),
                 ));
         }
 
         let num_of_nodes = graph.node_count();
-        println!("Waiting for {num_of_nodes} nodes to be ready");
         let mut ready_nodes = 0;
 
         // Wait for all nodes to be ready before sending inputs
         while (subscriber.recv_async().await).is_ok() {
             // Message is empty, just increment the counter
             ready_nodes += 1;
-            println!("number of ready nodes: {ready_nodes}");
 
             if ready_nodes == num_of_nodes {
                 break; // All nodes are ready, we can start sending inputs
             }
         }
 
-        println!(
-            "All nodes are ready, starting pipeline run: {}",
-            pipeline_job_hash
-        );
+        // Submit the input_packets to the correct key_exp
+        let input_node_key_exp = format!("{pipeline_job_hash}/{INPUT_KEY_EXP}");
+        for packet in &pipeline_run.pipeline_job.input_packets {
+            // Send the packet to the input node key_exp
+            let payload_encoded = encode_to_vec(
+                PathSet::Unary(Blob {
+                    kind: BlobKind::File,
+                    location: URI {
+                        namespace: "asdfasdf".to_owned(),
+                        path: "asdfasdf".into(),
+                    },
+                    checksum: "".to_owned(),
+                }),
+                config::standard(),
+            )?;
+
+            let (decoded_packet, _): (PathSet, usize) =
+                decode_from_slice(&payload_encoded, config::standard())?;
+            println!("decoded packet: {:?}", decoded_packet);
+            println!(
+                "Payload bytes: {:?}",
+                encode_to_vec(
+                    NodeOutput::Packet("input_node".to_owned(), packet.clone()),
+                    config::standard(),
+                )?
+            );
+            session
+                .put(
+                    &input_node_key_exp,
+                    encode_to_vec(
+                        NodeOutput::Packet("input_node".to_owned(), packet.clone()),
+                        config::standard(),
+                    )?,
+                )
+                .await
+                .context(selector::AgentCommunicationFailure {})?;
+        }
 
-        // // Submit the input_packets to the correct key_exp
-        // for packet in &pipeline_run.pipeline_job.input_packets {
-        //     println!("Sending packet");
-        //     // Send the packet to the input node key_exp
-        //     session
-        //         .put(
-        //             format!("{pipeline_job_hash}/{INPUT_KEY_EXP}"),
-        //             encode_to_vec(packet, config::standard())?,
-        //         )
-        //         .await
-        //         .context(selector::AgentCommunicationFailure {})?;
-        // }
+        // Send the complete processing message for the input node
+        session
+            .put(
+                input_node_key_exp,
+                encode_to_vec(
+                    NodeOutput::ProcessingCompleted("input_node".to_owned()),
+                    config::standard(),
+                )?,
+            )
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
 
         // Insert into the list of pipeline runs
         self.pipeline_runs
             .insert(pipeline_job_hash.clone(), pipeline_run);
 
+        println!(
+            "Pipeline run started with id: {} and hash: {}",
+            pipeline_job_hash, pipeline_job_hash
+        );
+
         Ok(pipeline_job_hash)
     }
 
@@ -237,8 +255,12 @@ impl DockerPipelineRunner {
                     key: pipeline_run_id.to_owned(),
                 })?;
 
+        println!("len of node_tasks: {}", pipeline_run.node_tasks.len());
+        println!("Join set {:?}", pipeline_run.node_tasks);
         // Wait for all the tasks to complete
         while let Some(result) = pipeline_run.node_tasks.join_next().await {
+            println!("Join set {:?}", pipeline_run.node_tasks);
+            println!("Task completed, result: {:?}", result);
             match result {
                 Ok(Ok(())) => {} // Task completed successfully
                 Ok(Err(err)) => {
@@ -250,6 +272,8 @@ impl DockerPipelineRunner {
                     return Err(err.into());
                 }
             }
+            pipeline_run.node_tasks.abort_all();
+            panic!();
         }
 
         Ok(PipelineResult {
@@ -294,11 +318,9 @@ impl DockerPipelineRunner {
         node_id: String,
         pipeline_run_id: String,
         outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>,
+        session: Arc<zenoh::Session>,
     ) -> Result<()> {
         // Create a zenoh session
-        let session = zenoh::open(zenoh::Config::default())
-            .await
-            .context(selector::AgentCommunicationFailure {})?;
         let subscriber = session
             .declare_subscriber(format!(
                 "{pipeline_run_id}/{node_id}/outputs/{SUCCESS_KEY_EXP}"
@@ -324,11 +346,15 @@ impl DockerPipelineRunner {
                         .push(hash_map);
                 }
                 NodeOutput::ProcessingCompleted(_) => {
-                    // Handle processing completed message if needed
+                    // Processing is completed, thus we can exit this task
+                    break;
                 }
             }
         }
 
+        // Print exit message
+        println!("Capture task for node {} completed.", node_id);
+
         Ok(())
     }
 
@@ -342,15 +368,14 @@ impl DockerPipelineRunner {
     ///
     /// # Errors
     /// Will error out if the kernel for the node is not found or if the
-    async fn create_node_processing_task(
+    async fn spawn_node_processing_task(
         node: Node,
         pipeline: Pipeline,
         pipeline_job_id: String,
         namespace: String,
         namespace_lookup: HashMap<String, PathBuf>,
+        session: Arc<zenoh::Session>,
     ) -> Result<()> {
-        // Print out node id for debugging
-        println!("Creating processing task for node: {}", node.id);
         // Create the correct processor for the node based on the kernel type
         let node_processor: Arc<Mutex<Box<dyn NodeProcessor>>> = Arc::new(Mutex::new(
             match get(&pipeline.kernel_lut, &node.kernel_hash)? {
@@ -367,13 +392,6 @@ impl DockerPipelineRunner {
             },
         ));
 
-        // Create the zenoh session
-        let session = Arc::new(
-            zenoh::open(zenoh::Config::default())
-                .await
-                .context(selector::AgentCommunicationFailure {})?,
-        );
-
         // Create a joinset to spawn and handle incoming messages tasks
         let mut listener_tasks = JoinSet::new();
 
@@ -381,7 +399,6 @@ impl DockerPipelineRunner {
         let mut key_exps_to_subscribe_to = pipeline
             .get_parents_for_node(&node)
             .map(|parent_node| {
-                println!("Setting up listener for parent node: {}", parent_node.id);
                 format!(
                     "{pipeline_job_id}/{}/outputs/{SUCCESS_KEY_EXP}",
                     parent_node.id
@@ -395,7 +412,7 @@ impl DockerPipelineRunner {
         }
 
         // Create a subscriber for each of the parent nodes (Should only be 1, unless it is a joiner node)
-        for key_exp in key_exps_to_subscribe_to {
+        for key_exp in &key_exps_to_subscribe_to {
             let subscriber = session
                 .declare_subscriber(key_exp)
                 .await
@@ -419,6 +436,36 @@ impl DockerPipelineRunner {
             Arc::clone(&session),
         ));
 
+        // Wait for all tasks to be spawned and reply with ready message
+        // This is to ensure that the pipeline run knows when all tasks are ready to receive inputs
+
+        let mut num_of_ready_subcribers: usize = 0;
+        // Build the subscriber
+        let status_subscriber = session
+            .declare_subscriber(format!(
+                "{pipeline_job_id}/{}/subscriber/status/ready",
+                node.id
+            ))
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+
+        while status_subscriber.recv_async().await.is_ok() {
+            num_of_ready_subcribers += 1;
+            if num_of_ready_subcribers == key_exps_to_subscribe_to.len() {
+                // +1 for the stop request task
+                break; // All tasks are ready, we can start sending inputs
+            }
+        }
+
+        // Send a ready message so the pipeline knows when to start sending inputs
+        session
+            .put(
+                format!("{pipeline_job_id}/{}/status/ready", node.id),
+                &node.id,
+            )
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+
         // Wait for all task to complete
         listener_tasks.join_all().await;
 
@@ -434,34 +481,33 @@ impl DockerPipelineRunner {
         namespace_lookup: HashMap<String, PathBuf>,
         session: Arc<zenoh::Session>,
     ) -> Result<()> {
-        // Send a ready message so the pipeline knows when to start sending inputs
-        let result = session
+        // We do not know when tokio will start executing this task, therefore we need to send a ready message
+        // back to our spawner task
+        session
             .put(
-                format!("{pipeline_job_id}/{node_id}/status/ready"),
+                format!("{pipeline_job_id}/{node_id}/subscriber/status/ready"),
                 &node_id,
             )
             .await
-            .context(selector::AgentCommunicationFailure {});
+            .context(selector::AgentCommunicationFailure {})?;
 
-        // Print out if the ready message was sent successfully
-        if let Err(err) = result {
-            eprintln!("Failed to send ready message for node {}: {}", node_id, err);
-        } else {
+        while let Ok(payload) = subscriber.recv_async().await {
+            // Extract the message from the payload
             println!(
-                "Ready message sent for node {}, with key exp {}",
+                "Received message for node {}: {:?}",
                 node_id,
-                format!("{pipeline_job_id}/{node_id}/status/ready")
+                payload.payload().to_bytes()
             );
-        }
-
-        println!("Listening for messages on node: {}", node_id);
-
-        while let Ok(payload) = subscriber.recv_async().await {
-            // Extract the message from the payload
 
             let (msg, _): (NodeOutput, usize) =
-                decode_from_slice(&payload.payload().to_bytes(), config::standard())?;
-            println!("Received message for node {}: {:?}", node_id, msg);
+                match decode_from_slice(&payload.payload().to_bytes(), config::standard()) {
+                    Ok(msg) => msg,
+                    Err(err) => {
+                        eprintln!("Failed to decode message: {err}");
+                        panic!("Failed to decode message: {err}");
+                    }
+                };
+
             match msg {
                 NodeOutput::Packet(sender_id, hash_map) => {
                     println!(
@@ -481,6 +527,10 @@ impl DockerPipelineRunner {
                 }
                 NodeOutput::ProcessingCompleted(sender_id) => {
                     // Notify the processor that the parent node has completed processing
+                    println!(
+                        "Received processing completed message for node {}",
+                        sender_id
+                    );
                     if node_processor
                         .lock()
                         .await
@@ -500,12 +550,12 @@ impl DockerPipelineRunner {
                             )
                             .await
                             .context(selector::AgentCommunicationFailure {})?;
+                        break;
                     }
                 }
             }
-
-            // Process the message based on its type
         }
+
         Ok::<(), OrcaError>(())
     }
 
@@ -756,8 +806,6 @@ impl NodeProcessor for MapperProcessor {
 
             Ok(())
         });
-
-        println!("Successfully started processor for node: {}", node_id);
         Ok(())
     }
 
@@ -896,7 +944,6 @@ impl NodeProcessor for JoinerProcessor {
                 Ok(())
             });
         }
-        println!("Successfully started processor for node: {}", node_id);
         Ok(())
     }
 
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index a9f0238c..5492c13a 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -10,21 +10,21 @@ use std::collections::HashMap;
 
 use orcapod::uniffi::{error::Result, pipeline_runner::runner::DockerPipelineRunner};
 use snafu::ResultExt;
-use tokio::time::sleep;
+use tokio::{task::JoinSet, time::sleep};
 
 use crate::fixture::TestDirs;
 use fixture::pipeline_job;
 
-#[tokio::test(flavor = "multi_thread", worker_threads = 32)]
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn basic_run() -> Result<()> {
     let pipeline_job = pipeline_job()?;
 
     // Create zenoh to monitor the node ready message
     let zenoh = zenoh::open(zenoh::Config::default()).await.unwrap(); // Replace with the correct error variant if needed
-    let sub = zenoh.declare_subscriber("**").await.unwrap();
 
     tokio::spawn({
         async move {
+            let sub = zenoh.declare_subscriber("**").await.unwrap();
             // Receive loop ready, publish ready message
             println!("Listening for messages...");
             loop {
@@ -35,45 +35,31 @@ async fn basic_run() -> Result<()> {
                             msg.payload().try_to_string().unwrap()
                         );
                     }
-                    Err(err) => println!("Error receiving message: {}", err),
+                    Err(err) => {
+                        println!("Error receiving message: {}", err);
+                        break;
+                    }
                 }
             }
         }
     });
 
-    let zenoh2 = zenoh::open(zenoh::Config::default()).await.unwrap();
-
-    let joiner = tokio::spawn(async move {
-        sleep(tokio::time::Duration::from_secs(2)).await;
-        // Send a bunch of messsage to the channel
-        for i in 0..10 {
-            zenoh2
-                .put(format!("test/{}", i), format!("message {}", i).as_bytes())
-                .await
-                .unwrap();
-            println!("Sent message {}", i);
-        }
-    });
+    // Create the runner
+    let mut runner = DockerPipelineRunner::new();
+
+    let test_dirs = TestDirs::new(&HashMap::from([(
+        "default".to_owned(),
+        Some("./tests/extra/data/"),
+    )]))?;
+    let namespace_lookup = test_dirs.namespace_lookup();
+
+    let pipeline_run = runner
+        .start(pipeline_job, "default", &namespace_lookup)
+        .await?;
 
-    joiner.await.unwrap();
-    // // Create the runner
-    // let mut runner = DockerPipelineRunner::new();
-
-    // let test_dirs = TestDirs::new(&HashMap::from([(
-    //     "default".to_owned(),
-    //     Some("./tests/extra/data/"),
-    // )]))?;
-    // let namespace_lookup = test_dirs.namespace_lookup();
-
-    // let pipeline_run = runner
-    //     .start(pipeline_job, "default", &namespace_lookup)
-    //     .await?;
-
-    // sleep(std::time::Duration::from_secs(5)).await;
-    // panic!();
-    // // Wait for the pipeline run to complete
-    // let pipeline_result = runner.get_result(&pipeline_run).await?;
-    // println!("{:?}", pipeline_result.output_packets);
+    // Wait for the pipeline run to complete
+    let pipeline_result = runner.get_result(&pipeline_run).await?;
+    println!("{:?}", pipeline_result.output_packets);
 
     Ok(())
 }

From dc3be339175fc286953e7e87d116ca53550fea00 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Mon, 21 Jul 2025 23:49:26 +0000
Subject: [PATCH 20/29] Remove bincode and switch to json. Fix a few joining
 error

---
 Cargo.toml                           |   1 -
 src/uniffi/pipeline_runner/runner.rs | 180 ++++++++-------------------
 tests/pipeline_runner.rs             |  36 +++---
 3 files changed, 67 insertions(+), 150 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index d70b1059..1340c5d7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,7 +29,6 @@ crate-type = ["rlib", "cdylib"]
 [dependencies]
 # make async fn in traits work with dyn traits
 async-trait = "0.1.88"
-bincode = { version = "2.0.1", features = ["serde"] }
 # docker API in orchestrator
 bollard = "0.17.1"
 # datetime utilities
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index c43a5799..627c860d 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -7,10 +7,6 @@ use crate::{
     },
 };
 use async_trait::async_trait;
-use bincode::{
-    config, de,
-    serde::{decode_from_slice, encode_to_vec},
-};
 use itertools::Itertools as _;
 use serde::{Deserialize, Serialize};
 use serde_yaml::Serializer;
@@ -20,6 +16,7 @@ use std::{
     fmt::{Display, Formatter, Result as FmtResult},
     hash::{Hash, Hasher},
     path::PathBuf,
+    result,
     sync::Arc,
 };
 use tokio::{
@@ -183,35 +180,13 @@ impl DockerPipelineRunner {
         let input_node_key_exp = format!("{pipeline_job_hash}/{INPUT_KEY_EXP}");
         for packet in &pipeline_run.pipeline_job.input_packets {
             // Send the packet to the input node key_exp
-            let payload_encoded = encode_to_vec(
-                PathSet::Unary(Blob {
-                    kind: BlobKind::File,
-                    location: URI {
-                        namespace: "asdfasdf".to_owned(),
-                        path: "asdfasdf".into(),
-                    },
-                    checksum: "".to_owned(),
-                }),
-                config::standard(),
-            )?;
-
-            let (decoded_packet, _): (PathSet, usize) =
-                decode_from_slice(&payload_encoded, config::standard())?;
-            println!("decoded packet: {:?}", decoded_packet);
-            println!(
-                "Payload bytes: {:?}",
-                encode_to_vec(
-                    NodeOutput::Packet("input_node".to_owned(), packet.clone()),
-                    config::standard(),
-                )?
-            );
             session
                 .put(
                     &input_node_key_exp,
-                    encode_to_vec(
-                        NodeOutput::Packet("input_node".to_owned(), packet.clone()),
-                        config::standard(),
-                    )?,
+                    serde_json::to_string(&NodeOutput::Packet(
+                        "input_node".to_owned(),
+                        packet.clone(),
+                    ))?,
                 )
                 .await
                 .context(selector::AgentCommunicationFailure {})?;
@@ -221,10 +196,7 @@ impl DockerPipelineRunner {
         session
             .put(
                 input_node_key_exp,
-                encode_to_vec(
-                    NodeOutput::ProcessingCompleted("input_node".to_owned()),
-                    config::standard(),
-                )?,
+                serde_json::to_string(&NodeOutput::ProcessingCompleted("input_node".to_owned()))?,
             )
             .await
             .context(selector::AgentCommunicationFailure {})?;
@@ -233,11 +205,6 @@ impl DockerPipelineRunner {
         self.pipeline_runs
             .insert(pipeline_job_hash.clone(), pipeline_run);
 
-        println!(
-            "Pipeline run started with id: {} and hash: {}",
-            pipeline_job_hash, pipeline_job_hash
-        );
-
         Ok(pipeline_job_hash)
     }
 
@@ -255,12 +222,8 @@ impl DockerPipelineRunner {
                     key: pipeline_run_id.to_owned(),
                 })?;
 
-        println!("len of node_tasks: {}", pipeline_run.node_tasks.len());
-        println!("Join set {:?}", pipeline_run.node_tasks);
         // Wait for all the tasks to complete
         while let Some(result) = pipeline_run.node_tasks.join_next().await {
-            println!("Join set {:?}", pipeline_run.node_tasks);
-            println!("Task completed, result: {:?}", result);
             match result {
                 Ok(Ok(())) => {} // Task completed successfully
                 Ok(Err(err)) => {
@@ -272,8 +235,6 @@ impl DockerPipelineRunner {
                     return Err(err.into());
                 }
             }
-            pipeline_run.node_tasks.abort_all();
-            panic!();
         }
 
         Ok(PipelineResult {
@@ -287,7 +248,6 @@ impl DockerPipelineRunner {
     /// Will error out if the pipeline run is not found or if any of the tasks fail to stop correctly
     pub async fn stop(&mut self, pipeline_run_id: &str) -> Result<()> {
         // To stop the pipeline run, we need to send a stop message to all the tasks
-
         // Get the pipeline run first
         let pipeline_run =
             self.pipeline_runs
@@ -330,14 +290,10 @@ impl DockerPipelineRunner {
 
         while let Ok(payload) = subscriber.recv_async().await {
             // Extract the message from the payload
-            let (msg, _): (NodeOutput, usize) =
-                decode_from_slice(&payload.payload().to_bytes(), config::standard())?;
+            let msg: NodeOutput = serde_json::from_slice(&payload.payload().to_bytes())?;
 
             match msg {
                 NodeOutput::Packet(sender_id, hash_map) => {
-                    // Optionally, you can log or print the output packet
-                    println!("Captured output from node {}: {:?}", sender_id, hash_map);
-
                     // Store the output packet in the outputs map
                     let mut outputs_lock = outputs.write().await;
                     outputs_lock
@@ -351,10 +307,6 @@ impl DockerPipelineRunner {
                 }
             }
         }
-
-        // Print exit message
-        println!("Capture task for node {} completed.", node_id);
-
         Ok(())
     }
 
@@ -392,7 +344,7 @@ impl DockerPipelineRunner {
             },
         ));
 
-        // Create a joinset to spawn and handle incoming messages tasks
+        // Create a join set to spawn and handle incoming messages tasks
         let mut listener_tasks = JoinSet::new();
 
         // Create the list of key_expressions to subscribe to
@@ -429,10 +381,12 @@ impl DockerPipelineRunner {
             ));
         }
 
-        // Create the task to handle stop request
-        listener_tasks.spawn(Self::start_stop_request_task(
+        // Create the listener task for the stop request
+        let mut stop_listener_task = JoinSet::new();
+
+        stop_listener_task.spawn(Self::start_stop_request_task(
             Arc::clone(&node_processor),
-            pipeline_job_id.clone(),
+            format!("{pipeline_job_id}/{}/stop", node.id),
             Arc::clone(&session),
         ));
 
@@ -469,6 +423,9 @@ impl DockerPipelineRunner {
         // Wait for all task to complete
         listener_tasks.join_all().await;
 
+        // Abort the stop listener task since we don't need it anymore
+        stop_listener_task.abort_all();
+
         Ok(())
     }
 
@@ -493,27 +450,8 @@ impl DockerPipelineRunner {
 
         while let Ok(payload) = subscriber.recv_async().await {
             // Extract the message from the payload
-            println!(
-                "Received message for node {}: {:?}",
-                node_id,
-                payload.payload().to_bytes()
-            );
-
-            let (msg, _): (NodeOutput, usize) =
-                match decode_from_slice(&payload.payload().to_bytes(), config::standard()) {
-                    Ok(msg) => msg,
-                    Err(err) => {
-                        eprintln!("Failed to decode message: {err}");
-                        panic!("Failed to decode message: {err}");
-                    }
-                };
-
-            match msg {
+            match serde_json::from_slice(&payload.payload().to_bytes())? {
                 NodeOutput::Packet(sender_id, hash_map) => {
-                    println!(
-                        "Received packet from {} for node {}: {:?}",
-                        sender_id, node_id, hash_map
-                    );
                     // Process the packet using the node processor
                     node_processor.lock().await.process_packet(
                         &sender_id,
@@ -527,10 +465,6 @@ impl DockerPipelineRunner {
                 }
                 NodeOutput::ProcessingCompleted(sender_id) => {
                     // Notify the processor that the parent node has completed processing
-                    println!(
-                        "Received processing completed message for node {}",
-                        sender_id
-                    );
                     if node_processor
                         .lock()
                         .await
@@ -543,15 +477,14 @@ impl DockerPipelineRunner {
                         session
                             .put(
                                 output_key_exp,
-                                encode_to_vec(
-                                    NodeOutput::ProcessingCompleted(node_id.clone()),
-                                    config::standard(),
-                                )?,
+                                serde_json::to_string(&NodeOutput::ProcessingCompleted(
+                                    node_id.clone(),
+                                ))?,
                             )
                             .await
                             .context(selector::AgentCommunicationFailure {})?;
-                        break;
                     }
+                    break;
                 }
             }
         }
@@ -691,36 +624,29 @@ impl NodeProcessor for PodProcessor {
         let node_id_clone = node_id.to_owned();
         let output_key_exp_clone = output_key_exp.to_owned();
         self.processing_tasks.spawn(async move {
-            println!(
-                "Simulating Executing pod job: {} with pod hash: {}",
-                pod_job.hash, pod_job.pod.hash
-            );
-
             // For now we will just send the input_packet to the success channel
             session
                 .put(
-                    output_key_exp_clone + SUCCESS_KEY_EXP,
-                    encode_to_vec(
-                        NodeOutput::Packet(node_id_clone, output_packet),
-                        config::standard(),
-                    )?,
+                    output_key_exp_clone + "/" + SUCCESS_KEY_EXP,
+                    serde_json::to_string(&NodeOutput::Packet(node_id_clone, output_packet))?,
                 )
                 .await
                 .context(selector::AgentCommunicationFailure {})?;
 
             Ok(())
         });
-
-        println!("Successfully started processor for node: {}", node_id);
         Ok(())
     }
 
     async fn mark_parent_as_complete(&mut self, _parent_node_id: &str) -> bool {
         // For pod we only have one parent, thus execute the exit case
-        while (self.processing_tasks.join_next().await).is_some() {
-            // Wait for all tasks to complete
+        while let Some(result) = self.processing_tasks.join_next().await {
+            match result {
+                Ok(Ok(())) => {}
+                Ok(Err(err)) => {}
+                Err(err) => {}
+            }
         }
-
         true
     }
 
@@ -776,11 +702,11 @@ impl NodeProcessor for MapperProcessor {
                 // Send the packet outwards
                 session
                     .put(
-                        output_key_exp_clone.clone() + SUCCESS_KEY_EXP,
-                        encode_to_vec(
-                            NodeOutput::Packet(node_id_clone.clone(), output_map),
-                            config::standard(),
-                        )?,
+                        format!("{}/{}", output_key_exp_clone, SUCCESS_KEY_EXP),
+                        &serde_json::to_string(&NodeOutput::Packet(
+                            node_id_clone.clone(),
+                            output_map,
+                        ))?,
                     )
                     .await
                     .context(selector::AgentCommunicationFailure {})?;
@@ -791,19 +717,15 @@ impl NodeProcessor for MapperProcessor {
                 // If there was an error, we send it to the failure channel
                 session
                     .put(
-                        output_key_exp_clone + FAILURE_KEY_EXP,
-                        encode_to_vec(
-                            &ProcessingFailure {
-                                node_id: node_id_clone,
-                                error: err.to_string(),
-                            },
-                            config::standard(),
-                        )?,
+                        format!("{}/{}", output_key_exp_clone, FAILURE_KEY_EXP),
+                        serde_json::to_string(&ProcessingFailure {
+                            node_id: node_id_clone.clone(),
+                            error: err.to_string(),
+                        })?,
                     )
                     .await
                     .context(selector::AgentCommunicationFailure {})?;
             }
-
             Ok(())
         });
         Ok(())
@@ -906,17 +828,16 @@ impl NodeProcessor for JoinerProcessor {
             self.processing_tasks.spawn(async move {
                 // Convert Vec<Vec<HashMap<...>>> to Vec<&Vec<HashMap<...>>> for compute_cartesian_product
                 let cartesian_product = Self::compute_cartesian_product(&factors);
-
                 // Post all products to the output channel
                 for output_packet in cartesian_product {
                     let result = {
                         session
                             .put(
-                                output_key_exp_clone.clone() + SUCCESS_KEY_EXP,
-                                encode_to_vec(
-                                    NodeOutput::Packet(node_id_clone.clone(), output_packet),
-                                    config::standard(),
-                                )?,
+                                format!("{}/{}", output_key_exp_clone, SUCCESS_KEY_EXP),
+                                serde_json::to_string(&NodeOutput::Packet(
+                                    node_id_clone.clone(),
+                                    output_packet,
+                                ))?,
                             )
                             .await
                             .context(selector::AgentCommunicationFailure {})?;
@@ -927,14 +848,11 @@ impl NodeProcessor for JoinerProcessor {
                     if let Err(err) = result {
                         session
                             .put(
-                                output_key_exp_clone.clone() + FAILURE_KEY_EXP,
-                                encode_to_vec(
-                                    &ProcessingFailure {
-                                        node_id: node_id_clone.clone(),
-                                        error: err.to_string(),
-                                    },
-                                    config::standard(),
-                                )?,
+                                format!("{}/{}", output_key_exp_clone, FAILURE_KEY_EXP),
+                                serde_json::to_string(&ProcessingFailure {
+                                    node_id: node_id_clone.clone(),
+                                    error: err.to_string(),
+                                })?,
                             )
                             .await
                             .context(selector::AgentCommunicationFailure {})?;
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 5492c13a..37cb7c0e 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -24,7 +24,7 @@ async fn basic_run() -> Result<()> {
 
     tokio::spawn({
         async move {
-            let sub = zenoh.declare_subscriber("**").await.unwrap();
+            let sub = zenoh.declare_subscriber("**/failure").await.unwrap();
             // Receive loop ready, publish ready message
             println!("Listening for messages...");
             loop {
@@ -64,25 +64,25 @@ async fn basic_run() -> Result<()> {
     Ok(())
 }
 
-// #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-// async fn stop() -> Result<()> {
-//     let pipeline_job = pipeline_job()?;
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn stop() -> Result<()> {
+    let pipeline_job = pipeline_job()?;
 
-//     // Create the runner
-//     let mut runner = DockerPipelineRunner::new();
+    // Create the runner
+    let mut runner = DockerPipelineRunner::new();
 
-//     let test_dirs = TestDirs::new(&HashMap::from([(
-//         "default".to_owned(),
-//         Some("./tests/extra/data/"),
-//     )]))?;
-//     let namespace_lookup = test_dirs.namespace_lookup();
+    let test_dirs = TestDirs::new(&HashMap::from([(
+        "default".to_owned(),
+        Some("./tests/extra/data/"),
+    )]))?;
+    let namespace_lookup = test_dirs.namespace_lookup();
 
-//     let pipeline_run = runner
-//         .start(pipeline_job, "default", &namespace_lookup)
-//         .await?;
+    let pipeline_run = runner
+        .start(pipeline_job, "default", &namespace_lookup)
+        .await?;
 
-//     // Abort the pipeline run
-//     runner.stop(&pipeline_run).await?;
+    // Abort the pipeline run
+    runner.stop(&pipeline_run).await?;
 
-//     Ok(())
-// }
+    Ok(())
+}

From 83c9eca79e373b6ae16c0b805b48dc5b0fda420d Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Wed, 23 Jul 2025 02:41:16 +0000
Subject: [PATCH 21/29] Add group and host name

---
 Cargo.toml                           |   1 +
 src/core/error.rs                    |  23 ------
 src/uniffi/error.rs                  |  12 ---
 src/uniffi/pipeline_runner/runner.rs | 117 +++++++++++++++------------
 tests/pipeline_runner.rs             |  48 ++++-------
 5 files changed, 83 insertions(+), 118 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 1340c5d7..171236d9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -48,6 +48,7 @@ getset = { version = "0.1.5", git = "https://github.com/guzman-raphael/getset.gi
 glob = "0.3.1"
 # strings to snake_case
 heck = "0.5.0"
+hostname = "0.4.1"
 # hashmaps that preserve insertion order
 indexmap = { version = "2.9.0", features = ["serde"] }
 itertools = "0.14.0"
diff --git a/src/core/error.rs b/src/core/error.rs
index ada0b0e2..cffb538c 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -1,5 +1,4 @@
 use crate::uniffi::error::{Kind, OrcaError};
-use bincode::error::{DecodeError, EncodeError};
 use bollard::errors::Error as BollardError;
 use glob;
 use serde_json;
@@ -23,26 +22,6 @@ impl From<BollardError> for OrcaError {
         }
     }
 }
-impl From<DecodeError> for OrcaError {
-    fn from(error: DecodeError) -> Self {
-        Self {
-            kind: Kind::DecodeError {
-                source: error,
-                backtrace: Some(Backtrace::capture()),
-            },
-        }
-    }
-}
-impl From<EncodeError> for OrcaError {
-    fn from(error: EncodeError) -> Self {
-        Self {
-            kind: Kind::EncodingError {
-                source: error,
-                backtrace: Some(Backtrace::capture()),
-            },
-        }
-    }
-}
 impl From<oneshot::error::RecvError> for OrcaError {
     fn from(error: oneshot::error::RecvError) -> Self {
         Self {
@@ -147,8 +126,6 @@ impl fmt::Debug for OrcaError {
             | Kind::NoTagFoundInContainerAltImage { backtrace, .. }
             | Kind::BollardError { backtrace, .. }
             | Kind::ChannelReceiveError { backtrace, .. }
-            | Kind::DecodeError { backtrace, .. }
-            | Kind::EncodingError { backtrace, .. }
             | Kind::GlobPatternError { backtrace, .. }
             | Kind::IoError { backtrace, .. }
             | Kind::PathPrefixError { backtrace, .. }
diff --git a/src/uniffi/error.rs b/src/uniffi/error.rs
index c932570e..e94159f4 100644
--- a/src/uniffi/error.rs
+++ b/src/uniffi/error.rs
@@ -2,8 +2,6 @@
     clippy::field_scoped_visibility_modifiers,
     reason = "Needed since SNAFU dynamically generating selectors."
 )]
-
-use bincode::error::{DecodeError, EncodeError};
 use bollard::errors::Error as BollardError;
 use glob;
 use serde_json;
@@ -108,16 +106,6 @@ pub(crate) enum Kind {
         backtrace: Option<Backtrace>,
     },
     #[snafu(transparent)]
-    DecodeError {
-        source: DecodeError,
-        backtrace: Option<Backtrace>,
-    },
-    #[snafu(transparent)]
-    EncodingError {
-        source: EncodeError,
-        backtrace: Option<Backtrace>,
-    },
-    #[snafu(transparent)]
     GlobPatternError {
         source: glob::PatternError,
         backtrace: Option<Backtrace>,
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 627c860d..b4452242 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -2,8 +2,8 @@ use crate::{
     core::{crypto::hash_buffer, model::serialize_hashmap, util::get},
     uniffi::{
         error::{OrcaError, Result, selector},
-        model::{Blob, BlobKind, PathSet, Pod, PodJob, URI},
-        pipeline::{Kernel, Mapper, Node, Pipeline, PipelineJob, PipelineResult},
+        model::{PathSet, Pod, PodJob, URI},
+        pipeline::{self, Kernel, Mapper, Node, Pipeline, PipelineJob, PipelineResult},
     },
 };
 use async_trait::async_trait;
@@ -16,7 +16,6 @@ use std::{
     fmt::{Display, Formatter, Result as FmtResult},
     hash::{Hash, Hasher},
     path::PathBuf,
-    result,
     sync::Arc,
 };
 use tokio::{
@@ -84,6 +83,10 @@ impl Display for PipelineRun {
  */
 #[derive(Default)]
 pub struct DockerPipelineRunner {
+    /// User label on which group of agents this runner is associated with
+    pub group: String,
+    /// The host name of the runner
+    pub host: String,
     pipeline_runs: HashMap<String, PipelineRun>,
 }
 
@@ -97,8 +100,14 @@ pub struct DockerPipelineRunner {
 */
 impl DockerPipelineRunner {
     /// Create a new Docker pipeline runner
-    pub fn new() -> Self {
-        Self::default()
+    /// # Errors
+    /// Will error out if the environment variable `HOSTNAME` is not set
+    pub fn new(group: String) -> Result<Self> {
+        Ok(Self {
+            group,
+            host: hostname::get()?.to_string_lossy().to_string(),
+            pipeline_runs: HashMap::new(),
+        })
     }
 
     /// # Errors
@@ -116,8 +125,8 @@ impl DockerPipelineRunner {
             node_tasks: JoinSet::new(),
         };
 
-        // Get the pipeline_job_hash which will be use to identify the pipeline run
-        let pipeline_job_hash = pipeline_run.pipeline_job.hash.clone();
+        // The id for the pipeline_run is the pipeline_job hash
+        let pipeline_run_id = pipeline_run.pipeline_job.hash.clone();
 
         let graph = &pipeline_run.pipeline_job.pipeline.graph;
 
@@ -129,7 +138,7 @@ impl DockerPipelineRunner {
         );
 
         let subscriber = session
-            .declare_subscriber(format!("{pipeline_job_hash}/*/status/ready"))
+            .declare_subscriber(self.get_base_key_exp(&pipeline_run_id) + "/*/status/ready")
             .await
             .context(selector::AgentCommunicationFailure {})?;
 
@@ -143,7 +152,7 @@ impl DockerPipelineRunner {
                 .spawn(Self::spawn_node_processing_task(
                     node.clone(),
                     pipeline_run.pipeline_job.pipeline.clone(),
-                    pipeline_job_hash.clone(),
+                    self.get_base_key_exp(&pipeline_run_id),
                     namespace.to_owned(),
                     namespace_lookup.clone(),
                     Arc::clone(&session),
@@ -157,9 +166,14 @@ impl DockerPipelineRunner {
                 .node_tasks
                 .spawn(Self::create_capture_task_for_node(
                     node.id.clone(),
-                    pipeline_run.pipeline_job.hash.clone(),
                     Arc::clone(&pipeline_run.outputs),
                     Arc::clone(&session),
+                    format!(
+                        "{}/{}/outputs/{}",
+                        self.get_base_key_exp(&pipeline_run_id),
+                        node.id,
+                        SUCCESS_KEY_EXP,
+                    ),
                 ));
         }
 
@@ -177,7 +191,11 @@ impl DockerPipelineRunner {
         }
 
         // Submit the input_packets to the correct key_exp
-        let input_node_key_exp = format!("{pipeline_job_hash}/{INPUT_KEY_EXP}");
+        let input_node_key_exp = format!(
+            "{}/{}",
+            self.get_base_key_exp(&pipeline_run_id),
+            INPUT_KEY_EXP,
+        );
         for packet in &pipeline_run.pipeline_job.input_packets {
             // Send the packet to the input node key_exp
             session
@@ -203,9 +221,9 @@ impl DockerPipelineRunner {
 
         // Insert into the list of pipeline runs
         self.pipeline_runs
-            .insert(pipeline_job_hash.clone(), pipeline_run);
+            .insert(pipeline_run_id.clone(), pipeline_run);
 
-        Ok(pipeline_job_hash)
+        Ok(pipeline_run_id)
     }
 
     /// Given a pipeline run, wait for all its tasks to complete and return the `PipelineResult`
@@ -247,6 +265,11 @@ impl DockerPipelineRunner {
     /// # Errors
     /// Will error out if the pipeline run is not found or if any of the tasks fail to stop correctly
     pub async fn stop(&mut self, pipeline_run_id: &str) -> Result<()> {
+        let stop_key_exp = format!(
+            "{}/{}/stop",
+            self.get_base_key_exp(pipeline_run_id),
+            pipeline_run_id
+        );
         // To stop the pipeline run, we need to send a stop message to all the tasks
         // Get the pipeline run first
         let pipeline_run =
@@ -262,10 +285,7 @@ impl DockerPipelineRunner {
 
         // Send the stop message into the stop key_exp, the msg is just an empty vector
         session
-            .put(
-                format!("{}/stop", pipeline_run.pipeline_job.hash),
-                Vec::new(),
-            )
+            .put(stop_key_exp, Vec::new())
             .await
             .context(selector::AgentCommunicationFailure {})?;
 
@@ -276,15 +296,13 @@ impl DockerPipelineRunner {
     #[expect(clippy::type_complexity, reason = "Needed for async")]
     async fn create_capture_task_for_node(
         node_id: String,
-        pipeline_run_id: String,
         outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>,
         session: Arc<zenoh::Session>,
+        key_exp_to_sub: String,
     ) -> Result<()> {
         // Create a zenoh session
         let subscriber = session
-            .declare_subscriber(format!(
-                "{pipeline_run_id}/{node_id}/outputs/{SUCCESS_KEY_EXP}"
-            ))
+            .declare_subscriber(key_exp_to_sub)
             .await
             .context(selector::AgentCommunicationFailure {})?;
 
@@ -293,7 +311,7 @@ impl DockerPipelineRunner {
             let msg: NodeOutput = serde_json::from_slice(&payload.payload().to_bytes())?;
 
             match msg {
-                NodeOutput::Packet(sender_id, hash_map) => {
+                NodeOutput::Packet(_, hash_map) => {
                     // Store the output packet in the outputs map
                     let mut outputs_lock = outputs.write().await;
                     outputs_lock
@@ -323,7 +341,7 @@ impl DockerPipelineRunner {
     async fn spawn_node_processing_task(
         node: Node,
         pipeline: Pipeline,
-        pipeline_job_id: String,
+        base_key_exp: String,
         namespace: String,
         namespace_lookup: HashMap<String, PathBuf>,
         session: Arc<zenoh::Session>,
@@ -352,7 +370,7 @@ impl DockerPipelineRunner {
             .get_parents_for_node(&node)
             .map(|parent_node| {
                 format!(
-                    "{pipeline_job_id}/{}/outputs/{SUCCESS_KEY_EXP}",
+                    "{base_key_exp}/{}/outputs/{SUCCESS_KEY_EXP}",
                     parent_node.id
                 )
             })
@@ -360,7 +378,7 @@ impl DockerPipelineRunner {
 
         // If there was no parent node, then this is root node, therefore we need to subscribe to the input node
         if key_exps_to_subscribe_to.is_empty() {
-            key_exps_to_subscribe_to.push(format!("{pipeline_job_id}/{INPUT_KEY_EXP}"));
+            key_exps_to_subscribe_to.push(format!("{base_key_exp}/{INPUT_KEY_EXP}"));
         }
 
         // Create a subscriber for each of the parent nodes (Should only be 1, unless it is a joiner node)
@@ -374,7 +392,7 @@ impl DockerPipelineRunner {
                 subscriber,
                 Arc::clone(&node_processor),
                 node.id.clone(),
-                pipeline_job_id.clone(),
+                base_key_exp.clone(),
                 namespace.clone(),
                 namespace_lookup.clone(),
                 Arc::clone(&session),
@@ -386,26 +404,26 @@ impl DockerPipelineRunner {
 
         stop_listener_task.spawn(Self::start_stop_request_task(
             Arc::clone(&node_processor),
-            format!("{pipeline_job_id}/{}/stop", node.id),
+            format!("{base_key_exp}/{}/stop", node.id),
             Arc::clone(&session),
         ));
 
         // Wait for all tasks to be spawned and reply with ready message
         // This is to ensure that the pipeline run knows when all tasks are ready to receive inputs
 
-        let mut num_of_ready_subcribers: usize = 0;
+        let mut num_of_ready_subscribers: usize = 0;
         // Build the subscriber
         let status_subscriber = session
             .declare_subscriber(format!(
-                "{pipeline_job_id}/{}/subscriber/status/ready",
+                "{base_key_exp}/{}/subscriber/status/ready",
                 node.id
             ))
             .await
             .context(selector::AgentCommunicationFailure {})?;
 
         while status_subscriber.recv_async().await.is_ok() {
-            num_of_ready_subcribers += 1;
-            if num_of_ready_subcribers == key_exps_to_subscribe_to.len() {
+            num_of_ready_subscribers += 1;
+            if num_of_ready_subscribers == key_exps_to_subscribe_to.len() {
                 // +1 for the stop request task
                 break; // All tasks are ready, we can start sending inputs
             }
@@ -413,10 +431,7 @@ impl DockerPipelineRunner {
 
         // Send a ready message so the pipeline knows when to start sending inputs
         session
-            .put(
-                format!("{pipeline_job_id}/{}/status/ready", node.id),
-                &node.id,
-            )
+            .put(format!("{base_key_exp}/{}/status/ready", node.id), &node.id)
             .await
             .context(selector::AgentCommunicationFailure {})?;
 
@@ -433,7 +448,7 @@ impl DockerPipelineRunner {
         subscriber: Subscriber<FifoChannelHandler<Sample>>,
         node_processor: Arc<Mutex<Box<dyn NodeProcessor>>>,
         node_id: String,
-        pipeline_job_id: String,
+        base_key_exp: String,
         namespace: String,
         namespace_lookup: HashMap<String, PathBuf>,
         session: Arc<zenoh::Session>,
@@ -442,7 +457,7 @@ impl DockerPipelineRunner {
         // back to our spawner task
         session
             .put(
-                format!("{pipeline_job_id}/{node_id}/subscriber/status/ready"),
+                format!("{base_key_exp}/{node_id}/subscriber/status/ready"),
                 &node_id,
             )
             .await
@@ -458,7 +473,7 @@ impl DockerPipelineRunner {
                         &node_id,
                         &hash_map,
                         Arc::clone(&session),
-                        &format!("{}/{}/outputs", pipeline_job_id, node_id.clone()),
+                        &format!("{base_key_exp}/{}/outputs", node_id.clone()),
                         &namespace,
                         &namespace_lookup,
                     )?;
@@ -473,7 +488,7 @@ impl DockerPipelineRunner {
                     {
                         // This was the last parent, thus we need to send the processing complete message
                         let output_key_exp =
-                            format!("{pipeline_job_id}/{node_id}/outputs/{SUCCESS_KEY_EXP}");
+                            format!("{base_key_exp}/{node_id}/outputs/{SUCCESS_KEY_EXP}");
                         session
                             .put(
                                 output_key_exp,
@@ -494,11 +509,11 @@ impl DockerPipelineRunner {
 
     async fn start_stop_request_task(
         node_processor: Arc<Mutex<Box<dyn NodeProcessor>>>,
-        pipeline_run_id: String,
+        base_key_exp: String,
         session: Arc<zenoh::Session>,
     ) -> Result<()> {
         let subscriber = session
-            .declare_subscriber(pipeline_run_id.clone() + "/stop")
+            .declare_subscriber(format!("{base_key_exp}/stop"))
             .await
             .context(selector::AgentCommunicationFailure {})?;
         while subscriber.recv_async().await.is_ok() {
@@ -507,6 +522,10 @@ impl DockerPipelineRunner {
         }
         Ok::<(), OrcaError>(())
     }
+
+    fn get_base_key_exp(&self, pipeline_run_id: &str) -> String {
+        format!("{}/{}/{}", self.group, self.host, pipeline_run_id)
+    }
 }
 
 /// Unify the interface for node processors and provide a common way to handle processing of incoming messages
@@ -640,13 +659,7 @@ impl NodeProcessor for PodProcessor {
 
     async fn mark_parent_as_complete(&mut self, _parent_node_id: &str) -> bool {
         // For pod we only have one parent, thus execute the exit case
-        while let Some(result) = self.processing_tasks.join_next().await {
-            match result {
-                Ok(Ok(())) => {}
-                Ok(Err(err)) => {}
-                Err(err) => {}
-            }
-        }
+        while let Some(result) = self.processing_tasks.join_next().await {}
         true
     }
 
@@ -702,7 +715,7 @@ impl NodeProcessor for MapperProcessor {
                 // Send the packet outwards
                 session
                     .put(
-                        format!("{}/{}", output_key_exp_clone, SUCCESS_KEY_EXP),
+                        format!("{output_key_exp_clone}/{SUCCESS_KEY_EXP}"),
                         &serde_json::to_string(&NodeOutput::Packet(
                             node_id_clone.clone(),
                             output_map,
@@ -717,7 +730,7 @@ impl NodeProcessor for MapperProcessor {
                 // If there was an error, we send it to the failure channel
                 session
                     .put(
-                        format!("{}/{}", output_key_exp_clone, FAILURE_KEY_EXP),
+                        format!("{output_key_exp_clone}/{FAILURE_KEY_EXP}"),
                         serde_json::to_string(&ProcessingFailure {
                             node_id: node_id_clone.clone(),
                             error: err.to_string(),
@@ -833,7 +846,7 @@ impl NodeProcessor for JoinerProcessor {
                     let result = {
                         session
                             .put(
-                                format!("{}/{}", output_key_exp_clone, SUCCESS_KEY_EXP),
+                                format!("{output_key_exp_clone}/{SUCCESS_KEY_EXP}"),
                                 serde_json::to_string(&NodeOutput::Packet(
                                     node_id_clone.clone(),
                                     output_packet,
@@ -848,7 +861,7 @@ impl NodeProcessor for JoinerProcessor {
                     if let Err(err) = result {
                         session
                             .put(
-                                format!("{}/{}", output_key_exp_clone, FAILURE_KEY_EXP),
+                                format!("{output_key_exp_clone}/{FAILURE_KEY_EXP}"),
                                 serde_json::to_string(&ProcessingFailure {
                                     node_id: node_id_clone.clone(),
                                     error: err.to_string(),
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 37cb7c0e..1dd90f58 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -1,4 +1,4 @@
-#![expect(missing_docs, reason = "OK in tests.")]
+#![expect(missing_docs, clippy::panic_in_result_fn, reason = "OK in tests.")]
 
 // If 'fixture' is a local module, ensure there is a 'mod fixture;' statement or a 'fixture.rs' file in the same directory or in 'tests/'.
 // If 'fixture' is an external crate, add it to Cargo.toml and import as shown below.
@@ -9,8 +9,6 @@ pub mod fixture;
 use std::collections::HashMap;
 
 use orcapod::uniffi::{error::Result, pipeline_runner::runner::DockerPipelineRunner};
-use snafu::ResultExt;
-use tokio::{task::JoinSet, time::sleep};
 
 use crate::fixture::TestDirs;
 use fixture::pipeline_job;
@@ -19,33 +17,8 @@ use fixture::pipeline_job;
 async fn basic_run() -> Result<()> {
     let pipeline_job = pipeline_job()?;
 
-    // Create zenoh to monitor the node ready message
-    let zenoh = zenoh::open(zenoh::Config::default()).await.unwrap(); // Replace with the correct error variant if needed
-
-    tokio::spawn({
-        async move {
-            let sub = zenoh.declare_subscriber("**/failure").await.unwrap();
-            // Receive loop ready, publish ready message
-            println!("Listening for messages...");
-            loop {
-                match sub.recv_async().await {
-                    Ok(msg) => {
-                        println!(
-                            "Received message: {:?}",
-                            msg.payload().try_to_string().unwrap()
-                        );
-                    }
-                    Err(err) => {
-                        println!("Error receiving message: {}", err);
-                        break;
-                    }
-                }
-            }
-        }
-    });
-
     // Create the runner
-    let mut runner = DockerPipelineRunner::new();
+    let mut runner = DockerPipelineRunner::new("test".to_owned())?;
 
     let test_dirs = TestDirs::new(&HashMap::from([(
         "default".to_owned(),
@@ -59,17 +32,22 @@ async fn basic_run() -> Result<()> {
 
     // Wait for the pipeline run to complete
     let pipeline_result = runner.get_result(&pipeline_run).await?;
+
+    assert!(
+        pipeline_result.output_packets.len() == 1,
+        "Expected exactly one output packet."
+    );
     println!("{:?}", pipeline_result.output_packets);
 
     Ok(())
 }
 
-#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn stop() -> Result<()> {
     let pipeline_job = pipeline_job()?;
 
     // Create the runner
-    let mut runner = DockerPipelineRunner::new();
+    let mut runner = DockerPipelineRunner::new("test".to_owned())?;
 
     let test_dirs = TestDirs::new(&HashMap::from([(
         "default".to_owned(),
@@ -86,3 +64,11 @@ async fn stop() -> Result<()> {
 
     Ok(())
 }
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn check_group_and_host() -> Result<()> {
+    let runner = DockerPipelineRunner::new("test".to_owned())?;
+    assert_eq!(runner.group, "test");
+    assert!(!runner.host.is_empty(), "Host should not be empty");
+    Ok(())
+}

From 1df8295e1d012ac2fdb1c30f225dbdd2a2785ffe Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Wed, 23 Jul 2025 10:14:02 +0000
Subject: [PATCH 22/29] fix unit test

---
 src/uniffi/pipeline_runner/runner.rs | 346 ++++++++++++++++++---------
 1 file changed, 229 insertions(+), 117 deletions(-)

diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index b4452242..37122c4a 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -3,12 +3,13 @@ use crate::{
     uniffi::{
         error::{OrcaError, Result, selector},
         model::{PathSet, Pod, PodJob, URI},
-        pipeline::{self, Kernel, Mapper, Node, Pipeline, PipelineJob, PipelineResult},
+        pipeline::{Kernel, Mapper, Node, Pipeline, PipelineJob, PipelineResult},
     },
 };
 use async_trait::async_trait;
+use derive_more::derive;
 use itertools::Itertools as _;
-use serde::{Deserialize, Serialize};
+use serde::{Deserialize, Serialize, de::value};
 use serde_yaml::Serializer;
 use snafu::{OptionExt as _, ResultExt as _};
 use std::{
@@ -463,20 +464,31 @@ impl DockerPipelineRunner {
             .await
             .context(selector::AgentCommunicationFailure {})?;
 
+        let node_base_output_key_exp = format!("{base_key_exp}/{node_id}/outputs");
         while let Ok(payload) = subscriber.recv_async().await {
             // Extract the message from the payload
             match serde_json::from_slice(&payload.payload().to_bytes())? {
                 NodeOutput::Packet(sender_id, hash_map) => {
                     // Process the packet using the node processor
-                    node_processor.lock().await.process_packet(
+                    let result = node_processor.lock().await.process_packet(
                         &sender_id,
                         &node_id,
                         &hash_map,
                         Arc::clone(&session),
-                        &format!("{base_key_exp}/{}/outputs", node_id.clone()),
+                        &node_base_output_key_exp,
                         &namespace,
                         &namespace_lookup,
-                    )?;
+                    );
+
+                    if let Err(err) = result {
+                        try_to_forward_err_msg(
+                            Arc::clone(&session),
+                            err,
+                            &node_base_output_key_exp,
+                            &node_id,
+                        )
+                        .await;
+                    }
                 }
                 NodeOutput::ProcessingCompleted(sender_id) => {
                     // Notify the processor that the parent node has completed processing
@@ -517,7 +529,7 @@ impl DockerPipelineRunner {
             .await
             .context(selector::AgentCommunicationFailure {})?;
         while subscriber.recv_async().await.is_ok() {
-            // Received a requst to stop, therefore we need to tell the node_processor to shutdown
+            // Received a request to stop, therefore we need to tell the node_processor to shutdown
             node_processor.lock().await.stop();
         }
         Ok::<(), OrcaError>(())
@@ -541,7 +553,7 @@ trait NodeProcessor: Send + Sync {
         node_id: &str,
         packet: &HashMap<String, PathSet>,
         session: Arc<zenoh::Session>,
-        output_key_exp: &str,
+        base_output_key_exp: &str,
         namespace: &str,
         namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()>;
@@ -562,11 +574,40 @@ trait NodeProcessor: Send + Sync {
     fn stop(&mut self);
 }
 
+/// Util function to handle forwarding error messages to the failure channel
+async fn try_to_forward_err_msg(
+    session: Arc<zenoh::Session>,
+    err: OrcaError,
+    node_base_output_key_exp: &str,
+    node_id: &str,
+) {
+    match async {
+        session
+            .put(
+                format!("{node_base_output_key_exp}/{FAILURE_KEY_EXP}"),
+                serde_json::to_string(&ProcessingFailure {
+                    node_id: node_id.to_owned(),
+                    error: err.to_string(),
+                })?,
+            )
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+        Ok::<(), OrcaError>(())
+    }
+    .await
+    {
+        Ok(()) => {}
+        Err(send_err) => {
+            eprintln!("Failed to send failure message: {send_err}");
+        }
+    }
+}
+
 /// Processor for Pods
 /// Currently missing implementation to call agents for actual pod processing
 struct PodProcessor {
     pod: Arc<Pod>,
-    processing_tasks: JoinSet<Result<(), OrcaError>>,
+    processing_tasks: JoinSet<()>,
 }
 
 impl PodProcessor {
@@ -591,13 +632,10 @@ impl NodeProcessor for PodProcessor {
         node_id: &str,
         packet: &HashMap<String, PathSet>,
         session: Arc<zenoh::Session>,
-        output_key_exp: &str,
+        base_output_key_exp: &str,
         namespace: &str,
         namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()> {
-        // Process the packet using the pod
-        // Create the pod_job
-
         // We need a unique hash for this given input packet process by the node
         // therefore we need to generate a hash that has the pod_id + input_packet
         let node_id_bytes = node_id.as_bytes().to_vec();
@@ -641,25 +679,42 @@ impl NodeProcessor for PodProcessor {
             .collect::<HashMap<_, _>>();
 
         let node_id_clone = node_id.to_owned();
-        let output_key_exp_clone = output_key_exp.to_owned();
+        let output_key_exp_clone = base_output_key_exp.to_owned();
         self.processing_tasks.spawn(async move {
             // For now we will just send the input_packet to the success channel
-            session
-                .put(
-                    output_key_exp_clone + "/" + SUCCESS_KEY_EXP,
-                    serde_json::to_string(&NodeOutput::Packet(node_id_clone, output_packet))?,
-                )
-                .await
-                .context(selector::AgentCommunicationFailure {})?;
+            let results = async {
+                session
+                    .put(
+                        output_key_exp_clone.clone() + "/" + SUCCESS_KEY_EXP,
+                        serde_json::to_string(&NodeOutput::Packet(
+                            node_id_clone.clone(),
+                            output_packet,
+                        ))?,
+                    )
+                    .await
+                    .context(selector::AgentCommunicationFailure {})?;
+                Ok::<(), OrcaError>(())
+            };
 
-            Ok(())
+            match results.await {
+                Ok(()) => {}
+                Err(err) => {
+                    try_to_forward_err_msg(
+                        session,
+                        err,
+                        &format!("{output_key_exp_clone}/{FAILURE_KEY_EXP}"),
+                        &node_id_clone,
+                    )
+                    .await;
+                }
+            }
         });
         Ok(())
     }
 
     async fn mark_parent_as_complete(&mut self, _parent_node_id: &str) -> bool {
         // For pod we only have one parent, thus execute the exit case
-        while let Some(result) = self.processing_tasks.join_next().await {}
+        while self.processing_tasks.join_next().await.is_some() {}
         true
     }
 
@@ -672,7 +727,7 @@ impl NodeProcessor for PodProcessor {
 /// This processor renames the `input_keys` from the input packet to the `output_keys` defined by the map
 struct MapperProcessor {
     mapper: Arc<Mapper>,
-    processing_tasks: JoinSet<Result<(), OrcaError>>,
+    processing_tasks: JoinSet<()>,
 }
 
 impl MapperProcessor {
@@ -692,17 +747,17 @@ impl NodeProcessor for MapperProcessor {
         node_id: &str,
         packet: &HashMap<String, PathSet>,
         session: Arc<zenoh::Session>,
-        output_key_exp: &str,
+        base_output_key_exp: &str,
         _namespace: &str,
         _namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()> {
         let mapping = self.mapper.mapping.clone();
         let packet_clone = packet.clone();
         let node_id_clone = node_id.to_owned();
-        let output_key_exp_clone = output_key_exp.to_owned();
+        let output_key_exp_clone = base_output_key_exp.to_owned();
 
         self.processing_tasks.spawn(async move {
-            let result = {
+            let result = async {
                 // Apply the mapping to the input packet
                 let output_map = mapping
                     .iter()
@@ -724,22 +779,18 @@ impl NodeProcessor for MapperProcessor {
                     .await
                     .context(selector::AgentCommunicationFailure {})?;
                 Ok::<(), OrcaError>(())
-            };
+            }
+            .await;
 
             if let Err(err) = result {
-                // If there was an error, we send it to the failure channel
-                session
-                    .put(
-                        format!("{output_key_exp_clone}/{FAILURE_KEY_EXP}"),
-                        serde_json::to_string(&ProcessingFailure {
-                            node_id: node_id_clone.clone(),
-                            error: err.to_string(),
-                        })?,
-                    )
-                    .await
-                    .context(selector::AgentCommunicationFailure {})?;
+                try_to_forward_err_msg(
+                    session,
+                    err,
+                    &format!("{output_key_exp_clone}/{FAILURE_KEY_EXP}"),
+                    &node_id_clone,
+                )
+                .await;
             }
-            Ok(())
         });
         Ok(())
     }
@@ -747,7 +798,7 @@ impl NodeProcessor for MapperProcessor {
     async fn mark_parent_as_complete(&mut self, _parent_node_id: &str) -> bool {
         // For mapper we only have one parent, thus execute the exit case
         while (self.processing_tasks.join_next().await).is_some() {
-            // Wait for all tasks to complete
+            // The only error that should be forwarded here is the failure to send the output packet
         }
 
         true
@@ -761,11 +812,12 @@ impl NodeProcessor for MapperProcessor {
 /// Processor for Joiner nodes
 /// This processor combines packets from multiple parent nodes into a single output packet
 /// It uses a cartesian product to combine packets from different parents
+#[derive(Debug)]
 struct JoinerProcessor {
     /// Cache for all packets received by the node
     input_packet_cache: HashMap<String, Vec<HashMap<String, PathSet>>>,
     completed_parents: Vec<String>,
-    processing_tasks: JoinSet<Result<(), OrcaError>>,
+    processing_tasks: JoinSet<()>,
 }
 
 impl JoinerProcessor {
@@ -807,7 +859,7 @@ impl NodeProcessor for JoinerProcessor {
         node_id: &str,
         packet: &HashMap<String, PathSet>,
         session: Arc<zenoh::Session>,
-        output_key_exp: &str,
+        base_output_key_exp: &str,
         _namespace: &str,
         _namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()> {
@@ -820,6 +872,7 @@ impl NodeProcessor for JoinerProcessor {
 
         // Check if we have all the other parents needed to compute the cartesian product
         if self.input_packet_cache.values().all(|v| !v.is_empty()) {
+            // Print we have all the parents
             // Get all the cached packets from other parents
             let other_parent_ids = self
                 .input_packet_cache
@@ -836,15 +889,16 @@ impl NodeProcessor for JoinerProcessor {
 
             // Compute the cartesian product of the factors
             let node_id_clone = node_id.to_owned();
-            let output_key_exp_clone = output_key_exp.to_owned();
+            let output_key_exp_clone = base_output_key_exp.to_owned();
 
             self.processing_tasks.spawn(async move {
                 // Convert Vec<Vec<HashMap<...>>> to Vec<&Vec<HashMap<...>>> for compute_cartesian_product
                 let cartesian_product = Self::compute_cartesian_product(&factors);
                 // Post all products to the output channel
+                let session_clone = Arc::clone(&session);
                 for output_packet in cartesian_product {
-                    let result = {
-                        session
+                    let result = async {
+                        session_clone
                             .put(
                                 format!("{output_key_exp_clone}/{SUCCESS_KEY_EXP}"),
                                 serde_json::to_string(&NodeOutput::Packet(
@@ -855,24 +909,20 @@ impl NodeProcessor for JoinerProcessor {
                             .await
                             .context(selector::AgentCommunicationFailure {})?;
                         Ok::<(), OrcaError>(())
-                    };
+                    }
+                    .await;
 
                     // If the result is an error, we will just send it to the error channel
                     if let Err(err) = result {
-                        session
-                            .put(
-                                format!("{output_key_exp_clone}/{FAILURE_KEY_EXP}"),
-                                serde_json::to_string(&ProcessingFailure {
-                                    node_id: node_id_clone.clone(),
-                                    error: err.to_string(),
-                                })?,
-                            )
-                            .await
-                            .context(selector::AgentCommunicationFailure {})?;
+                        try_to_forward_err_msg(
+                            Arc::clone(&session_clone),
+                            err,
+                            &output_key_exp_clone,
+                            &node_id_clone,
+                        )
+                        .await;
                     }
                 }
-
-                Ok(())
             });
         }
         Ok(())
@@ -904,70 +954,132 @@ impl NodeProcessor for JoinerProcessor {
 #[cfg(test)]
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 #[expect(clippy::panic_in_result_fn, reason = "Unit test")]
+/// This test 3 cases for the joiner node:
+/// The notation is as follows: (parent_id: data_file)
+/// 1. Insufficient parents: It should not output anything until all parents has produce a packet (0: [A] 1: [A] 2: []) -> No output
+/// 2. Sufficient parents: It should output a single packet with the cartesian product of the parents (0: [A] 1: [A] 2: [A]) -> Output: (0: A, 1: A, 2: A)
+/// 3. Additional packet after initial condition is met: It should output a new packet with the cartesian product of the parents (0: [A] 1: [A] 2: [A, B]) -> Output: (0: A, 1: A, 2: B)
+/// 4. Add an additional packet where more than 1 packet will be generated: (0: [A, B] 1: [A] 2: [A, B]) -> Output: (0: B, 1: A, 2: A), (0: B, 1: A, 2: B),
 async fn joiner() -> Result<()> {
-    // let parent_ids = vec!["0".to_owned(), "1".to_owned(), "2".to_owned()];
-
-    // let mut joiner_process = JoinerProcessor::new(parent_ids);
-
-    // // Make each parent has 1 packet
-    // for idx in 0..2 {
-    //     let packet = make_test_packet(format!("data_{idx}.txt").into());
-    //     joiner_process.process_packet(idx, "joiner", packet, session, output_key_exp, namespace, namespace_lookup);
-    // }
-
-    // // Confirm that there should be no output yet
-
-    // // Now we send the missing parent package
-    // // This will yield one unique combination
-    // joiner_process
-    //     .process_packet("2", make_test_packet("data_1.txt".to_owned().into()))
-    //     .await?;
-
-    // // Confirm that the output is sent to the child channel
-    // assert!(
-    //     child_rx.len() == 1,
-    //     "Should have only one message in the channel",
-    // );
-    // assert!(
-    //     child_rx.recv().await.is_some(),
-    //     "Should have received a message"
-    // );
-
-    // // Insert another one
-    // joiner_process
-    //     .process_packet("2", make_test_packet("data_2.txt".to_owned().into()))
-    //     .await?;
-
-    // // The joiner node should send another one
-    // assert!(
-    //     child_rx.len() == 1,
-    //     "Should have only one message in the channel",
-    // );
-    // assert!(
-    //     child_rx.recv().await.is_some(),
-    //     "Should have received a message"
-    // );
-
-    // // Now insert to packet for parent 0, which should yield 2 packets in total
-    // // This is because of the cartesian product
-    // joiner_process
-    //     .process_packet("0", make_test_packet("data_2.txt".to_owned().into()))
-    //     .await?;
-
-    // assert!(
-    //     child_rx.len() == 2,
-    //     "Should have only two messages in the channel",
-    // );
-    // assert!(
-    //     child_rx.recv().await.is_some(),
-    //     "Should have received a message"
-    // );
+    use std::{thread::sleep, time::Duration};
+
+    let parent_ids = vec!["0".to_owned(), "1".to_owned(), "2".to_owned()];
+
+    let mut joiner_processor = JoinerProcessor::new(parent_ids);
+    let session = Arc::new(
+        zenoh::open(zenoh::Config::default())
+            .await
+            .context(selector::AgentCommunicationFailure {})?,
+    );
+
+    let base_output_key_exp = "joiner_unit_test".to_owned();
+
+    // Create a buffer and a listener for the output channel
+    let success_msg = Arc::new(Mutex::new(Vec::new()));
+    let success_sub = session
+        .declare_subscriber(format!("{base_output_key_exp}/{SUCCESS_KEY_EXP}"))
+        .await
+        .context(selector::AgentCommunicationFailure {})?;
+
+    // Create the async test to receive messages from the output channel
+    let mut listener_task = JoinSet::new();
+    let success_msg_clone = Arc::clone(&success_msg);
+    listener_task.spawn(async move {
+        while let Ok(msg) = success_sub.recv() {
+            success_msg_clone.lock().await.push(msg);
+        }
+    });
+
+    // Make each parent has 1 packet
+    for idx in 0..2 {
+        let packet = make_test_packet(format!("key_{idx}"), "data_A.txt".to_string().into());
+        joiner_processor.process_packet(
+            &format!("{idx}"),
+            &idx.to_string(),
+            &packet,
+            Arc::clone(&session),
+            &base_output_key_exp,
+            "",
+            &HashMap::new(),
+        )?;
+    }
+
+    // Confirm that there should be no output yet
+    assert!(
+        success_msg.lock().await.is_empty(),
+        "Should have no messages in the channel",
+    );
+
+    // Now we send the missing parent package
+    // This will yield one unique combination
+    let packet_2_a = make_test_packet("key_2".to_owned(), "data_A.txt".to_owned().into());
+    joiner_processor.process_packet(
+        "2",
+        "2",
+        &packet_2_a,
+        Arc::clone(&session),
+        &base_output_key_exp,
+        "",
+        &HashMap::new(),
+    )?;
+
+    // Wait for the joiner to process and the listener to process the message
+    sleep(Duration::from_millis(100));
+
+    // Confirm that the output is sent to the child channel
+    assert_eq!(
+        success_msg.lock().await.len(),
+        1,
+        "Should have only one message in the channel",
+    );
+
+    let packet_2_b = make_test_packet("key_2".to_owned(), "data_B.txt".to_owned().into());
+    joiner_processor.process_packet(
+        "2",
+        "2",
+        &packet_2_b,
+        Arc::clone(&session),
+        &base_output_key_exp,
+        "",
+        &HashMap::new(),
+    )?;
+
+    // Wait for the joiner to process and the listener to process the message
+    sleep(Duration::from_millis(100));
+
+    // The joiner node should send another one
+    assert_eq!(
+        success_msg.lock().await.len(),
+        2,
+        "Should have only two messages in the channel",
+    );
+
+    let packet_0_b = make_test_packet("key_0".to_owned(), "data_B.txt".to_owned().into());
+    joiner_processor.process_packet(
+        "0",
+        "0",
+        &packet_0_b,
+        Arc::clone(&session),
+        &base_output_key_exp,
+        "",
+        &HashMap::new(),
+    )?;
+
+    // Wait for the joiner to process and the listener to process the message
+    sleep(Duration::from_millis(100));
+
+    // Should be a total of 6 messages in the channel
+    assert_eq!(
+        success_msg.lock().await.len(),
+        4,
+        "Should have 4 messages in the channel",
+    );
 
     Ok(())
 }
 
 #[cfg(test)]
-fn make_test_packet(path: PathBuf) -> HashMap<String, PathSet> {
+fn make_test_packet(key: String, path: PathBuf) -> HashMap<String, PathSet> {
     use crate::uniffi::model::{Blob, BlobKind};
 
     let path_set = PathSet::Unary(Blob {
@@ -979,5 +1091,5 @@ fn make_test_packet(path: PathBuf) -> HashMap<String, PathSet> {
         checksum: String::new(),
     });
 
-    HashMap::from([("key".to_owned(), path_set)])
+    HashMap::from([(key, path_set)])
 }

From 4ed76a8810c2c08193d75c146332de9eb717d6d5 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Wed, 23 Jul 2025 21:46:19 +0000
Subject: [PATCH 23/29] Readd gpu

---
 .devcontainer/devcontainer.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 95a1a1ae..2fe4edcb 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -21,6 +21,7 @@
 	},
 	"runArgs": [
 		"--name=${localWorkspaceFolderBasename}_devcontainer",
+		"--gpus=all",
 		"--privileged",
 		"--cgroupns=host"
 	],

From f6bd697ff3a23c57128e42482ac06fa6d715a590 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Wed, 23 Jul 2025 22:07:06 +0000
Subject: [PATCH 24/29] Update comments

---
 src/uniffi/pipeline_runner/runner.rs | 51 ++++++++++++++++------------
 tests/pipeline_runner.rs             |  1 -
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 37122c4a..535b54d6 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -7,9 +7,8 @@ use crate::{
     },
 };
 use async_trait::async_trait;
-use derive_more::derive;
 use itertools::Itertools as _;
-use serde::{Deserialize, Serialize, de::value};
+use serde::{Deserialize, Serialize};
 use serde_yaml::Serializer;
 use snafu::{OptionExt as _, ResultExt as _};
 use std::{
@@ -41,6 +40,8 @@ struct ProcessingFailure {
     error: String,
 }
 
+/// Internal representation of a pipeline run, this should not be made public due to the fact that it contains
+/// internal states and tasks
 #[expect(
     clippy::type_complexity,
     reason = "too complex, but necessary for async handling"
@@ -72,16 +73,8 @@ impl Display for PipelineRun {
         write!(f, "PipelineRun({})", self.pipeline_job.hash)
     }
 }
-/**
- * Runner for pipelines
- *
- * General Algorithm:
- * 1. All nodes receive inputs via a MPSC channel, where parents nodes will send their output packets
- * 2. There are two "functional nodes processor" in the pipeline,
- *    which is the `input_node` and `output_node`
- * 3. Each node will process the inputs its receives and will only send it children input channels
- *    if they are successfully processed. Failures are just printed for now (Will be replaced by logging)
- */
+
+/// Runner that uses a docker agent to run pipelines
 #[derive(Default)]
 pub struct DockerPipelineRunner {
     /// User label on which group of agents this runner is associated with
@@ -91,14 +84,13 @@ pub struct DockerPipelineRunner {
     pipeline_runs: HashMap<String, PipelineRun>,
 }
 
-/**
- * This is an implementation of a pipeline runner that uses Zenoh to communicate between the tasks
- * The runtime is tokio
- *
- * These are the key expressions of the components of the pipeline:
- * - Input Node: `pipeline_job_hash/input_node/outputs` (This is where the `pipeline_job` packets get fed to)
- * - Nodes: `pipeline_job_hash/node_id/outputs/(success|failure)` (This is where the node outputs are sent to)
-*/
+/// This is an implementation of a pipeline runner that uses Zenoh to communicate between the tasks
+/// The runtime is tokio
+///
+/// These are the key expressions of the components of the pipeline:
+/// Input Node: `pipeline_job_hash/input_node/outputs` (This is where the `pipeline_job` packets get fed to)
+/// Nodes: `pipeline_job_hash/node_id/outputs/(success|failure)` (This is where the node outputs are sent to)
+///
 impl DockerPipelineRunner {
     /// Create a new Docker pipeline runner
     /// # Errors
@@ -111,6 +103,12 @@ impl DockerPipelineRunner {
         })
     }
 
+    /// Will start a new pipeline run with the given `PipelineJob`
+    /// This will start the async tasks for each node in the pipeline
+    /// including the one that captures the outputs from the leaf nodes
+    ///
+    /// Upon receiving the ready message from all the nodes, it will send the input packets to the input node
+    ///
     /// # Errors
     /// Will error out if the pipeline job fails to start
     pub async fn start(
@@ -178,10 +176,10 @@ impl DockerPipelineRunner {
                 ));
         }
 
+        // Wait for all nodes to be ready before sending inputs
         let num_of_nodes = graph.node_count();
         let mut ready_nodes = 0;
 
-        // Wait for all nodes to be ready before sending inputs
         while (subscriber.recv_async().await).is_ok() {
             // Message is empty, just increment the counter
             ready_nodes += 1;
@@ -224,6 +222,7 @@ impl DockerPipelineRunner {
         self.pipeline_runs
             .insert(pipeline_run_id.clone(), pipeline_run);
 
+        // Return the pipeline run id
         Ok(pipeline_run_id)
     }
 
@@ -263,6 +262,9 @@ impl DockerPipelineRunner {
     }
 
     /// Stop the pipeline run and all its tasks
+    /// This will send a stop message to a channel that all node manager task are subscribed to.
+    /// Upon receiving the stop message, each node manager will force abort all of its task and exit.
+    ///
     /// # Errors
     /// Will error out if the pipeline run is not found or if any of the tasks fail to stop correctly
     pub async fn stop(&mut self, pipeline_run_id: &str) -> Result<()> {
@@ -294,6 +296,7 @@ impl DockerPipelineRunner {
         Ok(())
     }
 
+    /// This will capture the outputs of the given nodes and store it in the `outputs` map
     #[expect(clippy::type_complexity, reason = "Needed for async")]
     async fn create_capture_task_for_node(
         node_id: String,
@@ -335,6 +338,7 @@ impl DockerPipelineRunner {
     /// - Create the zenoh session
     /// - Create a join set to spawn and handle incoming messages tasks
     /// - Create a subscriber for each of the parent nodes (Should only be 1, unless it is a joiner node)
+    /// - Create an abort listener task that will listen for stop requests
     /// - For each subscriber, handle the incoming message appropriately
     ///
     /// # Errors
@@ -445,6 +449,7 @@ impl DockerPipelineRunner {
         Ok(())
     }
 
+    /// This is the actual handler for incoming messages for the node
     async fn start_async_processor_task(
         subscriber: Subscriber<FifoChannelHandler<Sample>>,
         node_processor: Arc<Mutex<Box<dyn NodeProcessor>>>,
@@ -519,6 +524,7 @@ impl DockerPipelineRunner {
         Ok::<(), OrcaError>(())
     }
 
+    /// This task will listen for stop requests on the given key expression
     async fn start_stop_request_task(
         node_processor: Arc<Mutex<Box<dyn NodeProcessor>>>,
         base_key_exp: String,
@@ -992,7 +998,7 @@ async fn joiner() -> Result<()> {
 
     // Make each parent has 1 packet
     for idx in 0..2 {
-        let packet = make_test_packet(format!("key_{idx}"), "data_A.txt".to_string().into());
+        let packet = make_test_packet(format!("key_{idx}"), "data_A.txt".to_owned().into());
         joiner_processor.process_packet(
             &format!("{idx}"),
             &idx.to_string(),
@@ -1078,6 +1084,7 @@ async fn joiner() -> Result<()> {
     Ok(())
 }
 
+/// Helper function to create a test packet with a given key and path
 #[cfg(test)]
 fn make_test_packet(key: String, path: PathBuf) -> HashMap<String, PathSet> {
     use crate::uniffi::model::{Blob, BlobKind};
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 1dd90f58..2c0ee628 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -37,7 +37,6 @@ async fn basic_run() -> Result<()> {
         pipeline_result.output_packets.len() == 1,
         "Expected exactly one output packet."
     );
-    println!("{:?}", pipeline_result.output_packets);
 
     Ok(())
 }

From 16e0cbbf8ca5f5923b85f9c52ef26f6da48df564 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Sat, 26 Jul 2025 07:06:15 +0000
Subject: [PATCH 25/29] Save progress

---
 src/core/error.rs                    |   5 +
 src/uniffi/error.rs                  |  31 ++++
 src/uniffi/model.rs                  |  89 ++++++++++-
 src/uniffi/orchestrator/agent.rs     |  28 +++-
 src/uniffi/orchestrator/docker.rs    |   2 +-
 src/uniffi/pipeline_runner/runner.rs | 231 ++++++++++++++++++++-------
 6 files changed, 312 insertions(+), 74 deletions(-)

diff --git a/src/core/error.rs b/src/core/error.rs
index cffb538c..9be7405b 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -116,6 +116,7 @@ impl fmt::Debug for OrcaError {
             | Kind::FailedToParseDot { backtrace, .. }
             | Kind::GeneratedNamesOverflow { backtrace, .. }
             | Kind::InvalidFilepath { backtrace, .. }
+            | Kind::InvalidIndex { backtrace, .. }
             | Kind::InvalidPodResultTerminatedDatetime { backtrace, .. }
             | Kind::KeyMissing { backtrace, .. }
             | Kind::NoAnnotationFound { backtrace, .. }
@@ -124,6 +125,10 @@ impl fmt::Debug for OrcaError {
             | Kind::NoMatchingPodRun { backtrace, .. }
             | Kind::NoRemainingServices { backtrace, .. }
             | Kind::NoTagFoundInContainerAltImage { backtrace, .. }
+            | Kind::PodJobSubmissionFailed { backtrace, .. }
+            | Kind::PodJobProcessingError { backtrace, .. }
+            | Kind::StatusConversionFailure { backtrace, .. }
+            | Kind::UnsupportedPathType { backtrace, .. }
             | Kind::BollardError { backtrace, .. }
             | Kind::ChannelReceiveError { backtrace, .. }
             | Kind::GlobPatternError { backtrace, .. }
diff --git a/src/uniffi/error.rs b/src/uniffi/error.rs
index e94159f4..355e723d 100644
--- a/src/uniffi/error.rs
+++ b/src/uniffi/error.rs
@@ -18,6 +18,8 @@ use tokio::sync::oneshot;
 use tokio::task;
 use uniffi;
 
+use crate::uniffi::orchestrator::Status;
+
 /// Shorthand for a Result that returns an `OrcaError`.
 pub type Result<T, E = OrcaError> = result::Result<T, E>;
 /// Possible errors you may encounter.
@@ -51,6 +53,11 @@ pub(crate) enum Kind {
         source: io::Error,
         backtrace: Option<Backtrace>,
     },
+    #[snafu(display("Failed to get items at idx {idx}."))]
+    InvalidIndex {
+        idx: usize,
+        backtrace: Option<Backtrace>,
+    },
     #[snafu(display(
         "An invalid datetime was set for pod result for pod job (hash: {pod_job_hash})."
     ))]
@@ -90,11 +97,35 @@ pub(crate) enum Kind {
         path: PathBuf,
         backtrace: Option<Backtrace>,
     },
+    #[snafu(display("Pod job {hash} failed to process with reason: {reason}."))]
+    PodJobProcessingError {
+        hash: String,
+        reason: String,
+        backtrace: Option<Backtrace>,
+    },
+    #[snafu(display(
+        "Failed to convert status {status:?} to PodResultStatus with reason: {reason}."
+    ))]
+    StatusConversionFailure {
+        status: Status,
+        reason: String,
+        backtrace: Option<Backtrace>,
+    },
+    #[snafu(display("Unsupported path type: {path:?}."))]
+    UnsupportedPathType {
+        path: PathBuf,
+        backtrace: Option<Backtrace>,
+    },
     #[snafu(display("Failed to send message because: {reason}"))]
     SendError {
         reason: String,
         backtrace: Option<Backtrace>,
     },
+    #[snafu(display("Pod job submission failed with reason: {reason}."))]
+    PodJobSubmissionFailed {
+        reason: String,
+        backtrace: Option<Backtrace>,
+    },
     #[snafu(transparent)]
     BollardError {
         source: BollardError,
diff --git a/src/uniffi/model.rs b/src/uniffi/model.rs
index b2c7dca7..1a8ad1a6 100644
--- a/src/uniffi/model.rs
+++ b/src/uniffi/model.rs
@@ -1,12 +1,16 @@
 use crate::{
     core::{
-        crypto::{hash_blob, hash_buffer},
+        crypto::{hash_blob, hash_buffer, hash_dir, hash_file},
         model::{
             deserialize_pod, deserialize_pod_job, serialize_hashmap, serialize_hashmap_option,
             to_yaml,
         },
+        util::get,
+    },
+    uniffi::{
+        error::{Kind, OrcaError, Result},
+        orchestrator::Status,
     },
-    uniffi::{error::Result, orchestrator::Status},
 };
 use derive_more::Display;
 use getset::CloneGetters;
@@ -183,8 +187,85 @@ impl PodJob {
             ..pod_job_no_hash
         })
     }
+
+    /// Util function to get the `output_packet` from a given `pod_job`, assuming it results already computed
+    /// # Errors
+    /// Will return `Err` if the output packet cannot be constructed, e.g. if the pod job has not been run yet or the output directory is not set.
+    pub fn get_output_packet(
+        &self,
+        namespace_lookup: &HashMap<String, PathBuf>,
+    ) -> Result<HashMap<String, PathSet>> {
+        self.pod
+            .output_spec
+            .iter()
+            .map(|(key, value)| {
+                // Construct the full path and figure out if it is a file or directory
+                let namespace_path = get(namespace_lookup, &self.output_dir.namespace)?;
+                let rel_path = self.output_dir.path.join(&value.path);
+                let abs_path = namespace_path.join(&rel_path);
+
+                // Check if if it is a file or directory
+                let path_set = if abs_path.is_file() {
+                    PathSet::Unary(Blob {
+                        kind: BlobKind::File,
+                        location: URI {
+                            namespace: self.output_dir.namespace.clone(),
+                            path: rel_path,
+                        },
+                        checksum: hash_file(&abs_path)?,
+                    })
+                } else if abs_path.is_dir() {
+                    PathSet::Unary(Blob {
+                        kind: BlobKind::Directory,
+                        location: URI {
+                            namespace: self.output_dir.namespace.clone(),
+                            path: rel_path,
+                        },
+                        checksum: hash_dir(&abs_path)?,
+                    })
+                } else {
+                    return Err(OrcaError {
+                        kind: Kind::UnsupportedPathType {
+                            path: abs_path,
+                            backtrace: Some(snafu::Backtrace::capture()),
+                        },
+                    });
+                };
+                Ok((key.clone(), path_set))
+            })
+            .collect::<Result<_>>()
+    }
 }
 
+#[derive(uniffi::Enum, Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
+/// Status of a pod result.
+pub enum PodResultStatus {
+    /// Pod Job completed successfully.
+    Completed,
+    /// Pod Job failed with an exit code.
+    Failed(i16),
+    /// Mainly used for default values, not a valid status.
+    #[default]
+    Unset,
+}
+
+impl TryFrom<Status> for PodResultStatus {
+    type Error = OrcaError;
+
+    fn try_from(status: Status) -> Result<Self, Self::Error> {
+        match status {
+            Status::Completed => Ok(Self::Completed),
+            Status::Failed(code) => Ok(Self::Failed(code)),
+            Status::Running | Status::Unset => Err(OrcaError {
+                kind: Kind::StatusConversionFailure {
+                    status,
+                    reason: "Cannot convert Running or Unset status to PodResultStatus".to_owned(),
+                    backtrace: Some(snafu::Backtrace::capture()),
+                },
+            }),
+        }
+    }
+}
 /// Result from a compute job run.
 #[derive(uniffi::Record, Serialize, Deserialize, Debug, Clone, PartialEq, Default)]
 pub struct PodResult {
@@ -199,7 +280,7 @@ pub struct PodResult {
     /// Name given by orchestrator.
     pub assigned_name: String,
     /// Status of compute run when terminated.
-    pub status: Status,
+    pub status: PodResultStatus,
     /// Time in epoch when created in seconds.
     pub created: u64,
     /// Time in epoch when terminated in seconds.
@@ -216,7 +297,7 @@ impl PodResult {
         annotation: Option<Annotation>,
         pod_job: Arc<PodJob>,
         assigned_name: String,
-        status: Status,
+        status: PodResultStatus,
         created: u64,
         terminated: u64,
     ) -> Result<Self> {
diff --git a/src/uniffi/orchestrator/agent.rs b/src/uniffi/orchestrator/agent.rs
index bcf48fcc..b99ddea5 100644
--- a/src/uniffi/orchestrator/agent.rs
+++ b/src/uniffi/orchestrator/agent.rs
@@ -1,9 +1,9 @@
 use crate::{
     core::orchestrator::agent::{EventPayload, start_service},
     uniffi::{
-        error::{OrcaError, Result, selector},
-        model::{PodJob, PodResult},
-        orchestrator::{Orchestrator, Status, docker::LocalDockerOrchestrator},
+        error::{Kind, OrcaError, Result, selector},
+        model::{PodJob, PodResult, PodResultStatus},
+        orchestrator::{Orchestrator, docker::LocalDockerOrchestrator},
         store::{Store as _, filestore::LocalFileStore},
     },
 };
@@ -13,7 +13,7 @@ use futures_util::future::join_all;
 use getset::CloneGetters;
 use serde_json::Value;
 use snafu::{OptionExt as _, ResultExt as _};
-use std::{collections::HashMap, path::PathBuf, sync::Arc};
+use std::{backtrace::Backtrace, collections::HashMap, path::PathBuf, sync::Arc};
 use tokio::task::JoinSet;
 use uniffi;
 use zenoh;
@@ -152,20 +152,33 @@ impl Agent {
             namespace_lookup.clone(),
             |pod_job: &PodJob| EventPayload::Request(pod_job.clone()),
             async |agent, inner_namespace_lookup, _, pod_job| {
+                println!("Processing pod job: {}", pod_job.hash);
                 let pod_run = agent
                     .orchestrator
                     .start(&inner_namespace_lookup, &pod_job)
-                    .await?;
+                    .await
+                    .unwrap();
                 let pod_result = agent.orchestrator.get_result(&pod_run).await?;
                 agent.orchestrator.delete(&pod_run).await?;
                 Ok(pod_result)
             },
             async |client, pod_result| {
                 let response_topic = match &pod_result.status {
-                    Status::Completed => &format!("success/pod_job/{}", pod_result.pod_job.hash),
-                    Status::Running | Status::Failed(_) | Status::Unset => {
+                    PodResultStatus::Completed => {
+                        &format!("success/pod_job/{}", pod_result.pod_job.hash)
+                    }
+                    PodResultStatus::Failed(_) => {
                         &format!("failure/pod_job/{}", pod_result.pod_job.hash)
                     }
+                    PodResultStatus::Unset => {
+                        return Err(OrcaError {
+                            kind: Kind::PodJobProcessingError {
+                                hash: pod_result.pod_job.hash.clone(),
+                                reason: "PodResultStatus should not be unset".to_owned(),
+                                backtrace: Some(Backtrace::capture()),
+                            },
+                        });
+                    }
                 };
                 client.publish(response_topic, &pod_result).await
             },
@@ -197,6 +210,7 @@ impl Agent {
                 async |_, ()| Ok(()),
             ));
         }
+
         services
             .join_next()
             .await
diff --git a/src/uniffi/orchestrator/docker.rs b/src/uniffi/orchestrator/docker.rs
index ff3790ad..38a8e248 100644
--- a/src/uniffi/orchestrator/docker.rs
+++ b/src/uniffi/orchestrator/docker.rs
@@ -223,7 +223,7 @@ impl Orchestrator for LocalDockerOrchestrator {
             None,
             Arc::clone(&pod_run.pod_job),
             pod_run.assigned_name.clone(),
-            result_info.status,
+            result_info.status.try_into()?,
             result_info.created,
             result_info
                 .terminated
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 535b54d6..6cba4389 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -1,8 +1,12 @@
 use crate::{
     core::{crypto::hash_buffer, model::serialize_hashmap, util::get},
     uniffi::{
-        error::{OrcaError, Result, selector},
-        model::{PathSet, Pod, PodJob, URI},
+        error::{Kind, OrcaError, Result, selector},
+        model::{PathSet, Pod, PodJob, PodResult, PodResultStatus, URI},
+        orchestrator::{
+            agent::{Agent, AgentClient, Response},
+            docker::LocalDockerOrchestrator,
+        },
         pipeline::{Kernel, Mapper, Node, Pipeline, PipelineJob, PipelineResult},
     },
 };
@@ -52,6 +56,8 @@ struct PipelineRun {
     pipeline_job: PipelineJob, // The pipeline job that this run is associated with
     node_tasks: JoinSet<Result<()>>, // JoinSet of tasks for each node in the pipeline
     outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>, // String is the node key, while hash
+    orchestrator_agent: Arc<Agent>, // This is placed in pipeline due to the current design requiring a namespace to operate on
+    orchestrator_agent_task: JoinSet<Result<()>>, // JoinSet of tasks for the orchestrator agent
 }
 
 impl PartialEq for PipelineRun {
@@ -75,7 +81,6 @@ impl Display for PipelineRun {
 }
 
 /// Runner that uses a docker agent to run pipelines
-#[derive(Default)]
 pub struct DockerPipelineRunner {
     /// User label on which group of agents this runner is associated with
     pub group: String,
@@ -96,9 +101,10 @@ impl DockerPipelineRunner {
     /// # Errors
     /// Will error out if the environment variable `HOSTNAME` is not set
     pub fn new(group: String) -> Result<Self> {
+        let host = hostname::get()?.to_string_lossy().to_string();
         Ok(Self {
             group,
-            host: hostname::get()?.to_string_lossy().to_string(),
+            host,
             pipeline_runs: HashMap::new(),
         })
     }
@@ -117,13 +123,31 @@ impl DockerPipelineRunner {
         namespace: &str, // Name space to save pod_results to
         namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<String> {
+        // Create the orchestrator
+        let orchestrator_agent = Agent::new(
+            self.group.clone(),
+            self.host.clone(),
+            LocalDockerOrchestrator::new()?.into(),
+        )?;
+
         // Create a new pipeline run
         let mut pipeline_run = PipelineRun {
             pipeline_job,
             outputs: Arc::new(RwLock::new(HashMap::new())),
             node_tasks: JoinSet::new(),
+            orchestrator_agent: orchestrator_agent.into(),
+            orchestrator_agent_task: JoinSet::new(),
         };
 
+        let orchestrator_agent_clone = Arc::clone(&pipeline_run.orchestrator_agent);
+        let namespace_lookup_clone = namespace_lookup.clone();
+        // Start the orchestrator agent service
+        pipeline_run.orchestrator_agent_task.spawn(async move {
+            orchestrator_agent_clone
+                .start(&namespace_lookup_clone, None)
+                .await
+        });
+
         // The id for the pipeline_run is the pipeline_job hash
         let pipeline_run_id = pipeline_run.pipeline_job.hash.clone();
 
@@ -155,6 +179,7 @@ impl DockerPipelineRunner {
                     namespace.to_owned(),
                     namespace_lookup.clone(),
                     Arc::clone(&session),
+                    Arc::clone(&pipeline_run.orchestrator_agent.client),
                 ));
         }
 
@@ -163,7 +188,7 @@ impl DockerPipelineRunner {
         for node in pipeline_run.pipeline_job.pipeline.get_leaf_nodes() {
             pipeline_run
                 .node_tasks
-                .spawn(Self::create_capture_task_for_node(
+                .spawn(Self::create_output_capture_task_for_node(
                     node.id.clone(),
                     Arc::clone(&pipeline_run.outputs),
                     Arc::clone(&session),
@@ -298,7 +323,7 @@ impl DockerPipelineRunner {
 
     /// This will capture the outputs of the given nodes and store it in the `outputs` map
     #[expect(clippy::type_complexity, reason = "Needed for async")]
-    async fn create_capture_task_for_node(
+    async fn create_output_capture_task_for_node(
         node_id: String,
         outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>,
         session: Arc<zenoh::Session>,
@@ -350,11 +375,12 @@ impl DockerPipelineRunner {
         namespace: String,
         namespace_lookup: HashMap<String, PathBuf>,
         session: Arc<zenoh::Session>,
+        client: Arc<AgentClient>,
     ) -> Result<()> {
         // Create the correct processor for the node based on the kernel type
         let node_processor: Arc<Mutex<Box<dyn NodeProcessor>>> = Arc::new(Mutex::new(
             match get(&pipeline.kernel_lut, &node.kernel_hash)? {
-                Kernel::Pod(pod) => Box::new(PodProcessor::new(Arc::clone(pod))),
+                Kernel::Pod(pod) => Box::new(PodProcessor::new(Arc::clone(pod), client)),
                 Kernel::Mapper(mapper) => Box::new(MapperProcessor::new(Arc::clone(mapper))),
                 Kernel::Joiner => {
                     // Need to get the parent node id for this joiner node
@@ -614,57 +640,50 @@ async fn try_to_forward_err_msg(
 struct PodProcessor {
     pod: Arc<Pod>,
     processing_tasks: JoinSet<()>,
+    client: Arc<AgentClient>,
 }
 
 impl PodProcessor {
-    fn new(pod: Arc<Pod>) -> Self {
+    fn new(pod: Arc<Pod>, client: Arc<AgentClient>) -> Self {
         Self {
             pod,
             processing_tasks: JoinSet::new(),
+            client,
         }
     }
-}
 
-#[async_trait]
-impl NodeProcessor for PodProcessor {
-    #[expect(
-        clippy::unwrap_used,
-        clippy::unwrap_in_result,
-        reason = "Hard code for now, will be replaced by agent"
-    )]
-    fn process_packet(
-        &mut self,
-        _sender_node_id: &str,
-        node_id: &str,
-        packet: &HashMap<String, PathSet>,
+    /// Will handle the creation of the pod job, submission to the agent, listening for completion, and extracting the `output_packet` if successful
+    async fn start_pod_job_task(
+        node_id: String,
+        pod: Arc<Pod>,
+        packet: HashMap<String, PathSet>,
+        client: Arc<AgentClient>,
         session: Arc<zenoh::Session>,
-        base_output_key_exp: &str,
-        namespace: &str,
+        base_output_key_exp: String,
+        namespace: String,
         namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<()> {
-        // We need a unique hash for this given input packet process by the node
-        // therefore we need to generate a hash that has the pod_id + input_packet
+        // For now we will just send the input_packet to the success channel
         let node_id_bytes = node_id.as_bytes().to_vec();
-        let packet_copy = packet.clone();
         let input_packet_hash = {
             let mut buf = node_id_bytes;
             let mut serializer = Serializer::new(&mut buf);
-            serialize_hashmap(&packet_copy, &mut serializer)?;
+            serialize_hashmap(&packet, &mut serializer)?;
             hash_buffer(buf)
         };
         let output_dir = URI {
-            namespace: namespace.to_owned(),
-            path: PathBuf::from(format!("pod_runs/{}/{}", self.pod.hash, input_packet_hash)),
+            namespace: namespace.clone(),
+            path: PathBuf::from(format!("pod_runs/{node_id}/{input_packet_hash}")),
         };
 
-        let cpu_limit = self.pod.recommended_cpus;
-        let memory_limit = self.pod.recommended_memory;
+        let cpu_limit = pod.recommended_cpus;
+        let memory_limit = pod.recommended_memory;
 
         // Create the pod job
         let pod_job = PodJob::new(
             None,
-            Arc::clone(&self.pod),
-            packet.clone(),
+            Arc::clone(&pod),
+            packet,
             output_dir,
             cpu_limit,
             memory_limit,
@@ -672,44 +691,133 @@ impl NodeProcessor for PodProcessor {
             namespace_lookup,
         )?;
 
-        // Simulate pod execution by just printing out pod_job_hash and pod hash
-        // This will be replaced by sending the pod_job to the orchestrator via the agent
+        // Create listener for pod_job
+        let target_key_exp = format!(
+            "group/{}/{}/*/pod_job/{}",
+            client.group, client.host, pod_job.hash
+        );
 
-        // Build the output_packet, in reality, this will be extracted from the pod_result
+        // Create the subscriber
+        let pod_job_subscriber = session
+            .declare_subscriber(target_key_exp)
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
 
-        let output_packet = self
-            .pod
-            .output_spec
-            .keys()
-            .map(|output_key| (output_key.clone(), packet.values().next().cloned().unwrap()))
-            .collect::<HashMap<_, _>>();
+        // Create the async task to listen for the pod job completion
+        let pod_job_listener_task = tokio::spawn(async move {
+            // Wait for the pod job to complete and extract the result
+
+            let sample = pod_job_subscriber
+                .recv_async()
+                .await
+                .context(selector::AgentCommunicationFailure {})?;
+            // Extract the pod_result from the payload
+            let pod_result: PodResult = serde_json::from_slice(&sample.payload().to_bytes())?;
+            Ok::<_, OrcaError>(pod_result)
+        });
+
+        // Submit it to the client and get the response to make sure it was successful
+        let responses = client.submit_pod_jobs(vec![pod_job.into()]).await;
+        let response = responses
+            .first()
+            .context(selector::InvalidIndex { idx: 0_usize })?;
+
+        match response {
+            Response::Ok => (),
+            Response::Err(err) => {
+                return Err(OrcaError {
+                    kind: Kind::PodJobSubmissionFailed {
+                        reason: err.clone(),
+                        backtrace: Some(snafu::Backtrace::capture()),
+                    },
+                });
+            }
+        }
+
+        // Get the pod result from the listener task
+        let pod_result = pod_job_listener_task.await??;
+        // Get the output packet for the pod result
+        let output_packet = match pod_result.status {
+            PodResultStatus::Completed => {
+                // Get the output packet
+                pod_result.pod_job.get_output_packet(namespace_lookup)?
+            }
+            PodResultStatus::Failed(exit_code) => {
+                // Processing failed, thus return the error
+                return Err(OrcaError {
+                    kind: Kind::PodJobProcessingError {
+                        hash: pod_result.pod_job.hash.clone(),
+                        reason: format!("Pod processing failed with exit code {exit_code}"),
+                        backtrace: Some(snafu::Backtrace::capture()),
+                    },
+                });
+            }
+            PodResultStatus::Unset => {
+                // This should not happen, but if it does, we will return an error
+                return Err(OrcaError {
+                    kind: Kind::PodJobProcessingError {
+                        hash: pod_result.pod_job.hash.clone(),
+                        reason: "Pod processing status is unset".to_owned(),
+                        backtrace: Some(snafu::Backtrace::capture()),
+                    },
+                });
+            }
+        };
+
+        session
+            .put(
+                base_output_key_exp.clone() + "/" + SUCCESS_KEY_EXP,
+                serde_json::to_string(&NodeOutput::Packet(node_id.clone(), output_packet))?,
+            )
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+        Ok::<(), OrcaError>(())
+    }
+}
+
+#[async_trait]
+impl NodeProcessor for PodProcessor {
+    fn process_packet(
+        &mut self,
+        _sender_node_id: &str,
+        node_id: &str,
+        packet: &HashMap<String, PathSet>,
+        session: Arc<zenoh::Session>,
+        base_output_key_exp: &str,
+        namespace: &str,
+        namespace_lookup: &HashMap<String, PathBuf>,
+    ) -> Result<()> {
+        // We need a unique hash for this given input packet process by the node
+        // therefore we need to generate a hash that has the pod_id + input_packet
+        let pod_clone = Arc::clone(&self.pod);
+        let client_clone = Arc::clone(&self.client);
+        let node_id_owned = node_id.to_owned();
+        let packet_owned = packet.clone();
+        let base_output_key_exp_owned = base_output_key_exp.to_owned();
+        let namespace_owned = namespace.to_owned();
+        let namespace_lookup_owned = namespace_lookup.clone();
 
-        let node_id_clone = node_id.to_owned();
-        let output_key_exp_clone = base_output_key_exp.to_owned();
         self.processing_tasks.spawn(async move {
-            // For now we will just send the input_packet to the success channel
-            let results = async {
-                session
-                    .put(
-                        output_key_exp_clone.clone() + "/" + SUCCESS_KEY_EXP,
-                        serde_json::to_string(&NodeOutput::Packet(
-                            node_id_clone.clone(),
-                            output_packet,
-                        ))?,
-                    )
-                    .await
-                    .context(selector::AgentCommunicationFailure {})?;
-                Ok::<(), OrcaError>(())
-            };
+            let results = Self::start_pod_job_task(
+                node_id_owned.clone(),
+                pod_clone,
+                packet_owned,
+                client_clone,
+                Arc::clone(&session),
+                base_output_key_exp_owned.clone(),
+                namespace_owned.clone(),
+                &namespace_lookup_owned,
+            )
+            .await;
 
-            match results.await {
+            match results {
                 Ok(()) => {}
                 Err(err) => {
                     try_to_forward_err_msg(
                         session,
                         err,
-                        &format!("{output_key_exp_clone}/{FAILURE_KEY_EXP}"),
-                        &node_id_clone,
+                        &format!("{base_output_key_exp_owned}/{FAILURE_KEY_EXP}"),
+                        &node_id_owned,
                     )
                     .await;
                 }
@@ -806,7 +914,6 @@ impl NodeProcessor for MapperProcessor {
         while (self.processing_tasks.join_next().await).is_some() {
             // The only error that should be forwarded here is the failure to send the output packet
         }
-
         true
     }
 

From 65cbbdd497cab96f0a4b71c3f2b8296b481193d9 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Mon, 28 Jul 2025 19:56:52 +0000
Subject: [PATCH 26/29] Save progress

---
 src/core/orchestrator/agent.rs       | 13 ++++++++++++-
 src/uniffi/orchestrator/agent.rs     | 15 ++++++++++-----
 src/uniffi/pipeline_runner/runner.rs | 22 +++++++++-------------
 tests/fixture/mod.rs                 |  7 ++++---
 tests/orchestrator.rs                |  6 +++---
 tests/pipeline_runner.rs             | 24 +++++++++++++++++++++++-
 6 files changed, 61 insertions(+), 26 deletions(-)

diff --git a/src/core/orchestrator/agent.rs b/src/core/orchestrator/agent.rs
index bd93077c..e5f39066 100644
--- a/src/core/orchestrator/agent.rs
+++ b/src/core/orchestrator/agent.rs
@@ -28,7 +28,7 @@ static RE_PODJOB_ACTION: LazyLock<Regex> = LazyLock::new(|| {
                 group\/(?<group>[a-z_]+)\/
                     (?<action>request|reservation|success|failure)\/
                         pod_job\/(?<pod_job_hash>[0-9a-f]+)\/
-                            host\/(?<host>[a-z_]+)\/
+                            host\/(?<host>[0-9a-z_]+)\/
                                 timestamp\/(?<timestamp>.*?)
             $
             ",
@@ -154,6 +154,16 @@ where
                 .await
                 .context(selector::AgentCommunicationFailure {})?;
             while let Ok(sample) = subscriber.recv_async().await {
+                println!(
+                    "Received message on key expression: {}",
+                    sample.key_expr().as_str(),
+                );
+
+                println!(
+                    "Received payload: {:?}",
+                    RE_PODJOB_ACTION.captures(sample.key_expr().as_str())
+                );
+
                 if let (Ok(input), Some(metadata)) = (
                     serde_json::from_slice::<RequestI>(&sample.payload().to_bytes()),
                     RE_PODJOB_ACTION.captures(sample.key_expr().as_str()),
@@ -165,6 +175,7 @@ where
                         subgroup: metadata["pod_job_hash"].to_string(),
                     };
                     let _event_payload = event_classifier(&input);
+                    println!("Sending it to request task.");
                     tasks.spawn({
                         let inner_request_task = request_task.clone();
                         let inner_inner_agent = Arc::clone(&inner_agent);
diff --git a/src/uniffi/orchestrator/agent.rs b/src/uniffi/orchestrator/agent.rs
index b99ddea5..7029bd2e 100644
--- a/src/uniffi/orchestrator/agent.rs
+++ b/src/uniffi/orchestrator/agent.rs
@@ -46,7 +46,7 @@ pub struct AgentClient {
     /// Connecting agent's assigned name used for reference.
     pub host: String,
     #[getset(skip)]
-    pub(crate) session: zenoh::Session,
+    pub(crate) session: Arc<zenoh::Session>,
 }
 
 #[uniffi::export]
@@ -67,7 +67,8 @@ impl AgentClient {
                         .await
                         .context(selector::AgentCommunicationFailure {})?,
                 )
-            })?,
+            })?
+            .into(),
         })
     }
     /// Submit many pod jobs to be processed in parallel.
@@ -156,13 +157,13 @@ impl Agent {
                 let pod_run = agent
                     .orchestrator
                     .start(&inner_namespace_lookup, &pod_job)
-                    .await
-                    .unwrap();
+                    .await?;
                 let pod_result = agent.orchestrator.get_result(&pod_run).await?;
-                agent.orchestrator.delete(&pod_run).await?;
+                //agent.orchestrator.delete(&pod_run).await?;
                 Ok(pod_result)
             },
             async |client, pod_result| {
+                println!("Finished processing pod job: {}", pod_result.pod_job.hash);
                 let response_topic = match &pod_result.status {
                     PodResultStatus::Completed => {
                         &format!("success/pod_job/{}", pod_result.pod_job.hash)
@@ -210,6 +211,10 @@ impl Agent {
                 async |_, ()| Ok(()),
             ));
         }
+        // Create a service that responds to pod_job_worker availability requests.
+        services.spawn(start_service(
+
+        ))
 
         services
             .join_next()
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 6cba4389..a93d9efa 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -139,6 +139,9 @@ impl DockerPipelineRunner {
             orchestrator_agent_task: JoinSet::new(),
         };
 
+        // Get the preexisting zenoh session from agent
+        let session = Arc::clone(&pipeline_run.orchestrator_agent.client.session);
+
         let orchestrator_agent_clone = Arc::clone(&pipeline_run.orchestrator_agent);
         let namespace_lookup_clone = namespace_lookup.clone();
         // Start the orchestrator agent service
@@ -153,13 +156,6 @@ impl DockerPipelineRunner {
 
         let graph = &pipeline_run.pipeline_job.pipeline.graph;
 
-        // Create the subscriber to listen to node ready status before sending inputs
-        let session = Arc::new(
-            zenoh::open(zenoh::Config::default())
-                .await
-                .context(selector::AgentCommunicationFailure {})?,
-        );
-
         let subscriber = session
             .declare_subscriber(self.get_base_key_exp(&pipeline_run_id) + "/*/status/ready")
             .await
@@ -692,10 +688,7 @@ impl PodProcessor {
         )?;
 
         // Create listener for pod_job
-        let target_key_exp = format!(
-            "group/{}/{}/*/pod_job/{}",
-            client.group, client.host, pod_job.hash
-        );
+        let target_key_exp = format!("group/{}/*/pod_job/{}/**", client.group, pod_job.hash);
 
         // Create the subscriber
         let pod_job_subscriber = session
@@ -706,7 +699,6 @@ impl PodProcessor {
         // Create the async task to listen for the pod job completion
         let pod_job_listener_task = tokio::spawn(async move {
             // Wait for the pod job to complete and extract the result
-
             let sample = pod_job_subscriber
                 .recv_async()
                 .await
@@ -735,7 +727,11 @@ impl PodProcessor {
         }
 
         // Get the pod result from the listener task
-        let pod_result = pod_job_listener_task.await??;
+        println!("Trying to get pod job result...");
+        let temp = pod_job_listener_task.await?;
+        println!("Waiting for pod job to complete... {:?}", temp);
+        let pod_result = temp?;
+
         // Get the output packet for the pod result
         let output_packet = match pod_result.status {
             PodResultStatus::Completed => {
diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs
index df74e440..64800499 100644
--- a/tests/fixture/mod.rs
+++ b/tests/fixture/mod.rs
@@ -10,8 +10,9 @@
 use names::{Generator, Name};
 use orcapod::uniffi::{
     error::Result,
-    model::{Annotation, Blob, BlobKind, PathInfo, PathSet, Pod, PodJob, PodResult, URI},
-    orchestrator::Status,
+    model::{
+        Annotation, Blob, BlobKind, PathInfo, PathSet, Pod, PodJob, PodResult, PodResultStatus, URI,
+    },
     pipeline::{Kernel, Mapper, Pipeline, PipelineJob},
     store::{ModelID, ModelInfo, Store},
 };
@@ -137,7 +138,7 @@ pub fn pod_result_style(
         }),
         pod_job_style(namespace_lookup)?.into(),
         "simple-endeavour".to_owned(),
-        Status::Completed,
+        PodResultStatus::Completed,
         1_737_922_307,
         1_737_925_907,
     )
diff --git a/tests/orchestrator.rs b/tests/orchestrator.rs
index 7ab494e3..11775a5f 100644
--- a/tests/orchestrator.rs
+++ b/tests/orchestrator.rs
@@ -8,7 +8,7 @@ use fixture::{
 use futures_util::future::join_all;
 use orcapod::uniffi::{
     error::{OrcaError, Result},
-    model::URI,
+    model::{PodResultStatus, URI},
     orchestrator::{ImageKind, Orchestrator as _, PodRun, Status, docker::LocalDockerOrchestrator},
 };
 use std::{collections::HashMap, path::PathBuf};
@@ -131,7 +131,7 @@ async fn remote_container_image_failed() -> Result<()> {
     orch.delete(&pod_run).await?;
 
     assert!(
-        matches!(pod_result.status, Status::Failed(1)),
+        matches!(pod_result.status, PodResultStatus::Failed(1)),
         "Expected to fail but did not."
     );
     Ok(())
@@ -161,7 +161,7 @@ async fn verify_pod_result_not_running() -> Result<()> {
     let statuses = results
         .into_iter()
         .map(|result| Ok(result?.status))
-        .filter(|status| !matches!(status, Ok(Status::Completed)))
+        .filter(|status| !matches!(status, Ok(PodResultStatus::Completed)))
         .collect::<Result<Vec<_>>>()?;
 
     println!("statuses: {statuses:?}");
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 2c0ee628..a6e6f19c 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -13,8 +13,30 @@ use orcapod::uniffi::{error::Result, pipeline_runner::runner::DockerPipelineRunn
 use crate::fixture::TestDirs;
 use fixture::pipeline_job;
 
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn basic_run() -> Result<()> {
+    // create a zenoh session to print out all communication message
+    let session = zenoh::open(zenoh::Config::default())
+        .await
+        .expect("Failed to open zenoh session");
+
+    tokio::spawn(async move {
+        // Subscribe to all messages in the 'test' group
+        let sub = session
+            .declare_subscriber("**")
+            .await
+            .expect("Failed to declare subscriber");
+
+        while let Ok(sample) = sub.recv_async().await {
+            // Print the key expression and payload of each message
+            println!(
+                "Received message: {}: {:?}",
+                sample.key_expr().as_str(),
+                sample.payload();
+            );
+        }
+    });
+
     let pipeline_job = pipeline_job()?;
 
     // Create the runner

From 58864a005ee1014f6dd77fa3878c543626c7d808 Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Thu, 31 Jul 2025 22:15:34 +0000
Subject: [PATCH 27/29] Fix majority of merge errors

---
 output.txt                           |   3 +
 src/core/mod.rs                      |   3 +-
 src/core/model/mod.rs                |   8 +-
 src/uniffi/model/packet.rs           |  22 +++
 src/uniffi/model/pipeline.rs         |  35 +++--
 src/uniffi/pipeline_runner/runner.rs |   4 -
 tests/extra/data/input.txt           |   0
 tests/extra/data/input1.txt          |   1 +
 tests/extra/data/input2.txt          |   1 +
 tests/fixture/mod.rs                 | 191 +++++++++++++++------------
 tests/model.rs                       |   3 +-
 tests/orchestrator.rs                |   2 +-
 tests/pipeline.rs                    |  38 +++---
 tests/pipeline_runner.rs             |  36 ++---
 14 files changed, 205 insertions(+), 142 deletions(-)
 create mode 100644 output.txt
 delete mode 100644 tests/extra/data/input.txt
 create mode 100644 tests/extra/data/input1.txt
 create mode 100644 tests/extra/data/input2.txt

diff --git a/output.txt b/output.txt
new file mode 100644
index 00000000..2ca3cd52
--- /dev/null
+++ b/output.txt
@@ -0,0 +1,3 @@
+1
+2
+2
diff --git a/src/core/mod.rs b/src/core/mod.rs
index 52a6d376..6e3cc1ca 100644
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -2,7 +2,8 @@
 pub mod crypto;
 pub(crate) mod error;
 pub(crate) mod graph;
-pub(crate) mod model;
+/// Model definitions and utilities.
+pub mod model;
 pub(crate) mod orchestrator;
 pub(crate) mod store;
 pub(crate) mod util;
diff --git a/src/core/model/mod.rs b/src/core/model/mod.rs
index e74cd7e6..50194881 100644
--- a/src/core/model/mod.rs
+++ b/src/core/model/mod.rs
@@ -32,7 +32,7 @@ pub fn to_yaml<T: Serialize>(instance: &T) -> Result<String> {
     Ok(yaml)
 }
 
-pub fn serialize_hashmap<S, K: Ord + Serialize, V: Serialize>(
+pub(crate) fn serialize_hashmap<S, K: Ord + Serialize, V: Serialize>(
     map: &HashMap<K, V>,
     serializer: S,
 ) -> result::Result<S::Ok, S::Error>
@@ -44,7 +44,7 @@ where
 }
 
 #[expect(clippy::ref_option, reason = "Serde requires this signature.")]
-pub fn serialize_hashmap_option<S, K: Ord + Serialize, V: Serialize>(
+pub(crate) fn serialize_hashmap_option<S, K: Ord + Serialize, V: Serialize>(
     map_option: &Option<HashMap<K, V>>,
     serializer: S,
 ) -> result::Result<S::Ok, S::Error>
@@ -57,5 +57,5 @@ where
     sorted.serialize(serializer)
 }
 
-pub mod pipeline;
-pub mod pod;
+pub(crate) mod pipeline;
+pub(crate) mod pod;
diff --git a/src/uniffi/model/packet.rs b/src/uniffi/model/packet.rs
index b33b3e17..79e554e6 100644
--- a/src/uniffi/model/packet.rs
+++ b/src/uniffi/model/packet.rs
@@ -31,6 +31,15 @@ pub struct URI {
     pub path: PathBuf,
 }
 
+#[uniffi::export]
+impl URI {
+    #[uniffi::constructor]
+    /// Create a new URI with the given namespace and path.
+    pub const fn new(namespace: String, path: PathBuf) -> Self {
+        Self { namespace, path }
+    }
+}
+
 /// BLOB with metadata.
 #[derive(uniffi::Record, Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
 pub struct Blob {
@@ -42,6 +51,19 @@ pub struct Blob {
     pub checksum: String,
 }
 
+#[uniffi::export]
+impl Blob {
+    #[uniffi::constructor]
+    /// Create a new BLOB with the given kind, location, and checksum.
+    pub const fn new(kind: BlobKind, location: URI) -> Self {
+        Self {
+            kind,
+            location,
+            checksum: String::new(),
+        }
+    }
+}
+
 /// A single BLOB or a collection of BLOBs.
 #[derive(uniffi::Enum, Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(untagged)]
diff --git a/src/uniffi/model/pipeline.rs b/src/uniffi/model/pipeline.rs
index c2b1cb17..72e9ab72 100644
--- a/src/uniffi/model/pipeline.rs
+++ b/src/uniffi/model/pipeline.rs
@@ -8,6 +8,7 @@ use crate::{
     uniffi::{
         error::Result,
         model::{
+            Annotation,
             packet::{PathSet, URI},
             pod::Pod,
         },
@@ -31,9 +32,12 @@ pub struct Pipeline {
     #[getset(skip)]
     pub graph: DiGraph<PipelineNode, ()>,
     /// Exposed, internal input specification. Each input may be fed into more than one node/key if desired.
-    pub input_spec: HashMap<String, Vec<SpecURI>>,
+    pub input_spec: HashMap<String, Vec<NodeURI>>,
     /// Exposed, internal output specification. Each output is associated with only one node/key.
-    pub output_spec: HashMap<String, SpecURI>,
+    pub output_spec: HashMap<String, NodeURI>,
+    /// Optional annotation for the pipeline.
+    #[getset(skip)]
+    pub annotation: Option<Annotation>,
 }
 
 #[uniffi::export]
@@ -46,15 +50,17 @@ impl Pipeline {
     #[uniffi::constructor]
     pub fn new(
         graph_dot: &str,
-        metadata: HashMap<String, Kernel>,
-        input_spec: &HashMap<String, Vec<SpecURI>>,
-        output_spec: &HashMap<String, SpecURI>,
+        kernel_map: HashMap<String, Kernel>,
+        input_spec: HashMap<String, Vec<NodeURI>>,
+        output_spec: HashMap<String, NodeURI>,
+        annotation: Option<Annotation>,
     ) -> Result<Self> {
-        let graph = make_graph(graph_dot, metadata)?;
+        let graph = make_graph(graph_dot, kernel_map)?;
         Ok(Self {
             graph,
-            input_spec: input_spec.clone(),
-            output_spec: output_spec.clone(),
+            input_spec,
+            output_spec,
+            annotation,
         })
     }
 }
@@ -78,6 +84,9 @@ pub struct PipelineJob {
     pub input_packet: HashMap<String, Vec<PathSet>>,
     /// Attached, external output directory.
     pub output_dir: URI,
+    /// Optional annotation for the pipeline job.
+    #[getset(skip)]
+    pub annotation: Option<Annotation>,
 }
 
 #[uniffi::export]
@@ -91,7 +100,8 @@ impl PipelineJob {
     pub fn new(
         pipeline: Arc<Pipeline>,
         input_packet: &HashMap<String, Vec<PathSet>>,
-        output_dir: &URI,
+        output_dir: URI,
+        annotation: Option<Annotation>,
         namespace_lookup: &HashMap<String, PathBuf>,
     ) -> Result<Self> {
         validate_packet("input".into(), &pipeline.input_spec, input_packet)?;
@@ -112,7 +122,8 @@ impl PipelineJob {
             hash: make_random_hash(),
             pipeline,
             input_packet: input_packet_with_checksum,
-            output_dir: output_dir.clone(),
+            output_dir,
+            annotation,
         })
     }
 }
@@ -137,6 +148,7 @@ impl PipelineJob {
     }
 }
 
+/// Struct to hold the result of a pipeline execution.
 pub struct PipelineResult {
     /// The pipeline job that was executed.
     pub pipeline_job: Arc<PipelineJob>,
@@ -175,6 +187,7 @@ impl From<Mapper> for Kernel {
     }
 }
 
+/// Mapper struct to store mapping information between input and output stream keys.
 #[derive(uniffi::Object, Display, Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
 #[display("{self:#?}")]
 #[uniffi::export(Display)]
@@ -206,7 +219,7 @@ impl Mapper {
 
 /// Index from pipeline node into pod specification.
 #[derive(uniffi::Record, Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
-pub struct SpecURI {
+pub struct NodeURI {
     /// Node reference name in pipeline.
     pub node_name: String,
     /// Specification key.
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 405fb51a..44ad3d57 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -53,10 +53,6 @@ struct ProcessingFailure {
 
 /// Internal representation of a pipeline run, this should not be made public due to the fact that it contains
 /// internal states and tasks
-#[expect(
-    clippy::type_complexity,
-    reason = "too complex, but necessary for async handling"
-)]
 #[derive(Debug)]
 struct PipelineRun {
     /// `PipelineJob` that this run is associated with
diff --git a/tests/extra/data/input.txt b/tests/extra/data/input.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/extra/data/input1.txt b/tests/extra/data/input1.txt
new file mode 100644
index 00000000..6ccbe584
--- /dev/null
+++ b/tests/extra/data/input1.txt
@@ -0,0 +1 @@
+input1_data
diff --git a/tests/extra/data/input2.txt b/tests/extra/data/input2.txt
new file mode 100644
index 00000000..901c1e11
--- /dev/null
+++ b/tests/extra/data/input2.txt
@@ -0,0 +1 @@
+input2_data
diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs
index 23f7d217..12b73d53 100644
--- a/tests/fixture/mod.rs
+++ b/tests/fixture/mod.rs
@@ -13,9 +13,9 @@ use orcapod::uniffi::{
     model::{
         Annotation,
         packet::{Blob, BlobKind, PathInfo, PathSet, URI},
-        pod::{Pod, PodJob, PodResult},
+        pipeline::{Kernel, Mapper, NodeURI, Pipeline, PipelineJob},
+        pod::{Pod, PodJob, PodResult, PodResultStatus},
     },
-    orchestrator::Status,
     store::{ModelID, ModelInfo, Store},
 };
 use std::{
@@ -156,7 +156,7 @@ pub fn pod_custom(
         image_reference.into(),
         command.into(),
         input_spec,
-        PathBuf::from("/tmp/output"),
+        PathBuf::from("/output"),
         HashMap::new(),
         "https://github.com/place/holder".to_owned(),
         0.1,          // 100 millicores as frac cores
@@ -271,20 +271,29 @@ pub fn append_name_pod(pod_name: &str) -> Result<Pod> {
         }),
         "alpine:3.14".to_owned(),
         format!(
-            "cp /input/input.txt /output/input.txt && echo \"Touch by Pod: {pod_name}\" >> /output/input.txt"
+            "cat input/input1.txt input/input2.txt > /output/output.txt && echo \"Processed by {pod_name}\" >> /output/output.txt"
         ),
-        HashMap::from([(
-            "input_text".to_owned(),
-            PathInfo {
-                path: PathBuf::from("/input/input.txt"),
-                match_pattern: r".*\.txt".to_owned(),
-            },
-        )]),
+        HashMap::from([
+            (
+                "input1".to_owned(),
+                PathInfo {
+                    path: PathBuf::from("/input/input1.txt"),
+                    match_pattern: r".*\.txt".to_owned(),
+                },
+            ),
+            (
+                "input2".into(),
+                PathInfo {
+                    path: PathBuf::from("/input/input2.txt"),
+                    match_pattern: r".*\.txt".to_owned(),
+                },
+            ),
+        ]),
         PathBuf::from("/output"),
         HashMap::from([(
-            "output_text".to_owned(),
+            "output".to_owned(),
             PathInfo {
-                path: PathBuf::from("/output/input.txt"),
+                path: PathBuf::from("/output/output.txt"),
                 match_pattern: r".*\.txt".to_owned(),
             },
         )]),
@@ -299,62 +308,29 @@ pub fn pipeline() -> Result<Pipeline> {
     // Create a simple pipeline where the functions job is to add append their name into the input file
     // Structure: A -> Mapper -> Joiner -> B -> Mapper -> C, D -> Mapper -> Joiner
 
-    // Create the components of the pipeline
-    let pod_a = append_name_pod("A")?;
-    let pod_b = append_name_pod("B")?;
-    let pod_c = append_name_pod("C")?;
-    let pod_d = append_name_pod("D")?;
+    // Create the kernel map
+    let mut kernel_map = HashMap::new();
 
-    // Create the file mapper that will be used to map the output of one pod to the input of another
-    let file_mapper = Mapper::new(HashMap::from([(
-        "output_text".to_owned(),
-        "input_text".to_owned(),
-    )]))?;
+    // Insert the pod into the kernel map
+    for pod_name in ["A", "B", "C", "D"] {
+        kernel_map.insert(pod_name.into(), append_name_pod(pod_name)?.into());
+    }
 
     // Create the file mapper that will be used to map the output of one pod to the input of another
-    let file_mapper_for_pod_d = Mapper::new(HashMap::from([(
-        "output_text".to_owned(),
-        "input2_text".to_owned(),
-    )]))?;
-
-    let mut kernel_to_node_name = HashMap::<Kernel, Vec<String>>::new();
-
-    // Insert the pods into the kernel_to_node_name mapping
-    for pod in [&pod_a, &pod_b, &pod_c, &pod_d] {
-        kernel_to_node_name
-            .entry(pod.clone().into())
-            .or_default()
-            .push(
-                pod.annotation
-                    .as_ref()
-                    .expect("Annotation missing.")
-                    .name
-                    .clone(),
-            );
-    }
+    let mapper_kernel: Kernel =
+        Mapper::new(HashMap::from([("output".to_owned(), "input".to_owned())]))?.into();
+    // Add the mappers
+    kernel_map.insert("pod_a_mapper".into(), mapper_kernel.clone());
+    kernel_map.insert("pod_b_mapper".into(), mapper_kernel);
+
+    // Create the file mapper for d which needs to be different
+    kernel_map.insert(
+        "pod_d_mapper".into(),
+        Mapper::new(HashMap::from([("output".to_owned(), "input2".to_owned())]))?.into(),
+    );
 
-    // Add mapper to end of pod_a and pod_b
-    kernel_to_node_name
-        .entry(file_mapper.clone().into())
-        .or_default()
-        .push("pod_a_mapper".to_owned());
-
-    kernel_to_node_name
-        .entry(file_mapper.into())
-        .or_default()
-        .push("pod_b_mapper".to_owned());
-
-    // Insert mapper for pod_d
-    kernel_to_node_name
-        .entry(file_mapper_for_pod_d.into())
-        .or_default()
-        .push("pod_d_mapper".to_owned());
-
-    // Add the joiner
-    kernel_to_node_name
-        .entry(Kernel::Joiner)
-        .or_default()
-        .push("pod_b_joiner".to_owned());
+    // Add the joiner node
+    kernel_map.insert("pod_b_joiner".into(), Kernel::Joiner);
 
     // Write all the edges in DOT format
     let dot = "
@@ -364,31 +340,73 @@ pub fn pipeline() -> Result<Pipeline> {
         }
     ";
 
-    // Create pipeline with annotation
-    let annotation = Some(Annotation {
-        name: "Example Pipeline".to_owned(),
-        description: "This is an example pipeline. of A -> B -> C".to_owned(),
-        version: "1.0.0".to_owned(),
-    });
-
-    Pipeline::from_dot(&kernel_to_node_name, dot, annotation)
+    Pipeline::new(
+        dot,
+        kernel_map,
+        HashMap::from([
+            (
+                "input".into(),
+                vec![
+                    NodeURI {
+                        node_name: "A".into(),
+                        key: "input".into(),
+                    },
+                    NodeURI {
+                        node_name: "D".into(),
+                        key: "input".into(),
+                    },
+                ],
+            ),
+            (
+                "input2".into(),
+                vec![
+                    NodeURI {
+                        node_name: "A".into(),
+                        key: "input2".into(),
+                    },
+                    NodeURI {
+                        node_name: "D".into(),
+                        key: "input2".into(),
+                    },
+                ],
+            ),
+        ]),
+        HashMap::from([(
+            "output".to_owned(),
+            NodeURI {
+                node_name: "C".into(),
+                key: "output".into(),
+            },
+        )]),
+        Some(Annotation {
+            name: "Example Pipeline".to_owned(),
+            description: "This is an example pipeline. of A -> B -> C".to_owned(),
+            version: "1.0.0".to_owned(),
+        }),
+    )
 }
 
-pub fn pipeline_job() -> Result<PipelineJob> {
+#[expect(clippy::implicit_hasher, reason = "Could be a false positive?")]
+pub fn pipeline_job(namespace_lookup: &HashMap<String, PathBuf>) -> Result<PipelineJob> {
     // Create a simple pipeline_job
     PipelineJob::new(
-        pipeline()?,
-        vec![HashMap::from([(
-            "input_text".to_owned(),
-            PathSet::Unary(Blob {
-                kind: BlobKind::File,
-                location: URI {
-                    namespace: "default".to_owned(),
-                    path: PathBuf::from("input.txt"),
-                },
-                ..Default::default()
-            }),
-        )])],
+        pipeline()?.into(),
+        &HashMap::from([
+            (
+                "input1".into(),
+                vec![PathSet::Unary(Blob::new(
+                    BlobKind::File,
+                    URI::new("default".into(), "input.txt".into()),
+                ))],
+            ),
+            (
+                "input2".into(),
+                vec![PathSet::Unary(Blob::new(
+                    BlobKind::File,
+                    URI::new("default".into(), "input2.txt".into()),
+                ))],
+            ),
+        ]),
         URI {
             namespace: "default".to_owned(),
             path: PathBuf::from("output"),
@@ -398,6 +416,7 @@ pub fn pipeline_job() -> Result<PipelineJob> {
             description: "This is an example pipeline job.".to_owned(),
             version: "1.0.0".to_owned(),
         }),
+        namespace_lookup,
     )
 }
 
diff --git a/tests/model.rs b/tests/model.rs
index 4180caf0..6a6d3e29 100644
--- a/tests/model.rs
+++ b/tests/model.rs
@@ -3,7 +3,8 @@
 pub mod fixture;
 use fixture::{NAMESPACE_LOOKUP_READ_ONLY, pod_job_style, pod_result_style, pod_style};
 use indoc::indoc;
-use orcapod::{core::model::to_yaml, uniffi::error::Result};
+use orcapod::core::model::to_yaml;
+use orcapod::uniffi::error::Result;
 
 #[test]
 fn hash_pod() -> Result<()> {
diff --git a/tests/orchestrator.rs b/tests/orchestrator.rs
index 86e32128..f9e31989 100644
--- a/tests/orchestrator.rs
+++ b/tests/orchestrator.rs
@@ -8,7 +8,7 @@ use fixture::{
 use futures_util::future::join_all;
 use orcapod::uniffi::{
     error::{OrcaError, Result},
-    model::packet::{PodResultStatus, URI},
+    model::{packet::URI, pod::PodResultStatus},
     orchestrator::{ImageKind, Orchestrator as _, PodRun, Status, docker::LocalDockerOrchestrator},
 };
 use std::{collections::HashMap, path::PathBuf};
diff --git a/tests/pipeline.rs b/tests/pipeline.rs
index 1e68dab5..a0783325 100644
--- a/tests/pipeline.rs
+++ b/tests/pipeline.rs
@@ -13,7 +13,7 @@ use orcapod::uniffi::{
     error::Result,
     model::{
         packet::{Blob, BlobKind, PathInfo, PathSet, URI},
-        pipeline::{Kernel, Pipeline, PipelineJob, SpecURI},
+        pipeline::{NodeURI, Pipeline, PipelineJob},
     },
 };
 use std::collections::HashMap;
@@ -30,29 +30,28 @@ fn input_packet_checksum() -> Result<()> {
         "},
         HashMap::from([(
             "A".into(),
-            Kernel::Pod {
-                r#ref: pod_custom(
-                    "alpine:3.14",
-                    "echo",
-                    HashMap::from([(
-                        "node_key_1".into(),
-                        PathInfo {
-                            path: "/tmp/input/subject.jpeg".into(),
-                            match_pattern: r".*\.jpeg".into(),
-                        },
-                    )]),
-                )?
-                .into(),
-            },
+            pod_custom(
+                "alpine:3.14",
+                "echo",
+                HashMap::from([(
+                    "node_key_1".into(),
+                    PathInfo {
+                        path: "/tmp/input/subject.jpeg".into(),
+                        match_pattern: r".*\.jpeg".into(),
+                    },
+                )]),
+            )?
+            .into(),
         )]),
-        &HashMap::from([(
+        HashMap::from([(
             "pipeline_key_1".into(),
-            vec![SpecURI {
+            vec![NodeURI {
                 node_name: "A".into(),
                 key: "node_key_1".into(),
             }],
         )]),
-        &HashMap::new(),
+        HashMap::new(),
+        None,
     )?;
 
     let pipeline_job = PipelineJob::new(
@@ -68,10 +67,11 @@ fn input_packet_checksum() -> Result<()> {
                 checksum: String::new(),
             })],
         )]),
-        &URI {
+        URI {
             namespace: "default".into(),
             path: "output/pipeline".into(),
         },
+        None,
         &NAMESPACE_LOOKUP_READ_ONLY,
     )?;
 
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index a6e6f19c..99535a6e 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -13,8 +13,15 @@ use orcapod::uniffi::{error::Result, pipeline_runner::runner::DockerPipelineRunn
 use crate::fixture::TestDirs;
 use fixture::pipeline_job;
 
-#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn basic_run() -> Result<()> {
+    // Create the test_dir and get the namespace lookup
+    let test_dirs = TestDirs::new(&HashMap::from([(
+        "default".to_owned(),
+        Some("./tests/extra/data/"),
+    )]))?;
+    let namespace_lookup = test_dirs.namespace_lookup();
+
     // create a zenoh session to print out all communication message
     let session = zenoh::open(zenoh::Config::default())
         .await
@@ -32,22 +39,16 @@ async fn basic_run() -> Result<()> {
             println!(
                 "Received message: {}: {:?}",
                 sample.key_expr().as_str(),
-                sample.payload();
+                sample.payload()
             );
         }
     });
 
-    let pipeline_job = pipeline_job()?;
+    let pipeline_job = pipeline_job(&namespace_lookup)?;
 
     // Create the runner
     let mut runner = DockerPipelineRunner::new("test".to_owned())?;
 
-    let test_dirs = TestDirs::new(&HashMap::from([(
-        "default".to_owned(),
-        Some("./tests/extra/data/"),
-    )]))?;
-    let namespace_lookup = test_dirs.namespace_lookup();
-
     let pipeline_run = runner
         .start(pipeline_job, "default", &namespace_lookup)
         .await?;
@@ -65,17 +66,22 @@ async fn basic_run() -> Result<()> {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn stop() -> Result<()> {
-    let pipeline_job = pipeline_job()?;
-
-    // Create the runner
-    let mut runner = DockerPipelineRunner::new("test".to_owned())?;
-
+    // Create the test_dir and get the namespace lookup
     let test_dirs = TestDirs::new(&HashMap::from([(
         "default".to_owned(),
-        Some("./tests/extra/data/"),
+        Some(
+            "./tests/extra
+        /data/",
+        ),
     )]))?;
+
     let namespace_lookup = test_dirs.namespace_lookup();
 
+    let pipeline_job = pipeline_job(&namespace_lookup)?;
+
+    // Create the runner
+    let mut runner = DockerPipelineRunner::new("test".to_owned())?;
+
     let pipeline_run = runner
         .start(pipeline_job, "default", &namespace_lookup)
         .await?;

From 0fca0941a7543b4999d2c9ebc0fa1d5a428c088d Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Fri, 1 Aug 2025 10:13:26 +0000
Subject: [PATCH 28/29] Add pipeline util func to handle new pipeline
 input_spec behavior (Runner shas no support yet)

---
 .vscode/settings.json                  |   2 +-
 src/core/orchestrator/agent.rs         |  10 --
 src/uniffi/model/packet.rs             |  14 ++-
 src/uniffi/model/pipeline.rs           |  79 ++++++++++---
 src/uniffi/pipeline_runner/runner.rs   | 147 ++++++++++++++++---------
 tests/extra/data/input1.txt            |   1 -
 tests/extra/data/input2.txt            |   1 -
 tests/extra/data/input_txt/Where.txt   |   1 +
 tests/extra/data/input_txt/black.txt   |   1 +
 tests/extra/data/input_txt/cat.txt     |   1 +
 tests/extra/data/input_txt/hiding.txt  |   1 +
 tests/extra/data/input_txt/is_the.txt  |   1 +
 tests/extra/data/input_txt/playing.txt |   1 +
 tests/extra/data/input_txt/tabby.txt   |   1 +
 tests/fixture/mod.rs                   | 143 ++++++++++++++----------
 tests/pipeline.rs                      |  90 ++++++++++++++-
 tests/pipeline_runner.rs               |  13 ++-
 17 files changed, 363 insertions(+), 144 deletions(-)
 delete mode 100644 tests/extra/data/input1.txt
 delete mode 100644 tests/extra/data/input2.txt
 create mode 100644 tests/extra/data/input_txt/Where.txt
 create mode 100644 tests/extra/data/input_txt/black.txt
 create mode 100644 tests/extra/data/input_txt/cat.txt
 create mode 100644 tests/extra/data/input_txt/hiding.txt
 create mode 100644 tests/extra/data/input_txt/is_the.txt
 create mode 100644 tests/extra/data/input_txt/playing.txt
 create mode 100644 tests/extra/data/input_txt/tabby.txt

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 48efb42c..f95b3e1a 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -21,4 +21,4 @@
     "python.terminal.activateEnvironment": false,
     "notebook.formatOnSave.enabled": true,
     "notebook.output.scrolling": true
-}
+}
\ No newline at end of file
diff --git a/src/core/orchestrator/agent.rs b/src/core/orchestrator/agent.rs
index 1566484b..1e518d18 100644
--- a/src/core/orchestrator/agent.rs
+++ b/src/core/orchestrator/agent.rs
@@ -154,16 +154,6 @@ where
                 .await
                 .context(selector::AgentCommunicationFailure {})?;
             while let Ok(sample) = subscriber.recv_async().await {
-                println!(
-                    "Received message on key expression: {}",
-                    sample.key_expr().as_str(),
-                );
-
-                println!(
-                    "Received payload: {:?}",
-                    RE_PODJOB_ACTION.captures(sample.key_expr().as_str())
-                );
-
                 if let (Ok(input), Some(metadata)) = (
                     serde_json::from_slice::<RequestI>(&sample.payload().to_bytes()),
                     RE_PODJOB_ACTION.captures(sample.key_expr().as_str()),
diff --git a/src/uniffi/model/packet.rs b/src/uniffi/model/packet.rs
index 79e554e6..fddac353 100644
--- a/src/uniffi/model/packet.rs
+++ b/src/uniffi/model/packet.rs
@@ -12,6 +12,18 @@ pub struct PathInfo {
     pub match_pattern: String,
 }
 
+#[uniffi::export]
+impl PathInfo {
+    #[uniffi::constructor]
+    /// Create a new `PathInfo` with the given path and match pattern.
+    pub const fn new(path: PathBuf, match_pattern: String) -> Self {
+        Self {
+            path,
+            match_pattern,
+        }
+    }
+}
+
 /// File or directory options for BLOBs.
 #[derive(uniffi::Enum, Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
 pub enum BlobKind {
@@ -53,8 +65,8 @@ pub struct Blob {
 
 #[uniffi::export]
 impl Blob {
-    #[uniffi::constructor]
     /// Create a new BLOB with the given kind, location, and checksum.
+    #[uniffi::constructor]
     pub const fn new(kind: BlobKind, location: URI) -> Self {
         Self {
             kind,
diff --git a/src/uniffi/model/pipeline.rs b/src/uniffi/model/pipeline.rs
index 72e9ab72..5dca259d 100644
--- a/src/uniffi/model/pipeline.rs
+++ b/src/uniffi/model/pipeline.rs
@@ -6,7 +6,7 @@ use crate::{
         validation::validate_packet,
     },
     uniffi::{
-        error::Result,
+        error::{Kind, OrcaError, Result},
         model::{
             Annotation,
             packet::{PathSet, URI},
@@ -19,7 +19,7 @@ use getset::CloneGetters;
 use itertools::Itertools as _;
 use petgraph::graph::DiGraph;
 use serde::{Deserialize, Serialize};
-use std::{collections::HashMap, path::PathBuf, sync::Arc};
+use std::{backtrace::Backtrace, collections::HashMap, path::PathBuf, sync::Arc};
 use uniffi;
 
 /// Computational dependencies as a [DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph).
@@ -129,22 +129,64 @@ impl PipelineJob {
 }
 
 impl PipelineJob {
-    pub(crate) fn get_input_packets(&self) -> impl Iterator<Item = HashMap<String, PathSet>> {
-        let (keys, values) = self
-            .input_packet
-            .iter()
-            .map(|(key, value)| (key.clone(), value))
-            .collect::<(Vec<_>, Vec<_>)>();
+    /// Helpful function to get the input packet for input nodes of the pipeline based on the `pipeline_job` an`pipeline_spec`ec
+    /// # Errors
+    /// Will return `Err` if there is an issue getting the input packet per node.
+    /// # Returns
+    /// A `HashMap` where the key is the node name and the value is a vector of `HashMap<String, PathSet>` representing the input packets for that node.
+    pub fn get_input_packet_per_node(
+        &self,
+    ) -> Result<HashMap<String, Vec<HashMap<String, PathSet>>>> {
+        // For each node in the input specification, we will iterate over its mapping and
+        let mut node_input_spec = HashMap::new();
+        for (input_key, node_uris) in &self.pipeline.input_spec {
+            for node_uri in node_uris {
+                let input_path_sets = self.input_packet.get(input_key).ok_or(OrcaError {
+                    kind: Kind::KeyMissing {
+                        key: input_key.clone(),
+                        backtrace: Some(Backtrace::capture()),
+                    },
+                })?;
+                // There shouldn't be a duplicate key in the input packet
+                let node_input_path_sets_ref = node_input_spec
+                    .entry(&node_uri.node_name)
+                    .or_insert_with(HashMap::new);
+
+                // Check if the node_uri.key already exists, if it does this is an error as there can't be two input_packet that map to the same key
+                if node_input_path_sets_ref.contains_key(&node_uri.key) {
+                    todo!()
+                } else {
+                    // Insert all the input_path_sets that map to this specific key for the node
+                    node_input_path_sets_ref.insert(&node_uri.key, input_path_sets);
+                }
+            }
+        }
 
-        values
+        // For each node, compute the cartesian product of the path_sets for each unique combination of keys
+        let node_input_packets = node_input_spec
             .into_iter()
-            .multi_cartesian_product()
-            .map(move |combo| {
-                keys.clone()
+            .map(|(node_id, input_node_keys)| {
+                // We need to pull them out at the same time to ensure the key order is preserve to match the cartesian product
+                let (keys, values): (Vec<_>, Vec<_>) = input_node_keys.into_iter().unzip();
+
+                // Covert each combo into a packet
+                let packets = values
                     .into_iter()
-                    .zip(combo.into_iter().cloned())
-                    .collect::<HashMap<_, _>>()
+                    .multi_cartesian_product()
+                    .map(|combo| {
+                        keys.iter()
+                            .copied()
+                            .zip(combo)
+                            .map(|(key, pathset)| (key.to_owned(), pathset.to_owned()))
+                            .collect::<HashMap<_, _>>()
+                    })
+                    .collect::<Vec<HashMap<String, PathSet>>>();
+
+                (node_id.to_owned(), packets)
             })
+            .collect::<HashMap<_, _>>();
+
+        Ok(node_input_packets)
     }
 }
 
@@ -225,3 +267,12 @@ pub struct NodeURI {
     /// Specification key.
     pub key: String,
 }
+
+#[uniffi::export]
+impl NodeURI {
+    /// Create a new `NodeURI` instance.
+    #[uniffi::constructor]
+    pub const fn new(node_name: String, key: String) -> Self {
+        Self { node_name, key }
+    }
+}
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 8e4b836b..481aabdf 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -61,6 +61,8 @@ struct PipelineRun {
     outputs: Arc<RwLock<HashMap<String, Vec<PathSet>>>>, // String is the node key, while hash
     orchestrator_agent: Arc<Agent>, // This is placed in pipeline due to the current design requiring a namespace to operate on
     orchestrator_agent_task: JoinSet<Result<()>>, // JoinSet of tasks for the orchestrator agent
+    failure_logs: Arc<RwLock<Vec<ProcessingFailure>>>, // Logs of processing failures
+    failure_logging_task: JoinSet<Result<()>>, // JoinSet of tasks for logging failures
 }
 
 impl PartialEq for PipelineRun {
@@ -140,11 +142,14 @@ impl DockerPipelineRunner {
             node_tasks: JoinSet::new(),
             orchestrator_agent: orchestrator_agent.into(),
             orchestrator_agent_task: JoinSet::new(),
+            failure_logs: Arc::new(RwLock::new(Vec::new())),
+            failure_logging_task: JoinSet::new(),
         };
 
         // Get the preexisting zenoh session from agent
         let session = Arc::clone(&pipeline_run.orchestrator_agent.client.session);
 
+        // Spawn task for each of the processing node
         let orchestrator_agent_clone = Arc::clone(&pipeline_run.orchestrator_agent);
         let namespace_lookup_clone = namespace_lookup.clone();
         // Start the orchestrator agent service
@@ -154,17 +159,27 @@ impl DockerPipelineRunner {
                 .await
         });
 
+        // Create failure logging task
+        pipeline_run
+            .failure_logging_task
+            .spawn(Self::failure_capture_task(
+                Arc::clone(&session),
+                Arc::clone(&pipeline_run.failure_logs),
+            ));
+
+        // Create the processor task for each node
         // The id for the pipeline_run is the pipeline_job hash
         let pipeline_run_id = pipeline_run.pipeline_job.hash.clone();
 
         let graph = &pipeline_run.pipeline_job.pipeline.graph;
 
+        // Create the subscriber that listen for ready messages
         let subscriber = session
             .declare_subscriber(self.get_base_key_exp(&pipeline_run_id) + "/*/status/ready")
             .await
             .context(selector::AgentCommunicationFailure {})?;
 
-        // For each node, we will create call create_node_processing_task
+        // Iterate through each node in the graph and spawn a task for each
         for node_idx in graph.node_indices() {
             let node = &graph[node_idx];
 
@@ -182,24 +197,35 @@ impl DockerPipelineRunner {
                 ));
         }
 
-        // Spawn the task that captures the outputs from the output_nodes
-        // For now the output nodes are hardcoded to be the leaf nodes of the pipeline
-
-        // for node in pipeline_run.pipeline_job.pipeline.get_leaf_nodes() {
-        //     pipeline_run
-        //         .node_tasks
-        //         .spawn(Self::create_output_capture_task_for_node(
-        //             node.id.clone(),
-        //             Arc::clone(&pipeline_run.outputs),
-        //             Arc::clone(&session),
-        //             format!(
-        //                 "{}/{}/outputs/{}",
-        //                 self.get_base_key_exp(&pipeline_run_id),
-        //                 node.id,
-        //                 SUCCESS_KEY_EXP,
-        //             ),
-        //         ));
-        // }
+        // Spawn the task that captures the outputs based on the output_spec
+        let mut node_output_spec = HashMap::new();
+        // Group the output spec by node
+        for (output_key, node_uri) in &pipeline_run.pipeline_job.pipeline.output_spec {
+            node_output_spec
+                .entry(node_uri.node_name.clone())
+                .or_insert_with(HashMap::new)
+                .insert(output_key.clone(), node_uri.key.clone());
+        }
+
+        for (node_id, key_mapping) in node_output_spec {
+            // Create the key expression to subscribe to
+            let key_exp_to_sub = format!(
+                "{}/{}/outputs/{}",
+                self.get_base_key_exp(&pipeline_run_id),
+                node_id,
+                SUCCESS_KEY_EXP,
+            );
+
+            // Spawn the task that captures the outputs
+            pipeline_run
+                .node_tasks
+                .spawn(Self::create_output_capture_task_for_node(
+                    key_mapping,
+                    Arc::clone(&pipeline_run.outputs),
+                    Arc::clone(&session),
+                    key_exp_to_sub,
+                ));
+        }
 
         // Wait for all nodes to be ready before sending inputs
         let num_of_nodes = graph.node_count();
@@ -220,19 +246,19 @@ impl DockerPipelineRunner {
             self.get_base_key_exp(&pipeline_run_id),
             INPUT_KEY_EXP,
         );
-        for packet in pipeline_run.pipeline_job.get_input_packets() {
-            // Send the packet to the input node key_exp
-            session
-                .put(
-                    &input_node_key_exp,
-                    serde_json::to_string(&NodeOutput::Packet(
-                        "input_node".to_owned(),
-                        packet.clone(),
-                    ))?,
-                )
-                .await
-                .context(selector::AgentCommunicationFailure {})?;
-        }
+        // for packet in pipeline_run.pipeline_job.get_input_packet_per_node() {
+        //     // Send the packet to the input node key_exp
+        //     session
+        //         .put(
+        //             &input_node_key_exp,
+        //             serde_json::to_string(&NodeOutput::Packet(
+        //                 "input_node".to_owned(),
+        //                 packet.clone(),
+        //             ))?,
+        //         )
+        //         .await
+        //         .context(selector::AgentCommunicationFailure {})?;
+        // }
 
         // Send the complete processing message for the input node
         session
@@ -323,13 +349,15 @@ impl DockerPipelineRunner {
     }
 
     /// This will capture the outputs of the given nodes and store it in the `outputs` map
-    #[expect(clippy::type_complexity, reason = "Needed for async")]
     async fn create_output_capture_task_for_node(
-        node_id: String,
-        outputs: Arc<RwLock<HashMap<String, Vec<HashMap<String, PathSet>>>>>,
+        //<Key to pull from the node, Key that will be mapped to in the outputs>
+        key_mapping: HashMap<String, String>,
+        outputs: Arc<RwLock<HashMap<String, Vec<PathSet>>>>,
         session: Arc<zenoh::Session>,
         key_exp_to_sub: String,
     ) -> Result<()> {
+        // Determine which keys we are interested in for the given node_id
+
         // Create a zenoh session
         let subscriber = session
             .declare_subscriber(key_exp_to_sub)
@@ -341,13 +369,16 @@ impl DockerPipelineRunner {
             let msg: NodeOutput = serde_json::from_slice(&payload.payload().to_bytes())?;
 
             match msg {
-                NodeOutput::Packet(_, hash_map) => {
+                NodeOutput::Packet(_, packet) => {
+                    // Figure out which keys
                     // Store the output packet in the outputs map
                     let mut outputs_lock = outputs.write().await;
-                    outputs_lock
-                        .entry(node_id.clone())
-                        .or_default()
-                        .push(hash_map);
+                    for (output_key, node_key) in &key_mapping {
+                        outputs_lock
+                            .entry(output_key.to_owned())
+                            .or_default()
+                            .push(get(&packet, node_key.as_str())?.clone());
+                    }
                 }
                 NodeOutput::ProcessingCompleted(_) => {
                     // Processing is completed, thus we can exit this task
@@ -358,6 +389,28 @@ impl DockerPipelineRunner {
         Ok(())
     }
 
+    async fn failure_capture_task(
+        session: Arc<zenoh::Session>,
+        failure_logs: Arc<RwLock<Vec<ProcessingFailure>>>,
+    ) -> Result<()> {
+        let sub = session
+            .declare_subscriber(format!("**/outputs/{FAILURE_KEY_EXP}"))
+            .await
+            .context(selector::AgentCommunicationFailure {})?;
+
+        // Listen to any failure messages and write it the logs
+        while let Ok(payload) = sub.recv_async().await {
+            // Extract the message from the payload
+            let msg: ProcessingFailure = serde_json::from_slice(&payload.payload().to_bytes())?;
+            // Store the failure message in the logs
+            failure_logs.write().await.push(msg.clone());
+            // Print the failure message to stderr
+            eprintln!("Processing failure for node {}: {}", msg.node_id, msg.error);
+        }
+
+        Ok(())
+    }
+
     /// Function to start tasks associated with the node
     /// Steps:
     /// - Create the node processor based on the kernel type
@@ -694,6 +747,7 @@ impl PodProcessor {
             None,
             namespace_lookup,
         )?;
+        // Print out the packet
 
         // Create listener for pod_job
         let target_key_exp = format!("group/{}/*/pod_job/{}/**", client.group, pod_job.hash);
@@ -735,9 +789,8 @@ impl PodProcessor {
         }
 
         // Get the pod result from the listener task
-        println!("Trying to get pod job result...");
         let temp = pod_job_listener_task.await?;
-        println!("Waiting for pod job to complete... {temp:?}");
+
         let pod_result = temp?;
 
         // Get the output packet for the pod result
@@ -820,7 +873,7 @@ impl NodeProcessor for PodProcessor {
                     try_to_forward_err_msg(
                         session,
                         err,
-                        &format!("{base_output_key_exp_owned}/{FAILURE_KEY_EXP}"),
+                        &base_output_key_exp_owned,
                         &node_id_owned,
                     )
                     .await;
@@ -901,13 +954,7 @@ impl NodeProcessor for MapperProcessor {
             .await;
 
             if let Err(err) = result {
-                try_to_forward_err_msg(
-                    session,
-                    err,
-                    &format!("{output_key_exp_clone}/{FAILURE_KEY_EXP}"),
-                    &node_id_clone,
-                )
-                .await;
+                try_to_forward_err_msg(session, err, &output_key_exp_clone, &node_id_clone).await;
             }
         });
         Ok(())
diff --git a/tests/extra/data/input1.txt b/tests/extra/data/input1.txt
deleted file mode 100644
index 6ccbe584..00000000
--- a/tests/extra/data/input1.txt
+++ /dev/null
@@ -1 +0,0 @@
-input1_data
diff --git a/tests/extra/data/input2.txt b/tests/extra/data/input2.txt
deleted file mode 100644
index 901c1e11..00000000
--- a/tests/extra/data/input2.txt
+++ /dev/null
@@ -1 +0,0 @@
-input2_data
diff --git a/tests/extra/data/input_txt/Where.txt b/tests/extra/data/input_txt/Where.txt
new file mode 100644
index 00000000..2891a132
--- /dev/null
+++ b/tests/extra/data/input_txt/Where.txt
@@ -0,0 +1 @@
+Where
diff --git a/tests/extra/data/input_txt/black.txt b/tests/extra/data/input_txt/black.txt
new file mode 100644
index 00000000..7e66a17d
--- /dev/null
+++ b/tests/extra/data/input_txt/black.txt
@@ -0,0 +1 @@
+black
diff --git a/tests/extra/data/input_txt/cat.txt b/tests/extra/data/input_txt/cat.txt
new file mode 100644
index 00000000..ef07ddcd
--- /dev/null
+++ b/tests/extra/data/input_txt/cat.txt
@@ -0,0 +1 @@
+cat
diff --git a/tests/extra/data/input_txt/hiding.txt b/tests/extra/data/input_txt/hiding.txt
new file mode 100644
index 00000000..56e64f05
--- /dev/null
+++ b/tests/extra/data/input_txt/hiding.txt
@@ -0,0 +1 @@
+hiding
diff --git a/tests/extra/data/input_txt/is_the.txt b/tests/extra/data/input_txt/is_the.txt
new file mode 100644
index 00000000..863d01a3
--- /dev/null
+++ b/tests/extra/data/input_txt/is_the.txt
@@ -0,0 +1 @@
+is the
diff --git a/tests/extra/data/input_txt/playing.txt b/tests/extra/data/input_txt/playing.txt
new file mode 100644
index 00000000..0395b790
--- /dev/null
+++ b/tests/extra/data/input_txt/playing.txt
@@ -0,0 +1 @@
+playing
diff --git a/tests/extra/data/input_txt/tabby.txt b/tests/extra/data/input_txt/tabby.txt
new file mode 100644
index 00000000..3de6015d
--- /dev/null
+++ b/tests/extra/data/input_txt/tabby.txt
@@ -0,0 +1 @@
+tabby
diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs
index 9e291c03..e42454d8 100644
--- a/tests/fixture/mod.rs
+++ b/tests/fixture/mod.rs
@@ -18,7 +18,7 @@ use orcapod::uniffi::{
     },
     store::{ModelID, ModelInfo, Store},
 };
-use std::borrow::ToOwned;
+use std::borrow::ToOwned as _;
 use std::{
     collections::HashMap,
     fs::{self, File},
@@ -263,7 +263,7 @@ pub fn container_image_style(binary_location: impl AsRef<Path>) -> Result<TestCo
 
 // Pipeline stuff
 
-pub fn append_name_pod(pod_name: &str) -> Result<Pod> {
+pub fn combine_txt_pod(pod_name: &str) -> Result<Pod> {
     Pod::new(
         Some(Annotation {
             name: pod_name.to_owned(),
@@ -272,22 +272,16 @@ pub fn append_name_pod(pod_name: &str) -> Result<Pod> {
         }),
         "alpine:3.14".to_owned(),
         str_to_vec(&format!(
-            "cat input/input1.txt input/input2.txt > /output/output.txt && echo \"Processed by {pod_name}\" >> /output/output.txt"
+            "cat input/input_1.txt input/input_2.txt > /output/output.txt && echo \"Processed by {pod_name}\" >> /output/output.txt"
         )),
         HashMap::from([
             (
                 "input1".to_owned(),
-                PathInfo {
-                    path: PathBuf::from("/input/input1.txt"),
-                    match_pattern: r".*\.txt".to_owned(),
-                },
+                PathInfo::new("/input/input_1.txt".into(), r".*\.txt".into()),
             ),
             (
                 "input2".into(),
-                PathInfo {
-                    path: PathBuf::from("/input/input2.txt"),
-                    match_pattern: r".*\.txt".to_owned(),
-                },
+                PathInfo::new("/input/input2.txt".into(), r".*\.txt".into()),
             ),
         ]),
         PathBuf::from("/output"),
@@ -299,8 +293,8 @@ pub fn append_name_pod(pod_name: &str) -> Result<Pod> {
             },
         )]),
         "N/A".to_owned(),
-        0.25,        // 250 millicores as frac cores
-        1_u64 << 30, // 1GiB in bytes
+        0.25,          // 250 millicores as frac cores
+        128_u64 << 20, // 128MB in bytes
         None,
     )
 }
@@ -314,30 +308,32 @@ pub fn pipeline() -> Result<Pipeline> {
 
     // Insert the pod into the kernel map
     for pod_name in ["A", "B", "C", "D"] {
-        kernel_map.insert(pod_name.into(), append_name_pod(pod_name)?.into());
+        kernel_map.insert(pod_name.into(), combine_txt_pod(pod_name)?.into());
     }
 
-    // Create the file mapper that will be used to map the output of one pod to the input of another
-    let mapper_kernel: Kernel =
-        Mapper::new(HashMap::from([("output".to_owned(), "input".to_owned())]))?.into();
-    // Add the mappers
-    kernel_map.insert("pod_a_mapper".into(), mapper_kernel.clone());
-    kernel_map.insert("pod_b_mapper".into(), mapper_kernel);
-
-    // Create the file mapper for d which needs to be different
+    // Create a mapper for A, B, and C
+    kernel_map.insert(
+        "pod_a_mapper".into(),
+        Mapper::new(HashMap::from([("output".to_owned(), "input_1".to_owned())]))?.into(),
+    );
+    kernel_map.insert(
+        "pod_b_mapper".into(),
+        Mapper::new(HashMap::from([("output".to_owned(), "input_2".to_owned())]))?.into(),
+    );
     kernel_map.insert(
-        "pod_d_mapper".into(),
-        Mapper::new(HashMap::from([("output".to_owned(), "input2".to_owned())]))?.into(),
+        "pod_c_mapper".into(),
+        Mapper::new(HashMap::from([("output".to_owned(), "input_1".to_owned())]))?.into(),
     );
 
     // Add the joiner node
-    kernel_map.insert("pod_b_joiner".into(), Kernel::Joiner);
+    kernel_map.insert("pod_c_joiner".into(), Kernel::Joiner);
 
     // Write all the edges in DOT format
     let dot = "
         digraph {
-        A -> pod_a_mapper -> pod_b_joiner -> B -> pod_b_mapper -> C;
-        D -> pod_d_mapper -> pod_b_joiner;
+        A -> pod_a_mapper -> pod_c_joiner;
+        B -> pod_b_mapper -> pod_c_joiner;
+        pod_c_joiner -> C -> D;
         }
     ";
 
@@ -346,42 +342,36 @@ pub fn pipeline() -> Result<Pipeline> {
         kernel_map,
         HashMap::from([
             (
-                "input".into(),
-                vec![
-                    NodeURI {
-                        node_name: "A".into(),
-                        key: "input".into(),
-                    },
-                    NodeURI {
-                        node_name: "D".into(),
-                        key: "input".into(),
-                    },
-                ],
+                "where".into(),
+                vec![NodeURI::new("A".into(), "input_1".into())],
             ),
             (
-                "input2".into(),
-                vec![
-                    NodeURI {
-                        node_name: "A".into(),
-                        key: "input2".into(),
-                    },
-                    NodeURI {
-                        node_name: "D".into(),
-                        key: "input2".into(),
-                    },
-                ],
+                "is_the".into(),
+                vec![NodeURI::new("A".into(), "input_2".into())],
+            ),
+            (
+                "cat_color".into(),
+                vec![NodeURI::new("B".into(), "input_1".into())],
+            ),
+            (
+                "cat".into(),
+                vec![NodeURI::new("B".into(), "input_2".into())],
+            ),
+            (
+                "action".into(),
+                vec![NodeURI::new("D".into(), "input_2".into())],
             ),
         ]),
         HashMap::from([(
             "output".to_owned(),
             NodeURI {
-                node_name: "C".into(),
+                node_name: "D".into(),
                 key: "output".into(),
             },
         )]),
         Some(Annotation {
-            name: "Example Pipeline".to_owned(),
-            description: "This is an example pipeline. of A -> B -> C".to_owned(),
+            name: "Sentence making pipeline".to_owned(),
+            description: "Parse txt files with txt and to form sentences".to_owned(),
             version: "1.0.0".to_owned(),
         }),
     )
@@ -394,27 +384,62 @@ pub fn pipeline_job(namespace_lookup: &HashMap<String, PathBuf>) -> Result<Pipel
         pipeline()?.into(),
         &HashMap::from([
             (
-                "input1".into(),
+                "where".into(),
                 vec![PathSet::Unary(Blob::new(
                     BlobKind::File,
-                    URI::new("default".into(), "input.txt".into()),
+                    URI::new("default".into(), "input_txt/Where.txt".into()),
                 ))],
             ),
             (
-                "input2".into(),
+                "is_the".into(),
+                vec![PathSet::Unary(Blob::new(
+                    BlobKind::File,
+                    URI::new("default".into(), "input_txt/is_the.txt".into()),
+                ))],
+            ),
+            (
+                "cat_color".into(),
+                vec![
+                    PathSet::Unary(Blob::new(
+                        BlobKind::File,
+                        URI::new("default".into(), "input_txt/black.txt".into()),
+                    )),
+                    PathSet::Unary(Blob::new(
+                        BlobKind::File,
+                        URI::new("default".into(), "input_txt/tabby.txt".into()),
+                    )),
+                ],
+            ),
+            (
+                "cat".into(),
                 vec![PathSet::Unary(Blob::new(
                     BlobKind::File,
-                    URI::new("default".into(), "input2.txt".into()),
+                    URI::new("default".into(), "input_txt/cat.txt".into()),
                 ))],
             ),
+            (
+                "action".into(),
+                vec![
+                    PathSet::Unary(Blob::new(
+                        BlobKind::File,
+                        URI::new("default".into(), "input_txt/hiding.txt".into()),
+                    )),
+                    PathSet::Unary(Blob::new(
+                        BlobKind::File,
+                        URI::new("default".into(), "input_txt/playing.txt".into()),
+                    )),
+                ],
+            ),
         ]),
         URI {
             namespace: "default".to_owned(),
-            path: PathBuf::from("output"),
+            path: PathBuf::from("pipeline_output"),
         },
         Some(Annotation {
-            name: "Example Pipeline Job".to_owned(),
-            description: "This is an example pipeline job.".to_owned(),
+            name: "Hiding Cat Sentence".to_owned(),
+            description:
+                "This pipeline should produce a txt file with the phrase about a cat hiding"
+                    .to_owned(),
             version: "1.0.0".to_owned(),
         }),
         namespace_lookup,
diff --git a/tests/pipeline.rs b/tests/pipeline.rs
index fc6b4900..6ddabb9f 100644
--- a/tests/pipeline.rs
+++ b/tests/pipeline.rs
@@ -3,6 +3,8 @@
     clippy::panic_in_result_fn,
     clippy::indexing_slicing,
     clippy::panic,
+    clippy::get_unwrap,
+    clippy::unwrap_used,
     reason = "OK in tests."
 )]
 
@@ -12,13 +14,14 @@ use indoc::indoc;
 use orcapod::uniffi::{
     error::Result,
     model::{
+        Annotation,
         packet::{Blob, BlobKind, PathInfo, PathSet, URI},
         pipeline::{NodeURI, Pipeline, PipelineJob},
     },
 };
 use std::collections::HashMap;
 
-use crate::fixture::NAMESPACE_LOOKUP_READ_ONLY;
+use crate::fixture::{NAMESPACE_LOOKUP_READ_ONLY, pipeline, pipeline_job};
 
 #[test]
 fn input_packet_checksum() -> Result<()> {
@@ -87,3 +90,88 @@ fn input_packet_checksum() -> Result<()> {
     );
     Ok(())
 }
+
+#[test]
+fn creation() -> Result<()> {
+    // This test checks if the pipeline can be created successfully.
+    let pipeline = pipeline()?;
+
+    assert_eq!(
+        pipeline.annotation,
+        Some(Annotation {
+            name: "Sentence making pipeline".to_owned(),
+            description: "Parse txt files with txt and to form sentences".to_owned(),
+            version: "1.0.0".to_owned(),
+        }),
+        "Pipeline annotation does not match expected values."
+    );
+
+    assert_eq!(
+        pipeline.graph.node_count(),
+        6,
+        "Pipeline graph should have 6 nodes."
+    );
+    assert_eq!(
+        pipeline.graph.edge_count(),
+        5,
+        "Pipeline graph should have 5 edges."
+    );
+
+    Ok(())
+}
+
+/// Verify that the utility function that computes the input packets to feed into each input node works as expected.
+#[test]
+fn get_input_packet_per_node() -> Result<()> {
+    let pipeline_job = pipeline_job(&NAMESPACE_LOOKUP_READ_ONLY)?;
+
+    let input_packets_per_node = pipeline_job.get_input_packet_per_node()?;
+
+    // Given the pipeline definition used in pipeline_job, we expect the following input packets per node:
+    // The full sentence to be constructed is "Where is the black/tabby cat hiding/playing"
+    // Node A: 1 packets, with keys input "input_1" and "input_2" Due to only Where.txt and is_the.txt being route to this node
+    // Node B: 2 packets, with keys "input_1" and "input_2" Due to black.txt / tabby.txt and cat.txt being routed to this node
+    // Node C should not receive any input, as it is an internal node
+    // Node D: 2 packets, with keys "input_2" only, due to input_1 being received by the joiner node, and input_2 being hiding.txt
+
+    // Check A
+    let input_packet_node_a = input_packets_per_node.get("A").unwrap();
+    assert_num_of_packets(input_packet_node_a.len(), 1);
+    assert_contains_keys(&input_packet_node_a[0], &["input_1", "input_2"]);
+
+    // Check B
+    let input_packet_node_b = input_packets_per_node.get("B").unwrap();
+    assert_num_of_packets(input_packet_node_b.len(), 2);
+    assert_contains_keys(&input_packet_node_b[0], &["input_1", "input_2"]);
+    assert_contains_keys(&input_packet_node_b[1], &["input_1", "input_2"]);
+
+    // Check C
+    assert!(
+        !input_packets_per_node.contains_key("C"),
+        "Node C should not have any input packets.",
+    );
+
+    // Check D
+    let input_packet_node_d = input_packets_per_node.get("D").unwrap();
+    assert_num_of_packets(input_packet_node_d.len(), 2);
+    assert_contains_keys(&input_packet_node_d[0], &["input_2"]);
+    assert_contains_keys(&input_packet_node_d[1], &["input_2"]);
+
+    Ok(())
+}
+
+fn assert_num_of_packets(num_of_packets: usize, expected: usize) {
+    assert!(
+        num_of_packets == expected,
+        "Expected {expected} packets, but got {num_of_packets}."
+    );
+}
+
+fn assert_contains_keys(input_packet: &HashMap<String, PathSet>, keys: &[&str]) {
+    for key in keys {
+        assert!(
+            input_packet.contains_key(*key),
+            "Input packet should contain key '{key}'."
+        );
+    }
+}
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 99535a6e..5d70e78c 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -1,4 +1,9 @@
-#![expect(missing_docs, clippy::panic_in_result_fn, reason = "OK in tests.")]
+#![expect(
+    missing_docs,
+    clippy::panic_in_result_fn,
+    clippy::expect_used,
+    reason = "OK in tests."
+)]
 
 // If 'fixture' is a local module, ensure there is a 'mod fixture;' statement or a 'fixture.rs' file in the same directory or in 'tests/'.
 // If 'fixture' is an external crate, add it to Cargo.toml and import as shown below.
@@ -36,11 +41,7 @@ async fn basic_run() -> Result<()> {
 
         while let Ok(sample) = sub.recv_async().await {
             // Print the key expression and payload of each message
-            println!(
-                "Received message: {}: {:?}",
-                sample.key_expr().as_str(),
-                sample.payload()
-            );
+            println!("Received message: {}:", sample.key_expr().as_str(),);
         }
     });
 

From 0c59a9dd1896df76c531134855c9899d8a980d8f Mon Sep 17 00:00:00 2001
From: Synicix <synicix@gmail.com>
Date: Sat, 2 Aug 2025 03:48:21 +0000
Subject: [PATCH 29/29] Save progress

---
 src/core/model/pipeline.rs           | 15 +++++
 src/uniffi/orchestrator/agent.rs     |  2 +-
 src/uniffi/pipeline_runner/runner.rs | 99 +++++++++++++++++-----------
 tests/fixture/mod.rs                 | 13 ++--
 tests/pipeline.rs                    | 21 ++++--
 tests/pipeline_runner.rs             |  7 +-
 6 files changed, 104 insertions(+), 53 deletions(-)

diff --git a/src/core/model/pipeline.rs b/src/core/model/pipeline.rs
index 572b8a75..1c5af674 100644
--- a/src/core/model/pipeline.rs
+++ b/src/core/model/pipeline.rs
@@ -1,3 +1,5 @@
+use std::collections::HashSet;
+
 use crate::uniffi::model::pipeline::{Kernel, Pipeline};
 use petgraph::Direction::Incoming;
 use serde::{Deserialize, Serialize};
@@ -25,4 +27,17 @@ impl Pipeline {
                 .map(move |parent_idx| &self.graph[parent_idx])
         })
     }
+
+    /// Return a vec of `node_names` that takes in inputs based on the `input_spec`ec
+    pub(crate) fn get_input_nodes(&self) -> HashSet<&String> {
+        let mut input_nodes = HashSet::new();
+
+        self.input_spec.iter().for_each(|(_, node_uris)| {
+            for node_uri in node_uris {
+                input_nodes.insert(&node_uri.node_name);
+            }
+        });
+
+        input_nodes
+    }
 }
diff --git a/src/uniffi/orchestrator/agent.rs b/src/uniffi/orchestrator/agent.rs
index 4a559f37..0bfd7598 100644
--- a/src/uniffi/orchestrator/agent.rs
+++ b/src/uniffi/orchestrator/agent.rs
@@ -154,11 +154,11 @@ impl Agent {
             namespace_lookup.clone(),
             |pod_job: &PodJob| EventPayload::Request(pod_job.clone()),
             async |agent, inner_namespace_lookup, _, pod_job| {
-                println!("Processing pod job: {}", pod_job.hash);
                 let pod_run = agent
                     .orchestrator
                     .start(&inner_namespace_lookup, &pod_job)
                     .await?;
+
                 let pod_result = agent.orchestrator.get_result(&pod_run).await?;
                 //agent.orchestrator.delete(&pod_run).await?;
                 Ok(pod_result)
diff --git a/src/uniffi/pipeline_runner/runner.rs b/src/uniffi/pipeline_runner/runner.rs
index 481aabdf..3026f8a1 100644
--- a/src/uniffi/pipeline_runner/runner.rs
+++ b/src/uniffi/pipeline_runner/runner.rs
@@ -179,6 +179,9 @@ impl DockerPipelineRunner {
             .await
             .context(selector::AgentCommunicationFailure {})?;
 
+        // Get the set of input_nodes
+        let input_nodes = pipeline_run.pipeline_job.pipeline.get_input_nodes();
+
         // Iterate through each node in the graph and spawn a task for each
         for node_idx in graph.node_indices() {
             let node = &graph[node_idx];
@@ -189,6 +192,7 @@ impl DockerPipelineRunner {
                 .spawn(Self::spawn_node_processing_task(
                     node.clone(),
                     Arc::clone(&pipeline_run.pipeline_job.pipeline),
+                    input_nodes.contains(&node.name),
                     self.get_base_key_exp(&pipeline_run_id),
                     namespace.to_owned(),
                     namespace_lookup.clone(),
@@ -241,33 +245,40 @@ impl DockerPipelineRunner {
         }
 
         // Submit the input_packets to the correct key_exp
-        let input_node_key_exp = format!(
+        let base_input_node_key_exp = format!(
             "{}/{}",
             self.get_base_key_exp(&pipeline_run_id),
             INPUT_KEY_EXP,
         );
-        // for packet in pipeline_run.pipeline_job.get_input_packet_per_node() {
-        //     // Send the packet to the input node key_exp
-        //     session
-        //         .put(
-        //             &input_node_key_exp,
-        //             serde_json::to_string(&NodeOutput::Packet(
-        //                 "input_node".to_owned(),
-        //                 packet.clone(),
-        //             ))?,
-        //         )
-        //         .await
-        //         .context(selector::AgentCommunicationFailure {})?;
-        // }
-
-        // Send the complete processing message for the input node
-        session
-            .put(
-                input_node_key_exp,
-                serde_json::to_string(&NodeOutput::ProcessingCompleted("input_node".to_owned()))?,
-            )
-            .await
-            .context(selector::AgentCommunicationFailure {})?;
+
+        // For each node send all the packets associate with it
+        for (node_name, input_packets) in pipeline_run.pipeline_job.get_input_packet_per_node()? {
+            for packet in input_packets {
+                // Send the packet to the input node key_exp
+                let output_key_exp = format!("{base_input_node_key_exp}/{node_name}");
+                session
+                    .put(
+                        &output_key_exp,
+                        serde_json::to_string(&NodeOutput::Packet(
+                            "input_node".to_owned(),
+                            packet.clone(),
+                        ))?,
+                    )
+                    .await
+                    .context(selector::AgentCommunicationFailure {})?;
+
+                // All packets associate with node are sent, we can send processing complete msg now
+                session
+                    .put(
+                        &output_key_exp,
+                        serde_json::to_string(&NodeOutput::ProcessingCompleted(
+                            "input_node".to_owned(),
+                        ))?,
+                    )
+                    .await
+                    .context(selector::AgentCommunicationFailure {})?;
+            }
+        }
 
         // Insert into the list of pipeline runs
         self.pipeline_runs
@@ -401,11 +412,16 @@ impl DockerPipelineRunner {
         // Listen to any failure messages and write it the logs
         while let Ok(payload) = sub.recv_async().await {
             // Extract the message from the payload
-            let msg: ProcessingFailure = serde_json::from_slice(&payload.payload().to_bytes())?;
+            let process_failure: ProcessingFailure =
+                serde_json::from_slice(&payload.payload().to_bytes())?;
             // Store the failure message in the logs
-            failure_logs.write().await.push(msg.clone());
-            // Print the failure message to stderr
-            eprintln!("Processing failure for node {}: {}", msg.node_id, msg.error);
+            failure_logs.write().await.push(process_failure.clone());
+            if let Some(first_line) = process_failure.error.lines().next() {
+                println!(
+                    "Node {} processing failed with error: {}",
+                    process_failure.node_id, first_line
+                );
+            }
         }
 
         Ok(())
@@ -425,6 +441,7 @@ impl DockerPipelineRunner {
     async fn spawn_node_processing_task(
         node: PipelineNode,
         pipeline: Arc<Pipeline>,
+        is_input_node: bool,
         base_key_exp: String,
         namespace: String,
         namespace_lookup: HashMap<String, PathBuf>,
@@ -438,12 +455,17 @@ impl DockerPipelineRunner {
                 Kernel::Mapper { mapper } => Box::new(MapperProcessor::new(Arc::clone(mapper))),
                 Kernel::Joiner => {
                     // Need to get the parent node id for this joiner node
-                    Box::new(JoinerProcessor::new(
-                        pipeline
-                            .get_node_parents(&node)
-                            .map(|parent_node| parent_node.name.clone())
-                            .collect::<Vec<_>>(),
-                    ))
+                    let mut parent_nodes = pipeline
+                        .get_node_parents(&node)
+                        .map(|parent_node| parent_node.name.clone())
+                        .collect::<Vec<_>>();
+
+                    // Check if it this node takes input from input_nodes, if so we need ot add it to parent_node
+                    if is_input_node {
+                        parent_nodes.push("input_node".to_owned());
+                    }
+
+                    Box::new(JoinerProcessor::new(parent_nodes))
                 }
             }));
 
@@ -461,9 +483,10 @@ impl DockerPipelineRunner {
             })
             .collect::<Vec<_>>();
 
-        // If there was no parent node, then this is root node, therefore we need to subscribe to the input node
-        if key_exps_to_subscribe_to.is_empty() {
-            key_exps_to_subscribe_to.push(format!("{base_key_exp}/{INPUT_KEY_EXP}"));
+        // Check if node is an input_node, if so we need to add the input node key expression
+        if is_input_node {
+            key_exps_to_subscribe_to
+                .push(format!("{base_key_exp}/input_node/outputs/{}", node.name));
         }
 
         // Create a subscriber for each of the parent nodes (Should only be 1, unless it is a joiner node)
@@ -747,10 +770,9 @@ impl PodProcessor {
             None,
             namespace_lookup,
         )?;
-        // Print out the packet
 
         // Create listener for pod_job
-        let target_key_exp = format!("group/{}/*/pod_job/{}/**", client.group, pod_job.hash);
+        let target_key_exp = format!("group/{}/success/pod_job/{}/**", client.group, pod_job.hash);
 
         // Create the subscriber
         let pod_job_subscriber = session
@@ -1036,7 +1058,6 @@ impl NodeProcessor for JoinerProcessor {
 
         // Check if we have all the other parents needed to compute the cartesian product
         if self.input_packet_cache.values().all(|v| !v.is_empty()) {
-            // Print we have all the parents
             // Get all the cached packets from other parents
             let other_parent_ids = self
                 .input_packet_cache
diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs
index e42454d8..a5b4fc28 100644
--- a/tests/fixture/mod.rs
+++ b/tests/fixture/mod.rs
@@ -272,15 +272,15 @@ pub fn combine_txt_pod(pod_name: &str) -> Result<Pod> {
         }),
         "alpine:3.14".to_owned(),
         str_to_vec(&format!(
-            "cat input/input_1.txt input/input_2.txt > /output/output.txt && echo \"Processed by {pod_name}\" >> /output/output.txt"
+            "sh -c cat input/input_1.txt input/input_2.txt > /output/output.txt && echo \"Processed by {pod_name}\" >> /output/output.txt"
         )),
         HashMap::from([
             (
-                "input1".to_owned(),
+                "input_1".to_owned(),
                 PathInfo::new("/input/input_1.txt".into(), r".*\.txt".into()),
             ),
             (
-                "input2".into(),
+                "input_2".into(),
                 PathInfo::new("/input/input2.txt".into(), r".*\.txt".into()),
             ),
         ]),
@@ -328,12 +328,15 @@ pub fn pipeline() -> Result<Pipeline> {
     // Add the joiner node
     kernel_map.insert("pod_c_joiner".into(), Kernel::Joiner);
 
+    // Add joiner node for D
+    kernel_map.insert("pod_d_joiner".into(), Kernel::Joiner);
+
     // Write all the edges in DOT format
     let dot = "
         digraph {
         A -> pod_a_mapper -> pod_c_joiner;
         B -> pod_b_mapper -> pod_c_joiner;
-        pod_c_joiner -> C -> D;
+        pod_c_joiner -> C -> pod_d_joiner -> D;
         }
     ";
 
@@ -359,7 +362,7 @@ pub fn pipeline() -> Result<Pipeline> {
             ),
             (
                 "action".into(),
-                vec![NodeURI::new("D".into(), "input_2".into())],
+                vec![NodeURI::new("pod_d_joiner".into(), "input_2".into())],
             ),
         ]),
         HashMap::from([(
diff --git a/tests/pipeline.rs b/tests/pipeline.rs
index 6ddabb9f..53797c73 100644
--- a/tests/pipeline.rs
+++ b/tests/pipeline.rs
@@ -108,13 +108,13 @@ fn creation() -> Result<()> {
 
     assert_eq!(
         pipeline.graph.node_count(),
-        6,
-        "Pipeline graph should have 6 nodes."
+        8,
+        "Pipeline graph should have 8 nodes."
     );
     assert_eq!(
         pipeline.graph.edge_count(),
-        5,
-        "Pipeline graph should have 5 edges."
+        7,
+        "Pipeline graph should have 7 edges."
     );
 
     Ok(())
@@ -132,7 +132,7 @@ fn get_input_packet_per_node() -> Result<()> {
     // Node A: 1 packets, with keys input "input_1" and "input_2" Due to only Where.txt and is_the.txt being route to this node
     // Node B: 2 packets, with keys "input_1" and "input_2" Due to black.txt / tabby.txt and cat.txt being routed to this node
     // Node C should not receive any input, as it is an internal node
-    // Node D: 2 packets, with keys "input_2" only, due to input_1 being received by the joiner node, and input_2 being hiding.txt
+    // Node pod_d_joiner: 2 packets, with keys "input_2" only, due to input_1 being received by the joiner node, and input_2 being hiding.txt
 
     // Check A
     let input_packet_node_a = input_packets_per_node.get("A").unwrap();
@@ -151,12 +151,19 @@ fn get_input_packet_per_node() -> Result<()> {
         "Node C should not have any input packets.",
     );
 
-    // Check D
-    let input_packet_node_d = input_packets_per_node.get("D").unwrap();
+    // Check pod_d_joiner
+    // Node node_d_joiner: 2 packets, with keys "input_2" only, due to input_1 being received by the joiner node, and input_2 being hiding.txt
+    let input_packet_node_d = input_packets_per_node.get("pod_d_joiner").unwrap();
     assert_num_of_packets(input_packet_node_d.len(), 2);
     assert_contains_keys(&input_packet_node_d[0], &["input_2"]);
     assert_contains_keys(&input_packet_node_d[1], &["input_2"]);
 
+    // Check D
+    assert!(
+        !input_packets_per_node.contains_key("D"),
+        "Node D should not have any input packets.",
+    );
+
     Ok(())
 }
 
diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs
index 5d70e78c..56598496 100644
--- a/tests/pipeline_runner.rs
+++ b/tests/pipeline_runner.rs
@@ -35,7 +35,7 @@ async fn basic_run() -> Result<()> {
     tokio::spawn(async move {
         // Subscribe to all messages in the 'test' group
         let sub = session
-            .declare_subscriber("**")
+            .declare_subscriber("**/failure/**")
             .await
             .expect("Failed to declare subscriber");
 
@@ -57,6 +57,11 @@ async fn basic_run() -> Result<()> {
     // Wait for the pipeline run to complete
     let pipeline_result = runner.get_result(&pipeline_run).await?;
 
+    println!(
+        "Pipeline run completed: {:?}",
+        pipeline_result.output_packets
+    );
+
     assert!(
         pipeline_result.output_packets.len() == 1,
         "Expected exactly one output packet."