From d72cd39367a691a22193bcdff8aaf166bb781b26 Mon Sep 17 00:00:00 2001 From: synicix Date: Mon, 20 Oct 2025 01:48:19 -0700 Subject: [PATCH 1/7] Merge unffi and core into one set of files. Still require manual scope clean up. --- Cargo.toml | 99 +++---- src/core/error.rs | 154 ----------- src/core/mod.rs | 14 - src/core/model/pod.rs | 265 ------------------ src/core/orchestrator/agent.rs | 202 -------------- src/core/orchestrator/mod.rs | 29 -- src/core/store/filestore.rs | 248 ----------------- src/core/store/mod.rs | 4 - src/{core => }/crypto.rs | 20 +- src/{uniffi => }/error.rs | 148 ++++++++++- src/{core => }/graph.rs | 10 +- src/lib.rs | 20 +- src/{core => }/model/mod.rs | 45 +++- src/{uniffi => }/model/packet.rs | 3 +- src/{core => }/model/pipeline.rs | 336 +++++++++++++++++++++-- src/{uniffi => }/model/pod.rs | 314 ++++++++++++++++++++-- src/{core => }/operator.rs | 59 +++- src/{uniffi => }/orchestrator/agent.rs | 205 +++++++++++++- src/{core => }/orchestrator/docker.rs | 355 ++++++++++++++++++++++++- src/{uniffi => }/orchestrator/mod.rs | 59 ++-- src/{core => }/pipeline_runner.rs | 40 ++- src/{uniffi => }/store/filestore.rs | 270 +++++++++++++++++-- src/{uniffi => }/store/mod.rs | 5 +- src/uniffi/mod.rs | 10 - src/uniffi/model/mod.rs | 37 --- src/uniffi/model/pipeline.rs | 315 ---------------------- src/uniffi/operator.rs | 39 --- src/uniffi/orchestrator/docker.rs | 346 ------------------------ src/{core => }/util.rs | 2 +- src/{core => }/validation.rs | 2 +- tests/agent.rs | 2 +- tests/error.rs | 2 +- tests/fixture/mod.rs | 5 +- tests/orchestrator.rs | 6 +- tests/pipeline.rs | 2 +- tests/pipeline_runner.rs | 10 +- tests/store.rs | 2 +- 37 files changed, 1787 insertions(+), 1897 deletions(-) delete mode 100644 src/core/error.rs delete mode 100644 src/core/mod.rs delete mode 100644 src/core/model/pod.rs delete mode 100644 src/core/orchestrator/agent.rs delete mode 100644 src/core/orchestrator/mod.rs delete mode 100644 src/core/store/filestore.rs delete mode 100644 src/core/store/mod.rs rename src/{core => }/crypto.rs (94%) rename src/{uniffi => }/error.rs (58%) rename src/{core => }/graph.rs (91%) rename src/{core => }/model/mod.rs (60%) rename src/{uniffi => }/model/packet.rs (97%) rename src/{core => }/model/pipeline.rs (64%) rename src/{uniffi => }/model/pod.rs (57%) rename src/{core => }/operator.rs (87%) rename src/{uniffi => }/orchestrator/agent.rs (52%) rename src/{core => }/orchestrator/docker.rs (50%) rename src/{uniffi => }/orchestrator/mod.rs (87%) rename src/{core => }/pipeline_runner.rs (98%) rename src/{uniffi => }/store/filestore.rs (56%) rename src/{uniffi => }/store/mod.rs (98%) delete mode 100644 src/uniffi/mod.rs delete mode 100644 src/uniffi/model/mod.rs delete mode 100644 src/uniffi/model/pipeline.rs delete mode 100644 src/uniffi/operator.rs delete mode 100644 src/uniffi/orchestrator/docker.rs rename src/{core => }/util.rs (96%) rename src/{core => }/validation.rs (92%) diff --git a/Cargo.toml b/Cargo.toml index be17fcef..601c668c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -113,55 +113,56 @@ restriction = "deny" style = "deny" suspicious = "deny" -allow_attributes = { level = "allow", priority = 127 } # Useful when suppressing warnings is also desired. -arbitrary_source_item_ordering = { level = "allow", priority = 127 } # allow arbitrary ordering to keep relevant code nearby -arithmetic_side_effects = { level = "allow", priority = 127 } # allow arithmetic for convenience though it could overflow -as_conversions = { level = "allow", priority = 127 } # allow casting -assertions_on_result_states = { level = "allow", priority = 127 } # allow checking is_ok/is_err -big_endian_bytes = { level = "allow", priority = 127 } # allow to_be_bytes / from_be_bytes -blanket_clippy_restriction_lints = { level = "allow", priority = 127 } # allow setting all restrictions so we can omit specific ones -default_numeric_fallback = { level = "allow", priority = 127 } # allow type inferred by numeric literal -disallowed_script_idents = { level = "allow", priority = 127 } # skip since we use only ascii -else_if_without_else = { level = "allow", priority = 127 } # missing else ok -exhaustive_enums = { level = "allow", priority = 127 } # revisit once lib is ready to be used externally -exhaustive_structs = { level = "allow", priority = 127 } # revisit once lib is ready to be used externally -float_arithmetic = { level = "allow", priority = 127 } # allow float arithmetic -host_endian_bytes = { level = "allow", priority = 127 } # allow to_ne_bytes / from_ne_bytes -impl_trait_in_params = { level = "allow", priority = 127 } # impl in params ok -implicit_return = { level = "allow", priority = 127 } # missing return ok -inline_asm_x86_intel_syntax = { level = "allow", priority = 127 } # intel syntax ok -integer_division = { level = "allow", priority = 127 } # allow discarding remainder -iter_over_hash_type = { level = "allow", priority = 127 } # allow iterating over unordered iterables like `HashMap` -little_endian_bytes = { level = "allow", priority = 127 } # allow to_le_bytes / from_le_bytes -missing_asserts_for_indexing = { level = "allow", priority = 127 } # missing assert before indexing ok -missing_docs_in_private_items = { level = "allow", priority = 127 } # missing docs on private ok -missing_inline_in_public_items = { level = "allow", priority = 127 } # let rust compiler determine best inline logic -missing_trait_methods = { level = "allow", priority = 127 } # allow in favor of rustc `implement the missing item` -module_name_repetitions = { level = "allow", priority = 127 } # allow use of module name in type names -multiple_crate_versions = { level = "allow", priority = 127 } # allow since list of exceptions changes frequently from external -multiple_inherent_impl = { level = "allow", priority = 127 } # required in best practice to limit exposure over UniFFI -must_use_candidate = { level = "allow", priority = 127 } # omitting #[must_use] ok -mod_module_files = { level = "allow", priority = 127 } # mod directories ok -non_ascii_literal = { level = "allow", priority = 127 } # non-ascii char in string literal ok -partial_pub_fields = { level = "allow", priority = 127 } # partial struct pub fields ok -pattern_type_mismatch = { level = "allow", priority = 127 } # allow in favor of clippy::ref_patterns -print_stderr = { level = "allow", priority = 127 } # stderr prints ok -print_stdout = { level = "allow", priority = 127 } # stdout prints ok -pub_use = { level = "allow", priority = 127 } # ok to structure source into many files but clean up import -pub_with_shorthand = { level = "allow", priority = 127 } # allow use of pub(super) -pub_without_shorthand = { level = "allow", priority = 127 } # allow use of pub(in super) -question_mark_used = { level = "allow", priority = 127 } # allow question operator -self_named_module_files = { level = "allow", priority = 127 } # mod files ok -semicolon_inside_block = { level = "allow", priority = 127 } # ok to keep inside block -separated_literal_suffix = { level = "allow", priority = 127 } # literal suffixes should be separated by underscore -single_char_lifetime_names = { level = "allow", priority = 127 } # single char lifetimes ok -single_component_path_imports = { level = "allow", priority = 127 } # allow for readability -std_instead_of_alloc = { level = "allow", priority = 127 } # we should use std when possible -std_instead_of_core = { level = "allow", priority = 127 } # we should use std when possible -string_add = { level = "allow", priority = 127 } # simple concat ok -string_lit_chars_any = { level = "allow", priority = 127 } # favor readability until a perf case comes up -use_debug = { level = "warn", priority = 127 } # debug print -wildcard_enum_match_arm = { level = "allow", priority = 127 } # allow wildcard match arm in enums +allow_attributes = { level = "allow", priority = 127 } # Useful when suppressing warnings is also desired. +arbitrary_source_item_ordering = { level = "allow", priority = 127 } # allow arbitrary ordering to keep relevant code nearby +arithmetic_side_effects = { level = "allow", priority = 127 } # allow arithmetic for convenience though it could overflow +as_conversions = { level = "allow", priority = 127 } # allow casting +assertions_on_result_states = { level = "allow", priority = 127 } # allow checking is_ok/is_err +big_endian_bytes = { level = "allow", priority = 127 } # allow to_be_bytes / from_be_bytes +blanket_clippy_restriction_lints = { level = "allow", priority = 127 } # allow setting all restrictions so we can omit specific ones +default_numeric_fallback = { level = "allow", priority = 127 } # allow type inferred by numeric literal +disallowed_script_idents = { level = "allow", priority = 127 } # skip since we use only ascii +else_if_without_else = { level = "allow", priority = 127 } # missing else ok +exhaustive_enums = { level = "allow", priority = 127 } # revisit once lib is ready to be used externally +exhaustive_structs = { level = "allow", priority = 127 } # revisit once lib is ready to be used externally +field_scoped_visibility_modifiers = { level = "allow", priority = 127 } # allow field-level visibility modifiers +float_arithmetic = { level = "allow", priority = 127 } # allow float arithmetic +host_endian_bytes = { level = "allow", priority = 127 } # allow to_ne_bytes / from_ne_bytes +impl_trait_in_params = { level = "allow", priority = 127 } # impl in params ok +implicit_return = { level = "allow", priority = 127 } # missing return ok +inline_asm_x86_intel_syntax = { level = "allow", priority = 127 } # intel syntax ok +integer_division = { level = "allow", priority = 127 } # allow discarding remainder +iter_over_hash_type = { level = "allow", priority = 127 } # allow iterating over unordered iterables like `HashMap` +little_endian_bytes = { level = "allow", priority = 127 } # allow to_le_bytes / from_le_bytes +missing_asserts_for_indexing = { level = "allow", priority = 127 } # missing assert before indexing ok +missing_docs_in_private_items = { level = "allow", priority = 127 } # missing docs on private ok +missing_inline_in_public_items = { level = "allow", priority = 127 } # let rust compiler determine best inline logic +missing_trait_methods = { level = "allow", priority = 127 } # allow in favor of rustc `implement the missing item` +module_name_repetitions = { level = "allow", priority = 127 } # allow use of module name in type names +multiple_crate_versions = { level = "allow", priority = 127 } # allow since list of exceptions changes frequently from external +multiple_inherent_impl = { level = "allow", priority = 127 } # required in best practice to limit exposure over UniFFI +must_use_candidate = { level = "allow", priority = 127 } # omitting #[must_use] ok +mod_module_files = { level = "allow", priority = 127 } # mod directories ok +non_ascii_literal = { level = "allow", priority = 127 } # non-ascii char in string literal ok +partial_pub_fields = { level = "allow", priority = 127 } # partial struct pub fields ok +pattern_type_mismatch = { level = "allow", priority = 127 } # allow in favor of clippy::ref_patterns +print_stderr = { level = "allow", priority = 127 } # stderr prints ok +print_stdout = { level = "allow", priority = 127 } # stdout prints ok +pub_use = { level = "allow", priority = 127 } # ok to structure source into many files but clean up import +pub_with_shorthand = { level = "allow", priority = 127 } # allow use of pub(super) +pub_without_shorthand = { level = "allow", priority = 127 } # allow use of pub(in super) +question_mark_used = { level = "allow", priority = 127 } # allow question operator +self_named_module_files = { level = "allow", priority = 127 } # mod files ok +semicolon_inside_block = { level = "allow", priority = 127 } # ok to keep inside block +separated_literal_suffix = { level = "allow", priority = 127 } # literal suffixes should be separated by underscore +single_char_lifetime_names = { level = "allow", priority = 127 } # single char lifetimes ok +single_component_path_imports = { level = "allow", priority = 127 } # allow for readability +std_instead_of_alloc = { level = "allow", priority = 127 } # we should use std when possible +std_instead_of_core = { level = "allow", priority = 127 } # we should use std when possible +string_add = { level = "allow", priority = 127 } # simple concat ok +string_lit_chars_any = { level = "allow", priority = 127 } # favor readability until a perf case comes up +use_debug = { level = "warn", priority = 127 } # debug print +wildcard_enum_match_arm = { level = "allow", priority = 127 } # allow wildcard match arm in enums # temporary single_call_fn = { level = "allow", priority = 127 } # remove once more models need pointer serializers/deserializers diff --git a/src/core/error.rs b/src/core/error.rs deleted file mode 100644 index 52d7449e..00000000 --- a/src/core/error.rs +++ /dev/null @@ -1,154 +0,0 @@ -use crate::uniffi::error::{Kind, OrcaError}; -use bollard::errors::Error as BollardError; -use dot_parser::ast::PestError; -use glob; -use serde_json; -use serde_yaml; -use std::{ - backtrace::{Backtrace, BacktraceStatus}, - fmt::{self, Formatter}, - io, path, -}; -use tokio::task; - -impl From for OrcaError { - fn from(error: BollardError) -> Self { - Self { - kind: Kind::BollardError { - source: error.into(), - backtrace: Some(Backtrace::capture()), - }, - } - } -} -impl From for OrcaError { - fn from(error: chrono::ParseError) -> Self { - Self { - kind: Kind::ChronoParseError { - source: error.into(), - backtrace: Some(Backtrace::capture()), - }, - } - } -} -impl From for OrcaError { - fn from(error: PestError) -> Self { - Self { - kind: Kind::DOTError { - source: error.into(), - backtrace: Some(Backtrace::capture()), - }, - } - } -} -impl From for OrcaError { - fn from(error: glob::PatternError) -> Self { - Self { - kind: Kind::GlobPatternError { - source: error.into(), - backtrace: Some(Backtrace::capture()), - }, - } - } -} -impl From for OrcaError { - fn from(error: io::Error) -> Self { - Self { - kind: Kind::IoError { - source: error.into(), - backtrace: Some(Backtrace::capture()), - }, - } - } -} -impl From for OrcaError { - fn from(error: path::StripPrefixError) -> Self { - Self { - kind: Kind::PathPrefixError { - source: error.into(), - backtrace: Some(Backtrace::capture()), - }, - } - } -} -impl From for OrcaError { - fn from(error: serde_json::Error) -> Self { - Self { - kind: Kind::SerdeJsonError { - source: error.into(), - backtrace: Some(Backtrace::capture()), - }, - } - } -} -impl From for OrcaError { - fn from(error: serde_yaml::Error) -> Self { - Self { - kind: Kind::SerdeYamlError { - source: error.into(), - backtrace: Some(Backtrace::capture()), - }, - } - } -} -impl From for OrcaError { - fn from(error: task::JoinError) -> Self { - Self { - kind: Kind::TokioTaskJoinError { - source: error.into(), - backtrace: Some(Backtrace::capture()), - }, - } - } -} -impl From for OrcaError { - fn from(error: Kind) -> Self { - Self { kind: error } - } -} -fn format_stack(backtrace: Option<&Backtrace>) -> String { - backtrace.map_or( - String::new(), - |unpacked_backtrace| match unpacked_backtrace.status() { - BacktraceStatus::Captured => { - format!("\nstack backtrace:\n{unpacked_backtrace}") - } - BacktraceStatus::Disabled | BacktraceStatus::Unsupported | _ => String::new(), - }, - ) -} -impl fmt::Debug for OrcaError { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match &self.kind { - Kind::AgentCommunicationFailure { backtrace, .. } - | Kind::EmptyDir { backtrace, .. } - | Kind::FailedToStartPod { backtrace, .. } - | Kind::FailedToExtractRunInfo { backtrace, .. } - | Kind::IncompletePacket { backtrace, .. } - | Kind::InvalidPath { backtrace, .. } - | Kind::InvalidIndex { backtrace, .. } - | Kind::InvalidInputSpecNodeNotInGraph { backtrace, .. } - | Kind::InvalidOutputSpecKeyNotInNode { backtrace, .. } - | Kind::InvalidOutputSpecNodeNotInGraph { backtrace, .. } - | Kind::KeyMissing { backtrace, .. } - | Kind::MissingInfo { backtrace, .. } - | Kind::FailedToGetLabelHashFromFileName { backtrace, .. } - | Kind::FailedToGetPodJobOutput { backtrace, .. } - | Kind::PipelineValidationErrorMissingKeys { backtrace, .. } - | Kind::PodJobProcessingError { backtrace, .. } - | Kind::PodJobSubmissionFailed { backtrace, .. } - | Kind::UnexpectedPathType { backtrace, .. } - | Kind::BollardError { backtrace, .. } - | Kind::ChronoParseError { backtrace, .. } - | Kind::DOTError { backtrace, .. } - | Kind::GlobPatternError { backtrace, .. } - | Kind::IoError { backtrace, .. } - | Kind::PathPrefixError { backtrace, .. } - | Kind::SerdeJsonError { backtrace, .. } - | Kind::SerdeYamlError { backtrace, .. } - | Kind::TokioTaskJoinError { backtrace, .. } => { - write!(f, "{}{}", self.kind, format_stack(backtrace.as_ref())) - } - } - } -} diff --git a/src/core/mod.rs b/src/core/mod.rs deleted file mode 100644 index 9faec897..00000000 --- a/src/core/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -pub(crate) mod error; -pub(crate) mod graph; -pub(crate) mod store; -pub(crate) mod util; -pub(crate) mod validation; - -pub(crate) mod crypto; -/// Model definition for orcapod -pub(crate) mod model; -pub(crate) mod operator; -pub(crate) mod orchestrator; - -/// Pipeline runner module -pub mod pipeline_runner; diff --git a/src/core/model/pod.rs b/src/core/model/pod.rs deleted file mode 100644 index fdcafc13..00000000 --- a/src/core/model/pod.rs +++ /dev/null @@ -1,265 +0,0 @@ -use crate::uniffi::model::pod::{Pod, PodJob}; -use serde::{Deserialize as _, Deserializer}; -use serde_yaml::{self, Value}; -use std::{result, sync::Arc}; - -#[expect(clippy::expect_used, reason = "Serde requires this signature.")] -pub fn deserialize_pod<'de, D>(deserializer: D) -> result::Result, D::Error> -where - D: Deserializer<'de>, -{ - let value = Value::deserialize(deserializer)?; - (value).as_str().map_or_else( - || { - Ok(serde_yaml::from_value(value.clone()) - .expect("Failed to convert from serde value to specific type.")) - }, - |hash| { - Ok({ - Pod { - hash: hash.to_owned(), - ..Pod::default() - } - .into() - }) - }, - ) -} - -#[expect(clippy::expect_used, reason = "Serde requires this signature.")] -pub fn deserialize_pod_job<'de, D>(deserializer: D) -> result::Result, D::Error> -where - D: Deserializer<'de>, -{ - let value = Value::deserialize(deserializer)?; - (value).as_str().map_or_else( - || { - Ok(serde_yaml::from_value(value.clone()) - .expect("Failed to convert from serde value to specific type.")) - }, - |hash| { - Ok({ - PodJob { - hash: hash.to_owned(), - ..PodJob::default() - } - .into() - }) - }, - ) -} - -#[cfg(test)] -mod tests { - #![expect(clippy::unwrap_used, reason = "OK in tests.")] - use indoc::indoc; - use std::sync::{Arc, LazyLock}; - use std::{collections::HashMap, path::PathBuf}; - - use crate::core::model::ToYaml as _; - use crate::uniffi::model::packet::{Blob, BlobKind, PathSet, URI}; - use crate::uniffi::model::pod::PodResult; - use crate::uniffi::orchestrator::PodStatus; - use crate::uniffi::{ - error::Result, - model::{ - Annotation, - packet::PathInfo, - pod::{Pod, PodJob, RecommendSpecs}, - }, - }; - - use pretty_assertions::assert_eq; - - static TEST_FILE_NAMESPACE_LOOKUP: LazyLock> = LazyLock::new(|| { - HashMap::from([ - ("input".into(), PathBuf::from("tests/extra/data/input_txt")), - ("output".into(), PathBuf::from("tests/extra/data/output")), - ]) - }); - - fn basic_pod() -> Result { - Pod::new( - Some(Annotation { - name: "test".into(), - version: "0.1".into(), - description: "Basic pod for testing hashing and yaml serialization".into(), - }), - "alpine:3.14".into(), - vec!["cp", "/input/input.txt", "/output/output.txt"] - .into_iter() - .map(String::from) - .collect(), - HashMap::from([( - "input_txt".into(), - PathInfo { - path: "/input/input.txt".into(), - match_pattern: r".*\.txt".into(), - }, - )]), - "/output".into(), - HashMap::from([( - "output_txt".into(), - PathInfo { - path: "output.txt".into(), - match_pattern: r".*\.txt".into(), - }, - )]), - RecommendSpecs { - cpus: 0.20, - memory: 128 << 20, - }, - None, - ) - } - - fn basic_pod_job() -> Result { - let pod = Arc::new(basic_pod()?); - PodJob::new( - Some(Annotation { - name: "test_job".into(), - version: "0.1".into(), - description: "Basic pod job for testing hashing and yaml serialization".into(), - }), - Arc::clone(&pod), - HashMap::from([( - "input_txt".into(), - PathSet::Unary(Blob::new( - BlobKind::File, - URI { - namespace: "input".into(), - path: "cat.txt".into(), - }, - )), - )]), - URI { - namespace: "output".into(), - path: "".into(), - }, - pod.recommend_specs.cpus, - pod.recommend_specs.memory, - Some(HashMap::from([("FAKE_ENV".into(), "FakeValue".into())])), - &TEST_FILE_NAMESPACE_LOOKUP, - ) - } - - fn basic_pod_result() -> Result { - PodResult::new( - Some(Annotation { - name: "test".into(), - version: "0.1".into(), - description: "Basic Result for testing hashing and yaml serialization".into(), - }), - basic_pod_job()?.into(), - "randomly_assigned_name".into(), - PodStatus::Completed, - 1_737_922_307, - 1_737_925_907, - &TEST_FILE_NAMESPACE_LOOKUP, - "example_logs".to_owned(), - ) - } - - #[test] - fn pod_hash() { - assert_eq!( - basic_pod().unwrap().hash, - "b5574e2efdf26361e8e8e886389a250cfbfcceed08b29325a78fd738cbb2a1b8", - "Hash didn't match." - ); - } - - #[test] - fn pod_to_yaml() { - assert_eq!( - basic_pod().unwrap().to_yaml().unwrap(), - indoc! {r" - class: pod - image: alpine:3.14 - command: - - cp - - /input/input.txt - - /output/output.txt - input_spec: - input_txt: - path: /input/input.txt - match_pattern: .*\.txt - output_dir: /output - output_spec: - output_txt: - path: output.txt - match_pattern: .*\.txt - gpu_requirements: null - "}, - "YAML serialization didn't match." - ); - } - - #[test] - fn pod_job_hash() { - assert_eq!( - basic_pod_job().unwrap().hash, - "80348a4ef866a9dfc1a5d0a48467a6592ef2ed9e8de67930d64afefbb395f1c6", - "Hash didn't match." - ); - } - - #[test] - fn pod_job_to_yaml() { - assert_eq!( - basic_pod_job().unwrap().to_yaml().unwrap(), - indoc! {" - class: pod_job - pod: b5574e2efdf26361e8e8e886389a250cfbfcceed08b29325a78fd738cbb2a1b8 - input_packet: - input_txt: - kind: File - location: - namespace: input - path: cat.txt - checksum: 175cc6f362b2f75acd08a373e000144fdb8d14a833d4b70fd743f16a7039103f - output_dir: - namespace: output - path: '' - cpu_limit: 0.2 - memory_limit: 134217728 - env_vars: - FAKE_ENV: FakeValue - "}, - "YAML serialization didn't match." - ); - } - - #[test] - fn pod_result_hash() { - assert_eq!( - basic_pod_result().unwrap().hash, - "92809a4ce13b4fe8c8dcdcf2b48dd14a9dd885593fe3ab5d9809d27bc9a16354", - "Hash didn't match." - ); - } - - #[test] - fn pod_result_to_yaml() { - assert_eq!( - basic_pod_result().unwrap().to_yaml().unwrap(), - indoc! {" - class: pod_result - pod_job: 80348a4ef866a9dfc1a5d0a48467a6592ef2ed9e8de67930d64afefbb395f1c6 - output_packet: - output_txt: - kind: File - location: - namespace: output - path: output.txt - checksum: 175cc6f362b2f75acd08a373e000144fdb8d14a833d4b70fd743f16a7039103f - assigned_name: randomly_assigned_name - status: Completed - created: 1737922307 - terminated: 1737925907 - logs: example_logs - "}, - "YAML serialization didn't match." - ); - } -} diff --git a/src/core/orchestrator/agent.rs b/src/core/orchestrator/agent.rs deleted file mode 100644 index 9cbc7904..00000000 --- a/src/core/orchestrator/agent.rs +++ /dev/null @@ -1,202 +0,0 @@ -use crate::uniffi::{ - error::{OrcaError, Result, selector}, - orchestrator::agent::{Agent, AgentClient}, -}; -use chrono::{DateTime, Utc}; -use futures_util::future::FutureExt as _; -use itertools::Itertools as _; -use serde::{Deserialize, Serialize}; -use snafu::{OptionExt as _, ResultExt as _}; -use std::{ - borrow::ToOwned, - collections::{BTreeMap, HashMap}, - fmt::Write as _, - hash::RandomState, - path::PathBuf, - sync::Arc, -}; -use tokio::{ - sync::mpsc::{self, error::SendError}, - task::JoinSet, -}; -use tokio_util::task::TaskTracker; - -pub fn extract_metadata(key_expr: &str) -> HashMap { - key_expr - .split('/') - .map(ToOwned::to_owned) - .tuples() - .collect() -} - -impl AgentClient { - #[expect( - clippy::let_underscore_must_use, - reason = "write! on a `String` cannot fail. https://rust-lang.github.io/rust-clippy/master/index.html#format_collect" - )] - pub(crate) fn make_key_expr( - &self, - is_subscriber: bool, - topic: &str, - mut metadata: BTreeMap<&str, String>, - ) -> String { - metadata.insert("group", self.group.clone()); - metadata.insert("topic", topic.to_owned()); - - let delimiter = if is_subscriber { - "**/".to_owned() - } else { - metadata.insert("host", self.host.clone()); - metadata.insert("timestamp", Utc::now().to_rfc3339()); - String::new() - }; - - metadata - .iter() - .fold(delimiter.clone(), |mut key_expr, (key, value)| { - let _ = write!(key_expr, "{key}/{value}/{delimiter}"); - key_expr - }) - .trim_end_matches('/') - .to_owned() - } - - pub(crate) async fn publish( - &self, - topic: &str, - metadata: BTreeMap<&str, String>, - payload: &T, - ) -> Result<()> - where - T: Serialize + Sync + ?Sized, - { - Ok(self - .session - .put( - self.make_key_expr(false, topic, metadata), - &serde_json::to_vec(payload)?, - ) - .await - .context(selector::AgentCommunicationFailure {})?) - } - /// Send a log message to the agent network. - /// - /// # Errors - /// - /// Will fail if there is an issue sending the message. - pub(crate) async fn log(&self, message: &str) -> Result<()> { - self.publish("log", BTreeMap::new(), message).await - } -} - -#[expect( - clippy::excessive_nesting, - clippy::let_underscore_must_use, - reason = "`result::Result<(), SendError<_>>` is the only uncaptured result since it would mean we can't transmit results over mpsc." -)] -pub async fn start_service< - RequestF, // function to run on requests - RequestI, // input to the function for requests - RequestR, // output to the function for requests - ResponseF, // function to run on completing a request i.e. response - ResponseI, // input to the function for responses - ResponseR, // output to the function for responses ->( - agent: Arc, - request_topic: &str, - request_metadata: BTreeMap<&'static str, String>, - namespace_lookup: HashMap, - request_task: RequestF, - response_task: ResponseF, -) -> Result<()> -where - RequestI: for<'serde> Deserialize<'serde> + Send + 'static, - RequestF: FnOnce( - Arc, - HashMap, - (DateTime, HashMap), - RequestI, - ) -> RequestR - + Clone - + Send - + 'static, - RequestR: Future> + Send + 'static, - ResponseI: Send + 'static, - ResponseF: Fn(Arc, ResponseI) -> ResponseR + Send + 'static, - ResponseR: Future> + Send + 'static, -{ - agent - .client - .log(&format!( - "Started `{request_topic}` service for {request_metadata:?}." - )) - .await?; - let (response_tx, mut response_rx) = mpsc::channel(100); - - let mut services = JoinSet::new(); - services.spawn({ - let inner_agent = Arc::clone(&agent); - let inner_request_topic = request_topic.to_owned(); - async move { - let tasks = TaskTracker::new(); - let subscriber = inner_agent - .client - .session - .declare_subscriber(inner_agent.client.make_key_expr( - true, - &inner_request_topic, - request_metadata, - )) - .await - .context(selector::AgentCommunicationFailure {})?; - loop { - let sample = subscriber - .recv_async() - .await - .context(selector::AgentCommunicationFailure {})?; - let input = serde_json::from_slice::(&sample.payload().to_bytes())?; - let inner_response_tx = response_tx.clone(); - let mut event_metadata = extract_metadata(sample.key_expr().as_str()); - let timestamp = - event_metadata - .remove("timestamp") - .context(selector::MissingInfo { - details: "timestamp", - })?; - let event_timestamp = - DateTime::::from(DateTime::parse_from_rfc3339(×tamp)?); - tasks.spawn({ - let inner_request_task = request_task.clone(); - let inner_inner_agent = Arc::clone(&inner_agent); - let inner_namespace_lookup = namespace_lookup.clone(); - async move { - inner_request_task( - inner_inner_agent, - inner_namespace_lookup, - (event_timestamp, event_metadata), - input, - ) - .then(move |response| async move { - let _: Result<(), SendError>> = - inner_response_tx.send(response).await; - Ok::<_, OrcaError>(()) - }) - .await - } - }); - } - } - }); - services.spawn(async move { - loop { - let response = response_rx.recv().await.context(selector::MissingInfo { - details: "channel empty or closed", - })?; - response_task(Arc::clone(&agent.client), response?).await?; - } - }); - - services.join_next().await.context(selector::MissingInfo { - details: "no available services", - })?? -} diff --git a/src/core/orchestrator/mod.rs b/src/core/orchestrator/mod.rs deleted file mode 100644 index 29a079eb..00000000 --- a/src/core/orchestrator/mod.rs +++ /dev/null @@ -1,29 +0,0 @@ -use crate::{ - core::util::get_type_name, - uniffi::{ - model::pod::PodJob, - orchestrator::{Orchestrator, PodRun}, - }, -}; -use std::sync::LazyLock; -use tokio::runtime::Runtime; - -#[expect( - clippy::expect_used, - reason = "Should be able to create Tokio runtime." -)] -pub static ASYNC_RUNTIME: LazyLock = - LazyLock::new(|| Runtime::new().expect("Unable to create Tokio runtime.")); - -impl PodRun { - pub(crate) fn new(pod_job: &PodJob, assigned_name: String) -> Self { - Self { - pod_job: pod_job.clone().into(), - orchestrator_source: get_type_name::(), - assigned_name, - } - } -} - -pub mod agent; -pub mod docker; diff --git a/src/core/store/filestore.rs b/src/core/store/filestore.rs deleted file mode 100644 index c21d2254..00000000 --- a/src/core/store/filestore.rs +++ /dev/null @@ -1,248 +0,0 @@ -use crate::{ - core::{model::ToYaml, store::MODEL_NAMESPACE, util::get_type_name}, - uniffi::{ - error::{OrcaError, Result, selector}, - model::{Annotation, pipeline::Pipeline}, - store::{ModelID, ModelInfo, filestore::LocalFileStore}, - }, -}; -use colored::Colorize as _; -use glob::glob; -use heck::ToSnakeCase as _; -use regex::Regex; -use serde::{Serialize, de::DeserializeOwned}; -use serde_yaml; -use snafu::OptionExt as _; -use std::{ - fmt, fs, - path::{Path, PathBuf}, - sync::LazyLock, -}; - -#[expect(clippy::expect_used, reason = "Valid static regex")] -static RE_MODEL_METADATA: LazyLock = LazyLock::new(|| { - Regex::new( - r"(?x) - ^ - (?.*?)/ - (?[a-z_]+)/ - (?[a-z_]+)/ - (?[0-9a-f]+)/ - ( - annotation/ - (?[0-9a-zA-Z\-]+) - - - (?[0-9]+\.[0-9]+\.[0-9]+) - \.yaml - | - spec\.yaml - ) - $ - ", - ) - .expect("Invalid model metadata regex.") -}); - -impl LocalFileStore { - /// Relative path where model specification is stored within the model directory. - pub const SPEC_RELPATH: &str = "spec.yaml"; - /// Relative path where model annotation is stored within the model directory. - pub fn make_annotation_relpath(name: &str, version: &str) -> PathBuf { - PathBuf::from(format!("annotation/{name}-{version}.yaml")) - } - /// Build the storage path with the model directory (`hash`) and a file's relative path. - pub fn make_path(&self, hash: &str, relpath: impl AsRef) -> PathBuf { - PathBuf::from(format!( - "{}/{}/{}/{}", - self.directory.to_string_lossy(), - MODEL_NAMESPACE, - get_type_name::().to_snake_case(), - hash - )) - .join(relpath) - } - - fn find_model_metadata(glob_pattern: &Path) -> Result> { - let paths = glob(&glob_pattern.to_string_lossy())?.filter_map(move |filepath| { - let filepath_string = String::from(filepath.ok()?.to_string_lossy()); - let group = RE_MODEL_METADATA.captures(&filepath_string)?; - Some(ModelInfo { - name: group.name("name").map(|name| name.as_str().to_owned()), - version: group - .name("version") - .map(|version| version.as_str().to_owned()), - hash: group["hash"].to_string(), - }) - }); - Ok(paths) - } - /// Find hash using name and version. - /// - /// # Errors - /// - /// Will return error if unable to find. - pub(crate) fn lookup_hash(&self, name: &str, version: &str) -> Result { - let model_info = Self::find_model_metadata( - &self.make_path::("*", Self::make_annotation_relpath(name, version)), - )? - .next() - .context(selector::MissingInfo { - details: format!( - "annotation where class = {}, name = {name}, version = {version}", - get_type_name::().to_snake_case() - ), - })?; - Ok(model_info.hash) - } - - pub(crate) fn save_file(file: impl AsRef, content: impl AsRef<[u8]>) -> Result<()> { - if let Some(parent) = file.as_ref().parent() { - fs::create_dir_all(parent)?; - } - fs::write(file, content)?; - Ok(()) - } - /// How any model is stored. - /// - /// # Errors - /// - /// Will return `Err` if there is an issue storing the model. - pub(crate) fn save_model( - &self, - model: &T, - hash: &str, - annotation: Option<&Annotation>, - ) -> Result<()> { - let class = get_type_name::().to_snake_case(); - // Save annotation if defined and doesn't collide globally i.e. model, name, version - if let Some(provided_annotation) = annotation { - let relpath = &Self::make_annotation_relpath( - &provided_annotation.name, - &provided_annotation.version, - ); - if let Some((found_hash, found_name, found_version)) = - Self::find_model_metadata(&self.make_path::("*", relpath))? - .next() - .and_then(|model_info| { - Some((model_info.hash, model_info.name?, model_info.version?)) - }) - { - println!( - "{}", - format!( - "Skip saving {} annotation since `{}`, `{}`, `{}` exists.", - class.bright_cyan(), - found_hash.bright_cyan(), - found_name.bright_cyan(), - found_version.bright_cyan(), - ) - .yellow(), - ); - } else { - Self::save_file( - self.make_path::(hash, relpath), - serde_yaml::to_string(provided_annotation)?, - )?; - } - } - // Save model specification and skip if it already exist e.g. on new annotations - let spec_file = &self.make_path::(hash, Self::SPEC_RELPATH); - if spec_file.exists() { - println!( - "{}", - format!( - "Skip saving {} model since `{}` exists.", - class.bright_cyan(), - hash.bright_cyan(), - ) - .yellow(), - ); - } else { - Self::save_file(spec_file, model.to_yaml()?)?; - } - Ok(()) - } - /// How to load any stored model into an instance. - /// - /// # Errors - /// - /// Will return `Err` if there is an issue loading the model from the store using `name` and - /// `version`. - pub(crate) fn load_model( - &self, - model_id: &ModelID, - ) -> Result<(T, Option, String)> { - let (hash, annotation) = self.decode_model_id::(model_id)?; - - Ok(( - serde_yaml::from_str(&fs::read_to_string( - self.make_path::(&hash, Self::SPEC_RELPATH), - )?)?, - annotation, - hash, - )) - } - - pub(crate) fn decode_model_id( - &self, - model_id: &ModelID, - ) -> Result<(String, Option)> { - match model_id { - ModelID::Hash(hash) => Ok((hash.to_owned(), None)), - ModelID::Annotation(name, version) => { - let hash = self.lookup_hash::(name, version)?; - let annotation_str = fs::read_to_string( - self.make_path::(&hash, Self::make_annotation_relpath(name, version)), - )?; - let annotation: Annotation = serde_yaml::from_str(&annotation_str)?; - Ok((hash, Some(annotation))) - } - } - } - /// How to query any stored models. - /// - /// # Errors - /// - /// Will return `Err` if there is an issue querying metadata from existing models in the store. - pub(crate) fn list_model(&self) -> Result> { - Ok(Self::find_model_metadata(&self.make_path::("**", "*"))?.collect()) - } - /// How to explicitly delete any stored model and all associated annotations (does not propagate). - /// - /// # Errors - /// - /// Will return `Err` if there is an issue deleting a model from the store using `name` and - /// `version`. - pub(crate) fn delete_model(&self, model_id: &ModelID) -> Result<()> { - // assumes propagate = false - let hash = match model_id { - ModelID::Hash(hash) => hash, - ModelID::Annotation(name, version) => &self.lookup_hash::(name, version)?, - }; - let spec_dir = self.make_path::(hash, ""); - fs::remove_dir_all(spec_dir)?; - - Ok(()) - } - - pub(crate) fn get_latest_pipeline_labels_file_name( - &self, - pipeline_hash: &str, - ) -> Result> { - let existing_labels_path = self.make_path::(pipeline_hash, "labels/"); - Ok(if existing_labels_path.exists() { - let mut label_file_names = fs::read_dir(&existing_labels_path)? - .map(|entry| Ok::<_, OrcaError>(entry?.file_name())) - .collect::, _>>()?; - - // Sort and get the latest one - label_file_names.sort(); - - label_file_names - .last() - .map(|os_str| os_str.to_string_lossy().to_string()) - } else { - None - }) - } -} diff --git a/src/core/store/mod.rs b/src/core/store/mod.rs deleted file mode 100644 index 544190f5..00000000 --- a/src/core/store/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -/// Namespace where models will be stored. -const MODEL_NAMESPACE: &str = "orcapod_model"; - -pub mod filestore; diff --git a/src/core/crypto.rs b/src/crypto.rs similarity index 94% rename from src/core/crypto.rs rename to src/crypto.rs index 4a109500..fc8f32ad 100644 --- a/src/core/crypto.rs +++ b/src/crypto.rs @@ -1,10 +1,3 @@ -use crate::{ - core::util::get, - uniffi::{ - error::{Result, selector}, - model::packet::{Blob, BlobKind}, - }, -}; use hex; use rand::{self, RngCore as _}; use serde_yaml; @@ -17,6 +10,12 @@ use std::{ io::Read, path::{Path, PathBuf}, }; + +use crate::{ + error::{Result, selector}, + model::packet::{Blob, BlobKind}, + util::get, +}; /// Evaluate checksum hash of streamed data i.e. chunked buffers. /// /// # Errors @@ -112,11 +111,12 @@ pub fn make_random_hash() -> String { #[cfg(test)] mod tests { #![expect(clippy::panic_in_result_fn, reason = "OK in tests.")] + use std::fs::read; + use crate::{ - core::crypto::{hash_buffer, hash_dir, hash_file}, - uniffi::error::Result, + crypto::{hash_buffer, hash_dir, hash_file}, + error::Result, }; - use std::fs::read; #[test] fn consistent_hash() -> Result<()> { diff --git a/src/uniffi/error.rs b/src/error.rs similarity index 58% rename from src/uniffi/error.rs rename to src/error.rs index 5cc2e5db..57b49c65 100644 --- a/src/uniffi/error.rs +++ b/src/error.rs @@ -5,14 +5,12 @@ use bollard::errors::Error as BollardError; use dot_parser::ast::PestError; -use glob; -use serde_json; -use serde_yaml; use snafu::prelude::Snafu; use std::{ - backtrace::Backtrace, + backtrace::{Backtrace, BacktraceStatus}, collections::HashSet, error::Error, + fmt::{self, Formatter}, io, path::{self, PathBuf}, result, @@ -219,3 +217,145 @@ impl OrcaError { } } } + +impl From for OrcaError { + fn from(error: BollardError) -> Self { + Self { + kind: Kind::BollardError { + source: error.into(), + backtrace: Some(Backtrace::capture()), + }, + } + } +} +impl From for OrcaError { + fn from(error: chrono::ParseError) -> Self { + Self { + kind: Kind::ChronoParseError { + source: error.into(), + backtrace: Some(Backtrace::capture()), + }, + } + } +} +impl From for OrcaError { + fn from(error: PestError) -> Self { + Self { + kind: Kind::DOTError { + source: error.into(), + backtrace: Some(Backtrace::capture()), + }, + } + } +} +impl From for OrcaError { + fn from(error: glob::PatternError) -> Self { + Self { + kind: Kind::GlobPatternError { + source: error.into(), + backtrace: Some(Backtrace::capture()), + }, + } + } +} +impl From for OrcaError { + fn from(error: io::Error) -> Self { + Self { + kind: Kind::IoError { + source: error.into(), + backtrace: Some(Backtrace::capture()), + }, + } + } +} +impl From for OrcaError { + fn from(error: path::StripPrefixError) -> Self { + Self { + kind: Kind::PathPrefixError { + source: error.into(), + backtrace: Some(Backtrace::capture()), + }, + } + } +} +impl From for OrcaError { + fn from(error: serde_json::Error) -> Self { + Self { + kind: Kind::SerdeJsonError { + source: error.into(), + backtrace: Some(Backtrace::capture()), + }, + } + } +} +impl From for OrcaError { + fn from(error: serde_yaml::Error) -> Self { + Self { + kind: Kind::SerdeYamlError { + source: error.into(), + backtrace: Some(Backtrace::capture()), + }, + } + } +} +impl From for OrcaError { + fn from(error: task::JoinError) -> Self { + Self { + kind: Kind::TokioTaskJoinError { + source: error.into(), + backtrace: Some(Backtrace::capture()), + }, + } + } +} +impl From for OrcaError { + fn from(error: Kind) -> Self { + Self { kind: error } + } +} +fn format_stack(backtrace: Option<&Backtrace>) -> String { + backtrace.map_or( + String::new(), + |unpacked_backtrace| match unpacked_backtrace.status() { + BacktraceStatus::Captured => { + format!("\nstack backtrace:\n{unpacked_backtrace}") + } + BacktraceStatus::Disabled | BacktraceStatus::Unsupported | _ => String::new(), + }, + ) +} +impl fmt::Debug for OrcaError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match &self.kind { + Kind::AgentCommunicationFailure { backtrace, .. } + | Kind::EmptyDir { backtrace, .. } + | Kind::FailedToStartPod { backtrace, .. } + | Kind::FailedToExtractRunInfo { backtrace, .. } + | Kind::IncompletePacket { backtrace, .. } + | Kind::InvalidPath { backtrace, .. } + | Kind::InvalidIndex { backtrace, .. } + | Kind::InvalidInputSpecNodeNotInGraph { backtrace, .. } + | Kind::InvalidOutputSpecKeyNotInNode { backtrace, .. } + | Kind::InvalidOutputSpecNodeNotInGraph { backtrace, .. } + | Kind::KeyMissing { backtrace, .. } + | Kind::MissingInfo { backtrace, .. } + | Kind::FailedToGetLabelHashFromFileName { backtrace, .. } + | Kind::FailedToGetPodJobOutput { backtrace, .. } + | Kind::PipelineValidationErrorMissingKeys { backtrace, .. } + | Kind::PodJobProcessingError { backtrace, .. } + | Kind::PodJobSubmissionFailed { backtrace, .. } + | Kind::UnexpectedPathType { backtrace, .. } + | Kind::BollardError { backtrace, .. } + | Kind::ChronoParseError { backtrace, .. } + | Kind::DOTError { backtrace, .. } + | Kind::GlobPatternError { backtrace, .. } + | Kind::IoError { backtrace, .. } + | Kind::PathPrefixError { backtrace, .. } + | Kind::SerdeJsonError { backtrace, .. } + | Kind::SerdeYamlError { backtrace, .. } + | Kind::TokioTaskJoinError { backtrace, .. } => { + write!(f, "{}{}", self.kind, format_stack(backtrace.as_ref())) + } + } + } +} diff --git a/src/core/graph.rs b/src/graph.rs similarity index 91% rename from src/core/graph.rs rename to src/graph.rs index 4dd9f643..45f2f170 100644 --- a/src/core/graph.rs +++ b/src/graph.rs @@ -1,7 +1,3 @@ -use crate::{ - core::{model::pipeline::PipelineNode, util::get}, - uniffi::{error::Result, model::pipeline::Kernel}, -}; use dot_parser::ast::Graph as DOTGraph; use petgraph::{ dot::dot_parser::{DotAttrList, DotNodeWeight, ParseFromDot as _}, @@ -9,6 +5,12 @@ use petgraph::{ }; use std::collections::HashMap; +use crate::{ + error::Result, + model::pipeline::{Kernel, PipelineNode}, + util::get, +}; + #[expect( clippy::panic_in_result_fn, clippy::panic, diff --git a/src/lib.rs b/src/lib.rs index 99d625b1..7a9ae115 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,8 +6,6 @@ uniffi_external::setup_scaffolding!(); #[cfg(all(feature = "default", feature = "test"))] compile_error!(r#"Feature "default" and feature "test" cannot be enabled at the same time."#); -/// Pure Rust source. -pub mod core; /// Exposed CFFI client based on [uniffi](https://crates.io/crates/uniffi). /// /// `uniffi` brings a lot of convenience in creating a CFFI but we also must meet several @@ -34,4 +32,20 @@ pub mod core; /// 1. (Rust limitation) No associated functions in traits e.g. class methods in Python /// 1. Hint: Enum variants with named fields offer a better UX (e.g. in Python) as opposed to /// unnamed enum fields i.e. will show up in help. -pub mod uniffi; +pub mod error; +pub(crate) mod graph; +/// Store backends for model persistence +pub mod store; +pub(crate) mod util; +pub(crate) mod validation; + +pub(crate) mod crypto; +/// Model definition for orcapod +pub mod model; +/// Operators for data transformation in a pipeline +pub mod operator; +/// For pod execution orchestration +pub mod orchestrator; + +/// Pipeline runner module +pub mod pipeline_runner; diff --git a/src/core/model/mod.rs b/src/model/mod.rs similarity index 60% rename from src/core/model/mod.rs rename to src/model/mod.rs index 94fa83d2..4e23ade3 100644 --- a/src/core/model/mod.rs +++ b/src/model/mod.rs @@ -1,7 +1,6 @@ -use crate::{core::util::get_type_name, uniffi::error::Result}; use heck::ToSnakeCase as _; use indexmap::IndexMap; -use serde::{Serialize, Serializer}; +use serde::{Deserialize, Serialize, Serializer}; use serde_yaml::{self, Value}; use std::{ collections::{BTreeMap, HashMap}, @@ -10,8 +9,40 @@ use std::{ result, }; +use std::path::PathBuf; + +use crate::{error::Result, util::get_type_name}; + +/// Available models. +#[derive(uniffi::Enum, Debug)] +pub enum ModelType { + /// See [`Pod`](crate::uniffi::model::pod::Pod). + Pod, + /// See [`PodJob`](crate::uniffi::model::pod::PodJob). + PodJob, + /// See [`PodResult`](crate::uniffi::model::pod::PodResult). + PodResult, +} + +/// Standard metadata structure for all model instances. +#[derive(uniffi::Record, Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +pub struct Annotation { + /// A unique name. + pub name: String, + /// A unique semantic version. + pub version: String, + /// A long form description. + pub description: String, +} + +uniffi::custom_type!(PathBuf, String, { + remote, + try_lift: |val| Ok(PathBuf::from(&val)), + lower: |obj| obj.display().to_string(), +}); + /// Trait to handle serialization to yaml for `OrcaPod` models -pub trait ToYaml: Serialize + Sized + Debug { +pub(crate) trait ToYaml: Serialize + Sized + Debug { /// Serializes the instance to a YAML string. /// # Errors /// Will return `Err` if it fail to serialize instance to string @@ -38,7 +69,7 @@ pub trait ToYaml: Serialize + Sized + Debug { fn process_field(field_name: &str, field_value: &Value) -> Option<(String, Value)>; } -pub fn serialize_hashmap( +pub(crate) fn serialize_hashmap( map: &HashMap, serializer: S, ) -> result::Result @@ -50,7 +81,7 @@ where } #[allow(clippy::ref_option, reason = "Serde requires this signature.")] -pub fn serialize_hashmap_option( +pub(crate) fn serialize_hashmap_option( map_option: &Option>, serializer: S, ) -> result::Result @@ -63,5 +94,9 @@ where sorted.serialize(serializer) } +/// Utility types for describing packets. +pub mod packet; +/// Models and utility types for pipelines. pub mod pipeline; +/// Models and utility types for pods. pub mod pod; diff --git a/src/uniffi/model/packet.rs b/src/model/packet.rs similarity index 97% rename from src/uniffi/model/packet.rs rename to src/model/packet.rs index c1f9c013..1f726f5a 100644 --- a/src/uniffi/model/packet.rs +++ b/src/model/packet.rs @@ -2,8 +2,7 @@ use serde::{Deserialize, Serialize}; use std::{collections::HashMap, path::PathBuf}; use uniffi; -use crate::core::util::get; -use crate::uniffi::error::Result; +use crate::{error::Result, util::get}; /// Path sets are named and represent an abstraction for the file(s) that represent some particular /// data within a compute environment. diff --git a/src/core/model/pipeline.rs b/src/model/pipeline.rs similarity index 64% rename from src/core/model/pipeline.rs rename to src/model/pipeline.rs index 4532e2c8..1daaec14 100644 --- a/src/core/model/pipeline.rs +++ b/src/model/pipeline.rs @@ -1,30 +1,325 @@ use std::{ backtrace::Backtrace, collections::{BTreeMap, BTreeSet, HashMap, HashSet}, + hash::{Hash, Hasher}, + path::PathBuf, result, + sync::{Arc, LazyLock}, }; -use crate::{ - core::{crypto::hash_buffer, model::ToYaml}, - uniffi::{ - error::{Kind, OrcaError, Result, selector}, - model::{ - packet::PathSet, - pipeline::{Kernel, NodeURI, Pipeline, PipelineJob}, - }, - }, -}; +use derive_more::Display; +use getset::CloneGetters; use itertools::Itertools as _; use petgraph::{ Direction::Incoming, - graph::{self, NodeIndex}, + graph::{self, DiGraph, NodeIndex}, }; use serde::{Deserialize, Serialize, ser::SerializeStruct as _}; use snafu::OptionExt as _; +use crate::{ + crypto::{hash_blob, hash_buffer, make_random_hash}, + error::{Kind, OrcaError, Result, selector}, + graph::make_graph, + model::{ + Annotation, ToYaml, + packet::{PathSet, URI}, + pod::Pod, + }, + operator::MapOperator, + validation::validate_packet, +}; + +pub(crate) static JOIN_OPERATOR_HASH: LazyLock = + LazyLock::new(|| hash_buffer(b"join_operator")); + +/// Computational dependencies as a [DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph). +#[derive(uniffi::Object, Debug, Display, CloneGetters, Clone, Deserialize, Default)] +#[getset(get_clone, impl_attrs = "#[uniffi::export]")] +#[display("{self:#?}")] +#[uniffi::export(Display)] +pub struct Pipeline { + /// Hash for pipeline + #[serde(default)] + pub hash: String, + /// Annotations for the pipeline. + #[serde(default)] + pub annotation: Option, + /// Computational DAG in-memory. + #[getset(skip)] + #[serde(skip_deserializing)] + /// Petgraph graph structure representing the pipeline + pub graph: DiGraph, + /// Exposed, internal input specification. Each input may be fed into more than one node/key if desired. + pub input_spec: HashMap>, + /// Exposed, internal output specification. Each output is associated with only one node/key. + pub output_spec: HashMap, +} + +#[uniffi::export] +impl Pipeline { + /// Construct a new pipeline instance. + /// + /// # Errors + /// + /// Will return `Err` if there is an issue initializing a `Pipeline` instance. + #[uniffi::constructor] + pub fn new( + graph_dot: &str, + metadata: &HashMap, + mut input_spec: HashMap>, + mut output_spec: HashMap, + annotation: Option, + ) -> Result { + // Note this gives us the graph, but the nodes do not have their hashes computed yet. + let mut graph = make_graph(graph_dot, metadata)?; + + // Run preprocessing to compute the hash for each node + for node_idx in graph.node_indices() { + Self::compute_hash_for_node_and_parents(node_idx, &input_spec, &mut graph); + } + + // Build LUT for node_label -> node_hash + let label_to_hash_lut = + graph + .node_indices() + .fold(HashMap::<&String, &String>::new(), |mut acc, node_idx| { + let node = &graph[node_idx]; + acc.insert(&node.label, &node.hash); + acc + }); + + // Build the new input_spec to refer to the hash instead of label + input_spec.iter_mut().try_for_each(|(_, node_uris)| { + node_uris.iter_mut().try_for_each(|node_uri| { + node_uri.node_id = (*label_to_hash_lut.get(&node_uri.node_id).context( + selector::InvalidInputSpecNodeNotInGraph { + node_name: node_uri.node_id.clone(), + }, + )?) + .clone(); + Ok::<(), OrcaError>(()) + }) + })?; + + // Update the output_spec to refer to the hash instead of label + output_spec.iter_mut().try_for_each(|(_, node_uri)| { + node_uri.node_id = (*label_to_hash_lut.get(&node_uri.node_id).context( + selector::InvalidOutputSpecNodeNotInGraph { + node_name: node_uri.node_id.clone(), + }, + )?) + .clone(); + + Ok::<(), OrcaError>(()) + })?; + + let pipeline_no_hash = Self { + hash: String::new(), + graph, + input_spec, + output_spec, + annotation, + }; + + // Run verification on the pipeline first before computing hash + pipeline_no_hash.validate()?; + + Ok(Self { + hash: hash_buffer(pipeline_no_hash.to_yaml()?.as_bytes()), + ..pipeline_no_hash + }) + } +} + +impl PartialEq for Pipeline { + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash + && self.annotation == other.annotation + && self.input_spec.keys().collect::>() + == other.input_spec.keys().collect::>() + && self.input_spec.values().collect::>() + == other.input_spec.values().collect::>() + && self.output_spec == other.output_spec + } +} + +/// A compute pipeline job that supplies input/output targets. +#[expect( + clippy::field_scoped_visibility_modifiers, + reason = "Temporary until a proper hash is implemented." +)] +#[derive(uniffi::Object, Debug, Display, CloneGetters, Deserialize, Serialize, Clone)] +#[getset(get_clone, impl_attrs = "#[uniffi::export]")] +#[display("{self:#?}")] +#[uniffi::export(Display)] +pub struct PipelineJob { + /// todo: replace with a consistent hash + #[getset(skip)] + pub(crate) hash: String, + /// A pipeline to base the pipeline job on. + pub pipeline: Arc, + /// Attached, external input packet. Applies cartesian product by default on keys pointing to the same node. + pub input_packet: HashMap>, + /// Attached, external output directory. + pub output_dir: URI, +} + +#[uniffi::export] +impl PipelineJob { + /// Construct a new pipeline job instance. + /// + /// # Errors + /// + /// Will return `Err` if there is an issue initializing a `PipelineJob` instance. + #[uniffi::constructor] + pub fn new( + pipeline: Arc, + input_packet: &HashMap>, + output_dir: URI, + namespace_lookup: &HashMap, + ) -> Result { + validate_packet("input".into(), &pipeline.input_spec, input_packet)?; + let input_packet_with_checksum = input_packet + .iter() + .map(|(path_set_key, path_sets)| { + Ok(( + path_set_key.clone(), + path_sets + .iter() + .map(|path_set| { + Ok(match path_set { + PathSet::Unary(blob) => { + PathSet::Unary(hash_blob(namespace_lookup, blob)?) + } + PathSet::Collection(blobs) => PathSet::Collection( + blobs + .iter() + .map(|blob| hash_blob(namespace_lookup, blob)) + .collect::>()?, + ), + }) + }) + .collect::>()?, + )) + }) + .collect::>()?; + + Ok(Self { + hash: make_random_hash(), + pipeline, + input_packet: input_packet_with_checksum, + output_dir, + }) + } +} + +/// Struct to hold the result of a pipeline execution. +#[derive(uniffi::Object, Debug, Clone, Deserialize, Serialize, Display, CloneGetters)] +#[getset(get_clone, impl_attrs = "#[uniffi::export]")] +#[display("{self:#?}")] +#[uniffi::export(Display)] +pub struct PipelineResult { + /// The pipeline job that was executed. + pub pipeline_job: Arc, + /// The result of the pipeline execution. + pub output_packets: HashMap>, + /// Logs of any failures that occurred during the pipeline execution. + pub failure_logs: Vec, + /// The status of the pipeline execution. + pub status: PipelineStatus, +} + +/// The status of a pipeline execution. +#[derive(uniffi::Enum, Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub enum PipelineStatus { + /// The pipeline is currently running. + Running, + /// The pipeline has completed successfully. + Succeeded, + /// The pipeline has failed. + Failed, + /// The pipeline has partially succeeded. There should be some failure logs + PartiallySucceeded, +} +/// A node in a computational pipeline. +#[derive(uniffi::Enum, Debug, Clone, Deserialize, Serialize)] +pub enum Kernel { + /// Pod reference. + Pod { + /// See [`Pod`](crate::uniffi::model::pod::Pod). + pod: Arc, + }, + /// Cartesian product operation. See [`JoinOperator`](crate::core::operator::JoinOperator). + JoinOperator, + /// Rename a path set key operation. + MapOperator { + /// See [`MapOperator`](crate::core::operator::MapOperator). + mapper: Arc, + }, +} + +impl From for Kernel { + fn from(mapper: MapOperator) -> Self { + Self::MapOperator { + mapper: Arc::new(mapper), + } + } +} + +impl From for Kernel { + fn from(pod: Pod) -> Self { + Self::Pod { pod: Arc::new(pod) } + } +} + +impl From> for Kernel { + fn from(pod: Arc) -> Self { + Self::Pod { pod } + } +} + +impl Kernel { + /// Get a unique hash that represents the kernel. + /// The exception here is the `JoinOperator` doesn't have any pre execution configuration, since it's logic is completely dependent on what is fed to it during execution. + pub fn get_hash(&self) -> &str { + match self { + Self::Pod { pod } => &pod.hash, + Self::JoinOperator => &JOIN_OPERATOR_HASH, + Self::MapOperator { mapper } => &mapper.hash, + } + } +} + +impl PartialEq for Kernel { + fn eq(&self, other: &Self) -> bool { + self.get_hash() == other.get_hash() + } +} + +impl Eq for Kernel {} + +impl Hash for Kernel { + fn hash(&self, state: &mut H) { + self.get_hash().hash(state); + } +} + +/// Index from pipeline node into pod specification. +#[derive( + uniffi::Record, Debug, Clone, Deserialize, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord, +)] +pub struct NodeURI { + /// Node reference name in pipeline. + pub node_id: String, + /// Specification key. + pub key: String, +} + +/// A node in the computation pipeline that stores its hash, kernel, and user provided label. #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] pub struct PipelineNode { - // Hash that represent the node + /// Hash that represent the node pub hash: String, /// Kernel associated with the node pub kernel: Kernel, @@ -417,21 +712,16 @@ impl PipelineJob { #[cfg(test)] mod tests { - use crate::{ - core::model::ToYaml as _, - uniffi::{ - error::Result, - model::{ - Annotation, - pipeline::{NodeURI, Pipeline}, - }, - operator::MapOperator, - }, - }; use indoc::indoc; use pretty_assertions::assert_eq; use std::collections::HashMap; + use crate::error::Result; + use crate::model::ToYaml as _; + use crate::model::pipeline::NodeURI; + use crate::model::{Annotation, pipeline::Pipeline}; + use crate::operator::MapOperator; + #[test] fn to_yaml() -> Result<()> { let pipeline = Pipeline::new( diff --git a/src/uniffi/model/pod.rs b/src/model/pod.rs similarity index 57% rename from src/uniffi/model/pod.rs rename to src/model/pod.rs index 13ae9f1e..d07dd0f3 100644 --- a/src/uniffi/model/pod.rs +++ b/src/model/pod.rs @@ -1,28 +1,21 @@ +use std::{backtrace::Backtrace, collections::HashMap, path::PathBuf, result, sync::Arc}; + +use derive_more::Display; +use getset::CloneGetters; +use serde::{Deserialize, Deserializer, Serialize}; +use serde_yaml::Value; + use crate::{ - core::{ - crypto::{hash_blob, hash_buffer}, - model::{ - ToYaml, - pod::{deserialize_pod, deserialize_pod_job}, - serialize_hashmap, serialize_hashmap_option, - }, - util::get, - validation::validate_packet, - }, - uniffi::{ - error::{Kind, OrcaError, Result}, - model::{ - Annotation, - packet::{Blob, BlobKind, Packet, PathInfo, PathSet, URI}, - }, - orchestrator::PodStatus, + crypto::{hash_blob, hash_buffer}, + error::{Kind, OrcaError, Result}, + model::{ + Annotation, ToYaml, + packet::{Blob, BlobKind, Packet, PathInfo, PathSet, URI}, + serialize_hashmap, serialize_hashmap_option, }, + util::get, + validation::validate_packet, }; -use derive_more::Display; -use getset::CloneGetters; -use serde::{Deserialize, Serialize}; -use std::{backtrace::Backtrace, collections::HashMap, path::PathBuf, sync::Arc}; -use uniffi; /// A reusable, containerized computational unit. #[derive( @@ -369,3 +362,280 @@ impl ToYaml for PodResult { } } } + +#[expect(clippy::expect_used, reason = "Serde requires this signature.")] +fn deserialize_pod<'de, D>(deserializer: D) -> result::Result, D::Error> +where + D: Deserializer<'de>, +{ + let value = Value::deserialize(deserializer)?; + (value).as_str().map_or_else( + || { + Ok(serde_yaml::from_value(value.clone()) + .expect("Failed to convert from serde value to specific type.")) + }, + |hash| { + Ok({ + Pod { + hash: hash.to_owned(), + ..Pod::default() + } + .into() + }) + }, + ) +} + +#[expect(clippy::expect_used, reason = "Serde requires this signature.")] +fn deserialize_pod_job<'de, D>(deserializer: D) -> result::Result, D::Error> +where + D: Deserializer<'de>, +{ + let value = Value::deserialize(deserializer)?; + (value).as_str().map_or_else( + || { + Ok(serde_yaml::from_value(value.clone()) + .expect("Failed to convert from serde value to specific type.")) + }, + |hash| { + Ok({ + PodJob { + hash: hash.to_owned(), + ..PodJob::default() + } + .into() + }) + }, + ) +} + +/// Status of a particular compute run. +#[derive(uniffi::Enum, Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Default)] +pub enum PodStatus { + /// Run is ongoing. + Running, + /// Run has completed successfully. + Completed, + /// Run failed with the provided error code. + Failed(i16), + /// For other container states that are not listed. + Undefined, + /// No status set. + #[default] + Unset, +} + +#[cfg(test)] +mod tests { + #![expect(clippy::unwrap_used, reason = "OK in tests.")] + use std::{ + collections::HashMap, + path::PathBuf, + sync::{Arc, LazyLock}, + }; + + use indoc::indoc; + + use crate::{ + error::Result, + model::{ + Annotation, ToYaml as _, + packet::{Blob, BlobKind, PathInfo, PathSet, URI}, + pod::{Pod, PodJob, PodResult, PodStatus, RecommendSpecs}, + }, + }; + + use pretty_assertions::assert_eq; + + static TEST_FILE_NAMESPACE_LOOKUP: LazyLock> = LazyLock::new(|| { + HashMap::from([ + ("input".into(), PathBuf::from("tests/extra/data/input_txt")), + ("output".into(), PathBuf::from("tests/extra/data/output")), + ]) + }); + + fn basic_pod() -> Result { + Pod::new( + Some(Annotation { + name: "test".into(), + version: "0.1".into(), + description: "Basic pod for testing hashing and yaml serialization".into(), + }), + "alpine:3.14".into(), + vec!["cp", "/input/input.txt", "/output/output.txt"] + .into_iter() + .map(String::from) + .collect(), + HashMap::from([( + "input_txt".into(), + PathInfo { + path: "/input/input.txt".into(), + match_pattern: r".*\.txt".into(), + }, + )]), + "/output".into(), + HashMap::from([( + "output_txt".into(), + PathInfo { + path: "output.txt".into(), + match_pattern: r".*\.txt".into(), + }, + )]), + RecommendSpecs { + cpus: 0.20, + memory: 128 << 20, + }, + None, + ) + } + + fn basic_pod_job() -> Result { + let pod = Arc::new(basic_pod()?); + PodJob::new( + Some(Annotation { + name: "test_job".into(), + version: "0.1".into(), + description: "Basic pod job for testing hashing and yaml serialization".into(), + }), + Arc::clone(&pod), + HashMap::from([( + "input_txt".into(), + PathSet::Unary(Blob::new( + BlobKind::File, + URI { + namespace: "input".into(), + path: "cat.txt".into(), + }, + )), + )]), + URI { + namespace: "output".into(), + path: "".into(), + }, + pod.recommend_specs.cpus, + pod.recommend_specs.memory, + Some(HashMap::from([("FAKE_ENV".into(), "FakeValue".into())])), + &TEST_FILE_NAMESPACE_LOOKUP, + ) + } + + fn basic_pod_result() -> Result { + PodResult::new( + Some(Annotation { + name: "test".into(), + version: "0.1".into(), + description: "Basic Result for testing hashing and yaml serialization".into(), + }), + basic_pod_job()?.into(), + "randomly_assigned_name".into(), + PodStatus::Completed, + 1_737_922_307, + 1_737_925_907, + &TEST_FILE_NAMESPACE_LOOKUP, + "example_logs".to_owned(), + ) + } + + #[test] + fn pod_hash() { + assert_eq!( + basic_pod().unwrap().hash, + "b5574e2efdf26361e8e8e886389a250cfbfcceed08b29325a78fd738cbb2a1b8", + "Hash didn't match." + ); + } + + #[test] + fn pod_to_yaml() { + assert_eq!( + basic_pod().unwrap().to_yaml().unwrap(), + indoc! {r" + class: pod + image: alpine:3.14 + command: + - cp + - /input/input.txt + - /output/output.txt + input_spec: + input_txt: + path: /input/input.txt + match_pattern: .*\.txt + output_dir: /output + output_spec: + output_txt: + path: output.txt + match_pattern: .*\.txt + gpu_requirements: null + "}, + "YAML serialization didn't match." + ); + } + + #[test] + fn pod_job_hash() { + assert_eq!( + basic_pod_job().unwrap().hash, + "80348a4ef866a9dfc1a5d0a48467a6592ef2ed9e8de67930d64afefbb395f1c6", + "Hash didn't match." + ); + } + + #[test] + fn pod_job_to_yaml() { + assert_eq!( + basic_pod_job().unwrap().to_yaml().unwrap(), + indoc! {" + class: pod_job + pod: b5574e2efdf26361e8e8e886389a250cfbfcceed08b29325a78fd738cbb2a1b8 + input_packet: + input_txt: + kind: File + location: + namespace: input + path: cat.txt + checksum: 175cc6f362b2f75acd08a373e000144fdb8d14a833d4b70fd743f16a7039103f + output_dir: + namespace: output + path: '' + cpu_limit: 0.2 + memory_limit: 134217728 + env_vars: + FAKE_ENV: FakeValue + "}, + "YAML serialization didn't match." + ); + } + + #[test] + fn pod_result_hash() { + assert_eq!( + basic_pod_result().unwrap().hash, + "92809a4ce13b4fe8c8dcdcf2b48dd14a9dd885593fe3ab5d9809d27bc9a16354", + "Hash didn't match." + ); + } + + #[test] + fn pod_result_to_yaml() { + assert_eq!( + basic_pod_result().unwrap().to_yaml().unwrap(), + indoc! {" + class: pod_result + pod_job: 80348a4ef866a9dfc1a5d0a48467a6592ef2ed9e8de67930d64afefbb395f1c6 + output_packet: + output_txt: + kind: File + location: + namespace: output + path: output.txt + checksum: 175cc6f362b2f75acd08a373e000144fdb8d14a833d4b70fd743f16a7039103f + assigned_name: randomly_assigned_name + status: Completed + created: 1737922307 + terminated: 1737925907 + logs: example_logs + "}, + "YAML serialization didn't match." + ); + } +} diff --git a/src/core/operator.rs b/src/operator.rs similarity index 87% rename from src/core/operator.rs rename to src/operator.rs index 55f72bed..843351a6 100644 --- a/src/core/operator.rs +++ b/src/operator.rs @@ -1,23 +1,62 @@ -use crate::{ - core::model::ToYaml, - uniffi::{error::Result, model::packet::Packet, operator::MapOperator}, -}; -use async_trait; +use std::{collections::HashMap, sync::Arc}; + use itertools::Itertools as _; -use std::{clone::Clone, collections::HashMap, iter::IntoIterator, sync::Arc}; +use serde::{Deserialize, Serialize}; use tokio::sync::Mutex; +use crate::{ + crypto::hash_buffer, + error::Result, + model::{ToYaml, packet::Packet, serialize_hashmap}, +}; + +/// Trait that all operators must implement for it to work in the pipeline #[async_trait::async_trait] pub trait Operator { + /// Method where the operator get pass a packet for processing one at a time async fn process_packet(&self, stream_name: String, packet: Packet) -> Result>; } +/// Operator class that map `input_keys` to `output_key`, effectively renaming it +/// For use in pipelines +#[derive(uniffi::Object, Debug, Clone, Deserialize, Serialize, PartialEq, Eq, Default)] +pub struct MapOperator { + /// Unique hash of the map operator + #[serde(skip)] + pub hash: String, + /// Mapping of input keys to output keys + #[serde(serialize_with = "serialize_hashmap")] + pub map: HashMap, +} + +#[uniffi::export] +impl MapOperator { + #[uniffi::constructor] + /// Create a new `MapOperator` + /// + /// # Errors + /// Will error if there are issues converting the map to yaml for hashing + pub fn new(map: HashMap) -> Result { + let no_hash = Self { + map, + hash: String::new(), + }; + + Ok(Self { + hash: hash_buffer(no_hash.to_yaml()?), + ..no_hash + }) + } +} + +/// Operator class that join packets from multiple parent streams into one packet pub struct JoinOperator { parent_count: usize, received_packets: Arc>>>, } impl JoinOperator { + /// Create a new `JoinOperator` pub fn new(parent_count: usize) -> Self { Self { parent_count, @@ -100,11 +139,9 @@ mod tests { #![expect(clippy::panic_in_result_fn, reason = "OK in tests.")] use crate::{ - core::operator::{JoinOperator, MapOperator, Operator}, - uniffi::{ - error::Result, - model::packet::{Blob, BlobKind, Packet, PathSet, URI}, - }, + error::Result, + model::packet::{Blob, BlobKind, Packet, PathSet, URI}, + operator::{JoinOperator, MapOperator, Operator}, }; use std::{collections::HashMap, path::PathBuf}; diff --git a/src/uniffi/orchestrator/agent.rs b/src/orchestrator/agent.rs similarity index 52% rename from src/uniffi/orchestrator/agent.rs rename to src/orchestrator/agent.rs index 1d1df2e7..106e57ae 100644 --- a/src/uniffi/orchestrator/agent.rs +++ b/src/orchestrator/agent.rs @@ -1,25 +1,32 @@ use crate::{ - core::orchestrator::agent::start_service, - uniffi::{ - error::{OrcaError, Result, selector}, - model::pod::PodJob, - orchestrator::{Orchestrator, PodStatus, docker::LocalDockerOrchestrator}, - store::{Store as _, filestore::LocalFileStore}, - }, + error::{OrcaError, Result, selector}, + model::pod::{PodJob, PodStatus}, + orchestrator::{Orchestrator, docker::LocalDockerOrchestrator}, + store::{Store as _, filestore::LocalFileStore}, }; +use chrono::{DateTime, Utc}; use colored::Colorize as _; use derive_more::Display; use futures_executor::block_on; -use futures_util::future::join_all; +use futures_util::future::{FutureExt as _, join_all}; use getset::CloneGetters; +use itertools::Itertools as _; +use serde::{Deserialize, Serialize}; use serde_json::Value; use snafu::{OptionExt as _, ResultExt as _}; use std::{ + borrow::ToOwned, collections::{BTreeMap, HashMap}, + fmt::Write as _, + hash::RandomState, path::PathBuf, sync::Arc, }; -use tokio::task::JoinSet; +use tokio::{ + sync::mpsc::{self, error::SendError}, + task::JoinSet, +}; +use tokio_util::task::TaskTracker; use uniffi; use zenoh; @@ -221,3 +228,183 @@ impl Agent { })?? } } + +pub(crate) fn extract_metadata(key_expr: &str) -> HashMap { + key_expr + .split('/') + .map(ToOwned::to_owned) + .tuples() + .collect() +} + +impl AgentClient { + #[expect( + clippy::let_underscore_must_use, + reason = "write! on a `String` cannot fail. https://rust-lang.github.io/rust-clippy/master/index.html#format_collect" + )] + pub(crate) fn make_key_expr( + &self, + is_subscriber: bool, + topic: &str, + mut metadata: BTreeMap<&str, String>, + ) -> String { + metadata.insert("group", self.group.clone()); + metadata.insert("topic", topic.to_owned()); + + let delimiter = if is_subscriber { + "**/".to_owned() + } else { + metadata.insert("host", self.host.clone()); + metadata.insert("timestamp", Utc::now().to_rfc3339()); + String::new() + }; + + metadata + .iter() + .fold(delimiter.clone(), |mut key_expr, (key, value)| { + let _ = write!(key_expr, "{key}/{value}/{delimiter}"); + key_expr + }) + .trim_end_matches('/') + .to_owned() + } + + pub(crate) async fn publish( + &self, + topic: &str, + metadata: BTreeMap<&str, String>, + payload: &T, + ) -> Result<()> + where + T: Serialize + Sync + ?Sized, + { + Ok(self + .session + .put( + self.make_key_expr(false, topic, metadata), + &serde_json::to_vec(payload)?, + ) + .await + .context(selector::AgentCommunicationFailure {})?) + } + /// Send a log message to the agent network. + /// + /// # Errors + /// + /// Will fail if there is an issue sending the message. + pub(crate) async fn log(&self, message: &str) -> Result<()> { + self.publish("log", BTreeMap::new(), message).await + } +} + +#[expect( + clippy::excessive_nesting, + clippy::let_underscore_must_use, + reason = "`result::Result<(), SendError<_>>` is the only uncaptured result since it would mean we can't transmit results over mpsc." +)] +async fn start_service< + RequestF, // function to run on requests + RequestI, // input to the function for requests + RequestR, // output to the function for requests + ResponseF, // function to run on completing a request i.e. response + ResponseI, // input to the function for responses + ResponseR, // output to the function for responses +>( + agent: Arc, + request_topic: &str, + request_metadata: BTreeMap<&'static str, String>, + namespace_lookup: HashMap, + request_task: RequestF, + response_task: ResponseF, +) -> Result<()> +where + RequestI: for<'serde> Deserialize<'serde> + Send + 'static, + RequestF: FnOnce( + Arc, + HashMap, + (DateTime, HashMap), + RequestI, + ) -> RequestR + + Clone + + Send + + 'static, + RequestR: Future> + Send + 'static, + ResponseI: Send + 'static, + ResponseF: Fn(Arc, ResponseI) -> ResponseR + Send + 'static, + ResponseR: Future> + Send + 'static, +{ + agent + .client + .log(&format!( + "Started `{request_topic}` service for {request_metadata:?}." + )) + .await?; + let (response_tx, mut response_rx) = mpsc::channel(100); + + let mut services = JoinSet::new(); + services.spawn({ + let inner_agent = Arc::clone(&agent); + let inner_request_topic = request_topic.to_owned(); + async move { + let tasks = TaskTracker::new(); + let subscriber = inner_agent + .client + .session + .declare_subscriber(inner_agent.client.make_key_expr( + true, + &inner_request_topic, + request_metadata, + )) + .await + .context(selector::AgentCommunicationFailure {})?; + loop { + let sample = subscriber + .recv_async() + .await + .context(selector::AgentCommunicationFailure {})?; + let input = serde_json::from_slice::(&sample.payload().to_bytes())?; + let inner_response_tx = response_tx.clone(); + let mut event_metadata = extract_metadata(sample.key_expr().as_str()); + let timestamp = + event_metadata + .remove("timestamp") + .context(selector::MissingInfo { + details: "timestamp", + })?; + let event_timestamp = + DateTime::::from(DateTime::parse_from_rfc3339(×tamp)?); + tasks.spawn({ + let inner_request_task = request_task.clone(); + let inner_inner_agent = Arc::clone(&inner_agent); + let inner_namespace_lookup = namespace_lookup.clone(); + async move { + inner_request_task( + inner_inner_agent, + inner_namespace_lookup, + (event_timestamp, event_metadata), + input, + ) + .then(move |response| async move { + let _: Result<(), SendError>> = + inner_response_tx.send(response).await; + Ok::<_, OrcaError>(()) + }) + .await + } + }); + } + } + }); + services.spawn(async move { + loop { + let response = response_rx.recv().await.context(selector::MissingInfo { + details: "channel empty or closed", + })?; + response_task(Arc::clone(&agent.client), response?).await?; + } + }); + + services.join_next().await.context(selector::MissingInfo { + details: "no available services", + })?? +} diff --git a/src/core/orchestrator/docker.rs b/src/orchestrator/docker.rs similarity index 50% rename from src/core/orchestrator/docker.rs rename to src/orchestrator/docker.rs index 3ee50c3d..e3b0a1bb 100644 --- a/src/core/orchestrator/docker.rs +++ b/src/orchestrator/docker.rs @@ -1,30 +1,49 @@ use crate::{ - core::util::get, - uniffi::{ - error::{Result, selector}, - model::{packet::PathSet, pod::PodJob}, - orchestrator::{PodRunInfo, PodStatus, docker::LocalDockerOrchestrator}, + error::{Kind, OrcaError, Result, selector}, + model::{ + packet::PathSet, + pod::{PodJob, PodResult, PodStatus}, }, + orchestrator::{ASYNC_RUNTIME, ImageKind, Orchestrator, PodRun, PodRunInfo}, + util::get, }; use bollard::{ - container::{Config, CreateContainerOptions, ListContainersOptions}, + Docker, + container::{ + Config, CreateContainerOptions, ListContainersOptions, LogOutput, LogsOptions, + RemoveContainerOptions, StartContainerOptions, WaitContainerOptions, + }, + errors::Error::DockerContainerWaitError, + image::{CreateImageOptions, ImportImageOptions}, models::{ContainerStateStatusEnum, HostConfig}, secret::{ContainerInspectResponse, ContainerSummary}, }; use chrono::DateTime; -use futures_util::future::join_all; +use derive_more::Display; +use futures_util::{ + future::join_all, + stream::{StreamExt as _, TryStreamExt as _}, +}; use names::{Generator, Name}; use regex::Regex; -use snafu::OptionExt as _; +use snafu::{OptionExt as _, futures::TryFutureExt as _}; use std::{ + backtrace::Backtrace, collections::HashMap, fs, path::{self, PathBuf}, - sync::LazyLock, + sync::{Arc, LazyLock}, + time::Duration, +}; +use tokio::{fs::File, time::sleep as async_sleep}; +use tokio_util::{ + bytes::{Bytes, BytesMut}, + codec::{BytesCodec, FramedRead}, }; +use uniffi; #[expect(clippy::expect_used, reason = "Valid static regex")] -pub static RE_IMAGE_TAG: LazyLock = LazyLock::new(|| { +static RE_IMAGE_TAG: LazyLock = LazyLock::new(|| { Regex::new( r"(?x) \s @@ -35,6 +54,322 @@ pub static RE_IMAGE_TAG: LazyLock = LazyLock::new(|| { .expect("Invalid image tag regex.") }); +/// Support for an orchestration engine using a local docker installation. +#[derive(uniffi::Object, Debug, Display)] +#[display("{self:#?}")] +#[uniffi::export(Display)] +pub struct LocalDockerOrchestrator { + /// API to interact with Docker daemon. + pub api: Docker, +} + +#[uniffi::export(async_runtime = "tokio")] +#[async_trait::async_trait] +impl Orchestrator for LocalDockerOrchestrator { + fn start_with_altimage_blocking( + &self, + pod_job: &PodJob, + image: &ImageKind, + namespace_lookup: &HashMap, + ) -> Result { + ASYNC_RUNTIME.block_on(self.start_with_altimage(pod_job, image, namespace_lookup)) + } + fn start_blocking( + &self, + pod_job: &PodJob, + namespace_lookup: &HashMap, + ) -> Result { + ASYNC_RUNTIME.block_on(self.start(pod_job, namespace_lookup)) + } + fn list_blocking(&self) -> Result> { + ASYNC_RUNTIME.block_on(self.list()) + } + fn delete_blocking(&self, pod_run: &PodRun) -> Result<()> { + ASYNC_RUNTIME.block_on(self.delete(pod_run)) + } + fn get_info_blocking(&self, pod_run: &PodRun) -> Result { + ASYNC_RUNTIME.block_on(self.get_info(pod_run)) + } + fn get_result_blocking( + &self, + pod_run: &PodRun, + namespace_lookup: &HashMap, + ) -> Result { + ASYNC_RUNTIME.block_on(self.get_result(pod_run, namespace_lookup)) + } + fn get_logs_blocking(&self, pod_run: &PodRun) -> Result { + ASYNC_RUNTIME.block_on(self.get_logs(pod_run)) + } + #[expect( + clippy::try_err, + reason = r#" + - `map_err` workaround needed since `import_image_stream` requires resolved bytes + - Raising an error manually on occurrence to halt so we don't just ignore + - Should not get as far as `Ok(_)` + "# + )] + async fn start_with_altimage( + &self, + pod_job: &PodJob, + image: &ImageKind, + namespace_lookup: &HashMap, + ) -> Result { + let (assigned_name, container_options, container_config) = match image { + ImageKind::Published(remote_image) => Self::prepare_container_start_inputs( + namespace_lookup, + pod_job, + remote_image.clone(), + )?, + ImageKind::Tarball(image_info) => { + let location = namespace_lookup[&image_info.namespace].join(&image_info.path); + let byte_stream = FramedRead::new( + File::open(&location) + .context(selector::InvalidPath { path: &location }) + .await?, + BytesCodec::new(), + ) + .map_err(|err| -> Result<()> { + Err::(err.into())?; // raise on error since we discard below + Ok(()) + }) + .map(|result| result.ok().map_or(Bytes::new(), BytesMut::freeze)); + let mut stream = + self.api + .import_image_stream(ImportImageOptions::default(), byte_stream, None); + let mut local_image = String::new(); + while let Some(response) = stream.next().await { + local_image = RE_IMAGE_TAG + .captures_iter(&response?.stream.context(selector::MissingInfo { + details: location.to_string_lossy(), + })?) + .find_map(|x| x.name("image").map(|name| name.as_str().to_owned())) + .context(selector::MissingInfo { + details: format!( + "container tags in provided container alternate image where path = {}", + location.to_string_lossy() + ), + })?; + } + Self::prepare_container_start_inputs( + namespace_lookup, + pod_job, + local_image.clone(), + )? + } + }; + self.api + .create_container(container_options, container_config) + .await?; + match self + .api + .start_container(&assigned_name, None::>) + .await + { + Ok(()) => {} + Err(err) => Err(OrcaError { + kind: Kind::FailedToStartPod { + container_name: assigned_name.clone(), + reason: err.to_string(), + backtrace: Backtrace::capture().into(), + }, + })?, + } + + Ok(PodRun::new::(pod_job, assigned_name)) + } + async fn start( + &self, + pod_job: &PodJob, + namespace_lookup: &HashMap, + ) -> Result { + let image_options = Some(CreateImageOptions { + from_image: pod_job.pod.image.clone(), + ..Default::default() + }); + self.api + .create_image(image_options, None, None) + .try_collect::>() + .await?; + self.start_with_altimage( + pod_job, + &ImageKind::Published(pod_job.pod.image.clone()), + namespace_lookup, + ) + .await + } + async fn list(&self) -> Result> { + self.list_containers(HashMap::from([( + "label".to_owned(), + vec!["org.orcapod=true".to_owned()], + )])) + .await? + .map(|(assigned_name, run_info)| { + let pod_job: PodJob = + serde_json::from_str(get(&run_info.labels, "org.orcapod.pod_job")?)?; + Ok(PodRun::new::(&pod_job, assigned_name)) + }) + .collect() + } + async fn delete(&self, pod_run: &PodRun) -> Result<()> { + self.api + .remove_container( + &pod_run.assigned_name, + Some(RemoveContainerOptions { + force: true, + ..Default::default() + }), + ) + .await?; + Ok(()) + } + async fn get_info(&self, pod_run: &PodRun) -> Result { + let labels = vec![ + "org.orcapod=true".to_owned(), + format!( + "org.orcapod.pod_job.annotation={}", + serde_json::to_string(&pod_run.pod_job.annotation)? + ), + format!("org.orcapod.pod_job.hash={}", pod_run.pod_job.hash), + ]; + + // Add names to the filters + let container_filters = HashMap::from([ + ("label".to_owned(), labels), + ( + "name".to_owned(), + Vec::from([pod_run.assigned_name.clone()]), + ), + ]); + + let (_, run_info) = self + .list_containers(container_filters) + .await? + .next() + .context(selector::MissingInfo { + details: format!("pod run where pod_job.hash = {}", pod_run.pod_job.hash), + })?; + Ok(run_info) + } + #[expect( + clippy::wildcard_enum_match_arm, + reason = "Favor readability due to complexity in external dependency." + )] + async fn get_result( + &self, + pod_run: &PodRun, + namespace_lookup: &HashMap, + ) -> Result { + match self + .api + .wait_container(&pod_run.assigned_name, None::>) + .try_collect::>() + .await + { + Ok(_) => (), + Err(err) => match err { + DockerContainerWaitError { .. } => (), + _ => return Err(OrcaError::from(err)), + }, + } + + let mut result_info: PodRunInfo; + while { + result_info = self.get_info(pod_run).await?; + matches!(&result_info.status, PodStatus::Running) + } { + async_sleep(Duration::from_millis(100)).await; + } + + PodResult::new( + None, + Arc::clone(&pod_run.pod_job), + pod_run.assigned_name.clone(), + result_info.status, + result_info.created, + result_info.terminated.context(selector::MissingInfo { + details: format!( + "terminated where pod_run.assigned_name = {}, pod_run.pod_job.hash = {}", + pod_run.assigned_name, pod_run.pod_job.hash + ), + })?, + namespace_lookup, + self.get_logs(pod_run).await?, + ) + } + + async fn get_logs(&self, pod_run: &PodRun) -> Result { + let mut std_out = Vec::new(); + let mut std_err = Vec::new(); + + self.api + .logs::( + &pod_run.assigned_name, + Some(LogsOptions { + stdout: true, + stderr: true, + ..Default::default() + }), + ) + .try_collect::>() + .await? + .iter() + .for_each(|log_output| match log_output { + LogOutput::StdOut { message } => { + std_out.extend(message.to_vec()); + } + LogOutput::StdErr { message } => { + std_err.extend(message.to_vec()); + } + LogOutput::StdIn { .. } | LogOutput::Console { .. } => { + // Ignore stdin logs, as they are not relevant for our use case + } + }); + + let mut logs = String::from_utf8_lossy(&std_out).to_string(); + if !std_err.is_empty() { + logs.push_str("\nSTDERR:\n"); + logs.push_str(&String::from_utf8_lossy(&std_err)); + } + + // Check for errors in the docker state, if exist, attach it to logs + // This is for when the container exits immediately due to a bad command or similar + let error = self + .api + .inspect_container(&pod_run.assigned_name, None) + .await? + .state + .context(selector::FailedToExtractRunInfo { + container_name: &pod_run.assigned_name, + })? + .error + .context(selector::FailedToExtractRunInfo { + container_name: &pod_run.assigned_name, + })?; + + if !error.is_empty() { + logs.push_str(&error); + } + + Ok(logs) + } +} + +#[uniffi::export] +impl LocalDockerOrchestrator { + /// How to create a local docker orchestrator with an absolute path on docker host where binds + /// will be mounted from. + /// + /// # Errors + /// + /// Will return `Err` if there is an issue creating a local docker orchestrator. + #[uniffi::constructor] + pub fn new() -> Result { + Ok(Self { + api: Docker::connect_with_local_defaults()?, + }) + } +} + impl LocalDockerOrchestrator { fn prepare_mount_binds( namespace_lookup: &HashMap, diff --git a/src/uniffi/orchestrator/mod.rs b/src/orchestrator/mod.rs similarity index 87% rename from src/uniffi/orchestrator/mod.rs rename to src/orchestrator/mod.rs index 04d2412f..f3b01749 100644 --- a/src/uniffi/orchestrator/mod.rs +++ b/src/orchestrator/mod.rs @@ -1,14 +1,22 @@ -use crate::uniffi::{ +use async_trait; +use std::{ + collections::HashMap, + fmt, + path::PathBuf, + sync::{Arc, LazyLock}, +}; +use tokio::runtime::Runtime; +use uniffi; + +use crate::{ error::Result, model::{ packet::URI, - pod::{PodJob, PodResult}, + pod::{PodJob, PodResult, PodStatus}, }, + util::get_type_name, }; -use async_trait; -use serde::{Deserialize, Serialize}; -use std::{collections::HashMap, fmt, path::PathBuf, sync::Arc}; -use uniffi; + /// Options for sourcing compute environment images. #[derive(uniffi::Enum)] pub enum ImageKind { @@ -18,21 +26,7 @@ pub enum ImageKind { /// A packaged compute environment of image+tag as a tarball. Tarball(URI), } -/// Status of a particular compute run. -#[derive(uniffi::Enum, Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Default)] -pub enum PodStatus { - /// Run is ongoing. - Running, - /// Run has completed successfully. - Completed, - /// Run failed with the provided error code. - Failed(i16), - /// For other container states that are not listed. - Undefined, - /// No status set. - #[default] - Unset, -} + /// Run metadata #[derive(uniffi::Record, Debug)] pub struct PodRunInfo { @@ -178,7 +172,26 @@ pub trait Orchestrator: Send + Sync + fmt::Debug { /// Get the logs for a specific pod run. async fn get_logs(&self, pod_run: &PodRun) -> Result; } -/// Orchestration execution agent daemon and client. + +#[expect( + clippy::expect_used, + reason = "Should be able to create Tokio runtime." +)] +static ASYNC_RUNTIME: LazyLock = + LazyLock::new(|| Runtime::new().expect("Unable to create Tokio runtime.")); + +impl PodRun { + /// Create a new `PodRun` + pub fn new(pod_job: &PodJob, assigned_name: String) -> Self { + Self { + pod_job: pod_job.clone().into(), + orchestrator_source: get_type_name::(), + assigned_name, + } + } +} + +/// Daemon agent for execution of pod jobs and pipeline jobs pub mod agent; -/// Orchestration implementation for Docker backend. +/// Docker-based orchestrator implementation pub mod docker; diff --git a/src/core/pipeline_runner.rs b/src/pipeline_runner.rs similarity index 98% rename from src/core/pipeline_runner.rs rename to src/pipeline_runner.rs index 4f836290..b4a1b365 100644 --- a/src/core/pipeline_runner.rs +++ b/src/pipeline_runner.rs @@ -1,26 +1,3 @@ -use crate::{ - core::{ - crypto::hash_buffer, - model::{pipeline::PipelineNode, serialize_hashmap}, - operator::{JoinOperator, Operator}, - util::{get, make_key_expr}, - }, - uniffi::{ - error::{ - Kind, OrcaError, Result, - selector::{self}, - }, - model::{ - packet::{Packet, PathSet, URI}, - pipeline::{Kernel, PipelineJob, PipelineResult, PipelineStatus}, - pod::{Pod, PodJob, PodResult}, - }, - orchestrator::{ - PodStatus, - agent::{Agent, AgentClient, Response}, - }, - }, -}; use async_trait::async_trait; use names::{Generator, Name}; use serde_yaml::Serializer; @@ -35,6 +12,23 @@ use tokio::{ task::JoinSet, }; +use crate::{ + crypto::hash_buffer, + error::{ + Kind, OrcaError, Result, + selector::{self}, + }, + model::{ + packet::{Packet, PathSet, URI}, + pipeline::{Kernel, PipelineJob, PipelineNode, PipelineResult, PipelineStatus}, + pod::{Pod, PodJob, PodResult, PodStatus}, + serialize_hashmap, + }, + operator::{JoinOperator, Operator}, + orchestrator::agent::{Agent, AgentClient, Response}, + util::{get, make_key_expr}, +}; + static NODE_OUTPUT_KEY_EXPR: &str = "output"; static FAILURE_KEY_EXP: &str = "failure"; diff --git a/src/uniffi/store/filestore.rs b/src/store/filestore.rs similarity index 56% rename from src/uniffi/store/filestore.rs rename to src/store/filestore.rs index d5b58b6e..7f599f3a 100644 --- a/src/uniffi/store/filestore.rs +++ b/src/store/filestore.rs @@ -1,27 +1,59 @@ +use colored::Colorize as _; +use glob::glob; +use heck::ToSnakeCase as _; +use regex::Regex; +use serde::{Serialize, de::DeserializeOwned}; +use serde_yaml; +use snafu::OptionExt as _; +use std::{ + backtrace::Backtrace, + collections::{HashMap, HashSet}, + fmt, fs, + path::{Path, PathBuf}, + sync::LazyLock, +}; + use crate::{ - core::{crypto::hash_buffer, model::ToYaml as _}, - uniffi::{ - error::{Kind, OrcaError, Result, selector}, - model::{ - ModelType, - pipeline::{JOIN_OPERATOR_HASH, Kernel, NodeURI, Pipeline}, - pod::{Pod, PodJob, PodResult}, - }, - operator::MapOperator, - store::{ModelID, ModelInfo, Store}, + crypto::hash_buffer, + error::{Kind, OrcaError, Result, selector}, + model::{ + Annotation, ModelType, ToYaml, + pipeline::{JOIN_OPERATOR_HASH, Kernel, NodeURI, Pipeline}, + pod::{Pod, PodJob, PodResult}, }, + operator::MapOperator, + store::{MODEL_NAMESPACE, ModelID, ModelInfo, Store}, + util::get_type_name, }; + +#[expect(clippy::expect_used, reason = "Valid static regex")] +static RE_MODEL_METADATA: LazyLock = LazyLock::new(|| { + Regex::new( + r"(?x) + ^ + (?.*?)/ + (?[a-z_]+)/ + (?[a-z_]+)/ + (?[0-9a-f]+)/ + ( + annotation/ + (?[0-9a-zA-Z\-]+) + - + (?[0-9]+\.[0-9]+\.[0-9]+) + \.yaml + | + spec\.yaml + ) + $ + ", + ) + .expect("Invalid model metadata regex.") +}); + use chrono::Utc; use derive_more::Display; use getset::CloneGetters; use serde::Deserialize; -use snafu::OptionExt as _; -use std::{ - backtrace::Backtrace, - collections::{HashMap, HashSet}, - fs, - path::PathBuf, -}; use uniffi; /// Support for a storage backend on a local filesystem directory. #[derive(uniffi::Object, Debug, Display, CloneGetters, Clone)] @@ -336,3 +368,207 @@ impl LocalFileStore { Self { directory } } } + +impl LocalFileStore { + /// Relative path where model specification is stored within the model directory. + pub const SPEC_RELPATH: &str = "spec.yaml"; + /// Relative path where model annotation is stored within the model directory. + pub fn make_annotation_relpath(name: &str, version: &str) -> PathBuf { + PathBuf::from(format!("annotation/{name}-{version}.yaml")) + } + /// Build the storage path with the model directory (`hash`) and a file's relative path. + pub fn make_path(&self, hash: &str, relpath: impl AsRef) -> PathBuf { + PathBuf::from(format!( + "{}/{}/{}/{}", + self.directory.to_string_lossy(), + MODEL_NAMESPACE, + get_type_name::().to_snake_case(), + hash + )) + .join(relpath) + } + + fn find_model_metadata(glob_pattern: &Path) -> Result> { + let paths = glob(&glob_pattern.to_string_lossy())?.filter_map(move |filepath| { + let filepath_string = String::from(filepath.ok()?.to_string_lossy()); + let group = RE_MODEL_METADATA.captures(&filepath_string)?; + Some(ModelInfo { + name: group.name("name").map(|name| name.as_str().to_owned()), + version: group + .name("version") + .map(|version| version.as_str().to_owned()), + hash: group["hash"].to_string(), + }) + }); + Ok(paths) + } + /// Find hash using name and version. + /// + /// # Errors + /// + /// Will return error if unable to find. + pub(crate) fn lookup_hash(&self, name: &str, version: &str) -> Result { + let model_info = Self::find_model_metadata( + &self.make_path::("*", Self::make_annotation_relpath(name, version)), + )? + .next() + .context(selector::MissingInfo { + details: format!( + "annotation where class = {}, name = {name}, version = {version}", + get_type_name::().to_snake_case() + ), + })?; + Ok(model_info.hash) + } + + pub(crate) fn save_file(file: impl AsRef, content: impl AsRef<[u8]>) -> Result<()> { + if let Some(parent) = file.as_ref().parent() { + fs::create_dir_all(parent)?; + } + fs::write(file, content)?; + Ok(()) + } + /// How any model is stored. + /// + /// # Errors + /// + /// Will return `Err` if there is an issue storing the model. + pub(crate) fn save_model( + &self, + model: &T, + hash: &str, + annotation: Option<&Annotation>, + ) -> Result<()> { + let class = get_type_name::().to_snake_case(); + // Save annotation if defined and doesn't collide globally i.e. model, name, version + if let Some(provided_annotation) = annotation { + let relpath = &Self::make_annotation_relpath( + &provided_annotation.name, + &provided_annotation.version, + ); + if let Some((found_hash, found_name, found_version)) = + Self::find_model_metadata(&self.make_path::("*", relpath))? + .next() + .and_then(|model_info| { + Some((model_info.hash, model_info.name?, model_info.version?)) + }) + { + println!( + "{}", + format!( + "Skip saving {} annotation since `{}`, `{}`, `{}` exists.", + class.bright_cyan(), + found_hash.bright_cyan(), + found_name.bright_cyan(), + found_version.bright_cyan(), + ) + .yellow(), + ); + } else { + Self::save_file( + self.make_path::(hash, relpath), + serde_yaml::to_string(provided_annotation)?, + )?; + } + } + // Save model specification and skip if it already exist e.g. on new annotations + let spec_file = &self.make_path::(hash, Self::SPEC_RELPATH); + if spec_file.exists() { + println!( + "{}", + format!( + "Skip saving {} model since `{}` exists.", + class.bright_cyan(), + hash.bright_cyan(), + ) + .yellow(), + ); + } else { + Self::save_file(spec_file, model.to_yaml()?)?; + } + Ok(()) + } + /// How to load any stored model into an instance. + /// + /// # Errors + /// + /// Will return `Err` if there is an issue loading the model from the store using `name` and + /// `version`. + pub(crate) fn load_model( + &self, + model_id: &ModelID, + ) -> Result<(T, Option, String)> { + let (hash, annotation) = self.decode_model_id::(model_id)?; + + Ok(( + serde_yaml::from_str(&fs::read_to_string( + self.make_path::(&hash, Self::SPEC_RELPATH), + )?)?, + annotation, + hash, + )) + } + + pub(crate) fn decode_model_id( + &self, + model_id: &ModelID, + ) -> Result<(String, Option)> { + match model_id { + ModelID::Hash(hash) => Ok((hash.to_owned(), None)), + ModelID::Annotation(name, version) => { + let hash = self.lookup_hash::(name, version)?; + let annotation_str = fs::read_to_string( + self.make_path::(&hash, Self::make_annotation_relpath(name, version)), + )?; + let annotation: Annotation = serde_yaml::from_str(&annotation_str)?; + Ok((hash, Some(annotation))) + } + } + } + /// How to query any stored models. + /// + /// # Errors + /// + /// Will return `Err` if there is an issue querying metadata from existing models in the store. + pub(crate) fn list_model(&self) -> Result> { + Ok(Self::find_model_metadata(&self.make_path::("**", "*"))?.collect()) + } + /// How to explicitly delete any stored model and all associated annotations (does not propagate). + /// + /// # Errors + /// + /// Will return `Err` if there is an issue deleting a model from the store using `name` and + /// `version`. + pub(crate) fn delete_model(&self, model_id: &ModelID) -> Result<()> { + // assumes propagate = false + let hash = match model_id { + ModelID::Hash(hash) => hash, + ModelID::Annotation(name, version) => &self.lookup_hash::(name, version)?, + }; + let spec_dir = self.make_path::(hash, ""); + fs::remove_dir_all(spec_dir)?; + + Ok(()) + } + + pub(crate) fn get_latest_pipeline_labels_file_name( + &self, + pipeline_hash: &str, + ) -> Result> { + let existing_labels_path = self.make_path::(pipeline_hash, "labels/"); + Ok(if existing_labels_path.exists() { + let mut label_file_names = fs::read_dir(&existing_labels_path)? + .map(|entry| Ok::<_, OrcaError>(entry?.file_name())) + .collect::, _>>()?; + + // Sort and get the latest one + label_file_names.sort(); + + label_file_names + .last() + .map(|os_str| os_str.to_string_lossy().to_string()) + } else { + None + }) + } +} diff --git a/src/uniffi/store/mod.rs b/src/store/mod.rs similarity index 98% rename from src/uniffi/store/mod.rs rename to src/store/mod.rs index ad8cc036..1422e7b4 100644 --- a/src/uniffi/store/mod.rs +++ b/src/store/mod.rs @@ -1,4 +1,7 @@ -use crate::uniffi::{ +/// Namespace where models will be stored. +const MODEL_NAMESPACE: &str = "orcapod_model"; + +use crate::{ error::Result, model::{ ModelType, diff --git a/src/uniffi/mod.rs b/src/uniffi/mod.rs deleted file mode 100644 index 2443ce00..00000000 --- a/src/uniffi/mod.rs +++ /dev/null @@ -1,10 +0,0 @@ -/// Error handling. -pub mod error; -/// Components of the data model. -pub mod model; -/// Operators for pipeline -pub mod operator; -/// Interface into container orchestration engine. -pub mod orchestrator; -/// Data persistence provided by a store backend. -pub mod store; diff --git a/src/uniffi/model/mod.rs b/src/uniffi/model/mod.rs deleted file mode 100644 index a266fd99..00000000 --- a/src/uniffi/model/mod.rs +++ /dev/null @@ -1,37 +0,0 @@ -use serde::{Deserialize, Serialize}; -use std::path::PathBuf; - -/// Available models. -#[derive(uniffi::Enum, Debug)] -pub enum ModelType { - /// See [`Pod`](crate::uniffi::model::pod::Pod). - Pod, - /// See [`PodJob`](crate::uniffi::model::pod::PodJob). - PodJob, - /// See [`PodResult`](crate::uniffi::model::pod::PodResult). - PodResult, -} - -/// Standard metadata structure for all model instances. -#[derive(uniffi::Record, Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] -pub struct Annotation { - /// A unique name. - pub name: String, - /// A unique semantic version. - pub version: String, - /// A long form description. - pub description: String, -} - -uniffi::custom_type!(PathBuf, String, { - remote, - try_lift: |val| Ok(PathBuf::from(&val)), - lower: |obj| obj.display().to_string(), -}); - -/// Utility types for describing packets. -pub mod packet; -/// Models and utility types for pipelines. -pub mod pipeline; -/// Models and utility types for pods. -pub mod pod; diff --git a/src/uniffi/model/pipeline.rs b/src/uniffi/model/pipeline.rs deleted file mode 100644 index 07788648..00000000 --- a/src/uniffi/model/pipeline.rs +++ /dev/null @@ -1,315 +0,0 @@ -use crate::{ - core::{ - crypto::{hash_blob, hash_buffer, make_random_hash}, - graph::make_graph, - model::{ToYaml as _, pipeline::PipelineNode}, - validation::validate_packet, - }, - uniffi::{ - error::{OrcaError, Result, selector}, - model::{ - Annotation, - packet::{PathSet, URI}, - pod::Pod, - }, - operator::MapOperator, - }, -}; -use derive_more::Display; -use getset::CloneGetters; -use petgraph::graph::DiGraph; -use serde::{Deserialize, Serialize}; -use snafu::OptionExt as _; -use std::{ - collections::{HashMap, HashSet}, - hash::Hash, - path::PathBuf, - sync::Arc, -}; -use std::{hash::Hasher, sync::LazyLock}; -use uniffi; - -pub(crate) static JOIN_OPERATOR_HASH: LazyLock = - LazyLock::new(|| hash_buffer(b"join_operator")); - -/// Computational dependencies as a [DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph). -#[derive(uniffi::Object, Debug, Display, CloneGetters, Clone, Deserialize, Default)] -#[getset(get_clone, impl_attrs = "#[uniffi::export]")] -#[display("{self:#?}")] -#[uniffi::export(Display)] -pub struct Pipeline { - /// Hash for pipeline - #[serde(default)] - pub hash: String, - /// Annotations for the pipeline. - #[serde(default)] - pub annotation: Option, - /// Computational DAG in-memory. - #[getset(skip)] - #[serde(skip_deserializing)] - pub graph: DiGraph, - /// Exposed, internal input specification. Each input may be fed into more than one node/key if desired. - pub input_spec: HashMap>, - /// Exposed, internal output specification. Each output is associated with only one node/key. - pub output_spec: HashMap, -} - -#[uniffi::export] -impl Pipeline { - /// Construct a new pipeline instance. - /// - /// # Errors - /// - /// Will return `Err` if there is an issue initializing a `Pipeline` instance. - #[uniffi::constructor] - pub fn new( - graph_dot: &str, - metadata: &HashMap, - mut input_spec: HashMap>, - mut output_spec: HashMap, - annotation: Option, - ) -> Result { - // Note this gives us the graph, but the nodes do not have their hashes computed yet. - let mut graph = make_graph(graph_dot, metadata)?; - - // Run preprocessing to compute the hash for each node - for node_idx in graph.node_indices() { - Self::compute_hash_for_node_and_parents(node_idx, &input_spec, &mut graph); - } - - // Build LUT for node_label -> node_hash - let label_to_hash_lut = - graph - .node_indices() - .fold(HashMap::<&String, &String>::new(), |mut acc, node_idx| { - let node = &graph[node_idx]; - acc.insert(&node.label, &node.hash); - acc - }); - - // Build the new input_spec to refer to the hash instead of label - input_spec.iter_mut().try_for_each(|(_, node_uris)| { - node_uris.iter_mut().try_for_each(|node_uri| { - node_uri.node_id = (*label_to_hash_lut.get(&node_uri.node_id).context( - selector::InvalidInputSpecNodeNotInGraph { - node_name: node_uri.node_id.clone(), - }, - )?) - .clone(); - Ok::<(), OrcaError>(()) - }) - })?; - - // Update the output_spec to refer to the hash instead of label - output_spec.iter_mut().try_for_each(|(_, node_uri)| { - node_uri.node_id = (*label_to_hash_lut.get(&node_uri.node_id).context( - selector::InvalidOutputSpecNodeNotInGraph { - node_name: node_uri.node_id.clone(), - }, - )?) - .clone(); - - Ok::<(), OrcaError>(()) - })?; - - let pipeline_no_hash = Self { - hash: String::new(), - graph, - input_spec, - output_spec, - annotation, - }; - - // Run verification on the pipeline first before computing hash - pipeline_no_hash.validate()?; - - Ok(Self { - hash: hash_buffer(pipeline_no_hash.to_yaml()?.as_bytes()), - ..pipeline_no_hash - }) - } -} - -impl PartialEq for Pipeline { - fn eq(&self, other: &Self) -> bool { - self.hash == other.hash - && self.annotation == other.annotation - && self.input_spec.keys().collect::>() - == other.input_spec.keys().collect::>() - && self.input_spec.values().collect::>() - == other.input_spec.values().collect::>() - && self.output_spec == other.output_spec - } -} - -/// A compute pipeline job that supplies input/output targets. -#[expect( - clippy::field_scoped_visibility_modifiers, - reason = "Temporary until a proper hash is implemented." -)] -#[derive(uniffi::Object, Debug, Display, CloneGetters, Deserialize, Serialize, Clone)] -#[getset(get_clone, impl_attrs = "#[uniffi::export]")] -#[display("{self:#?}")] -#[uniffi::export(Display)] -pub struct PipelineJob { - /// todo: replace with a consistent hash - #[getset(skip)] - pub(crate) hash: String, - /// A pipeline to base the pipeline job on. - pub pipeline: Arc, - /// Attached, external input packet. Applies cartesian product by default on keys pointing to the same node. - pub input_packet: HashMap>, - /// Attached, external output directory. - pub output_dir: URI, -} - -#[uniffi::export] -impl PipelineJob { - /// Construct a new pipeline job instance. - /// - /// # Errors - /// - /// Will return `Err` if there is an issue initializing a `PipelineJob` instance. - #[uniffi::constructor] - pub fn new( - pipeline: Arc, - input_packet: &HashMap>, - output_dir: URI, - namespace_lookup: &HashMap, - ) -> Result { - validate_packet("input".into(), &pipeline.input_spec, input_packet)?; - let input_packet_with_checksum = input_packet - .iter() - .map(|(path_set_key, path_sets)| { - Ok(( - path_set_key.clone(), - path_sets - .iter() - .map(|path_set| { - Ok(match path_set { - PathSet::Unary(blob) => { - PathSet::Unary(hash_blob(namespace_lookup, blob)?) - } - PathSet::Collection(blobs) => PathSet::Collection( - blobs - .iter() - .map(|blob| hash_blob(namespace_lookup, blob)) - .collect::>()?, - ), - }) - }) - .collect::>()?, - )) - }) - .collect::>()?; - - Ok(Self { - hash: make_random_hash(), - pipeline, - input_packet: input_packet_with_checksum, - output_dir, - }) - } -} - -/// Struct to hold the result of a pipeline execution. -#[derive(uniffi::Object, Debug, Clone, Deserialize, Serialize, Display, CloneGetters)] -#[getset(get_clone, impl_attrs = "#[uniffi::export]")] -#[display("{self:#?}")] -#[uniffi::export(Display)] -pub struct PipelineResult { - /// The pipeline job that was executed. - pub pipeline_job: Arc, - /// The result of the pipeline execution. - pub output_packets: HashMap>, - /// Logs of any failures that occurred during the pipeline execution. - pub failure_logs: Vec, - /// The status of the pipeline execution. - pub status: PipelineStatus, -} - -/// The status of a pipeline execution. -#[derive(uniffi::Enum, Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] -pub enum PipelineStatus { - /// The pipeline is currently running. - Running, - /// The pipeline has completed successfully. - Succeeded, - /// The pipeline has failed. - Failed, - /// The pipeline has partially succeeded. There should be some failure logs - PartiallySucceeded, -} -/// A node in a computational pipeline. -#[derive(uniffi::Enum, Debug, Clone, Deserialize, Serialize)] -pub enum Kernel { - /// Pod reference. - Pod { - /// See [`Pod`](crate::uniffi::model::pod::Pod). - pod: Arc, - }, - /// Cartesian product operation. See [`JoinOperator`](crate::core::operator::JoinOperator). - JoinOperator, - /// Rename a path set key operation. - MapOperator { - /// See [`MapOperator`](crate::core::operator::MapOperator). - mapper: Arc, - }, -} - -impl From for Kernel { - fn from(mapper: MapOperator) -> Self { - Self::MapOperator { - mapper: Arc::new(mapper), - } - } -} - -impl From for Kernel { - fn from(pod: Pod) -> Self { - Self::Pod { pod: Arc::new(pod) } - } -} - -impl From> for Kernel { - fn from(pod: Arc) -> Self { - Self::Pod { pod } - } -} - -impl Kernel { - /// Get a unique hash that represents the kernel. - /// The exception here is the `JoinOperator` doesn't have any pre execution configuration, since it's logic is completely dependent on what is fed to it during execution. - pub fn get_hash(&self) -> &str { - match self { - Self::Pod { pod } => &pod.hash, - Self::JoinOperator => &JOIN_OPERATOR_HASH, - Self::MapOperator { mapper } => &mapper.hash, - } - } -} - -impl PartialEq for Kernel { - fn eq(&self, other: &Self) -> bool { - self.get_hash() == other.get_hash() - } -} - -impl Eq for Kernel {} - -impl Hash for Kernel { - fn hash(&self, state: &mut H) { - self.get_hash().hash(state); - } -} - -/// Index from pipeline node into pod specification. -#[derive( - uniffi::Record, Debug, Clone, Deserialize, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord, -)] -pub struct NodeURI { - /// Node reference name in pipeline. - pub node_id: String, - /// Specification key. - pub key: String, -} diff --git a/src/uniffi/operator.rs b/src/uniffi/operator.rs deleted file mode 100644 index 28fbd250..00000000 --- a/src/uniffi/operator.rs +++ /dev/null @@ -1,39 +0,0 @@ -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; - -use crate::core::model::ToYaml as _; -use crate::core::{crypto::hash_buffer, model::serialize_hashmap}; -use crate::uniffi::error::Result; - -/// Operator class that map `input_keys` to `output_key`, effectively renaming it -/// For use in pipelines -#[derive(uniffi::Object, Debug, Clone, Deserialize, Serialize, PartialEq, Eq, Default)] -pub struct MapOperator { - /// Unique hash of the map operator - #[serde(skip)] - pub hash: String, - /// Mapping of input keys to output keys - #[serde(serialize_with = "serialize_hashmap")] - pub map: HashMap, -} - -#[uniffi::export] -impl MapOperator { - #[uniffi::constructor] - /// Create a new `MapOperator` - /// - /// # Errors - /// Will error if there are issues converting the map to yaml for hashing - pub fn new(map: HashMap) -> Result { - let no_hash = Self { - map, - hash: String::new(), - }; - - Ok(Self { - hash: hash_buffer(no_hash.to_yaml()?), - ..no_hash - }) - } -} diff --git a/src/uniffi/orchestrator/docker.rs b/src/uniffi/orchestrator/docker.rs deleted file mode 100644 index 58114577..00000000 --- a/src/uniffi/orchestrator/docker.rs +++ /dev/null @@ -1,346 +0,0 @@ -use crate::{ - core::{ - orchestrator::{ASYNC_RUNTIME, docker::RE_IMAGE_TAG}, - util::get, - }, - uniffi::{ - error::{Kind, OrcaError, Result, selector}, - model::pod::{PodJob, PodResult}, - orchestrator::{ImageKind, Orchestrator, PodRun, PodRunInfo, PodStatus}, - }, -}; -use async_trait; -use bollard::{ - Docker, - container::{ - LogOutput, LogsOptions, RemoveContainerOptions, StartContainerOptions, WaitContainerOptions, - }, - errors::Error::DockerContainerWaitError, - image::{CreateImageOptions, ImportImageOptions}, -}; -use derive_more::Display; -use futures_util::stream::{StreamExt as _, TryStreamExt as _}; -use snafu::{OptionExt as _, futures::TryFutureExt as _}; -use std::{backtrace::Backtrace, collections::HashMap, path::PathBuf, sync::Arc, time::Duration}; -use tokio::{fs::File, time::sleep as async_sleep}; -use tokio_util::{ - bytes::{Bytes, BytesMut}, - codec::{BytesCodec, FramedRead}, -}; -use uniffi; - -/// Support for an orchestration engine using a local docker installation. -#[derive(uniffi::Object, Debug, Display)] -#[display("{self:#?}")] -#[uniffi::export(Display)] -pub struct LocalDockerOrchestrator { - /// API to interact with Docker daemon. - pub api: Docker, -} - -#[uniffi::export(async_runtime = "tokio")] -#[async_trait::async_trait] -impl Orchestrator for LocalDockerOrchestrator { - fn start_with_altimage_blocking( - &self, - pod_job: &PodJob, - image: &ImageKind, - namespace_lookup: &HashMap, - ) -> Result { - ASYNC_RUNTIME.block_on(self.start_with_altimage(pod_job, image, namespace_lookup)) - } - fn start_blocking( - &self, - pod_job: &PodJob, - namespace_lookup: &HashMap, - ) -> Result { - ASYNC_RUNTIME.block_on(self.start(pod_job, namespace_lookup)) - } - fn list_blocking(&self) -> Result> { - ASYNC_RUNTIME.block_on(self.list()) - } - fn delete_blocking(&self, pod_run: &PodRun) -> Result<()> { - ASYNC_RUNTIME.block_on(self.delete(pod_run)) - } - fn get_info_blocking(&self, pod_run: &PodRun) -> Result { - ASYNC_RUNTIME.block_on(self.get_info(pod_run)) - } - fn get_result_blocking( - &self, - pod_run: &PodRun, - namespace_lookup: &HashMap, - ) -> Result { - ASYNC_RUNTIME.block_on(self.get_result(pod_run, namespace_lookup)) - } - fn get_logs_blocking(&self, pod_run: &PodRun) -> Result { - ASYNC_RUNTIME.block_on(self.get_logs(pod_run)) - } - #[expect( - clippy::try_err, - reason = r#" - - `map_err` workaround needed since `import_image_stream` requires resolved bytes - - Raising an error manually on occurrence to halt so we don't just ignore - - Should not get as far as `Ok(_)` - "# - )] - async fn start_with_altimage( - &self, - pod_job: &PodJob, - image: &ImageKind, - namespace_lookup: &HashMap, - ) -> Result { - let (assigned_name, container_options, container_config) = match image { - ImageKind::Published(remote_image) => Self::prepare_container_start_inputs( - namespace_lookup, - pod_job, - remote_image.clone(), - )?, - ImageKind::Tarball(image_info) => { - let location = namespace_lookup[&image_info.namespace].join(&image_info.path); - let byte_stream = FramedRead::new( - File::open(&location) - .context(selector::InvalidPath { path: &location }) - .await?, - BytesCodec::new(), - ) - .map_err(|err| -> Result<()> { - Err::(err.into())?; // raise on error since we discard below - Ok(()) - }) - .map(|result| result.ok().map_or(Bytes::new(), BytesMut::freeze)); - let mut stream = - self.api - .import_image_stream(ImportImageOptions::default(), byte_stream, None); - let mut local_image = String::new(); - while let Some(response) = stream.next().await { - local_image = RE_IMAGE_TAG - .captures_iter(&response?.stream.context(selector::MissingInfo { - details: location.to_string_lossy(), - })?) - .find_map(|x| x.name("image").map(|name| name.as_str().to_owned())) - .context(selector::MissingInfo { - details: format!( - "container tags in provided container alternate image where path = {}", - location.to_string_lossy() - ), - })?; - } - Self::prepare_container_start_inputs( - namespace_lookup, - pod_job, - local_image.clone(), - )? - } - }; - self.api - .create_container(container_options, container_config) - .await?; - match self - .api - .start_container(&assigned_name, None::>) - .await - { - Ok(()) => {} - Err(err) => Err(OrcaError { - kind: Kind::FailedToStartPod { - container_name: assigned_name.clone(), - reason: err.to_string(), - backtrace: Backtrace::capture().into(), - }, - })?, - } - - Ok(PodRun::new::(pod_job, assigned_name)) - } - async fn start( - &self, - pod_job: &PodJob, - namespace_lookup: &HashMap, - ) -> Result { - let image_options = Some(CreateImageOptions { - from_image: pod_job.pod.image.clone(), - ..Default::default() - }); - self.api - .create_image(image_options, None, None) - .try_collect::>() - .await?; - self.start_with_altimage( - pod_job, - &ImageKind::Published(pod_job.pod.image.clone()), - namespace_lookup, - ) - .await - } - async fn list(&self) -> Result> { - self.list_containers(HashMap::from([( - "label".to_owned(), - vec!["org.orcapod=true".to_owned()], - )])) - .await? - .map(|(assigned_name, run_info)| { - let pod_job: PodJob = - serde_json::from_str(get(&run_info.labels, "org.orcapod.pod_job")?)?; - Ok(PodRun::new::(&pod_job, assigned_name)) - }) - .collect() - } - async fn delete(&self, pod_run: &PodRun) -> Result<()> { - self.api - .remove_container( - &pod_run.assigned_name, - Some(RemoveContainerOptions { - force: true, - ..Default::default() - }), - ) - .await?; - Ok(()) - } - async fn get_info(&self, pod_run: &PodRun) -> Result { - let labels = vec![ - "org.orcapod=true".to_owned(), - format!( - "org.orcapod.pod_job.annotation={}", - serde_json::to_string(&pod_run.pod_job.annotation)? - ), - format!("org.orcapod.pod_job.hash={}", pod_run.pod_job.hash), - ]; - - // Add names to the filters - let container_filters = HashMap::from([ - ("label".to_owned(), labels), - ( - "name".to_owned(), - Vec::from([pod_run.assigned_name.clone()]), - ), - ]); - - let (_, run_info) = self - .list_containers(container_filters) - .await? - .next() - .context(selector::MissingInfo { - details: format!("pod run where pod_job.hash = {}", pod_run.pod_job.hash), - })?; - Ok(run_info) - } - #[expect( - clippy::wildcard_enum_match_arm, - reason = "Favor readability due to complexity in external dependency." - )] - async fn get_result( - &self, - pod_run: &PodRun, - namespace_lookup: &HashMap, - ) -> Result { - match self - .api - .wait_container(&pod_run.assigned_name, None::>) - .try_collect::>() - .await - { - Ok(_) => (), - Err(err) => match err { - DockerContainerWaitError { .. } => (), - _ => return Err(OrcaError::from(err)), - }, - } - - let mut result_info: PodRunInfo; - while { - result_info = self.get_info(pod_run).await?; - matches!(&result_info.status, PodStatus::Running) - } { - async_sleep(Duration::from_millis(100)).await; - } - - PodResult::new( - None, - Arc::clone(&pod_run.pod_job), - pod_run.assigned_name.clone(), - result_info.status, - result_info.created, - result_info.terminated.context(selector::MissingInfo { - details: format!( - "terminated where pod_run.assigned_name = {}, pod_run.pod_job.hash = {}", - pod_run.assigned_name, pod_run.pod_job.hash - ), - })?, - namespace_lookup, - self.get_logs(pod_run).await?, - ) - } - - async fn get_logs(&self, pod_run: &PodRun) -> Result { - let mut std_out = Vec::new(); - let mut std_err = Vec::new(); - - self.api - .logs::( - &pod_run.assigned_name, - Some(LogsOptions { - stdout: true, - stderr: true, - ..Default::default() - }), - ) - .try_collect::>() - .await? - .iter() - .for_each(|log_output| match log_output { - LogOutput::StdOut { message } => { - std_out.extend(message.to_vec()); - } - LogOutput::StdErr { message } => { - std_err.extend(message.to_vec()); - } - LogOutput::StdIn { .. } | LogOutput::Console { .. } => { - // Ignore stdin logs, as they are not relevant for our use case - } - }); - - let mut logs = String::from_utf8_lossy(&std_out).to_string(); - if !std_err.is_empty() { - logs.push_str("\nSTDERR:\n"); - logs.push_str(&String::from_utf8_lossy(&std_err)); - } - - // Check for errors in the docker state, if exist, attach it to logs - // This is for when the container exits immediately due to a bad command or similar - let error = self - .api - .inspect_container(&pod_run.assigned_name, None) - .await? - .state - .context(selector::FailedToExtractRunInfo { - container_name: &pod_run.assigned_name, - })? - .error - .context(selector::FailedToExtractRunInfo { - container_name: &pod_run.assigned_name, - })?; - - if !error.is_empty() { - logs.push_str(&error); - } - - Ok(logs) - } -} - -#[uniffi::export] -impl LocalDockerOrchestrator { - /// How to create a local docker orchestrator with an absolute path on docker host where binds - /// will be mounted from. - /// - /// # Errors - /// - /// Will return `Err` if there is an issue creating a local docker orchestrator. - #[uniffi::constructor] - pub fn new() -> Result { - Ok(Self { - api: Docker::connect_with_local_defaults()?, - }) - } -} diff --git a/src/core/util.rs b/src/util.rs similarity index 96% rename from src/core/util.rs rename to src/util.rs index 8b033c80..824965ff 100644 --- a/src/core/util.rs +++ b/src/util.rs @@ -1,4 +1,4 @@ -use crate::uniffi::error::{Result, selector}; +use crate::error::{Result, selector}; use heck::ToSnakeCase as _; use snafu::OptionExt as _; use std::{ diff --git a/src/core/validation.rs b/src/validation.rs similarity index 92% rename from src/core/validation.rs rename to src/validation.rs index 162b4284..f1c506b6 100644 --- a/src/core/validation.rs +++ b/src/validation.rs @@ -1,4 +1,4 @@ -use crate::uniffi::error::{Result, selector}; +use crate::error::{Result, selector}; use snafu::OptionExt as _; use std::collections::{HashMap, HashSet}; diff --git a/tests/agent.rs b/tests/agent.rs index 59469796..b79a7145 100644 --- a/tests/agent.rs +++ b/tests/agent.rs @@ -10,7 +10,7 @@ pub mod fixture; use fixture::{NAMESPACE_LOOKUP_READ_ONLY, TestDirs, pod_jobs_stresser, pull_image}; use itertools::Itertools as _; -use orcapod::uniffi::{ +use orcapod::{ error::Result, model::pod::PodResult, orchestrator::{ diff --git a/tests/error.rs b/tests/error.rs index 4564e162..25c67393 100644 --- a/tests/error.rs +++ b/tests/error.rs @@ -10,7 +10,7 @@ use chrono::DateTime; use dot_parser::ast::Graph as DOTGraph; use fixture::{NAMESPACE_LOOKUP_READ_ONLY, pod_custom, pod_job_custom, pod_job_style, str_to_vec}; use glob::glob; -use orcapod::uniffi::{ +use orcapod::{ error::{OrcaError, Result}, model::packet::PathInfo, orchestrator::{ diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs index 9692ed4a..1257f60f 100644 --- a/tests/fixture/mod.rs +++ b/tests/fixture/mod.rs @@ -8,16 +8,15 @@ )] use names::{Generator, Name}; -use orcapod::uniffi::{ +use orcapod::{ error::Result, model::{ Annotation, packet::{Blob, BlobKind, Packet, PathInfo, PathSet, URI}, pipeline::{Kernel, NodeURI, Pipeline, PipelineJob}, - pod::{Pod, PodJob, PodResult, RecommendSpecs}, + pod::{Pod, PodJob, PodResult, PodStatus, RecommendSpecs}, }, operator::MapOperator, - orchestrator::PodStatus, store::{ModelID, ModelInfo, Store}, }; use std::{ diff --git a/tests/orchestrator.rs b/tests/orchestrator.rs index a1ba528c..6cabbae7 100644 --- a/tests/orchestrator.rs +++ b/tests/orchestrator.rs @@ -10,10 +10,10 @@ pub mod fixture; use fixture::{TestDirs, container_image_style, pod_job_style}; use futures_util::future::join_all; -use orcapod::uniffi::{ +use orcapod::{ error::{OrcaError, Result}, - model::packet::URI, - orchestrator::{ImageKind, Orchestrator, PodRun, PodStatus, docker::LocalDockerOrchestrator}, + model::{packet::URI, pod::PodStatus}, + orchestrator::{ImageKind, Orchestrator, PodRun, docker::LocalDockerOrchestrator}, }; use pretty_assertions::assert_eq; use std::{collections::HashMap, path::PathBuf}; diff --git a/tests/pipeline.rs b/tests/pipeline.rs index 54f512a6..7d2bf505 100644 --- a/tests/pipeline.rs +++ b/tests/pipeline.rs @@ -10,7 +10,7 @@ pub mod fixture; use fixture::{NAMESPACE_LOOKUP_READ_ONLY, pod_custom}; use indoc::indoc; -use orcapod::uniffi::{ +use orcapod::{ error::Result, model::{ packet::{Blob, BlobKind, PathInfo, PathSet, URI}, diff --git a/tests/pipeline_runner.rs b/tests/pipeline_runner.rs index f8f47248..f677125a 100644 --- a/tests/pipeline_runner.rs +++ b/tests/pipeline_runner.rs @@ -16,12 +16,10 @@ use std::{ use crate::fixture::TestDirs; use fixture::pipeline_job; use orcapod::{ - core::pipeline_runner::DockerPipelineRunner, - uniffi::{ - error::Result, - model::pipeline::PipelineStatus, - orchestrator::{agent::Agent, docker::LocalDockerOrchestrator}, - }, + error::Result, + model::pipeline::PipelineStatus, + orchestrator::{agent::Agent, docker::LocalDockerOrchestrator}, + pipeline_runner::DockerPipelineRunner, }; use tokio::fs::read_to_string; diff --git a/tests/store.rs b/tests/store.rs index 4552c03e..07662c8a 100644 --- a/tests/store.rs +++ b/tests/store.rs @@ -11,7 +11,7 @@ pub mod fixture; use fixture::{ NAMESPACE_LOOKUP_READ_ONLY, TestDirs, TestSetup, pod_job_style, pod_result_style, pod_style, }; -use orcapod::uniffi::{ +use orcapod::{ error::Result, model::{ Annotation, ModelType, From f40b9152fa4eef32741989adfed28b6c42919645 Mon Sep 17 00:00:00 2001 From: synicix Date: Tue, 21 Oct 2025 02:36:08 +0000 Subject: [PATCH 2/7] Move pipeline tests to unit testing side --- src/model/pipeline.rs | 532 +++++++++++++++++++++++++++++++++++++++++- src/model/pod.rs | 36 +-- tests/fixture/mod.rs | 8 +- tests/pipeline.rs | 366 ----------------------------- tests/store.rs | 4 +- 5 files changed, 550 insertions(+), 396 deletions(-) delete mode 100644 tests/pipeline.rs diff --git a/src/model/pipeline.rs b/src/model/pipeline.rs index 19e6b1fb..62721b82 100644 --- a/src/model/pipeline.rs +++ b/src/model/pipeline.rs @@ -717,15 +717,203 @@ impl PipelineJob { #[cfg(test)] mod tests { + #![expect( + clippy::panic_in_result_fn, + clippy::indexing_slicing, + clippy::panic, + clippy::type_complexity, + reason = "OK in tests." + )] + use indoc::indoc; use pretty_assertions::assert_eq; - use std::collections::HashMap; + use std::{collections::HashMap, path::PathBuf, sync::Arc}; + + use crate::{ + error::Result, + model::{ + Annotation, ToYaml as _, + packet::{Blob, BlobKind, PathInfo, PathSet, URI}, + pipeline::{Kernel, NodeURI, Pipeline, PipelineJob}, + pod::{Pod, RecommendedSpecs, tests::pod_fixture}, + }, + operator::MapOperator, + }; + + // Pipeline Fixture + pub fn combine_txt_pod(pod_name: &str) -> Result { + Pod::new( + Some(Annotation { + name: pod_name.to_owned(), + description: "Takes two input files, remove the final next line and combine them" + .to_owned(), + version: "1.0.0".to_owned(), + }), + "alpine:3.14".to_owned(), + vec![ + "sh".into(), + "-c".into(), + format!( + "printf '%s %s\\n' \"$(cat input/input_1.txt | head -c -1)\" \"$(cat input/input_2.txt | head -c -1)\" > /output/output.txt" + ), + ], + HashMap::from([ + ( + "input_1".to_owned(), + PathInfo { + path: PathBuf::from("/input/input_1.txt"), + match_pattern: r".*\.txt".to_owned(), + }, + ), + ( + "input_2".into(), + PathInfo { + path: PathBuf::from("/input/input_2.txt"), + match_pattern: r".*\.txt".to_owned(), + }, + ), + ]), + PathBuf::from("/output"), + HashMap::from([( + "output".to_owned(), + PathInfo { + path: PathBuf::from("output.txt"), + match_pattern: r".*\.txt".to_owned(), + }, + )]), + RecommendedSpecs { + cpus: 0.25, + memory: 128_u64 << 20, + }, + None, + ) + } + + #[expect(clippy::too_many_lines, reason = "Test fixture.")] + fn pipeline_fixture() -> Result { + // Create a simple pipeline where the functions job is to add append their name into the input file + // Structure: A -> Mapper -> Joiner -> B -> Mapper -> C, D -> Mapper -> Joiner - use crate::error::Result; - use crate::model::ToYaml as _; - use crate::model::pipeline::NodeURI; - use crate::model::{Annotation, pipeline::Pipeline}; - use crate::operator::MapOperator; + // Create the kernel map + let mut kernel_map = HashMap::new(); + + // Insert the pod into the kernel map + for pod_name in ["A", "B", "C", "D", "E"] { + kernel_map.insert(pod_name.into(), combine_txt_pod(pod_name)?.into()); + } + + let output_to_input_1 = Arc::new(MapOperator::new(HashMap::from([( + "output".to_owned(), + "input_1".to_owned(), + )]))?); + + let output_to_input_2 = Arc::new(MapOperator::new(HashMap::from([( + "output".to_owned(), + "input_2".to_owned(), + )]))?); + + // Create a mapper for A, B, and C + kernel_map.insert( + "pod_a_mapper".into(), + Kernel::MapOperator { + mapper: Arc::clone(&output_to_input_1), + }, + ); + kernel_map.insert( + "pod_b_mapper".into(), + Kernel::MapOperator { + mapper: Arc::clone(&output_to_input_2), + }, + ); + kernel_map.insert( + "pod_c_mapper".into(), + Kernel::MapOperator { + mapper: Arc::clone(&output_to_input_1), + }, + ); + kernel_map.insert( + "pod_d_mapper".into(), + Kernel::MapOperator { + mapper: Arc::clone(&output_to_input_2), + }, + ); + + for joiner_name in ['c', 'd', 'e'] { + kernel_map.insert(format!("pod_{joiner_name}_joiner"), Kernel::JoinOperator); + } + + // Write all the edges in DOT format + let dot = " + digraph { + A -> pod_a_mapper -> pod_c_joiner; + B -> pod_b_mapper -> pod_c_joiner; + pod_c_joiner -> C -> pod_c_mapper-> pod_e_joiner; + D -> pod_d_mapper -> pod_e_joiner; + pod_e_joiner -> E; + } + "; + + Pipeline::new( + dot, + &kernel_map, + HashMap::from([ + ( + "where".into(), + vec![NodeURI { + node_id: "A".into(), + key: "input_1".into(), + }], + ), + ( + "is".into(), + vec![NodeURI { + node_id: "A".into(), + key: "input_2".into(), + }], + ), + ( + "the".into(), + vec![NodeURI { + node_id: "B".into(), + key: "input_1".into(), + }], + ), + ( + "cat_color".into(), + vec![NodeURI { + node_id: "B".into(), + key: "input_2".into(), + }], + ), + ( + "cat".into(), + vec![NodeURI { + node_id: "D".into(), + key: "input_1".into(), + }], + ), + ( + "action".into(), + vec![NodeURI { + node_id: "D".into(), + key: "input_2".into(), + }], + ), + ]), + HashMap::from([( + "output".to_owned(), + NodeURI { + node_id: "E".into(), + key: "output".into(), + }, + )]), + Some(Annotation { + name: "test".into(), + version: "0.0.0".into(), + description: "Test pipeline".into(), + }), + ) + } #[test] fn to_yaml() -> Result<()> { @@ -793,4 +981,336 @@ mod tests { Ok(()) } + + #[expect(clippy::too_many_lines, reason = "Test code")] + #[test] + fn preprocessing() -> Result<()> { + let pipeline = pipeline_fixture()?; + + // Assert that every node has a non-empty hash + let node_hashes = pipeline + .graph + .node_indices() + .map(|idx| { + ( + pipeline.graph[idx].label.as_str(), + pipeline.graph[idx].hash.as_str(), + ) + }) + .collect::>(); + + assert_eq!( + node_hashes, + HashMap::from([ + ( + "pod_c_joiner", + "d2141ce0c203a8b556d7dbbbc6268ac4bbfa444748f92baff42235787f2b7550" + ), + ( + "B", + "964ebb9ddd6bb7db56e53c19e9ac34dfd08779a656295b01e70b5973adc61103" + ), + ( + "C", + "96b30227e0243f282f7a898bd85a246127e664635a3969577932d7653cfb79cb" + ), + ( + "pod_a_mapper", + "83bd3d17026c882db6b6cca7ccca0173f478c11449cfa8bfb13a0518a7e5e32a" + ), + ( + "pod_b_mapper", + "dd73cd3ab345917b25fc028131d83da7ce1c53702fcbabdd19b86a8bdde158b3" + ), + ( + "pod_d_mapper", + "d37f595093e8f7235f97213b3f7ff88b12786e48ec4f22275018cc7d22c113f8" + ), + ( + "A", + "8e43dbc9fd55fa7d1a36fc4a6c036f4113b7aa7fcf38646a2f2472bac6774962" + ), + ( + "E", + "6ec68cc43ea15472731a318584cc8792fb2ff93c96fed6f3f998849b75976694" + ), + ( + "D", + "04cb341a09eeb771846377405a5f33d011f99a7dfa4739fd7876a7e70c994e4e" + ), + ( + "pod_c_mapper", + "240c8e7fa5e0bd88239aba625387ea495fc5323a5d4b6b519946b8f8b907ddf6" + ), + ( + "pod_e_joiner", + "36f3e88889ecf89183205f340043de61f3c6a254026aae5aa1ce587a666e8c30" + ), + ]), + "Node hashes did not match" + ); + + // Check if the input spec contains the correct node hashes + assert_eq!( + pipeline.input_spec, + HashMap::from([ + ( + "the".into(), + vec![NodeURI { + node_id: "964ebb9ddd6bb7db56e53c19e9ac34dfd08779a656295b01e70b5973adc61103" + .into(), + key: "input_1".into(), + },] + ), + ( + "where".into(), + vec![NodeURI { + node_id: "8e43dbc9fd55fa7d1a36fc4a6c036f4113b7aa7fcf38646a2f2472bac6774962" + .into(), + key: "input_1".into(), + },] + ), + ( + "cat_color".into(), + vec![NodeURI { + node_id: "964ebb9ddd6bb7db56e53c19e9ac34dfd08779a656295b01e70b5973adc61103" + .into(), + key: "input_2".into(), + },] + ), + ( + "is".into(), + vec![NodeURI { + node_id: "8e43dbc9fd55fa7d1a36fc4a6c036f4113b7aa7fcf38646a2f2472bac6774962" + .into(), + key: "input_2".into(), + },] + ), + ( + "cat".into(), + vec![NodeURI { + node_id: "04cb341a09eeb771846377405a5f33d011f99a7dfa4739fd7876a7e70c994e4e" + .into(), + key: "input_1".into(), + },] + ), + ( + "action".into(), + vec![NodeURI { + node_id: "04cb341a09eeb771846377405a5f33d011f99a7dfa4739fd7876a7e70c994e4e" + .into(), + key: "input_2".into(), + },] + ), + ]), + "Input spec did not match" + ); + + // Check if the output spec contain the correct node hashes + assert_eq!( + pipeline.output_spec, + HashMap::from([( + "output".into(), + NodeURI { + node_id: "6ec68cc43ea15472731a318584cc8792fb2ff93c96fed6f3f998849b75976694" + .into(), + key: "output".into(), + } + ),]), + "Output spec did not match" + ); + + Ok(()) + } + + #[test] + fn input_packet_checksum() -> Result<()> { + let pipeline = Pipeline::new( + indoc! {" + digraph { + A + } + "}, + &HashMap::from([( + "A".into(), + Kernel::Pod { + pod: pod_fixture()?.into(), + }, + )]), + HashMap::from([( + "input_txt".into(), + vec![NodeURI { + node_id: "A".into(), + key: "input_txt".into(), + }], + )]), + HashMap::new(), + None, + )?; + + let pipeline_job = PipelineJob::new( + pipeline.into(), + &HashMap::from([( + "input_txt".into(), + vec![PathSet::Collection(vec![Blob { + kind: BlobKind::File, + location: URI { + namespace: "default".into(), + path: "input_txt/cat.txt".into(), + }, + checksum: String::new(), + }])], + )]), + URI { + namespace: "default".into(), + path: "output/pipeline".into(), + }, + &HashMap::from([("default".to_owned(), PathBuf::from("./tests/extra/data"))]), + )?; + + let checksum = match &pipeline_job.input_packet["input_txt"].first() { + Some(PathSet::Collection(blobs)) => blobs[0].checksum.clone(), + Some(_) | None => panic!("Input configuration unexpectedly changed."), + }; + + assert_eq!( + checksum, "175cc6f362b2f75acd08a373e000144fdb8d14a833d4b70fd743f16a7039103f", + "Incorrect checksum" + ); + + Ok(()) + } + + /// Testing invalid conditions to make sure validation works + fn basic_pipeline_components() -> Result<( + String, + HashMap, + HashMap>, + HashMap, + )> { + let dot = indoc! {" + digraph { + A + } + "}; + + let metadata = HashMap::from([("A".into(), combine_txt_pod("A")?.into())]); + + let input_spec = HashMap::from([ + ( + "input_1".into(), + vec![NodeURI { + node_id: "A".into(), + key: "input_1".into(), + }], + ), + ( + "input_2".into(), + vec![NodeURI { + node_id: "A".into(), + key: "input_2".into(), + }], + ), + ]); + + let output_spec = HashMap::from([( + "output".into(), + NodeURI { + node_id: "A".into(), + key: "output".into(), + }, + )]); + + Ok((dot.to_owned(), metadata, input_spec, output_spec)) + } + + #[test] + fn invalid_input_spec() -> Result<()> { + let (dot, metadata, _, output_spec) = basic_pipeline_components()?; + + // Test invalid node reference in input_spec + assert!( + Pipeline::new( + &dot, + &metadata, + HashMap::from([( + "input_1".into(), + vec![NodeURI { + node_id: "B".into(), + key: "input_1".into(), + }], + )]), + output_spec.clone(), + None + ) + .is_err(), + "Pipeline creation should have failed due to invalid input_spec" + ); + + // Test invalid key reference in input_spec + assert!( + Pipeline::new( + &dot, + &metadata, + HashMap::from([( + "input_1".into(), + vec![NodeURI { + node_id: "A".into(), + key: "input_3".into(), + }], + )]), + output_spec, + None + ) + .is_err(), + "Pipeline creation should have failed due to invalid input_spec" + ); + + Ok(()) + } + + #[test] + fn invalid_output_spec() -> Result<()> { + let (dot, metadata, input_spec, _) = basic_pipeline_components()?; + + // Test invalid output_spec node reference + assert!( + Pipeline::new( + &dot, + &metadata, + input_spec.clone(), + HashMap::from([( + "A".into(), + NodeURI { + node_id: "B".into(), + key: "output".into(), + } + )]), + None + ) + .is_err(), + "Pipeline creation should have failed due to invalid output_spec" + ); + + // Test invalid output_spec key reference + assert!( + Pipeline::new( + &dot, + &metadata, + input_spec, + HashMap::from([( + "A".into(), + NodeURI { + node_id: "A".into(), + key: "output_dne".into(), + } + )]), + None + ) + .is_err(), + "Pipeline creation should have failed due to invalid output_spec" + ); + + Ok(()) + } } diff --git a/src/model/pod.rs b/src/model/pod.rs index d07dd0f3..9e43e7c5 100644 --- a/src/model/pod.rs +++ b/src/model/pod.rs @@ -45,7 +45,7 @@ pub struct Pod { pub output_spec: HashMap, /// Execution requirements for the pod. #[serde(default)] - pub recommend_specs: RecommendSpecs, + pub recommend_specs: RecommendedSpecs, /// Optional GPU requirements for the pod. If set, then the running system needs a GPU that meets the requirements. pub gpu_requirements: Option, } @@ -65,7 +65,7 @@ impl Pod { input_spec: HashMap, output_dir: PathBuf, output_spec: HashMap, - recommend_specs: RecommendSpecs, + recommend_specs: RecommendedSpecs, gpu_requirements: Option, ) -> Result { let pod_no_hash = Self { @@ -101,14 +101,14 @@ impl ToYaml for Pod { /// Execution recommendations for a pod, since it doesn't impact the actual reproducibility /// it shouldn't be hashed along with the pod #[derive(uniffi::Record, Serialize, Deserialize, Debug, PartialEq, Default, Clone)] -pub struct RecommendSpecs { +pub struct RecommendedSpecs { /// Optimal number of CPU cores needed to run the pod provided by the user pub cpus: f32, /// Optimal amount of memory needed to run the pod provided by the user, code can probably run with less but may hit OOM pub memory: u64, } -impl ToYaml for RecommendSpecs { +impl ToYaml for RecommendedSpecs { fn process_field( field_name: &str, field_value: &serde_yaml::Value, @@ -426,7 +426,7 @@ pub enum PodStatus { } #[cfg(test)] -mod tests { +pub(crate) mod tests { #![expect(clippy::unwrap_used, reason = "OK in tests.")] use std::{ collections::HashMap, @@ -441,7 +441,7 @@ mod tests { model::{ Annotation, ToYaml as _, packet::{Blob, BlobKind, PathInfo, PathSet, URI}, - pod::{Pod, PodJob, PodResult, PodStatus, RecommendSpecs}, + pod::{Pod, PodJob, PodResult, PodStatus, RecommendedSpecs}, }, }; @@ -454,7 +454,7 @@ mod tests { ]) }); - fn basic_pod() -> Result { + pub fn pod_fixture() -> Result { Pod::new( Some(Annotation { name: "test".into(), @@ -481,7 +481,7 @@ mod tests { match_pattern: r".*\.txt".into(), }, )]), - RecommendSpecs { + RecommendedSpecs { cpus: 0.20, memory: 128 << 20, }, @@ -489,8 +489,8 @@ mod tests { ) } - fn basic_pod_job() -> Result { - let pod = Arc::new(basic_pod()?); + fn pod_job_fixture() -> Result { + let pod = Arc::new(pod_fixture()?); PodJob::new( Some(Annotation { name: "test_job".into(), @@ -519,14 +519,14 @@ mod tests { ) } - fn basic_pod_result() -> Result { + fn pod_result_fixture() -> Result { PodResult::new( Some(Annotation { name: "test".into(), version: "0.1".into(), description: "Basic Result for testing hashing and yaml serialization".into(), }), - basic_pod_job()?.into(), + pod_job_fixture()?.into(), "randomly_assigned_name".into(), PodStatus::Completed, 1_737_922_307, @@ -539,7 +539,7 @@ mod tests { #[test] fn pod_hash() { assert_eq!( - basic_pod().unwrap().hash, + pod_fixture().unwrap().hash, "b5574e2efdf26361e8e8e886389a250cfbfcceed08b29325a78fd738cbb2a1b8", "Hash didn't match." ); @@ -548,7 +548,7 @@ mod tests { #[test] fn pod_to_yaml() { assert_eq!( - basic_pod().unwrap().to_yaml().unwrap(), + pod_fixture().unwrap().to_yaml().unwrap(), indoc! {r" class: pod image: alpine:3.14 @@ -574,7 +574,7 @@ mod tests { #[test] fn pod_job_hash() { assert_eq!( - basic_pod_job().unwrap().hash, + pod_job_fixture().unwrap().hash, "80348a4ef866a9dfc1a5d0a48467a6592ef2ed9e8de67930d64afefbb395f1c6", "Hash didn't match." ); @@ -583,7 +583,7 @@ mod tests { #[test] fn pod_job_to_yaml() { assert_eq!( - basic_pod_job().unwrap().to_yaml().unwrap(), + pod_job_fixture().unwrap().to_yaml().unwrap(), indoc! {" class: pod_job pod: b5574e2efdf26361e8e8e886389a250cfbfcceed08b29325a78fd738cbb2a1b8 @@ -609,7 +609,7 @@ mod tests { #[test] fn pod_result_hash() { assert_eq!( - basic_pod_result().unwrap().hash, + pod_result_fixture().unwrap().hash, "92809a4ce13b4fe8c8dcdcf2b48dd14a9dd885593fe3ab5d9809d27bc9a16354", "Hash didn't match." ); @@ -618,7 +618,7 @@ mod tests { #[test] fn pod_result_to_yaml() { assert_eq!( - basic_pod_result().unwrap().to_yaml().unwrap(), + pod_result_fixture().unwrap().to_yaml().unwrap(), indoc! {" class: pod_result pod_job: 80348a4ef866a9dfc1a5d0a48467a6592ef2ed9e8de67930d64afefbb395f1c6 diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs index 1257f60f..451432b1 100644 --- a/tests/fixture/mod.rs +++ b/tests/fixture/mod.rs @@ -14,7 +14,7 @@ use orcapod::{ Annotation, packet::{Blob, BlobKind, Packet, PathInfo, PathSet, URI}, pipeline::{Kernel, NodeURI, Pipeline, PipelineJob}, - pod::{Pod, PodJob, PodResult, PodStatus, RecommendSpecs}, + pod::{Pod, PodJob, PodResult, PodStatus, RecommendedSpecs}, }, operator::MapOperator, store::{ModelID, ModelInfo, Store}, @@ -76,7 +76,7 @@ pub fn pod_style() -> Result { }, ), ]), - RecommendSpecs { + RecommendedSpecs { cpus: 0.25, memory: 1_u64 << 30, }, @@ -171,7 +171,7 @@ pub fn pod_custom( input_spec, PathBuf::from("/tmp/output"), HashMap::new(), - RecommendSpecs { + RecommendedSpecs { cpus: 0.1, memory: 50_u64 << 20, }, @@ -325,7 +325,7 @@ pub fn combine_txt_pod(pod_name: &str) -> Result { match_pattern: r".*\.txt".to_owned(), }, )]), - RecommendSpecs { + RecommendedSpecs { cpus: 0.25, memory: 128_u64 << 20, }, diff --git a/tests/pipeline.rs b/tests/pipeline.rs deleted file mode 100644 index 7d2bf505..00000000 --- a/tests/pipeline.rs +++ /dev/null @@ -1,366 +0,0 @@ -#![expect( - missing_docs, - clippy::panic_in_result_fn, - clippy::indexing_slicing, - clippy::panic, - clippy::type_complexity, - reason = "OK in tests." -)] - -pub mod fixture; -use fixture::{NAMESPACE_LOOKUP_READ_ONLY, pod_custom}; -use indoc::indoc; -use orcapod::{ - error::Result, - model::{ - packet::{Blob, BlobKind, PathInfo, PathSet, URI}, - pipeline::{Kernel, NodeURI, Pipeline, PipelineJob}, - }, -}; -use pretty_assertions::assert_eq; -use std::collections::HashMap; - -use crate::fixture::{combine_txt_pod, pipeline}; - -#[expect(clippy::too_many_lines, reason = "Test code")] -#[test] -fn preprocessing() -> Result<()> { - let pipeline = pipeline()?; - - // Assert that every node has a non-empty hash - let node_hashes = pipeline - .graph - .node_indices() - .map(|idx| { - ( - pipeline.graph[idx].label.as_str(), - pipeline.graph[idx].hash.as_str(), - ) - }) - .collect::>(); - - assert_eq!( - node_hashes, - HashMap::from([ - ( - "pod_c_joiner", - "d2141ce0c203a8b556d7dbbbc6268ac4bbfa444748f92baff42235787f2b7550" - ), - ( - "B", - "964ebb9ddd6bb7db56e53c19e9ac34dfd08779a656295b01e70b5973adc61103" - ), - ( - "C", - "96b30227e0243f282f7a898bd85a246127e664635a3969577932d7653cfb79cb" - ), - ( - "pod_a_mapper", - "83bd3d17026c882db6b6cca7ccca0173f478c11449cfa8bfb13a0518a7e5e32a" - ), - ( - "pod_b_mapper", - "dd73cd3ab345917b25fc028131d83da7ce1c53702fcbabdd19b86a8bdde158b3" - ), - ( - "pod_d_mapper", - "d37f595093e8f7235f97213b3f7ff88b12786e48ec4f22275018cc7d22c113f8" - ), - ( - "A", - "8e43dbc9fd55fa7d1a36fc4a6c036f4113b7aa7fcf38646a2f2472bac6774962" - ), - ( - "E", - "6ec68cc43ea15472731a318584cc8792fb2ff93c96fed6f3f998849b75976694" - ), - ( - "D", - "04cb341a09eeb771846377405a5f33d011f99a7dfa4739fd7876a7e70c994e4e" - ), - ( - "pod_c_mapper", - "240c8e7fa5e0bd88239aba625387ea495fc5323a5d4b6b519946b8f8b907ddf6" - ), - ( - "pod_e_joiner", - "36f3e88889ecf89183205f340043de61f3c6a254026aae5aa1ce587a666e8c30" - ), - ]), - "Node hashes did not match" - ); - - // Check if the input spec contains the correct node hashes - assert_eq!( - pipeline.input_spec, - HashMap::from([ - ( - "the".into(), - vec![NodeURI { - node_id: "964ebb9ddd6bb7db56e53c19e9ac34dfd08779a656295b01e70b5973adc61103" - .into(), - key: "input_1".into(), - },] - ), - ( - "where".into(), - vec![NodeURI { - node_id: "8e43dbc9fd55fa7d1a36fc4a6c036f4113b7aa7fcf38646a2f2472bac6774962" - .into(), - key: "input_1".into(), - },] - ), - ( - "cat_color".into(), - vec![NodeURI { - node_id: "964ebb9ddd6bb7db56e53c19e9ac34dfd08779a656295b01e70b5973adc61103" - .into(), - key: "input_2".into(), - },] - ), - ( - "is".into(), - vec![NodeURI { - node_id: "8e43dbc9fd55fa7d1a36fc4a6c036f4113b7aa7fcf38646a2f2472bac6774962" - .into(), - key: "input_2".into(), - },] - ), - ( - "cat".into(), - vec![NodeURI { - node_id: "04cb341a09eeb771846377405a5f33d011f99a7dfa4739fd7876a7e70c994e4e" - .into(), - key: "input_1".into(), - },] - ), - ( - "action".into(), - vec![NodeURI { - node_id: "04cb341a09eeb771846377405a5f33d011f99a7dfa4739fd7876a7e70c994e4e" - .into(), - key: "input_2".into(), - },] - ), - ]), - "Input spec did not match" - ); - - // Check if the output spec contain the correct node hashes - assert_eq!( - pipeline.output_spec, - HashMap::from([( - "output".into(), - NodeURI { - node_id: "6ec68cc43ea15472731a318584cc8792fb2ff93c96fed6f3f998849b75976694".into(), - key: "output".into(), - } - ),]), - "Output spec did not match" - ); - - Ok(()) -} - -#[test] -fn input_packet_checksum() -> Result<()> { - let pipeline = Pipeline::new( - indoc! {" - digraph { - A - } - "}, - &HashMap::from([( - "A".into(), - Kernel::Pod { - pod: pod_custom( - "alpine:3.14", - vec!["echo".into()], - HashMap::from([( - "node_key_1".into(), - PathInfo { - path: "/tmp/input".into(), - match_pattern: r".*\.jpeg".into(), - }, - )]), - )? - .into(), - }, - )]), - HashMap::from([( - "pipeline_key_1".into(), - vec![NodeURI { - node_id: "A".into(), - key: "node_key_1".into(), - }], - )]), - HashMap::new(), - None, - )?; - - let pipeline_job = PipelineJob::new( - pipeline.into(), - &HashMap::from([( - "pipeline_key_1".into(), - vec![PathSet::Collection(vec![Blob { - kind: BlobKind::File, - location: URI { - namespace: "default".into(), - path: "images/subject.jpeg".into(), - }, - checksum: String::new(), - }])], - )]), - URI { - namespace: "default".into(), - path: "output/pipeline".into(), - }, - &NAMESPACE_LOOKUP_READ_ONLY, - )?; - - let checksum = match &pipeline_job.input_packet["pipeline_key_1"].first() { - Some(PathSet::Collection(blobs)) => blobs[0].checksum.clone(), - Some(_) | None => panic!("Input configuration unexpectedly changed."), - }; - - assert_eq!( - checksum, - "8b44b8ea83b1f5eec3ac16cf941767e629896c465803fb69c21adbbf984516bd".to_owned(), - "Incorrect checksum" - ); - - Ok(()) -} - -/// Testing invalid conditions to make sure validation works -fn basic_pipeline_components() -> Result<( - String, - HashMap, - HashMap>, - HashMap, -)> { - let dot = indoc! {" - digraph { - A - } - "}; - - let metadata = HashMap::from([("A".into(), combine_txt_pod("A")?.into())]); - - let input_spec = HashMap::from([ - ( - "input_1".into(), - vec![NodeURI { - node_id: "A".into(), - key: "input_1".into(), - }], - ), - ( - "input_2".into(), - vec![NodeURI { - node_id: "A".into(), - key: "input_2".into(), - }], - ), - ]); - - let output_spec = HashMap::from([( - "output".into(), - NodeURI { - node_id: "A".into(), - key: "output".into(), - }, - )]); - - Ok((dot.to_owned(), metadata, input_spec, output_spec)) -} - -#[test] -fn invalid_input_spec() -> Result<()> { - let (dot, metadata, _, output_spec) = basic_pipeline_components()?; - - // Test invalid node reference in input_spec - assert!( - Pipeline::new( - &dot, - &metadata, - HashMap::from([( - "input_1".into(), - vec![NodeURI { - node_id: "B".into(), - key: "input_1".into(), - }], - )]), - output_spec.clone(), - None - ) - .is_err(), - "Pipeline creation should have failed due to invalid input_spec" - ); - - // Test invalid key reference in input_spec - assert!( - Pipeline::new( - &dot, - &metadata, - HashMap::from([( - "input_1".into(), - vec![NodeURI { - node_id: "A".into(), - key: "input_3".into(), - }], - )]), - output_spec, - None - ) - .is_err(), - "Pipeline creation should have failed due to invalid input_spec" - ); - - Ok(()) -} - -#[test] -fn invalid_output_spec() -> Result<()> { - let (dot, metadata, input_spec, _) = basic_pipeline_components()?; - - // Test invalid output_spec node reference - assert!( - Pipeline::new( - &dot, - &metadata, - input_spec.clone(), - HashMap::from([( - "A".into(), - NodeURI { - node_id: "B".into(), - key: "output".into(), - } - )]), - None - ) - .is_err(), - "Pipeline creation should have failed due to invalid output_spec" - ); - - // Test invalid output_spec key reference - assert!( - Pipeline::new( - &dot, - &metadata, - input_spec, - HashMap::from([( - "A".into(), - NodeURI { - node_id: "A".into(), - key: "output_dne".into(), - } - )]), - None - ) - .is_err(), - "Pipeline creation should have failed due to invalid output_spec" - ); - - Ok(()) -} diff --git a/tests/store.rs b/tests/store.rs index 07662c8a..03114324 100644 --- a/tests/store.rs +++ b/tests/store.rs @@ -16,7 +16,7 @@ use orcapod::{ model::{ Annotation, ModelType, packet::PathInfo, - pod::{Pod, RecommendSpecs}, + pod::{Pod, RecommendedSpecs}, }, operator::MapOperator, store::{ModelID, ModelInfo, Store as _, filestore::LocalFileStore}, @@ -284,7 +284,7 @@ fn pod_annotation_unique() -> Result<()> { }, )]); let output_dir: PathBuf = "/output".into(); - let exec_requirements = RecommendSpecs { + let exec_requirements = RecommendedSpecs { cpus: 0.25, memory: 1_u64 << 30, }; From c353b9cd05fc1dbfc50bcb4aa68a307aa972082c Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 22 Oct 2025 07:30:27 +0000 Subject: [PATCH 3/7] Clean up clippy restrictions and updated code --- Cargo.toml | 20 +- src/bin/uniffi-bindgen.rs | 1 - src/crypto.rs | 2 - src/model/mod.rs | 4 +- src/model/packet.rs | 4 +- src/model/pipeline.rs | 454 ++++++++-------- src/model/pod.rs | 32 +- src/orchestrator/agent.rs | 120 ++--- src/orchestrator/docker.rs | 1004 ++++++++++++++++++------------------ src/pipeline_runner.rs | 8 +- src/store/filestore.rs | 4 + tests/agent.rs | 1 - tests/error.rs | 2 - tests/fixture/mod.rs | 2 +- 14 files changed, 822 insertions(+), 836 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 601c668c..7e97935f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -113,28 +113,19 @@ restriction = "deny" style = "deny" suspicious = "deny" -allow_attributes = { level = "allow", priority = 127 } # Useful when suppressing warnings is also desired. arbitrary_source_item_ordering = { level = "allow", priority = 127 } # allow arbitrary ordering to keep relevant code nearby -arithmetic_side_effects = { level = "allow", priority = 127 } # allow arithmetic for convenience though it could overflow as_conversions = { level = "allow", priority = 127 } # allow casting -assertions_on_result_states = { level = "allow", priority = 127 } # allow checking is_ok/is_err -big_endian_bytes = { level = "allow", priority = 127 } # allow to_be_bytes / from_be_bytes blanket_clippy_restriction_lints = { level = "allow", priority = 127 } # allow setting all restrictions so we can omit specific ones -default_numeric_fallback = { level = "allow", priority = 127 } # allow type inferred by numeric literal +default_numeric_fallback = { level = "allow", priority = 127 } # allow type inferred by numeric literal, detection is buggy disallowed_script_idents = { level = "allow", priority = 127 } # skip since we use only ascii -else_if_without_else = { level = "allow", priority = 127 } # missing else ok -exhaustive_enums = { level = "allow", priority = 127 } # revisit once lib is ready to be used externally +exhaustive_enums = { level = "allow", priority = 127 } # remove requirement to label enum as exhaustive exhaustive_structs = { level = "allow", priority = 127 } # revisit once lib is ready to be used externally field_scoped_visibility_modifiers = { level = "allow", priority = 127 } # allow field-level visibility modifiers float_arithmetic = { level = "allow", priority = 127 } # allow float arithmetic -host_endian_bytes = { level = "allow", priority = 127 } # allow to_ne_bytes / from_ne_bytes impl_trait_in_params = { level = "allow", priority = 127 } # impl in params ok implicit_return = { level = "allow", priority = 127 } # missing return ok -inline_asm_x86_intel_syntax = { level = "allow", priority = 127 } # intel syntax ok -integer_division = { level = "allow", priority = 127 } # allow discarding remainder iter_over_hash_type = { level = "allow", priority = 127 } # allow iterating over unordered iterables like `HashMap` little_endian_bytes = { level = "allow", priority = 127 } # allow to_le_bytes / from_le_bytes -missing_asserts_for_indexing = { level = "allow", priority = 127 } # missing assert before indexing ok missing_docs_in_private_items = { level = "allow", priority = 127 } # missing docs on private ok missing_inline_in_public_items = { level = "allow", priority = 127 } # let rust compiler determine best inline logic missing_trait_methods = { level = "allow", priority = 127 } # allow in favor of rustc `implement the missing item` @@ -150,21 +141,16 @@ print_stderr = { level = "allow", priority = 127 } # stderr print_stdout = { level = "allow", priority = 127 } # stdout prints ok pub_use = { level = "allow", priority = 127 } # ok to structure source into many files but clean up import pub_with_shorthand = { level = "allow", priority = 127 } # allow use of pub(super) -pub_without_shorthand = { level = "allow", priority = 127 } # allow use of pub(in super) question_mark_used = { level = "allow", priority = 127 } # allow question operator self_named_module_files = { level = "allow", priority = 127 } # mod files ok -semicolon_inside_block = { level = "allow", priority = 127 } # ok to keep inside block separated_literal_suffix = { level = "allow", priority = 127 } # literal suffixes should be separated by underscore +single_call_fn = { level = "allow", priority = 127 } # allow functions called only once, which allows better code organization single_char_lifetime_names = { level = "allow", priority = 127 } # single char lifetimes ok -single_component_path_imports = { level = "allow", priority = 127 } # allow for readability std_instead_of_alloc = { level = "allow", priority = 127 } # we should use std when possible std_instead_of_core = { level = "allow", priority = 127 } # we should use std when possible string_add = { level = "allow", priority = 127 } # simple concat ok -string_lit_chars_any = { level = "allow", priority = 127 } # favor readability until a perf case comes up use_debug = { level = "warn", priority = 127 } # debug print wildcard_enum_match_arm = { level = "allow", priority = 127 } # allow wildcard match arm in enums # temporary -single_call_fn = { level = "allow", priority = 127 } # remove once more models need pointer serializers/deserializers tests_outside_test_module = { level = "allow", priority = 127 } # for now due to false-positive for integration tests: https://github.com/rust-lang/rust-clippy/pull/13038 -todo = { level = "allow", priority = 127 } # allow while we are working towards MVP release diff --git a/src/bin/uniffi-bindgen.rs b/src/bin/uniffi-bindgen.rs index 342d0fb1..10c9649b 100644 --- a/src/bin/uniffi-bindgen.rs +++ b/src/bin/uniffi-bindgen.rs @@ -1,5 +1,4 @@ //! `uniffi` CLI to generate client bindings e.g. for Python. -use uniffi; fn main() { uniffi::uniffi_bindgen_main(); diff --git a/src/crypto.rs b/src/crypto.rs index fc8f32ad..912e4079 100644 --- a/src/crypto.rs +++ b/src/crypto.rs @@ -1,6 +1,4 @@ -use hex; use rand::{self, RngCore as _}; -use serde_yaml; use sha2::{Digest as _, Sha256}; use snafu::ResultExt as _; use std::{ diff --git a/src/model/mod.rs b/src/model/mod.rs index 4e23ade3..491ffb31 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -22,6 +22,8 @@ pub enum ModelType { PodJob, /// See [`PodResult`](crate::uniffi::model::pod::PodResult). PodResult, + /// See [`Pipeline`](crate::uniffi::model::pipeline::Pipeline). + Pipeline, } /// Standard metadata structure for all model instances. @@ -80,7 +82,7 @@ where sorted.serialize(serializer) } -#[allow(clippy::ref_option, reason = "Serde requires this signature.")] +#[expect(clippy::ref_option, reason = "Serde requires this signature.")] pub(crate) fn serialize_hashmap_option( map_option: &Option>, serializer: S, diff --git a/src/model/packet.rs b/src/model/packet.rs index 1f726f5a..01d2a63a 100644 --- a/src/model/packet.rs +++ b/src/model/packet.rs @@ -20,11 +20,11 @@ impl PathInfo {} /// File or directory options for BLOBs. #[derive(uniffi::Enum, Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)] pub enum BlobKind { + /// A single directory. + Directory, /// A single file. #[default] File, - /// A single directory. - Directory, } /// Location of BLOB data. diff --git a/src/model/pipeline.rs b/src/model/pipeline.rs index 62721b82..0dc208f1 100644 --- a/src/model/pipeline.rs +++ b/src/model/pipeline.rs @@ -132,203 +132,6 @@ impl Pipeline { } } -impl PartialEq for Pipeline { - fn eq(&self, other: &Self) -> bool { - self.hash == other.hash - && self.annotation == other.annotation - && self.input_spec.keys().collect::>() - == other.input_spec.keys().collect::>() - && self.input_spec.values().collect::>() - == other.input_spec.values().collect::>() - && self.output_spec == other.output_spec - } -} - -/// A compute pipeline job that supplies input/output targets. -#[expect( - clippy::field_scoped_visibility_modifiers, - reason = "Temporary until a proper hash is implemented." -)] -#[derive(uniffi::Object, Debug, Display, CloneGetters, Deserialize, Serialize, Clone)] -#[getset(get_clone, impl_attrs = "#[uniffi::export]")] -#[display("{self:#?}")] -#[uniffi::export(Display)] -pub struct PipelineJob { - /// todo: replace with a consistent hash - #[getset(skip)] - pub(crate) hash: String, - /// A pipeline to base the pipeline job on. - pub pipeline: Arc, - /// Attached, external input packet. Applies cartesian product by default on keys pointing to the same node. - pub input_packet: HashMap>, - /// Attached, external output directory. - pub output_dir: URI, -} - -#[uniffi::export] -impl PipelineJob { - /// Construct a new pipeline job instance. - /// - /// # Errors - /// - /// Will return `Err` if there is an issue initializing a `PipelineJob` instance. - #[uniffi::constructor] - pub fn new( - pipeline: Arc, - input_packet: &HashMap>, - output_dir: URI, - namespace_lookup: &HashMap, - ) -> Result { - validate_packet("input".into(), &pipeline.input_spec, input_packet)?; - let input_packet_with_checksum = input_packet - .iter() - .map(|(path_set_key, path_sets)| { - Ok(( - path_set_key.clone(), - path_sets - .iter() - .map(|path_set| { - Ok(match path_set { - PathSet::Unary(blob) => { - PathSet::Unary(hash_blob(namespace_lookup, blob)?) - } - PathSet::Collection(blobs) => PathSet::Collection( - blobs - .iter() - .map(|blob| hash_blob(namespace_lookup, blob)) - .collect::>()?, - ), - }) - }) - .collect::>()?, - )) - }) - .collect::>()?; - - Ok(Self { - hash: make_random_hash(), - pipeline, - input_packet: input_packet_with_checksum, - output_dir, - }) - } -} - -/// Struct to hold the result of a pipeline execution. -#[derive(uniffi::Object, Debug, Clone, Deserialize, Serialize, Display, CloneGetters)] -#[getset(get_clone, impl_attrs = "#[uniffi::export]")] -#[display("{self:#?}")] -#[uniffi::export(Display)] -pub struct PipelineResult { - /// The pipeline job that was executed. - pub pipeline_job: Arc, - /// The result of the pipeline execution. - pub output_packets: HashMap>, - /// Logs of any failures that occurred during the pipeline execution. - pub failure_logs: Vec, - /// The status of the pipeline execution. - pub status: PipelineStatus, -} - -/// The status of a pipeline execution. -#[derive(uniffi::Enum, Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] -pub enum PipelineStatus { - /// The pipeline is currently running. - Running, - /// The pipeline has completed successfully. - Succeeded, - /// The pipeline has failed. - Failed, - /// The pipeline has partially succeeded. There should be some failure logs - PartiallySucceeded, -} -/// A node in a computational pipeline. -#[derive(uniffi::Enum, Debug, Clone, Deserialize, Serialize)] -pub enum Kernel { - /// Pod reference. - Pod { - /// See [`Pod`](crate::uniffi::model::pod::Pod). - pod: Arc, - }, - /// Cartesian product operation. See [`JoinOperator`](crate::core::operator::JoinOperator). - JoinOperator, - /// Rename a path set key operation. - MapOperator { - /// See [`MapOperator`](crate::core::operator::MapOperator). - mapper: Arc, - }, -} - -impl From for Kernel { - fn from(mapper: MapOperator) -> Self { - Self::MapOperator { - mapper: Arc::new(mapper), - } - } -} - -impl From for Kernel { - fn from(pod: Pod) -> Self { - Self::Pod { pod: Arc::new(pod) } - } -} - -impl From> for Kernel { - fn from(pod: Arc) -> Self { - Self::Pod { pod } - } -} - -impl Kernel { - /// Get a unique hash that represents the kernel. - /// The exception here is the `JoinOperator` doesn't have any pre execution configuration, since it's logic is completely dependent on what is fed to it during execution. - pub fn get_hash(&self) -> &str { - match self { - Self::Pod { pod } => &pod.hash, - Self::JoinOperator => &JOIN_OPERATOR_HASH, - Self::MapOperator { mapper } => &mapper.hash, - } - } -} - -impl PartialEq for Kernel { - fn eq(&self, other: &Self) -> bool { - self.get_hash() == other.get_hash() - } -} - -impl Eq for Kernel {} - -impl Hash for Kernel { - fn hash(&self, state: &mut H) { - self.get_hash().hash(state); - } -} - -/// Index from pipeline node into pod specification. -#[derive( - uniffi::Record, Debug, Clone, Deserialize, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord, -)] -pub struct NodeURI { - /// Node reference name in pipeline. - pub node_id: String, - /// Specification key. - pub key: String, -} - -/// A node in the computation pipeline that stores its hash, kernel, and user provided label. -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] -pub struct PipelineNode { - /// Hash that represent the node - pub hash: String, - /// Kernel associated with the node - pub kernel: Kernel, - /// User provided label for the node - pub label: String, - /// This is meant for internal use only to track the node index in the graph - pub node_idx: NodeIndex, -} - impl Pipeline { /// Validate the pipeline to ensure that, based on user labels: /// 1. Each node's `input_spec` is covered by either its parent nodes or the pipeline's `input_spec` @@ -622,40 +425,85 @@ impl Pipeline { } } -impl Serialize for Pipeline { - fn serialize(&self, serializer: S) -> result::Result - where - S: serde::Serializer, - { - let mut state = serializer.serialize_struct("Pipeline", 4)?; - state.serialize_field("kernel_lut", &self.get_kernel_to_node_lut())?; - state.serialize_field("dot", &self.to_dot_lex())?; +impl PartialEq for Pipeline { + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash + && self.annotation == other.annotation + && self.input_spec.keys().collect::>() + == other.input_spec.keys().collect::>() + && self.input_spec.values().collect::>() + == other.input_spec.values().collect::>() + && self.output_spec == other.output_spec + } +} - // Input spec needs to be sorted for consistent serialization - let input_spec_sorted: BTreeMap<_, Vec> = self - .input_spec +/// A compute pipeline job that supplies input/output targets. +#[expect( + clippy::field_scoped_visibility_modifiers, + reason = "Temporary until a proper hash is implemented." +)] +#[derive(uniffi::Object, Debug, Display, CloneGetters, Deserialize, Serialize, Clone)] +#[getset(get_clone, impl_attrs = "#[uniffi::export]")] +#[display("{self:#?}")] +#[uniffi::export(Display)] +pub struct PipelineJob { + /// todo: replace with a consistent hash + #[getset(skip)] + pub(crate) hash: String, + /// A pipeline to base the pipeline job on. + pub pipeline: Arc, + /// Attached, external input packet. Applies cartesian product by default on keys pointing to the same node. + pub input_packet: HashMap>, + /// Attached, external output directory. + pub output_dir: URI, +} + +#[uniffi::export] +impl PipelineJob { + /// Construct a new pipeline job instance. + /// + /// # Errors + /// + /// Will return `Err` if there is an issue initializing a `PipelineJob` instance. + #[uniffi::constructor] + pub fn new( + pipeline: Arc, + input_packet: &HashMap>, + output_dir: URI, + namespace_lookup: &HashMap, + ) -> Result { + validate_packet("input".into(), &pipeline.input_spec, input_packet)?; + let input_packet_with_checksum = input_packet .iter() - .map(|(k, v)| { - let mut sorted_v = v.clone(); - sorted_v.sort(); - (k, sorted_v) + .map(|(path_set_key, path_sets)| { + Ok(( + path_set_key.clone(), + path_sets + .iter() + .map(|path_set| { + Ok(match path_set { + PathSet::Unary(blob) => { + PathSet::Unary(hash_blob(namespace_lookup, blob)?) + } + PathSet::Collection(blobs) => PathSet::Collection( + blobs + .iter() + .map(|blob| hash_blob(namespace_lookup, blob)) + .collect::>()?, + ), + }) + }) + .collect::>()?, + )) }) - .collect(); - state.serialize_field("input_spec", &input_spec_sorted)?; - state.serialize_field("output_spec", &self.output_spec)?; - state.end() - } -} + .collect::>()?; -impl ToYaml for Pipeline { - fn process_field( - field_name: &str, - field_value: &serde_yaml::Value, - ) -> Option<(String, serde_yaml::Value)> { - match field_name { - "hash" | "annotation" => None, // Skip annotation field - _ => Some((field_name.to_owned(), field_value.clone())), - } + Ok(Self { + hash: make_random_hash(), + pipeline, + input_packet: input_packet_with_checksum, + output_dir, + }) } } @@ -715,6 +563,158 @@ impl PipelineJob { } } +impl Serialize for Pipeline { + fn serialize(&self, serializer: S) -> result::Result + where + S: serde::Serializer, + { + let mut state = serializer.serialize_struct("Pipeline", 4)?; + state.serialize_field("kernel_lut", &self.get_kernel_to_node_lut())?; + state.serialize_field("dot", &self.to_dot_lex())?; + + // Input spec needs to be sorted for consistent serialization + let input_spec_sorted: BTreeMap<_, Vec> = self + .input_spec + .iter() + .map(|(k, v)| { + let mut sorted_v = v.clone(); + sorted_v.sort(); + (k, sorted_v) + }) + .collect(); + state.serialize_field("input_spec", &input_spec_sorted)?; + state.serialize_field("output_spec", &self.output_spec)?; + state.end() + } +} + +impl ToYaml for Pipeline { + fn process_field( + field_name: &str, + field_value: &serde_yaml::Value, + ) -> Option<(String, serde_yaml::Value)> { + match field_name { + "hash" | "annotation" => None, // Skip annotation field + _ => Some((field_name.to_owned(), field_value.clone())), + } + } +} + +/// Struct to hold the result of a pipeline execution. +#[derive(uniffi::Object, Debug, Clone, Deserialize, Serialize, Display, CloneGetters)] +#[getset(get_clone, impl_attrs = "#[uniffi::export]")] +#[display("{self:#?}")] +#[uniffi::export(Display)] +pub struct PipelineResult { + /// The pipeline job that was executed. + pub pipeline_job: Arc, + /// The result of the pipeline execution. + pub output_packets: HashMap>, + /// Logs of any failures that occurred during the pipeline execution. + pub failure_logs: Vec, + /// The status of the pipeline execution. + pub status: PipelineStatus, +} + +/// The status of a pipeline execution. +#[derive(uniffi::Enum, Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub enum PipelineStatus { + /// The pipeline is currently running. + Running, + /// The pipeline has completed successfully. + Succeeded, + /// The pipeline has failed. + Failed, + /// The pipeline has partially succeeded. There should be some failure logs + PartiallySucceeded, +} +/// A node in a computational pipeline. +#[derive(uniffi::Enum, Debug, Clone, Deserialize, Serialize)] +pub enum Kernel { + /// Pod reference. + Pod { + /// See [`Pod`](crate::uniffi::model::pod::Pod). + pod: Arc, + }, + /// Cartesian product operation. See [`JoinOperator`](crate::core::operator::JoinOperator). + JoinOperator, + /// Rename a path set key operation. + MapOperator { + /// See [`MapOperator`](crate::core::operator::MapOperator). + mapper: Arc, + }, +} + +impl From for Kernel { + fn from(mapper: MapOperator) -> Self { + Self::MapOperator { + mapper: Arc::new(mapper), + } + } +} + +impl From for Kernel { + fn from(pod: Pod) -> Self { + Self::Pod { pod: Arc::new(pod) } + } +} + +impl From> for Kernel { + fn from(pod: Arc) -> Self { + Self::Pod { pod } + } +} + +impl Kernel { + /// Get a unique hash that represents the kernel. + /// The exception here is the `JoinOperator` doesn't have any pre execution configuration, since it's logic is completely dependent on what is fed to it during execution. + pub fn get_hash(&self) -> &str { + match self { + Self::Pod { pod } => &pod.hash, + Self::JoinOperator => &JOIN_OPERATOR_HASH, + Self::MapOperator { mapper } => &mapper.hash, + } + } +} + +impl PartialEq for Kernel { + fn eq(&self, other: &Self) -> bool { + self.get_hash() == other.get_hash() + } +} + +impl Eq for Kernel {} + +impl Hash for Kernel { + fn hash(&self, state: &mut H) { + self.get_hash().hash(state); + } +} + +/// Index from pipeline node into pod specification. +#[derive( + uniffi::Record, Debug, Clone, Deserialize, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord, +)] +pub struct NodeURI { + /// Node reference name in pipeline. + pub node_id: String, + /// Specification key. + pub key: String, +} + +/// A node in the computation pipeline that stores its hash, kernel, and user provided label. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct PipelineNode { + /// Hash that represent the node + pub hash: String, + /// Kernel associated with the node + pub kernel: Kernel, + /// User provided label for the node + pub label: String, + /// This is meant for internal use only to track the node index in the graph + pub node_idx: NodeIndex, +} + #[cfg(test)] mod tests { #![expect( diff --git a/src/model/pod.rs b/src/model/pod.rs index 9e43e7c5..59812424 100644 --- a/src/model/pod.rs +++ b/src/model/pod.rs @@ -363,6 +363,22 @@ impl ToYaml for PodResult { } } +/// Status of a particular compute run. +#[derive(uniffi::Enum, Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Default)] +pub enum PodStatus { + /// Run is ongoing. + Running, + /// Run has completed successfully. + Completed, + /// Run failed with the provided error code. + Failed(i16), + /// For other container states that are not listed. + Undefined, + /// No status set. + #[default] + Unset, +} + #[expect(clippy::expect_used, reason = "Serde requires this signature.")] fn deserialize_pod<'de, D>(deserializer: D) -> result::Result, D::Error> where @@ -409,22 +425,6 @@ where ) } -/// Status of a particular compute run. -#[derive(uniffi::Enum, Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Default)] -pub enum PodStatus { - /// Run is ongoing. - Running, - /// Run has completed successfully. - Completed, - /// Run failed with the provided error code. - Failed(i16), - /// For other container states that are not listed. - Undefined, - /// No status set. - #[default] - Unset, -} - #[cfg(test)] pub(crate) mod tests { #![expect(clippy::unwrap_used, reason = "OK in tests.")] diff --git a/src/orchestrator/agent.rs b/src/orchestrator/agent.rs index 106e57ae..488a3b63 100644 --- a/src/orchestrator/agent.rs +++ b/src/orchestrator/agent.rs @@ -127,6 +127,66 @@ impl AgentClient { } } +impl AgentClient { + #[expect( + clippy::let_underscore_must_use, + reason = "write! on a `String` cannot fail. https://rust-lang.github.io/rust-clippy/master/index.html#format_collect" + )] + pub(crate) fn make_key_expr( + &self, + is_subscriber: bool, + topic: &str, + mut metadata: BTreeMap<&str, String>, + ) -> String { + metadata.insert("group", self.group.clone()); + metadata.insert("topic", topic.to_owned()); + + let delimiter = if is_subscriber { + "**/".to_owned() + } else { + metadata.insert("host", self.host.clone()); + metadata.insert("timestamp", Utc::now().to_rfc3339()); + String::new() + }; + + metadata + .iter() + .fold(delimiter.clone(), |mut key_expr, (key, value)| { + let _ = write!(key_expr, "{key}/{value}/{delimiter}"); + key_expr + }) + .trim_end_matches('/') + .to_owned() + } + + pub(crate) async fn publish( + &self, + topic: &str, + metadata: BTreeMap<&str, String>, + payload: &T, + ) -> Result<()> + where + T: Serialize + Sync + ?Sized, + { + Ok(self + .session + .put( + self.make_key_expr(false, topic, metadata), + &serde_json::to_vec(payload)?, + ) + .await + .context(selector::AgentCommunicationFailure {})?) + } + /// Send a log message to the agent network. + /// + /// # Errors + /// + /// Will fail if there is an issue sending the message. + pub(crate) async fn log(&self, message: &str) -> Result<()> { + self.publish("log", BTreeMap::new(), message).await + } +} + /// An execution agent. #[derive(uniffi::Object, CloneGetters, Display, Debug, Clone)] #[getset(get_clone, impl_attrs = "#[uniffi::export]")] @@ -237,66 +297,6 @@ pub(crate) fn extract_metadata(key_expr: &str) -> HashMap { .collect() } -impl AgentClient { - #[expect( - clippy::let_underscore_must_use, - reason = "write! on a `String` cannot fail. https://rust-lang.github.io/rust-clippy/master/index.html#format_collect" - )] - pub(crate) fn make_key_expr( - &self, - is_subscriber: bool, - topic: &str, - mut metadata: BTreeMap<&str, String>, - ) -> String { - metadata.insert("group", self.group.clone()); - metadata.insert("topic", topic.to_owned()); - - let delimiter = if is_subscriber { - "**/".to_owned() - } else { - metadata.insert("host", self.host.clone()); - metadata.insert("timestamp", Utc::now().to_rfc3339()); - String::new() - }; - - metadata - .iter() - .fold(delimiter.clone(), |mut key_expr, (key, value)| { - let _ = write!(key_expr, "{key}/{value}/{delimiter}"); - key_expr - }) - .trim_end_matches('/') - .to_owned() - } - - pub(crate) async fn publish( - &self, - topic: &str, - metadata: BTreeMap<&str, String>, - payload: &T, - ) -> Result<()> - where - T: Serialize + Sync + ?Sized, - { - Ok(self - .session - .put( - self.make_key_expr(false, topic, metadata), - &serde_json::to_vec(payload)?, - ) - .await - .context(selector::AgentCommunicationFailure {})?) - } - /// Send a log message to the agent network. - /// - /// # Errors - /// - /// Will fail if there is an issue sending the message. - pub(crate) async fn log(&self, message: &str) -> Result<()> { - self.publish("log", BTreeMap::new(), message).await - } -} - #[expect( clippy::excessive_nesting, clippy::let_underscore_must_use, diff --git a/src/orchestrator/docker.rs b/src/orchestrator/docker.rs index e3b0a1bb..7a3c8e90 100644 --- a/src/orchestrator/docker.rs +++ b/src/orchestrator/docker.rs @@ -63,225 +63,530 @@ pub struct LocalDockerOrchestrator { pub api: Docker, } -#[uniffi::export(async_runtime = "tokio")] -#[async_trait::async_trait] -impl Orchestrator for LocalDockerOrchestrator { - fn start_with_altimage_blocking( - &self, - pod_job: &PodJob, - image: &ImageKind, - namespace_lookup: &HashMap, - ) -> Result { - ASYNC_RUNTIME.block_on(self.start_with_altimage(pod_job, image, namespace_lookup)) - } - fn start_blocking( - &self, - pod_job: &PodJob, - namespace_lookup: &HashMap, - ) -> Result { - ASYNC_RUNTIME.block_on(self.start(pod_job, namespace_lookup)) - } - fn list_blocking(&self) -> Result> { - ASYNC_RUNTIME.block_on(self.list()) - } - fn delete_blocking(&self, pod_run: &PodRun) -> Result<()> { - ASYNC_RUNTIME.block_on(self.delete(pod_run)) - } - fn get_info_blocking(&self, pod_run: &PodRun) -> Result { - ASYNC_RUNTIME.block_on(self.get_info(pod_run)) +#[uniffi::export] +impl LocalDockerOrchestrator { + /// How to create a local docker orchestrator with an absolute path on docker host where binds + /// will be mounted from. + /// + /// # Errors + /// + /// Will return `Err` if there is an issue creating a local docker orchestrator. + #[uniffi::constructor] + pub fn new() -> Result { + Ok(Self { + api: Docker::connect_with_local_defaults()?, + }) } - fn get_result_blocking( - &self, - pod_run: &PodRun, +} + +impl LocalDockerOrchestrator { + fn prepare_mount_binds( namespace_lookup: &HashMap, - ) -> Result { - ASYNC_RUNTIME.block_on(self.get_result(pod_run, namespace_lookup)) - } - fn get_logs_blocking(&self, pod_run: &PodRun) -> Result { - ASYNC_RUNTIME.block_on(self.get_logs(pod_run)) + pod_job: &PodJob, + ) -> Result<(Vec, [String; 1])> { + // all host mounted paths need to be absolute + let host_output_directory = path::absolute( + namespace_lookup[&pod_job.output_dir.namespace].join(&pod_job.output_dir.path), + )?; + // Ensure output directory exists to prevent permissions issues if daemon's owner is root + fs::create_dir_all(&host_output_directory)?; + let output_bind = [format!( + "{}:{}", + host_output_directory.to_string_lossy(), + pod_job.pod.output_dir.to_string_lossy(), + )]; + let input_binds = pod_job.pod.input_spec.iter().try_fold::<_, _, Result<_>>( + vec![], + |mut flattened_binds, (stream_name, stream_info)| { + flattened_binds.extend(match get(&pod_job.input_packet, stream_name)? { + PathSet::Unary(blob) => { + vec![format!( + "{}:{}:{}", + path::absolute( + get(namespace_lookup, &blob.location.namespace)? + .join(&blob.location.path) + )? + .to_string_lossy(), + stream_info.path.to_string_lossy(), + "ro" + )] + } + PathSet::Collection(blobs) => blobs + .iter() + .map(|blob| { + Ok(format!( + "{}:{}:{}", + path::absolute( + get(namespace_lookup, &blob.location.namespace)? + .join(&blob.location.path) + )? + .to_string_lossy(), + stream_info + .path + .join(blob.location.path.file_name().context( + selector::MissingInfo { + details: format!( + "file or directory name where path = {}", + blob.location.path.to_string_lossy() + ), + } + )?) + .to_string_lossy(), + "ro" + )) + }) + .collect::>()?, + }); + Ok(flattened_binds) + }, + )?; + Ok((input_binds, output_bind)) } #[expect( - clippy::try_err, + clippy::cast_possible_wrap, + clippy::cast_possible_truncation, + clippy::indexing_slicing, reason = r#" - - `map_err` workaround needed since `import_image_stream` requires resolved bytes - - Raising an error manually on occurrence to halt so we don't just ignore - - Should not get as far as `Ok(_)` + - No issue in memory casting if between 0 - 2^63(i64:MAX, 8EB) + - No issue in cores casting if in increments of 1e-9(nanocore) + - Pod commands will always have at least 1 element "# )] - async fn start_with_altimage( - &self, - pod_job: &PodJob, - image: &ImageKind, + pub(crate) fn prepare_container_start_inputs( namespace_lookup: &HashMap, - ) -> Result { - let (assigned_name, container_options, container_config) = match image { - ImageKind::Published(remote_image) => Self::prepare_container_start_inputs( - namespace_lookup, - pod_job, - remote_image.clone(), - )?, - ImageKind::Tarball(image_info) => { - let location = namespace_lookup[&image_info.namespace].join(&image_info.path); - let byte_stream = FramedRead::new( - File::open(&location) - .context(selector::InvalidPath { path: &location }) - .await?, - BytesCodec::new(), - ) - .map_err(|err| -> Result<()> { - Err::(err.into())?; // raise on error since we discard below - Ok(()) - }) - .map(|result| result.ok().map_or(Bytes::new(), BytesMut::freeze)); - let mut stream = - self.api - .import_image_stream(ImportImageOptions::default(), byte_stream, None); - let mut local_image = String::new(); - while let Some(response) = stream.next().await { - local_image = RE_IMAGE_TAG - .captures_iter(&response?.stream.context(selector::MissingInfo { - details: location.to_string_lossy(), - })?) - .find_map(|x| x.name("image").map(|name| name.as_str().to_owned())) - .context(selector::MissingInfo { - details: format!( - "container tags in provided container alternate image where path = {}", - location.to_string_lossy() - ), - })?; - } - Self::prepare_container_start_inputs( - namespace_lookup, - pod_job, - local_image.clone(), - )? - } - }; - self.api - .create_container(container_options, container_config) - .await?; - match self - .api - .start_container(&assigned_name, None::>) - .await - { - Ok(()) => {} - Err(err) => Err(OrcaError { - kind: Kind::FailedToStartPod { - container_name: assigned_name.clone(), - reason: err.to_string(), - backtrace: Backtrace::capture().into(), - }, - })?, - } + pod_job: &PodJob, + image: String, + ) -> Result<( + String, + Option>, + Config, + )> { + // Prepare configuration + let (input_binds, output_bind) = Self::prepare_mount_binds(namespace_lookup, pod_job)?; + let container_name = + Generator::with_naming(Name::Plain) + .next() + .context(selector::MissingInfo { + details: "unable to generate a random name", + })?; + let labels = HashMap::from([ + ("org.orcapod".to_owned(), "true".to_owned()), + ( + "org.orcapod.pod_job".to_owned(), + serde_json::to_string(&pod_job)?, + ), + ( + "org.orcapod.pod_job.annotation".to_owned(), + serde_json::to_string(&pod_job.annotation)?, + ), + ("org.orcapod.pod_job.hash".to_owned(), pod_job.hash.clone()), + ]); - Ok(PodRun::new::(pod_job, assigned_name)) + Ok(( + container_name.clone(), + Some(CreateContainerOptions { + name: container_name, + platform: None, + }), + Config { + image: Some(image), + entrypoint: Some(pod_job.pod.command[..1].to_vec()), + cmd: Some(pod_job.pod.command[1..].to_vec()), + env: pod_job.env_vars.as_ref().map(|provided_env_vars| { + provided_env_vars + .iter() + .map(|(name, value)| format!("{name}={value}")) + .collect() + }), + host_config: Some(HostConfig { + nano_cpus: Some((pod_job.cpu_limit * 10_f32.powi(9)) as i64), // ncpu, ucores=3, mcores=6, cores=9 + memory: Some(pod_job.memory_limit as i64), + binds: Some([&*input_binds, &output_bind].concat()), + ..Default::default() + }), + labels: Some(labels), + ..Default::default() + }, + )) } - async fn start( + #[expect( + clippy::string_slice, + clippy::indexing_slicing, + reason = r#" + - Timestamp and memory should always have a value > 0 + - Container will always have a name with more than 1 character + - No issue in core casting if between 0 - 3.40e38(f32:MAX) + - No issue in exit code casting if between -3.27e4(i16:MIN) - 3.27e4(i16:MAX) + - Containers will always have at least 1 name with at least 2 characters + "# + )] + pub(crate) async fn list_containers( &self, - pod_job: &PodJob, - namespace_lookup: &HashMap, - ) -> Result { - let image_options = Some(CreateImageOptions { - from_image: pod_job.pod.image.clone(), - ..Default::default() - }); - self.api - .create_image(image_options, None, None) - .try_collect::>() - .await?; - self.start_with_altimage( - pod_job, - &ImageKind::Published(pod_job.pod.image.clone()), - namespace_lookup, + filters: HashMap>, // https://docs.rs/bollard/latest/bollard/container/struct.ListContainersOptions.html#structfield.filters + ) -> Result> { + Ok(join_all( + self.api + .list_containers(Some(ListContainersOptions { + all: true, + filters, + ..Default::default() + })) + .await? + .iter() + .map(|container_summary| async { + let container_name = + &container_summary + .names + .as_ref() + .context(selector::MissingInfo { + details: "container name(s)".to_owned(), + })?[0][1..]; + Ok(( + container_name.to_owned(), + container_summary.clone(), + self.api.inspect_container(container_name, None).await?, + )) + }), ) .await + .into_iter() + .filter_map(|result: Result<_>| { + let (container_name, container_summary, container_inspect_response) = result.ok()?; + + Self::extract_run_info(&container_summary, &container_inspect_response) + .map(|run_info| (container_name.clone(), run_info)) + })) } - async fn list(&self) -> Result> { - self.list_containers(HashMap::from([( - "label".to_owned(), - vec!["org.orcapod=true".to_owned()], - )])) - .await? - .map(|(assigned_name, run_info)| { - let pod_job: PodJob = - serde_json::from_str(get(&run_info.labels, "org.orcapod.pod_job")?)?; - Ok(PodRun::new::(&pod_job, assigned_name)) + + #[expect( + clippy::cast_sign_loss, + clippy::cast_precision_loss, + clippy::cast_possible_truncation, + reason = r#" + - Timestamp and memory should always have a value > 0 + - Container will always have a name with more than 1 character + - No issue in core casting if between 0 - 3.40e38(f32:MAX) + - No issue in exit code casting if between -3.27e4(i16:MIN) - 3.27e4(i16:MAX) + - Containers will always have at least 1 name with at least 2 characters + - This functions requires a lot of boilerplate code to extract the run info + "# + )] + fn extract_run_info( + container_summary: &ContainerSummary, + container_inspect_response: &ContainerInspectResponse, + ) -> Option { + let terminated_timestamp = DateTime::parse_from_rfc3339( + container_inspect_response + .state + .as_ref()? + .finished_at + .as_ref()?, + ) + .ok()? + .timestamp() as u64; + Some(PodRunInfo { + image: container_inspect_response + .config + .as_ref()? + .image + .as_ref()? + .clone(), + created: container_summary.created? as u64, + terminated: (terminated_timestamp > 0).then_some(terminated_timestamp), + env_vars: container_inspect_response + .config + .as_ref()? + .env + .as_ref()? + .iter() + .filter_map(|x| { + x.split_once('=') + .map(|(key, value)| (key.to_owned(), value.to_owned())) + }) + .collect(), + command: [ + container_inspect_response + .config + .as_ref()? + .entrypoint + .as_ref()? + .clone(), + container_inspect_response + .config + .as_ref()? + .cmd + .as_ref()? + .clone(), + ] + .concat(), + status: match ( + container_inspect_response.state.as_ref()?.status?, + container_inspect_response.state.as_ref()?.exit_code? as i16, + ) { + (ContainerStateStatusEnum::RUNNING | ContainerStateStatusEnum::RESTARTING, _) => { + PodStatus::Running + } + (ContainerStateStatusEnum::EXITED, 0) => PodStatus::Completed, + (ContainerStateStatusEnum::EXITED | ContainerStateStatusEnum::DEAD, code) => { + PodStatus::Failed(code) + } + (ContainerStateStatusEnum::CREATED, code) => { + if container_inspect_response.state.as_ref()?.error.is_some() { + PodStatus::Failed(code) + } else { + PodStatus::Running + } + } + _ => PodStatus::Undefined, + }, + mounts: container_inspect_response + .mounts + .as_ref()? + .iter() + .map(|mount_point| { + Some(format!( + "{}:{}{}", + mount_point.source.as_ref()?, + mount_point.destination.as_ref()?, + mount_point + .mode + .as_ref() + .map_or_else(String::new, |mode| format!(":{mode}")) + )) + }) + .collect::>>()?, + labels: container_inspect_response + .config + .as_ref()? + .labels + .as_ref()? + .clone(), + cpu_limit: container_inspect_response.host_config.as_ref()?.nano_cpus? as f32 + / 10_f32.powi(9), // ncpu, ucores=3, mcores=6, cores=9 + memory_limit: container_inspect_response.host_config.as_ref()?.memory? as u64, }) - .collect() - } - async fn delete(&self, pod_run: &PodRun) -> Result<()> { - self.api - .remove_container( - &pod_run.assigned_name, - Some(RemoveContainerOptions { - force: true, - ..Default::default() - }), - ) - .await?; - Ok(()) } - async fn get_info(&self, pod_run: &PodRun) -> Result { - let labels = vec![ - "org.orcapod=true".to_owned(), - format!( - "org.orcapod.pod_job.annotation={}", - serde_json::to_string(&pod_run.pod_job.annotation)? - ), - format!("org.orcapod.pod_job.hash={}", pod_run.pod_job.hash), - ]; - - // Add names to the filters - let container_filters = HashMap::from([ - ("label".to_owned(), labels), - ( - "name".to_owned(), - Vec::from([pod_run.assigned_name.clone()]), - ), - ]); +} - let (_, run_info) = self - .list_containers(container_filters) - .await? - .next() - .context(selector::MissingInfo { - details: format!("pod run where pod_job.hash = {}", pod_run.pod_job.hash), - })?; - Ok(run_info) +#[uniffi::export(async_runtime = "tokio")] +#[async_trait::async_trait] +impl Orchestrator for LocalDockerOrchestrator { + fn start_with_altimage_blocking( + &self, + pod_job: &PodJob, + image: &ImageKind, + namespace_lookup: &HashMap, + ) -> Result { + ASYNC_RUNTIME.block_on(self.start_with_altimage(pod_job, image, namespace_lookup)) } - #[expect( - clippy::wildcard_enum_match_arm, - reason = "Favor readability due to complexity in external dependency." - )] - async fn get_result( + fn start_blocking( + &self, + pod_job: &PodJob, + namespace_lookup: &HashMap, + ) -> Result { + ASYNC_RUNTIME.block_on(self.start(pod_job, namespace_lookup)) + } + fn list_blocking(&self) -> Result> { + ASYNC_RUNTIME.block_on(self.list()) + } + fn delete_blocking(&self, pod_run: &PodRun) -> Result<()> { + ASYNC_RUNTIME.block_on(self.delete(pod_run)) + } + fn get_info_blocking(&self, pod_run: &PodRun) -> Result { + ASYNC_RUNTIME.block_on(self.get_info(pod_run)) + } + fn get_result_blocking( &self, pod_run: &PodRun, namespace_lookup: &HashMap, ) -> Result { - match self - .api - .wait_container(&pod_run.assigned_name, None::>) - .try_collect::>() - .await - { - Ok(_) => (), - Err(err) => match err { - DockerContainerWaitError { .. } => (), - _ => return Err(OrcaError::from(err)), - }, - } - - let mut result_info: PodRunInfo; - while { - result_info = self.get_info(pod_run).await?; - matches!(&result_info.status, PodStatus::Running) - } { - async_sleep(Duration::from_millis(100)).await; - } - - PodResult::new( - None, + ASYNC_RUNTIME.block_on(self.get_result(pod_run, namespace_lookup)) + } + fn get_logs_blocking(&self, pod_run: &PodRun) -> Result { + ASYNC_RUNTIME.block_on(self.get_logs(pod_run)) + } + #[expect( + clippy::try_err, + reason = r#" + - `map_err` workaround needed since `import_image_stream` requires resolved bytes + - Raising an error manually on occurrence to halt so we don't just ignore + - Should not get as far as `Ok(_)` + "# + )] + async fn start_with_altimage( + &self, + pod_job: &PodJob, + image: &ImageKind, + namespace_lookup: &HashMap, + ) -> Result { + let (assigned_name, container_options, container_config) = match image { + ImageKind::Published(remote_image) => Self::prepare_container_start_inputs( + namespace_lookup, + pod_job, + remote_image.clone(), + )?, + ImageKind::Tarball(image_info) => { + let location = namespace_lookup[&image_info.namespace].join(&image_info.path); + let byte_stream = FramedRead::new( + File::open(&location) + .context(selector::InvalidPath { path: &location }) + .await?, + BytesCodec::new(), + ) + .map_err(|err| -> Result<()> { + Err::(err.into())?; // raise on error since we discard below + Ok(()) + }) + .map(|result| result.ok().map_or(Bytes::new(), BytesMut::freeze)); + let mut stream = + self.api + .import_image_stream(ImportImageOptions::default(), byte_stream, None); + let mut local_image = String::new(); + while let Some(response) = stream.next().await { + local_image = RE_IMAGE_TAG + .captures_iter(&response?.stream.context(selector::MissingInfo { + details: location.to_string_lossy(), + })?) + .find_map(|x| x.name("image").map(|name| name.as_str().to_owned())) + .context(selector::MissingInfo { + details: format!( + "container tags in provided container alternate image where path = {}", + location.to_string_lossy() + ), + })?; + } + Self::prepare_container_start_inputs( + namespace_lookup, + pod_job, + local_image.clone(), + )? + } + }; + self.api + .create_container(container_options, container_config) + .await?; + match self + .api + .start_container(&assigned_name, None::>) + .await + { + Ok(()) => {} + Err(err) => Err(OrcaError { + kind: Kind::FailedToStartPod { + container_name: assigned_name.clone(), + reason: err.to_string(), + backtrace: Backtrace::capture().into(), + }, + })?, + } + + Ok(PodRun::new::(pod_job, assigned_name)) + } + async fn start( + &self, + pod_job: &PodJob, + namespace_lookup: &HashMap, + ) -> Result { + let image_options = Some(CreateImageOptions { + from_image: pod_job.pod.image.clone(), + ..Default::default() + }); + self.api + .create_image(image_options, None, None) + .try_collect::>() + .await?; + self.start_with_altimage( + pod_job, + &ImageKind::Published(pod_job.pod.image.clone()), + namespace_lookup, + ) + .await + } + async fn list(&self) -> Result> { + self.list_containers(HashMap::from([( + "label".to_owned(), + vec!["org.orcapod=true".to_owned()], + )])) + .await? + .map(|(assigned_name, run_info)| { + let pod_job: PodJob = + serde_json::from_str(get(&run_info.labels, "org.orcapod.pod_job")?)?; + Ok(PodRun::new::(&pod_job, assigned_name)) + }) + .collect() + } + async fn delete(&self, pod_run: &PodRun) -> Result<()> { + self.api + .remove_container( + &pod_run.assigned_name, + Some(RemoveContainerOptions { + force: true, + ..Default::default() + }), + ) + .await?; + Ok(()) + } + async fn get_info(&self, pod_run: &PodRun) -> Result { + let labels = vec![ + "org.orcapod=true".to_owned(), + format!( + "org.orcapod.pod_job.annotation={}", + serde_json::to_string(&pod_run.pod_job.annotation)? + ), + format!("org.orcapod.pod_job.hash={}", pod_run.pod_job.hash), + ]; + + // Add names to the filters + let container_filters = HashMap::from([ + ("label".to_owned(), labels), + ( + "name".to_owned(), + Vec::from([pod_run.assigned_name.clone()]), + ), + ]); + + let (_, run_info) = self + .list_containers(container_filters) + .await? + .next() + .context(selector::MissingInfo { + details: format!("pod run where pod_job.hash = {}", pod_run.pod_job.hash), + })?; + Ok(run_info) + } + #[expect( + clippy::wildcard_enum_match_arm, + reason = "Favor readability due to complexity in external dependency." + )] + async fn get_result( + &self, + pod_run: &PodRun, + namespace_lookup: &HashMap, + ) -> Result { + match self + .api + .wait_container(&pod_run.assigned_name, None::>) + .try_collect::>() + .await + { + Ok(_) => (), + Err(err) => match err { + DockerContainerWaitError { .. } => (), + _ => return Err(OrcaError::from(err)), + }, + } + + let mut result_info: PodRunInfo; + while { + result_info = self.get_info(pod_run).await?; + matches!(&result_info.status, PodStatus::Running) + } { + async_sleep(Duration::from_millis(100)).await; + } + + PodResult::new( + None, Arc::clone(&pod_run.pod_job), pod_run.assigned_name.clone(), result_info.status, @@ -353,308 +658,3 @@ impl Orchestrator for LocalDockerOrchestrator { Ok(logs) } } - -#[uniffi::export] -impl LocalDockerOrchestrator { - /// How to create a local docker orchestrator with an absolute path on docker host where binds - /// will be mounted from. - /// - /// # Errors - /// - /// Will return `Err` if there is an issue creating a local docker orchestrator. - #[uniffi::constructor] - pub fn new() -> Result { - Ok(Self { - api: Docker::connect_with_local_defaults()?, - }) - } -} - -impl LocalDockerOrchestrator { - fn prepare_mount_binds( - namespace_lookup: &HashMap, - pod_job: &PodJob, - ) -> Result<(Vec, [String; 1])> { - // all host mounted paths need to be absolute - let host_output_directory = path::absolute( - namespace_lookup[&pod_job.output_dir.namespace].join(&pod_job.output_dir.path), - )?; - // Ensure output directory exists to prevent permissions issues if daemon's owner is root - fs::create_dir_all(&host_output_directory)?; - let output_bind = [format!( - "{}:{}", - host_output_directory.to_string_lossy(), - pod_job.pod.output_dir.to_string_lossy(), - )]; - let input_binds = pod_job.pod.input_spec.iter().try_fold::<_, _, Result<_>>( - vec![], - |mut flattened_binds, (stream_name, stream_info)| { - flattened_binds.extend(match get(&pod_job.input_packet, stream_name)? { - PathSet::Unary(blob) => { - vec![format!( - "{}:{}:{}", - path::absolute( - get(namespace_lookup, &blob.location.namespace)? - .join(&blob.location.path) - )? - .to_string_lossy(), - stream_info.path.to_string_lossy(), - "ro" - )] - } - PathSet::Collection(blobs) => blobs - .iter() - .map(|blob| { - Ok(format!( - "{}:{}:{}", - path::absolute( - get(namespace_lookup, &blob.location.namespace)? - .join(&blob.location.path) - )? - .to_string_lossy(), - stream_info - .path - .join(blob.location.path.file_name().context( - selector::MissingInfo { - details: format!( - "file or directory name where path = {}", - blob.location.path.to_string_lossy() - ), - } - )?) - .to_string_lossy(), - "ro" - )) - }) - .collect::>()?, - }); - Ok(flattened_binds) - }, - )?; - Ok((input_binds, output_bind)) - } - #[expect( - clippy::cast_possible_wrap, - clippy::cast_possible_truncation, - clippy::indexing_slicing, - reason = r#" - - No issue in memory casting if between 0 - 2^63(i64:MAX, 8EB) - - No issue in cores casting if in increments of 1e-9(nanocore) - - Pod commands will always have at least 1 element - "# - )] - pub(crate) fn prepare_container_start_inputs( - namespace_lookup: &HashMap, - pod_job: &PodJob, - image: String, - ) -> Result<( - String, - Option>, - Config, - )> { - // Prepare configuration - let (input_binds, output_bind) = Self::prepare_mount_binds(namespace_lookup, pod_job)?; - let container_name = - Generator::with_naming(Name::Plain) - .next() - .context(selector::MissingInfo { - details: "unable to generate a random name", - })?; - let labels = HashMap::from([ - ("org.orcapod".to_owned(), "true".to_owned()), - ( - "org.orcapod.pod_job".to_owned(), - serde_json::to_string(&pod_job)?, - ), - ( - "org.orcapod.pod_job.annotation".to_owned(), - serde_json::to_string(&pod_job.annotation)?, - ), - ("org.orcapod.pod_job.hash".to_owned(), pod_job.hash.clone()), - ]); - - Ok(( - container_name.clone(), - Some(CreateContainerOptions { - name: container_name, - platform: None, - }), - Config { - image: Some(image), - entrypoint: Some(pod_job.pod.command[..1].to_vec()), - cmd: Some(pod_job.pod.command[1..].to_vec()), - env: pod_job.env_vars.as_ref().map(|provided_env_vars| { - provided_env_vars - .iter() - .map(|(name, value)| format!("{name}={value}")) - .collect() - }), - host_config: Some(HostConfig { - nano_cpus: Some((pod_job.cpu_limit * 10_f32.powi(9)) as i64), // ncpu, ucores=3, mcores=6, cores=9 - memory: Some(pod_job.memory_limit as i64), - binds: Some([&*input_binds, &output_bind].concat()), - ..Default::default() - }), - labels: Some(labels), - ..Default::default() - }, - )) - } - #[expect( - clippy::string_slice, - clippy::indexing_slicing, - reason = r#" - - Timestamp and memory should always have a value > 0 - - Container will always have a name with more than 1 character - - No issue in core casting if between 0 - 3.40e38(f32:MAX) - - No issue in exit code casting if between -3.27e4(i16:MIN) - 3.27e4(i16:MAX) - - Containers will always have at least 1 name with at least 2 characters - "# - )] - pub(crate) async fn list_containers( - &self, - filters: HashMap>, // https://docs.rs/bollard/latest/bollard/container/struct.ListContainersOptions.html#structfield.filters - ) -> Result> { - Ok(join_all( - self.api - .list_containers(Some(ListContainersOptions { - all: true, - filters, - ..Default::default() - })) - .await? - .iter() - .map(|container_summary| async { - let container_name = - &container_summary - .names - .as_ref() - .context(selector::MissingInfo { - details: "container name(s)".to_owned(), - })?[0][1..]; - Ok(( - container_name.to_owned(), - container_summary.clone(), - self.api.inspect_container(container_name, None).await?, - )) - }), - ) - .await - .into_iter() - .filter_map(|result: Result<_>| { - let (container_name, container_summary, container_inspect_response) = result.ok()?; - - Self::extract_run_info(&container_summary, &container_inspect_response) - .map(|run_info| (container_name.clone(), run_info)) - })) - } - - #[expect( - clippy::cast_sign_loss, - clippy::cast_precision_loss, - clippy::cast_possible_truncation, - reason = r#" - - Timestamp and memory should always have a value > 0 - - Container will always have a name with more than 1 character - - No issue in core casting if between 0 - 3.40e38(f32:MAX) - - No issue in exit code casting if between -3.27e4(i16:MIN) - 3.27e4(i16:MAX) - - Containers will always have at least 1 name with at least 2 characters - - This functions requires a lot of boilerplate code to extract the run info - "# - )] - fn extract_run_info( - container_summary: &ContainerSummary, - container_inspect_response: &ContainerInspectResponse, - ) -> Option { - let terminated_timestamp = DateTime::parse_from_rfc3339( - container_inspect_response - .state - .as_ref()? - .finished_at - .as_ref()?, - ) - .ok()? - .timestamp() as u64; - Some(PodRunInfo { - image: container_inspect_response - .config - .as_ref()? - .image - .as_ref()? - .clone(), - created: container_summary.created? as u64, - terminated: (terminated_timestamp > 0).then_some(terminated_timestamp), - env_vars: container_inspect_response - .config - .as_ref()? - .env - .as_ref()? - .iter() - .filter_map(|x| { - x.split_once('=') - .map(|(key, value)| (key.to_owned(), value.to_owned())) - }) - .collect(), - command: [ - container_inspect_response - .config - .as_ref()? - .entrypoint - .as_ref()? - .clone(), - container_inspect_response - .config - .as_ref()? - .cmd - .as_ref()? - .clone(), - ] - .concat(), - status: match ( - container_inspect_response.state.as_ref()?.status?, - container_inspect_response.state.as_ref()?.exit_code? as i16, - ) { - (ContainerStateStatusEnum::RUNNING | ContainerStateStatusEnum::RESTARTING, _) => { - PodStatus::Running - } - (ContainerStateStatusEnum::EXITED, 0) => PodStatus::Completed, - (ContainerStateStatusEnum::EXITED | ContainerStateStatusEnum::DEAD, code) => { - PodStatus::Failed(code) - } - (ContainerStateStatusEnum::CREATED, code) => { - if container_inspect_response.state.as_ref()?.error.is_some() { - PodStatus::Failed(code) - } else { - PodStatus::Running - } - } - _ => PodStatus::Undefined, - }, - mounts: container_inspect_response - .mounts - .as_ref()? - .iter() - .map(|mount_point| { - Some(format!( - "{}:{}{}", - mount_point.source.as_ref()?, - mount_point.destination.as_ref()?, - mount_point - .mode - .as_ref() - .map_or_else(String::new, |mode| format!(":{mode}")) - )) - }) - .collect::>>()?, - labels: container_inspect_response - .config - .as_ref()? - .labels - .as_ref()? - .clone(), - cpu_limit: container_inspect_response.host_config.as_ref()?.nano_cpus? as f32 - / 10_f32.powi(9), // ncpu, ucores=3, mcores=6, cores=9 - memory_limit: container_inspect_response.host_config.as_ref()?.memory? as u64, - }) - } -} diff --git a/src/pipeline_runner.rs b/src/pipeline_runner.rs index b4a1b365..abd3506e 100644 --- a/src/pipeline_runner.rs +++ b/src/pipeline_runner.rs @@ -241,11 +241,11 @@ impl DockerPipelineRunner { // Wait for all nodes to be ready before sending inputs let num_of_nodes = graph.node_count(); - let mut ready_nodes = 0; + let mut ready_nodes: usize = 0; while (subscriber.recv_async().await).is_ok() { // Message is empty, just increment the counter - ready_nodes += 1; + ready_nodes = ready_nodes.saturating_add(1); if ready_nodes == num_of_nodes { break; // All nodes are ready, we can start sending inputs } @@ -472,7 +472,7 @@ impl DockerPipelineRunner { node.label, node.hash ); while status_subscriber.recv_async().await.is_ok() { - num_of_ready_event_handler += 1; + num_of_ready_event_handler = num_of_ready_event_handler.saturating_add(1); if num_of_ready_event_handler == nodes_to_sub_to.len() { // +1 for the stop request task break; // All tasks are ready, we can start sending inputs @@ -827,7 +827,7 @@ impl OperatorProcessor { } } -#[allow( +#[expect( clippy::excessive_nesting, reason = "Nesting manageable and mute github action error" )] diff --git a/src/store/filestore.rs b/src/store/filestore.rs index 7f599f3a..ff74e300 100644 --- a/src/store/filestore.rs +++ b/src/store/filestore.rs @@ -185,6 +185,10 @@ impl Store for LocalFileStore { &self.lookup_hash::(name, version)?, Self::make_annotation_relpath(name, version), ), + ModelType::Pipeline => self.make_path::( + &self.lookup_hash::(name, version)?, + Self::make_annotation_relpath(name, version), + ), }; fs::remove_file(&annotation_file_path)?; diff --git a/tests/agent.rs b/tests/agent.rs index b79a7145..790f3ac0 100644 --- a/tests/agent.rs +++ b/tests/agent.rs @@ -25,7 +25,6 @@ use std::{ time::{Duration, SystemTime, UNIX_EPOCH}, }; use tokio::{self, task::JoinSet, time::sleep as async_sleep}; -use zenoh; #[test] fn simple() -> Result<()> { diff --git a/tests/error.rs b/tests/error.rs index 25c67393..54246ce9 100644 --- a/tests/error.rs +++ b/tests/error.rs @@ -19,8 +19,6 @@ use orcapod::{ docker::LocalDockerOrchestrator, }, }; -use serde_json; -use serde_yaml; use std::{collections::HashMap, fs, ops::Deref as _, path::PathBuf, sync::Arc, time::Duration}; use tokio::{self, time::sleep as async_sleep}; diff --git a/tests/fixture/mod.rs b/tests/fixture/mod.rs index 451432b1..a8216016 100644 --- a/tests/fixture/mod.rs +++ b/tests/fixture/mod.rs @@ -205,7 +205,7 @@ pub fn pod_jobs_stresser( success_count: usize, error_count: usize, ) -> Result>> { - (1..=(success_count + error_count)) + (1..=success_count.saturating_add(error_count)) .map(|i| { if i <= success_count { return Ok(pod_job_custom( From f1981498c10beff95a9c5477a00c9547bb668f0c Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 22 Oct 2025 07:51:26 +0000 Subject: [PATCH 4/7] Remove excessive nesting --- .clippy.toml | 1 - src/pipeline_runner.rs | 4 ---- 2 files changed, 5 deletions(-) diff --git a/.clippy.toml b/.clippy.toml index 6b3b5fee..9da5cde6 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -1,3 +1,2 @@ -excessive-nesting-threshold = 6 too-many-arguments-threshold = 10 allowed-idents-below-min-chars = ["..", "k", "v", "f", "re", "id", "Ok", "'_"] diff --git a/src/pipeline_runner.rs b/src/pipeline_runner.rs index abd3506e..26862aad 100644 --- a/src/pipeline_runner.rs +++ b/src/pipeline_runner.rs @@ -827,10 +827,6 @@ impl OperatorProcessor { } } -#[expect( - clippy::excessive_nesting, - reason = "Nesting manageable and mute github action error" -)] #[async_trait] impl NodeProcessor for OperatorProcessor { async fn process_incoming_packet( From 7296224983f9160d397bb6390a57a83a9f7ea1fd Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 22 Oct 2025 07:51:35 +0000 Subject: [PATCH 5/7] Update diagram action --- .github/actions/build-diagram/action.yaml | 2 +- src/orchestrator/agent.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/actions/build-diagram/action.yaml b/.github/actions/build-diagram/action.yaml index 77086d52..0fd202b7 100644 --- a/.github/actions/build-diagram/action.yaml +++ b/.github/actions/build-diagram/action.yaml @@ -16,7 +16,7 @@ runs: cargo modules dependencies --lib --max-depth 0 \ --no-uses --no-fns \ --focus-on " - orcapod::uniffi::{ + orcapod::{ model::{Pod,PodJob,PodResult}, store::filestore::LocalFileStore, orchestrator::{PodRun,docker::LocalDockerOrchestrator}, diff --git a/src/orchestrator/agent.rs b/src/orchestrator/agent.rs index 488a3b63..c3349b3a 100644 --- a/src/orchestrator/agent.rs +++ b/src/orchestrator/agent.rs @@ -298,7 +298,6 @@ pub(crate) fn extract_metadata(key_expr: &str) -> HashMap { } #[expect( - clippy::excessive_nesting, clippy::let_underscore_must_use, reason = "`result::Result<(), SendError<_>>` is the only uncaptured result since it would mean we can't transmit results over mpsc." )] From 915d7115b2dbc526aade377f6e696d5f58338712 Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 22 Oct 2025 08:06:40 +0000 Subject: [PATCH 6/7] Update python test to reflect renaming --- tests/extra/python/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/extra/python/model.py b/tests/extra/python/model.py index 03c76d8c..23f09504 100755 --- a/tests/extra/python/model.py +++ b/tests/extra/python/model.py @@ -16,7 +16,7 @@ LocalFileStore, ModelId, ModelType, - RecommendSpecs, + RecommendedSpecs, OrcaError, ) @@ -33,7 +33,7 @@ def create_pod(data, _): input_spec={}, output_dir="/tmp/output", output_spec={}, - recommend_specs=RecommendSpecs( + recommend_specs=RecommendedSpecs( cpus=0.1, memory=10 << 20, ), From d3cf4fc3fd34006dfe5644f3ee786fc5f6c623e3 Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 22 Oct 2025 08:15:41 +0000 Subject: [PATCH 7/7] Fix renaming of RecommendSpecs to RecommendedSpecs --- tests/extra/python/agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/extra/python/agent.py b/tests/extra/python/agent.py index 8405d8d7..5e39aaf4 100644 --- a/tests/extra/python/agent.py +++ b/tests/extra/python/agent.py @@ -17,7 +17,7 @@ Uri, Pod, Annotation, - RecommendSpecs, + RecommendedSpecs, ) @@ -89,7 +89,7 @@ async def main(client, agent, test_dir, namespace_lookup, pod_jobs): input_spec={}, output_dir="/tmp/output", output_spec={}, - recommend_specs=RecommendSpecs( + recommend_specs=RecommendedSpecs( cpus=0.1, memory=128 << 20, ),