diff --git a/coman/.config/config.toml b/coman/.config/config.toml index d0da4d3..7481b10 100644 --- a/coman/.config/config.toml +++ b/coman/.config/config.toml @@ -11,11 +11,11 @@ sbatch_script_template = """ #SBATCH --job-name={{name}} #SBATCH --ntasks=1 #SBATCH --time=10:00 -srun --environment={{environment_file}} {{command}} +srun {% if environment_file %}--environment={{environment_file}}{% endif %} {{command}} """ edf_file_template = """ -image = "{{edf_image}}" +{% if edf_image %}image = "{{edf_image}}"{% endif %} mounts = [{% for source, target in mount %}"{{source}}:{{target}}",{% endfor %}] workdir = "{{container_workdir}}" diff --git a/coman/src/cli.rs b/coman/src/cli.rs index fa20afe..ca21c99 100644 --- a/coman/src/cli.rs +++ b/coman/src/cli.rs @@ -1,10 +1,11 @@ use std::{error::Error, path::PathBuf}; -use clap::{Parser, Subcommand, builder::TypedValueParser}; +use clap::{Args, Parser, Subcommand, builder::TypedValueParser}; use strum::VariantNames; use crate::{ config::{ComputePlatform, get_config_dir, get_data_dir, get_project_local_config_file}, + cscs::api_client::client::{EdfSpec as EdfSpecEnum, ScriptSpec as ScriptSpecEnum}, util::types::DockerImageUrl, }; @@ -56,6 +57,77 @@ pub enum CscsCommands { }, } +#[derive(Args, Clone, Debug)] +#[group(multiple = false)] +pub struct ScriptSpec { + #[arg( + long, + help = "generate and upload script file based on template (on by default unless `--local` or `--remote` are passed)" + )] + generate_script: bool, + #[arg(long, value_name = "PATH", help = "upload local script file")] + local_script: Option, + #[arg(long, value_name = "PATH", help = "use script file already present on remote")] + remote_script: Option, +} +impl Default for ScriptSpec { + fn default() -> Self { + Self { + generate_script: true, + local_script: Default::default(), + remote_script: Default::default(), + } + } +} + +impl From for ScriptSpecEnum { + fn from(val: ScriptSpec) -> Self { + if let Some(local_script) = val.local_script { + ScriptSpecEnum::Local(local_script) + } else if let Some(remote_script) = val.remote_script { + ScriptSpecEnum::Remote(remote_script) + } else { + ScriptSpecEnum::Generate + } + } +} + +#[derive(Args, Clone, Debug)] +#[group(multiple = false)] +pub struct EdfSpec { + #[arg( + long, + help = "generate and upload edf file based on template (on by default unless `--local` or `--remote` are passed)" + )] + generate_edf: bool, + #[arg(long, value_name = "PATH", help = "upload local edf file")] + local_edf: Option, + #[arg(long, value_name = "PATH", help = "use edf file already present on remote")] + remote_edf: Option, +} + +impl Default for EdfSpec { + fn default() -> Self { + Self { + generate_edf: true, + local_edf: Default::default(), + remote_edf: Default::default(), + } + } +} + +impl From for EdfSpecEnum { + fn from(val: EdfSpec) -> Self { + if let Some(local_edf) = val.local_edf { + EdfSpecEnum::Local(local_edf) + } else if let Some(remote_edf) = val.remote_edf { + EdfSpecEnum::Remote(remote_edf) + } else { + EdfSpecEnum::Generate + } + } +} + #[allow(clippy::large_enum_variant)] #[derive(Subcommand, Debug)] pub enum CscsJobCommands { @@ -74,8 +146,6 @@ pub enum CscsJobCommands { Submit { #[clap(short, long, help = "name of the job")] name: Option, - #[clap(short, long, help = "the path to the srun script file to use")] - script_file: Option, #[clap( short, long, @@ -88,10 +158,14 @@ pub enum CscsJobCommands { mount: Vec<(String, String)>, #[clap(short, long, help = "The docker image to use")] image: Option, - #[clap(short, long, help = "Path where stdout of the job gets written to")] + #[clap(long, help = "Path where stdout of the job gets written to")] stdout: Option, - #[clap(short, long, help = "Path where stderr of the job gets written to")] + #[clap(long, help = "Path where stderr of the job gets written to")] stderr: Option, + #[command(flatten)] + edf_spec: Option, + #[command(flatten)] + script_spec: Option, #[clap(trailing_var_arg = true, help = "The command to run in the container")] command: Option>, }, diff --git a/coman/src/cscs/api_client/client.rs b/coman/src/cscs/api_client/client.rs index 2c2d9da..6fdb162 100644 --- a/coman/src/cscs/api_client/client.rs +++ b/coman/src/cscs/api_client/client.rs @@ -24,10 +24,23 @@ use crate::{ trace_dbg, util::types::DockerImageUrl, }; +#[derive(Debug, Clone, Default)] +pub enum ScriptSpec { + #[default] + Generate, + Local(PathBuf), + Remote(PathBuf), +} +#[derive(Debug, Clone, Default)] +pub enum EdfSpec { + #[default] + Generate, + Local(PathBuf), + Remote(PathBuf), +} #[derive(Debug, Clone, Default)] pub struct JobStartOptions { - pub script_file: Option, pub image: Option, pub command: Option>, pub stdout: Option, @@ -35,6 +48,8 @@ pub struct JobStartOptions { pub container_workdir: Option, pub env: Vec<(String, String)>, pub mount: Vec<(String, String)>, + pub edf_spec: EdfSpec, + pub script_spec: ScriptSpec, } pub struct CscsApi { diff --git a/coman/src/cscs/cli.rs b/coman/src/cscs/cli.rs index 9540f36..d31feec 100644 --- a/coman/src/cscs/cli.rs +++ b/coman/src/cscs/cli.rs @@ -21,7 +21,7 @@ use crate::{ api_client::{client::JobStartOptions, types::JobStatus}, handlers::{ cscs_file_download, cscs_file_list, cscs_file_upload, cscs_job_cancel, cscs_job_details, cscs_job_list, - cscs_job_log, cscs_login, cscs_start_job, cscs_system_list, cscs_system_set, + cscs_job_log, cscs_job_start, cscs_login, cscs_system_list, cscs_system_set, }, }, }; @@ -107,7 +107,7 @@ pub(crate) async fn cli_cscs_job_start( platform: Option, account: Option, ) -> Result<()> { - match cscs_start_job(name, options, system, platform, account).await { + match cscs_job_start(name, options, system, platform, account).await { Ok(_) => { println!("Job started"); Ok(()) diff --git a/coman/src/cscs/handlers.rs b/coman/src/cscs/handlers.rs index 2bfeb2f..1592170 100644 --- a/coman/src/cscs/handlers.rs +++ b/coman/src/cscs/handlers.rs @@ -2,11 +2,15 @@ use std::os::unix::fs::MetadataExt; #[cfg(target_family = "windows")] use std::os::windows::fs::MetadataExt; -use std::{collections::HashMap, path::PathBuf}; +use std::{ + collections::HashMap, + path::{Path, PathBuf}, +}; use color_eyre::{Result, eyre::eyre}; use reqwest::Url; +use super::api_client::client::{EdfSpec, ScriptSpec}; use crate::{ config::{ComputePlatform, Config}, cscs::{ @@ -163,55 +167,26 @@ pub async fn cscs_job_cancel(job_id: i64, system: Option, platform: Opti } } -#[allow(clippy::too_many_arguments)] -pub async fn cscs_start_job( - name: Option, - options: JobStartOptions, - system: Option, - platform: Option, - account: Option, -) -> Result<()> { - match get_access_token().await { - Ok(access_token) => { - let api_client = CscsApi::new(access_token.0, platform).unwrap(); - let config = Config::new().unwrap(); - let current_system = &system.unwrap_or(config.cscs.current_system); - let account = account.or(config.cscs.account); - let user_info = api_client.get_userinfo(current_system).await?; - let job_name = name.or(config.name).unwrap_or(format!("{}-coman", user_info.name)); - let current_system_info = api_client.get_system(current_system).await?; - let scratch = match current_system_info { - Some(system) => PathBuf::from( - system - .file_systems - .iter() - .find(|fs| fs.data_type == FileSystemType::Scratch) - .ok_or(eyre!("couldn't find scratch space for system"))? - .path - .clone(), - ), - None => { - return Err(eyre!("couldn't get system description for {}", current_system)); - } - }; - let container_workdir = options - .container_workdir - .clone() - .unwrap_or(config.cscs.workdir.unwrap_or("/scratch".to_owned())); - let base_path = scratch.join(user_info.name.clone()).join(&job_name); +async fn handle_edf( + api_client: &CscsApi, + base_path: &Path, + current_system: &str, + envvars: &HashMap, + workdir: &str, + options: &JobStartOptions, +) -> Result { + let config = Config::new().unwrap(); + let environment_path = base_path.join("environment.toml"); + match options.edf_spec.clone() { + EdfSpec::Generate => { + let mut tera = tera::Tera::default(); - let mut envvars = config.cscs.env.clone(); - envvars.extend(options.env.clone()); + let environment_template = &config.cscs.edf_file_template; + tera.add_raw_template("environment.toml", environment_template)?; let mut mount: HashMap = options.mount.clone().into_iter().collect(); mount.entry("${SCRATCH}".to_owned()).or_insert("/scratch".to_owned()); - let mut tera = tera::Tera::default(); - - let environment_path = base_path.join("environment.toml"); - let environment_template = config.cscs.edf_file_template; - tera.add_raw_template("environment.toml", &environment_template)?; - - let docker_image = options.image.clone().unwrap_or(config.cscs.image.try_into()?); + let docker_image = options.image.clone().unwrap_or(config.cscs.image.clone().try_into()?); let meta = docker_image.inspect().await?; if let Some(system_info) = config.cscs.systems.get(current_system) { let mut compatible = false; @@ -237,24 +212,45 @@ pub async fn cscs_start_job( let mut context = tera::Context::new(); context.insert("edf_image", &docker_image.to_edf()); - context.insert("container_workdir", &container_workdir); + context.insert("container_workdir", &workdir); context.insert("env", &envvars); context.insert("mount", &mount); let environment_file = tera.render("environment.toml", &context)?; - api_client.mkdir(current_system, base_path.clone()).await?; - api_client.chmod(current_system, base_path.clone(), "700").await?; + api_client.mkdir(current_system, base_path.to_path_buf()).await?; + api_client.chmod(current_system, base_path.to_path_buf(), "700").await?; api_client .upload(current_system, environment_path.clone(), environment_file.into_bytes()) .await?; - - // upload script - let script_path = base_path.join("script.sh"); - let script_template = options - .script_file - .clone() - .map(std::fs::read_to_string) - .unwrap_or(Ok(config.cscs.sbatch_script_template))?; + Ok(environment_path) + } + EdfSpec::Local(local_path) => { + let environment_file = std::fs::read_to_string(local_path.clone())?; + api_client.mkdir(current_system, base_path.to_path_buf()).await?; + api_client.chmod(current_system, base_path.to_path_buf(), "700").await?; + api_client + .upload(current_system, environment_path.clone(), environment_file.into_bytes()) + .await?; + Ok(environment_path) + } + EdfSpec::Remote(path) => Ok(path), + } +} +async fn handle_script( + api_client: &CscsApi, + job_name: &str, + base_path: &Path, + current_system: &str, + environment_path: &Path, + workdir: &str, + options: &JobStartOptions, +) -> Result { + let config = Config::new().unwrap(); + let script_path = base_path.join("script.sh"); + match options.script_spec.clone() { + ScriptSpec::Generate => { + let script_template = config.cscs.sbatch_script_template; + let mut tera = tera::Tera::default(); tera.add_raw_template("script.sh", &script_template)?; let mut context = tera::Context::new(); context.insert("name", &job_name); @@ -262,13 +258,87 @@ pub async fn cscs_start_job( "command", &options.command.clone().unwrap_or(config.cscs.command).join(" "), ); - context.insert("environment_file", &environment_path); - context.insert("container_workdir", &container_workdir); + context.insert("environment_file", &environment_path.to_path_buf()); + context.insert("container_workdir", &workdir); let script = tera.render("script.sh", &context)?; api_client .upload(current_system, script_path.clone(), script.into_bytes()) .await?; + Ok(script_path) + } + ScriptSpec::Local(local_path) => { + let script = std::fs::read_to_string(local_path)?; + api_client + .upload(current_system, script_path.clone(), script.into_bytes()) + .await?; + + Ok(script_path) + } + ScriptSpec::Remote(script_path) => Ok(script_path), + } +} + +pub async fn cscs_job_start( + name: Option, + options: JobStartOptions, + system: Option, + platform: Option, + account: Option, +) -> Result<()> { + match get_access_token().await { + Ok(access_token) => { + let api_client = CscsApi::new(access_token.0, platform).unwrap(); + let config = Config::new().unwrap(); + let current_system = &system.unwrap_or(config.cscs.current_system); + let account = account.or(config.cscs.account); + let user_info = api_client.get_userinfo(current_system).await?; + let job_name = name.or(config.name).unwrap_or(format!("{}-coman", user_info.name)); + let current_system_info = api_client.get_system(current_system).await?; + let scratch = match current_system_info { + Some(system) => PathBuf::from( + system + .file_systems + .iter() + .find(|fs| fs.data_type == FileSystemType::Scratch) + .ok_or(eyre!("couldn't find scratch space for system"))? + .path + .clone(), + ), + None => { + return Err(eyre!("couldn't get system description for {}", current_system)); + } + }; + let container_workdir = options + .container_workdir + .clone() + .unwrap_or(config.cscs.workdir.unwrap_or("/scratch".to_owned())); + let base_path = scratch.join(user_info.name.clone()).join(&job_name); + + let mut envvars = config.cscs.env.clone(); + envvars.extend(options.env.clone()); + + let environment_path = handle_edf( + &api_client, + &base_path, + current_system, + &envvars, + &container_workdir, + &options, + ) + .await?; + + let script_path = handle_script( + &api_client, + &job_name, + &base_path, + current_system, + &environment_path, + &container_workdir, + &options, + ) + .await?; + // start job api_client .start_job(current_system, account, &job_name, script_path, envvars, options) diff --git a/coman/src/main.rs b/coman/src/main.rs index 8df51cf..ce9b0e5 100644 --- a/coman/src/main.rs +++ b/coman/src/main.rs @@ -73,7 +73,6 @@ async fn main() -> Result<()> { } cli::CscsJobCommands::Submit { name, - script_file, image, command, workdir, @@ -81,11 +80,12 @@ async fn main() -> Result<()> { mount, stdout, stderr, + edf_spec, + script_spec, } => { cli_cscs_job_start( name, JobStartOptions { - script_file, image, command, container_workdir: workdir, @@ -93,6 +93,8 @@ async fn main() -> Result<()> { mount, stdout, stderr, + edf_spec: edf_spec.unwrap_or_default().into(), + script_spec: script_spec.unwrap_or_default().into(), }, system, platform,