Skip to content

Commit 90b1239

Browse files
authored
bug fixes, custom stdout/stderr (#38)
* show project local config path in version command
* exclude input when system select is shown
* handle non-existent logs better
* fix specifying a target folder for download in TUI
* allow specifying custom stdout/stderr paths for jobs
1 parent 8e69c95 commit 90b1239

9 files changed

Lines changed: 149 additions & 82 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coman/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "coman"
3-
version = "0.2.5"
3+
version = "0.3.1"
44
edition = "2024"
55
description = "Compute Manager for managing HPC compute"
66
authors = ["Ralf Grubenmann <ralf.grubenmann@sdsc.ethz.ch>"]

coman/src/cli.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use clap::{Parser, Subcommand, builder::TypedValueParser};
44
use strum::VariantNames;
55

66
use crate::{
7-
config::{ComputePlatform, get_config_dir, get_data_dir},
7+
config::{ComputePlatform, get_config_dir, get_data_dir, get_project_local_config_file},
88
util::types::DockerImageUrl,
99
};
1010

@@ -31,6 +31,7 @@ pub enum CliCommands {
3131
},
3232
}
3333

34+
#[allow(clippy::large_enum_variant)]
3435
#[derive(Subcommand, Debug)]
3536
pub enum CscsCommands {
3637
#[clap(about = "Log in to CSCS")]
@@ -87,6 +88,10 @@ pub enum CscsJobCommands {
8788
mount: Vec<(String, String)>,
8889
#[clap(short, long, help = "The docker image to use")]
8990
image: Option<DockerImageUrl>,
91+
#[clap(short, long, help = "Path where stdout of the job gets written to")]
92+
stdout: Option<PathBuf>,
93+
#[clap(short, long, help = "Path where stderr of the job gets written to")]
94+
stderr: Option<PathBuf>,
9095
#[clap(trailing_var_arg = true, help = "The command to run in the container")]
9196
command: Option<Vec<String>>,
9297
},
@@ -155,18 +160,18 @@ const VERSION_MESSAGE: &str = concat!(
155160
);
156161

157162
pub fn version() -> String {
158-
let author = clap::crate_authors!();
159-
160163
// let current_exe_path = PathBuf::from(clap::crate_name!()).display().to_string();
161164
let config_dir_path = get_config_dir().display().to_string();
162165
let data_dir_path = get_data_dir().display().to_string();
166+
let project_config_dir = get_project_local_config_file()
167+
.map(|p| p.display().to_string())
168+
.unwrap_or("".to_owned());
163169

164170
format!(
165171
"\
166172
{VERSION_MESSAGE}
167173
168-
Authors: {author}
169-
174+
Project config directory: {project_config_dir}
170175
Config directory: {config_dir_path}
171176
Data directory: {data_dir_path}"
172177
)

coman/src/cscs/api_client/client.rs

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ use eyre::eyre;
55
use firecrest_client::{
66
client::FirecrestClient,
77
compute_api::{
8-
cancel_compute_system_job, get_compute_system_job, get_compute_system_job_metadata, get_compute_system_jobs,
9-
post_compute_system_job,
8+
JobOptions, cancel_compute_system_job, get_compute_system_job, get_compute_system_job_metadata,
9+
get_compute_system_jobs, post_compute_system_job,
1010
},
1111
filesystem_api::{
1212
get_filesystem_ops_download, get_filesystem_ops_ls, get_filesystem_ops_stat, get_filesystem_ops_tail,
@@ -22,8 +22,21 @@ use crate::{
2222
config::{ComputePlatform, Config},
2323
cscs::api_client::types::{FileStat, Job, JobDetail, PathEntry, S3Upload, System, UserInfo},
2424
trace_dbg,
25+
util::types::DockerImageUrl,
2526
};
2627

28+
#[derive(Debug, Clone, Default)]
29+
pub struct JobStartOptions {
30+
pub script_file: Option<PathBuf>,
31+
pub image: Option<DockerImageUrl>,
32+
pub command: Option<Vec<String>>,
33+
pub stdout: Option<PathBuf>,
34+
pub stderr: Option<PathBuf>,
35+
pub container_workdir: Option<String>,
36+
pub env: Vec<(String, String)>,
37+
pub mount: Vec<(String, String)>,
38+
}
39+
2740
pub struct CscsApi {
2841
client: FirecrestClient,
2942
}
@@ -46,6 +59,7 @@ impl CscsApi {
4659
name: &str,
4760
script_path: PathBuf,
4861
envvars: HashMap<String, String>,
62+
options: JobStartOptions,
4963
) -> Result<()> {
5064
let workingdir = script_path.clone();
5165
let workingdir = workingdir.parent();
@@ -54,10 +68,14 @@ impl CscsApi {
5468
system_name,
5569
account,
5670
name,
57-
None,
58-
Some(script_path),
59-
workingdir.map(|p| p.to_path_buf()),
60-
envvars,
71+
JobOptions {
72+
script: None,
73+
script_path: Some(script_path),
74+
working_dir: workingdir.map(|p| p.to_path_buf()),
75+
envvars,
76+
stdout: options.stdout,
77+
stderr: options.stderr,
78+
},
6179
)
6280
.await?;
6381

@@ -231,10 +249,14 @@ mod tests {
231249
"",
232250
None,
233251
"",
234-
None,
235-
None,
236-
None,
237-
HashMap::new()
252+
JobOptions {
253+
script: None,
254+
script_path: None,
255+
working_dir: None,
256+
envvars: HashMap::new(),
257+
stdout: None,
258+
stderr: None
259+
}
238260
),
239261
Result<PostJobSubmissionResponse>
240262
))
@@ -243,7 +265,14 @@ mod tests {
243265
Result<PostJobSubmissionResponse>
244266
));
245267
let result = client
246-
.start_job("test", None, "test", PathBuf::from("/test"), HashMap::new())
268+
.start_job(
269+
"test",
270+
None,
271+
"test",
272+
PathBuf::from("/test"),
273+
HashMap::new(),
274+
JobStartOptions::default(),
275+
)
247276
.await;
248277
assert_ok!(result);
249278
}

coman/src/cscs/cli.rs

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,12 @@ use tokio::{
1818
use crate::{
1919
config::ComputePlatform,
2020
cscs::{
21-
api_client::types::JobStatus,
21+
api_client::{client::JobStartOptions, types::JobStatus},
2222
handlers::{
2323
cscs_file_download, cscs_file_list, cscs_file_upload, cscs_job_cancel, cscs_job_details, cscs_job_list,
2424
cscs_job_log, cscs_login, cscs_start_job, cscs_system_list, cscs_system_set,
2525
},
2626
},
27-
util::types::DockerImageUrl,
2827
};
2928

3029
pub(crate) async fn cli_cscs_login() -> Result<()> {
@@ -103,30 +102,12 @@ pub(crate) async fn cli_cscs_job_log(
103102
#[allow(clippy::too_many_arguments)]
104103
pub(crate) async fn cli_cscs_job_start(
105104
name: Option<String>,
106-
script_file: Option<PathBuf>,
107-
image: Option<DockerImageUrl>,
108-
command: Option<Vec<String>>,
109-
workdir: Option<String>,
110-
env: Vec<(String, String)>,
111-
mount: Vec<(String, String)>,
105+
options: JobStartOptions,
112106
system: Option<String>,
113107
platform: Option<ComputePlatform>,
114108
account: Option<String>,
115109
) -> Result<()> {
116-
match cscs_start_job(
117-
name,
118-
script_file,
119-
image,
120-
command,
121-
workdir,
122-
env,
123-
mount,
124-
system,
125-
platform,
126-
account,
127-
)
128-
.await
129-
{
110+
match cscs_start_job(name, options, system, platform, account).await {
130111
Ok(_) => {
131112
println!("Job started");
132113
Ok(())

coman/src/cscs/handlers.rs

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,15 @@ use crate::{
1111
config::{ComputePlatform, Config},
1212
cscs::{
1313
api_client::{
14-
client::CscsApi,
14+
client::{CscsApi, JobStartOptions},
1515
types::{FileStat, FileSystemType, Job, JobDetail, PathEntry, PathType, S3Upload, System, UserInfo},
1616
},
1717
oauth2::{
1818
CLIENT_ID_SECRET_NAME, CLIENT_SECRET_SECRET_NAME, client_credentials_login, finish_cscs_device_login,
1919
start_cscs_device_login,
2020
},
2121
},
22-
util::{
23-
keyring::{Secret, get_secret, store_secret},
24-
types::DockerImageUrl,
25-
},
22+
util::keyring::{Secret, get_secret, store_secret},
2623
};
2724

2825
const CSCS_MAX_DIRECT_SIZE: usize = 5242880;
@@ -134,10 +131,19 @@ pub async fn cscs_job_log(
134131
return Err(eyre!("couldn't find job {}", job_id));
135132
}
136133
let path = if stderr {
137-
PathBuf::from(job.unwrap().stderr)
134+
job.unwrap().stderr
138135
} else {
139-
PathBuf::from(job.unwrap().stdout)
136+
job.unwrap().stdout
140137
};
138+
if path.is_empty() {
139+
return Err(eyre!(
140+
"No {} log exists for job {}",
141+
if stderr { "stderr" } else { "stdout" },
142+
job_id
143+
));
144+
}
145+
146+
let path = PathBuf::from(path);
141147
api_client.tail(current_system, path, 100).await
142148
}
143149
Err(e) => Err(e),
@@ -160,12 +166,7 @@ pub async fn cscs_job_cancel(job_id: i64, system: Option<String>, platform: Opti
160166
#[allow(clippy::too_many_arguments)]
161167
pub async fn cscs_start_job(
162168
name: Option<String>,
163-
script_file: Option<PathBuf>,
164-
image: Option<DockerImageUrl>,
165-
command: Option<Vec<String>>,
166-
container_workdir: Option<String>,
167-
env: Vec<(String, String)>,
168-
mount: Vec<(String, String)>,
169+
options: JobStartOptions,
169170
system: Option<String>,
170171
platform: Option<ComputePlatform>,
171172
account: Option<String>,
@@ -193,12 +194,15 @@ pub async fn cscs_start_job(
193194
return Err(eyre!("couldn't get system description for {}", current_system));
194195
}
195196
};
196-
let container_workdir = container_workdir.unwrap_or(config.cscs.workdir.unwrap_or("/scratch".to_owned()));
197+
let container_workdir = options
198+
.container_workdir
199+
.clone()
200+
.unwrap_or(config.cscs.workdir.unwrap_or("/scratch".to_owned()));
197201
let base_path = scratch.join(user_info.name.clone()).join(&job_name);
198202

199203
let mut envvars = config.cscs.env.clone();
200-
envvars.extend(env);
201-
let mut mount: HashMap<String, String> = mount.into_iter().collect();
204+
envvars.extend(options.env.clone());
205+
let mut mount: HashMap<String, String> = options.mount.clone().into_iter().collect();
202206
mount.entry("${SCRATCH}".to_owned()).or_insert("/scratch".to_owned());
203207

204208
let mut tera = tera::Tera::default();
@@ -207,7 +211,7 @@ pub async fn cscs_start_job(
207211
let environment_template = config.cscs.edf_file_template;
208212
tera.add_raw_template("environment.toml", &environment_template)?;
209213

210-
let docker_image = image.unwrap_or(config.cscs.image.try_into()?);
214+
let docker_image = options.image.clone().unwrap_or(config.cscs.image.try_into()?);
211215
let meta = docker_image.inspect().await?;
212216
if let Some(system_info) = config.cscs.systems.get(current_system) {
213217
let mut compatible = false;
@@ -246,13 +250,18 @@ pub async fn cscs_start_job(
246250

247251
// upload script
248252
let script_path = base_path.join("script.sh");
249-
let script_template = script_file
253+
let script_template = options
254+
.script_file
255+
.clone()
250256
.map(std::fs::read_to_string)
251257
.unwrap_or(Ok(config.cscs.sbatch_script_template))?;
252258
tera.add_raw_template("script.sh", &script_template)?;
253259
let mut context = tera::Context::new();
254260
context.insert("name", &job_name);
255-
context.insert("command", &command.unwrap_or(config.cscs.command).join(" "));
261+
context.insert(
262+
"command",
263+
&options.command.clone().unwrap_or(config.cscs.command).join(" "),
264+
);
256265
context.insert("environment_file", &environment_path);
257266
context.insert("container_workdir", &container_workdir);
258267
let script = tera.render("script.sh", &context)?;
@@ -262,7 +271,7 @@ pub async fn cscs_start_job(
262271

263272
// start job
264273
api_client
265-
.start_job(current_system, account, &job_name, script_path, envvars)
274+
.start_job(current_system, account, &job_name, script_path, envvars, options)
266275
.await?;
267276
Ok(())
268277
}
@@ -294,6 +303,11 @@ pub async fn cscs_file_download(
294303
system: Option<String>,
295304
platform: Option<ComputePlatform>,
296305
) -> Result<Option<(i64, Url, usize)>> {
306+
let local = if local.is_dir() {
307+
local.join(remote.file_name().ok_or(eyre!("couldn't get name of remote file"))?)
308+
} else {
309+
local
310+
};
297311
match get_access_token().await {
298312
Ok(access_token) => {
299313
let api_client = CscsApi::new(access_token.0, platform).unwrap();

coman/src/cscs/ports.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ impl PollAsync<UserEvent> for AsyncJobLogPort {
188188
match val {
189189
JobLogAction::Job(jobid) => {
190190
self.current_job = Some(jobid);
191+
self.stderr = false;
191192
}
192193
JobLogAction::SwitchLog => {
193194
self.stderr = !self.stderr;
@@ -200,10 +201,19 @@ impl PollAsync<UserEvent> for AsyncJobLogPort {
200201
if let Some(job_id) = self.current_job {
201202
match cscs_job_log(job_id as i64, self.stderr, None, None).await {
202203
Ok(log) => Ok(Some(Event::User(UserEvent::Cscs(CscsEvent::GotJobLog(log))))),
203-
Err(e) => Ok(Some(Event::User(UserEvent::Error(format!(
204-
"{:?}",
205-
Err::<(), Report>(e).wrap_err("couldn't get log")
206-
))))),
204+
Err(e) => {
205+
// if there was an error getting the log, if it's stderr, switch to stdout which should
206+
// always exist. If we're on stdout and it doesn't exist, unset log watching to not spam errors
207+
if self.stderr {
208+
self.stderr = false;
209+
} else {
210+
self.current_job = None;
211+
}
212+
Ok(Some(Event::User(UserEvent::Error(format!(
213+
"{:?}",
214+
Err::<(), Report>(e).wrap_err("couldn't get log")
215+
)))))
216+
}
207217
}
208218
} else {
209219
Ok(Some(Event::None))

0 commit comments

Comments
 (0)