Skip to content

Commit d41a9e0

Browse files
committed
allow overriding platform and system
1 parent 2545dcd commit d41a9e0

11 files changed

Lines changed: 137 additions & 64 deletions

File tree

Cargo.lock

Lines changed: 14 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coman/.config/config.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
[cscs]
22
current_system = "daint"
3+
current_platform = "HPC"
34

45
image = "ubuntu"
56

coman/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,7 @@ oci-distribution = "0.11.0"
6969
docker_credential = "1.3.2"
7070
chrono = "0.4.42"
7171
openssl = { version = "0.10.75", features = ["vendored"] }
72+
strum_macros = "0.27.2"
7273

7374
[build-dependencies]
7475
anyhow = "1.0.90"

coman/src/cli.rs

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
use std::{error::Error, path::PathBuf};
22

3-
use clap::{Parser, Subcommand};
3+
use clap::{Parser, Subcommand, builder::TypedValueParser};
4+
use strum::VariantNames;
45

56
use crate::{
6-
config::{get_config_dir, get_data_dir},
7+
config::{ComputePlatform, get_config_dir, get_data_dir},
78
util::types::DockerImageUrl,
89
};
910

@@ -13,6 +14,10 @@ pub enum CliCommands {
1314
Cscs {
1415
#[command(subcommand)]
1516
command: CscsCommands,
17+
#[clap(short, long, help = "override compute system (e.g. 'eiger', 'daint')")]
18+
system: Option<String>,
19+
#[clap(short, long, ignore_case=true, value_parser=clap::builder::PossibleValuesParser::new(ComputePlatform::VARIANTS).map(|s|s.parse::<ComputePlatform>().unwrap()),help = "override compute platform (one of 'hpc', 'ml' or 'cw')")]
20+
platform: Option<ComputePlatform>,
1621
},
1722
#[clap(about = "Create a new project configuration file")]
1823
Init {

coman/src/config.rs

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ use directories::ProjectDirs;
77
use eyre::eyre;
88
use lazy_static::lazy_static;
99
use serde::{Deserialize, Serialize};
10+
use strum_macros::{EnumString, VariantNames};
1011

1112
const DEFAULT_CONFIG_TOML: &str = include_str!("../.config/config.toml");
1213

@@ -23,11 +24,23 @@ pub struct AppConfig {
2324
pub config_dir: PathBuf,
2425
}
2526

27+
#[derive(Clone, Debug, Serialize, Deserialize, Default, strum::Display, EnumString, VariantNames)]
28+
#[strum(serialize_all = "lowercase")]
29+
#[allow(clippy::upper_case_acronyms)]
30+
pub enum ComputePlatform {
31+
#[default]
32+
HPC,
33+
ML,
34+
CW,
35+
}
36+
2637
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
2738
pub struct CscsConfig {
2839
#[serde(default)]
2940
pub current_system: String,
3041
#[serde(default)]
42+
pub current_platform: ComputePlatform,
43+
#[serde(default)]
3144
pub sbatch_script_template: String,
3245
#[serde(default)]
3346
pub workdir: Option<String>,

coman/src/cscs/api_client.rs

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,10 @@ use firecrest_client::{
1919
};
2020
use strum::Display;
2121

22-
use crate::trace_dbg;
22+
use crate::{
23+
config::{ComputePlatform, Config},
24+
trace_dbg,
25+
};
2326

2427
#[derive(Debug, Eq, Clone, PartialEq, PartialOrd, Ord, tabled::Tabled)]
2528
pub struct UserInfo {
@@ -251,9 +254,13 @@ pub struct CscsApi {
251254
}
252255

253256
impl CscsApi {
254-
pub fn new(token: String) -> Result<Self> {
257+
pub fn new(token: String, platform: Option<ComputePlatform>) -> Result<Self> {
258+
let config = Config::new()?;
255259
let client = FirecrestClient::default()
256-
.base_path("https://api.cscs.ch/hpc/firecrest/v2/".to_owned())?
260+
.base_path(format!(
261+
"https://api.cscs.ch/{}/firecrest/v2/",
262+
platform.unwrap_or(config.cscs.current_platform)
263+
))?
257264
.token(token);
258265
Ok(Self { client })
259266
}

coman/src/cscs/cli.rs

Lines changed: 26 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ use color_eyre::{Result, eyre::Context};
44
use inquire::{Password, Text};
55

66
use crate::{
7+
config::ComputePlatform,
78
cscs::handlers::{
89
cscs_job_cancel, cscs_job_details, cscs_job_list, cscs_job_log, cscs_login, cscs_start_job, cscs_system_list,
910
cscs_system_set,
@@ -23,8 +24,8 @@ pub(crate) async fn cli_cscs_login() -> Result<()> {
2324
};
2425
Ok(())
2526
}
26-
pub(crate) async fn cli_cscs_job_list() -> Result<()> {
27-
match cscs_job_list().await {
27+
pub(crate) async fn cli_cscs_job_list(system: Option<String>, platform: Option<ComputePlatform>) -> Result<()> {
28+
match cscs_job_list(system, platform).await {
2829
Ok(jobs) => {
2930
let mut table = tabled::Table::new(jobs);
3031
table.with(tabled::settings::Style::modern());
@@ -34,8 +35,12 @@ pub(crate) async fn cli_cscs_job_list() -> Result<()> {
3435
Err(e) => Err(e),
3536
}
3637
}
37-
pub(crate) async fn cli_cscs_job_detail(job_id: i64) -> Result<()> {
38-
match cscs_job_details(job_id).await {
38+
pub(crate) async fn cli_cscs_job_detail(
39+
job_id: i64,
40+
system: Option<String>,
41+
platform: Option<ComputePlatform>,
42+
) -> Result<()> {
43+
match cscs_job_details(job_id, system, platform).await {
3944
Ok(Some(job)) => {
4045
let data = &[
4146
("Id", job.id.to_string()),
@@ -65,8 +70,12 @@ pub(crate) async fn cli_cscs_job_detail(job_id: i64) -> Result<()> {
6570
}
6671
}
6772

68-
pub(crate) async fn cli_cscs_job_log(job_id: i64) -> Result<()> {
69-
match cscs_job_log(job_id).await {
73+
pub(crate) async fn cli_cscs_job_log(
74+
job_id: i64,
75+
system: Option<String>,
76+
platform: Option<ComputePlatform>,
77+
) -> Result<()> {
78+
match cscs_job_log(job_id, system, platform).await {
7079
Ok(content) => {
7180
println!("{}", content);
7281
Ok(())
@@ -81,16 +90,22 @@ pub(crate) async fn cli_cscs_job_start(
8190
command: Option<Vec<String>>,
8291
workdir: Option<String>,
8392
env: Vec<(String, String)>,
93+
system: Option<String>,
94+
platform: Option<ComputePlatform>,
8495
) -> Result<()> {
85-
cscs_start_job(script_file, image, command, workdir, env).await
96+
cscs_start_job(script_file, image, command, workdir, env, system, platform).await
8697
}
8798

88-
pub(crate) async fn cli_cscs_job_cancel(job_id: i64) -> Result<()> {
89-
cscs_job_cancel(job_id).await
99+
pub(crate) async fn cli_cscs_job_cancel(
100+
job_id: i64,
101+
system: Option<String>,
102+
platform: Option<ComputePlatform>,
103+
) -> Result<()> {
104+
cscs_job_cancel(job_id, system, platform).await
90105
}
91106

92-
pub(crate) async fn cli_cscs_system_list() -> Result<()> {
93-
match cscs_system_list().await {
107+
pub(crate) async fn cli_cscs_system_list(platform: Option<ComputePlatform>) -> Result<()> {
108+
match cscs_system_list(platform).await {
94109
Ok(systems) => {
95110
let mut table = tabled::Table::new(systems);
96111
table.with(tabled::settings::Style::modern());

coman/src/cscs/handlers.rs

Lines changed: 42 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ use std::path::PathBuf;
33
use color_eyre::{Result, eyre::eyre};
44

55
use crate::{
6-
config::Config,
6+
config::{ComputePlatform, Config},
77
cscs::{
88
api_client::{CscsApi, FileSystemType, Job, JobDetail, System},
99
oauth2::{
@@ -54,10 +54,10 @@ pub async fn cscs_login_device_code() -> Result<(Secret, Option<Secret>)> {
5454
finish_cscs_device_login(details).await
5555
}
5656

57-
pub async fn cscs_system_list() -> Result<Vec<System>> {
57+
pub async fn cscs_system_list(platform: Option<ComputePlatform>) -> Result<Vec<System>> {
5858
match get_access_token().await {
5959
Ok(access_token) => {
60-
let api_client = CscsApi::new(access_token.0).unwrap();
60+
let api_client = CscsApi::new(access_token.0, platform).unwrap();
6161
api_client.list_systems().await
6262
}
6363
Err(e) => Err(e),
@@ -77,51 +77,62 @@ pub async fn cscs_system_set(system_name: String, global: bool) -> Result<()> {
7777
Ok(())
7878
}
7979

80-
pub async fn cscs_job_list() -> Result<Vec<Job>> {
80+
pub async fn cscs_job_list(system: Option<String>, platform: Option<ComputePlatform>) -> Result<Vec<Job>> {
8181
match get_access_token().await {
8282
Ok(access_token) => {
83-
let api_client = CscsApi::new(access_token.0).unwrap();
83+
let api_client = CscsApi::new(access_token.0, platform).unwrap();
8484
let config = Config::new().unwrap();
85-
api_client.list_jobs(&config.cscs.current_system, Some(true)).await
85+
api_client
86+
.list_jobs(&system.unwrap_or(config.cscs.current_system), Some(true))
87+
.await
8688
}
8789
Err(e) => Err(e),
8890
}
8991
}
9092

91-
pub async fn cscs_job_details(job_id: i64) -> Result<Option<JobDetail>> {
93+
pub async fn cscs_job_details(
94+
job_id: i64,
95+
system: Option<String>,
96+
platform: Option<ComputePlatform>,
97+
) -> Result<Option<JobDetail>> {
9298
match get_access_token().await {
9399
Ok(access_token) => {
94-
let api_client = CscsApi::new(access_token.0).unwrap();
100+
let api_client = CscsApi::new(access_token.0, platform).unwrap();
95101
let config = Config::new().unwrap();
96-
api_client.get_job(&config.cscs.current_system, job_id).await
102+
api_client
103+
.get_job(&system.unwrap_or(config.cscs.current_system), job_id)
104+
.await
97105
}
98106
Err(e) => Err(e),
99107
}
100108
}
101109

102-
pub async fn cscs_job_log(job_id: i64) -> Result<String> {
110+
pub async fn cscs_job_log(job_id: i64, system: Option<String>, platform: Option<ComputePlatform>) -> Result<String> {
103111
match get_access_token().await {
104112
Ok(access_token) => {
105-
let api_client = CscsApi::new(access_token.0).unwrap();
113+
let api_client = CscsApi::new(access_token.0, platform).unwrap();
106114
let config = Config::new().unwrap();
107-
let job = api_client.get_job(&config.cscs.current_system, job_id).await?;
115+
let current_system = &system.unwrap_or(config.cscs.current_system);
116+
let job = api_client.get_job(current_system, job_id).await?;
108117
if job.is_none() {
109118
return Err(eyre!("couldn't find job {}", job_id));
110119
}
111120
api_client
112-
.tail(&config.cscs.current_system, PathBuf::from(job.unwrap().stdout), 100)
121+
.tail(current_system, PathBuf::from(job.unwrap().stdout), 100)
113122
.await
114123
}
115124
Err(e) => Err(e),
116125
}
117126
}
118127

119-
pub async fn cscs_job_cancel(job_id: i64) -> Result<()> {
128+
pub async fn cscs_job_cancel(job_id: i64, system: Option<String>, platform: Option<ComputePlatform>) -> Result<()> {
120129
match get_access_token().await {
121130
Ok(access_token) => {
122-
let api_client = CscsApi::new(access_token.0).unwrap();
131+
let api_client = CscsApi::new(access_token.0, platform).unwrap();
123132
let config = Config::new().unwrap();
124-
api_client.cancel_job(&config.cscs.current_system, job_id).await
133+
api_client
134+
.cancel_job(&system.unwrap_or(config.cscs.current_system), job_id)
135+
.await
125136
}
126137
Err(e) => Err(e),
127138
}
@@ -133,14 +144,17 @@ pub async fn cscs_start_job(
133144
command: Option<Vec<String>>,
134145
container_workdir: Option<String>,
135146
env: Vec<(String, String)>,
147+
system: Option<String>,
148+
platform: Option<ComputePlatform>,
136149
) -> Result<()> {
137150
match get_access_token().await {
138151
Ok(access_token) => {
139-
let api_client = CscsApi::new(access_token.0).unwrap();
152+
let api_client = CscsApi::new(access_token.0, platform).unwrap();
140153
let config = Config::new().unwrap();
141-
let user_info = api_client.get_userinfo(&config.cscs.current_system).await?;
142-
let current_system = api_client.get_system(&config.cscs.current_system).await?;
143-
let scratch = match current_system {
154+
let current_system = &system.unwrap_or(config.cscs.current_system);
155+
let user_info = api_client.get_userinfo(current_system).await?;
156+
let current_system_info = api_client.get_system(current_system).await?;
157+
let scratch = match current_system_info {
144158
Some(system) => PathBuf::from(
145159
system
146160
.file_systems
@@ -151,10 +165,7 @@ pub async fn cscs_start_job(
151165
.clone(),
152166
),
153167
None => {
154-
return Err(eyre!(
155-
"couldn't get system description for {}",
156-
config.cscs.current_system
157-
));
168+
return Err(eyre!("couldn't get system description for {}", current_system));
158169
}
159170
};
160171
let container_workdir = container_workdir.unwrap_or(config.cscs.workdir.unwrap_or("/scratch".to_owned()));
@@ -173,7 +184,7 @@ pub async fn cscs_start_job(
173184

174185
let docker_image = image.unwrap_or(config.cscs.image.try_into()?);
175186
let meta = docker_image.inspect().await?;
176-
if let Some(system_info) = config.cscs.systems.get(&config.cscs.current_system) {
187+
if let Some(system_info) = config.cscs.systems.get(current_system) {
177188
let mut compatible = false;
178189
for sys_platform in system_info.architecture.iter() {
179190
if meta.platforms.contains(&sys_platform.clone().into()) {
@@ -184,7 +195,7 @@ pub async fn cscs_start_job(
184195
if !compatible {
185196
return Err(eyre!(
186197
"System {} only supports images with architecture(s) '{}' but the supplied image is for architecture(s) '{}'",
187-
config.cscs.current_system,
198+
current_system,
188199
system_info.architecture.join(","),
189200
meta.platforms
190201
.iter()
@@ -201,16 +212,10 @@ pub async fn cscs_start_job(
201212
context.insert("env", &envvars);
202213

203214
let environment_file = tera.render("environment.toml", &context)?;
204-
api_client.mkdir(&config.cscs.current_system, base_path.clone()).await?;
205-
api_client
206-
.chmod(&config.cscs.current_system, base_path.clone(), "700")
207-
.await?;
215+
api_client.mkdir(current_system, base_path.clone()).await?;
216+
api_client.chmod(current_system, base_path.clone(), "700").await?;
208217
api_client
209-
.upload(
210-
&config.cscs.current_system,
211-
environment_path.clone(),
212-
environment_file.into_bytes(),
213-
)
218+
.upload(current_system, environment_path.clone(), environment_file.into_bytes())
214219
.await?;
215220

216221
// upload script
@@ -227,12 +232,12 @@ pub async fn cscs_start_job(
227232
context.insert("container_workdir", &container_workdir);
228233
let script = tera.render("script.sh", &context)?;
229234
api_client
230-
.upload(&config.cscs.current_system, script_path.clone(), script.into_bytes())
235+
.upload(current_system, script_path.clone(), script.into_bytes())
231236
.await?;
232237

233238
// start job
234239
api_client
235-
.start_job(&config.cscs.current_system, &name, script_path, envvars)
240+
.start_job(current_system, &name, script_path, envvars)
236241
.await?;
237242
Ok(())
238243
}

0 commit comments

Comments
 (0)