Skip to content

Commit 8432f82

Browse files
committed
Add FOM on the vm_id that executes the job
This is occasionally useful as an inexact way to locate the VM that runs rank 0.

Sample output:

```
modifier::GcpMetadata::machine-type = h3-standard-88
modifier::GcpMetadata::image = projects/schedmd-slurm-public/global/images/slurm-gcp-6-6-hpc-rocky-linux-8-1721332881
modifier::GcpMetadata::ghostname = caeslurmv6-h3nodeset-0.c.hpc-workload-performance.internal
modifier::GcpMetadata::main-gid = 6390746155122030581
modifier::GcpMetadata::gids = 4685835775755810805, 6390746155122030581
modifier::GcpMetadata::project-id = 767663353010
modifier::GcpMetadata::Level 0 Groups (cluster) = 1
modifier::GcpMetadata::Level 1 Groups (rack) = 1
modifier::GcpMetadata::Level 2 Groups (host) = 2
modifier::GcpMetadata::All Hosts = e2bf4b97dbb6b9490659f4dccf029b7d/7431cbebb4a74fea0965ac21f2c4de01/f63c0e19559b2cb3622e5a3b481d7cfc,e2bf4b97dbb6b9490659f4dccf029b7d/7431cbebb4a74fea0965ac21f2c4de01/2e9aa20cd62c8f42860812d9e4e6002c
```
1 parent 355c245 commit 8432f82

File tree

1 file changed

+22
-8
lines changed
  • var/ramble/repos/builtin/modifiers/gcp-metadata

1 file changed

+22
-8
lines changed

var/ramble/repos/builtin/modifiers/gcp-metadata/modifier.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,26 +88,34 @@ def gcp_metadata_exec(self, executable_name, executable, app_inst=None):
8888
)
8989

9090
payloads = [
91-
# type, end point, per_node
92-
("instance", "machine-type", False),
93-
("instance", "image", False),
94-
("instance", "hostname", False),
91+
# type, end point, per_node, log_name
92+
("instance", "machine-type", False, None),
93+
("instance", "image", False, None),
94+
("instance", "hostname", False, None),
9595
(
9696
"instance",
9797
"id",
9898
True,
99+
None,
99100
), # True since we want the gid of every node
100-
("project", "numeric-project-id", False),
101-
("instance", "attributes/physical_host", True),
101+
("project", "numeric-project-id", False, None),
102+
("instance", "attributes/physical_host", True, None),
102103
]
103104

104-
for type, end_point, per_node in payloads:
105+
n_nodes = int(self.expander.expand_var_name("n_nodes"))
106+
if n_nodes > 1 and self._usage_mode != "local":
107+
# Single-out the vm_id of the executing-node
108+
payloads.append(("instance", "id", False, "main-gid"))
109+
110+
for type, end_point, per_node, log_name in payloads:
105111
prefix = ""
106112
suffix = ""
107113
if per_node:
108114
prefix = self.expander.expand_var("{metadata_parallel_prefix}")
109115
suffix = self.expander.expand_var("{metadata_parallel_suffix}")
110-
log_name = end_point.split("/")[-1]
116+
log_name = (
117+
log_name if log_name is not None else end_point.split("/")[-1]
118+
)
111119
pre_cmds.append(
112120
CommandExecutable(
113121
"machine-type",
@@ -221,6 +229,12 @@ def _prepare_analysis(self, workspace):
221229
group_name="ghostname",
222230
log_file="{experiment_run_dir}/gcp-metadata.hostname.log",
223231
)
232+
figure_of_merit(
233+
"main-gid",
234+
fom_regex=r"(?P<gid>.*)",
235+
group_name="gid",
236+
log_file="{experiment_run_dir}/gcp-metadata.main-gid.log",
237+
)
224238

225239
# This returns a list of all known gids in the job
226240
figure_of_merit(

0 commit comments

Comments
 (0)