Skip to content

Commit 01d5e1b

Browse files
dianargslabasan
authored and committed
Update spank plugin to avoid popen() call.
Signed-off-by: Diana Guttman <[email protected]> Signed-off-by: Christopher M. Cantalupo <[email protected]> Signed-off-by: Brad Geltz <[email protected]>
1 parent f609247 commit 01d5e1b

2 files changed

Lines changed: 144 additions & 87 deletions

File tree

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ check: msrsave/msrsave_test
4949
msrsave/msrsave_test
5050

5151
msrsave/msrsave.o: msrsave/msrsave.c msrsave/msrsave.h
52+
$(CC) $(CFLAGS) -fPIC -shared -c msrsave/msrsave.c -o $@
5253

5354
msrsave/msrsave_main.o: msrsave/msrsave_main.c msrsave/msrsave.h
5455

@@ -65,7 +66,7 @@ spank: msrsave/libspank_msrsafe.so
6566
msrsave/spank_msrsafe.o: msrsave/spank_msrsafe.c
6667
$(CC) $(CFLAGS) $(SLURM_CFLAGS) -c $^ -o $@
6768

68-
msrsave/libspank_msrsafe.so: msrsave/spank_msrsafe.o
69+
msrsave/libspank_msrsafe.so: msrsave/spank_msrsafe.o msrsave/msrsave.o
6970
$(CC) $(LDFLAGS) $(SLURM_LDFLAGS) $^ -o $@
7071

7172
INSTALL ?= install

msrsave/spank_msrsafe.c

Lines changed: 142 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
* The msr-safe Linux kernel module enables user access to read and
3535
* write capabilities for a restricted set of whitelisted Model
3636
* Specific Registers (MSRs) on x86 platforms. The purpose of this
37-
* slurm plugin is to ensure that MSRs modified within a users slurm
37+
* slurm plugin is to ensure that MSRs modified within a user's slurm
3838
* job allocation are reset to their original state before the compute
3939
* node is returned to the pool available to other users of the
4040
* system. The msr-safe kernel module is targeting HPC systems that
@@ -49,129 +49,185 @@
4949
#include <stdio.h>
5050
#include <signal.h>
5151
#include <errno.h>
52+
#include <limits.h>
53+
#include <unistd.h>
54+
#include <string.h>
5255

5356
#include "slurm/spank.h"
5457

58+
#include "msrsave.h"
59+
5560
#define SLURM_SPANK_MSRSAFE_BUFFER_SIZE 1024
5661

5762
SPANK_PLUGIN(msr-safe, 1);
5863

64+
int slurm_spank_init(spank_t spank_ctx, int argc, char **argv);
65+
int slurm_spank_slurmd_init(spank_t spank_ctx, int argc, char **argv);
5966
int slurm_spank_job_prolog(spank_t spank_ctx, int argc, char **argv);
6067
int slurm_spank_job_epilog(spank_t spank_ctx, int argc, char **argv);
6168

62-
static int slurm_spank_msrsafe_system(const char *cmd);
63-
static void slurm_spank_msrsafe_popen_complete(int signum);
64-
static int slurm_spank_msrsafe_popen(const char *cmd, FILE **fid);
69+
static int slurm_spank_msrsafe_read_log(FILE *log_fid);
6570

66-
static volatile unsigned g_is_popen_complete = 0;
67-
static struct sigaction g_popen_complete_signal_action;
71+
#ifdef SLURM_SPANK_MSRSAVE_TEST
/* If test is defined then print to standard output rather
   than slurm log. */

#include <stdio.h>
#define slurm_info printf

/*
 * Stand-alone test driver: runs the prolog (save) hook followed by the
 * epilog (restore) hook and prints their log output to standard output.
 *
 * The hooks in this file never dereference the spank context, so a NULL
 * handle is passed.  It is explicitly initialized because passing an
 * uninitialized automatic variable by value reads an indeterminate value.
 *
 * Always returns 0; the hook return codes are not reflected in the exit
 * status.
 */
int main(int argc, char **argv)
{
    spank_t spank_ctx = NULL;

    (void)argc;
    (void)argv;

    printf("SAVE SCRIPT:\n");
    slurm_spank_job_prolog(spank_ctx, 0, NULL);
    printf("\n\nRESTORE SCRIPT:\n");
    slurm_spank_job_epilog(spank_ctx, 0, NULL);
    printf("\n\n");
    return 0;
}
/* END TEST PROGRAM */

#endif
10491

105-
static int slurm_spank_msrsafe_system(const char *cmd)
92+
static int slurm_spank_msrsafe_read_log(FILE *log_fid)
10693
{
107-
const size_t buffer_size = SLURM_SPANK_MSRSAFE_BUFFER_SIZE - 1;
10894
char buffer[SLURM_SPANK_MSRSAFE_BUFFER_SIZE];
109-
FILE *fid = NULL;
110-
int err = slurm_spank_msrsafe_popen(cmd, &fid);
111-
if (!err) {
112-
size_t num_read = 0;
95+
size_t buffer_pos = 0;
96+
int err = 0;
97+
int character = 0;
98+
99+
if (log_fid == NULL) {
100+
err = 1;
101+
}
102+
else {
113103
do {
114-
num_read = fread(buffer, sizeof(*buffer), buffer_size, fid);
115-
buffer[num_read] = '\0';
116-
if (num_read) {
104+
character = fgetc(log_fid);
105+
if (character == '\n' ||
106+
character == EOF ||
107+
buffer_pos == SLURM_SPANK_MSRSAFE_BUFFER_SIZE - 1) {
108+
buffer[buffer_pos] = '\0';
117109
slurm_info("%s", buffer);
110+
buffer_pos = 0;
118111
}
119-
} while (num_read == buffer_size);
120-
err = pclose(fid);
112+
else {
113+
buffer[buffer_pos] = character;
114+
++buffer_pos;
115+
}
116+
} while (character != EOF);
121117
}
122118
return err;
123119
}
124120

125-
#else /* BEGIN TEST PROGRAM */
126-
/* If test is defined then print the scripts to standard output rather
127-
than executing them. */
128-
129-
#include <stdio.h>
130-
#define slurm_spank_msrsafe_system printf
121+
#ifndef SLURM_SPANK_MSRSAVE_FILE_PREFIX
122+
#define SLURM_SPANK_MSRSAVE_FILE_PREFIX "/var/run/slurm-msrsave"
123+
#endif
131124

132-
int main(int argc, char **argv)
125+
/*
 * SPANK init hook: logs that the plugin has been loaded.
 *
 * None of the arguments are used.  Always returns 0.
 */
int slurm_spank_init(spank_t spank_ctx, int argc, char **argv)
{
    const int status = 0;

    (void)spank_ctx;
    (void)argc;
    (void)argv;

    slurm_info("Loaded msrsave restore plugin.");

    return status;
}
149130

150-
#endif /* END TEST PROGRAM */
151-
152-
#ifndef SLURM_SPANK_MSRSAVE_FILE_PREFIX
153-
#define SLURM_SPANK_MSRSAVE_FILE_PREFIX "/var/run/slurm-msrsave"
154-
#endif
131+
/*
 * SPANK slurmd init hook: logs that the plugin has been loaded.
 *
 * None of the arguments are used.  Always returns 0.  The explicit return
 * matters: falling off the end of a non-void function leaves the value
 * seen by the caller undefined.
 */
int slurm_spank_slurmd_init(spank_t spank_ctx, int argc, char **argv)
{
    (void)spank_ctx;
    (void)argc;
    (void)argv;

    slurm_info("Loaded msrsave restore plugin.");
    return 0;
}
155135

156136
int slurm_spank_job_prolog(spank_t spank_ctx, int argc, char **argv)
157137
{
158-
const char *save_script = "if [ -e /dev/cpu/msr_whitelist ]; then "
159-
"tmp_file=$(mktemp " SLURM_SPANK_MSRSAVE_FILE_PREFIX "-$(hostname -s).XXXXXXXXXX) && "
160-
"/usr/sbin/msrsave $tmp_file 2>&1; "
161-
"fi";
162-
return slurm_spank_msrsafe_system(save_script);
138+
slurm_info("Running msr-safe plugin to save register values.");
139+
140+
int err = 0;
141+
FILE *out_log = NULL;
142+
char out_log_name[NAME_MAX * 2];
143+
char msrsave_file[NAME_MAX * 2];
144+
const char *whitelist_path = "/dev/cpu/msr_whitelist";
145+
const char *msr_path = "/dev/cpu/%d/msr_safe";
146+
int num_cpu = sysconf(_SC_NPROCESSORS_ONLN);
147+
char hostname[NAME_MAX];
148+
hostname[NAME_MAX - 1] = '\0';
149+
err = gethostname(hostname, NAME_MAX - 1);
150+
if (err) {
151+
slurm_info("gethostname failed.");
152+
}
153+
if (!err) {
154+
snprintf(out_log_name, NAME_MAX * 2, "/tmp/slurm-msrsave-outlog-%s.XXXXXXXXXX", hostname);
155+
err = mkstemp(out_log_name);
156+
if (err) {
157+
slurm_info("failed to create msrsave output log");
158+
}
159+
}
160+
if (!err) {
161+
out_log = fopen(out_log_name, "w+");
162+
if (out_log == NULL) {
163+
slurm_info("failed to open %s for writing", out_log_name);
164+
}
165+
}
166+
if (!err) {
167+
snprintf(msrsave_file, NAME_MAX * 2, "%s-%s", SLURM_SPANK_MSRSAVE_FILE_PREFIX, hostname);
168+
err = msr_save(msrsave_file, whitelist_path, msr_path, num_cpu, out_log, out_log);
169+
if (err) {
170+
slurm_info("msr_save failed:");
171+
}
172+
rewind(out_log);
173+
slurm_spank_msrsafe_read_log(out_log);
174+
}
175+
if (!err) {
176+
slurm_info("Completed msr-safe plugin to save register values.");
177+
}
178+
if (out_log) {
179+
fclose(out_log);
180+
unlink(out_log_name);
181+
}
182+
return err;
163183
}
164184

165185
int slurm_spank_job_epilog(spank_t spank_ctx, int argc, char **argv)
166186
{
167-
const char *restore_script = "if [ -e /dev/cpu/msr_whitelist ]; then "
168-
"tmp_files=$(ls -t " SLURM_SPANK_MSRSAVE_FILE_PREFIX "-$(hostname -s).*) && "
169-
"tmp_file=$(echo $tmp_files | head -n1) && "
170-
"/usr/sbin/msrsave -r $tmp_file 2>&1 && "
171-
"rm $tmp_file 2>&1; "
172-
"fi";
173-
174-
return slurm_spank_msrsafe_system(restore_script);
187+
slurm_info("Running msr-safe plugin to restore register values.");
188+
int err = 0;
189+
FILE *out_log = NULL;
190+
char out_log_name[NAME_MAX * 2];
191+
char msrsave_file[NAME_MAX * 2];
192+
const char *whitelist_path = "/dev/cpu/msr_whitelist";
193+
const char *msr_path = "/dev/cpu/%d/msr_safe";
194+
int num_cpu = sysconf(_SC_NPROCESSORS_ONLN);
195+
char hostname[NAME_MAX];
196+
hostname[NAME_MAX - 1] = '\0';
197+
err = gethostname(hostname, NAME_MAX - 1);
198+
if (err) {
199+
slurm_info("gethostname failed.");
200+
}
201+
if (!err) {
202+
snprintf(out_log_name, NAME_MAX * 2, "/tmp/slurm-msrsave-outlog-%s.XXXXXXXXXX", hostname);
203+
err = mkstemp(out_log_name);
204+
if (err) {
205+
slurm_info("failed to create msrsave output log");
206+
}
207+
}
208+
if (!err) {
209+
out_log = fopen(out_log_name, "w+");
210+
if (out_log == NULL) {
211+
slurm_info("failed to open %s for writing", out_log_name);
212+
}
213+
}
214+
if (!err) {
215+
snprintf(msrsave_file, NAME_MAX * 2, "%s-%s", SLURM_SPANK_MSRSAVE_FILE_PREFIX, hostname);
216+
err = msr_restore(msrsave_file, whitelist_path, msr_path, num_cpu, out_log, out_log);
217+
if (err) {
218+
slurm_info("msr_restore failed:");
219+
}
220+
rewind(out_log);
221+
slurm_spank_msrsafe_read_log(out_log);
222+
}
223+
if (!err) {
224+
slurm_info("Completed msr-safe plugin to restore register values.");
225+
}
226+
if (out_log) {
227+
fclose(out_log);
228+
unlink(out_log_name);
229+
}
230+
return err;
175231
}
176232

177233
#undef SLURM_SPANK_MSRSAVE_FILE_PREFIX

0 commit comments

Comments
 (0)