Skip to content

Commit ee8d2e1

Browse files
committed
ADD: stanage cluster settings for Sheffield University Stanage HPC cluster
1 parent 2412ce3 commit ee8d2e1

File tree

2 files changed

+180
-6
lines changed

2 files changed

+180
-6
lines changed

src/m/classes/clusters/frontera.m

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrin
124124
fprintf(fid,'#SBATCH -n %i \n',cluster.numnodes*max(cluster.nprocs()/cluster.numnodes,56));
125125
fprintf(fid,'#SBATCH -N %i \n',cluster.numnodes);
126126
fprintf(fid,'#SBATCH -t %02i:00:00 \n\n',ceil(cluster.time));
127+
if length(find(cluster.email=='@'))>0
128+
fprintf(fid,'#SBATCH --mail-user=%s \n',cluster.email);
129+
fprintf(fid,'#SBATCH --mail-type=all \n\n');
130+
end
127131
for i=1:numel(cluster.modules),
128132
fprintf(fid,['module load ' cluster.modules{i} '\n']);
129133
end
@@ -132,12 +136,6 @@ function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrin
132136
fprintf(fid,'export KMP_AFFINITY="granularity=fine,compact,verbose" \n\n');
133137
end
134138

135-
if length(find(cluster.email=='@'))>0
136-
fprintf(fid,'#SBATCH --mail-user=%s \n',cluster.email);
137-
fprintf(fid,'#SBATCH --mail-type=end \n\n');
138-
139-
%fprintf(fid,'ssh login1 "mail -s ''SLURM Jobid=${SLURM_JOBID} Name=${SLURM_JOB_NAME} Began on Lonestar 5.'' %s <<< ''Job Started'' " \n\n',cluster.email);
140-
end
141139

142140
fprintf(fid,'export PATH="$PATH:."\n\n');
143141
fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath); %FIXME

src/m/classes/clusters/stanage.m

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
%STANAGE (Sheffield University) cluster class definition
2+
%
3+
% Usage:
4+
% cluster=stanage();
5+
% cluster=stanage('np',3);
6+
% cluster=stanage('np',3,'login','username');
7+
8+
classdef stanage
	%STANAGE cluster class: builds, uploads and launches SLURM jobs on the
	%   Stanage HPC cluster (Sheffield University).
	%
	%   Properties are public and can be overridden through name/value pairs
	%   given to the constructor, e.g. stanage('numnodes',2,'login','username').
	properties (SetAccess=public)
		% {{{
		name          = 'stanage'          %passed as first argument to issmscpout/issmssh/issmscpin
		login         = '';                %user name on the cluster, also used to build the mail-user address
		numnodes      = 1;                 %written as #SBATCH --nodes
		cpuspernode   = 16;                %written as #SBATCH --ntasks-per-node
		codepath      = '';                %directory holding issm.exe and kriging.exe
		executionpath = '';                %remote directory under which the run directory is created
		interactive   = 0;                 %if non-zero, also create local .run/.errlog/.outlog files
		time          = 10;                %in hours
		memory        = 32;                %in Gb
		email         = 'END,FAIL';        %written verbatim as #SBATCH --mail-type (empty: no mail directives)
		email_domain  = 'sheffield.ac.uk'; %mail-user is <login>@<email_domain>
		deleteckptdata= 0;                 %if non-zero, the job removes *.rst and *.ckpt files at the end
	end
	%}}}
	methods
		function cluster=stanage(varargin) % {{{
			%STANAGE - constructor
			%
			%   Usage:
			%      cluster=stanage();
			%      cluster=stanage('np',3,'login','username');

			%initialize cluster using default settings if provided
			%(stanage_settings is run as a script in this workspace; presumably it
			% assigns fields of 'cluster' -- verify against the user's settings file)
			if (exist('stanage_settings','file')==2), stanage_settings; end

			%use provided options to change fields
			cluster=AssignObjectFields(pairoptions(varargin{:}),cluster);
		end
		%}}}
		function disp(cluster) % {{{
			% display the object
			disp(sprintf('class ''%s'' object ''%s'' = ',class(cluster),inputname(1)));
			disp(sprintf(' name: %s',cluster.name));
			disp(sprintf(' login: %s',cluster.login));
			disp(sprintf(' numnodes: %i',cluster.numnodes));
			disp(sprintf(' cpuspernode: %i',cluster.cpuspernode));
			disp(sprintf(' time: %i hours',cluster.time));
			disp(sprintf(' memory: %i Gb',cluster.memory));
			%cluster.email holds the SLURM mail-type list, not an address
			disp(sprintf(' email: %s (SLURM mail-type)',cluster.email));
			disp(sprintf(' email_domain: %s',cluster.email_domain));
			disp(sprintf(' deleteckptdata: %i',cluster.deleteckptdata));
			disp(sprintf(' codepath: %s',cluster.codepath));
			disp(sprintf(' executionpath: %s',cluster.executionpath));
			disp(sprintf(' interactive: %i',cluster.interactive));
		end
		%}}}
		function numprocs=nprocs(cluster) % {{{
			%compute number of processors
			numprocs=cluster.numnodes*cluster.cpuspernode;
		end
		%}}}
		function md = checkconsistency(cluster,md,solution,analyses) % {{{
			%report empty mandatory fields through checkmessage
			%(solution and analyses are not used here)

			%Miscellaneous
			if isempty(cluster.login), md = checkmessage(md,'login empty'); end
			if isempty(cluster.codepath), md = checkmessage(md,'codepath empty'); end
			if isempty(cluster.executionpath), md = checkmessage(md,'executionpath empty'); end
		end
		%}}}
		function BuildKrigingQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%write <modelname>.queue, a SLURM script running kriging.exe through srun
			%(solution, isdakota and isoceancoupling are not used here)

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof), disp('gprof not supported by cluster, ignoring...'); end

			%write queuing script
			fid=openforwriting(cluster,[modelname '.queue']);
			writesbatchheader(cluster,fid,modelname);

			fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath);
			fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');
			fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,dirname);
			fprintf(fid,'srun %s/kriging.exe %s %s\n', cluster.codepath,[cluster.executionpath '/' modelname],modelname);
			if ~io_gather %concatenate the output files:
				fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
			end
			fclose(fid);
		end
		%}}}
		function BuildQueueScript(cluster,dirname,modelname,solution,io_gather,isvalgrind,isgprof,isdakota,isoceancoupling) % {{{
			%write <modelname>.queue, a SLURM script running issm.exe through mpirun;
			%in interactive mode also write <modelname>.run and empty log files
			%(isdakota and isoceancoupling are not used here)

			if(isvalgrind), disp('valgrind not supported by cluster, ignoring...'); end
			if(isgprof), disp('gprof not supported by cluster, ignoring...'); end

			%write queuing script
			fid=openforwriting(cluster,[modelname '.queue']);
			writesbatchheader(cluster,fid,modelname);
			fprintf(fid,'export ISSM_DIR="%s/../"\n',cluster.codepath);
			fprintf(fid,'source $ISSM_DIR/etc/environment.sh\n');
			fprintf(fid,'cd %s/%s\n\n',cluster.executionpath,dirname);
			fprintf(fid,'mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
			if ~io_gather %concatenate the output files:
				%the trailing newline keeps the following rm command on its own line
				fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
			end

			if (cluster.deleteckptdata)
				fprintf(fid,'rm -rf *.rst *.ckpt\n');
			end
			fclose(fid);

			%in interactive mode, create a run file, and errlog and outlog file
			if cluster.interactive
				fid=openforwriting(cluster,[modelname '.run']);
				fprintf(fid,'mpirun -n %i %s/issm.exe %s %s %s\n',cluster.nprocs(), cluster.codepath,solution,[cluster.executionpath '/' dirname],modelname);
				if ~io_gather %concatenate the output files:
					fprintf(fid,'cat %s.outbin.* > %s.outbin\n',modelname,modelname);
				end
				fclose(fid);

				%empty log files, they are added to the archive by UploadQueueJob
				fid=openforwriting(cluster,[modelname '.errlog']);
				fclose(fid);
				fid=openforwriting(cluster,[modelname '.outlog']);
				fclose(fid);
			end
		end %}}}
		function UploadQueueJob(cluster,modelname,dirname,filelist) % {{{
			%archive the files of filelist into <dirname>.tar.gz and copy it to executionpath

			%compress the files into one zip.
			compressstring=['tar -zcf ' dirname '.tar.gz '];
			for i=1:numel(filelist)
				compressstring = [compressstring ' ' filelist{i}];
			end
			if cluster.interactive
				compressstring = [compressstring ' ' modelname '.errlog ' modelname '.outlog '];
			end
			system(compressstring);

			disp('uploading input file and queuing script');
			issmscpout(cluster.name,cluster.executionpath,cluster.login,0,{[dirname '.tar.gz']});

		end %}}}
		function LaunchQueueJob(cluster,modelname,dirname,filelist,restart,batch) % {{{
			%submit <modelname>.queue with sbatch over ssh
			%(filelist and batch are not used here)

			disp('launching solution sequence on remote cluster');
			if ~isempty(restart)
				%restart: reuse the existing run directory as is
				launchcommand=['cd ' cluster.executionpath ' && cd ' dirname ' && hostname && sbatch ' modelname '.queue '];
			else
				%fresh run: recreate the run directory and unpack the uploaded archive in it
				launchcommand=['cd ' cluster.executionpath ' && rm -rf ./' dirname ' && mkdir ' dirname ...
					' && cd ' dirname ' && mv ../' dirname '.tar.gz ./ && tar -zxf ' dirname '.tar.gz && hostname && sbatch ' modelname '.queue '];
			end
			issmssh(cluster.name,cluster.login,0,launchcommand);
		end %}}}
		function Download(cluster,dirname,filelist) % {{{
			%copy the files of filelist from the remote run directory

			%copy files from cluster to current directory
			directory=[cluster.executionpath '/' dirname '/'];
			issmscpin(cluster.name,cluster.login,0,directory,filelist);

		end %}}}
	end
	methods (Access=private)
		function walltime=slurmwalltime(cluster) % {{{
			%format cluster.time (in hours) as HH:MM:SS for #SBATCH --time
			%
			%   The hour field is not wrapped at 24, so a 30 hour request gives
			%   '30:00:00' (datestr would render only the time-of-day part).
			totalseconds=round(cluster.time*3600);
			walltime=sprintf('%02i:%02i:%02i',floor(totalseconds/3600),floor(mod(totalseconds,3600)/60),mod(totalseconds,60));
		end %}}}
		function writesbatchheader(cluster,fid,modelname) % {{{
			%write the shebang and the #SBATCH directives shared by all queue scripts,
			%followed by one empty line
			fprintf(fid,'#!/bin/bash\n');
			fprintf(fid,'#SBATCH --job-name=%s\n',modelname);
			fprintf(fid,'#SBATCH --output=%s.outlog \n',modelname);
			fprintf(fid,'#SBATCH --error=%s.errlog \n',modelname);
			fprintf(fid,'#SBATCH --nodes=%i\n',cluster.numnodes);
			fprintf(fid,'#SBATCH --ntasks-per-node=%i\n',cluster.cpuspernode);
			fprintf(fid,'#SBATCH --time=%s\n',slurmwalltime(cluster)); %walltime is in HH:MM:SS format. cluster.time is in hour
			fprintf(fid,'#SBATCH --mem=%iG\n',cluster.memory);
			if ~isempty(cluster.email)
				fprintf(fid,'#SBATCH --mail-type=%s\n',cluster.email);
				fprintf(fid,'#SBATCH --mail-user=%s@%s\n',cluster.login, cluster.email_domain);
			end
			fprintf(fid,'\n');
		end %}}}
		function fid=openforwriting(cluster,filename) % {{{
			%open filename for writing, erroring out explicitly if this fails
			%(fopen returns -1 on failure instead of throwing)
			fid=fopen(filename,'w');
			if fid==-1
				error('stanage:fopen','could not open file %s for writing',filename);
			end
		end %}}}
	end
end

0 commit comments

Comments
 (0)