Skip to content

Commit da04782

Browse files
authored
Merge pull request #532 from ACCESS-NRI/527-compress-model-logs
Model log compression
2 parents 27aac37 + c111d6b commit da04782

File tree

7 files changed

+206
-22
lines changed

7 files changed

+206
-22
lines changed

docs/source/config.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,22 @@ section for details.
273273
POSIX filesystem.
274274

275275

276+
Archiving
277+
---------
278+
279+
``archive``
280+
On completion of a model run, payu moves model output, restart, and log
281+
files from the temporary work area to the experiment archive directory.
282+
The following settings control the steps taken during the archive step:
283+
284+
``enable`` (*Default:* ``True``)
285+
Flag to enable/disable the archive step. If ``False``, all output, restart,
286+
and log files will remain in the work directory, and any collation, post-processing,
287+
and syncing will not be run.
288+
``compress_logs`` (*Default:* ``True``)
289+
Compress model log files into a tarball. Currently only implemented for CICE4.
290+
291+
276292
Collation
277293
---------
278294

payu/experiment.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ def setup(self, force_archive=False):
503503

504504
# Check restart pruning for valid configuration values and
505505
# warns user if more restarts than expected would be pruned
506-
if self.config.get('archive', True):
506+
if self.archiving():
507507
self.get_restarts_to_prune()
508508

509509
def run(self, *user_flags):
@@ -769,8 +769,16 @@ def run(self, *user_flags):
769769
if run_script:
770770
self.run_userscript(run_script)
771771

772+
def archiving(self):
    """
    Determine whether to run archive step based on config.yaml settings.

    Default to True when archive settings are absent or empty.

    Returns
    -------
    The value of the ``enable`` entry of the ``archive`` config section,
    or True when that entry (or the whole section) is missing.
    """
    archive_config = self.config.get('archive')

    # Tolerate the legacy form where 'archive' is a plain boolean flag
    if isinstance(archive_config, bool):
        return archive_config

    # An absent section, or an 'archive:' key with no value (None),
    # means the defaults apply
    if not archive_config:
        return True

    return archive_config.get('enable', True)
779+
772780
def archive(self, force_prune_restarts=False):
773-
if not self.config.get('archive', True):
781+
if not self.archiving():
774782
print('payu: not archiving due to config.yaml setting.')
775783
return
776784

payu/fsops.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ def read_config(config_fname=None):
126126

127127
config['collate'] = collate_config
128128

129+
# Transform legacy archive config options
130+
archive_config = config.pop('archive', {})
131+
if type(archive_config) is bool:
132+
archive_config = {'enable': archive_config}
133+
config['archive'] = archive_config
134+
129135
# Transform legacy modules config options
130136
modules_config = config.pop('modules', {})
131137
if type(modules_config) is list:

payu/models/cice.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import sys
1818
import shutil
1919
import datetime
20+
import re
21+
import tarfile
2022

2123
# Extensions
2224
import f90nml
@@ -51,6 +53,13 @@ def __init__(self, expt, name, config):
5153

5254
self.copy_inputs = False
5355

56+
# regex patterns for matching log files. When empty, no logs compressed
57+
self.logs_to_compress = [r"iceout[0-9]{3}",
58+
r"debug\.root\.[0-9]{2}",
59+
r"ice_diag\.d",
60+
r"ice_diag_out"]
61+
self.log_tar_name = "logfiles.tar.gz"
62+
5463
def set_model_pathnames(self):
5564
super(Cice, self).set_model_pathnames()
5665

@@ -328,6 +337,40 @@ def archive(self, **kwargs):
328337
else:
329338
shutil.rmtree(self.work_input_path)
330339

340+
archive_config = self.expt.config.get('archive', {})
341+
compressing_logs = archive_config.get('compress_logs', True)
342+
if compressing_logs:
343+
self.compress_log_files()
344+
345+
def get_log_files(self):
    """
    Find model log files in the work directory based on regex patterns
    in self.logs_to_compress.

    Returns
    -------
    log_files: list of paths to model log files. Empty when
        self.logs_to_compress is empty.
    """
    # An empty pattern list joins to "", and re.match("") succeeds for
    # every filename; return early so "no patterns" means "no log files".
    if not self.logs_to_compress:
        return []

    # Build the combined pattern once rather than on every iteration
    log_pattern = re.compile("|".join(self.logs_to_compress))

    log_files = []
    for filename in os.listdir(self.work_path):
        log_path = os.path.join(self.work_path, filename)
        # Patterns are anchored at the start of the name only (match).
        # Directories are skipped: they cannot be deleted with os.remove.
        if log_pattern.match(filename) and os.path.isfile(log_path):
            log_files.append(log_path)
    return log_files
359+
360+
def compress_log_files(self):
    """
    Compress model log files into a tarball in the work directory.

    The files returned by self.get_log_files() are added to the gzipped
    tarball self.log_tar_name under their base names, and the originals
    are removed once the tarball has been written. Nothing is done when
    self.logs_to_compress is empty or no log files are found.
    """
    # Guard on the pattern list itself: an empty list must never lead to
    # files being archived and deleted.
    if not self.logs_to_compress:
        return

    log_files = self.get_log_files()
    if not log_files:
        # Avoid leaving an empty tarball behind
        return

    tar_path = os.path.join(self.work_path, self.log_tar_name)
    with tarfile.open(name=tar_path, mode="w:gz") as tar:
        for log_path in log_files:
            tar.add(log_path, arcname=os.path.basename(log_path))

    # Delete files only after the tarball has been written and closed
    for log_path in log_files:
        os.remove(log_path)
373+
331374
def collate(self):
332375
pass
333376

payu/models/cice5.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ def __init__(self, expt, name, config):
4242
self.copy_restarts = True
4343
self.copy_inputs = True
4444

45+
# Empty list means no log files will be compressed
46+
self.logs_to_compress = []
47+
4548
def set_local_timestep(self, t_step):
4649
dt = self.ice_in['setup_nml']['dt']
4750
npt = self.ice_in['setup_nml']['npt']

test/models/test_cice.py

Lines changed: 127 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
import pytest
55
import f90nml
6+
import tarfile
7+
from pathlib import Path
68

79
import payu
810

@@ -124,41 +126,48 @@ def empty_workdir():
124126
workdir.symlink_to(expt_workdir)
125127

126128
yield expt_workdir
127-
shutil.rmtree(expt_workdir)
129+
try:
130+
shutil.rmtree(expt_workdir)
131+
except FileNotFoundError:
132+
pass
128133
workdir.unlink()
129134

130135

136+
@pytest.fixture
def cice_nml():
    """
    Write the default cice namelist into the control directory.

    Yields the path of the written namelist and removes the file again
    on teardown.
    """
    namelist_path = os.path.join(ctrldir, CICE_NML_NAME)
    f90nml.write(DEFAULT_CICE_NML, namelist_path)

    yield namelist_path

    # Teardown: remove the namelist written above
    os.remove(namelist_path)
145+
146+
131147
# Important to test None case without separate ice history file
132148
@pytest.fixture(params=[None,
133149
{"icefields_nml": {"f_icy": "m"}},
134150
{"icefields_nml": {"f_icy": "m", "f_new": "y"}}])
135-
def cice_config_files(request):
151+
def cice_history_nml(request):
136152
"""
137-
Write the default cice_in.nml namelist, and if included, separate ice
138-
history namelist used by ESM1.5.
153+
Write separate ice history namelist used by ESM1.5, if provided.
139154
"""
140-
cice_nml = DEFAULT_CICE_NML
141155
ice_history = request.param
156+
ice_history_path = os.path.join(ctrldir, HIST_NML_NAME)
142157

143-
with cd(ctrldir):
144-
# 2. Create config.nml
145-
f90nml.write(cice_nml, CICE_NML_NAME)
146-
147-
if ice_history:
148-
f90nml.write(ice_history, HIST_NML_NAME)
158+
if ice_history:
159+
f90nml.write(ice_history, ice_history_path)
149160

150161
yield {'ice_history': ice_history}
151162

152163
# cleanup
153-
with cd(ctrldir):
154-
os.remove(CICE_NML_NAME)
155-
if ice_history:
156-
os.remove(HIST_NML_NAME)
164+
if ice_history:
165+
os.remove(ice_history_path)
157166

158167

159168
@pytest.mark.parametrize("config", [DEFAULT_CONFIG],
160169
indirect=True)
161-
def test_setup(config, cice_config_files):
170+
def test_setup(config, cice_nml, cice_history_nml):
162171
"""
163172
Confirm that
164173
1: payu overwrites cice_in with ice_history
@@ -183,9 +192,9 @@ def test_setup(config, cice_config_files):
183192
# Check cice_in was patched with ice_history
184193
work_input_fpath = os.path.join(model.work_path, CICE_NML_NAME)
185194
input_nml = f90nml.read(work_input_fpath)
186-
if cice_config_files['ice_history']:
195+
if cice_history_nml['ice_history']:
187196
assert (input_nml["icefields_nml"] ==
188-
cice_config_files["ice_history"]["icefields_nml"])
197+
cice_history_nml["ice_history"]["icefields_nml"])
189198
else:
190199
assert input_nml["icefields_nml"] == DEFAULT_CICE_NML["icefields_nml"]
191200

@@ -238,7 +247,7 @@ def prior_restart_cice4(run_timing_params):
238247

239248
@pytest.mark.parametrize("config", [CONFIG_WITH_RESTART],
240249
indirect=True)
241-
def test_restart_setup(config, cice_config_files, prior_restart_cice4,
250+
def test_restart_setup(config, cice_nml, cice_history_nml, prior_restart_cice4,
242251
run_timing_params):
243252
"""
244253
Test that setting up an experiment from a cloned control directory
@@ -280,7 +289,7 @@ def test_restart_setup(config, cice_config_files, prior_restart_cice4,
280289

281290
@pytest.mark.parametrize("config", [DEFAULT_CONFIG],
282291
indirect=True)
283-
def test_no_restart_ptr(config, cice_config_files):
292+
def test_no_restart_ptr(config, cice_nml, cice_history_nml):
284293
"""
285294
Test that payu raises an error if no prior restart path is specified,
286295
restart is `true` in cice_in.nml, and the restart pointer is missing.
@@ -300,3 +309,101 @@ def test_no_restart_ptr(config, cice_config_files):
300309
with pytest.raises(RuntimeError,
301310
match="Cannot find previous restart file"):
302311
model.setup()
312+
313+
314+
CONFIG_WITH_COMPRESSION = {
    "laboratory": "lab",
    "jobname": "testrun",
    "model": "cice",
    "exe": "test.exe",
    "experiment": ctrldir_basename,
    "metadata": {"enable": False},
    # compress_logs is an option of the 'archive' config section
    "archive": {"compress_logs": True},
}
323+
324+
325+
@pytest.fixture
def cice4_log_files():
    """
    Create cice log files based on ESM1.5 logs.

    Yields a dict mapping each log file name to the contents written to
    it in expt_workdir. Any of the files still present are removed on
    teardown.
    """
    non_pe_logs = {
        "ice_diag_out": "block id, proc, local_block:",
        "ice_diag.d": "istep0 = ******",
        "debug.root.03": "oasis_io_read_avfile:av2_isst_ia:NetCDF:"
    }
    # Per-PE logs: the contents carry the PE number so each file is distinct
    pe_logs = {
        f'iceout{x:03d}': f"Fake iceout file {x}"
        for x in range(85, 96)
    }

    log_files = non_pe_logs | pe_logs

    log_paths = []
    for log_name, log_contents in log_files.items():
        log_path = Path(expt_workdir) / log_name
        with open(log_path, "w") as log:
            log.write(log_contents)
        log_paths.append(log_path)

    yield log_files

    # Cleanup: a test may already have deleted the logs (compression
    # removes the originals), so missing files are not an error
    for log_file in log_paths:
        try:
            log_file.unlink()
        except FileNotFoundError:
            pass
357+
358+
359+
@pytest.fixture
def non_log_file():
    """
    Provide an empty cice4 work-directory file that is not a log.

    The file is named after the cice namelist (cice_in.nml is copied to
    the work directory in ESM1.5) and should be left alone by log
    compression. Yields its path; the file is removed on teardown.
    """
    placeholder = Path(expt_workdir) / CICE_NML_NAME
    placeholder.touch()

    yield placeholder

    # Teardown: remove the placeholder file
    placeholder.unlink()
372+
373+
374+
@pytest.mark.parametrize("config", [CONFIG_WITH_COMPRESSION],
375+
indirect=True)
376+
def test_log_compression(config, cice4_log_files, non_log_file,
377+
cice_nml # Required by expt.__init__
378+
):
379+
"""
380+
Test that logfiles produced by cice during ESM1.5 simulations are
381+
properly compressed into a tarball by cice.compress_log_files().
382+
"""
383+
with cd(ctrldir):
384+
# Initialise laboratory and experiment
385+
lab = payu.laboratory.Laboratory(lab_path=str(labdir))
386+
expt = payu.experiment.Experiment(lab, reproduce=False)
387+
model = expt.models[0]
388+
389+
# Function to test
390+
model.compress_log_files()
391+
392+
# Check that log tarball created and no original logs remain
393+
assert set(os.listdir(expt_workdir)) == {model.log_tar_name,
394+
non_log_file.name}
395+
396+
# Check all logs present in tarball
397+
log_file_names = {log_name for
398+
log_name in cice4_log_files}
399+
400+
with tarfile.open(os.path.join(expt_workdir, model.log_tar_name),
401+
mode="r") as tar:
402+
assert set(tar.getnames()) == log_file_names
403+
404+
# Check contents of compressed files
405+
for entry in tar:
406+
entry_name = entry.name
407+
with tar.extractfile(entry) as open_entry:
408+
file_contents = open_entry.read().decode("utf-8")
409+
assert file_contents == cice4_log_files[entry_name]

test/test_payu.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ def test_read_config():
150150
assert(config.pop('collate') == {})
151151
assert(config.pop('control_path') == os.getcwd())
152152
assert(config.pop('modules') == {})
153+
assert(config.pop('archive') == {})
153154
assert(config == {})
154155

155156
os.remove(config_tmp)

0 commit comments

Comments
 (0)