Skip to content

Commit eb436df

Browse files
authored
Automatic DQM jobs on DIRAC (#151)
* Add a script to be used as a cronjob to automatically submit DQM jobs on DIRAC once a run has been transferred from CEA to DIRAC.
* Check whether a DQM job has already been submitted for a given NectarCAM run.
* Activate the proper conda environment when starting the script.
* Cleaning.
* Add a check whether a run is already present in the ZODB database before parsing it.
* Add a cronjob script to parse DQM results and feed the ZODB database.
* Process a list of runs instead of a single run at once.
* Adapt the cronjob to pass a list of runs as argument to the DQM parser script.
* Do not fail when a DQM result could not be fetched from DIRAC, but instead skip to the next DQM run.
* Change the location of the log file.
* Automatically renew the DIRAC proxy.

---------

Co-authored-by: Jean-Philippe Lenain <[email protected]>
1 parent 65957d2 commit eb436df

File tree

3 files changed

+151
-58
lines changed

3 files changed

+151
-58
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env bash
# -*- coding: utf-8 -*-
#
# This script is to be used as a cronjob on the nectarcam-dqm-rw VM on the LPNHE OpenStack cloud platform, in order to feed the ZODB database from DQM runs on DIRAC.
#
# Steps:
#   1. redirect all output to a dated log file under $HOME/log;
#   2. activate the nectar-dev conda environment;
#   3. renew the DIRAC proxy, reading the password from ~/.dirac.pwd;
#   4. list the DQM outputs stored under $remoteParentDir on DIRAC, extract
#      their run numbers and hand them all at once to parse_dqm_fits_file.py.
#
# NOTE: "set -u" is deliberately not enabled, since sourced conda activation
# hooks commonly reference unset variables.

# Log everything to $LOGFILE
LOGDIR="$HOME/log"
# Without the directory, the "exec" redirection below would fail.
mkdir -p -- "$LOGDIR" || exit 1
LOGFILE="$LOGDIR/$(basename -- "${0%.sh}")_$(date +%F).log"
exec 1>"$LOGFILE" 2>&1

. "/opt/conda/etc/profile.d/conda.sh"
if ! conda activate nectar-dev; then
    echo "Failed to activate the nectar-dev conda environment, exiting..."
    exit 1
fi

# Initialize DIRAC proxy from user certificate:
if ! dirac-proxy-init -M -g cta_nectarcam --pwstdin < ~/.dirac.pwd; then
    echo "DIRAC proxy initialization failed..."
    exit 1
fi

remoteParentDir="/vo.cta.in2p3.fr/user/j/jlenain/nectarcam/dqm"
nectarchainScriptDir="/opt/cta/nectarchain/src/nectarchain/user_scripts/jlenain"

# Build the list of run numbers from the remote listing: drop the lines
# containing the "/vo.cta" path, keep what precedes the first "." of each
# entry, then keep what follows the "n" of "Run".
run_list=$(dls "${remoteParentDir}" \
    | grep -ve "/vo.cta" \
    | awk -F. '{print $1}' \
    | awk -Fn '{print $2}' \
    | tr '\n' ' ')

# Split on whitespace into an array (no glob expansion, empty fields dropped).
read -r -a runs <<< "${run_list}"

# The parser requires at least one value after "-r": calling it with an empty
# list would only produce a usage error, so stop cleanly instead.
if (( ${#runs[@]} == 0 )); then
    echo "No DQM output found in ${remoteParentDir}, nothing to parse."
    exit 0
fi

python "${nectarchainScriptDir}/parse_dqm_fits_file.py" -r "${runs[@]}"
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env bash
# -*- coding: utf-8 -*-
#
# Author: Jean-Philippe Lenain <[email protected]>
#
# Script as a cronjob to dynamically launch NectarCAM DQM runs on DIRAC after data transfer, to be run once a day on sedipccaa23 in CEA/Irfu.
#
# For each run found locally under $localParentDir, the number of local files
# is compared with the number of files present on DIRAC under
# $remoteParentDir. When both match and no DQM job carrying this run number is
# reported by dstat, a DQM job is submitted with submit_dqm_processor.py.
#
# NOTE: "set -u" is deliberately not enabled, since sourced conda activation
# hooks commonly reference unset variables.

# Log everything to $LOGFILE
LOGDIR="$HOME/log"
# Without the directory, the "exec" redirection below would fail.
mkdir -p -- "$LOGDIR" || exit 1
LOGFILE="$LOGDIR/$(basename -- "${0%.sh}")_$(date +%F).log"
exec 1>"$LOGFILE" 2>&1

source /opt/cta/mambaforge/etc/profile.d/conda.sh
if ! conda activate ctadirac; then
    echo "Failed to activate the ctadirac conda environment, exiting..."
    exit 1
fi

localParentDir="/data/nvme/ZFITS"
remoteParentDir="/vo.cta.in2p3.fr/nectarcam"
nectarchainScriptDir="$HOME/local/src/python/cta-observatory/nectarchain/src/nectarchain/user_scripts/jlenain/dqm_job_submitter"

# Braces, not parentheses: "exit" inside ( ... ) would only leave the subshell
# and the script would carry on from the wrong directory.
cd "$nectarchainScriptDir" || { echo "Failed to cd into ${nectarchainScriptDir}, exiting..."; exit 1; }

# The remote listing does not depend on the run: fetch it once for all runs.
remoteFiles=$(dfind "${remoteParentDir}")

# Run numbers are taken from the local file names: second "."-separated field
# of the path, then what follows the "n" of "Run". Word splitting of the
# command substitution is intended here, the tokens being plain run numbers.
for run in $(find "${localParentDir}" -type f -name "NectarCAM*.fits.fz" | awk -F. '{print $2}' | awk -Fn '{print $2}' | sort -u); do
    echo "Probing files for run ${run}"
    nbLocalFiles=$(find "${localParentDir}" -type f -name "NectarCAM.Run${run}.????.fits.fz" | wc -l)
    echo " Found $nbLocalFiles local files for run $run"
    # Same naming scheme as the local "find" pattern above. The literal dot
    # after the run number prevents run 123 from also counting run 1234 files.
    nbRemoteFiles=$(grep --count -E -e "NectarCAM\.Run${run}\..{4}\.fits\.fz" <<< "${remoteFiles}")
    echo " Found $nbRemoteFiles remote files on DIRAC for run $run"
    # If number of local and remote files matching, will attempt to launch a DQM run
    if [[ "${nbLocalFiles}" -eq "${nbRemoteFiles}" ]]; then
        echo " Run $run: number of local and remote files matching, will attempt to submit a DQM job"
        # Has this DQM run already been submitted ?
        # The run number must not be followed by another digit, otherwise a
        # job for run 1234 would hide the missing job for run 123.
        if [[ "$(dstat | grep --count -E -e "NectarCAM DQM run ${run}([^0-9]|$)")" -eq 0 ]]; then
            # The 6th "/"-separated field of the absolute path is taken as the
            # date directory.
            # NOTE(review): this presumes a fixed directory depth below
            # ${localParentDir} - confirm against the actual data layout.
            yyyymmdd=$(find "${localParentDir}" -type f -name "NectarCAM.Run${run}.????.fits.fz" | head -n 1 | awk -F/ '{print $6}')
            if [[ ! "${yyyymmdd}" =~ ^[0-9]{8}$ ]]; then
                echo " Could not determine the date of run $run from its local path (got \"${yyyymmdd}\"), skipping it."
                continue
            fi
            yyyy=${yyyymmdd:0:4}
            mm=${yyyymmdd:4:2}
            dd=${yyyymmdd:6:2}
            # Command kept as an array: no "eval", no re-parsing of the arguments.
            cmd=(python submit_dqm_processor.py -d "${yyyy}-${mm}-${dd}" -r "$run")
            echo "Running: ${cmd[*]}"
            "${cmd[@]}"
        else
            echo " DQM job for run $run already submitted, either ongoing or failed, skipping it."
        fi
    else
        echo " Run $run is not yet complete on DIRAC, will wait another day before launching a DQM job on it."
    fi
done

src/nectarchain/user_scripts/jlenain/parse_dqm_fits_file.py

Lines changed: 82 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@
2828
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
2929
)
3030
parser.add_argument(
31-
"-r",
32-
"--run",
33-
default=None,
34-
help="process a specific run.",
35-
type=str,
31+
"-f",
32+
"--force",
33+
default=False,
34+
action="store_true",
35+
help="if this run is already in the DB, force re-parsing its DQM output again.",
3636
)
3737
parser.add_argument(
3838
"-p",
@@ -41,62 +41,86 @@
4141
help="path on DIRAC where to grab DQM outputs (optional).",
4242
type=str,
4343
)
44+
parser.add_argument(
45+
"-r",
46+
"--runs",
47+
nargs="+",
48+
default=None,
49+
help="process a specific run or a list of runs.",
50+
)
4451
args = parser.parse_args()
4552

46-
if args.run is None:
47-
logger.critical("A run number should be provided.")
53+
if args.runs is None:
54+
logger.critical("At least one run number should be provided.")
4855
sys.exit(1)
4956

50-
lfn = f"{args.path}/NectarCAM_DQM_Run{args.run}.tar.gz"
51-
52-
if not os.path.exists(os.path.basename(lfn)):
53-
DIRAC.initialize()
54-
55-
dirac = Dirac()
56-
57-
dirac.getFile(
58-
lfn=lfn,
59-
destDir=f".",
60-
printOutput=True,
61-
)
62-
63-
with tarfile.open(os.path.basename(lfn), "r") as tar:
64-
tar.extractall(".")
65-
66-
fits_file = (
67-
f"./NectarCAM_DQM_Run{args.run}/output/NectarCAM_Run{args.run}/"
68-
f"NectarCAM_Run{args.run}_calib/NectarCAM_Run{args.run}_Results.fits"
69-
)
70-
71-
hdu = fits.open(fits_file)
72-
73-
# Explore FITS file structure
74-
hdu.info()
75-
76-
outdict = dict()
77-
78-
for h in range(1, len(hdu)):
79-
extname = hdu[h].header["EXTNAME"]
80-
outdict[extname] = dict()
81-
for i in range(hdu[extname].header["TFIELDS"]):
82-
keyname = hdu[extname].header[f"TTYPE{i+1}"]
83-
outdict[extname][keyname] = hdu[extname].data[keyname]
84-
85-
try:
86-
db = DQMDB(read_only=False)
87-
db.insert(f"NectarCAM_Run{args.run}", outdict)
88-
db.commit_and_close()
89-
except ZEO.Exceptions.ClientDisconnected as e:
90-
logger.critical(f"Impossible to feed the ZODB data base. Received error: {e}")
91-
92-
# Remove DQM archive file and directory
93-
try:
94-
os.remove(f"NectarCAM_DQM_Run{args.run}.tar.gz")
95-
except OSError:
96-
logger.warning(
97-
f"Could not remove NectarCAM_DQM_Run{args.run}.tar.gz or it does not exist"
57+
db_read = DQMDB(read_only=True)
58+
db_read_keys = list(db_read.root.keys())
59+
db_read.abort_and_close()
60+
61+
for run in args.runs:
62+
if not args.force and f"NectarCAM_Run{run}" in db_read_keys:
63+
logger.warning(
64+
f'The run {run} is already present in the DB, will not parse this DQM run, or consider forcing it with the "--force" option.'
65+
)
66+
continue
67+
68+
lfn = f"{args.path}/NectarCAM_DQM_Run{run}.tar.gz"
69+
70+
if not os.path.exists(os.path.basename(lfn)):
71+
DIRAC.initialize()
72+
73+
dirac = Dirac()
74+
75+
dirac.getFile(
76+
lfn=lfn,
77+
destDir=f".",
78+
printOutput=True,
79+
)
80+
81+
try:
82+
with tarfile.open(os.path.basename(lfn), "r") as tar:
83+
tar.extractall(".")
84+
except FileNotFoundError as e:
85+
logger.warning(
86+
f"Could not fetch DQM results from DIRAC for run {run}, received error {e}, skipping this run..."
87+
)
88+
continue
89+
90+
fits_file = (
91+
f"./NectarCAM_DQM_Run{run}/output/NectarCAM_Run{run}/"
92+
f"NectarCAM_Run{run}_calib/NectarCAM_Run{run}_Results.fits"
9893
)
9994

100-
dirpath = Path(f"./NectarCAM_DQM_Run{args.run}")
101-
if dirpath.exists() and dirpath.is_dir():
102-
shutil.rmtree(dirpath)
95+
hdu = fits.open(fits_file)
96+
97+
# Explore FITS file structure
98+
hdu.info()
99+
100+
outdict = dict()
101+
102+
for h in range(1, len(hdu)):
103+
extname = hdu[h].header["EXTNAME"]
104+
outdict[extname] = dict()
105+
for i in range(hdu[extname].header["TFIELDS"]):
106+
keyname = hdu[extname].header[f"TTYPE{i+1}"]
107+
outdict[extname][keyname] = hdu[extname].data[keyname]
108+
109+
try:
110+
db = DQMDB(read_only=False)
111+
db.insert(f"NectarCAM_Run{run}", outdict)
112+
db.commit_and_close()
113+
except ZEO.Exceptions.ClientDisconnected as e:
114+
logger.critical(f"Impossible to feed the ZODB data base. Received error: {e}")
115+
116+
# Remove DQM archive file and directory
117+
try:
118+
os.remove(f"NectarCAM_DQM_Run{run}.tar.gz")
119+
except OSError:
120+
logger.warning(
121+
f"Could not remove NectarCAM_DQM_Run{run}.tar.gz or it does not exist"
122+
)
123+
124+
dirpath = Path(f"./NectarCAM_DQM_Run{run}")
125+
if dirpath.exists() and dirpath.is_dir():
126+
shutil.rmtree(dirpath)

0 commit comments

Comments
 (0)