launch_jobs.py (forked from angerami/MLTree)
#! /usr/bin/env python
# @file launch_jobs.py
# @author Joakim Olsson <[email protected]>
# @brief Launch grid jobs with the MLTree package
# @date October 2016
import os
import subprocess as sp
try:
    # Use the current git tag as the default version string
    __version__ = sp.check_output(["git", "describe", "--tags"],
                                  cwd=os.path.dirname(os.path.realpath(__file__))).strip().decode("utf-8")
except (OSError, sp.CalledProcessError):
    print("git not available to extract current tag")
    __version__ = "test"
import argparse
parser = argparse.ArgumentParser(add_help=True, description="Launch grid jobs", epilog="version: {0:s}".format(__version__))
parser.add_argument("--user", required=True, type=str, dest="user", metavar="<user>", help="Username")
parser.add_argument("--tag", required=False, type=str, dest="tag", default = __version__, metavar="<tag>", help="Output file tag")
parser.add_argument("--datasets", type=str, dest="datasets", required=False, default="datasets.json", metavar = "<datasets.json>", help="JSON file specifying the input and output datasets.")
parser.add_argument("--nFilesPerJob", required=False, type=int, dest="nFilesPerJob", default=1, help="Number of files per job")
# If sub-jobs exceed the walltime limit, they will get killed. When you want to submit long running jobs (e.g., customized G4 simulation), submit them to sites where longer walltime limit is available by specifying the expected execution time (in second) to the --maxCpuCount option.
parser.add_argument("--maxCpuCount", required=False, type=int, dest="maxCpuCount", default=172800, help="Max CPU time (default: 48 hrs)")
parser.add_argument("--dry-run", dest="dryrun", action="store_true", help="Don't submit any jobs")
args = parser.parse_args()
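# Example invocation (the username and option values here are purely illustrative):
#   ./launch_jobs.py --user jolsson --datasets datasets.json --nFilesPerJob 5 --dry-run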
import json
# Read the input/output dataset lists from the JSON file
with open(args.datasets) as f:
    datasets = json.load(f)
inDSs = datasets.get("inDSs", [])
outDSs = datasets.get("outDSs", [])
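# Assumed layout of datasets.json (the dataset names below are made-up placeholders):
# "inDSs" and "outDSs" are parallel lists that get paired up one-to-one in the loop below.
#   {
#     "inDSs":  ["mc16_13TeV.somesample.recon.ESD.e1234_s5678",
#                "mc16_13TeV.othersample.recon.ESD.e1234_s5678"],
#     "outDSs": ["somesample.mltree",
#                "othersample.mltree"]
#   }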
# doBuild/doBuildAll control whether a job builds its own libraries (comFirst)
# or reuses the build from the previous submission via --libDS LAST (comLater)
doBuild = True
doBuildAll = True
# Job options passed to pathena, plus the submission options shared by all jobs
setup = "MLTree/MLTreeMaker.py"
config = "--nFilesPerJob "+str(args.nFilesPerJob)+" --maxCpuCount "+str(args.maxCpuCount)
comFirst = "pathena {} --outDS {} --inDS {} {}"
comLater = "pathena {} --outDS {} --inDS {} --libDS LAST {}"
# Submit jobs to the grid with pathena
# https://twiki.cern.ch/twiki/bin/view/PanDA/PandaAthena
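# With the defaults above, the echoed first-iteration command looks roughly like this
# (username, tag and dataset names are illustrative placeholders):
#   pathena MLTree/MLTreeMaker.py --outDS user.jolsson.somesample.mltree_v1.0 --inDS mc16_13TeV.somesample.recon.ESD.e1234_s5678 --nFilesPerJob 1 --maxCpuCount 172800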
for i, (inDS, outDS) in enumerate(zip(inDSs, outDSs)):
    outDS = "user."+args.user+"."+outDS+"_"+args.tag
    # Build the job libraries on the first submission (or on every submission if doBuildAll)
    if (i == 0 and doBuild) or doBuildAll:
        command = comFirst.format(setup, outDS, inDS, config)
    else:
        command = comLater.format(setup, outDS, inDS, config)
    # Echo the command, then run it unless this is a dry run
    sp.call("echo "+command, shell=True)
    if not args.dryrun:
        sp.call(command, shell=True)