Skip to content

WIP: QG scale factor workflows #125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 24 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0a345c6
feat: add dijet_producer
toicca Feb 13, 2025
07812c8
feat: add selection modifier, fix bugs
toicca Feb 14, 2025
71b18c9
feat: QG selection to DY
toicca Feb 17, 2025
b5acd53
fix: add met filters and fixes to dijet
toicca Feb 17, 2025
47ee5f2
feat: different object selection for QG
toicca Feb 17, 2025
62c57e8
Merge branch 'cms-btv-pog:master' into dijet_wf
toicca Feb 17, 2025
9ad2243
Merge branch 'dijet_wf' of github.com:toicca/BTVNanoCommissioning int…
toicca Feb 17, 2025
9741eb3
fix: correctly label systematics
toicca Feb 18, 2025
8393e46
Merge branch 'dijet_wf' into master
toicca Mar 20, 2025
548c822
Merge pull request #9 from toicca/master
toicca Mar 20, 2025
e3b15a2
chore: update gitignore to ignore arrays
toicca Mar 20, 2025
9b98f9f
fix: ignore individual weights for now
toicca Mar 24, 2025
b85bd2b
fix: jet_id also for qg DY, and remove pfcands from arrays
toicca Apr 7, 2025
a93c990
feat: add exit code to condor executable
toicca Apr 7, 2025
9c27e3c
feat: add retries to submission script
toicca Apr 7, 2025
f2a9a33
feat: only create hist dir if required
toicca Apr 7, 2025
c3f0e39
fix: always exit with output in condor script
toicca Apr 7, 2025
6e44240
chore: linting with black
toicca Apr 8, 2025
82ac499
fix: randomize dijet selection
toicca Apr 17, 2025
6aa00a0
feat: add trigger, run and lumi information to arrays
toicca Apr 17, 2025
1a7adeb
fix: remove unnecessary zerobias from dijets
toicca Apr 17, 2025
e3c035b
fix: always write weights for MC
toicca Apr 22, 2025
b646511
fix: clean jet selection in DY
toicca Apr 22, 2025
749cb49
fix: DY selection to have jets above aeta 3
toicca Apr 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ dask-worker-space/
.job_wrapper_failure
jobs_*
.success
arrays*/
16 changes: 16 additions & 0 deletions condor_lxplus/execute.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#!/bin/bash -xe
export EOS_MGM_URL=root://eosuser.cern.ch

JOBID=$1
COMMDIR=$2

echo "Running job $JOBID"

export HOME=`pwd`
if [ -d /afs/cern.ch/user/${USER:0:1}/$USER ]; then
export HOME=/afs/cern.ch/user/${USER:0:1}/$USER # crucial on lxplus condor but cannot set on cmsconnect
Expand All @@ -18,13 +21,17 @@ voms-proxy-info
export PATH="$4:$PATH"

# Build the sample json given the job id
echo "Building sample json for job $JOBID"
python -c "import json; json.dump(json.load(open('$WORKDIR/split_samples.json'))['$JOBID'], open('$WORKDIR/sample.json', 'w'), indent=4)"

echo "Sample json:"
cat $WORKDIR/sample.json
declare -A ARGS
for key in workflow output samplejson year campaign isSyst isArray noHist overwrite voms chunk skipbadfiles outputDir remoteRepo; do
ARGS[$key]=$(jq -r ".$key" $WORKDIR/arguments.json)
done

echo "Arguments:"
# Unparse arguments and send to runner.py
OPTS="--wf ${ARGS[workflow]} --year ${ARGS[year]} --campaign ${ARGS[campaign]} --chunk ${ARGS[chunk]}"
if [ "${ARGS[voms]}" != "null" ]; then
Expand All @@ -48,5 +55,14 @@ OPTS="$OPTS --executor iterative --overwrite --outputdir $3"
echo "Now launching: python runner.py $OPTS"

# This script is started with `bash -xe`, so a bare failing command would abort
# the shell immediately and the failure report below would never run. Capturing
# the status through `||` keeps errexit from firing on this one command.
# $OPTS is intentionally left unquoted: it must be word-split into arguments.
OUTPUT=0
python runner.py $OPTS || OUTPUT=$?

if [ "$OUTPUT" -ne 0 ]; then
    echo "Job $JOBID failed with exit code $OUTPUT"
    # Propagate the runner's exit code so the scheduler sees the job as failed.
    exit "$OUTPUT"
fi

echo "Job $JOBID completed successfully"
# Marker file created only after a successful run.
touch "$WORKDIR/.success"

exit "$OUTPUT"
17 changes: 17 additions & 0 deletions condor_lxplus/submitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ def get_condor_submitter_parser(parser):
default=None,
help="If specified, access BTVNanoCommsioning from a remote tarball (downloaded via https), instead of from a transferred sandbox",
)
# Register the retry limit for failed condor jobs.
# type=int is required: argparse hands command-line values over as strings,
# and the value is later compared numerically (`args.max_retries > 0`), which
# raises TypeError for a str on Python 3. The integer default is unaffected.
parser.add_argument(
    "--max_retries",
    type=int,
    default=3,
    help="Maximum number of retries for failed jobs. Failed jobs are forced to another machine.",
)
return parser


Expand Down Expand Up @@ -260,6 +265,15 @@ def get_main_parser():
with open(os.path.join(job_dir, "jobnum_list.txt"), "w") as f:
f.write("\n".join([str(i) for i in range(counter)]))

if args.max_retries > 0:
retry_str = f"""
on_exit_remove = (ExitBySignal == False) && (ExitCode == 0)
max_retries = {args.max_retries}
requirements = Machine =!= LastRemoteHost
"""
else:
retry_str = ""

## store the jdl file
jdl_template = """Universe = vanilla
Executable = {executable}
Expand All @@ -281,6 +295,8 @@ def get_main_parser():
transfer_input_files = {transfer_input_files}
transfer_output_files = .success

{retry_str}

Queue JOBNUM from {jobnum_file}
""".format(
executable=f"{base_dir}/condor_lxplus/execute.sh",
Expand All @@ -289,6 +305,7 @@ def get_main_parser():
envpath=envpath,
log_dir=f"{base_dir}/{job_dir}/log",
transfer_input_files=f"{base_dir}/{job_dir}/arguments.json,{base_dir}/{job_dir}/split_samples.json,{base_dir}/{job_dir}/jobnum_list.txt",
retry_str=retry_str,
jobnum_file=f"{base_dir}/{job_dir}/jobnum_list.txt",
)
with open(os.path.join(job_dir, "submit.jdl"), "w") as f:
Expand Down
3 changes: 2 additions & 1 deletion runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,8 @@ def debug_parser(parser):
# Path of the .coffea histogram file for this workflow / sample json.
# NOTE(review): str.rstrip(".json") strips any trailing run of the characters
# '.', 'j', 's', 'o', 'n' rather than the literal suffix (e.g. "samples.json"
# becomes "sample"). Left unchanged here because fixing it would rename output
# files; confirm with downstream consumers before switching to a suffix strip.
coffeaoutput = (
    f'{histoutdir}/hists_{args.workflow}_{(sample_json).rstrip(".json")}.coffea'
)
# Only create the histogram directory when histograms are actually produced.
# os.makedirs avoids spawning a shell, handles paths with spaces or shell
# metacharacters, and raises on failure instead of silently ignoring it.
if not args.noHist:
    os.makedirs(histoutdir, exist_ok=True)
# load dataset
with open(args.samplejson) as f:
sample_dict = json.load(f)
Expand Down
6 changes: 4 additions & 2 deletions src/BTVNanoCommissioning/utils/array_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,16 @@ def array_writer(
othersMC=["Pileup_nTrueInt", "Pileup_nPU"], # other fields, for MC only
empty=False,
):
if not isRealData and systname != ["nominal"]:
if not isRealData and not empty:
pruned_event["weight"] = weights.weight()
for ind_wei in weights.weightStatistics.keys():
pruned_event[f"{ind_wei}_weight"] = weights.partial_weight(
include=[ind_wei]
)
if len(systname) > 1:
for syst in systname[1:]:
for syst in systname:
if syst == "nominal":
continue
pruned_event[f"weight_syst_{syst}"] = weights.weight(modifier=syst)

if empty:
Expand Down
2 changes: 2 additions & 0 deletions src/BTVNanoCommissioning/utils/histogrammer.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,8 @@ def histogrammer(events, workflow, year="2022", campaign="Summer22"):
_hist_dict[f"dr_{i}jet"] = Hist.Hist(
syst_axis, flav_axis, dr_axis, Hist.storage.Weight()
)
elif "qgtag" in workflow:
pass

### Common kinematic variables histogram creation
if "Wc_sf" not in workflow:
Expand Down
16 changes: 16 additions & 0 deletions src/BTVNanoCommissioning/utils/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,4 +285,20 @@
"TTto2L2Nu_TuneCP5Up_13p6TeV_powheg-pythia8",
],
},
"qgtag_dijet": {
"data": ["ZeroBias", "JetMET0", "JetMET1"],
"MC": [
"QCD-4Jets_HT-40to70_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_HT-70to100_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_HT-100to200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_HT-200to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_HT-600to800_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_HT-800to1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_HT-1000to1200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_HT-1200to1500_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_HT-1500to2000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_HT-2000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
],
},
}
4 changes: 2 additions & 2 deletions src/BTVNanoCommissioning/utils/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def jet_id(events, campaign, max_eta=2.5, min_pt=20):
(events.Jet.jetId >= 2) & (events.Jet.neHEF < 0.99),
ak.where(
(abs(events.Jet.eta) > 3.0),
(events.Jet.jetId & (1 << 1)) & (events.Jet.neEmEF < 0.4),
(events.Jet.jetId >= 2) & (events.Jet.neEmEF < 0.4),
ak.zeros_like(events.Jet.pt, dtype=bool),
),
),
Expand Down Expand Up @@ -86,7 +86,7 @@ def jet_id(events, campaign, max_eta=2.5, min_pt=20):
& ((events.Jet.pt > 50) | (events.Jet.puId >= 7))
)
else:
jetmask = (events.Jet.pt > min_pt) & (abs(events.Jet.eta) <= max_eta) & (jetid)
jetmask = (events.Jet.pt > min_pt) & (abs(events.Jet.eta) <= max_eta)# & (jetid)
return jetmask


Expand Down
11 changes: 11 additions & 0 deletions src/BTVNanoCommissioning/workflows/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@
NanoProcessor as BTA_ttbar_processor,
) # ttbar -kinFit

## QG - Dijet producer
from BTVNanoCommissioning.workflows.qgtag_dijet_producer import (
NanoProcessor as QGtagDijetProcessor,
)

# from BTVNanoCommissioning.workflows.example import (
# NanoProcessor as ExampleProcessor,
# )
Expand Down Expand Up @@ -98,6 +103,12 @@
# DY
workflows["ctag_DY_sf"] = partial(CTAGDYValidSFProcessor, selectionModifier="DYM")
workflows["ectag_DY_sf"] = partial(CTAGDYValidSFProcessor, selectionModifier="DYE")
workflows["qgtag_DY_sf"] = partial(CTAGDYValidSFProcessor, selectionModifier="QG")

# QG
workflows["qgtag_dijet"] = partial(QGtagDijetProcessor, selectionModifier="DiPFJetAve")
workflows["qgtag_dijet_zb"] = partial(QGtagDijetProcessor, selectionModifier="ZB")
workflows["qgtag_dijet_pfjet"] = partial(QGtagDijetProcessor, selectionModifier="PFJet")

# Tutorial
# workflows["example"] = ExampleProcessor
Expand Down
98 changes: 73 additions & 25 deletions src/BTVNanoCommissioning/workflows/ctag_DY_valid_sf.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def process_shift(self, events, shift_name):

isMu = False
isEle = False
if "DYM" in self.selMod:
if "DYM" in self.selMod or "QG" in self.selMod:
triggers = ["Mu17_TrkIsoVVL_Mu8_TrkIsoVVL_DZ_Mass8"]
isMu = True
elif "DYE" in self.selMod:
Expand All @@ -77,7 +77,7 @@ def process_shift(self, events, shift_name):
else:
raise ValueError(self.selMod, "is not a valid selection modifier.")

histname = {"DYM": "ctag_DY_sf", "DYE": "ectag_DY_sf"}
histname = {"DYM": "ctag_DY_sf", "DYE": "ectag_DY_sf", "QG": "qgtag_DY_sf"}
output = {} if self.noHist else histogrammer(events, histname[self.selMod])

if isRealData:
Expand Down Expand Up @@ -128,17 +128,6 @@ def process_shift(self, events, shift_name):
axis=-1,
)

pl_iso = ak.all(
events.Jet.metric_table(pos_dilep) > 0.4, axis=2, mask_identity=True
)
nl_iso = ak.all(
events.Jet.metric_table(neg_dilep) > 0.4, axis=2, mask_identity=True
)
jet_sel = ak.fill_none(
jet_id(events, self._campaign) & pl_iso & nl_iso,
False,
axis=-1,
)

pos_dilep = ak.pad_none(pos_dilep, 1, axis=1)
neg_dilep = ak.pad_none(neg_dilep, 1, axis=1)
Expand All @@ -155,15 +144,21 @@ def process_shift(self, events, shift_name):
nl_iso = ak.all(
events.Jet.metric_table(neg_dilep[:, 0]) > 0.4, axis=2, mask_identity=True
)
event_jet = events.Jet[
ak.fill_none(
jet_id(events, self._campaign) & pl_iso & nl_iso,
False,
axis=-1,
)
]

if "QG" in self.selMod:
jetmask = jet_id(events, self._campaign, max_eta=5.13)
else:
jetmask = jet_id(events, self._campaign)

jet_sel = ak.fill_none(
pl_iso & nl_iso & jetmask,
False,
axis=-1,
)

event_jet = events.Jet[jet_sel]

req_jets = ak.count(event_jet.pt, axis=1) >= 1
# event_jet = ak.pad_none(event_jet, 1, axis=1)

# store jet index for PFCands, create mask on the jet index
jetindx = ak.mask(
Expand All @@ -173,8 +168,25 @@ def process_shift(self, events, shift_name):
jetindx = ak.pad_none(jetindx, 1)
jetindx = jetindx[:, 0]

selection = (
req_lumi & req_trig & req_dilep & req_dilepmass & req_jets & req_metfilter
)

if "QG" in self.selMod:
temp_jet = ak.pad_none(event_jet, 1, axis=1)

req_lead_jet = ak.fill_none(
(
np.abs(temp_jet[:, 0].delta_phi(pos_dilep[:, 0] + neg_dilep[:, 0]))
> 2.7
),
False,
axis=-1,
)
selection = selection & req_lead_jet

event_level = ak.fill_none(
req_lumi & req_trig & req_dilep & req_dilepmass & req_jets & req_metfilter,
selection,
False,
)
if len(events[event_level]) == 0:
Expand Down Expand Up @@ -208,7 +220,11 @@ def process_shift(self, events, shift_name):
)
# Keep the structure of events and pruned the object size
pruned_ev = events[event_level]
pruned_ev["SelJet"] = event_jet[event_level]
if self.selMod == "QG":
pruned_ev["SelJet"] = event_jet[event_level][:, 0]
else:
pruned_ev["SelJet"] = event_jet[event_level]

if isMu:
pruned_ev["MuonPlus"] = sposmu
pruned_ev["MuonMinus"] = snegmu
Expand All @@ -233,7 +249,7 @@ def process_shift(self, events, shift_name):
pruned_ev["dr_mu1jet"] = sposmu.delta_r(sel_jet)
pruned_ev["dr_mu2jet"] = snegmu.delta_r(sel_jet)
# Find the PFCands associate with selected jets. Search from jetindex->JetPFCands->PFCand
if "PFCands" in events.fields:
if "PFCands" in events.fields and "QG" not in self.selMod:
pruned_ev["PFCands"] = PFCand_link(events, event_level, jetindx)

####################
Expand All @@ -254,8 +270,40 @@ def process_shift(self, events, shift_name):
)
# Output arrays
if self.isArray:
if "QG" in self.selMod:
othersData = [
"SV_*",
"PV_npvs",
"PV_npvsGood",
"Rho_*",
"SoftMuon_dxySig",
"Muon_sip3d",
"run",
"luminosityBlock",
]
for trigger in triggers:
othersData.append(f"HLT_{trigger}")
else:
othersData = [
"PFCands_*",
"MuonJet_*",
"SV_*",
"PV_npvs",
"PV_npvsGood",
"Rho_*",
"SoftMuon_dxySig",
"Muon_sip3d",
]

array_writer(
self, pruned_ev, events, weights, systematics, dataset, isRealData
self,
pruned_ev,
events,
weights,
systematics,
dataset,
isRealData,
othersData=othersData,
)

return {dataset: output}
Expand Down
Loading
Loading