From 710330f64a2b02f0ce2c33ac424b8412c5c4c70b Mon Sep 17 00:00:00 2001 From: AdrianoDee Date: Thu, 7 Nov 2024 15:49:23 +0100 Subject: [PATCH 1/2] Add lumisection output options, 100k wfs --- .../python/relval_steps.py | 4 ++-- .../scripts/das-up-to-nevents.py | 23 ++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/Configuration/PyReleaseValidation/python/relval_steps.py b/Configuration/PyReleaseValidation/python/relval_steps.py index 260dfec22b5ac..648644e46e927 100644 --- a/Configuration/PyReleaseValidation/python/relval_steps.py +++ b/Configuration/PyReleaseValidation/python/relval_steps.py @@ -45,8 +45,8 @@ steps = Steps() #### Event to runs -event_steps = [0.01,0.05,0.15,0.25,0.5,1] #in millions -event_steps_k = ["10k","50k","150k","250k","500k","1M"] +event_steps = [0.01,0.05,0.1,0.15,0.25,0.5,1] #in millions +event_steps_k = ["10k","50k","100k","150k","250k","500k","1M"] ##TODO add an helper to convert the numbers to strings event_steps_dict = dict(zip(event_steps_k,event_steps)) #### Production test section #### steps['ProdMinBias']=merge([{'cfg':'MinBias_8TeV_pythia8_TuneCUETP8M1_cff','--relval':'9000,300'},step1Defaults]) diff --git a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py index 1af66830c13bf..8ad27812aefc1 100755 --- a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py +++ b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py @@ -11,6 +11,8 @@ import os import json import sys +import itertools +import json ## Helpers base_cert_url = "https://cms-service-dqmdc.web.cern.ch/CAF/certification/" @@ -27,6 +29,13 @@ def get_url_clean(url): return BeautifulSoup(buffer.getvalue(), "lxml").text +def get_lumi_ranges(i): + result = [] + for _, b in itertools.groupby(enumerate(i), lambda pair: pair[1] - pair[0]): + b = list(b) + result.append([b[0][1],b[-1][1]]) + return result + def das_do_command(cmd): out = 
subprocess.check_output(cmd, shell=True, executable="/bin/bash").decode('utf8') return out.split("\n") @@ -90,6 +99,7 @@ def no_intersection(): parser.add_argument('--pandas', '-pd',action='store_true',help="Store the whole dataset (no event or threshold cut) in a csv") parser.add_argument('--proxy','-p', help='Allow to parse a x509 proxy if needed', type=str, default=None) parser.add_argument('--site','-s', help='Only data at specific site', type=str, default=None) + parser.add_argument('--lumis','-l', help='Output file for lumi ranges for the selected files (if blank no lumi ranges calculated)', type=str, default=None) parser.add_argument('--precheck','-pc', action='store_true', help='Check run per run before building the dataframes, to avoid huge caching.') args = parser.parse_args() @@ -106,6 +116,7 @@ def no_intersection(): threshold = args.threshold outfile = args.outfile site = args.site + lumis = args.lumis ## get the greatest golden json year = dataset.split("Run")[1][2:4] # from 20XX to XX @@ -134,7 +145,7 @@ def no_intersection(): if web_fallback: cert_url = base_cert_url + cert_type + "/" json_list = get_url_clean(cert_url).split("\n") - json_list = [c for c in json_list if "Golden" in c and "era" not in c] + json_list = [c for c in json_list if "Golden" in c and "era" not in c and "Cert_C" in c] json_list = [[cc for cc in c.split(" ") if cc.startswith("Cert_C") and cc.endswith("json")][0] for c in json_list] # the larger the better, assuming file naming schema @@ -222,8 +233,14 @@ def no_intersection(): df = df[df["events"] <= events] #jump too big files df.loc[:,"sum_evs"] = df.loc[:,"events"].cumsum() df = df[df["sum_evs"] < events] - + files = df.file + + if lumis is not None: + lumi_ranges = { int(r) : list(get_lumi_ranges(np.sort(np.concatenate(df.loc[df["run"]==r,"lumis"].values).ravel()).tolist())) for r in np.unique(df.run.values)} + + with open(lumis, 'w') as fp: + json.dump(lumi_ranges, fp) if outfile is not None: with open(outfile, 'w') as f: 
@@ -234,4 +251,4 @@ def no_intersection(): sys.exit(0) - + \ No newline at end of file From 1be626079fa9c07dbced2467d95479ddfe2521c8 Mon Sep 17 00:00:00 2001 From: AdrianoDee Date: Thu, 7 Nov 2024 20:06:08 +0100 Subject: [PATCH 2/2] Naming fix for Parking; using lumis in golden wfs --- Configuration/PyReleaseValidation/python/MatrixUtil.py | 6 +++--- Configuration/PyReleaseValidation/python/WorkFlowRunner.py | 7 +++++++ .../PyReleaseValidation/python/relval_data_highstats.py | 4 ++-- .../PyReleaseValidation/python/relval_standard.py | 4 ++-- Configuration/PyReleaseValidation/python/relval_steps.py | 4 ++-- .../PyReleaseValidation/scripts/das-up-to-nevents.py | 2 +- 6 files changed, 17 insertions(+), 10 deletions(-) diff --git a/Configuration/PyReleaseValidation/python/MatrixUtil.py b/Configuration/PyReleaseValidation/python/MatrixUtil.py index 05f43391f1c96..8dcc10ce98fdc 100644 --- a/Configuration/PyReleaseValidation/python/MatrixUtil.py +++ b/Configuration/PyReleaseValidation/python/MatrixUtil.py @@ -134,10 +134,10 @@ def das(self, das_options, dataset): elif self.skimEvents: from os import getenv if getenv("JENKINS_PREFIX") is not None: - # to be assured that whatever happens the files are only those at CERN - command = "das-up-to-nevents.py -d %s -e %d -pc"%(dataset,self.events) + # to be sure that whatever happens the files are only those at CERN + command = "das-up-to-nevents.py -d %s -e %d -pc -l lumi_ranges.txt"%(dataset,self.events) else: - command = "das-up-to-nevents.py -d %s -e %d"%(dataset,self.events) + command = "das-up-to-nevents.py -d %s -e %d -l lumi_ranges.txt"%(dataset,self.events) # Run filter on DAS output if self.ib_blacklist: command += " | grep -E -v " diff --git a/Configuration/PyReleaseValidation/python/WorkFlowRunner.py b/Configuration/PyReleaseValidation/python/WorkFlowRunner.py index 14203705882e9..78a11ed4c09f3 100644 --- a/Configuration/PyReleaseValidation/python/WorkFlowRunner.py +++ 
b/Configuration/PyReleaseValidation/python/WorkFlowRunner.py @@ -137,7 +137,14 @@ def closeCmd(i,ID): isInputOk = False inFile = 'filelist:' + basename(dasOutputPath) + + if com.skimEvents: + lumiRangeFile='step%d_lumiRanges.log'%(istep,) + cmd2 = preamble + "mv lumi_ranges.txt " + lumiRangeFile + retStep = self.doCmd(cmd2) + print("---") + else: #chaining IO , which should be done in WF object already and not using stepX.root but .root cmd += com diff --git a/Configuration/PyReleaseValidation/python/relval_data_highstats.py b/Configuration/PyReleaseValidation/python/relval_data_highstats.py index 13d682b79767f..4054f4ea92862 100644 --- a/Configuration/PyReleaseValidation/python/relval_data_highstats.py +++ b/Configuration/PyReleaseValidation/python/relval_data_highstats.py @@ -23,7 +23,7 @@ wf_number = wf_number + offset_pd * p_n wf_number = wf_number + offset_events * evs wf_number = round(wf_number,6) - step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']] ## 2023 @@ -38,7 +38,7 @@ wf_number = wf_number + offset_pd * p_n wf_number = wf_number + offset_events * evs wf_number = round(wf_number,6) - step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key workflows[wf_number] = ['',[step_name,'HLTDR3_2023','AODNANORUN3_reHLT_2023','HARVESTRUN3_2023']] diff --git a/Configuration/PyReleaseValidation/python/relval_standard.py b/Configuration/PyReleaseValidation/python/relval_standard.py index 770dd9a6f8701..4add8aceaba16 100644 --- a/Configuration/PyReleaseValidation/python/relval_standard.py +++ b/Configuration/PyReleaseValidation/python/relval_standard.py @@ -579,7 +579,7 @@ wf_number = wf_number + offset_pd * p_n wf_number = wf_number + 0.0001 * 0.01 wf_number = 
round(wf_number,6) - step_name = "Run" + pd + era.split("Run")[1] + "_10k" + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_10k" workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']] # 2023 @@ -591,7 +591,7 @@ wf_number = wf_number + offset_pd * p_n wf_number = wf_number + 0.0001 * 0.01 wf_number = round(wf_number,6) - step_name = "Run" + pd + era.split("Run")[1] + "_10k" + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_10k" workflows[wf_number] = ['',[step_name,'HLTDR3_2023','AODNANORUN3_reHLT_2023','HARVESTRUN3_2023']] # 2022 diff --git a/Configuration/PyReleaseValidation/python/relval_steps.py b/Configuration/PyReleaseValidation/python/relval_steps.py index 648644e46e927..8be8397586abd 100644 --- a/Configuration/PyReleaseValidation/python/relval_steps.py +++ b/Configuration/PyReleaseValidation/python/relval_steps.py @@ -653,7 +653,7 @@ for pd in pds_2024: dataset = "/" + pd + "/" + era + "-v1/RAW" for e_key,evs in event_steps_dict.items(): - step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key steps[step_name] = {'INPUT':InputInfo(dataSet=dataset,label=era.split("Run")[1],events=int(evs*1e6), skimEvents=True, location='STD')} ###2023 @@ -665,7 +665,7 @@ for pd in pds_2023: dataset = "/" + pd + "/" + era + "-v1/RAW" for e_key,evs in event_steps_dict.items(): - step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key steps[step_name] = {'INPUT':InputInfo(dataSet=dataset,label=era.split("Run")[1],events=int(evs*1e6), skimEvents=True, location='STD')} ###2022 diff --git a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py index 8ad27812aefc1..e4e05305d623f 100755 --- 
a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py +++ b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py @@ -237,7 +237,7 @@ def no_intersection(): files = df.file if lumis is not None: - lumi_ranges = { int(r) : list(get_lumi_ranges(np.sort(np.concatenate(df.loc[df["run"]==r,"lumis"].values).ravel()).tolist())) for r in np.unique(df.run.values)} + lumi_ranges = { int(r) : list(get_lumi_ranges(np.sort(np.concatenate(df.loc[df["run"]==r,"lumis"].values).ravel()).tolist())) for r in np.unique(df.run.values).tolist()} with open(lumis, 'w') as fp: json.dump(lumi_ranges, fp)