From 710330f64a2b02f0ce2c33ac424b8412c5c4c70b Mon Sep 17 00:00:00 2001 From: AdrianoDee Date: Thu, 7 Nov 2024 15:49:23 +0100 Subject: [PATCH 1/2] Add lumisection output options, 100k wfs --- .../python/relval_steps.py | 4 ++-- .../scripts/das-up-to-nevents.py | 23 ++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/Configuration/PyReleaseValidation/python/relval_steps.py b/Configuration/PyReleaseValidation/python/relval_steps.py index 260dfec22b5ac..648644e46e927 100644 --- a/Configuration/PyReleaseValidation/python/relval_steps.py +++ b/Configuration/PyReleaseValidation/python/relval_steps.py @@ -45,8 +45,8 @@ steps = Steps() #### Event to runs -event_steps = [0.01,0.05,0.15,0.25,0.5,1] #in millions -event_steps_k = ["10k","50k","150k","250k","500k","1M"] +event_steps = [0.01,0.05,0.1,0.15,0.25,0.5,1] #in millions +event_steps_k = ["10k","50k","100k","150k","250k","500k","1M"] ##TODO add an helper to convert the numbers to strings event_steps_dict = dict(zip(event_steps_k,event_steps)) #### Production test section #### steps['ProdMinBias']=merge([{'cfg':'MinBias_8TeV_pythia8_TuneCUETP8M1_cff','--relval':'9000,300'},step1Defaults]) diff --git a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py index 1af66830c13bf..8ad27812aefc1 100755 --- a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py +++ b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py @@ -11,6 +11,8 @@ import os import json import sys +import itertools +import json ## Helpers base_cert_url = "https://cms-service-dqmdc.web.cern.ch/CAF/certification/" @@ -27,6 +29,13 @@ def get_url_clean(url): return BeautifulSoup(buffer.getvalue(), "lxml").text +def get_lumi_ranges(i): + result = [] + for _, b in itertools.groupby(enumerate(i), lambda pair: pair[1] - pair[0]): + b = list(b) + result.append([b[0][1],b[-1][1]]) + return result + def das_do_command(cmd): out = 
subprocess.check_output(cmd, shell=True, executable="/bin/bash").decode('utf8') return out.split("\n") @@ -90,6 +99,7 @@ def no_intersection(): parser.add_argument('--pandas', '-pd',action='store_true',help="Store the whole dataset (no event or threshold cut) in a csv") parser.add_argument('--proxy','-p', help='Allow to parse a x509 proxy if needed', type=str, default=None) parser.add_argument('--site','-s', help='Only data at specific site', type=str, default=None) + parser.add_argument('--lumis','-l', help='Output file for lumi ranges for the selected files (if blank no lumi ranges calculated)', type=str, default=None) parser.add_argument('--precheck','-pc', action='store_true', help='Check run per run before building the dataframes, to avoid huge caching.') args = parser.parse_args() @@ -106,6 +116,7 @@ def no_intersection(): threshold = args.threshold outfile = args.outfile site = args.site + lumis = args.lumis ## get the greatest golden json year = dataset.split("Run")[1][2:4] # from 20XX to XX @@ -134,7 +145,7 @@ def no_intersection(): if web_fallback: cert_url = base_cert_url + cert_type + "/" json_list = get_url_clean(cert_url).split("\n") - json_list = [c for c in json_list if "Golden" in c and "era" not in c] + json_list = [c for c in json_list if "Golden" in c and "era" not in c and "Cert_C" in c] json_list = [[cc for cc in c.split(" ") if cc.startswith("Cert_C") and cc.endswith("json")][0] for c in json_list] # the larger the better, assuming file naming schema @@ -222,8 +233,14 @@ def no_intersection(): df = df[df["events"] <= events] #jump too big files df.loc[:,"sum_evs"] = df.loc[:,"events"].cumsum() df = df[df["sum_evs"] < events] - + files = df.file + + if lumis is not None: + lumi_ranges = { int(r) : list(get_lumi_ranges(np.sort(np.concatenate(df.loc[df["run"]==r,"lumis"].values).ravel()).tolist())) for r in np.unique(df.run.values)} + + with open(lumis, 'w') as fp: + json.dump(lumi_ranges, fp) if outfile is not None: with open(outfile, 'w') as f: 
@@ -234,4 +251,4 @@ def no_intersection(): sys.exit(0) - + \ No newline at end of file From 1be626079fa9c07dbced2467d95479ddfe2521c8 Mon Sep 17 00:00:00 2001 From: AdrianoDee Date: Thu, 7 Nov 2024 20:06:08 +0100 Subject: [PATCH 2/2] Naming fix for Parking; using lumis in golden wfs --- Configuration/PyReleaseValidation/python/MatrixUtil.py | 6 +++--- Configuration/PyReleaseValidation/python/WorkFlowRunner.py | 7 +++++++ .../PyReleaseValidation/python/relval_data_highstats.py | 4 ++-- .../PyReleaseValidation/python/relval_standard.py | 4 ++-- Configuration/PyReleaseValidation/python/relval_steps.py | 4 ++-- .../PyReleaseValidation/scripts/das-up-to-nevents.py | 2 +- 6 files changed, 17 insertions(+), 10 deletions(-) diff --git a/Configuration/PyReleaseValidation/python/MatrixUtil.py b/Configuration/PyReleaseValidation/python/MatrixUtil.py index 05f43391f1c96..8dcc10ce98fdc 100644 --- a/Configuration/PyReleaseValidation/python/MatrixUtil.py +++ b/Configuration/PyReleaseValidation/python/MatrixUtil.py @@ -134,10 +134,10 @@ def das(self, das_options, dataset): elif self.skimEvents: from os import getenv if getenv("JENKINS_PREFIX") is not None: - # to be assured that whatever happens the files are only those at CERN - command = "das-up-to-nevents.py -d %s -e %d -pc"%(dataset,self.events) + # to be sure that whatever happens the files are only those at CERN + command = "das-up-to-nevents.py -d %s -e %d -pc -l lumi_ranges.txt"%(dataset,self.events) else: - command = "das-up-to-nevents.py -d %s -e %d"%(dataset,self.events) + command = "das-up-to-nevents.py -d %s -e %d -l lumi_ranges.txt"%(dataset,self.events) # Run filter on DAS output if self.ib_blacklist: command += " | grep -E -v " diff --git a/Configuration/PyReleaseValidation/python/WorkFlowRunner.py b/Configuration/PyReleaseValidation/python/WorkFlowRunner.py index 14203705882e9..78a11ed4c09f3 100644 --- a/Configuration/PyReleaseValidation/python/WorkFlowRunner.py +++ 
b/Configuration/PyReleaseValidation/python/WorkFlowRunner.py @@ -137,7 +137,14 @@ def closeCmd(i,ID): isInputOk = False inFile = 'filelist:' + basename(dasOutputPath) + + if com.skimEvents: + lumiRangeFile='step%d_lumiRanges.log'%(istep,) + cmd2 = preamble + "mv lumi_ranges.txt " + lumiRangeFile + retStep = self.doCmd(cmd2) + print("---") + else: #chaining IO , which should be done in WF object already and not using stepX.root but .root cmd += com diff --git a/Configuration/PyReleaseValidation/python/relval_data_highstats.py b/Configuration/PyReleaseValidation/python/relval_data_highstats.py index 13d682b79767f..4054f4ea92862 100644 --- a/Configuration/PyReleaseValidation/python/relval_data_highstats.py +++ b/Configuration/PyReleaseValidation/python/relval_data_highstats.py @@ -23,7 +23,7 @@ wf_number = wf_number + offset_pd * p_n wf_number = wf_number + offset_events * evs wf_number = round(wf_number,6) - step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']] ## 2023 @@ -38,7 +38,7 @@ wf_number = wf_number + offset_pd * p_n wf_number = wf_number + offset_events * evs wf_number = round(wf_number,6) - step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key workflows[wf_number] = ['',[step_name,'HLTDR3_2023','AODNANORUN3_reHLT_2023','HARVESTRUN3_2023']] diff --git a/Configuration/PyReleaseValidation/python/relval_standard.py b/Configuration/PyReleaseValidation/python/relval_standard.py index 770dd9a6f8701..4add8aceaba16 100644 --- a/Configuration/PyReleaseValidation/python/relval_standard.py +++ b/Configuration/PyReleaseValidation/python/relval_standard.py @@ -579,7 +579,7 @@ wf_number = wf_number + offset_pd * p_n wf_number = wf_number + 0.0001 * 0.01 wf_number = 
round(wf_number,6) - step_name = "Run" + pd + era.split("Run")[1] + "_10k" + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_10k" workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']] # 2023 @@ -591,7 +591,7 @@ wf_number = wf_number + offset_pd * p_n wf_number = wf_number + 0.0001 * 0.01 wf_number = round(wf_number,6) - step_name = "Run" + pd + era.split("Run")[1] + "_10k" + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_10k" workflows[wf_number] = ['',[step_name,'HLTDR3_2023','AODNANORUN3_reHLT_2023','HARVESTRUN3_2023']] # 2022 diff --git a/Configuration/PyReleaseValidation/python/relval_steps.py b/Configuration/PyReleaseValidation/python/relval_steps.py index 648644e46e927..8be8397586abd 100644 --- a/Configuration/PyReleaseValidation/python/relval_steps.py +++ b/Configuration/PyReleaseValidation/python/relval_steps.py @@ -653,7 +653,7 @@ for pd in pds_2024: dataset = "/" + pd + "/" + era + "-v1/RAW" for e_key,evs in event_steps_dict.items(): - step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key steps[step_name] = {'INPUT':InputInfo(dataSet=dataset,label=era.split("Run")[1],events=int(evs*1e6), skimEvents=True, location='STD')} ###2023 @@ -665,7 +665,7 @@ for pd in pds_2023: dataset = "/" + pd + "/" + era + "-v1/RAW" for e_key,evs in event_steps_dict.items(): - step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key + step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key steps[step_name] = {'INPUT':InputInfo(dataSet=dataset,label=era.split("Run")[1],events=int(evs*1e6), skimEvents=True, location='STD')} ###2022 diff --git a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py index 8ad27812aefc1..e4e05305d623f 100755 --- 
a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py +++ b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py @@ -237,7 +237,7 @@ def no_intersection(): files = df.file if lumis is not None: - lumi_ranges = { int(r) : list(get_lumi_ranges(np.sort(np.concatenate(df.loc[df["run"]==r,"lumis"].values).ravel()).tolist())) for r in np.unique(df.run.values)} + lumi_ranges = { int(r) : list(get_lumi_ranges(np.sort(np.concatenate(df.loc[df["run"]==r,"lumis"].values).ravel()).tolist())) for r in np.unique(df.run.values).tolist()} with open(lumis, 'w') as fp: json.dump(lumi_ranges, fp)