Skip to content

Commit 5acf6b6

Browse files
authored
Merge pull request #834 from cms-analysis/anigamova-uniform_pdf_for_flatParam
uniform pdf for flat param
2 parents 617a7f9 + 3976c43 commit 5acf6b6

File tree

6 files changed

+87
-16
lines changed

6 files changed

+87
-16
lines changed

docs/part3/commonstatsmethods.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,9 @@ The choice of test-statistic can be made via the option `--testStat` and differe
346346
347347
While the above shortcuts are the common variants, you can also try others. The treatment of the nuisances can be changed to the so-called "Hybrid-Bayesian" method which effectively integrates over the nuisance parameters. This is especially relevant when you have very few expected events in your data and you are using those events to constrain background processes. This can be achieved by setting `--generateNuisances=1 --generateExternalMeasurements=0`. You might also want to avoid first fitting to the data to choose the nominal values in this case, which can be done by also setting `--fitNuisances=0`.
348348
349+
!!! warning
350+
If you have unconstrained parameters in your model (`rateParam` or if using a `_norm` variable for a pdf) and you want to use the "Hybrid-Bayesian" method, you **must** declare these as `flatParam` in your datacard and when running text2workspace you must add the option `--X-assign-flatParam-prior` in the command line. This will create uniform priors for these parameters, which is needed for this method and which otherwise would not get created.
351+
349352
!!! info
350353
Note that (observed and toy) values of the test statistic stored in the instances of `RooStats::HypoTestResult` when the option `--saveHybridResult` has been specified, are defined without the factor 2 and therefore are twice as small as the values given by the formulas above. This factor is however included automatically by all plotting script supplied within the Combine package.
351354

docs/part3/runningthetool.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ You can turn off the internal logic by setting `--X-rtd TMCSO_AdaptivePseudoAsim
186186

187187
#### Nuisance parameter generation
188188

189-
The default method of dealing with systematics is to generate random values (around their nominal values, see above) for the nuisance parameters, according to their prior pdfs centred around their default values, *before* generating the data. The *unconstrained* nuisance parameters (eg `flatParam` or `rateParam`) or those with *flat* priors are **not** randomised before the data generation.
189+
The default method of dealing with systematics is to generate random values (around their nominal values, see above) for the nuisance parameters, according to their prior pdfs centred around their default values, *before* generating the data. The *unconstrained* nuisance parameters (eg `flatParam` or `rateParam`) or those with *flat* priors are **not** randomised before the data generation. If you wish to also randomise these parameters, you **must** declare these as `flatParam` in your datacard and when running text2workspace you must add the option `--X-assign-flatParam-prior` in the command line.
190190

191191
The following are options which define how the toys will be generated,
192192

python/Datacard.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ def __init__(self):
6363
self.groups = {}
6464
self.discretes = []
6565

66+
# list of parameters called _norm in user input workspace
67+
self.pdfnorms = {}
68+
69+
# collection of nuisances to auto-produce flat priors for
70+
self.toCreateFlatParam = {}
71+
6672
def print_structure(self):
6773
"""
6874
Print the contents of the -> should allow for direct text2workspace on python config
@@ -140,6 +146,7 @@ def print_structure(self):
140146
print("DC.binParFlags = ", self.binParFlags, "#", type(self.binParFlags))
141147
print("DC.groups = ", self.groups, "#", type(self.groups))
142148
print("DC.discretes = ", self.discretes, "#", type(self.discretes))
149+
print("DC.pdfnorms = ", self.pdfnorms, "#", type(self.pdfnorms))
143150

144151
print(
145152
"""

python/DatacardParser.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,13 @@ def addDatacardParserOptions(parser):
279279
default=None,
280280
help="Simplify MH dependent objects: 'fixed', 'pol<N>' with N=0..4",
281281
)
282+
parser.add_option(
283+
"--X-assign-flatParam-prior",
284+
dest="flatParamPrior",
285+
default=False,
286+
action="store_true",
287+
help="Assign RooUniform pdf for flatParam NPs",
288+
)
282289

283290

284291
def strip(l):
@@ -359,6 +366,9 @@ def parseCard(file, options):
359366
if not hasattr(options, "evaluateEdits"):
360367
setattr(options, "evaluateEdits", True)
361368

369+
if not hasattr(options, "flatParamPrior"):
370+
setattr(options, "flatParamPrior", False)
371+
362372
try:
363373
for lineNumber, l in enumerate(file):
364374
f = l.split()
@@ -522,6 +532,9 @@ def parseCard(file, options):
522532
elif pdf == "flatParam":
523533
ret.flatParamNuisances[lsyst] = True
524534
# for flat parametric uncertainties, code already does the right thing as long as they are non-constant RooRealVars linked to the model
535+
if options.flatParamPrior:
536+
ret.systs.append([lsyst, nofloat, pdf, args, []])
537+
ret.add_syst_id(lsyst)
525538
continue
526539
elif pdf == "extArg":
527540
# look for additional parameters in workspaces
@@ -668,7 +681,7 @@ def parseCard(file, options):
668681
syst2 = []
669682
for lsyst, nofloat, pdf, args, errline in ret.systs:
670683
nonNullEntries = 0
671-
if pdf == "param" or pdf == "constr" or pdf == "discrete" or pdf == "rateParam": # this doesn't have an errline
684+
if pdf == "param" or pdf == "constr" or pdf == "discrete" or pdf == "rateParam" or pdf == "flatParam": # this doesn't have an errline
672685
syst2.append((lsyst, nofloat, pdf, args, errline))
673686
continue
674687
for b, p, s in ret.keyline:

python/ModelTools.py

Lines changed: 60 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,14 @@ def __init__(self, datacard, options):
154154
self.selfNormBins = []
155155
self.extraNuisances = []
156156
self.extraGlobalObservables = []
157+
self.globalobs = []
158+
159+
def getSafeNormName(self, n):
160+
# need to be careful in case user has _norm name and wants to auto-create flatPrior
161+
if self.options.flatParamPrior:
162+
if n in self.DC.pdfnorms.keys():
163+
return self.DC.pdfnorms[n]
164+
return n
157165

158166
def setPhysics(self, physicsModel):
159167
self.physics = physicsModel
@@ -174,6 +182,8 @@ def doModel(self, justCheckPhysicsModel=False):
174182
self.doNuisances()
175183
self.doExtArgs()
176184
self.doRateParams()
185+
self.doAutoFlatNuisancePriors()
186+
self.doFillNuisPdfsAndSets()
177187
self.doExpectedEvents()
178188
if justCheckPhysicsModel:
179189
self.physics.done()
@@ -316,6 +326,12 @@ def doRateParams(self):
316326
v = float(argv)
317327
removeRange = len(param_range) == 0
318328
if param_range == "":
329+
if self.options.flatParamPrior:
330+
raise ValueError(
331+
"Cannot create flat Prior for rateParam nuisance parameter '"
332+
+ argu
333+
+ "' without specifying a range [a,b]. Please fix in the datacard"
334+
)
319335
## check range. The parameter needs to be created in range. Then we will remove it
320336
param_range = "%g,%g" % (-2.0 * abs(v), 2.0 * abs(v))
321337
# additional check for range requested
@@ -382,7 +398,7 @@ def doNuisances(self):
382398
if len(self.DC.systs) == 0:
383399
return
384400
self.doComment(" ----- nuisances -----")
385-
globalobs = []
401+
# globalobs = []
386402

387403
for n, nofloat, pdf, args, errline in self.DC.systs:
388404
is_func_scaled = False
@@ -433,7 +449,7 @@ def doNuisances(self):
433449
# Use existing constraint since it could be a param
434450
self.out.var(n).setVal(0)
435451
self.out.var(n).setError(1)
436-
globalobs.append("%s_In" % n)
452+
self.globalobs.append("%s_In" % n)
437453
if self.options.bin:
438454
self.out.var("%s_In" % n).setConstant(True)
439455
if self.options.optimizeBoundNuisances and not is_func_scaled:
@@ -466,7 +482,7 @@ def doNuisances(self):
466482
theta,
467483
),
468484
)
469-
globalobs.append("%s_In" % n)
485+
self.globalobs.append("%s_In" % n)
470486
if self.options.bin:
471487
self.out.var("%s_In" % n).setConstant(True)
472488
elif pdf == "gmN":
@@ -501,7 +517,7 @@ def doNuisances(self):
501517
"Poisson",
502518
"%s_In[%d,%f,%f], %s[%f,%f,%f], 1" % (n, args[0], minObs, maxObs, n, args[0] + 1, minExp, maxExp),
503519
)
504-
globalobs.append("%s_In" % n)
520+
self.globalobs.append("%s_In" % n)
505521
if self.options.bin:
506522
self.out.var("%s_In" % n).setConstant(True)
507523
elif pdf == "trG":
@@ -515,13 +531,21 @@ def doNuisances(self):
515531
trG_max = -1.0 / v
516532
r = "%f,%f" % (trG_min, trG_max)
517533
self.doObj("%s_Pdf" % n, "Gaussian", "%s[0,%s], %s_In[0,%s], 1" % (n, r, n, r))
518-
globalobs.append("%s_In" % n)
534+
self.globalobs.append("%s_In" % n)
519535
if self.options.bin:
520536
self.out.var("%s_In" % n).setConstant(True)
521537
elif pdf == "lnU" or pdf == "shapeU":
522538
self.doObj("%s_Pdf" % n, "Uniform", "%s[-1,1]" % n)
523539
elif pdf == "unif":
524540
self.doObj("%s_Pdf" % n, "Uniform", "%s[%f,%f]" % (n, args[0], args[1]))
541+
elif pdf == "flatParam" and self.options.flatParamPrior:
542+
c_param_name = self.getSafeNormName(n)
543+
if self.out.var(c_param_name):
544+
v, x1, x2 = self.out.var(c_param_name).getVal(), self.out.var(c_param_name).getMin(), self.out.var(c_param_name).getMax()
545+
self.DC.toCreateFlatParam[c_param_name] = [v, x1, x2]
546+
else:
547+
self.DC.toCreateFlatParam[c_param_name] = []
548+
525549
elif pdf == "dFD" or pdf == "dFD2":
526550
dFD_min = -(1 + 8 / args[0])
527551
dFD_max = +(1 + 8 / args[0])
@@ -546,7 +570,7 @@ def doNuisances(self):
546570
ROOFIT_EXPR_PDF,
547571
"'1/(2*(1+exp(%f*(@0-1)))*(1+exp(-%f*(@0+1))))', %s[0,%s], %s_In[0,%s]" % (args[0], args[0], n, r, n, r),
548572
)
549-
globalobs.append("%s_In" % n)
573+
self.globalobs.append("%s_In" % n)
550574
if self.options.bin:
551575
self.out.var("%s_In" % n).setConstant(True)
552576
elif pdf == "constr":
@@ -760,7 +784,7 @@ def doNuisances(self):
760784
self.out.function("%s_BoundLo" % n),
761785
self.out.function("%s_BoundHi" % n),
762786
)
763-
globalobs.append("%s_In" % n)
787+
self.globalobs.append("%s_In" % n)
764788
# if self.options.optimizeBoundNuisances: self.out.var(n).setAttribute("optimizeBounds")
765789
elif pdf == "extArg":
766790
continue
@@ -772,15 +796,18 @@ def doNuisances(self):
772796
# self.out.var(n).Print('V')
773797
if n in self.DC.frozenNuisances:
774798
self.out.var(n).setConstant(True)
799+
800+
def doFillNuisPdfsAndSets(self):
775801
if self.options.bin:
776802
# avoid duplicating _Pdf in list
777803
setNuisPdf = []
778804
nuisPdfs = ROOT.RooArgList()
779805
nuisVars = ROOT.RooArgSet()
780806
for n, nf, p, a, e in self.DC.systs:
807+
c_param_name = self.getSafeNormName(n)
781808
if p != "constr":
782-
nuisVars.add(self.out.var(n))
783-
setNuisPdf.append(n)
809+
nuisVars.add(self.out.var(c_param_name))
810+
setNuisPdf.append(c_param_name)
784811
setNuisPdf = set(setNuisPdf)
785812
for n in setNuisPdf:
786813
nuisPdfs.add(self.out.pdf(n + "_Pdf"))
@@ -789,15 +816,33 @@ def doNuisances(self):
789816
self.out.safe_import(self.out.nuisPdf)
790817
self.out.nuisPdfs = nuisPdfs
791818
gobsVars = ROOT.RooArgSet()
792-
for g in globalobs:
819+
for g in self.globalobs:
793820
gobsVars.add(self.out.var(g))
794821
self.out.defineSet("globalObservables", gobsVars)
795822
else: # doesn't work for too many nuisances :-(
796823
# avoid duplicating _Pdf in list
797-
setNuisPdf = set([n for (n, nf, p, a, e) in self.DC.systs])
798-
self.doSet("nuisances", ",".join(["%s" % n for (n, nf, p, a, e) in self.DC.systs]))
824+
setNuisPdf = set([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs])
825+
self.doSet("nuisances", ",".join(["%s" % self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs]))
799826
self.doObj("nuisancePdf", "PROD", ",".join(["%s_Pdf" % n for n in setNuisPdf]))
800-
self.doSet("globalObservables", ",".join(globalobs))
827+
self.doSet("globalObservables", ",".join(self.globalobs))
828+
829+
def doAutoFlatNuisancePriors(self):
830+
if len(self.DC.toCreateFlatParam.keys()) > 0:
831+
for flatNP in self.DC.toCreateFlatParam.items():
832+
c_param_name = flatNP[0]
833+
c_param_details = flatNP[1]
834+
if len(c_param_details):
835+
v, x1, x2 = c_param_details
836+
else:
837+
v, x1, x2 = self.out.var(c_param_name).getVal(), self.out.var(c_param_name).getMin(), self.out.var(c_param_name).getMax()
838+
if self.options.verbose > 2:
839+
print("Will create flat prior for parameter ", c_param_name, " with range [", x1, x2, "]")
840+
self.doExp(
841+
"%s_diff_expr" % c_param_name, "%s-%s_In" % (c_param_name, c_param_name), "%s,%s_In[%g,%g,%g]" % (c_param_name, c_param_name, v, x1, x2)
842+
)
843+
self.doObj("%s_Pdf" % c_param_name, "Uniform", "%s_diff_expr" % c_param_name)
844+
self.out.var("%s_In" % c_param_name).setConstant(True)
845+
self.globalobs.append("%s_In" % c_param_name)
801846

802847
def doNuisancesGroups(self):
803848
# Prepare a dictionary of which group a certain nuisance belongs to
@@ -878,7 +923,7 @@ def doExpectedEvents(self):
878923
continue
879924
if pdf == "constr":
880925
continue
881-
if pdf == "rateParam":
926+
if pdf == "rateParam" or pdf == "flatParam":
882927
continue
883928
if p not in errline[b]:
884929
continue
@@ -899,6 +944,7 @@ def doExpectedEvents(self):
899944
logNorms.append((errline[b][p], n))
900945
elif pdf == "gmM":
901946
factors.append(n)
947+
# elif pdf == "trG" or pdf == "unif" or pdf == "flatParam" or pdf == "dFD" or pdf == "dFD2":
902948
elif pdf == "trG" or pdf == "unif" or pdf == "dFD" or pdf == "dFD2":
903949
myname = "n_exp_shift_bin%s_proc_%s_%s" % (b, p, n)
904950
self.doObj(myname, ROOFIT_EXPR, "'1+%f*@0', %s" % (errline[b][p], n))

python/ShapeTools.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,8 @@ def prepareAllShapes(self):
615615
raise RuntimeError("There's more than once choice of observables: %s\n" % str(list(shapeObs.keys())))
616616
self.out.binVars = list(shapeObs.values())[0]
617617
self.out.safe_import(self.out.binVars)
618+
# keep hold of pdf_norm renaming
619+
self.DC.pdfnorms = self.norm_rename_map.copy()
618620

619621
def doCombinedDataset(self):
620622
if len(self.DC.bins) == 1 and self.options.forceNonSimPdf:

0 commit comments

Comments
 (0)