Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 55 additions & 34 deletions rhalphalib/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,60 +353,81 @@ def renderCard(self, outputFilename, workspaceName):
if effect != "-":
fout.write(effect + "\n")

def autoMCStats(self, epsilon=0, threshold=0, include_signal=0, channel_name=None):
def autoMCStats(channel, epsilon=1e-5, effect_threshold=0.01, threshold=0, include_signal=0, channel_name=None):
"""
Barlow-Beeston-lite method i.e. single stats parameter for all processes per bin.
Same general algorithm as described in
https://cms-analysis.github.io/HiggsAnalysis-CombinedLimit/part2/bin-wise-stats/
but *without the analytic minimisation*.
`include_signal` only refers to whether signal stats are included in the *decision* to use bb-lite or not.
"""
if not len(self._samples):
raise RuntimeError("Channel %r has no samples for which to run autoMCStats" % (self))

name = self._name if channel_name is None else channel_name

first_sample = self._samples[list(self._samples.keys())[0]]

for i in range(first_sample.observable.nbins):
ntot_bb, etot2_bb = 0, 0 # for the decision to use bblite or not
ntot, etot2 = 0, 0 # for the bblite uncertainty

# check if neff = ntot^2 / etot2 > threshold
for sample in self._samples.values():
ntot += sample._nominal[i]
etot2 += sample._sumw2[i]

if not len(channel._samples):
raise RuntimeError("Channel %r has no samples for which to run autoMCStats" % (channel))

name = channel._name if channel_name is None else channel_name

first_sample = channel._samples[list(channel._samples.keys())[0]]

ntot_bb, etot2_bb = np.zeros_like(first_sample._nominal), np.zeros_like(first_sample._sumw2)
ntot, etot2 = np.zeros_like(first_sample._nominal), np.zeros_like(first_sample._sumw2)

for sample in channel._samples.values():
if isinstance(sample, TransferFactorSample):
ntot += sample._nominal_values
etot2 += (sample._stat_unc*sample._nominal_values)**2
elif isinstance(sample, TemplateSample):
ntot += sample._nominal
etot2 += sample._sumw2
if not include_signal and sample._sampletype == Sample.SIGNAL:
continue

ntot_bb += sample._nominal[i]
etot2_bb += sample._sumw2[i]

if etot2 <= 0.0:
ntot_bb += sample._nominal
etot2_bb += sample._sumw2
else:
continue
elif etot2_bb <= 0:

for sample in channel._samples.values():
if not isinstance(sample, TransferFactorSample):
continue
channel._samples[sample.name] = TransferFactorSample(sample.name,
Sample.BACKGROUND,
sample.transferfactor,
sample.dependentsample,
nominal_values=sample.nominal_values,
stat_unc=np.sqrt(etot2)/ntot,
channel_name=name)

for i in range(first_sample.observable.nbins):
if etot2[i] <= 0.0:
continue
elif etot2_bb[i] <= 0:
# this means there is signal but no background, so create stats unc. for signal only
for sample in self._samples.values():
for sample in channel._samples.values():
if sample._sampletype == Sample.SIGNAL:
sample_name = None if channel_name is None else channel_name + "_" + sample._name[sample._name.find("_") + 1 :]
sample.autoMCStats(epsilon=epsilon, sample_name=sample_name, bini=i)

continue

neff_bb = ntot_bb**2 / etot2_bb
neff_bb = ntot_bb[i]**2 / etot2_bb[i]
if neff_bb <= threshold:
for sample in self._samples.values():
for sample in channel._samples.values():
sample_name = None if channel_name is None else channel_name + "_" + sample._name[sample._name.find("_") + 1 :]
sample.autoMCStats(epsilon=epsilon, sample_name=sample_name, bini=i)
else:
effect_up = np.ones_like(first_sample._nominal)
effect_down = np.ones_like(first_sample._nominal)

effect_up[i] = (ntot + np.sqrt(etot2)) / ntot
effect_down[i] = max((ntot - np.sqrt(etot2)) / ntot, epsilon)


effect = np.sqrt(etot2[i])/ntot[i]
if effect < effect_threshold:
continue
effect_up[i] = 1.0 + min(1.0, effect)
effect_down[i] = max(epsilon, 1.0 - min(1.0, effect))

param = NuisanceParameter(name + "_mcstat_bin%i" % i, combinePrior="shape")

for sample in self._samples.values():

for sample in channel._samples.values():
if not isinstance(sample, TemplateSample):
continue
if sample._nominal[i] <= 1e-5:
continue
sample.setParamEffect(param, effect_up, effect_down)
54 changes: 47 additions & 7 deletions rhalphalib/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def setParamEffect(self, param, effect_up, effect_down=None, scale=None):
"""
if not isinstance(param, NuisanceParameter):
if isinstance(param, IndependentParameter) and isinstance(effect_up, DependentParameter):
extras = effect_up.getDependents() - {param}
extras = effect_up.getDependents(deep=True) - {param}
if not all(isinstance(p, IndependentParameter) for p in extras):
raise ValueError("Normalization effects can only depend on one or more IndependentParameters")
self._extra_dependencies.update(extras)
Expand Down Expand Up @@ -322,7 +322,8 @@ def getExpectation(self, nominal=False):
if nominal:
return nominalval
else:
out = np.array([IndependentParameter(self.name + "_bin%d_nominal" % i, v, constant=True) for i, v in enumerate(nominalval)])
#out = np.array([IndependentParameter(self.name + "_bin%d_nominal" % i, v, constant=True) for i, v in enumerate(nominalval)])
out = nominalval
for param in self.parameters:
effect_up = self.getParamEffect(param, up=True)
if effect_up is None:
Expand Down Expand Up @@ -381,7 +382,10 @@ def renderRoofit(self, workspace):
# Normalization systematics can just go into combine datacards (although if we build PDF here, will need it)
if isinstance(effect_up, DependentParameter):
# this is a rateParam, we should add the IndependentParameter to the workspace
param.renderRoofit(workspace)
#param.renderRoofit(workspace)
dependents = effect_up.getDependents(deep=True)
for p in dependents:
p.renderRoofit(workspace)
continue
name = self.name + "_" + param.name + "Up"
shape = nominal * effect_up
Expand Down Expand Up @@ -418,7 +422,7 @@ def combineParamEffect(self, param):
# about here's where I start to feel painted into a corner
dep = self.getParamEffect(param, up=True)
channel, sample = self.name[: self.name.find("_")], self.name[self.name.find("_") + 1 :]
dependents = dep.getDependents()
dependents = dep.getDependents(deep=True)
formula = dep.formula(rendering=True).format(**{var.name: "@%d" % i for i, var in enumerate(dependents)})
return "{0} rateParam {1} {2} {3} {4}".format(
dep.name,
Expand Down Expand Up @@ -630,7 +634,18 @@ def combineParamEffect(self, param):


class TransferFactorSample(ParametericSample):
def __init__(self, name, sampletype, transferfactor, dependentsample, observable=None, min_val=None):
def __init__(self,
samplename,
sampletype,
transferfactor,
dependentsample,
nominal_values=None,
stat_unc=None,
observable=None,
min_val=None,
epsilon=1e-5,
effect_threshold=0.01,
channel_name=None):
"""
Create a sample that depends on another Sample by some transfer factor.
The transfor factor can be a constant, an array of parameters of same length
Expand All @@ -653,15 +668,32 @@ def __init__(self, name, sampletype, transferfactor, dependentsample, observable
params[idx] = p.max(min_val)
elif len(transferfactor.shape) <= 1:
observable = dependentsample.observable
params = transferfactor * dependentsample.getExpectation()
if stat_unc is not None:
name = samplename if channel_name is None else channel_name
MCStatTemplate = (np.ones_like(stat_unc), observable._binning, observable._name)
MCStat = TemplateSample(name+'_mcstat', Sample.BACKGROUND, MCStatTemplate)
for i in range(MCStat.observable.nbins):
effect_up = np.ones_like(MCStat._nominal)
effect_down = np.ones_like(MCStat._nominal)
if stat_unc[i] < effect_threshold:
continue
effect_up[i] = 1.0 + min(1.0, stat_unc[i])
effect_down[i] = max(epsilon, 1.0 - min(1.0, stat_unc[i]))
param = NuisanceParameter(name + '_mcstat_bin%i' % i, combinePrior='shape')
MCStat.setParamEffect(param, effect_up, effect_down)
params = transferfactor * MCStat.getExpectation() * dependentsample.getExpectation()
else:
params = transferfactor * dependentsample.getExpectation()
if min_val is not None:
for i, p in enumerate(params):
params[i] = p.max(min_val)
else:
raise ValueError("Transfer factor has invalid dimension")
super(TransferFactorSample, self).__init__(name, sampletype, observable, params)
super(TransferFactorSample, self).__init__(samplename, sampletype, observable, params)
self._transferfactor = transferfactor
self._dependentsample = dependentsample
self._stat_unc = stat_unc
self._nominal_values = nominal_values

@property
def transferfactor(self):
Expand All @@ -670,3 +702,11 @@ def transferfactor(self):
@property
def dependentsample(self):
return self._dependentsample

@property
def stat_unc(self):
return self._stat_unc

@property
def nominal_values(self):
return self._nominal_values