From 012e073cec0605d4e68d020bd310a211b3395a32 Mon Sep 17 00:00:00 2001 From: Matteo Cremonesi Date: Sun, 18 Feb 2024 12:24:56 -0500 Subject: [PATCH 1/4] Update sample.py --- rhalphalib/sample.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rhalphalib/sample.py b/rhalphalib/sample.py index 4ba9b04..7620483 100644 --- a/rhalphalib/sample.py +++ b/rhalphalib/sample.py @@ -157,7 +157,7 @@ def setParamEffect(self, param, effect_up, effect_down=None, scale=None): """ if not isinstance(param, NuisanceParameter): if isinstance(param, IndependentParameter) and isinstance(effect_up, DependentParameter): - extras = effect_up.getDependents() - {param} + extras = effect_up.getDependents(deep=True) - {param} if not all(isinstance(p, IndependentParameter) for p in extras): raise ValueError("Normalization effects can only depend on one or more IndependentParameters") self._extra_dependencies.update(extras) @@ -322,7 +322,8 @@ def getExpectation(self, nominal=False): if nominal: return nominalval else: - out = np.array([IndependentParameter(self.name + "_bin%d_nominal" % i, v, constant=True) for i, v in enumerate(nominalval)]) + #out = np.array([IndependentParameter(self.name + "_bin%d_nominal" % i, v, constant=True) for i, v in enumerate(nominalval)]) + out = nominalval for param in self.parameters: effect_up = self.getParamEffect(param, up=True) if effect_up is None: @@ -381,7 +382,10 @@ def renderRoofit(self, workspace): # Normalization systematics can just go into combine datacards (although if we build PDF here, will need it) if isinstance(effect_up, DependentParameter): # this is a rateParam, we should add the IndependentParameter to the workspace - param.renderRoofit(workspace) + #param.renderRoofit(workspace) + dependents = effect_up.getDependents(deep=True) + for p in dependents: + p.renderRoofit(workspace) continue name = self.name + "_" + param.name + "Up" shape = nominal * effect_up @@ -418,7 +422,7 @@ def combineParamEffect(self, param): # about here's where I start to feel painted into a corner dep = self.getParamEffect(param, up=True) channel, sample = self.name[: self.name.find("_")], self.name[self.name.find("_") + 1 :] - dependents = dep.getDependents() + dependents = dep.getDependents(deep=True) formula = dep.formula(rendering=True).format(**{var.name: "@%d" % i for i, var in enumerate(dependents)}) return "{0} rateParam {1} {2} {3} {4}".format( dep.name, From 067cf18508a4f8c30598c2ae5d6dd4bc10f63335 Mon Sep 17 00:00:00 2001 From: Matteo Cremonesi Date: Sun, 18 Feb 2024 12:28:22 -0500 Subject: [PATCH 2/4] Update sample.py --- rhalphalib/sample.py | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/rhalphalib/sample.py b/rhalphalib/sample.py index 7620483..ee60fe3 100644 --- a/rhalphalib/sample.py +++ b/rhalphalib/sample.py @@ -634,7 +634,18 @@ def combineParamEffect(self, param): class TransferFactorSample(ParametericSample): - def __init__(self, name, sampletype, transferfactor, dependentsample, observable=None, min_val=None): + def __init__(self, + samplename, + sampletype, + transferfactor, + dependentsample, + nominal_values=None, + stat_unc=None, + observable=None, + min_val=None, + epsilon=1e-5, + effect_threshold=0.01, + channel_name=None): """ Create a sample that depends on another Sample by some transfer factor. The transfor factor can be a constant, an array of parameters of same length @@ -657,15 +668,33 @@ def __init__(self, name, sampletype, transferfactor, dependentsample, observable params[idx] = p.max(min_val) elif len(transferfactor.shape) <= 1: observable = dependentsample.observable - params = transferfactor * dependentsample.getExpectation() + if stat_unc is not None: + name = samplename if channel_name is None else channel_name + MCStatTemplate = (np.ones_like(stat_unc), observable._binning, observable._name) + MCStat = TemplateSample(name+'_mcstat', Sample.BACKGROUND, MCStatTemplate) + for i in range(MCStat.observable.nbins): + effect_up = np.ones_like(MCStat._nominal) + effect_down = np.ones_like(MCStat._nominal) + if stat_unc[i] < effect_threshold: + continue + effect_up[i] = 1.0 + min(1.0, stat_unc[i]) + effect_down[i] = max(epsilon, 1.0 - min(1.0, stat_unc[i])) + print("TF",samplename, i, stat_unc[i], name + '_mcstat_bin%i' % i) + param = NuisanceParameter(name + '_mcstat_bin%i' % i, combinePrior='shape') + MCStat.setParamEffect(param, effect_up, effect_down) + params = transferfactor * MCStat.getExpectation() * dependentsample.getExpectation() + else: + params = transferfactor * dependentsample.getExpectation() if min_val is not None: for i, p in enumerate(params): params[i] = p.max(min_val) else: raise ValueError("Transfer factor has invalid dimension") - super(TransferFactorSample, self).__init__(name, sampletype, observable, params) + super(TransferFactorSample, self).__init__(samplename, sampletype, observable, params) self._transferfactor = transferfactor self._dependentsample = dependentsample + self._stat_unc = stat_unc + self._nominal_values = nominal_values @property def transferfactor(self): @@ -674,3 +703,11 @@ def transferfactor(self): @property def dependentsample(self): return self._dependentsample + + @property + def stat_unc(self): + return self._stat_unc + + @property + def nominal_values(self): + return self._nominal_values From 2c92cb4ed2680b4949bed109c9cf26dd1eb41e9e Mon Sep 17 00:00:00 2001 From: Matteo Cremonesi Date: Sun, 18 Feb 2024 12:32:43 -0500 Subject: [PATCH 3/4] Update model.py --- rhalphalib/model.py | 89 ++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 34 deletions(-) diff --git a/rhalphalib/model.py b/rhalphalib/model.py index 583ab61..368f9c5 100644 --- a/rhalphalib/model.py +++ b/rhalphalib/model.py @@ -353,7 +353,7 @@ def renderCard(self, outputFilename, workspaceName): if effect != "-": fout.write(effect + "\n") - def autoMCStats(self, epsilon=0, threshold=0, include_signal=0, channel_name=None): + def autoMCStats(channel, epsilon=1e-5, effect_threshold=0.01, threshold=0, include_signal=0, channel_name=None): """ Barlow-Beeston-lite method i.e. single stats parameter for all processes per bin. Same general algorithm as described in @@ -361,52 +361,73 @@ def autoMCStats(self, epsilon=0, threshold=0, include_signal=0, channel_name=Non but *without the analytic minimisation*. `include_signal` only refers to whether signal stats are included in the *decision* to use bb-lite or not. """ - if not len(self._samples): - raise RuntimeError("Channel %r has no samples for which to run autoMCStats" % (self)) - - name = self._name if channel_name is None else channel_name - - first_sample = self._samples[list(self._samples.keys())[0]] - - for i in range(first_sample.observable.nbins): - ntot_bb, etot2_bb = 0, 0 # for the decision to use bblite or not - ntot, etot2 = 0, 0 # for the bblite uncertainty - - # check if neff = ntot^2 / etot2 > threshold - for sample in self._samples.values(): - ntot += sample._nominal[i] - etot2 += sample._sumw2[i] - + if not len(channel._samples): + raise RuntimeError("Channel %r has no samples for which to run autoMCStats" % (channel)) + + name = channel._name if channel_name is None else channel_name + + first_sample = channel._samples[list(channel._samples.keys())[0]] + + ntot_bb, etot2_bb = np.zeros_like(first_sample._nominal), np.zeros_like(first_sample._sumw2) + ntot, etot2 = np.zeros_like(first_sample._nominal), np.zeros_like(first_sample._sumw2) + + for sample in channel._samples.values(): + if isinstance(sample, TransferFactorSample): + ntot += sample._nominal_values + etot2 += (sample._stat_unc*sample._nominal_values)**2 + elif isinstance(sample, TemplateSample): + ntot += sample._nominal + etot2 += sample._sumw2 if not include_signal and sample._sampletype == Sample.SIGNAL: continue - - ntot_bb += sample._nominal[i] - etot2_bb += sample._sumw2[i] - - if etot2 <= 0.0: + ntot_bb += sample._nominal + etot2_bb += sample._sumw2 + else: continue - elif etot2_bb <= 0: + + for sample in channel._samples.values(): + if not isinstance(sample, TransferFactorSample): + continue + channel._samples[sample.name] = TransferFactorSample(sample.name, + Sample.BACKGROUND, + sample.transferfactor, + sample.dependentsample, + nominal_values=sample.nominal_values, + stat_unc=np.sqrt(etot2)/ntot, + channel_name=name) + + for i in range(first_sample.observable.nbins): + if etot2[i] <= 0.0: + continue + elif etot2_bb[i] <= 0: # this means there is signal but no background, so create stats unc. for signal only - for sample in self._samples.values(): + for sample in channel._samples.values(): if sample._sampletype == Sample.SIGNAL: sample_name = None if channel_name is None else channel_name + "_" + sample._name[sample._name.find("_") + 1 :] sample.autoMCStats(epsilon=epsilon, sample_name=sample_name, bini=i) - + continue - - neff_bb = ntot_bb**2 / etot2_bb + + neff_bb = ntot_bb[i]**2 / etot2_bb[i] if neff_bb <= threshold: - for sample in self._samples.values(): + for sample in channel._samples.values(): sample_name = None if channel_name is None else channel_name + "_" + sample._name[sample._name.find("_") + 1 :] sample.autoMCStats(epsilon=epsilon, sample_name=sample_name, bini=i) else: effect_up = np.ones_like(first_sample._nominal) effect_down = np.ones_like(first_sample._nominal) - - effect_up[i] = (ntot + np.sqrt(etot2)) / ntot - effect_down[i] = max((ntot - np.sqrt(etot2)) / ntot, epsilon) - + + effect = np.sqrt(etot2[i])/ntot[i] + if effect < effect_threshold: + continue + effect_up[i] = 1.0 + min(1.0, effect) + effect_down[i] = max(epsilon, 1.0 - min(1.0, effect)) + param = NuisanceParameter(name + "_mcstat_bin%i" % i, combinePrior="shape") - - for sample in self._samples.values(): + + for sample in channel._samples.values(): + if not isinstance(sample, TemplateSample): + continue + if sample._nominal[i] <= 1e-5: + continue sample.setParamEffect(param, effect_up, effect_down) From 9852e22389c2578e4f180f213a76be3e48d4b461 Mon Sep 17 00:00:00 2001 From: Matteo Cremonesi Date: Sun, 18 Feb 2024 12:33:02 -0500 Subject: [PATCH 4/4] Update sample.py --- rhalphalib/sample.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rhalphalib/sample.py b/rhalphalib/sample.py index ee60fe3..e123b3e 100644 --- a/rhalphalib/sample.py +++ b/rhalphalib/sample.py @@ -679,7 +679,6 @@ def __init__(self, continue effect_up[i] = 1.0 + min(1.0, stat_unc[i]) effect_down[i] = max(epsilon, 1.0 - min(1.0, stat_unc[i])) - print("TF",samplename, i, stat_unc[i], name + '_mcstat_bin%i' % i) param = NuisanceParameter(name + '_mcstat_bin%i' % i, combinePrior='shape') MCStat.setParamEffect(param, effect_up, effect_down) params = transferfactor * MCStat.getExpectation() * dependentsample.getExpectation()