nsmith- · mcremone · Feb 18, 2024 · Feb 18, 2024 · Feb 18, 2024 · Feb 18, 2024
diff --git a/rhalphalib/model.py b/rhalphalib/model.py
@@ -353,60 +353,81 @@ def renderCard(self, outputFilename, workspaceName):
                     if effect != "-":
                         fout.write(effect + "\n")
 
-    def autoMCStats(self, epsilon=0, threshold=0, include_signal=0, channel_name=None):
+    def autoMCStats(channel, epsilon=1e-5, effect_threshold=0.01, threshold=0, include_signal=0, channel_name=None):
         """
         Barlow-Beeston-lite method i.e. single stats parameter for all processes per bin.
         Same general algorithm as described in
         https://cms-analysis.github.io/HiggsAnalysis-CombinedLimit/part2/bin-wise-stats/
         but *without the analytic minimisation*.
         `include_signal` only refers to whether signal stats are included in the *decision* to use bb-lite or not.
         """
-        if not len(self._samples):
-            raise RuntimeError("Channel %r has no samples for which to run autoMCStats" % (self))
-
-        name = self._name if channel_name is None else channel_name
-
-        first_sample = self._samples[list(self._samples.keys())[0]]
-
-        for i in range(first_sample.observable.nbins):
-            ntot_bb, etot2_bb = 0, 0  # for the decision to use bblite or not
-            ntot, etot2 = 0, 0  # for the bblite uncertainty
-
-            # check if neff = ntot^2 / etot2 > threshold
-            for sample in self._samples.values():
-                ntot += sample._nominal[i]
-                etot2 += sample._sumw2[i]
-
+        if not len(channel._samples):
+            raise RuntimeError("Channel %r has no samples for which to run autoMCStats" % (channel))
+
+        name = channel._name if channel_name is None else channel_name
+
+        first_sample = channel._samples[list(channel._samples.keys())[0]]
+
+        ntot_bb, etot2_bb = np.zeros_like(first_sample._nominal), np.zeros_like(first_sample._sumw2)
+        ntot, etot2 = np.zeros_like(first_sample._nominal), np.zeros_like(first_sample._sumw2)
+
+        for sample in channel._samples.values():
+            if isinstance(sample, TransferFactorSample):
+                ntot += sample._nominal_values
+                etot2 += (sample._stat_unc*sample._nominal_values)**2
+            elif isinstance(sample, TemplateSample):
+                ntot += sample._nominal
+                etot2 += sample._sumw2
                 if not include_signal and sample._sampletype == Sample.SIGNAL:
                     continue
-
-                ntot_bb += sample._nominal[i]
-                etot2_bb += sample._sumw2[i]
-
-            if etot2 <= 0.0:
+                ntot_bb += sample._nominal
+                etot2_bb += sample._sumw2
+            else:
                 continue
-            elif etot2_bb <= 0:
+
+        for sample in channel._samples.values():
+            if not isinstance(sample, TransferFactorSample):
+                continue
+            channel._samples[sample.name] = TransferFactorSample(sample.name, 
+                                                                 Sample.BACKGROUND, 
+                                                                 sample.transferfactor, 
+                                                                 sample.dependentsample, 
+                                                                 nominal_values=sample.nominal_values, 
+                                                                 stat_unc=np.sqrt(etot2)/ntot, 
+                                                                 channel_name=name)
+
+        for i in range(first_sample.observable.nbins):
+            if etot2[i] <= 0.0:
+                continue
+            elif etot2_bb[i] <= 0:
                 # this means there is signal but no background, so create stats unc. for signal only
-                for sample in self._samples.values():
+                for sample in channel._samples.values():
                     if sample._sampletype == Sample.SIGNAL:
                         sample_name = None if channel_name is None else channel_name + "_" + sample._name[sample._name.find("_") + 1 :]
                         sample.autoMCStats(epsilon=epsilon, sample_name=sample_name, bini=i)
-
+        
                 continue
-
-            neff_bb = ntot_bb**2 / etot2_bb
+    
+            neff_bb = ntot_bb[i]**2 / etot2_bb[i]
             if neff_bb <= threshold:
-                for sample in self._samples.values():
+                for sample in channel._samples.values():
                     sample_name = None if channel_name is None else channel_name + "_" + sample._name[sample._name.find("_") + 1 :]
                     sample.autoMCStats(epsilon=epsilon, sample_name=sample_name, bini=i)
             else:
                 effect_up = np.ones_like(first_sample._nominal)
                 effect_down = np.ones_like(first_sample._nominal)
-
-                effect_up[i] = (ntot + np.sqrt(etot2)) / ntot
-                effect_down[i] = max((ntot - np.sqrt(etot2)) / ntot, epsilon)
-
+
+                effect = np.sqrt(etot2[i])/ntot[i]
+                if effect < effect_threshold:
+                    continue
+                effect_up[i] = 1.0 + min(1.0, effect)
+                effect_down[i] = max(epsilon, 1.0 - min(1.0, effect))
+
                 param = NuisanceParameter(name + "_mcstat_bin%i" % i, combinePrior="shape")
-
-                for sample in self._samples.values():
+
+                for sample in channel._samples.values():
+                    if not isinstance(sample, TemplateSample):
+                        continue
+                    if sample._nominal[i] <= 1e-5:
+                        continue
                     sample.setParamEffect(param, effect_up, effect_down)
diff --git a/rhalphalib/sample.py b/rhalphalib/sample.py
@@ -157,7 +157,7 @@ def setParamEffect(self, param, effect_up, effect_down=None, scale=None):
         """
         if not isinstance(param, NuisanceParameter):
             if isinstance(param, IndependentParameter) and isinstance(effect_up, DependentParameter):
-                extras = effect_up.getDependents() - {param}
+                extras = effect_up.getDependents(deep=True) - {param}
                 if not all(isinstance(p, IndependentParameter) for p in extras):
                     raise ValueError("Normalization effects can only depend on one or more IndependentParameters")
                 self._extra_dependencies.update(extras)
@@ -322,7 +322,8 @@ def getExpectation(self, nominal=False):
         if nominal:
             return nominalval
         else:
-            out = np.array([IndependentParameter(self.name + "_bin%d_nominal" % i, v, constant=True) for i, v in enumerate(nominalval)])
+            #out = np.array([IndependentParameter(self.name + "_bin%d_nominal" % i, v, constant=True) for i, v in enumerate(nominalval)])
+            out = nominalval
             for param in self.parameters:
                 effect_up = self.getParamEffect(param, up=True)
                 if effect_up is None:
@@ -381,7 +382,10 @@ def renderRoofit(self, workspace):
                     # Normalization systematics can just go into combine datacards (although if we build PDF here, will need it)
                     if isinstance(effect_up, DependentParameter):
                         # this is a rateParam, we should add the IndependentParameter to the workspace
-                        param.renderRoofit(workspace)
+                        #param.renderRoofit(workspace)
+                        dependents = effect_up.getDependents(deep=True)
+                        for p in dependents:
+                            p.renderRoofit(workspace)
                     continue
                 name = self.name + "_" + param.name + "Up"
                 shape = nominal * effect_up
@@ -418,7 +422,7 @@ def combineParamEffect(self, param):
             # about here's where I start to feel painted into a corner
             dep = self.getParamEffect(param, up=True)
             channel, sample = self.name[: self.name.find("_")], self.name[self.name.find("_") + 1 :]
-            dependents = dep.getDependents()
+            dependents = dep.getDependents(deep=True)
             formula = dep.formula(rendering=True).format(**{var.name: "@%d" % i for i, var in enumerate(dependents)})
             return "{0} rateParam {1} {2} {3} {4}".format(
                 dep.name,
@@ -630,7 +634,18 @@ def combineParamEffect(self, param):
 
 
 class TransferFactorSample(ParametericSample):
-    def __init__(self, name, sampletype, transferfactor, dependentsample, observable=None, min_val=None):
+    def __init__(self, 
+                 samplename, 
+                 sampletype, 
+                 transferfactor, 
+                 dependentsample, 
+                 nominal_values=None, 
+                 stat_unc=None, 
+                 observable=None, 
+                 min_val=None, 
+                 epsilon=1e-5, 
+                 effect_threshold=0.01, 
+                 channel_name=None):
         """
         Create a sample that depends on another Sample by some transfer factor.
         The transfor factor can be a constant, an array of parameters of same length
@@ -653,15 +668,32 @@ def __init__(self, name, sampletype, transferfactor, dependentsample, observable
                     params[idx] = p.max(min_val)
         elif len(transferfactor.shape) <= 1:
             observable = dependentsample.observable
-            params = transferfactor * dependentsample.getExpectation()
+            if stat_unc is not None:
+                name = samplename if channel_name is None else channel_name
+                MCStatTemplate = (np.ones_like(stat_unc), observable._binning, observable._name)
+                MCStat = TemplateSample(name+'_mcstat', Sample.BACKGROUND, MCStatTemplate)
+                for i in range(MCStat.observable.nbins):
+                    effect_up = np.ones_like(MCStat._nominal)
+                    effect_down = np.ones_like(MCStat._nominal)
+                    if stat_unc[i] < effect_threshold:
+                        continue
+                    effect_up[i] = 1.0 + min(1.0, stat_unc[i])
+                    effect_down[i] = max(epsilon, 1.0 - min(1.0, stat_unc[i]))
+                    param = NuisanceParameter(name + '_mcstat_bin%i' % i, combinePrior='shape')
+                    MCStat.setParamEffect(param, effect_up, effect_down)
+                params = transferfactor * MCStat.getExpectation() * dependentsample.getExpectation()
+            else:
+                params = transferfactor * dependentsample.getExpectation()
             if min_val is not None:
                 for i, p in enumerate(params):
                     params[i] = p.max(min_val)
         else:
             raise ValueError("Transfer factor has invalid dimension")
-        super(TransferFactorSample, self).__init__(name, sampletype, observable, params)
+        super(TransferFactorSample, self).__init__(samplename, sampletype, observable, params)
         self._transferfactor = transferfactor
         self._dependentsample = dependentsample
+        self._stat_unc = stat_unc
+        self._nominal_values = nominal_values
 
     @property
     def transferfactor(self):
@@ -670,3 +702,11 @@ def transferfactor(self):
     @property
     def dependentsample(self):
         return self._dependentsample
+
+    @property
+    def stat_unc(self):
+        return self._stat_unc
+
+    @property
+    def nominal_values(self):
+        return self._nominal_values