diff --git a/README.md b/README.md
index 32d527d..e963402 100644
--- a/README.md
+++ b/README.md
@@ -84,10 +84,10 @@ This is often the case in the standard profile likelihood unfolding.
 By default, systematic variations are asymmetric.
 However, defining only symmetric variations can be beneficial as a fully symmetric tensor has reduced memory consumption, simplifications in the likelihood function in the fit, and is usually numerically more stable.
 Different symmetrization options are supported:
- * "average": TBD
- * "conservative": TBD
- * "linear": TBD
- * "quadratic": TBD
+ * "average": In each bin, average the up and down variations and mirror the result. This is the recommended option for variations that are expected to be symmetric.
+ * "conservative": In each bin, use the larger of the up and down variations and mirror it. This option is less recommended but can be used as a cross check against "average".
+ * "linear": The up and down variations are decomposed into an average component and a difference component, with a scale factor of 1 for the difference shape, corresponding to linear interpolation. This option is recommended for variations that are expected to be asymmetric.
+ * "quadratic": Same decomposition as "linear", but with a scale factor of $\sqrt{3}$ for the difference shape, corresponding to quadratic interpolation. It is more conservative than "linear" and is also recommended for variations that are expected to be asymmetric.
 If a systematic variation is added by providing a single histogram, the variation is mirrored.
 
 ### Masked channels
@@ -120,7 +120,13 @@ rabbit_fit test_tensor.hdf5 -o results/fitresult.hdf5 -t 0 --doImpacts --globalI
 ```
 
 ### Bin-by-bin statistical uncertainties
-Bin-by-bin statistical uncertainties on the templates are added by default and can be disabled at runtime using the `--noBinByBinStat` option. The Barlow-Beeston lite method is used to add implicit nuisance parameters for each template bin. By default this is implemented using a gamma distribution for the probability density, but Gaussian uncertainties can also be used with `--binByBinStatType normal`.
+Bin-by-bin statistical uncertainties on the templates are added by default and can be disabled at runtime using the `--noBinByBinStat` option.
+The Barlow-Beeston method is used to add implicit nuisance parameters for each template bin.
+By default, the lite variant is used, which introduces one parameter per template bin for the sum of all processes.
+The full Barlow-Beeston method can be used by specifying `--binByBinStatMode full`, which introduces an implicit nuisance parameter for each process and each template bin.
+By default, these nuisance parameters multiply the expected events and follow a gamma distribution for the probability density.
+Gaussian uncertainties can also be used with `--binByBinStatType normal-additive` for an additive scaling or `--binByBinStatType normal-multiplicative` for a multiplicative scaling.
+In the case of `--binByBinStatMode full` with `--binByBinStatType gamma`, no analytic solution is available; instead, a 1D minimization using Newton's method is performed in each bin, which can significantly increase the time required for the fit.
 
 ### Mappings
 Perform mappings on the parameters and observables (the histogram bins in the (masked) channels).
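As an illustration of the symmetrization options documented in the README hunk above, here is a small standalone numpy sketch. It is not the implementation used in rabbit; the function name, the sign conventions, the return convention (a list of mirrored up/down pairs, with two components for "linear"/"quadratic"), and the example histograms are assumptions made only to make the description concrete.

```python
import numpy as np

def symmetrize(nominal, up, down, mode="average"):
    """Illustrative sketch of the symmetrization options (not rabbit's code)."""
    d_up = up - nominal
    d_down = down - nominal
    if mode == "average":
        # average of the up and down variations in each bin, mirrored
        delta = 0.5 * (d_up - d_down)
        return [(nominal + delta, nominal - delta)]
    if mode == "conservative":
        # larger of the two variations in each bin, mirrored
        delta = np.where(np.abs(d_up) >= np.abs(d_down), d_up, -d_down)
        return [(nominal + delta, nominal - delta)]
    if mode in ("linear", "quadratic"):
        # decompose into an average component and a difference component;
        # the difference shape is scaled by 1 (linear) or sqrt(3) (quadratic)
        scale = 1.0 if mode == "linear" else np.sqrt(3.0)
        avg = 0.5 * (d_up - d_down)
        diff = 0.5 * (d_up + d_down)
        return [
            (nominal + avg, nominal - avg),
            (nominal + scale * diff, nominal - scale * diff),
        ]
    raise ValueError(f"unknown symmetrization mode: {mode}")

# assumed example histograms with two bins
nominal = np.array([100.0, 80.0])
up = np.array([110.0, 78.0])
down = np.array([95.0, 77.0])
print(symmetrize(nominal, up, down, mode="quadratic"))
```

Under these assumptions, "quadratic" differs from "linear" only by the $\sqrt{3}$ scaling of the difference component, which makes it the more conservative of the two.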
diff --git a/rabbit/fitter.py b/rabbit/fitter.py
index 538eb12..4e14673 100644
--- a/rabbit/fitter.py
+++ b/rabbit/fitter.py
@@ -81,15 +81,6 @@ def __init__(
         else:
             self.binByBinStatType = options.binByBinStatType
 
-        if (
-            self.binByBinStat
-            and self.binByBinStatMode == "full"
-            and not self.binByBinStatType.startswith("normal")
-        ):
-            raise Exception(
-                'bin-by-bin stat only for option "--binByBinStatMode full" with "--binByBinStatType normal"'
-            )
-
         if (
             options.covarianceFit
             and self.binByBinStat
@@ -246,9 +237,14 @@ def __init__(
         self.sumw = self.indata.sumw
 
         if self.binByBinStatType in ["gamma", "normal-multiplicative"]:
-            self.kstat = self.sumw**2 / self.varbeta
-            self.betamask = (self.varbeta == 0.0) | (self.kstat == 0.0)
-            self.kstat = tf.where(self.betamask, 1.0, self.kstat)
+            self.betamask = (self.varbeta == 0.0) | (self.sumw == 0.0)
+            self.kstat = tf.where(self.betamask, 1.0, self.sumw**2 / self.varbeta)
+
+            if self.binByBinStatType == "gamma" and self.binByBinStatMode == "full":
+                self.nbeta = tf.Variable(
+                    tf.ones_like(self.nobs), trainable=True, name="nbeta"
+                )
+
         elif self.binByBinStatType == "normal-additive":
             # precompute decomposition of composite matrix to speed up
             # calculation of profiled beta values
@@ -1427,13 +1423,74 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
         if self.chisqFit:
             if self.binByBinStatType == "gamma":
                 kstat = self.kstat[: self.indata.nbins]
+                betamask = self.betamask[: self.indata.nbins]
 
-                abeta = nexp_profile**2
-                bbeta = kstat * self.varnobs - nexp_profile * self.nobs
-                cbeta = -kstat * self.varnobs * beta0
-                beta = solve_quad_eq(abeta, bbeta, cbeta)
+                if self.binByBinStatMode == "lite":
+                    abeta = nexp_profile**2
+                    bbeta = kstat * self.varnobs - nexp_profile * self.nobs
+                    cbeta = -kstat * self.varnobs * beta0
+                    beta = solve_quad_eq(abeta, bbeta, cbeta)
+                elif self.binByBinStatMode == "full":
+                    norm_profile = norm[: self.indata.nbins]
+
+                    # solve for nbeta numerically with Newton's method (not compatible with forward-mode differentiation, i.e. use --globalImpacts with --globalImpactsDisableJVP)
+                    def fnll_nbeta(x):
+                        beta = (
+                            kstat
+                            * beta0
+                            / (
+                                kstat
+                                + ((x - self.nobs) / self.varnobs)[..., None]
+                                * norm_profile
+                            )
+                        )
+                        beta = tf.where(betamask, beta0, beta)
+                        new_nexp = tf.reduce_sum(beta * norm_profile, axis=-1)
+                        ln = 0.5 * (new_nexp - self.nobs) ** 2 / self.varnobs
+                        lbeta = tf.reduce_sum(
+                            kstat * (beta - beta0)
+                            - kstat
+                            * beta0
+                            * (tf.math.log(beta) - tf.math.log(beta0)),
+                            axis=-1,
+                        )
+                        return ln + lbeta
+
+                    def body(i, edm):
+                        with tf.GradientTape() as t2:
+                            with tf.GradientTape() as t1:
+                                nll = fnll_nbeta(nexp_profile * self.nbeta)
+                            grad = t1.gradient(nll, self.nbeta)
+                        hess = t2.gradient(grad, self.nbeta)
+
+                        eps = 1e-8
+                        safe_hess = tf.where(hess > 0, hess, tf.ones_like(hess))
+                        step = grad / (safe_hess + eps)
+
+                        self.nbeta.assign_sub(step)
+
+                        return i + 1, tf.reduce_max(0.5 * grad * step)
+
+                    def cond(i, edm):
+                        return tf.logical_and(i < 50, edm > 1e-10)
+
+                    i0 = tf.constant(0)
+                    edm0 = tf.constant(tf.float64.max)
+                    tf.while_loop(cond, body, loop_vars=(i0, edm0))
+
+                    beta = (
+                        kstat
+                        * beta0
+                        / (
+                            kstat
+                            + (
+                                (nexp_profile * self.nbeta - self.nobs)
+                                / self.varnobs
+                            )[..., None]
+                            * norm_profile
+                        )
+                    )
 
-                betamask = self.betamask[: self.indata.nbins]
                 beta = tf.where(betamask, beta0, beta)
             elif self.binByBinStatType == "normal-multiplicative":
                 kstat = self.kstat[: self.indata.nbins]
@@ -1582,7 +1639,72 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
                 kstat = self.kstat[: self.indata.nbins]
                 betamask = self.betamask[: self.indata.nbins]
 
-                beta = (self.nobs + kstat * beta0) / (nexp_profile + kstat)
+                if self.binByBinStatMode == "lite":
+                    beta = (self.nobs + kstat * beta0) / (nexp_profile + kstat)
+                elif self.binByBinStatMode == "full":
+                    norm_profile = norm[: self.indata.nbins]
+
+                    # solve for nbeta numerically with Newton's method (not compatible with forward-mode differentiation, i.e. use --globalImpacts with --globalImpactsDisableJVP)
+                    def fnll_nbeta(x):
+                        beta = (
+                            kstat
+                            * beta0
+                            / (
+                                norm_profile
+                                + kstat
+                                - (self.nobs / x)[..., None] * norm_profile
+                            )
+                        )
+                        beta = tf.where(betamask, beta0, beta)
+                        new_nexp = tf.reduce_sum(beta * norm_profile, axis=-1)
+                        ln = (
+                            new_nexp
+                            - self.nobs
+                            - self.nobs
+                            * (tf.math.log(new_nexp) - tf.math.log(self.nobs))
+                        )
+                        lbeta = tf.reduce_sum(
+                            kstat * (beta - beta0)
+                            - kstat
+                            * beta0
+                            * (tf.math.log(beta) - tf.math.log(beta0)),
+                            axis=-1,
+                        )
+                        return ln + lbeta
+
+                    def body(i, edm):
+                        with tf.GradientTape() as t2:
+                            with tf.GradientTape() as t1:
+                                nll = fnll_nbeta(nexp_profile * self.nbeta)
+                            grad = t1.gradient(nll, self.nbeta)
+                        hess = t2.gradient(grad, self.nbeta)
+
+                        eps = 1e-8
+                        safe_hess = tf.where(hess > 0, hess, tf.ones_like(hess))
+                        step = grad / (safe_hess + eps)
+                        self.nbeta.assign_sub(step)
+                        return i + 1, tf.reduce_max(0.5 * grad * step)
+
+                    def cond(i, edm):
+                        return tf.logical_and(i < 50, edm > 1e-10)
+
+                    i0 = tf.constant(0)
+                    edm0 = tf.constant(tf.float64.max)
+                    tf.while_loop(cond, body, loop_vars=(i0, edm0))
+
+                    beta = (
+                        kstat
+                        * beta0
+                        / (
+                            norm_profile
+                            - (self.nobs / (nexp_profile * self.nbeta))[
+                                ..., None
+                            ]
+                            * norm_profile
+                            + kstat
+                        )
+                    )
+                    beta = tf.where(betamask, beta0, beta)
             elif self.binByBinStatType == "normal-multiplicative":
                 kstat = self.kstat[: self.indata.nbins]
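For reference, the 1D objective that the `tf.while_loop` in the Poisson "full" branch minimizes with Newton's method can be written for a single bin in plain numpy. The sketch below mirrors `fnll_nbeta` from the patch but replaces the Newton iteration with scipy's bounded scalar minimizer and uses made-up inputs; it is meant to clarify what is being solved per bin, not to reproduce the exact implementation.

```python
import numpy as np
from scipy.optimize import minimize_scalar

# Illustrative single-bin inputs (assumed values, not taken from the repository)
nobs = 120.0                      # observed events in the bin
nu = np.array([70.0, 40.0])       # expected yield per process (norm_profile in the patch)
kstat = np.array([350.0, 90.0])   # effective MC counts per process, sumw**2 / varbeta
beta0 = np.array([1.0, 1.0])      # prior bin-by-bin parameters

def nll(x):
    # profiled beta for a given total expectation x, as in fnll_nbeta
    beta = kstat * beta0 / (nu + kstat - (nobs / x) * nu)
    new_nexp = np.sum(beta * nu)
    # Poisson part of the negative log-likelihood
    ln = new_nexp - nobs - nobs * (np.log(new_nexp) - np.log(nobs))
    # gamma constraint terms for the bin-by-bin parameters
    lbeta = np.sum(kstat * (beta - beta0) - kstat * beta0 * (np.log(beta) - np.log(beta0)))
    return ln + lbeta

# 1D minimization over the total expectation (the patch uses a Newton loop instead)
res = minimize_scalar(nll, bounds=(0.5 * nobs, 2.0 * nobs), method="bounded")
x_hat = res.x
beta_hat = kstat * beta0 / (nu + kstat - (nobs / x_hat) * nu)
print(x_hat, beta_hat)
```

The profiled beta per process is then recovered from the minimizing total expectation with the same closed-form expression used after the `tf.while_loop` in the patch.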