Skip to content
60 changes: 53 additions & 7 deletions Corrections.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import re
import itertools

from .CorrectionsCore import *
Expand Down Expand Up @@ -64,7 +65,7 @@ def getGlobal():
def __init__(
self,
*,
global_params,
setup,
stage,
dataset_name,
dataset_cfg,
Expand All @@ -74,7 +75,7 @@ def __init__(
isData,
trigger_class,
):
self.global_params = global_params
self.global_params = setup.global_params
self.dataset_name = dataset_name
self.dataset_cfg = dataset_cfg
self.process_name = process_name
Expand All @@ -83,15 +84,16 @@ def __init__(
self.isData = isData
self.trigger_dict = trigger_class.trigger_dict if trigger_class else {}

self.period = global_params["era"]
self.period = self.global_params["era"]
self.stage = stage
self.law_run_version = setup.law_run_version

self.to_apply = {}
correction_origins = {}
for cfg_name, cfg in [
("dataset", dataset_cfg),
("process", process_cfg),
("global", global_params),
("global", self.global_params),
]:
if not cfg:
continue
Expand All @@ -113,6 +115,7 @@ def __init__(
f"Warning: correction {corr_name} is already defined in {correction_origins[corr_name]}. Skipping definition from {cfg_name}",
file=sys.stderr,
)

if len(self.to_apply) > 0:
print(
f'Corrections to apply: {", ".join(self.to_apply.keys())}',
Expand All @@ -133,6 +136,7 @@ def __init__(
self.fatjet_ = None
self.Vpt_ = None
self.JetVetoMap_ = None
self.btag_shape_norm_ = None

@property
def xs_db(self):
Expand Down Expand Up @@ -283,6 +287,33 @@ def trg(self):
)
return self.trg_

@property
def btag_norm(self):
    """Lazily built b-tag shape-weight normalisation corrector (MC only).

    On first access (for simulation) the corrector is constructed from the
    normalisation JSON file whose path comes from the ``normFilePattern``
    entry of the btag correction config, formatted with the dataset name,
    era and law-run version, and resolved relative to ``$ANALYSIS_PATH``.
    The result is cached in ``self.btag_shape_norm_``.

    Raises:
        RuntimeError: when first accessed on data, where no shape-weight
            normalisation is defined.
    """
    if self.btag_shape_norm_ is not None:
        return self.btag_shape_norm_
    if self.isData:
        raise RuntimeError("btag_shape_norm not applicable to data.")

    # Local import keeps the ROOT-heavy module out of the import path
    # until the corrector is actually needed.
    from .btag import btagShapeWeightCorrector

    btag_params = self.to_apply["btag"]
    resolved_pattern = btag_params["normFilePattern"].format(
        dataset_name=self.dataset_name,
        period=self.period,
        version=self.law_run_version,
    )
    # Binning comes from the BtagShape payload-producer configuration.
    shape_producer_cfg = self.global_params["payload_producers"]["BtagShape"]
    shape_bins = shape_producer_cfg["bins"]
    norm_file_path = os.path.join(
        os.environ["ANALYSIS_PATH"], resolved_pattern
    )
    print(f"Applying shape weight normalization from {norm_file_path}")
    self.btag_shape_norm_ = btagShapeWeightCorrector(
        norm_file_path=norm_file_path, bins=shape_bins
    )
    return self.btag_shape_norm_

def applyScaleUncertainties(self, df, ana_reco_objects):
source_dict = {central: []}
if "tauES" in self.to_apply and not self.isData:
Expand Down Expand Up @@ -486,20 +517,35 @@ def getNormalisationCorrections(
)
all_weights.extend(tau_SF_branches)
if "btag" in self.to_apply:
btag_sf_mode = self.to_apply["btag"]["modes"][self.stage]
if btag_sf_mode in ["shape", "wp"]:
btag_sf_mode = self.to_apply["btag"]["modes"].get(self.stage, "none")
if btag_sf_mode in ["shape", "shape_and_norm", "wp"]:
if btag_sf_mode == "shape":
df, bTagSF_branches = self.btag.getBTagShapeSF(
df, unc_source, unc_scale, isCentral, return_variations
)
elif btag_sf_mode == "shape_and_norm":
assert (
self.btag_norm is not None
), "btagShapeWeightCorrector must be initialzied at in `shape_and_norm` mode."

df, bTagSF_branches = self.btag.getBTagShapeSF(
df, unc_source, unc_scale, isCentral, return_variations
)

df = self.btag_norm.UpdateBtagWeight(
df=df,
unc_src=unc_source,
unc_scale=unc_scale,
sf_branches=bTagSF_branches,
)
else:
df, bTagSF_branches = self.btag.getBTagWPSF(
df, isCentral and return_variations, isCentral
)
all_weights.extend(bTagSF_branches)
elif btag_sf_mode != "none":
raise RuntimeError(
f"btag mode {btag_sf_mode} not recognized. Supported modes are 'shape', 'wp' and 'none'."
f"btag mode {btag_sf_mode} not recognized. Supported modes are 'shape', 'shape_and_norm', 'wp' and 'none'."
)
if "mu" in self.to_apply:
if self.mu.low_available:
Expand Down
93 changes: 93 additions & 0 deletions btag.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .CorrectionsCore import *
from FLAF.Common.Utilities import WorkingPointsbTag
import yaml
import json

# https://twiki.cern.ch/twiki/bin/viewauth/CMS/BTagShapeCalibration
# https://twiki.cern.ch/twiki/bin/view/CMS/BTagCalibration
Expand Down Expand Up @@ -254,3 +255,95 @@ def getBTagShapeSF(self, df, src_name, scale_name, isCentral, return_variations)
)
SF_branches.append(branch_name_final)
return df, SF_branches


# JIT-declare a small C++ functor for use as an RDataFrame callable:
# given a weight and a string key, it multiplies the weight by the map
# entry stored under that key in `corr`, falling back to 1.0 when the
# key is absent (e.g. the "__default__" no-bin-matched case).
ROOT.gInterpreter.Declare(r"""
#include <map>
#include <string>

struct BTagMapApplier {
std::map<std::string,float> corr;

float operator()(float w, const std::string &key) const {
auto it = corr.find(key);
const float r = (it != corr.end()) ? it->second : 1.0;
return w * r;
}
};
""")


class btagShapeWeightCorrector:
    """Applies per-bin normalisation factors to btag shape-weight branches.

    The factors are read from a JSON file that maps an uncertainty key
    (``<src>`` or ``<src>_<scale>``) to a dictionary of
    ``norm_<syst>_<bin_name> -> factor`` entries. They are applied to the
    RDataFrame weight branches through the JIT-compiled ``BTagMapApplier``
    C++ functor.
    """

    def __init__(self, *, norm_file_path, bins):
        # bins: mapping bin_name -> C++ boolean expression selecting the bin.
        self.norm_file_path = norm_file_path
        with open(norm_file_path, "r") as norm_file:
            self.shape_weight_corr_dict = json.load(norm_file)
        self.bins = bins
        # Keep the C++ applier functors alive: RDataFrame holds references
        # to them while the event loop runs.
        self._appliers = []

    def _define_key_column(self, df, keycol, syst):
        """Define (or redefine) a per-event string column holding the
        normalisation key ``norm_<syst>_<bin_name>`` of the first matching
        bin, or ``"__default__"`` when no bin cut matches."""
        key_expr = ""
        for bin_name, cut in self.bins.items():
            key_expr += f'({cut}) ? std::string("norm_{syst}_{bin_name}") : '
        key_expr += 'std::string("__default__")'

        if keycol in set(df.GetColumnNames()):
            return df.Redefine(keycol, key_expr)
        return df.Define(keycol, key_expr)

    def UpdateBtagWeight(self, *, df, unc_src, unc_scale, sf_branches):
        """Multiply the btag shape-weight branches by the per-bin factors.

        Returns the updated dataframe.

        Raises:
            KeyError: when the uncertainty key is absent from the JSON file.
        """
        if unc_src != unc_scale:
            unc_src_scale = f"{unc_src}_{unc_scale}"
        else:
            unc_src_scale = unc_src
        if unc_src_scale not in self.shape_weight_corr_dict:
            raise KeyError(
                f"Key `{unc_src_scale}` not found in `{self.norm_file_path}`."
            )

        # btag branches have format weight_bTagShape_{syst}_rel
        # => the systematic name is token #2.
        systs = [branch.split("_")[2] for branch in sf_branches]

        applier = ROOT.BTagMapApplier()
        applier.corr["__default__"] = 1.0
        for corr_key, corr_value in self.shape_weight_corr_dict[unc_src_scale].items():
            applier.corr[corr_key] = float(corr_value)
        self._appliers.append(applier)

        # Variation branches are stored relative to Central. To correct them
        # we first make them absolute (multiply by the still-uncorrected
        # Central), then apply the per-bin factor; they are turned back into
        # relative values only after Central itself has been corrected.
        for syst in systs:
            if syst == "Central":
                continue
            keycol = f"btag_shape_norm_key_{syst}"
            df = self._define_key_column(df, keycol, syst)

            branch_name = f"weight_bTagShape_{syst}_rel"
            df = df.Redefine(
                branch_name, f"(float){branch_name} * weight_bTagShape_Central"
            )
            df = df.Redefine(branch_name, applier, [branch_name, keycol])

        # Correct Central last, once its uncorrected value is no longer needed.
        df = self._define_key_column(df, "btag_shape_norm_key_Central", "Central")
        df = df.Redefine(
            "weight_bTagShape_Central",
            applier,
            ["weight_bTagShape_Central", "btag_shape_norm_key_Central"],
        )

        # Restore the relative convention by dividing by the updated Central.
        for syst in systs:
            if syst == "Central":
                continue
            branch_name = f"weight_bTagShape_{syst}_rel"
            df = df.Redefine(
                branch_name, f"(float){branch_name} / weight_bTagShape_Central"
            )

        return df
27 changes: 18 additions & 9 deletions btagShape.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,14 @@ namespace correction {
return false;
}

bTagShapeCorrProvider(const std::string& fileName, const std::string& year, std::string const& tagger_name, const bool wantShape=true)
: corrections_(CorrectionSet::from_file(fileName)),
_year(year) {
if (wantShape){
shape_corr_ = corrections_->at(tagger_name + "_shape");
}
bTagShapeCorrProvider(const std::string& fileName,
const std::string& year,
std::string const& tagger_name,
const bool wantShape = true)
: corrections_(CorrectionSet::from_file(fileName)), _year(year) {
if (wantShape) {
shape_corr_ = corrections_->at(tagger_name + "_shape");
}
std::cerr << "Initialized bTagShapeCorrProvider::bTagShapeCorrProvider()" << std::endl;
}

Expand All @@ -152,8 +154,10 @@ namespace correction {
UncScale scale) const {
double sf_product = 1.;
std::string source_str = getUncName().at(source);
for (size_t jet_idx = 0; jet_idx < Jet_p4.size(); jet_idx++) {
if (Jet_bTag_score[jet_idx] < 0.0)
for (size_t jet_idx = 0; jet_idx < Jet_p4.size(); ++jet_idx) {
if ((Jet_bTag_score[jet_idx] > 1.0 || Jet_bTag_score[jet_idx] < 0.0) ||
std::abs(Jet_p4[jet_idx].eta()) >= 2.5 || Jet_p4[jet_idx].pt() < 20.0 ||
(Jet_Flavour[jet_idx] != 0 || Jet_Flavour[jet_idx] != 4 || Jet_Flavour[jet_idx] != 5))
continue;
const UncScale jet_tag_scale = sourceApplies(source, Jet_Flavour[jet_idx]) ? scale : UncScale::Central;
const std::string& scale_str = getScaleStr(jet_tag_scale);
Expand All @@ -180,13 +184,18 @@ namespace correction {
} catch (...) {
std::cerr << "bTagShapeCorrProvider::getBTagShapeSF : Unknown error occurred when evaluating "
"correction\n";
std::cerr << "\tunc_name=" << unc_name << "\n"
<< "\tjet_idx=" << jet_idx << "\n"
<< "\tJet_Flavour=" << Jet_Flavour[jet_idx] << "\n"
<< "\tabs(Jet_eta)=" << std::abs(Jet_p4[jet_idx].eta()) << "\n"
<< "\tJet_pt=" << Jet_p4[jet_idx].pt() << "\n"
<< "\tJetbtag_score=" << Jet_bTag_score[jet_idx] << "\n";
throw;
}
}
return sf_product;
}

private:
private:
std::unique_ptr<CorrectionSet> corrections_;
Correction::Ref shape_corr_;
Expand Down