From 511b74957784b12bc13e206e73da87dfa5520c93 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Fri, 30 Jan 2026 17:48:07 +0100 Subject: [PATCH 01/13] add check to avoid crashes due to invalid jets when computing shape sf --- btagShape.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/btagShape.h b/btagShape.h index e9bfa5a..af22bf7 100644 --- a/btagShape.h +++ b/btagShape.h @@ -153,7 +153,7 @@ namespace correction { double sf_product = 1.; std::string source_str = getUncName().at(source); for (size_t jet_idx = 0; jet_idx < Jet_p4.size(); jet_idx++) { - if (Jet_bTag_score[jet_idx] < 0.0) + if ((Jet_bTag_score[jet_idx] > 1.0 || Jet_bTag_score[jet_idx] < 0.0) || std::abs(Jet_p4[jet_idx].eta()) >= 2.5 || Jet_p4[jet_idx].pt() < 20.0) continue; const UncScale jet_tag_scale = sourceApplies(source, Jet_Flavour[jet_idx]) ? scale : UncScale::Central; const std::string& scale_str = getScaleStr(jet_tag_scale); @@ -186,7 +186,6 @@ namespace correction { return sf_product; } - private: private: std::unique_ptr corrections_; Correction::Ref shape_corr_; From 1e290fa90d23471393b08aa0a34e3de46e403c27 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Mon, 9 Feb 2026 23:31:36 +0100 Subject: [PATCH 02/13] modify corrections --- Corrections.py | 34 +++++++++++++++++---- btag.py | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/Corrections.py b/Corrections.py index 3bfa724..4730b43 100644 --- a/Corrections.py +++ b/Corrections.py @@ -59,7 +59,7 @@ def getGlobal(): def __init__( self, *, - global_params, + setup, stage, dataset_name, dataset_cfg, @@ -69,7 +69,7 @@ def __init__( isData, trigger_class, ): - self.global_params = global_params + self.global_params = setup.global_params self.dataset_name = dataset_name self.dataset_cfg = dataset_cfg self.process_name = process_name @@ -78,15 +78,16 @@ def __init__( self.isData = isData self.trigger_dict = trigger_class.trigger_dict if trigger_class 
else {} - self.period = global_params["era"] + self.period = self.global_params["era"] self.stage = stage - + self.law_run_version = setup.law_run_version + self.to_apply = {} correction_origins = {} for cfg_name, cfg in [ ("dataset", dataset_cfg), ("process", process_cfg), - ("global", global_params), + ("global", self.global_params), ]: if not cfg: continue @@ -128,6 +129,7 @@ def __init__( self.fatjet_ = None self.Vpt_ = None self.JetVetoMap_ = None + self.btag_norm_ = None @property def xs_db(self): @@ -278,6 +280,25 @@ def trg(self): ) return self.trg_ + @property + def btag_norm(self): + if self.btag_norm_ is None: + if self.stage == "HistTuple": + from .btag import btagShapeWeightCorrector + params = self.to_apply["btag"] + pattern = params["normFilePattern"] + formatted_pattern = pattern.format( + dataset_name=self.dataset_name, + period=self.period, + version=self.law_run_version, + ) + norm_file_path = os.path.join(os.getcwd(), formatted_pattern) + print(f"Applying shape weight normalization from {norm_file_path}") + self.btag_norm_ = btagShapeWeightCorrector(norm_file_path=norm_file_path) + else: + return None + return self.btag_norm_ + def applyScaleUncertainties(self, df, ana_reco_objects): source_dict = {central: []} if "tauES" in self.to_apply and not self.isData: @@ -487,6 +508,9 @@ def getNormalisationCorrections( df, bTagSF_branches = self.btag.getBTagShapeSF( df, unc_source, unc_scale, isCentral, return_variations ) + if self.stage == "HistTuple": + assert self.btag_norm is not None, "btagShapeWeightCorrector must be initialzied at HistTuple stage" + df = self.btag_norm.UpdateBtagWeight(df=df, unc_src="Central", unc_scale="Central") else: df, bTagSF_branches = self.btag.getBTagWPSF( df, isCentral and return_variations, isCentral diff --git a/btag.py b/btag.py index cdab697..1df9118 100644 --- a/btag.py +++ b/btag.py @@ -3,6 +3,7 @@ from .CorrectionsCore import * from FLAF.Common.Utilities import WorkingPointsbTag import yaml +import json # 
https://twiki.cern.ch/twiki/bin/viewauth/CMS/BTagShapeCalibration # https://twiki.cern.ch/twiki/bin/view/CMS/BTagCalibration @@ -254,3 +255,83 @@ def getBTagShapeSF(self, df, src_name, scale_name, isCentral, return_variations) ) SF_branches.append(branch_name_final) return df, SF_branches + + +class btagShapeWeightCorrector: + cat_to_channelId = {"e": 1, "mu": 2, "eE": 11, "eMu": 12, "muMu": 22} + + def __init__( + self, + *, + norm_file_path + ): + with open(norm_file_path, "r") as norm_file: + self.shape_weight_corr_dict = json.load(norm_file) + + self.initialized = [] + for key in self.shape_weight_corr_dict.keys(): + if key not in self.initialized: + self._InitCppMap(key) + self.initialized.append(key) + + def _InitCppMap(self, unc_src_scale): + correction_factors = self.shape_weight_corr_dict[unc_src_scale] + + ROOT.gInterpreter.Declare("#include ") + + # init c++ map + cpp_map_entries = [] + for cat, multipl_dict in correction_factors.items(): + channelId = btagShapeWeightCorrector.cat_to_channelId[cat] + for key, ratio in multipl_dict.items(): + # key has structure f"ratio_ncetnralJet_{number}"" + num_jet = int(key.split("_")[-1]) + cpp_map_entries.append(f"{{{{{channelId}, {num_jet}}}, {ratio}}}") + cpp_init = ", ".join(cpp_map_entries) + + ROOT.gInterpreter.Declare( + f""" + static const std::map, float> ratios_{unc_src_scale} = {{ + {cpp_init} + }}; + + float integral_correction_ratio_{unc_src_scale}(int ncentralJet, int channelId) {{ + std::pair key{{channelId, ncentralJet}}; + try + {{ + float ratio = ratios_{unc_src_scale}.at(key); + return ratio; + }} + catch (...) + {{ + return 1.0f; + }} + }}""" + ) + + def UpdateBtagWeight( + self, + *, + df, + unc_src, + unc_scale, + ): + + if unc_src != unc_scale: + unc_src_scale = f"{unc_src}_{unc_scale}" + else: + unc_src_scale = unc_src + + if unc_src_scale not in self.shape_weight_corr_dict.keys(): + raise RuntimeError( + f"`BtagShapeWeightCorrection.json` does not contain key `{unc_src_scale}`." 
+ ) + + df = df.Redefine( + "weight_bTagShape_Central", + f"""if (ncentralJet >= 2 && ncentralJet <= 8) + return integral_correction_ratio_{unc_src_scale}(ncentralJet, channelId)*weight_bTagShape_Central; + return weight_bTagShape_Central;""", + ) + + return df \ No newline at end of file From 00ab14ae61fe9859da411fcf8a71c0a7d653cc4f Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Tue, 10 Feb 2026 21:06:13 +0100 Subject: [PATCH 03/13] change loading norm corrections --- Corrections.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/Corrections.py b/Corrections.py index 4730b43..1e6a860 100644 --- a/Corrections.py +++ b/Corrections.py @@ -1,4 +1,5 @@ import os +import re import itertools from .CorrectionsCore import * @@ -283,7 +284,17 @@ def trg(self): @property def btag_norm(self): if self.btag_norm_ is None: - if self.stage == "HistTuple": + if self.stage == "HistTuple" and not self.isData: + def transform_path(path): + pattern = r'^/store/user/([^/]+)/' + + def replace(match): + username = match.group(1) + first_letter = username[0].lower() + return f'/eos/user/{first_letter}/{username}/' + + return re.sub(pattern, replace, path) + from .btag import btagShapeWeightCorrector params = self.to_apply["btag"] pattern = params["normFilePattern"] @@ -292,7 +303,9 @@ def btag_norm(self): period=self.period, version=self.law_run_version, ) - norm_file_path = os.path.join(os.getcwd(), formatted_pattern) + fs_anaTuple = self.global_params["fs_anaTuple"][0] + fs_anaTuple = fs_anaTuple.split(":")[1] + norm_file_path = os.path.join(transform_path(fs_anaTuple), formatted_pattern) print(f"Applying shape weight normalization from {norm_file_path}") self.btag_norm_ = btagShapeWeightCorrector(norm_file_path=norm_file_path) else: From 94eafe8de5669903f4b04ee04a06e65dbfe6c7a6 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Wed, 11 Feb 2026 18:30:55 +0100 Subject: [PATCH 04/13] use ANALYSIS_PATH --- Corrections.py | 14 +------------- 1 file 
changed, 1 insertion(+), 13 deletions(-) diff --git a/Corrections.py b/Corrections.py index 1e6a860..3f0789d 100644 --- a/Corrections.py +++ b/Corrections.py @@ -285,16 +285,6 @@ def trg(self): def btag_norm(self): if self.btag_norm_ is None: if self.stage == "HistTuple" and not self.isData: - def transform_path(path): - pattern = r'^/store/user/([^/]+)/' - - def replace(match): - username = match.group(1) - first_letter = username[0].lower() - return f'/eos/user/{first_letter}/{username}/' - - return re.sub(pattern, replace, path) - from .btag import btagShapeWeightCorrector params = self.to_apply["btag"] pattern = params["normFilePattern"] @@ -303,9 +293,7 @@ def replace(match): period=self.period, version=self.law_run_version, ) - fs_anaTuple = self.global_params["fs_anaTuple"][0] - fs_anaTuple = fs_anaTuple.split(":")[1] - norm_file_path = os.path.join(transform_path(fs_anaTuple), formatted_pattern) + norm_file_path = os.path.join(os.environ["ANALYSIS_PATH"], formatted_pattern) print(f"Applying shape weight normalization from {norm_file_path}") self.btag_norm_ = btagShapeWeightCorrector(norm_file_path=norm_file_path) else: From 66f6ff5bab12bb820db78f3588560ab3f74dc33b Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Fri, 13 Feb 2026 18:14:56 +0100 Subject: [PATCH 05/13] add check for missing btag config --- Corrections.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Corrections.py b/Corrections.py index 3f0789d..259d33b 100644 --- a/Corrections.py +++ b/Corrections.py @@ -110,6 +110,28 @@ def __init__( f"Warning: correction {corr_name} is already defined in {correction_origins[corr_name]}. 
Skipping definition from {cfg_name}", file=sys.stderr, ) + + corr_cfg = self.global_params["corrections"] + btag_required = "btag" in corr_cfg + btag_skipped = "btag" not in self.to_apply + if btag_skipped and btag_required: + print(f"Skipped btag") + btag_cfg = corr_cfg["btag"] + btag_stages = btag_cfg.get("stages", []) + if not btag_stages: + btag_stages.append(btag_cfg["stage"]) + required_at_histTuple = "HistTuple" in btag_stages + required_at_analysisCache = "AnalysisCache" in btag_stages + shape_mode = "shape" in btag_cfg["modes"].values() + # btag shape normalization doesn't need to be applied to working points + if not shape_mode: + print(f"Btag mode not shape, appending btag config") + self.to_apply["btag"] = btag_cfg + else: + if not required_at_histTuple and not required_at_analysisCache: + print(f"Btag shape not required at HistTuple or AnalysisCache stages, appending btag config") + self.to_apply["btag"] = btag_cfg + if len(self.to_apply) > 0: print( f'Corrections to apply: {", ".join(self.to_apply.keys())}', From 9127aa1c05ec527373df59b558abe1b693492c28 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Fri, 13 Feb 2026 18:43:59 +0100 Subject: [PATCH 06/13] fix key error when accessing btag_sf_mode --- Corrections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Corrections.py b/Corrections.py index 259d33b..18f7b5e 100644 --- a/Corrections.py +++ b/Corrections.py @@ -525,7 +525,7 @@ def getNormalisationCorrections( ) all_weights.extend(tau_SF_branches) if "btag" in self.to_apply: - btag_sf_mode = self.to_apply["btag"]["modes"][self.stage] + btag_sf_mode = self.to_apply["btag"]["modes"].get(self.stage, "none") if btag_sf_mode in ["shape", "wp"]: if btag_sf_mode == "shape": df, bTagSF_branches = self.btag.getBTagShapeSF( From 111e1ed68c4a30f4829e0f45032a9b6566e1b062 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Wed, 18 Feb 2026 18:44:29 +0100 Subject: [PATCH 07/13] apply modifications --- Corrections.py | 5 ++- btag.py | 94 
++++++++++++++++++-------------------------------- 2 files changed, 37 insertions(+), 62 deletions(-) diff --git a/Corrections.py b/Corrections.py index 18f7b5e..d1114cc 100644 --- a/Corrections.py +++ b/Corrections.py @@ -315,9 +315,12 @@ def btag_norm(self): period=self.period, version=self.law_run_version, ) + producers = self.global_params["payload_producers"] + btag_shape_producer_cfg = producers["BtagShape"] + bins = btag_shape_producer_cfg["bins"] norm_file_path = os.path.join(os.environ["ANALYSIS_PATH"], formatted_pattern) print(f"Applying shape weight normalization from {norm_file_path}") - self.btag_norm_ = btagShapeWeightCorrector(norm_file_path=norm_file_path) + self.btag_norm_ = btagShapeWeightCorrector(norm_file_path=norm_file_path, bins=bins) else: return None return self.btag_norm_ diff --git a/btag.py b/btag.py index 1df9118..8b9c6a1 100644 --- a/btag.py +++ b/btag.py @@ -258,80 +258,52 @@ def getBTagShapeSF(self, df, src_name, scale_name, isCentral, return_variations) class btagShapeWeightCorrector: - cat_to_channelId = {"e": 1, "mu": 2, "eE": 11, "eMu": 12, "muMu": 22} - - def __init__( - self, - *, - norm_file_path - ): + def __init__(self, *, norm_file_path, bins): + self.norm_file_path = norm_file_path with open(norm_file_path, "r") as norm_file: self.shape_weight_corr_dict = json.load(norm_file) - - self.initialized = [] - for key in self.shape_weight_corr_dict.keys(): - if key not in self.initialized: - self._InitCppMap(key) - self.initialized.append(key) + self.bins = bins + ROOT.gInterpreter.Declare("#include ") + self._declared = set() def _InitCppMap(self, unc_src_scale): correction_factors = self.shape_weight_corr_dict[unc_src_scale] + self._map_name = f"_btag_corr_map_{unc_src_scale}" - ROOT.gInterpreter.Declare("#include ") + if self._map_name not in self._declared: + ROOT.gInterpreter.Declare(f""" + static const std::map {self._map_name}; + """) + self._declared.add(self._map_name) - # init c++ map - cpp_map_entries = [] - for cat, 
multipl_dict in correction_factors.items(): - channelId = btagShapeWeightCorrector.cat_to_channelId[cat] - for key, ratio in multipl_dict.items(): - # key has structure f"ratio_ncetnralJet_{number}"" - num_jet = int(key.split("_")[-1]) - cpp_map_entries.append(f"{{{{{channelId}, {num_jet}}}, {ratio}}}") - cpp_init = ", ".join(cpp_map_entries) + m = getattr(ROOT, self._map_name) + m.clear() + m["__default__"] = 1.0 + for k, v in correction_factors.items(): + m[k] = float(v) - ROOT.gInterpreter.Declare( - f""" - static const std::map, float> ratios_{unc_src_scale} = {{ - {cpp_init} - }}; + self._m = m - float integral_correction_ratio_{unc_src_scale}(int ncentralJet, int channelId) {{ - std::pair key{{channelId, ncentralJet}}; - try - {{ - float ratio = ratios_{unc_src_scale}.at(key); - return ratio; - }} - catch (...) - {{ - return 1.0f; - }} - }}""" - ) - - def UpdateBtagWeight( - self, - *, - df, - unc_src, - unc_scale, - ): - - if unc_src != unc_scale: - unc_src_scale = f"{unc_src}_{unc_scale}" - else: - unc_src_scale = unc_src + def UpdateBtagWeight(self, *, df, unc_src, unc_scale): + unc_src_scale = f"{unc_src}_{unc_scale}" if unc_src != unc_scale else unc_src - if unc_src_scale not in self.shape_weight_corr_dict.keys(): - raise RuntimeError( - f"`BtagShapeWeightCorrection.json` does not contain key `{unc_src_scale}`." + if unc_src_scale not in self.shape_weight_corr_dict: + raise KeyError( + f"Key `{unc_src_scale}` not found in `{self.norm_file_path}`." ) + pieces = [] + for name, cut in self.bins.items(): + pieces.append(f"({cut}) ? 
std::string("{name}") : ") + binname_expr = "".join(pieces) + "std::string("__default__")" + + df = df.Redefine("btag_bin", binname_expr) if "btag_bin" in df.GetColumnNames() \ + else df.Define("btag_bin", binname_expr) + + self._InitCppMap(unc_src_scale) + df = df.Redefine( "weight_bTagShape_Central", - f"""if (ncentralJet >= 2 && ncentralJet <= 8) - return integral_correction_ratio_{unc_src_scale}(ncentralJet, channelId)*weight_bTagShape_Central; - return weight_bTagShape_Central;""", + f"weight_bTagShape_Central * {self._map_name}.at(btag_bin)" ) - return df \ No newline at end of file From 8f2d4e2a014ea92d7802e4bb2c4b5c096b0ed2a5 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Wed, 18 Feb 2026 19:31:07 +0100 Subject: [PATCH 08/13] fix syntax errors --- btag.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/btag.py b/btag.py index 8b9c6a1..d1079dc 100644 --- a/btag.py +++ b/btag.py @@ -294,8 +294,8 @@ def UpdateBtagWeight(self, *, df, unc_src, unc_scale): pieces = [] for name, cut in self.bins.items(): - pieces.append(f"({cut}) ? std::string("{name}") : ") - binname_expr = "".join(pieces) + "std::string("__default__")" + pieces.append(f'({cut}) ? 
std::string("{name}") : ') + binname_expr = "".join(pieces) + 'std::string("__default__")' df = df.Redefine("btag_bin", binname_expr) if "btag_bin" in df.GetColumnNames() \ else df.Define("btag_bin", binname_expr) From 8c24e76f6790e8484aa9c37968c5b5256c4a20f8 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Thu, 19 Feb 2026 23:41:24 +0100 Subject: [PATCH 09/13] formatting --- Corrections.py | 25 ++++++++++++++++++------- btag.py | 11 +++++++---- btagShape.h | 17 ++++++++++------- 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/Corrections.py b/Corrections.py index b44c91b..43695e2 100644 --- a/Corrections.py +++ b/Corrections.py @@ -87,7 +87,7 @@ def __init__( self.period = self.global_params["era"] self.stage = stage self.law_run_version = setup.law_run_version - + self.to_apply = {} correction_origins = {} for cfg_name, cfg in [ @@ -115,7 +115,7 @@ def __init__( f"Warning: correction {corr_name} is already defined in {correction_origins[corr_name]}. Skipping definition from {cfg_name}", file=sys.stderr, ) - + corr_cfg = self.global_params["corrections"] btag_required = "btag" in corr_cfg btag_skipped = "btag" not in self.to_apply @@ -134,7 +134,9 @@ def __init__( self.to_apply["btag"] = btag_cfg else: if not required_at_histTuple and not required_at_analysisCache: - print(f"Btag shape not required at HistTuple or AnalysisCache stages, appending btag config") + print( + f"Btag shape not required at HistTuple or AnalysisCache stages, appending btag config" + ) self.to_apply["btag"] = btag_cfg if len(self.to_apply) > 0: @@ -313,6 +315,7 @@ def btag_norm(self): if self.btag_norm_ is None: if self.stage == "HistTuple" and not self.isData: from .btag import btagShapeWeightCorrector + params = self.to_apply["btag"] pattern = params["normFilePattern"] formatted_pattern = pattern.format( @@ -323,9 +326,13 @@ def btag_norm(self): producers = self.global_params["payload_producers"] btag_shape_producer_cfg = producers["BtagShape"] bins = 
btag_shape_producer_cfg["bins"] - norm_file_path = os.path.join(os.environ["ANALYSIS_PATH"], formatted_pattern) + norm_file_path = os.path.join( + os.environ["ANALYSIS_PATH"], formatted_pattern + ) print(f"Applying shape weight normalization from {norm_file_path}") - self.btag_norm_ = btagShapeWeightCorrector(norm_file_path=norm_file_path, bins=bins) + self.btag_norm_ = btagShapeWeightCorrector( + norm_file_path=norm_file_path, bins=bins + ) else: return None return self.btag_norm_ @@ -540,8 +547,12 @@ def getNormalisationCorrections( df, unc_source, unc_scale, isCentral, return_variations ) if self.stage == "HistTuple": - assert self.btag_norm is not None, "btagShapeWeightCorrector must be initialzied at HistTuple stage" - df = self.btag_norm.UpdateBtagWeight(df=df, unc_src="Central", unc_scale="Central") + assert ( + self.btag_norm is not None + ), "btagShapeWeightCorrector must be initialzied at HistTuple stage" + df = self.btag_norm.UpdateBtagWeight( + df=df, unc_src="Central", unc_scale="Central" + ) else: df, bTagSF_branches = self.btag.getBTagWPSF( df, isCentral and return_variations, isCentral diff --git a/btag.py b/btag.py index d1079dc..033294d 100644 --- a/btag.py +++ b/btag.py @@ -297,13 +297,16 @@ def UpdateBtagWeight(self, *, df, unc_src, unc_scale): pieces.append(f'({cut}) ? 
std::string("{name}") : ') binname_expr = "".join(pieces) + 'std::string("__default__")' - df = df.Redefine("btag_bin", binname_expr) if "btag_bin" in df.GetColumnNames() \ - else df.Define("btag_bin", binname_expr) + df = ( + df.Redefine("btag_bin", binname_expr) + if "btag_bin" in df.GetColumnNames() + else df.Define("btag_bin", binname_expr) + ) self._InitCppMap(unc_src_scale) df = df.Redefine( "weight_bTagShape_Central", - f"weight_bTagShape_Central * {self._map_name}.at(btag_bin)" + f"weight_bTagShape_Central * {self._map_name}.at(btag_bin)", ) - return df \ No newline at end of file + return df diff --git a/btagShape.h b/btagShape.h index af22bf7..03d61d9 100644 --- a/btagShape.h +++ b/btagShape.h @@ -136,12 +136,14 @@ namespace correction { return false; } - bTagShapeCorrProvider(const std::string& fileName, const std::string& year, std::string const& tagger_name, const bool wantShape=true) - : corrections_(CorrectionSet::from_file(fileName)), - _year(year) { - if (wantShape){ - shape_corr_ = corrections_->at(tagger_name + "_shape"); - } + bTagShapeCorrProvider(const std::string& fileName, + const std::string& year, + std::string const& tagger_name, + const bool wantShape = true) + : corrections_(CorrectionSet::from_file(fileName)), _year(year) { + if (wantShape) { + shape_corr_ = corrections_->at(tagger_name + "_shape"); + } std::cerr << "Initialized bTagShapeCorrProvider::bTagShapeCorrProvider()" << std::endl; } @@ -153,7 +155,8 @@ namespace correction { double sf_product = 1.; std::string source_str = getUncName().at(source); for (size_t jet_idx = 0; jet_idx < Jet_p4.size(); jet_idx++) { - if ((Jet_bTag_score[jet_idx] > 1.0 || Jet_bTag_score[jet_idx] < 0.0) || std::abs(Jet_p4[jet_idx].eta()) >= 2.5 || Jet_p4[jet_idx].pt() < 20.0) + if ((Jet_bTag_score[jet_idx] > 1.0 || Jet_bTag_score[jet_idx] < 0.0) || + std::abs(Jet_p4[jet_idx].eta()) >= 2.5 || Jet_p4[jet_idx].pt() < 20.0) continue; const UncScale jet_tag_scale = sourceApplies(source, 
Jet_Flavour[jet_idx]) ? scale : UncScale::Central; const std::string& scale_str = getScaleStr(jet_tag_scale); From 862267bcdc3ea6054187d166bef4be1d7fb9fbe4 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Fri, 20 Feb 2026 15:40:14 +0100 Subject: [PATCH 10/13] undo btag checks in constructor --- Corrections.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/Corrections.py b/Corrections.py index 43695e2..8c61cc5 100644 --- a/Corrections.py +++ b/Corrections.py @@ -116,29 +116,6 @@ def __init__( file=sys.stderr, ) - corr_cfg = self.global_params["corrections"] - btag_required = "btag" in corr_cfg - btag_skipped = "btag" not in self.to_apply - if btag_skipped and btag_required: - print(f"Skipped btag") - btag_cfg = corr_cfg["btag"] - btag_stages = btag_cfg.get("stages", []) - if not btag_stages: - btag_stages.append(btag_cfg["stage"]) - required_at_histTuple = "HistTuple" in btag_stages - required_at_analysisCache = "AnalysisCache" in btag_stages - shape_mode = "shape" in btag_cfg["modes"].values() - # btag shape normalization doesn't need to be applied to working points - if not shape_mode: - print(f"Btag mode not shape, appending btag config") - self.to_apply["btag"] = btag_cfg - else: - if not required_at_histTuple and not required_at_analysisCache: - print( - f"Btag shape not required at HistTuple or AnalysisCache stages, appending btag config" - ) - self.to_apply["btag"] = btag_cfg - if len(self.to_apply) > 0: print( f'Corrections to apply: {", ".join(self.to_apply.keys())}', From 0a90d030841feecc1579dc306da8380add20a062 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Mon, 23 Feb 2026 22:40:40 +0100 Subject: [PATCH 11/13] apply comments --- Corrections.py | 38 +++++++++++++++++++++++--------------- btag.py | 32 +++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 22 deletions(-) diff --git a/Corrections.py b/Corrections.py index 8c61cc5..b02c633 100644 --- a/Corrections.py +++ b/Corrections.py @@ -136,7 +136,7 @@ def 
__init__( self.fatjet_ = None self.Vpt_ = None self.JetVetoMap_ = None - self.btag_norm_ = None + self.btag_shape_norm_ = None @property def xs_db(self): @@ -289,8 +289,8 @@ def trg(self): @property def btag_norm(self): - if self.btag_norm_ is None: - if self.stage == "HistTuple" and not self.isData: + if self.btag_shape_norm_ is None: + if not self.isData: from .btag import btagShapeWeightCorrector params = self.to_apply["btag"] @@ -307,12 +307,12 @@ def btag_norm(self): os.environ["ANALYSIS_PATH"], formatted_pattern ) print(f"Applying shape weight normalization from {norm_file_path}") - self.btag_norm_ = btagShapeWeightCorrector( + self.btag_shape_norm_ = btagShapeWeightCorrector( norm_file_path=norm_file_path, bins=bins ) else: - return None - return self.btag_norm_ + raise RuntimeError("btag_shape_norm not applicable to data.") + return self.btag_shape_norm_ def applyScaleUncertainties(self, df, ana_reco_objects): source_dict = {central: []} @@ -518,18 +518,26 @@ def getNormalisationCorrections( all_weights.extend(tau_SF_branches) if "btag" in self.to_apply: btag_sf_mode = self.to_apply["btag"]["modes"].get(self.stage, "none") - if btag_sf_mode in ["shape", "wp"]: + if btag_sf_mode in ["shape", "shape_and_norm", "wp"]: if btag_sf_mode == "shape": df, bTagSF_branches = self.btag.getBTagShapeSF( df, unc_source, unc_scale, isCentral, return_variations ) - if self.stage == "HistTuple": - assert ( - self.btag_norm is not None - ), "btagShapeWeightCorrector must be initialzied at HistTuple stage" - df = self.btag_norm.UpdateBtagWeight( - df=df, unc_src="Central", unc_scale="Central" - ) + elif btag_sf_mode == "shape_and_norm": + assert ( + self.btag_norm is not None + ), "btagShapeWeightCorrector must be initialzied at in `shape_and_norm` mode." 
+ + df, bTagSF_branches = self.btag.getBTagShapeSF( + df, unc_source, unc_scale, isCentral, return_variations + ) + + df = self.btag_norm.UpdateBtagWeight( + df=df, + unc_src=unc_source, + unc_scale=unc_scale, + sf_branches=bTagSF_branches, + ) else: df, bTagSF_branches = self.btag.getBTagWPSF( df, isCentral and return_variations, isCentral @@ -537,7 +545,7 @@ def getNormalisationCorrections( all_weights.extend(bTagSF_branches) elif btag_sf_mode != "none": raise RuntimeError( - f"btag mode {btag_sf_mode} not recognized. Supported modes are 'shape', 'wp' and 'none'." + f"btag mode {btag_sf_mode} not recognized. Supported modes are 'shape', 'shape_and_norm', 'wp' and 'none'." ) if "mu" in self.to_apply: if self.mu.low_available: diff --git a/btag.py b/btag.py index 033294d..d529d05 100644 --- a/btag.py +++ b/btag.py @@ -284,7 +284,7 @@ def _InitCppMap(self, unc_src_scale): self._m = m - def UpdateBtagWeight(self, *, df, unc_src, unc_scale): + def UpdateBtagWeight(self, *, df, unc_src, unc_scale, sf_branches): unc_src_scale = f"{unc_src}_{unc_scale}" if unc_src != unc_scale else unc_src if unc_src_scale not in self.shape_weight_corr_dict: @@ -292,21 +292,39 @@ def UpdateBtagWeight(self, *, df, unc_src, unc_scale): f"Key `{unc_src_scale}` not found in `{self.norm_file_path}`." ) + # btag branches have format weight_bTagShape_{syst}_rel + # need to extract syst => need token #2 + systs = [b.split("_")[2] for b in sf_branches] pieces = [] - for name, cut in self.bins.items(): - pieces.append(f'({cut}) ? std::string("{name}") : ') + for bin_name, cut in self.bins.items(): + for syst in systs: + pieces.append(f'({cut}) ? 
std::string("norm_{syst}_{bin_name}") : ') binname_expr = "".join(pieces) + 'std::string("__default__")' df = ( - df.Redefine("btag_bin", binname_expr) - if "btag_bin" in df.GetColumnNames() - else df.Define("btag_bin", binname_expr) + df.Redefine("btag_shape_norm_corr_bin", binname_expr) + if "btag_shape_norm_corr_bin" in df.GetColumnNames() + else df.Define("btag_shape_norm_corr_bin", binname_expr) ) self._InitCppMap(unc_src_scale) + for syst in systs: + # only correct weights for uncertainty variations + # branches are defined as relative, i.e. branch/central + # to correct, need to first multiply by central + if syst == "Central": + continue + + branch_name = f"weight_bTagShape_{syst}_rel" + df = df.Redefine( + branch_name, + f"{branch_name} * weight_bTagShape_Central * {self._map_name}.at(btag_shape_norm_corr_bin)", + ) + + # correct central separately after everything else was corrected and central is not needed df = df.Redefine( "weight_bTagShape_Central", - f"weight_bTagShape_Central * {self._map_name}.at(btag_bin)", + f"weight_bTagShape_Central * {self._map_name}.at(btag_shape_norm_corr_bin)", ) return df From 9ba93740bc20605c84079b3347d18ca0a7af98ef Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Tue, 24 Feb 2026 19:15:13 +0100 Subject: [PATCH 12/13] get rid of static map and add checks in getBTagShapeSF --- btag.py | 86 +++++++++++++++++++++++++++++------------------------ btagShape.h | 11 +++++-- 2 files changed, 56 insertions(+), 41 deletions(-) diff --git a/btag.py b/btag.py index d529d05..584d481 100644 --- a/btag.py +++ b/btag.py @@ -257,36 +257,46 @@ def getBTagShapeSF(self, df, src_name, scale_name, isCentral, return_variations) return df, SF_branches +ROOT.gInterpreter.Declare(r""" +#include <map> +#include <string> + +struct BTagMapApplier { + std::map<std::string, float> corr; + + float operator()(float w, const std::string &key) const { + auto it = corr.find(key); + const float r = (it != corr.end()) ?
it->second : 1.0; + return w * r; + } +}; +""") + + class btagShapeWeightCorrector: def __init__(self, *, norm_file_path, bins): self.norm_file_path = norm_file_path with open(norm_file_path, "r") as norm_file: self.shape_weight_corr_dict = json.load(norm_file) self.bins = bins - ROOT.gInterpreter.Declare("#include ") - self._declared = set() + self._appliers = [] - def _InitCppMap(self, unc_src_scale): - correction_factors = self.shape_weight_corr_dict[unc_src_scale] - self._map_name = f"_btag_corr_map_{unc_src_scale}" - - if self._map_name not in self._declared: - ROOT.gInterpreter.Declare(f""" - static const std::map {self._map_name}; - """) - self._declared.add(self._map_name) - - m = getattr(ROOT, self._map_name) - m.clear() - m["__default__"] = 1.0 - for k, v in correction_factors.items(): - m[k] = float(v) - - self._m = m + def _define_key_column(self, df, keycol, syst): + # key = norm__ + pieces = [] + for bin_name, cut in self.bins.items(): + pieces.append(f'({cut}) ? std::string("norm_{syst}_{bin_name}") : ') + key_expr = "".join(pieces) + 'std::string("__default__")' + + cols = set(df.GetColumnNames()) + return ( + df.Redefine(keycol, key_expr) + if keycol in cols + else df.Define(keycol, key_expr) + ) def UpdateBtagWeight(self, *, df, unc_src, unc_scale, sf_branches): unc_src_scale = f"{unc_src}_{unc_scale}" if unc_src != unc_scale else unc_src - if unc_src_scale not in self.shape_weight_corr_dict: raise KeyError( f"Key `{unc_src_scale}` not found in `{self.norm_file_path}`." @@ -295,36 +305,34 @@ def UpdateBtagWeight(self, *, df, unc_src, unc_scale, sf_branches): # btag branches have format weight_bTagShape_{syst}_rel # need to extract syst => need token #2 systs = [b.split("_")[2] for b in sf_branches] - pieces = [] - for bin_name, cut in self.bins.items(): - for syst in systs: - pieces.append(f'({cut}) ? 
std::string("norm_{syst}_{bin_name}") : ') - binname_expr = "".join(pieces) + 'std::string("__default__")' - - df = ( - df.Redefine("btag_shape_norm_corr_bin", binname_expr) - if "btag_shape_norm_corr_bin" in df.GetColumnNames() - else df.Define("btag_shape_norm_corr_bin", binname_expr) - ) - self._InitCppMap(unc_src_scale) + applier = ROOT.BTagMapApplier() + applier.corr["__default__"] = 1.0 + for k, v in self.shape_weight_corr_dict[unc_src_scale].items(): + applier.corr[k] = float(v) + self._appliers.append(applier) + # only correct weights for uncertainty variations + # branches are defined as relative, i.e. branch/central + # to correct, need to first multiply by central for syst in systs: - # only correct weights for uncertainty variations - # branches are defined as relative, i.e. branch/central - # to correct, need to first multiply by central if syst == "Central": continue + keycol = f"btag_shape_norm_key_{syst}" + df = self._define_key_column(df, keycol, syst) branch_name = f"weight_bTagShape_{syst}_rel" + # rel := rel * central * corr(norm__) df = df.Redefine( - branch_name, - f"{branch_name} * weight_bTagShape_Central * {self._map_name}.at(btag_shape_norm_corr_bin)", - ) + branch_name, f"(float){branch_name} * weight_bTagShape_Central" + ).Redefine(branch_name, applier, [branch_name, keycol]) # correct central separately after everything else was corrected and central is not needed + df = self._define_key_column(df, "btag_shape_norm_key_Central", "Central") df = df.Redefine( "weight_bTagShape_Central", - f"weight_bTagShape_Central * {self._map_name}.at(btag_shape_norm_corr_bin)", + applier, + ["weight_bTagShape_Central", "btag_shape_norm_key_Central"], ) + return df diff --git a/btagShape.h b/btagShape.h index 03d61d9..1fb651a 100644 --- a/btagShape.h +++ b/btagShape.h @@ -154,9 +154,10 @@ namespace correction { UncScale scale) const { double sf_product = 1.; std::string source_str = getUncName().at(source); - for (size_t jet_idx = 0; jet_idx < 
Jet_p4.size(); jet_idx++) { + for (size_t jet_idx = 0; jet_idx < Jet_p4.size(); ++jet_idx) { if ((Jet_bTag_score[jet_idx] > 1.0 || Jet_bTag_score[jet_idx] < 0.0) || - std::abs(Jet_p4[jet_idx].eta()) >= 2.5 || Jet_p4[jet_idx].pt() < 20.0) + std::abs(Jet_p4[jet_idx].eta()) >= 2.5 || Jet_p4[jet_idx].pt() < 20.0 || + (Jet_Flavour[jet_idx] != 0 || Jet_Flavour[jet_idx] != 4 || Jet_Flavour[jet_idx] != 5)) continue; const UncScale jet_tag_scale = sourceApplies(source, Jet_Flavour[jet_idx]) ? scale : UncScale::Central; const std::string& scale_str = getScaleStr(jet_tag_scale); @@ -183,6 +184,12 @@ namespace correction { } catch (...) { std::cerr << "bTagShapeCorrProvider::getBTagShapeSF : Unknown error occurred when evaluating " "correction\n"; + std::cerr << "\tunc_name=" << unc_name << "\n" + << "\tjet_idx=" << jet_idx << "\n" + << "\tJet_Flavour=" << Jet_Flavour[jet_idx] << "\n" + << "\tabs(Jet_eta)=" << std::abs(Jet_p4[jet_idx].eta()) << "\n" + << "\tJet_pt=" << Jet_p4[jet_idx].pt() << "\n" + << "\tJetbtag_score=" << Jet_bTag_score[jet_idx] << "\n"; throw; } } From 97a46054aa21f62ea33438c175bac8e8b8668ba4 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Wed, 25 Feb 2026 15:55:20 +0100 Subject: [PATCH 13/13] correctly redefine _rel branches --- btag.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/btag.py b/btag.py index 584d481..16d7ca2 100644 --- a/btag.py +++ b/btag.py @@ -315,6 +315,8 @@ def UpdateBtagWeight(self, *, df, unc_src, unc_scale, sf_branches): # only correct weights for uncertainty variations # branches are defined as relative, i.e. 
branch/central # to correct, need to first multiply by central + # after this loop _rel branches will have the full value + # => will have to divide them by central after it is corrected for syst in systs: if syst == "Central": continue @@ -335,4 +337,13 @@ def UpdateBtagWeight(self, *, df, unc_src, unc_scale, sf_branches): ["weight_bTagShape_Central", "btag_shape_norm_key_Central"], ) + # redefine all _rel branches by dividing them by updated Central to make them contain relative value + for syst in systs: + if syst == "Central": + continue + branch_name = f"weight_bTagShape_{syst}_rel" + df = df.Redefine( + branch_name, f"(float){branch_name} / weight_bTagShape_Central" + ) + return df