From 82fb47c9a59c89036572b15d96ef99472b750810 Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sat, 20 Dec 2025 15:58:21 +0900 Subject: [PATCH 1/2] use vectorize instead of merge --- xbooster/lgb_constructor.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/xbooster/lgb_constructor.py b/xbooster/lgb_constructor.py index e79d0b5..3d570fb 100644 --- a/xbooster/lgb_constructor.py +++ b/xbooster/lgb_constructor.py @@ -494,27 +494,23 @@ def _convert_tree_to_points(self, X: pd.DataFrame) -> pd.DataFrame: # pylint: d # Get leaf indices for all trees X_leaf_indices = self.get_leafs(X, output_type="leaf_index") - - result = pd.DataFrame() - for col in X_leaf_indices.columns: - tree_number = col.split("_")[1] + n_samples, n_trees = X_leaf_indices.shape + points_matrix = np.zeros((n_samples, n_trees)) + leaf_idx_values = X_leaf_indices.values + for t in range(n_trees): # Get points for this tree - subset_points_df = self.lgb_scorecard_with_points[ - self.lgb_scorecard_with_points["Tree"] == int(tree_number) - ].copy() - - # Merge leaf indices with points - merged_df = pd.merge( - X_leaf_indices[[col]].round(4), - subset_points_df[["Node", "Points"]], - left_on=col, - right_on="Node", - how="left", - ) - result[f"Score_{tree_number}"] = merged_df["Points"] + tree_points = self.lgb_scorecard_with_points[ + self.lgb_scorecard_with_points["Tree"] == t + ] + # Mapping dictionary instead of merge + mapping_dict = dict(zip(tree_points["Node"], tree_points["Points"])) + points_matrix[:, t] = np.vectorize(mapping_dict.get)(leaf_idx_values[:, t]) + result = pd.DataFrame( + points_matrix, index=X.index, columns=[f"Score_{i}" for i in range(n_trees)] + ) # Add total score - result = pd.concat([result, result.sum(axis=1).rename("Score")], axis=1) + result["Score"] = points_matrix.sum(axis=1) return result def predict_score(self, X: pd.DataFrame) -> pd.Series: # pylint: disable=C0103 From 71078fb16b359ae3f7730ade26757248077dfb6c Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sun, 21 Dec 2025 22:57:55 +0900 Subject: [PATCH 2/2] use map instead of np vectorize --- xbooster/lgb_constructor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xbooster/lgb_constructor.py b/xbooster/lgb_constructor.py index 3d570fb..4027b1f 100644 --- a/xbooster/lgb_constructor.py +++ b/xbooster/lgb_constructor.py @@ -504,7 +504,7 @@ def _convert_tree_to_points(self, X: pd.DataFrame) -> pd.DataFrame: # pylint: d ] # Mapping dictionary instead of merge mapping_dict = dict(zip(tree_points["Node"], tree_points["Points"])) - points_matrix[:, t] = np.vectorize(mapping_dict.get)(leaf_idx_values[:, t]) + points_matrix[:, t] = pd.Series(leaf_idx_values[:, t]).map(mapping_dict).to_numpy() result = pd.DataFrame( points_matrix, index=X.index, columns=[f"Score_{i}" for i in range(n_trees)]