Skip to content

Commit

Permalink
Merge pull request #20 from CyberAgentAILab/fix/nn-case
Browse files Browse the repository at this point in the history
Fix test data leak and support NN
  • Loading branch information
TomeHirata authored Sep 5, 2024
2 parents cf31d38 + 48fdf66 commit 0c470ee
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 23 deletions.
51 changes: 29 additions & 22 deletions dte_adj/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,26 +481,27 @@ def _compute_cumulative_distribution(
cumulative_distribution = np.zeros(n_loc)
superset_prediction = np.zeros((n_records, n_loc))
treatment_mask = treatment_arms == target_treatment_arm
confounding_in_arm = confoundings[treatment_mask]
n_records_in_arm = len(confounding_in_arm)
if self.is_multi_task:
confounding_in_arm = confoundings[treatment_mask]
n_records_in_arm = len(confounding_in_arm)
outcome_in_arm = outcomes[treatment_mask] # (n_records)
subset_prediction = np.zeros(
(n_records_in_arm, n_loc)
) # (n_records_in_arm, n_loc)
binominal = (outcomes.reshape(-1, 1) <= locations) * 1 # (n_records, n_loc)
cdf = binominal[treatment_mask].mean(axis=0) # (n_loc)
for fold in range(self.folds):
superset_mask = np.arange(n_records) % self.folds == fold
subset_mask = superset_mask & treatment_mask
subset_mask_inner = superset_mask[treatment_mask]
confounding_train = confoundings[~subset_mask]
confounding_fit = confoundings[subset_mask]
binominal_train = binominal[~subset_mask]
subset_test_mask = superset_mask & treatment_mask
subset_train_mask = (~superset_mask) & treatment_mask
subset_test_mask_inner = superset_mask[treatment_mask]
confounding_train = confoundings[subset_train_mask]
binominal_train = binominal[subset_train_mask]
model = deepcopy(self.base_model)
model.fit(confounding_train, binominal_train)
subset_prediction[subset_mask_inner] = self._compute_model_prediction(
model, confounding_fit
subset_prediction[subset_test_mask_inner] = (
self._compute_model_prediction(
model, confoundings[subset_test_mask]
)
)
superset_prediction[superset_mask] = self._compute_model_prediction(
model, confoundings[superset_mask]
Expand All @@ -510,26 +511,26 @@ def _compute_cumulative_distribution(
) # (n_loc)
else:
for i, location in enumerate(locations):
confounding_in_arm = confoundings[treatment_mask]
outcome_in_arm = outcomes[treatment_mask]
subset_prediction = np.zeros(outcome_in_arm.shape[0])
subset_prediction = np.zeros(n_records_in_arm)
binominal = (outcomes <= location) * 1 # (n_records)
cdf = binominal[treatment_mask].mean()
for fold in range(self.folds):
superset_mask = np.arange(n_records) % self.folds == fold
subset_mask = superset_mask & treatment_mask
subset_mask_inner = superset_mask[treatment_mask]
confounding_train = confoundings[~subset_mask]
confounding_fit = confoundings[subset_mask]
binominal_train = binominal[~subset_mask]
subset_test_mask = superset_mask & treatment_mask
subset_train_mask = (~superset_mask) & treatment_mask
subset_test_mask_inner = superset_mask[treatment_mask]
confounding_train = confoundings[subset_train_mask]
binominal_train = binominal[subset_train_mask]
if len(np.unique(binominal_train)) == 1:
subset_prediction[subset_mask_inner] = binominal_train[0]
subset_prediction[subset_test_mask_inner] = binominal_train[0]
superset_prediction[superset_mask, i] = binominal_train[0]
continue
model = deepcopy(self.base_model)
model.fit(confounding_train, binominal_train)
subset_prediction[subset_mask_inner] = (
self._compute_model_prediction(model, confounding_fit)
subset_prediction[subset_test_mask_inner] = (
self._compute_model_prediction(
model, confoundings[subset_test_mask]
)
)
superset_prediction[superset_mask, i] = (
self._compute_model_prediction(
Expand All @@ -544,7 +545,13 @@ def _compute_cumulative_distribution(
def _compute_model_prediction(self, model, confoundings: np.ndarray) -> np.ndarray:
    """Return the fitted model's prediction for ``confoundings``.

    Models that expose ``predict_proba`` are queried through it; every
    other model falls back to ``predict``.

    Args:
        model: Fitted estimator exposing ``predict_proba`` or ``predict``.
        confoundings: Feature rows passed unchanged to the model.

    Returns:
        * ``predict`` output when the model has no ``predict_proba``.
        * The raw ``predict_proba`` output when ``self.is_multi_task`` is
          set (supposed to be shaped ``(n_records, n_locations)``).
        * Otherwise the ``predict_proba`` output itself when it is 1-D, or
          its column 1 when it is 2-D (supposed to be ``(n_records, 2)``).
    """
    if not hasattr(model, "predict_proba"):
        return model.predict(confoundings)

    probabilities = model.predict_proba(confoundings)
    if self.is_multi_task:
        # Checked before touching ``.ndim`` so the multi-task output is
        # handed back untouched, whatever container the model returns.
        return probabilities
    if probabilities.ndim == 1:
        # Some models already emit one probability per record; indexing
        # ``[:, 1]`` on such an array would raise IndexError.
        return probabilities
    # Two-column output: keep only column 1.
    return probabilities[:, 1]
2 changes: 1 addition & 1 deletion tests/test_adjusted_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,5 @@ def test_compute_cumulative_distribution(self):
self.assertAlmostEqual(cumulative_distribution[i], (i + 1) / 10, places=2)

for i in range(20):
for j in range(1, 10):
for j in range(1, 8):
self.assertAlmostEqual(superset_prediction[i, j], 0.5, places=2)

0 comments on commit 0c470ee

Please sign in to comment.