Skip to content

Commit

Permalink
Fix handling truth values of filtering columns in pandas
Browse files: browse the repository at this point in the history
  • Loading branch information
LinaHeinzke committed Feb 20, 2024
1 parent 1ee99f9 commit a4721c6
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 10 deletions.
4 changes: 2 additions & 2 deletions src/add_dti_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def add_dti_annotations(
dataset.drug_mechanism_pairs_set
)
)
- & (dataset.df_result["therapeutic_target"] == True)
+ & (dataset.df_result["therapeutic_target"])
),
"DTI",
] = "DT"
Expand All @@ -133,7 +133,7 @@ def add_dti_annotations(
dataset.drug_mechanism_pairs_set
)
)
- & (dataset.df_result["therapeutic_target"] == False)
+ & ~(dataset.df_result["therapeutic_target"])
),
"DTI",
] = "NDT"
Expand Down
10 changes: 4 additions & 6 deletions src/add_filtering_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ def add_subset_filtering_columns(
dataset.df_result[col_name] = False
dataset.df_result.loc[(dataset.df_result.index.isin(df.index)), col_name] = True
# check that filtering works
- assert dataset.df_result[dataset.df_result[col_name] == True][
- df.columns
- ].equals(df), f"Filtering is not accurate for {col_name}."
+ assert dataset.df_result[dataset.df_result[col_name]][df.columns].equals(
+ df
+ ), f"Filtering is not accurate for {col_name}."

if logging.DEBUG >= logging.root.level:
for [df_subset, subset_desc] in subsets:
Expand Down Expand Up @@ -200,9 +200,7 @@ def add_filtering_columns(
# consider only binding assays
# assay description = binding
desc = "B"
- df_combined_subset = dataset.df_result[
- dataset.df_result["keep_for_binding"] == True
- ].copy()
+ df_combined_subset = dataset.df_result[dataset.df_result["keep_for_binding"]].copy()
add_subset_filtering_columns(
df_combined_subset,
dataset,
Expand Down
2 changes: 1 addition & 1 deletion src/get_drug_mechanism_ct_pairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def add_drug_mechanism_ct_pairs(dataset: Dataset, chembl_con: sqlite3.Connection
dataset.df_result.loc[
(
(dataset.df_result["pchembl_value_mean_B"].notnull())
- | (dataset.df_result["pair_mutation_in_dm_table"] == True)
+ | (dataset.df_result["pair_mutation_in_dm_table"])
),
"keep_for_binding",
] = True
2 changes: 1 addition & 1 deletion src/sanity_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def check_pairs_without_pchembl_are_in_drug_mechanisms(df_result: pd.DataFrame):
]:
assert df_result[(df_result[pchembl_col].isnull())].equals(
df_result[
- (df_result["pair_mutation_in_dm_table"] == True)
+ (df_result["pair_mutation_in_dm_table"])
& (df_result[pchembl_col].isnull())
]
), f"Missing pchembl value in column {pchembl_col}"
Expand Down

0 comments on commit a4721c6

Please sign in to comment.