Skip to content

Commit

Permalink
ran black
Browse files Browse the repository at this point in the history
  • Loading branch information
nikolaitennant committed Nov 2, 2024
1 parent 3485f77 commit 3dc3687
Show file tree
Hide file tree
Showing 87 changed files with 240 additions and 163 deletions.
13 changes: 6 additions & 7 deletions Analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,7 @@
"source": [
"# Convert the data to a DataFrame for easier manipulation\n",
"adata.obs = adata.obs.reset_index(drop=False)\n",
"df_expression = pd.DataFrame(\n",
" adata.X.todense(), columns=adata.var_names\n",
")\n",
"df_expression = pd.DataFrame(adata.X.todense(), columns=adata.var_names)\n",
"df_expression[\"sex\"] = adata.obs[\"sex\"]\n",
"df_expression[\"age\"] = adata.obs[\"age\"]\n",
"\n",
Expand Down Expand Up @@ -200,7 +198,6 @@
}
],
"source": [
"\n",
"# Plot gene expression over age by sex in a subplot grid\n",
"import math\n",
"\n",
Expand All @@ -212,7 +209,7 @@
"rows = math.ceil(num_genes / cols)\n",
"\n",
"# Create subplots\n",
"fig, axes = plt.subplots(rows, cols, figsize=(5*cols, 4*rows))\n",
"fig, axes = plt.subplots(rows, cols, figsize=(5 * cols, 4 * rows))\n",
"axes = axes.flatten()\n",
"\n",
"# Colors for sexes\n",
Expand Down Expand Up @@ -387,7 +384,7 @@
"rows = math.ceil(num_genes / cols)\n",
"\n",
"# Create subplots\n",
"fig, axes = plt.subplots(rows, cols, figsize=(4*cols, 4*rows))\n",
"fig, axes = plt.subplots(rows, cols, figsize=(4 * cols, 4 * rows))\n",
"axes = axes.flatten() # Flatten in case of single row\n",
"\n",
"for idx, gene in enumerate(valid_matched_genes):\n",
Expand Down Expand Up @@ -421,7 +418,9 @@
"\n",
"# Calculate total variance explained\n",
"total_variance_explained = sum(adata_vis.uns[\"pca\"][\"variance_ratio\"])\n",
"print(f\"Total variance explained by the first 50 PCs: {total_variance_explained*100:.2f}%\")\n",
"print(\n",
" f\"Total variance explained by the first 50 PCs: {total_variance_explained*100:.2f}%\"\n",
")\n",
"\n",
"# Compute the neighborhood graph and UMAP\n",
"sc.pp.neighbors(adata_vis, n_pcs=50)\n",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"Test": {
"Accuracy": "68.33%",
"Precision": "70.42%",
"Recall": "64.02%",
"F1": "64.33%",
"AUC": "87.49%"
"Accuracy": "94.37%",
"Precision": "93.74%",
"Recall": "93.94%",
"F1": "93.81%",
"AUC": "99.45%"
},
"Baseline": {
"Accuracy": "33.33%",
"Precision": "8.33%",
"Accuracy": "33.67%",
"Precision": "8.42%",
"Recall": "25.00%",
"F1": "12.50%"
"F1": "12.60%"
}
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"Test": {
"Accuracy": "71.08%",
"Precision": "69.68%",
"Recall": "67.44%",
"F1": "67.79%",
"AUC": "90.37%"
"Accuracy": "79.62%",
"Precision": "77.77%",
"Recall": "78.26%",
"F1": "77.67%",
"AUC": "95.17%"
},
"Baseline": {
"Accuracy": "34.50%",
"Precision": "8.62%",
"Accuracy": "33.67%",
"Precision": "8.42%",
"Recall": "25.00%",
"F1": "12.83%"
"F1": "12.60%"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"Test": {
"Accuracy": "81.55%",
"Precision": "79.06%",
"Recall": "79.09%",
"F1": "78.97%",
"AUC": "95.50%"
},
"Baseline": {
"Accuracy": "33.69%",
"Precision": "8.42%",
"Recall": "25.00%",
"F1": "12.60%"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"Test": {
"Accuracy": "86.26%",
"Precision": "83.79%",
"Recall": "83.10%",
"F1": "83.41%",
"AUC": "97.07%"
},
"Baseline": {
"Accuracy": "34.78%",
"Precision": "8.69%",
"Recall": "25.00%",
"F1": "12.90%"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"Test": {
"Accuracy": "81.66%",
"Precision": "80.38%",
"Recall": "78.06%",
"F1": "78.83%",
"AUC": "95.51%"
},
"Baseline": {
"Accuracy": "37.57%",
"Precision": "9.39%",
"Recall": "25.00%",
"F1": "13.65%"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"Test": {
"Accuracy": "82.06%",
"Precision": "78.87%",
"Recall": "78.81%",
"F1": "78.82%",
"AUC": "95.24%"
},
"Baseline": {
"Accuracy": "37.26%",
"Precision": "9.32%",
"Recall": "25.00%",
"F1": "13.57%"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"Test": {
"Accuracy": "85.34%",
"Precision": "83.14%",
"Recall": "79.25%",
"F1": "80.52%",
"AUC": "95.90%"
},
"Baseline": {
"Accuracy": "37.87%",
"Precision": "9.47%",
"Recall": "25.00%",
"F1": "13.73%"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"Test": {
"Accuracy": "81.05%",
"Precision": "80.31%",
"Recall": "80.16%",
"F1": "80.10%",
"AUC": "95.60%"
},
"Baseline": {
"Accuracy": "30.93%",
"Precision": "7.73%",
"Recall": "25.00%",
"F1": "11.81%"
}
}
Binary file modified Code/__pycache__/Config.cpython-39.pyc
Binary file not shown.
Binary file modified Code/__pycache__/Model.cpython-39.pyc
Binary file not shown.
Binary file modified Code/__pycache__/Utilities.cpython-39.pyc
Binary file not shown.
Binary file modified Code/__pycache__/eda.cpython-39.pyc
Binary file not shown.
Binary file modified Code/__pycache__/interpreter.cpython-39.pyc
Binary file not shown.
Binary file modified Code/__pycache__/pipeline_manager.cpython-39.pyc
Binary file not shown.
Binary file modified Code/__pycache__/preprocess.cpython-39.pyc
Binary file not shown.
Binary file modified Code/__pycache__/visuals.cpython-39.pyc
Binary file not shown.
16 changes: 8 additions & 8 deletions Code/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"tissue": "head", # Options: 'head', 'body', 'all'
"model_type": "CNN", # Options: 'CNN', 'MLP', 'XGBoost', 'RandomForest', 'LogisticRegression'
"encoding_variable": "age", # Options: 'sex_age', 'sex', 'age'
"cell_type": "all", # Options: 'all', 'CNS neuron', 'sensory neuron', 'epithelial cell', 'fat cell', 'glial cell', 'muscle cell'
"cell_type": "sensory neuron", # Options: 'all', 'CNS neuron', 'sensory neuron', 'epithelial cell', 'fat cell', 'glial cell', 'muscle cell'
"sex_type": "all", # Options: 'all', 'male', 'female'
},
"Sampling": {
Expand Down Expand Up @@ -62,16 +62,16 @@
},
"GenePreprocessing": {
"GeneFiltering": {
"remove_sex_genes": False, # Options: True, False
"remove_sex_genes": True, # Options: True, False
"remove_autosomal_genes": False, # Options: True, False
"only_keep_lnc_genes": False, # Options: True, False
"remove_lnc_genes": False, # Options: True, False
"remove_unaccounted_genes": False, # Options: True, False
"remove_unaccounted_genes": True, # Options: True, False
"select_batch_genes": False, # Options: True, False #need to create direcotries for this
"highly_variable_genes": False, # Options: True, False #need to create direcotries for this
},
"GeneBalancing": {
"balance_genes": False, # Options: True, False
"balance_genes": True, # Options: True, False
"balance_lnc_genes": False, # Options: True, False
},
"GeneShuffle": {
Expand All @@ -80,16 +80,16 @@
},
},
"FeatureImportanceAndVisualizations": {
"run_visualization": True, # Options: True, False
"run_interpreter": True, # Options: True, False (SHAP)
"run_visualization": False, # Options: True, False
"run_interpreter": False, # Options: True, False (SHAP)
"load_SHAP": False, # Options: True to load SHAP values, False to compute them, only works if run_interpreter is True
"reference_size": 5000, # Reference data size for SHAP
"save_predictions": False, # Options: True, False; (Model predictions csv file)
},
"DataSplit": {
"validation_split": 0.1, # Fraction of data for validation
"test_split": 0.1, # Fraction of data for testing
"random_state": 42, # Random state for reproducibility
"random_state": 11, # Random state for reproducibility
},
"Training": {
"epochs": 15, # Number of epochs for training
Expand All @@ -104,7 +104,7 @@
"dropout_rate": 0.3, # Dropout rate
"learning_rate": 0.0006, # Learning rate
"activation_function": "relu", # Activation function
"reference_size": 1000, # Reference data size for SHAP

},
"CNN_Model": {
"filters": [32, 64, 128], # Number of filters in each convolutional layer
Expand Down
45 changes: 32 additions & 13 deletions Code/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,10 @@ def __init__(
"""
self.config = config
self.model = model
self.test_data = test_data
self.test_labels = test_labels
self.test_data = test_data
self.test_labels = test_labels
self.label_encoder = label_encoder
self.reference_data = reference_data
self.reference_data = reference_data
self.path_manager = path_manager

self.shap_dir = self.path_manager.get_visualization_directory(
Expand Down Expand Up @@ -213,25 +213,40 @@ def compute_shap_values(self):

# Adjust SHAP values and test data shapes based on the system type
device = self.config.Device.processor.lower()
if device == 'm':
if device == "m":
# Adjust SHAP values for macOS
if isinstance(shap_values, list):
squeezed_shap_values = [np.squeeze(val, axis=1) if val.ndim >= 3 else val for val in shap_values]
squeezed_shap_values = [
np.squeeze(val, axis=1) if val.ndim >= 3 else val
for val in shap_values
]
else:
squeezed_shap_values = (np.squeeze(shap_values, axis=1) if shap_values.ndim >= 3 else shap_values)
squeezed_shap_values = (
np.squeeze(shap_values, axis=1)
if shap_values.ndim >= 3
else shap_values
)

else:
# Adjust SHAP values for Windows
if isinstance(shap_values, list):
squeezed_shap_values = [np.squeeze(val, axis=1) if val.ndim > 3 else val for val in shap_values]
squeezed_shap_values = [
np.squeeze(val, axis=1) if val.ndim > 3 else val
for val in shap_values
]
else:
squeezed_shap_values = (np.squeeze(shap_values, axis=1) if shap_values.ndim > 3 else shap_values)
squeezed_shap_values = (
np.squeeze(shap_values, axis=1)
if shap_values.ndim > 3
else shap_values
)

# Convert the SHAP values to a list of arrays for compatibility with the rest of the code
squeezed_shap_values = [
squeezed_shap_values[:, :, i] for i in range(squeezed_shap_values.shape[2])
squeezed_shap_values[:, :, i]
for i in range(squeezed_shap_values.shape[2])
]

return squeezed_shap_values, squeezed_test_data

def save_shap_values(self, shap_values):
Expand All @@ -250,7 +265,9 @@ def save_shap_values(self, shap_values):
),
"model_weights_hash": model_weights_hash,
"test_data_hash": self.compute_sha256_hash(self.test_data.tobytes()),
"reference_data_hash": self.compute_sha256_hash(self.reference_data.tobytes()),
"reference_data_hash": self.compute_sha256_hash(
self.reference_data.tobytes()
),
}

# Save SHAP values, metadata, and the data
Expand Down Expand Up @@ -590,7 +607,9 @@ def save_predictions_to_csv(self, file_name_template="{}_{}_predictions.csv"):
)

# Determine the relevant train and test attributes based on the method
method = self.config.DataParameters.TrainTestSplit.method # This could be 'sex', 'tissue', etc.
method = (
self.config.DataParameters.TrainTestSplit.method
) # This could be 'sex', 'tissue', etc.
train_attribute = self.config.DataParameters.TrainTestSplit.train.get(
method, "unknown"
)
Expand Down Expand Up @@ -621,4 +640,4 @@ def compute_metrics(self):
"""
self._evaluate_model_performance()
self._calculate_and_save_metrics()
self.save_predictions_to_csv()
self.save_predictions_to_csv()
Loading

0 comments on commit 3dc3687

Please sign in to comment.