Merge pull request #70 from FNLCR-DMAP/dev

ruiheesi · web-flow · commit eb9af30ea4eb · 2023-07-06T14:37:43.000-04:00
Minor updates from NIDAP application
diff --git a/src/spac/transformations.py b/src/spac/transformations.py
@@ -5,7 +5,7 @@
 import scanpy.external as sce
 
 
-def phenograph_clustering(adata, features, layer, k=30):
+def phenograph_clustering(adata, features, layer=None, k=30):
     """
     Calculate automatic phenotypes using phenograph.
 
@@ -39,8 +39,8 @@ def phenograph_clustering(adata, features, layer, k=30):
             not all(isinstance(feature, str) for feature in features)):
         raise TypeError("`features` must be a list of strings")
 
-    if layer not in adata.layers.keys():
-        raise ValueError(f"`layer` not found in `adata.layers`. "
+    if layer is not None and layer not in adata.layers.keys():
+        raise ValueError(f"`{layer}` not found in `adata.layers`. "
                          f"Available layers are {list(adata.layers.keys())}")
 
     if not isinstance(k, int) or k <= 0:
@@ -50,7 +50,11 @@ def phenograph_clustering(adata, features, layer, k=30):
         raise ValueError("One or more of the `features` are not in "
                          "`adata.var_names`")
 
-    phenograph_df = adata.to_df(layer=layer)[features]
+    if layer is not None:
+        phenograph_df = adata.to_df(layer=layer)[features]
+    else:
+        phenograph_df = adata.to_df()[features]
+
     phenograph_out = sce.tl.phenograph(phenograph_df,
                                        clustering_algo="louvain",
                                        k=k)
@@ -245,8 +249,15 @@ def rename_observations(adata, src_observation, dest_observation, mappings):
     adata.obs[dest_observation] = (
         adata.obs[src_observation]
         .map(mappings)
-        .fillna(adata.obs[src_observation])
         .astype("category")
     )
 
+    # Ensure that all categories are covered
+    if adata.obs[dest_observation].isna().any():
+        raise ValueError(
+            "Not all unique values in the source observation are "
+            "covered by the mappings. "
+            "Please ensure that the mappings cover all unique values."
+        )
+
     return adata
diff --git a/src/spac/visualization.py b/src/spac/visualization.py
@@ -9,7 +9,7 @@
 from matplotlib.colors import ListedColormap, BoundaryNorm
 
 
-def tsne_plot(adata, ax=None, **kwargs):
+def tsne_plot(adata, color_column=None, ax=None, **kwargs):
     """
     Visualize scatter plot in tSNE basis.
 
@@ -18,6 +18,8 @@ def tsne_plot(adata, ax=None, **kwargs):
     adata : anndata.AnnData
         The AnnData object with t-SNE coordinates precomputed by the 'tsne'
         function and stored in 'adata.obsm["X_tsne"]'.
+    color_column : str, optional
+        The name of the column to use for coloring the scatter plot points.
     ax : matplotlib.axes.Axes, optional (default: None)
         A matplotlib axes object to plot on.
         If not provided, a new figure and axes will be created.
@@ -42,6 +44,17 @@ def tsne_plot(adata, ax=None, **kwargs):
     # Create a new figure and axes if not provided
     if ax is None:
         fig, ax = plt.subplots()
+    else:
+        fig = ax.get_figure()
+
+    if color_column and (color_column not in adata.obs.columns and
+                         color_column not in adata.var.columns):
+        err_msg = f"'{color_column}' not found in adata.obs or adata.var."
+        raise KeyError(err_msg)
+
+    # Add color column to the kwargs for the scanpy plot
+    if color_column:
+        kwargs['color'] = color_column
 
     # Plot the t-SNE
     sc.pl.tsne(adata, ax=ax, **kwargs)
@@ -133,10 +146,10 @@ def histogram(adata, feature_name=None, observation_name=None, layer=None,
             fig, axs = plt.subplots(n_groups, 1, figsize=(5, 5*n_groups))
             if n_groups == 1:
                 axs = [axs]
-            for i, ax in enumerate(axs):
+            for i, ax_i in enumerate(axs):
                 sns.histplot(data=df[df[group_by] == groups[i]].dropna(),
-                             x=x, ax=ax, **kwargs)
-                ax.set_title(groups[i])
+                             x=x, ax=ax_i, **kwargs)
+                ax_i.set_title(groups[i])
             return fig, axs
 
     sns.histplot(data=df, x=x, ax=ax, **kwargs)
@@ -556,7 +569,7 @@ def spatial_plot(
         raise ValueError(err_msg_ax)
 
     if feature is not None:
-        
+
         feature_index = feature_names.index(feature)
         feature_obs = feature + "spatial_plot"
         if vmin == -999:
diff --git a/tests/test_transformations/test_phenograph_clustering.py b/tests/test_transformations/test_phenograph_clustering.py
@@ -52,6 +52,15 @@ def test_typical_case(self, mock_phenograph):
         self.assertEqual(self.adata.uns['phenograph_features'],
                          self.features)
 
+    @patch('scanpy.external.tl.phenograph',
+           return_value=(np.random.randint(0, 3, 100), {}))
+    def test_layer_none_case(self, mock_phenograph):
+        # This test checks if the function works correctly when layer is None.
+        phenograph_clustering(self.adata, self.features, None)
+        self.assertIn('phenograph', self.adata.obs)
+        self.assertEqual(self.adata.uns['phenograph_features'],
+                         self.features)
+
     def test_invalid_adata(self):
         # This test checks if the function raises a TypeError when the
         # adata argument is not an AnnData object.
diff --git a/tests/test_transformations/test_rename_observations.py b/tests/test_transformations/test_rename_observations.py
@@ -61,21 +61,6 @@ def test_invalid_mappings(self):
                 {"5": "group_8"}
             )
 
-    def test_partial_mappings(self):
-        """Test rename_observations with partial mappings."""
-        mappings = {"0": "group_8", "1": "group_2"}
-        dest_observation = "renamed_observations"
-        result = rename_observations(
-            self.adata, "phenograph", dest_observation, mappings
-        )
-        expected = pd.Series(
-            ["group_8", "group_2", "group_8", "2", "group_2", "2"],
-            index=self.adata.obs.index,
-            name=dest_observation,
-            dtype="category"
-        )
-        pd.testing.assert_series_equal(result.obs[dest_observation], expected)
-
     def test_rename_observations_basic(self):
         """Test basic functionality of rename_observations."""
         data_matrix = np.random.rand(3, 4)
@@ -157,6 +142,26 @@ def test_multiple_observations_to_one_group(self):
             all(renamed_clusters == ["group_0", "group_0", "group_0"])
         )
 
+    def test_not_all_categories_covered(self):
+        """
+        Test rename_observations with mappings that do not cover
+        all unique values in the source observation.
+        """
+        mappings = {"0": "group_8", "1": "group_2"}
+        with self.assertRaises(ValueError) as cm:
+            rename_observations(
+                self.adata,
+                "phenograph",
+                "incomplete_dest",
+                mappings
+            )
+        self.assertEqual(
+            str(cm.exception),
+            "Not all unique values in the source observation are "
+            "covered by the mappings. "
+            "Please ensure that the mappings cover all unique values."
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_visualization/test_histogram.py b/tests/test_visualization/test_histogram.py
@@ -32,14 +32,24 @@ def test_histogram_observation_name(self):
         self.assertEqual(sum(p.get_height() for p in ax.patches), total_obs)
 
     def test_histogram_feature_group_by(self):
+        # Call the function with a feature_name and group_by argument,
+        # setting together=False to create separate plots for each group.
         fig, axs = histogram(
             self.adata,
             feature_name='marker1',
             group_by='obs2',
             together=False
         )
+
+        # Check that the function returned one Axes object per group
+        # (a sequence of Axes, not necessarily a list). In this case,
+        # we expect there to be 2 groups, as obs2 has 2 unique values.
         self.assertEqual(len(axs), 2)
 
+        # Check that each object in axs is indeed an Axes object.
+        self.assertIsInstance(axs[0], mpl.axes.Axes)
+        self.assertIsInstance(axs[1], mpl.axes.Axes)
+
     def test_both_feature_and_observation(self):
         err_msg = ("Cannot pass both feature_name and "
                    "observation_name, choose one")
diff --git a/tests/test_visualization/test_tsne_plot.py b/tests/test_visualization/test_tsne_plot.py
@@ -1,6 +1,7 @@
 import unittest
 import anndata
 import numpy as np
+import matplotlib.pyplot as plt
 from spac.visualization import tsne_plot
 
 
@@ -9,6 +10,8 @@ class TestTsnePlot(unittest.TestCase):
     def setUp(self):
         self.adata = anndata.AnnData(X=np.random.rand(10, 10))
         self.adata.obsm['X_tsne'] = np.random.rand(10, 2)
+        self.adata.obs['color_column'] = np.random.choice(
+            ['A', 'B', 'C'], size=10)
 
     def test_invalid_input_type(self):
         with self.assertRaises(ValueError) as cm:
@@ -24,6 +27,25 @@ def test_no_tsne_data(self):
                          "adata.obsm does not contain 'X_tsne',"
                          " perform t-SNE transformation first.")
 
+    def test_color_column(self):
+        fig, ax = tsne_plot(self.adata, color_column='color_column')
+        self.assertIsNotNone(fig)
+        self.assertIsNotNone(ax)
+
+    def test_ax_provided(self):
+        fig, ax_provided = plt.subplots()
+        fig_returned, ax_returned = tsne_plot(self.adata, ax=ax_provided)
+        self.assertIs(fig, fig_returned)
+        self.assertIs(ax_provided, ax_returned)
+
+    def test_color_column_invalid(self):
+        with self.assertRaises(KeyError) as cm:
+            tsne_plot(self.adata, color_column='invalid_column')
+        self.assertEqual(
+            str(cm.exception),
+            "\"'invalid_column' not found in adata.obs or adata.var.\""
+        )
+
 
 if __name__ == '__main__':
     unittest.main()