FNLCR-DMAP · ying39purdue · Apr 15, 2025 · Apr 2, 2025 · Apr 2, 2025 · Apr 2, 2025
diff --git a/src/spac/visualization.py b/src/spac/visualization.py
@@ -400,7 +400,7 @@ def tsne_plot(adata, color_column=None, ax=None, **kwargs):
 
 def histogram(adata, feature=None, annotation=None, layer=None,
               group_by=None, together=False, ax=None,
-              x_log_scale=False, y_log_scale=False, **kwargs):
+              x_log_scale=False, y_log_scale=False, facet=False, **kwargs):
     """
     Plot the histogram of cells based on a specific feature from adata.X
     or annotation from adata.obs.
@@ -442,6 +442,9 @@ def histogram(adata, feature=None, annotation=None, layer=None,
     y_log_scale : bool, default False
         If True, the y-axis will be set to log scale.
 
+    facet : bool, default False
+        If True, plot each `group_by` group as a facet (seaborn FacetGrid).
+
     **kwargs
         Additional keyword arguments passed to seaborn histplot function.
         Key arguments include:
@@ -562,7 +565,8 @@ def cal_bin_num(
     ):
         bins = max(int(2*(num_rows ** (1/3))), 1)
         print(f'Automatically calculated number of bins is: {bins}')
-        return(bins)
+
+        return (bins)
 
     num_rows = plot_data.shape[0]
 
@@ -624,6 +628,7 @@ def calculate_histogram(data, bins, bin_edges=None):
     if group_by:
         groups = df[group_by].dropna().unique().tolist()
         n_groups = len(groups)
+
         if n_groups == 0:
             raise ValueError("There must be at least one group to create a"
                              " histogram.")
@@ -660,62 +665,86 @@ def calculate_histogram(data, bins, bin_edges=None):
             if feature:
                 ax.set_title(f'Layer: {layer}')
             axs.append(ax)
+
         else:
-            fig, ax_array = plt.subplots(
-                n_groups, 1, figsize=(5, 5 * n_groups)
-            )
+            if not facet:
+                fig, ax_array = plt.subplots(
+                    n_groups, 1, figsize=(5, 5 * n_groups)
+                )
+
+                # Convert a single Axes object to a list
+                # Ensure ax_array is always iterable
+                if n_groups == 1:
+                    ax_array = [ax_array]
 
-            # Convert a single Axes object to a list
-            # Ensure ax_array is always iterable
-            if n_groups == 1:
-                ax_array = [ax_array]
-            else:
-                ax_array = ax_array.flatten()
-
-            for i, ax_i in enumerate(ax_array):
-                group_data = plot_data[plot_data[group_by] ==
-                             groups[i]][data_column]
-                hist_data = calculate_histogram(group_data, kwargs['bins'])
-
-                sns.histplot(data=hist_data, x="bin_center", ax=ax_i,
-                    weights='count', **kwargs)
-                # If plotting feature specify which layer
-                if feature:
-                    ax_i.set_title(f'{groups[i]} with Layer: {layer}')
                 else:
-                    ax_i.set_title(f'{groups[i]}')
+                    ax_array = ax_array.flatten()
+
+                for i, ax_i in enumerate(ax_array):
+                    group_data = plot_data[plot_data[group_by] ==
+                                           groups[i]][data_column]
+                    hist_data = calculate_histogram(group_data, kwargs['bins'])
+
+                    sns.histplot(data=hist_data, x="bin_center", ax=ax_i,
+                                 weights='count', **kwargs)
+                    # If plotting feature specify which layer
+                    if feature:
+                        ax_i.set_title(f'{groups[i]} with Layer: {layer}')
+                    else:
+                        ax_array = ax_array.flatten()
+
+                    # Set axis scales if y_log_scale is True
+                    if y_log_scale:
+                        ax_i.set_yscale('log')
+
+                    # Adjust x-axis label if x_log_scale is True
+                    if x_log_scale:
+                        xlabel = f'log({data_column})'
+                    else:
+                        xlabel = data_column
+                    ax_i.set_xlabel(xlabel)
+
+                    # Adjust y-axis label based on 'stat' parameter
+                    stat = kwargs.get('stat', 'count')
+                    ylabel_map = {
+                        'count': 'Count',
+                        'frequency': 'Frequency',
+                        'density': 'Density',
+                        'probability': 'Probability'
+                    }
+                    ylabel = ylabel_map.get(stat, 'Count')
+                    if y_log_scale:
+                        ylabel = f'log({ylabel})'
+                    ax_i.set_ylabel(ylabel)
+                    axs.append(ax_i)
+            else:
+                hist = sns.FacetGrid(plot_data, col=group_by)
+                # Map the histogram function to the grid
+                hist.map(sns.histplot, data_column, **kwargs)
 
-                # Set axis scales if y_log_scale is True
-                if y_log_scale:
-                    ax_i.set_yscale('log')
+                # Set rotation of label
+                hist.set_xticklabels(rotation=20, ha='right')
+
+                # Titles for each facet
+                hist.set_titles("{col_name}")
+
+                # Adjust the figure margins and facet spacing
+                hist.figure.subplots_adjust(left=.1,
+                                            top=0.85,
+                                            bottom=0.15,
+                                            hspace=0.3)
+
+                fig = hist.figure
+                axs.extend(hist.axes.flat)
+                hist_data = plot_data
 
-                # Adjust x-axis label if x_log_scale is True
-                if x_log_scale:
-                    xlabel = f'log({data_column})'
-                else:
-                    xlabel = data_column
-                ax_i.set_xlabel(xlabel)
-
-                # Adjust y-axis label based on 'stat' parameter
-                stat = kwargs.get('stat', 'count')
-                ylabel_map = {
-                    'count': 'Count',
-                    'frequency': 'Frequency',
-                    'density': 'Density',
-                    'probability': 'Probability'
-                }
-                ylabel = ylabel_map.get(stat, 'Count')
-                if y_log_scale:
-                    ylabel = f'log({ylabel})'
-                ax_i.set_ylabel(ylabel)
-
-                axs.append(ax_i)
     else:
         # Precompute histogram data for single plot
         hist_data = calculate_histogram(plot_data[data_column], kwargs['bins'])
         if pd.api.types.is_numeric_dtype(plot_data[data_column]):
             ax.set_xlim(hist_data['bin_left'].min(),
-            hist_data['bin_right'].max())
+                        hist_data['bin_right'].max())
+
 
         sns.histplot(
             data=hist_data,
@@ -730,35 +759,38 @@ def calculate_histogram(data, bins, bin_edges=None):
             ax.set_title(f'Layer: {layer}')
         axs.append(ax)
 
-    # Set axis scales if y_log_scale is True
-    if y_log_scale:
-        ax.set_yscale('log')
+    axes = axs if isinstance(axs, (list, np.ndarray)) else [axs]
+    for ax in axes:
+        # Set axis scales if y_log_scale is True
+        if y_log_scale:
+            ax.set_yscale('log')
 
-    # Adjust x-axis label if x_log_scale is True
-    if x_log_scale:
-        xlabel = f'log({data_column})'
-    else:
-        xlabel = data_column
-    ax.set_xlabel(xlabel)
-
-    # Adjust y-axis label based on 'stat' parameter
-    stat = kwargs.get('stat', 'count')
-    ylabel_map = {
-        'count': 'Count',
-        'frequency': 'Frequency',
-        'density': 'Density',
-        'probability': 'Probability'
-    }
-    ylabel = ylabel_map.get(stat, 'Count')
-    if y_log_scale:
-        ylabel = f'log({ylabel})'
-    ax.set_ylabel(ylabel)
+        # Adjust x-axis label if x_log_scale is True
+        if x_log_scale:
+            xlabel = f'log({data_column})'
+        else:
+            xlabel = data_column
+        ax.set_xlabel(xlabel)
+
+        # Adjust y-axis label based on 'stat' parameter
+        stat = kwargs.get('stat', 'count')
+        ylabel_map = {
+            'count': 'Count',
+            'frequency': 'Frequency',
+            'density': 'Density',
+            'probability': 'Probability'
+        }
+        ylabel = ylabel_map.get(stat, 'Count')
+        if y_log_scale:
+            ylabel = f'log({ylabel})'
+        ax.set_ylabel(ylabel)
 
     if len(axs) == 1:
         return {"fig": fig, "axs": axs[0], "df": hist_data}
     else:
         return {"fig": fig, "axs": axs, "df": hist_data}
 
+
 def heatmap(adata, column, layer=None, **kwargs):
     """
     Plot the heatmap of the mean feature of cells that belong to a `column`.

diff --git a/tests/test_visualization/test_histogram.py b/tests/test_visualization/test_histogram.py
@@ -222,7 +222,7 @@ def test_y_log_scale_axis(self):
 
     def test_y_log_scale_label(self):
         """Test that y-axis label is updated when y_log_scale is True."""
-        fig, ax, dfd = histogram(
+        fig, ax, df = histogram(
             self.adata, 
             feature='marker1', 
             y_log_scale=True
@@ -413,6 +413,44 @@ def test_default_bins_calculation(self):
         expected_bins = max(int(2 * (self.adata.shape[0] ** (1 / 3))), 1)
         self.assertEqual(n_bins, expected_bins)
 
+    def test_facet_plot(self):
+        """facet=True with group_by yields one titled, labelled facet per group."""
+        fig, ax, df = histogram(
+            self.adata,
+            feature='marker1',
+            group_by='annotation2',
+            facet=True,
+        ).values()
+
+        # With more than one group, "axs" is returned as a collection of Axes
+        self.assertIsInstance(ax, (list, np.ndarray),
+                              "Output is not a multi-axis grid")
+
+        # The number of facets must equal the number of non-null groups
+        unique_groups = self.adata.obs['annotation2'].dropna().unique()
+        self.assertEqual(len(ax), len(unique_groups),
+                         f"Expected {len(unique_groups)}"
+                         f" facet plots, got {len(ax)}.")
+
+        # Validate each facet: title, x-axis label, and y-axis label
+        for i, axis in enumerate(ax):
+            # The title must be non-empty and mention one of the group names
+            title = axis.get_title()
+            self.assertTrue(title, f"Facet {i} is missing a title.")
+            self.assertTrue(any(str(group) in title
+                            for group in unique_groups),
+                            f"Title '{title}' does not contain"
+                            f" any expected group names.")
+
+            # x label names the plotted feature; y label is a known stat name
+            self.assertIn('marker1', axis.get_xlabel(),
+                          f"Facet {i} X-axis label"
+                          f" '{axis.get_xlabel()}' is incorrect.")
+            self.assertIn(axis.get_ylabel(),
+                          ['Count', 'Frequency', 'Density', 'Probability'],
+                          f"Facet {i} Y-axis label"
+                          f" '{axis.get_ylabel()}' is not a valid stat.")
+
 
 if __name__ == '__main__':
     unittest.main()