From 6ce438cbd31e17d32e2f054d38a648aa6517f621 Mon Sep 17 00:00:00 2001 From: ying39purdue Date: Tue, 15 Apr 2025 17:58:55 -0400 Subject: [PATCH 1/9] histogram rebase to dev --- src/spac/visualization.py | 185 ++++++++++++--------- tests/test_visualization/test_histogram.py | 40 ++++- 2 files changed, 145 insertions(+), 80 deletions(-) diff --git a/src/spac/visualization.py b/src/spac/visualization.py index 0ab0ee11..aa4098ac 100644 --- a/src/spac/visualization.py +++ b/src/spac/visualization.py @@ -398,7 +398,7 @@ def tsne_plot(adata, color_column=None, ax=None, **kwargs): def histogram(adata, feature=None, annotation=None, layer=None, group_by=None, together=False, ax=None, - x_log_scale=False, y_log_scale=False, **kwargs): + x_log_scale=False, y_log_scale=False, facet=False, **kwargs): """ Plot the histogram of cells based on a specific feature from adata.X or annotation from adata.obs. @@ -440,6 +440,9 @@ def histogram(adata, feature=None, annotation=None, layer=None, y_log_scale : bool, default False If True, the y-axis will be set to log scale. + facet : bool, default False + If True, group by function outputs facet plots + **kwargs Additional keyword arguments passed to seaborn histplot function. 
Key arguments include: @@ -612,7 +615,7 @@ def calculate_histogram(data, bins, bin_edges=None): else: counts = data.value_counts().sort_index() return pd.DataFrame({ - 'bin_center': counts.index, + 'bin_center': counts.index, 'bin_left': counts.index, 'bin_right': counts.index, 'count': counts.values @@ -641,7 +644,7 @@ def calculate_histogram(data, bins, bin_edges=None): group_data = plot_data[ plot_data[group_by] == group ][data_column] - group_hist = calculate_histogram(group_data, kwargs['bins'], + group_hist = calculate_histogram(group_data, kwargs['bins'], bin_edges=global_bin_edges) group_hist[group_by] = group hist_data.append(group_hist) @@ -651,112 +654,136 @@ def calculate_histogram(data, bins, bin_edges=None): kwargs.setdefault("multiple", "stack") kwargs.setdefault("element", "bars") - - sns.histplot(data=hist_data, x='bin_center', weights='count', + sns.histplot(data=hist_data, x='bin_center', weights='count', hue=group_by, ax=ax, **kwargs) # If plotting feature specify which layer if feature: ax.set_title(f'Layer: {layer}') axs.append(ax) else: - fig, ax_array = plt.subplots( - n_groups, 1, figsize=(5, 5 * n_groups) - ) + if not facet: + fig, ax_array = plt.subplots( + n_groups, 1, figsize=(5, 5 * n_groups) + ) - # Convert a single Axes object to a list - # Ensure ax_array is always iterable - if n_groups == 1: - ax_array = [ax_array] - else: - ax_array = ax_array.flatten() - - for i, ax_i in enumerate(ax_array): - group_data = plot_data[plot_data[group_by] == - groups[i]][data_column] - hist_data = calculate_histogram(group_data, kwargs['bins']) - - sns.histplot(data=hist_data, x="bin_center", ax=ax_i, - weights='count', **kwargs) - # If plotting feature specify which layer - if feature: - ax_i.set_title(f'{groups[i]} with Layer: {layer}') + # Convert a single Axes object to a list + # Ensure ax_array is always iterable + if n_groups == 1: + ax_array = [ax_array] else: - ax_i.set_title(f'{groups[i]}') + ax_array = ax_array.flatten() + + for i, ax_i 
in enumerate(ax_array): + group_data = plot_data[plot_data[group_by] == + groups[i]][data_column] + hist_data = calculate_histogram(group_data, kwargs['bins']) + + sns.histplot(data=hist_data, x="bin_center", ax=ax_i, + weights='count', **kwargs) + # If plotting feature specify which layer + if feature: + ax_i.set_title(f'{groups[i]} with Layer: {layer}') + else: + ax_i.set_title(f'{groups[i]}') + + # Set axis scales if y_log_scale is True + if y_log_scale: + ax_i.set_yscale('log') + + # Adjust x-axis label if x_log_scale is True + if x_log_scale: + xlabel = f'log({data_column})' + else: + xlabel = data_column + ax_i.set_xlabel(xlabel) + + # Adjust y-axis label based on 'stat' parameter + stat = kwargs.get('stat', 'count') + ylabel_map = { + 'count': 'Count', + 'frequency': 'Frequency', + 'density': 'Density', + 'probability': 'Probability' + } + ylabel = ylabel_map.get(stat, 'Count') + if y_log_scale: + ylabel = f'log({ylabel})' + ax_i.set_ylabel(ylabel) + + axs.append(ax_i) + else: + hist = sns.FacetGrid(plot_data, col=group_by) + # Map the histogram function to the grid + hist.map(sns.histplot, data_column, **kwargs) - # Set axis scales if y_log_scale is True - if y_log_scale: - ax_i.set_yscale('log') + # Set rotation of label + hist.set_xticklabels(rotation=20, ha='right') + + # Titles for each facet + hist.set_titles("{col_name}") + + # Adjust top margin + hist.figure.subplots_adjust(left=.1, + top=0.85, + bottom=0.15, + hspace=0.3) + + fig = hist.figure + axs.extend(hist.axes.flat) - # Adjust x-axis label if x_log_scale is True - if x_log_scale: - xlabel = f'log({data_column})' - else: - xlabel = data_column - ax_i.set_xlabel(xlabel) - - # Adjust y-axis label based on 'stat' parameter - stat = kwargs.get('stat', 'count') - ylabel_map = { - 'count': 'Count', - 'frequency': 'Frequency', - 'density': 'Density', - 'probability': 'Probability' - } - ylabel = ylabel_map.get(stat, 'Count') - if y_log_scale: - ylabel = f'log({ylabel})' - ax_i.set_ylabel(ylabel) - -
axs.append(ax_i) else: # Precompute histogram data for single plot hist_data = calculate_histogram(plot_data[data_column], kwargs['bins']) if pd.api.types.is_numeric_dtype(plot_data[data_column]): - ax.set_xlim(hist_data['bin_left'].min(), - hist_data['bin_right'].max()) - + ax.set_xlim(hist_data['bin_left'].min(), + hist_data['bin_right'].max()) + sns.histplot( - data=hist_data, + data=hist_data, x='bin_center', - weights="count", - ax=ax, + weights="count", + ax=ax, **kwargs ) - + # If plotting feature specify which layer if feature: ax.set_title(f'Layer: {layer}') axs.append(ax) - # Set axis scales if y_log_scale is True - if y_log_scale: - ax.set_yscale('log') + axes = axs if isinstance(axs, (list, np.ndarray)) else [axs] + for ax in axes: + # Set axis scales if y_log_scale is True + if y_log_scale: + ax.set_yscale('log') - # Adjust x-axis label if x_log_scale is True - if x_log_scale: - xlabel = f'log({data_column})' - else: - xlabel = data_column - ax.set_xlabel(xlabel) - - # Adjust y-axis label based on 'stat' parameter - stat = kwargs.get('stat', 'count') - ylabel_map = { - 'count': 'Count', - 'frequency': 'Frequency', - 'density': 'Density', - 'probability': 'Probability' - } - ylabel = ylabel_map.get(stat, 'Count') - if y_log_scale: - ylabel = f'log({ylabel})' - ax.set_ylabel(ylabel) + # Adjust x-axis label if x_log_scale is True + if x_log_scale: + xlabel = f'log({data_column})' + else: + xlabel = data_column + ax.set_xlabel(xlabel) + + # Adjust y-axis label based on 'stat' parameter + stat = kwargs.get('stat', 'count') + ylabel_map = { + 'count': 'Count', + 'frequency': 'Frequency', + 'density': 'Density', + 'probability': 'Probability' + } + ylabel = ylabel_map.get(stat, 'Count') + if y_log_scale: + ylabel = f'log({ylabel})' + ax.set_ylabel(ylabel) + ax.tick_params(axis='x', rotation=90, labelsize=10) if len(axs) == 1: return {"fig": fig, "axs": axs[0], "df": plot_data} else: return {"fig": fig, "axs": axs, "df": plot_data} + def heatmap(adata, column, 
layer=None, **kwargs): """ Plot the heatmap of the mean feature of cells that belong to a `column`. diff --git a/tests/test_visualization/test_histogram.py b/tests/test_visualization/test_histogram.py index f8ba95ea..999b66e5 100644 --- a/tests/test_visualization/test_histogram.py +++ b/tests/test_visualization/test_histogram.py @@ -222,7 +222,7 @@ def test_y_log_scale_axis(self): def test_y_log_scale_label(self): """Test that y-axis label is updated when y_log_scale is True.""" - fig, ax, dfd = histogram( + fig, ax, df = histogram( self.adata, feature='marker1', y_log_scale=True @@ -413,6 +413,44 @@ def test_default_bins_calculation(self): expected_bins = max(int(2 * (self.adata.shape[0] ** (1 / 3))), 1) self.assertEqual(n_bins, expected_bins) + def test_facet_plot(self): + """Test that facet plot works.""" + fig, ax, df = histogram( + self.adata, + feature='marker1', + group_by='annotation2', + facet=True, + ).values() + + # Check if axs is a collection (list/array of Axes) + self.assertIsInstance(ax, (list, np.ndarray), + "Output is not a multi-axis grid") + + # Check number of facets equals number of unique groups + unique_groups = self.adata.obs['annotation2'].dropna().unique() + self.assertEqual(len(ax), len(unique_groups), + f"Expected {len(unique_groups)}" + f" facet plots, got {len(ax)}.") + + # Validate each axis: title, xlabel, and ylabel + for i, axis in enumerate(ax): + # Check that title is set and matches the group + title = axis.get_title() + self.assertTrue(title, f"Facet {i} is missing a title.") + self.assertTrue(any(str(group) in title + for group in unique_groups), + f"Title '{title}' does not contain" + f" any expected group names.") + + # Check X and Y labels + self.assertIn('marker1', axis.get_xlabel(), + f"Facet {i} X-axis label" + f" '{axis.get_xlabel()}' is incorrect.") + self.assertIn(axis.get_ylabel(), + ['Count', 'Frequency', 'Density', 'Probability'], + f"Facet {i} Y-axis label" + f" '{axis.get_ylabel()}' is not a valid stat.") + if
__name__ == '__main__': unittest.main() From 88dd59c04d8006cd1f11ed565c34e38d1aab74d7 Mon Sep 17 00:00:00 2001 From: ying39purdue Date: Tue, 1 Apr 2025 22:40:08 -0400 Subject: [PATCH 2/9] add facet plots on histogram group by --- src/spac/visualization.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/spac/visualization.py b/src/spac/visualization.py index aa4098ac..cef3a54e 100644 --- a/src/spac/visualization.py +++ b/src/spac/visualization.py @@ -563,7 +563,8 @@ def cal_bin_num( ): bins = max(int(2*(num_rows ** (1/3))), 1) print(f'Automatically calculated number of bins is: {bins}') - return(bins) + + return (bins) num_rows = plot_data.shape[0] @@ -625,6 +626,7 @@ def calculate_histogram(data, bins, bin_edges=None): if group_by: groups = df[group_by].dropna().unique().tolist() n_groups = len(groups) + if n_groups == 0: raise ValueError("There must be at least one group to create a" " histogram.") @@ -660,6 +662,7 @@ def calculate_histogram(data, bins, bin_edges=None): if feature: ax.set_title(f'Layer: {layer}') axs.append(ax) + else: if not facet: fig, ax_array = plt.subplots( @@ -709,7 +712,6 @@ def calculate_histogram(data, bins, bin_edges=None): if y_log_scale: ylabel = f'log({ylabel})' ax_i.set_ylabel(ylabel) - axs.append(ax_i) else: hist = sns.FacetGrid(plot_data, col=group_by) From 193a666f81dc1931377487742b2edb721049aeea Mon Sep 17 00:00:00 2001 From: ying39purdue Date: Tue, 1 Apr 2025 22:50:35 -0400 Subject: [PATCH 3/9] add unittest for facet output --- tests/test_visualization/test_histogram.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_visualization/test_histogram.py b/tests/test_visualization/test_histogram.py index 999b66e5..7256078d 100644 --- a/tests/test_visualization/test_histogram.py +++ b/tests/test_visualization/test_histogram.py @@ -412,6 +412,18 @@ def test_default_bins_calculation(self): # Using 2 * (n ** 1/3) heuristic for default bins expected_bins = max(int(2 * 
(self.adata.shape[0] ** (1 / 3))), 1) self.assertEqual(n_bins, expected_bins) + + def test_facet_plot(self): + """Test that facet plot works.""" + fig, ax = histogram( + self.adata, + feature='marker1', + group_by='annotation2', + facet=True, + ) + + # Check if axs is a collection (list/array of Axes) + self.assertIsInstance(ax, (list, np.ndarray), "Output is not a multi-axis grid") def test_facet_plot(self): """Test that facet plot works.""" From ae848f3570cead0fb3a080a51c5e07e08e2d950a Mon Sep 17 00:00:00 2001 From: ying39purdue Date: Wed, 2 Apr 2025 12:46:07 -0400 Subject: [PATCH 4/9] formatting of facet plot, and unittest --- src/spac/visualization.py | 5 +++++ tests/test_visualization/test_histogram.py | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/spac/visualization.py b/src/spac/visualization.py index cef3a54e..fffedf5a 100644 --- a/src/spac/visualization.py +++ b/src/spac/visualization.py @@ -681,8 +681,13 @@ def calculate_histogram(data, bins, bin_edges=None): groups[i]][data_column] hist_data = calculate_histogram(group_data, kwargs['bins']) +<<<<<<< HEAD sns.histplot(data=hist_data, x="bin_center", ax=ax_i, weights='count', **kwargs) +======= + sns.histplot(data=group_data, x=data_column, + ax=ax_i, **kwargs) +>>>>>>> 11127cf (formatting of facet plot, and unittest) # If plotting feature specify which layer if feature: ax_i.set_title(f'{groups[i]} with Layer: {layer}') diff --git a/tests/test_visualization/test_histogram.py b/tests/test_visualization/test_histogram.py index 7256078d..bf35dc27 100644 --- a/tests/test_visualization/test_histogram.py +++ b/tests/test_visualization/test_histogram.py @@ -423,7 +423,8 @@ def test_facet_plot(self): ) # Check if axs is a collection (list/array of Axes) - self.assertIsInstance(ax, (list, np.ndarray), "Output is not a multi-axis grid") + self.assertIsInstance(ax, (list, np.ndarray), + "Output is not a multi-axis grid") def test_facet_plot(self): """Test that facet plot works.""" From 
40572f04d751a0546f2e5e94257e3d32060c6728 Mon Sep 17 00:00:00 2001 From: ying39purdue Date: Wed, 2 Apr 2025 12:47:18 -0400 Subject: [PATCH 5/9] formatting of facet plot, and unittest --- src/spac/visualization.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/spac/visualization.py b/src/spac/visualization.py index fffedf5a..cef3a54e 100644 --- a/src/spac/visualization.py +++ b/src/spac/visualization.py @@ -681,13 +681,8 @@ def calculate_histogram(data, bins, bin_edges=None): groups[i]][data_column] hist_data = calculate_histogram(group_data, kwargs['bins']) -<<<<<<< HEAD sns.histplot(data=hist_data, x="bin_center", ax=ax_i, weights='count', **kwargs) -======= - sns.histplot(data=group_data, x=data_column, - ax=ax_i, **kwargs) ->>>>>>> 11127cf (formatting of facet plot, and unittest) # If plotting feature specify which layer if feature: ax_i.set_title(f'{groups[i]} with Layer: {layer}') From 78c15f64a6bac0c04ad2bb44febad6e0f4bac3bd Mon Sep 17 00:00:00 2001 From: ying39purdue Date: Tue, 8 Apr 2025 15:52:20 -0400 Subject: [PATCH 6/9] remove axis rotation and formatting --- src/spac/visualization.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/spac/visualization.py b/src/spac/visualization.py index cef3a54e..20aefb6c 100644 --- a/src/spac/visualization.py +++ b/src/spac/visualization.py @@ -778,7 +778,6 @@ def calculate_histogram(data, bins, bin_edges=None): if y_log_scale: ylabel = f'log({ylabel})' ax.set_ylabel(ylabel) - ax.tick_params(axis='x', rotation=90, labelsize=10) if len(axs) == 1: return {"fig": fig, "axs": axs[0], "df": plot_data} From 157902e0e1ad8a28826225d19382ad7393f2e512 Mon Sep 17 00:00:00 2001 From: ying39purdue Date: Thu, 10 Apr 2025 23:44:22 -0400 Subject: [PATCH 7/9] Unittest addition of element numbers, title, and axis labels check --- tests/test_visualization/test_histogram.py | 31 +++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/test_visualization/test_histogram.py 
b/tests/test_visualization/test_histogram.py index bf35dc27..702d429a 100644 --- a/tests/test_visualization/test_histogram.py +++ b/tests/test_visualization/test_histogram.py @@ -412,7 +412,7 @@ def test_default_bins_calculation(self): # Using 2 * (n ** 1/3) heuristic for default bins expected_bins = max(int(2 * (self.adata.shape[0] ** (1 / 3))), 1) self.assertEqual(n_bins, expected_bins) - + def test_facet_plot(self): """Test that facet plot works.""" fig, ax = histogram( @@ -421,10 +421,35 @@ def test_facet_plot(self): group_by='annotation2', facet=True, ) - + # Check if axs is a collection (list/array of Axes) self.assertIsInstance(ax, (list, np.ndarray), - "Output is not a multi-axis grid") + "Output is not a multi-axis grid") + + # Check number of facets equals number of unique groups + unique_groups = self.adata.obs['annotation2'].dropna().unique() + self.assertEqual(len(ax), len(unique_groups), + f"Expected {len(unique_groups)}" + f" facet plots, got {len(ax)}.") + + # Validate each axis: title, xlabel, and ylabel + for i, axis in enumerate(ax): + # Check that title is set and matches the group + title = axis.get_title() + self.assertTrue(title, f"Facet {i} is missing a title.") + self.assertTrue(any(str(group) in title + for group in unique_groups), + f"Title '{title}' does not contain" + f"any expected group names.") + + # Check X and Y labels + self.assertIn('marker1', axis.get_xlabel(), + f"Facet {i} X-axis label" + f" '{axis.get_xlabel()}' is incorrect.") + self.assertIn(axis.get_ylabel(), + ['Count', 'Frequency', 'Density', 'Probability'], + f"Facet {i} Y-axis label" + f" '{axis.get_ylabel()}' is not a valid stat.") def test_facet_plot(self): """Test that facet plot works.""" From a143bd2eb74c4b951ee8ec301087402c8d072e42 Mon Sep 17 00:00:00 2001 From: ying39purdue Date: Tue, 15 Apr 2025 18:10:40 -0400 Subject: [PATCH 8/9] unittest rebase with dev --- tests/test_visualization/test_histogram.py | 38 ---------------------- 1 file changed, 38 deletions(-) 
diff --git a/tests/test_visualization/test_histogram.py b/tests/test_visualization/test_histogram.py index 702d429a..999b66e5 100644 --- a/tests/test_visualization/test_histogram.py +++ b/tests/test_visualization/test_histogram.py @@ -413,44 +413,6 @@ def test_default_bins_calculation(self): expected_bins = max(int(2 * (self.adata.shape[0] ** (1 / 3))), 1) self.assertEqual(n_bins, expected_bins) - def test_facet_plot(self): - """Test that facet plot works.""" - fig, ax = histogram( - self.adata, - feature='marker1', - group_by='annotation2', - facet=True, - ) - - # Check if axs is a collection (list/array of Axes) - self.assertIsInstance(ax, (list, np.ndarray), - "Output is not a multi-axis grid") - - # Check number of facets equals number of unique groups - unique_groups = self.adata.obs['annotation2'].dropna().unique() - self.assertEqual(len(ax), len(unique_groups), - f"Expected {len(unique_groups)}" - f" facet plots, got {len(ax)}.") - - # Validate each axis: title, xlabel, and ylabel - for i, axis in enumerate(ax): - # Check that title is set and matches the group - title = axis.get_title() - self.assertTrue(title, f"Facet {i} is missing a title.") - self.assertTrue(any(str(group) in title - for group in unique_groups), - f"Title '{title}' does not contain" - f"any expected group names.") - - # Check X and Y labels - self.assertIn('marker1', axis.get_xlabel(), - f"Facet {i} X-axis label" - f" '{axis.get_xlabel()}' is incorrect.") - self.assertIn(axis.get_ylabel(), - ['Count', 'Frequency', 'Density', 'Probability'], - f"Facet {i} Y-axis label" - f" '{axis.get_ylabel()}' is not a valid stat.") - def test_facet_plot(self): """Test that facet plot works.""" fig, ax, df = histogram( From f3ad9512424d85d211610dcfd7837be518a17579 Mon Sep 17 00:00:00 2001 From: ying39purdue Date: Wed, 16 Apr 2025 09:39:55 -0400 Subject: [PATCH 9/9] correct facet function return --- src/spac/visualization.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spac/visualization.py 
b/src/spac/visualization.py index 20aefb6c..44bdc310 100644 --- a/src/spac/visualization.py +++ b/src/spac/visualization.py @@ -732,6 +732,7 @@ def calculate_histogram(data, bins, bin_edges=None): fig = hist.figure axs.extend(hist.axes.flat) + hist_data = plot_data else: # Precompute histogram data for single plot