Skip to content
172 changes: 102 additions & 70 deletions src/spac/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ def tsne_plot(adata, color_column=None, ax=None, **kwargs):

def histogram(adata, feature=None, annotation=None, layer=None,
group_by=None, together=False, ax=None,
x_log_scale=False, y_log_scale=False, **kwargs):
x_log_scale=False, y_log_scale=False, facet=False, **kwargs):
"""
Plot the histogram of cells based on a specific feature from adata.X
or annotation from adata.obs.
Expand Down Expand Up @@ -442,6 +442,9 @@ def histogram(adata, feature=None, annotation=None, layer=None,
y_log_scale : bool, default False
If True, the y-axis will be set to log scale.

facet : bool, default False
If True and `group_by` is provided, the per-group histograms are drawn
as facets (one column per group) using a seaborn FacetGrid.

**kwargs
Additional keyword arguments passed to seaborn histplot function.
Key arguments include:
Expand Down Expand Up @@ -562,7 +565,8 @@ def cal_bin_num(
):
bins = max(int(2*(num_rows ** (1/3))), 1)
print(f'Automatically calculated number of bins is: {bins}')
return(bins)

return (bins)

num_rows = plot_data.shape[0]

Expand Down Expand Up @@ -624,6 +628,7 @@ def calculate_histogram(data, bins, bin_edges=None):
if group_by:
groups = df[group_by].dropna().unique().tolist()
n_groups = len(groups)

if n_groups == 0:
raise ValueError("There must be at least one group to create a"
" histogram.")
Expand Down Expand Up @@ -660,62 +665,86 @@ def calculate_histogram(data, bins, bin_edges=None):
if feature:
ax.set_title(f'Layer: {layer}')
axs.append(ax)

else:
fig, ax_array = plt.subplots(
n_groups, 1, figsize=(5, 5 * n_groups)
)
if not facet:
fig, ax_array = plt.subplots(
n_groups, 1, figsize=(5, 5 * n_groups)
)

# Convert a single Axes object to a list
# Ensure ax_array is always iterable
if n_groups == 1:
ax_array = [ax_array]

# Convert a single Axes object to a list
# Ensure ax_array is always iterable
if n_groups == 1:
ax_array = [ax_array]
else:
ax_array = ax_array.flatten()

for i, ax_i in enumerate(ax_array):
group_data = plot_data[plot_data[group_by] ==
groups[i]][data_column]
hist_data = calculate_histogram(group_data, kwargs['bins'])

sns.histplot(data=hist_data, x="bin_center", ax=ax_i,
weights='count', **kwargs)
# If plotting feature specify which layer
if feature:
ax_i.set_title(f'{groups[i]} with Layer: {layer}')
else:
ax_i.set_title(f'{groups[i]}')
ax_array = ax_array.flatten()

for i, ax_i in enumerate(ax_array):
group_data = plot_data[plot_data[group_by] ==
groups[i]][data_column]
hist_data = calculate_histogram(group_data, kwargs['bins'])

sns.histplot(data=hist_data, x="bin_center", ax=ax_i,
weights='count', **kwargs)
# If plotting feature specify which layer
if feature:
ax_i.set_title(f'{groups[i]} with Layer: {layer}')
else:
ax_array = ax_array.flatten()

# Set axis scales if y_log_scale is True
if y_log_scale:
ax_i.set_yscale('log')

# Adjust x-axis label if x_log_scale is True
if x_log_scale:
xlabel = f'log({data_column})'
else:
xlabel = data_column
ax_i.set_xlabel(xlabel)

# Adjust y-axis label based on 'stat' parameter
stat = kwargs.get('stat', 'count')
ylabel_map = {
'count': 'Count',
'frequency': 'Frequency',
'density': 'Density',
'probability': 'Probability'
}
ylabel = ylabel_map.get(stat, 'Count')
if y_log_scale:
ylabel = f'log({ylabel})'
ax_i.set_ylabel(ylabel)
axs.append(ax_i)
else:
hist = sns.FacetGrid(plot_data, col=group_by)
# Map the histogram function to the grid
hist.map(sns.histplot, data_column, **kwargs)

# Set axis scales if y_log_scale is True
if y_log_scale:
ax_i.set_yscale('log')
# Set rotation of label
hist.set_xticklabels(rotation=20, ha='right')

# Titles for each facet
hist.set_titles("{col_name}")

# Adjust the figure margins and subplot spacing
hist.figure.subplots_adjust(left=.1,
top=0.85,
bottom=0.15,
hspace=0.3)

fig = hist.figure
axs.extend(hist.axes.flat)
hist_data = plot_data

# Adjust x-axis label if x_log_scale is True
if x_log_scale:
xlabel = f'log({data_column})'
else:
xlabel = data_column
ax_i.set_xlabel(xlabel)

# Adjust y-axis label based on 'stat' parameter
stat = kwargs.get('stat', 'count')
ylabel_map = {
'count': 'Count',
'frequency': 'Frequency',
'density': 'Density',
'probability': 'Probability'
}
ylabel = ylabel_map.get(stat, 'Count')
if y_log_scale:
ylabel = f'log({ylabel})'
ax_i.set_ylabel(ylabel)

axs.append(ax_i)
else:
# Precompute histogram data for single plot
hist_data = calculate_histogram(plot_data[data_column], kwargs['bins'])
if pd.api.types.is_numeric_dtype(plot_data[data_column]):
ax.set_xlim(hist_data['bin_left'].min(),
hist_data['bin_right'].max())
hist_data['bin_right'].max())


sns.histplot(
data=hist_data,
Expand All @@ -730,35 +759,38 @@ def calculate_histogram(data, bins, bin_edges=None):
ax.set_title(f'Layer: {layer}')
axs.append(ax)

# Set axis scales if y_log_scale is True
if y_log_scale:
ax.set_yscale('log')
axes = axs if isinstance(axs, (list, np.ndarray)) else [axs]
for ax in axes:
# Set axis scales if y_log_scale is True
if y_log_scale:
ax.set_yscale('log')

# Adjust x-axis label if x_log_scale is True
if x_log_scale:
xlabel = f'log({data_column})'
else:
xlabel = data_column
ax.set_xlabel(xlabel)

# Adjust y-axis label based on 'stat' parameter
stat = kwargs.get('stat', 'count')
ylabel_map = {
'count': 'Count',
'frequency': 'Frequency',
'density': 'Density',
'probability': 'Probability'
}
ylabel = ylabel_map.get(stat, 'Count')
if y_log_scale:
ylabel = f'log({ylabel})'
ax.set_ylabel(ylabel)
# Adjust x-axis label if x_log_scale is True
if x_log_scale:
xlabel = f'log({data_column})'
else:
xlabel = data_column
ax.set_xlabel(xlabel)

# Adjust y-axis label based on 'stat' parameter
stat = kwargs.get('stat', 'count')
ylabel_map = {
'count': 'Count',
'frequency': 'Frequency',
'density': 'Density',
'probability': 'Probability'
}
ylabel = ylabel_map.get(stat, 'Count')
if y_log_scale:
ylabel = f'log({ylabel})'
ax.set_ylabel(ylabel)

if len(axs) == 1:
return {"fig": fig, "axs": axs[0], "df": hist_data}
else:
return {"fig": fig, "axs": axs, "df": hist_data}


def heatmap(adata, column, layer=None, **kwargs):
"""
Plot the heatmap of the mean feature of cells that belong to a `column`.
Expand Down
40 changes: 39 additions & 1 deletion tests/test_visualization/test_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def test_y_log_scale_axis(self):

def test_y_log_scale_label(self):
"""Test that y-axis label is updated when y_log_scale is True."""
fig, ax, dfd = histogram(
fig, ax, df = histogram(
self.adata,
feature='marker1',
y_log_scale=True
Expand Down Expand Up @@ -413,6 +413,44 @@ def test_default_bins_calculation(self):
expected_bins = max(int(2 * (self.adata.shape[0] ** (1 / 3))), 1)
self.assertEqual(n_bins, expected_bins)

def test_facet_plot(self):
    """Test that ``facet=True`` with ``group_by`` yields one facet per group.

    Verifies that the returned axes form a collection with one entry per
    non-null value of ``annotation2`` and that every facet has a title
    containing a group name, an x-label naming the plotted feature, and
    a y-label drawn from the known stat labels.
    """
    fig, ax, df = histogram(
        self.adata,
        feature='marker1',
        group_by='annotation2',
        facet=True,
    ).values()

    # Check if axs is a collection (list/array of Axes)
    self.assertIsInstance(ax, (list, np.ndarray),
                          "Output is not a multi-axis grid")

    # Check number of facets equals number of unique groups
    unique_groups = self.adata.obs['annotation2'].dropna().unique()
    self.assertEqual(len(ax), len(unique_groups),
                     f"Expected {len(unique_groups)}"
                     f" facet plots, got {len(ax)}.")

    # Group names are loop-invariant, so stringify them once up front.
    group_names = [str(group) for group in unique_groups]
    valid_ylabels = ['Count', 'Frequency', 'Density', 'Probability']

    # Validate each axis: title, xlabel, and ylabel
    for i, axis in enumerate(ax):
        # Check that title is set and matches the group
        title = axis.get_title()
        self.assertTrue(title, f"Facet {i} is missing a title.")
        self.assertTrue(any(name in title for name in group_names),
                        f"Title '{title}' does not contain"
                        " any expected group names.")

        # Check X and Y labels
        self.assertIn('marker1', axis.get_xlabel(),
                      f"Facet {i} X-axis label"
                      f" '{axis.get_xlabel()}' is incorrect.")
        self.assertIn(axis.get_ylabel(),
                      valid_ylabels,
                      f"Facet {i} Y-axis label"
                      f" '{axis.get_ylabel()}' is not a valid stat.")


if __name__ == '__main__':
unittest.main()