From 8027c0bc88c903895bde5b111044e37d3641e226 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 13:36:41 +0100 Subject: [PATCH 01/16] feature: Initial lazy tiff reader --- src/spatialdata_io/readers/generic.py | 186 ++++++++++++++++++++++++-- 1 file changed, 176 insertions(+), 10 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 24ee7071..30a16dd7 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -1,23 +1,34 @@ from __future__ import annotations import warnings -from collections.abc import Sequence +from collections.abc import Callable, Sequence from pathlib import Path +from typing import Any, Protocol, TypeVar +import dask.array as da import numpy as np -from dask_image.imread import imread +from dask import delayed from geopandas import GeoDataFrame +from numpy.typing import NDArray from spatialdata._docs import docstring_parameter from spatialdata.models import Image2DModel, ShapesModel from spatialdata.models._utils import DEFAULT_COORDINATE_SYSTEM from spatialdata.transformations import Identity +from tifffile import memmap as tiffmmemap from xarray import DataArray -VALID_IMAGE_TYPES = [".tif", ".tiff", ".png", ".jpg", ".jpeg"] +VALID_IMAGE_TYPES = [".tif", ".tiff"] VALID_SHAPE_TYPES = [".geojson"] +DEFAULT_CHUNKSIZE = (1000, 1000) __all__ = ["generic", "geojson", "image", "VALID_IMAGE_TYPES", "VALID_SHAPE_TYPES"] +T = TypeVar("T", bound=np.generic) # Restrict to NumPy scalar types + + +class DaskArray(Protocol[T]): + dtype: np.dtype[T] + @docstring_parameter( valid_image_types=", ".join(VALID_IMAGE_TYPES), @@ -68,11 +79,166 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) +def _compute_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: + """Calculate chunk sizes and positions for a given dimension and chunk size""" + # All chunks have the same size except for the last one + positions = np.arange(min_coord, size, chunk) + lengths = np.full_like(positions, chunk, dtype=int) + + if positions[-1] + chunk > size: + lengths[-1] = size - positions[-1] + + return positions, lengths + + +def _compute_chunks( + dimensions: tuple[int, int], + chunk_size: tuple[int, int], + min_coordinates: tuple[int, int] = (0, 0), +) -> NDArray[np.int_]: + """Create all chunk specs for a given image and chunk size. + + Creates specifications (x, y, width, height) with (x, y) being the upper left corner + of chunks of size chunk_size. Chunks at the edges correspond to the remainder of + chunk size and dimensions + + Parameters + ---------- + dimensions : tuple[int, int] + Size of the image in (width, height). + chunk_size : tuple[int, int] + Size of individual tiles in (width, height). + min_coordinates : tuple[int, int], optional + Minimum coordinates (x, y) in the image, defaults to (0, 0). + + Returns + ------- + np.ndarray + Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile + as (x, y, width, height). 
+ """ + x_positions, widths = _compute_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) + y_positions, heights = _compute_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) + + # Generate the tiles + tiles = np.array( + [ + [[x, y, w, h] for y, h in zip(y_positions, heights, strict=True)] + for x, w in zip(x_positions, widths, strict=True) + ], + dtype=int, + ) + return tiles + + +def _read_chunks( + func: Callable[..., NDArray[np.int_]], + slide: Any, + coords: NDArray[np.int_], + n_channel: int, + dtype: type, + **func_kwargs: Any, +) -> list[list[da.array]]: + """Abstract factory method to tile a large microscopy image. + + Parameters + ---------- + func + Function to retrieve a rectangular tile from the slide image. Must take the + arguments: + + - slide Full slide image + - x0: x (col) coordinate of upper left corner of chunk + - y0: y (row) coordinate of upper left corner of chunk + - width: Width of chunk + - height: Height of chunk + + and should return the chunk as numpy array of shape (c, y, x) + slide + Slide image in lazyly loaded format compatible with func + coords + Coordinates of the upper left corner of the image in format (n_row_x, n_row_y, 4) + where the last dimension defines the rectangular tile in format (x, y, width, height). + n_row_x represents the number of chunks in x dimension and n_row_y the number of chunks + in y dimension. + n_channel + Number of channels in array + dtype + Data type of image + func_kwargs + Additional keyword arguments passed to func + + Returns + ------- + list[list[da.array]] + List (length: n_row_x) of lists (length: n_row_y) of chunks. + Represents all chunks of the full image. + """ + func_kwargs = func_kwargs if func_kwargs else {} + + # Collect each delayed chunk as item in list of list + # Inner list becomes dim=-1 (cols/x) + # Outer list becomes dim=-2 (rows/y) + # see dask.array.block + chunks = [ + [ + da.from_delayed( + delayed(func)( + slide, + x0=coords[x, y, 0], + y0=coords[x, y, 1], + width=coords[x, y, 2], + height=coords[x, y, 3], + **func_kwargs, + ), + dtype=dtype, + shape=(n_channel, *coords[x, y, [2, 3]]), + ) + for y in range(coords.shape[0]) + ] + for x in range(coords.shape[1]) + ] + return chunks + + +def _tiff_to_chunks(input: Path) -> list[list[DaskArray[np.int_]]]: + """Chunkwise reader for tiff files. 
+ + Parameters + ---------- + input + Path to image + + Returns + ------- + list[list[DaskArray]] + """ + # Lazy file reader + slide = tiffmmemap(input) + + # Get dimensions in (y, x) + slide_dimensions = slide.shape[:-1] + + # Get number of channels (c) + n_channel = slide.shape[-1] + + # Compute chunk coords + chunk_coords = _compute_chunks(slide_dimensions, chunk_size=DEFAULT_CHUNKSIZE, min_coordinates=(0, 0)) + + # Define reader func + def _reader_func(slide: NDArray[np.int_], x0: int, y0: int, width: int, height: int) -> NDArray[np.int_]: + return np.array(slide[y0 : y0 + height, x0 : x0 + width, :]).T + + return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype) + + def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> DataArray: - """Reads an image file and returns a parsed Image2D spatial element""" - # this function is just a draft, the more general one will be available when - # https://github.com/scverse/spatialdata-io/pull/234 is merged - image = imread(input) - if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1: - image = np.squeeze(image, axis=0) - return Image2DModel.parse(image, dims=data_axes, transformations={coordinate_system: Identity()}) + """Reads an image file and returns a parsed Image2DModel""" + if input.suffix in [".tiff", ".tif"]: + chunks = _tiff_to_chunks(input) + else: + raise NotImplementedError(f"File format {input.suffix} not implemented") + + img = da.block(chunks, allow_unknown_chunksizes=True) + + return Image2DModel.parse(img, dims=data_axes, transformations={coordinate_system: Identity()}) From 7ff1d2340ef2b284067299b28de4848026517387 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 14:48:20 +0100 Subject: [PATCH 02/16] Updated comments --- src/spatialdata_io/readers/generic.py | 38 ++++++++++++++++----------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 30a16dd7..6f902b6a 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -123,8 +123,8 @@ def _compute_chunks( # Generate the tiles tiles = np.array( [ - [[x, y, w, h] for y, h in zip(y_positions, heights, strict=True)] - for x, w in zip(x_positions, widths, strict=True) + [[x, y, w, h] for x, w in zip(x_positions, widths, strict=True)] + for y, h in zip(y_positions, heights, strict=True) ], dtype=int, ) @@ -136,7 +136,7 @@ def _read_chunks( slide: Any, coords: NDArray[np.int_], n_channel: int, - dtype: type, + dtype: np.number, **func_kwargs: Any, ) -> list[list[da.array]]: """Abstract factory method to tile a large microscopy image. @@ -185,23 +185,23 @@ def _read_chunks( da.from_delayed( delayed(func)( slide, - x0=coords[x, y, 0], - y0=coords[x, y, 1], - width=coords[x, y, 2], - height=coords[x, y, 3], + x0=coords[y, x, 0], + y0=coords[y, x, 1], + width=coords[y, x, 2], + height=coords[y, x, 3], **func_kwargs, ), dtype=dtype, - shape=(n_channel, *coords[x, y, [2, 3]]), + shape=(n_channel, *coords[y, x, [3, 2]]), ) - for y in range(coords.shape[0]) + for x in range(coords.shape[1]) ] - for x in range(coords.shape[1]) + for y in range(coords.shape[0]) ] return chunks -def _tiff_to_chunks(input: Path) -> list[list[DaskArray[np.int_]]]: +def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.int_]]]: """Chunkwise reader for tiff files. 
Parameters @@ -216,26 +216,32 @@ def _tiff_to_chunks(input: Path) -> list[list[DaskArray[np.int_]]]: # Lazy file reader slide = tiffmmemap(input) - # Get dimensions in (y, x) - slide_dimensions = slide.shape[:-1] + # Transpose to cyx order + slide = np.transpose(slide, (axes_dim_mapping["c"], axes_dim_mapping["y"], axes_dim_mapping["x"])) + + # Get dimensions in (x, y) + slide_dimensions = slide.shape[2], slide.shape[1] # Get number of channels (c) - n_channel = slide.shape[-1] + n_channel = slide.shape[0] # Compute chunk coords chunk_coords = _compute_chunks(slide_dimensions, chunk_size=DEFAULT_CHUNKSIZE, min_coordinates=(0, 0)) # Define reader func def _reader_func(slide: NDArray[np.int_], x0: int, y0: int, width: int, height: int) -> NDArray[np.int_]: - return np.array(slide[y0 : y0 + height, x0 : x0 + width, :]).T + return np.array(slide[:, y0 : y0 + height, x0 : x0 + width]) return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype) def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> DataArray: """Reads an image file and returns a parsed Image2DModel""" + # Map passed data axes to position of dimension + axes_dim_mapping = {axes: ndim for ndim, axes in enumerate(data_axes)} + if input.suffix in [".tiff", ".tif"]: - chunks = _tiff_to_chunks(input) + chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) else: raise NotImplementedError(f"File format {input.suffix} not implemented") From 826133ab4cac7619cc8f46aab8eeb6dfe632595a Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 14:51:46 +0100 Subject: [PATCH 03/16] Updated comments --- src/spatialdata_io/readers/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 6f902b6a..3cac8876 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -79,7 +79,7 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) -def _compute_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: +def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: """Calculate chunk sizes and positions for a given dimension and chunk size""" # All chunks have the same size except for the last one positions = np.arange(min_coord, size, chunk) @@ -117,8 +117,8 @@ def _compute_chunks( Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile as (x, y, width, height). """ - x_positions, widths = _compute_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) - y_positions, heights = _compute_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) + x_positions, widths = _compute_chunk_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) + y_positions, heights = _compute_chunk_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) # Generate the tiles tiles = np.array( @@ -139,7 +139,7 @@ def _read_chunks( dtype: np.number, **func_kwargs: Any, ) -> list[list[da.array]]: - """Abstract factory method to tile a large microscopy image. + """Abstract method to tile a large microscopy image. 
Parameters ---------- From df258bc7f79811a0fca6e873fdadceca198faa38 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 15:03:04 +0100 Subject: [PATCH 04/16] Move utility functions to designated submodule readers._utils._image --- src/spatialdata_io/readers/_utils/_image.py | 129 +++++++++++++++++++ src/spatialdata_io/readers/generic.py | 131 +------------------- 2 files changed, 135 insertions(+), 125 deletions(-) create mode 100644 src/spatialdata_io/readers/_utils/_image.py diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py new file mode 100644 index 00000000..ab175512 --- /dev/null +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -0,0 +1,129 @@ +from collections.abc import Callable +from typing import Any + +import dask.array as da +import numpy as np +from dask import delayed +from numpy.typing import NDArray + + +def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: + """Calculate chunk sizes and positions for a given dimension and chunk size""" + # All chunks have the same size except for the last one + positions = np.arange(min_coord, size, chunk) + lengths = np.full_like(positions, chunk, dtype=int) + + if positions[-1] + chunk > size: + lengths[-1] = size - positions[-1] + + return positions, lengths + + +def _compute_chunks( + dimensions: tuple[int, int], + chunk_size: tuple[int, int], + min_coordinates: tuple[int, int] = (0, 0), +) -> NDArray[np.int_]: + """Create all chunk specs for a given image and chunk size. + + Creates specifications (x, y, width, height) with (x, y) being the upper left corner + of chunks of size chunk_size. Chunks at the edges correspond to the remainder of + chunk size and dimensions + + Parameters + ---------- + dimensions : tuple[int, int] + Size of the image in (width, height). + chunk_size : tuple[int, int] + Size of individual tiles in (width, height). + min_coordinates : tuple[int, int], optional + Minimum coordinates (x, y) in the image, defaults to (0, 0). + + Returns + ------- + np.ndarray + Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile + as (x, y, width, height). + """ + x_positions, widths = _compute_chunk_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) + y_positions, heights = _compute_chunk_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) + + # Generate the tiles + tiles = np.array( + [ + [[x, y, w, h] for x, w in zip(x_positions, widths, strict=True)] + for y, h in zip(y_positions, heights, strict=True) + ], + dtype=int, + ) + return tiles + + +def _read_chunks( + func: Callable[..., NDArray[np.int_]], + slide: Any, + coords: NDArray[np.int_], + n_channel: int, + dtype: np.number, + **func_kwargs: Any, +) -> list[list[da.array]]: + """Abstract method to tile a large microscopy image. + + Parameters + ---------- + func + Function to retrieve a rectangular tile from the slide image. Must take the + arguments: + + - slide Full slide image + - x0: x (col) coordinate of upper left corner of chunk + - y0: y (row) coordinate of upper left corner of chunk + - width: Width of chunk + - height: Height of chunk + + and should return the chunk as numpy array of shape (c, y, x) + slide + Slide image in lazyly loaded format compatible with func + coords + Coordinates of the upper left corner of the image in format (n_row_x, n_row_y, 4) + where the last dimension defines the rectangular tile in format (x, y, width, height). 
+ n_row_x represents the number of chunks in x dimension and n_row_y the number of chunks + in y dimension. + n_channel + Number of channels in array + dtype + Data type of image + func_kwargs + Additional keyword arguments passed to func + + Returns + ------- + list[list[da.array]] + List (length: n_row_x) of lists (length: n_row_y) of chunks. + Represents all chunks of the full image. + """ + func_kwargs = func_kwargs if func_kwargs else {} + + # Collect each delayed chunk as item in list of list + # Inner list becomes dim=-1 (cols/x) + # Outer list becomes dim=-2 (rows/y) + # see dask.array.block + chunks = [ + [ + da.from_delayed( + delayed(func)( + slide, + x0=coords[y, x, 0], + y0=coords[y, x, 1], + width=coords[y, x, 2], + height=coords[y, x, 3], + **func_kwargs, + ), + dtype=dtype, + shape=(n_channel, *coords[y, x, [3, 2]]), + ) + for x in range(coords.shape[1]) + ] + for y in range(coords.shape[0]) + ] + return chunks diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 3cac8876..65158413 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -1,13 +1,12 @@ from __future__ import annotations import warnings -from collections.abc import Callable, Sequence +from collections.abc import Sequence from pathlib import Path -from typing import Any, Protocol, TypeVar +from typing import Protocol, TypeVar import dask.array as da import numpy as np -from dask import delayed from geopandas import GeoDataFrame from numpy.typing import NDArray from spatialdata._docs import docstring_parameter @@ -17,6 +16,8 @@ from tifffile import memmap as tiffmmemap from xarray import DataArray +from ._utils._image import _compute_chunks, _read_chunks + VALID_IMAGE_TYPES = [".tif", ".tiff"] VALID_SHAPE_TYPES = [".geojson"] DEFAULT_CHUNKSIZE = (1000, 1000) @@ -79,128 +80,6 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) -def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: - """Calculate chunk sizes and positions for a given dimension and chunk size""" - # All chunks have the same size except for the last one - positions = np.arange(min_coord, size, chunk) - lengths = np.full_like(positions, chunk, dtype=int) - - if positions[-1] + chunk > size: - lengths[-1] = size - positions[-1] - - return positions, lengths - - -def _compute_chunks( - dimensions: tuple[int, int], - chunk_size: tuple[int, int], - min_coordinates: tuple[int, int] = (0, 0), -) -> NDArray[np.int_]: - """Create all chunk specs for a given image and chunk size. - - Creates specifications (x, y, width, height) with (x, y) being the upper left corner - of chunks of size chunk_size. Chunks at the edges correspond to the remainder of - chunk size and dimensions - - Parameters - ---------- - dimensions : tuple[int, int] - Size of the image in (width, height). - chunk_size : tuple[int, int] - Size of individual tiles in (width, height). - min_coordinates : tuple[int, int], optional - Minimum coordinates (x, y) in the image, defaults to (0, 0). - - Returns - ------- - np.ndarray - Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile - as (x, y, width, height). 
- """ - x_positions, widths = _compute_chunk_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) - y_positions, heights = _compute_chunk_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) - - # Generate the tiles - tiles = np.array( - [ - [[x, y, w, h] for x, w in zip(x_positions, widths, strict=True)] - for y, h in zip(y_positions, heights, strict=True) - ], - dtype=int, - ) - return tiles - - -def _read_chunks( - func: Callable[..., NDArray[np.int_]], - slide: Any, - coords: NDArray[np.int_], - n_channel: int, - dtype: np.number, - **func_kwargs: Any, -) -> list[list[da.array]]: - """Abstract method to tile a large microscopy image. - - Parameters - ---------- - func - Function to retrieve a rectangular tile from the slide image. Must take the - arguments: - - - slide Full slide image - - x0: x (col) coordinate of upper left corner of chunk - - y0: y (row) coordinate of upper left corner of chunk - - width: Width of chunk - - height: Height of chunk - - and should return the chunk as numpy array of shape (c, y, x) - slide - Slide image in lazyly loaded format compatible with func - coords - Coordinates of the upper left corner of the image in format (n_row_x, n_row_y, 4) - where the last dimension defines the rectangular tile in format (x, y, width, height). - n_row_x represents the number of chunks in x dimension and n_row_y the number of chunks - in y dimension. - n_channel - Number of channels in array - dtype - Data type of image - func_kwargs - Additional keyword arguments passed to func - - Returns - ------- - list[list[da.array]] - List (length: n_row_x) of lists (length: n_row_y) of chunks. - Represents all chunks of the full image. - """ - func_kwargs = func_kwargs if func_kwargs else {} - - # Collect each delayed chunk as item in list of list - # Inner list becomes dim=-1 (cols/x) - # Outer list becomes dim=-2 (rows/y) - # see dask.array.block - chunks = [ - [ - da.from_delayed( - delayed(func)( - slide, - x0=coords[y, x, 0], - y0=coords[y, x, 1], - width=coords[y, x, 2], - height=coords[y, x, 3], - **func_kwargs, - ), - dtype=dtype, - shape=(n_channel, *coords[y, x, [3, 2]]), - ) - for x in range(coords.shape[1]) - ] - for y in range(coords.shape[0]) - ] - return chunks - - def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.int_]]]: """Chunkwise reader for tiff files. 
@@ -208,6 +87,8 @@ def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[ ---------- input Path to image + axes_dim_mapping + Mapping between dimension name (x, y, c) and index Returns ------- From db0d78278e13d0f35e18cc645900e82ee3a97985 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 15:27:52 +0100 Subject: [PATCH 05/16] Initial tests utils --- tests/readers/test_utils_image.py | 59 +++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 tests/readers/test_utils_image.py diff --git a/tests/readers/test_utils_image.py b/tests/readers/test_utils_image.py new file mode 100644 index 00000000..d9f0c295 --- /dev/null +++ b/tests/readers/test_utils_image.py @@ -0,0 +1,59 @@ +import numpy as np +import pytest +from numpy.typing import NDArray + +from spatialdata_io.readers._utils._image import ( + _compute_chunk_sizes_positions, + _compute_chunks, +) + + +@pytest.mark.parametrize( + ("size", "chunk", "min_coordinate", "positions", "lengths"), + [ + (300, 100, 0, np.array([0, 100, 200]), np.array([100, 100, 100])), + (300, 200, 0, np.array([0, 200]), np.array([200, 100])), + (300, 100, -100, np.array([-100, 0, 100]), np.array([100, 100, 100])), + (300, 200, -100, np.array([-100, 100]), np.array([200, 100])), + ], +) +def test_compute_chunk_sizes_positions( + size: int, + chunk: int, + min_coordinate: int, + positions: NDArray[np.number], + lengths: NDArray[np.number], +) -> None: + computed_positions, computed_lengths = _compute_chunk_sizes_positions(size, chunk, min_coordinate) + assert (positions == computed_positions).all() + assert (lengths == computed_lengths).all() + + +@pytest.mark.parametrize( + ("dimensions", "chunk_size", "min_coordinates", "result"), + [ + # Regular grid 2x2 + ( + (2, 2), + (1, 1), + (0, 0), + np.array([[[0, 0, 1, 1], [1, 0, 1, 1]], [[0, 1, 1, 1], [1, 1, 1, 1]]]), + ), + # Different tile sizes + ( + (3, 3), + (2, 2), + (0, 0), + np.array([[[0, 0, 2, 2], [2, 0, 1, 2]], [[0, 2, 2, 1], [2, 2, 1, 1]]]), + ), + ], +) +def test_compute_chunks( + dimensions: tuple[int, int], + chunk_size: tuple[int, int], + min_coordinates: tuple[int, int], + result: NDArray[np.number], +) -> None: + tiles = _compute_chunks(dimensions, chunk_size, min_coordinates) + + assert (tiles == result).all(), tiles From c03932f0d1f932a2b6b55a91ace1aeaede555cdd Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 15:28:41 +0100 Subject: [PATCH 06/16] Fixes edge cases for min coordinate --- src/spatialdata_io/readers/_utils/_image.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index ab175512..28e6e992 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -10,11 +10,11 @@ def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: """Calculate chunk sizes and positions for a given dimension and chunk size""" # All chunks have the same size except for the last one - positions = np.arange(min_coord, size, chunk) + positions = np.arange(min_coord, min_coord + size, chunk) lengths = np.full_like(positions, chunk, dtype=int) - if positions[-1] + chunk > size: - lengths[-1] = size - positions[-1] + if positions[-1] + chunk > size + min_coord: + lengths[-1] = size + min_coord - positions[-1] return positions, lengths From 339cbd88aae511d723ddc1eebfe82496f1340cf1 Mon Sep 17 00:00:00 2001 From: 
Lucas Diedrich Date: Sun, 16 Feb 2025 19:33:07 +0100 Subject: [PATCH 07/16] Added test for negative coordinates --- tests/readers/test_utils_image.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/readers/test_utils_image.py b/tests/readers/test_utils_image.py index d9f0c295..f1c58fa1 100644 --- a/tests/readers/test_utils_image.py +++ b/tests/readers/test_utils_image.py @@ -46,6 +46,12 @@ def test_compute_chunk_sizes_positions( (0, 0), np.array([[[0, 0, 2, 2], [2, 0, 1, 2]], [[0, 2, 2, 1], [2, 2, 1, 1]]]), ), + ( + (2, 2), + (1, 1), + (-1, 0), + np.array([[[0, -1, 1, 1], [1, -1, 1, 1]], [[0, 0, 1, 1], [1, 0, 1, 1]]]), + ), ], ) def test_compute_chunks( @@ -56,4 +62,4 @@ def test_compute_chunks( ) -> None: tiles = _compute_chunks(dimensions, chunk_size, min_coordinates) - assert (tiles == result).all(), tiles + assert (tiles == result).all() From 24c6eece2dfdd07f2b63a5d3807be0d689ad7818 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Mon, 17 Feb 2025 16:46:03 +0100 Subject: [PATCH 08/16] Add support for png/jpg again --- src/spatialdata_io/readers/generic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 65158413..8aec6765 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -7,6 +7,7 @@ import dask.array as da import numpy as np +from dask_image.imread import imread from geopandas import GeoDataFrame from numpy.typing import NDArray from spatialdata._docs import docstring_parameter @@ -18,7 +19,7 @@ from ._utils._image import _compute_chunks, _read_chunks -VALID_IMAGE_TYPES = [".tif", ".tiff"] +VALID_IMAGE_TYPES = [".tif", ".tiff", ".png", ".jpg", ".jpeg"] VALID_SHAPE_TYPES = [".geojson"] DEFAULT_CHUNKSIZE = (1000, 1000) @@ -123,6 +124,10 @@ def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> Data if input.suffix in [".tiff", ".tif"]: chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) + elif input.suffix in [".png", "jpg", "jpeg"]: + image = imread(input) + if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1: + image = np.squeeze(image, axis=0) else: raise NotImplementedError(f"File format {input.suffix} not implemented") From dbdc7c7516823eb58d2547630dffe731a646ab13 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Mon, 17 Feb 2025 17:11:06 +0100 Subject: [PATCH 09/16] Add initial test --- tests/test_generic.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_generic.py b/tests/test_generic.py index d4dda1a7..e8414fa7 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -9,9 +9,12 @@ from PIL import Image from spatialdata import SpatialData from spatialdata.datasets import blobs +from tifffile import imread as tiffread +from tifffile import imwrite as tiffwrite from spatialdata_io.__main__ import read_generic_wrapper from spatialdata_io.converters.generic_to_zarr import generic_to_zarr +from spatialdata_io.readers.generic import image @contextmanager @@ -33,6 +36,32 @@ def save_temp_files() -> Generator[tuple[Path, Path, Path], None, None]: yield jpg_path, geojson_path, Path(tmpdir) +@pytest.fixture(scope="module", params=[("c", "y", "x"), ("x", "y", "c")]) +def save_tiff_files( + request: pytest.FixtureRequest, +) -> Generator[tuple[Path, tuple[str], Path], None, None]: + with tempfile.TemporaryDirectory() as tmpdir: + axes = request.param + sdata = blobs() + # save the image as tiff + x = 
sdata["blobs_image"].transpose(*axes).data.compute() + img = np.clip(x * 255, 0, 255).astype(np.uint8) + + tiff_path = Path(tmpdir) / "blobs_image.tiff" + tiffwrite(tiff_path, img) + + yield tiff_path, axes, Path(tmpdir) + + +def test_read_tiff(save_tiff_files: tuple[Path, tuple[str], Path]) -> None: + tiff_path, axes, _ = save_tiff_files + img = image(tiff_path, data_axes=axes, coordinate_system="global") + + reference = tiffread(tiff_path) + + assert (img.compute() == reference).all() + + @pytest.mark.parametrize("cli", [True, False]) @pytest.mark.parametrize("element_name", [None, "test_element"]) def test_read_generic_image(runner: CliRunner, cli: bool, element_name: str | None) -> None: From da98469859daebb57753e372ad2a53dbff08fa8d Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Mon, 17 Feb 2025 17:19:45 +0100 Subject: [PATCH 10/16] Fix: Fix jpeg and png reader, fix issues with local variable name --- src/spatialdata_io/readers/generic.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 8aec6765..fd8c17dd 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -124,13 +124,14 @@ def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> Data if input.suffix in [".tiff", ".tif"]: chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) - elif input.suffix in [".png", "jpg", "jpeg"]: + image = da.block(chunks, allow_unknown_chunksizes=True) + + elif input.suffix in [".png", ".jpg", ".jpeg"]: image = imread(input) if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1: image = np.squeeze(image, axis=0) + else: raise NotImplementedError(f"File format {input.suffix} not implemented") - img = da.block(chunks, allow_unknown_chunksizes=True) - - return Image2DModel.parse(img, dims=data_axes, transformations={coordinate_system: Identity()}) + return Image2DModel.parse(image, dims=data_axes, transformations={coordinate_system: Identity()}) From 2349be7a7d036a9c801c66f7d5b698b3d7de9ea1 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich <86159938+lucas-diedrich@users.noreply.github.com> Date: Fri, 2 May 2025 17:41:37 +0200 Subject: [PATCH 11/16] Update src/spatialdata_io/readers/_utils/_image.py Co-authored-by: Wouter-Michiel Vierdag --- src/spatialdata_io/readers/_utils/_image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 28e6e992..8896d78f 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -20,7 +20,7 @@ def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tup def _compute_chunks( - dimensions: tuple[int, int], + shape: tuple[int, int], chunk_size: tuple[int, int], min_coordinates: tuple[int, int] = (0, 0), ) -> NDArray[np.int_]: @@ -32,7 +32,7 @@ def _compute_chunks( Parameters ---------- - dimensions : tuple[int, int] + shape : tuple[int, int] Size of the image in (width, height). chunk_size : tuple[int, int] Size of individual tiles in (width, height). 
From 46ed3e5f6e12f792b7f61da396a967ee87a22e6d Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 2 May 2025 17:46:51 +0200 Subject: [PATCH 12/16] [Refactor|API] Rename dimensions to shape to stick to numpy convention --- src/spatialdata_io/readers/_utils/_image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 8896d78f..d644f934 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -45,8 +45,8 @@ def _compute_chunks( Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile as (x, y, width, height). """ - x_positions, widths = _compute_chunk_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) - y_positions, heights = _compute_chunk_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) + x_positions, widths = _compute_chunk_sizes_positions(shape[1], chunk_size[1], min_coord=min_coordinates[1]) + y_positions, heights = _compute_chunk_sizes_positions(shape[0], chunk_size[0], min_coord=min_coordinates[0]) # Generate the tiles tiles = np.array( From 99731fedbc9502ddae055e9b26de7251ba5b52b7 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 2 May 2025 17:48:54 +0200 Subject: [PATCH 13/16] [Refactor] Make suggested simplification of code, suggested by @melonora --- src/spatialdata_io/readers/_utils/_image.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index d644f934..4621d5fb 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -11,10 +11,7 @@ def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tup """Calculate chunk sizes and positions for a given dimension and chunk size""" # All chunks have the same size except for the last one positions = np.arange(min_coord, min_coord + size, chunk) - lengths = np.full_like(positions, chunk, dtype=int) - - if positions[-1] + chunk > size + min_coord: - lengths[-1] = size + min_coord - positions[-1] + lengths = np.minimum(chunk, min_coord + size - positions) return positions, lengths From f03ca8e8d3f867424b1a526020521c97846d3a68 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 2 May 2025 17:52:39 +0200 Subject: [PATCH 14/16] [Test] Add test for compressed tiffs --- tests/test_generic.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index e8414fa7..3251e8fd 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -36,19 +36,29 @@ def save_temp_files() -> Generator[tuple[Path, Path, Path], None, None]: yield jpg_path, geojson_path, Path(tmpdir) -@pytest.fixture(scope="module", params=[("c", "y", "x"), ("x", "y", "c")]) +@pytest.fixture( + scope="module", + params=[ + {"axes": ("c", "y", "x"), "compression": None}, + {"axes": ("x", "y", "c"), "compression": None}, + {"axes": ("c", "y", "x"), "compression": "lzw"}, + {"axes": ("x", "y", "c"), "compression": "lzw"}, + ], +) def save_tiff_files( request: pytest.FixtureRequest, ) -> Generator[tuple[Path, tuple[str], Path], None, None]: with tempfile.TemporaryDirectory() as tmpdir: - axes = request.param + axes = request.param["axes"] + compression = request.param["compression"] + sdata = blobs() # save the image as tiff x = sdata["blobs_image"].transpose(*axes).data.compute() img = 
np.clip(x * 255, 0, 255).astype(np.uint8) tiff_path = Path(tmpdir) / "blobs_image.tiff" - tiffwrite(tiff_path, img) + tiffwrite(tiff_path, img, compression=compression) yield tiff_path, axes, Path(tmpdir) From 9e057de33bcdf5072bf3627220d05d6f7b57efde Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 2 May 2025 18:24:24 +0200 Subject: [PATCH 15/16] [Fix] Account for compressed images --- src/spatialdata_io/readers/generic.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index fd8c17dd..9eb799dd 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -7,6 +7,7 @@ import dask.array as da import numpy as np +import tifffile from dask_image.imread import imread from geopandas import GeoDataFrame from numpy.typing import NDArray @@ -14,7 +15,6 @@ from spatialdata.models import Image2DModel, ShapesModel from spatialdata.models._utils import DEFAULT_COORDINATE_SYSTEM from spatialdata.transformations import Identity -from tifffile import memmap as tiffmmemap from xarray import DataArray from ._utils._image import _compute_chunks, _read_chunks @@ -81,7 +81,9 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) -def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.int_]]]: +def _tiff_to_chunks( + input: Path, axes_dim_mapping: dict[str, int] +) -> DaskArray[np.int_] | list[list[DaskArray[np.int_]]]: """Chunkwise reader for tiff files. Parameters @@ -96,7 +98,7 @@ def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[ list[list[DaskArray]] """ # Lazy file reader - slide = tiffmmemap(input) + slide = tifffile.memmap(input) # Transpose to cyx order slide = np.transpose(slide, (axes_dim_mapping["c"], axes_dim_mapping["y"], axes_dim_mapping["x"])) @@ -123,8 +125,20 @@ def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> Data axes_dim_mapping = {axes: ndim for ndim, axes in enumerate(data_axes)} if input.suffix in [".tiff", ".tif"]: - chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) - image = da.block(chunks, allow_unknown_chunksizes=True) + try: + chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) + image = da.block(chunks, allow_unknown_chunksizes=True) + + # Edge case: Compressed images are not memory-mappable + except ValueError: + warnings.warn( + "Image data are not memory-mappable, potentially due to compression. 
Trying to load the image into memory at once", + stacklevel=2, + ) + image = imread(input) + if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1: + image = np.squeeze(image, axis=0) + image = image.transpose(axes_dim_mapping["c"], axes_dim_mapping["y"], axes_dim_mapping["x"]) elif input.suffix in [".png", ".jpg", ".jpeg"]: image = imread(input) From c05b71872151525f456399b8f56b55b006aba650 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 2 May 2025 18:31:59 +0200 Subject: [PATCH 16/16] [Refactor] Remove unnecessary type hint --- src/spatialdata_io/readers/generic.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 9eb799dd..b632eba7 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -81,9 +81,7 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) -def _tiff_to_chunks( - input: Path, axes_dim_mapping: dict[str, int] -) -> DaskArray[np.int_] | list[list[DaskArray[np.int_]]]: +def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.int_]]]: """Chunkwise reader for tiff files. Parameters