From a2d73828e215e615c853a94bc33ec3c3698aeb37 Mon Sep 17 00:00:00 2001 From: Blampey Quentin Date: Fri, 31 Oct 2025 14:11:59 +0100 Subject: [PATCH] add molecular_cartography reader from sopa --- docs/api.md | 2 + src/spatialdata_io/__init__.py | 1 + src/spatialdata_io/_constants/_constants.py | 8 ++ .../readers/molecular_cartography.py | 84 +++++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 src/spatialdata_io/readers/molecular_cartography.py diff --git a/docs/api.md b/docs/api.md index 95ff1650..cdd3d665 100644 --- a/docs/api.md +++ b/docs/api.md @@ -23,8 +23,10 @@ I/O for the `spatialdata` project. curio dbit experimental.iss + macsima mcmicro merscope + molecular_cartography seqfish steinbock stereoseq diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index 0648c09e..0207ca1d 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -9,6 +9,7 @@ from spatialdata_io.readers.macsima import macsima from spatialdata_io.readers.mcmicro import mcmicro from spatialdata_io.readers.merscope import merscope +from spatialdata_io.readers.molecular_cartography import molecular_cartography from spatialdata_io.readers.seqfish import seqfish from spatialdata_io.readers.steinbock import steinbock from spatialdata_io.readers.stereoseq import stereoseq diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index c2f6c21f..a77407a2 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -402,3 +402,11 @@ class VisiumHDKeys(ModeEnum): MICROSCOPE_COLROW_TO_SPOT_COLROW = ("microscope_colrow_to_spot_colrow",) SPOT_COLROW_TO_MICROSCOPE_COLROW = ("spot_colrow_to_microscope_colrow",) FILE_FORMAT = "file_format" + + +@unique +class MolecularCartographyKeys(ModeEnum): + """Keys for *Molecular Cartography* formatted dataset.""" + + FEATURE_KEY = "target_name" + POINTS_SUFFIX = "_results.txt" diff --git a/src/spatialdata_io/readers/molecular_cartography.py b/src/spatialdata_io/readers/molecular_cartography.py new file mode 100644 index 00000000..804dcd49 --- /dev/null +++ b/src/spatialdata_io/readers/molecular_cartography.py @@ -0,0 +1,84 @@ +from collections.abc import Mapping +from pathlib import Path +from types import MappingProxyType +from typing import Any + +import dask.array as da +import pandas as pd +from dask_image.imread import imread +from spatialdata import SpatialData +from spatialdata.models import Image2DModel, PointsModel + +from spatialdata_io._constants._constants import MolecularCartographyKeys + +__all__ = ["molecular_cartography"] + + +def molecular_cartography( + path: str | Path, + region: str, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), +) -> SpatialData: + """Read *Molecular Cartography* data from *Resolve Bioscience* as a `SpatialData` object. + + This function reads the following files: + + - {dataset_id}_*.tiff: The images for the given region. + - {dataset_id}_results.txt: The transcript locations for the given region. + + Parameters + ---------- + path: Path to the Molecular Cartography directory containing one or multiple region(s). + region: Name of the region to read. The region name can be found before the `_results.txt` file, e.g. `A2-1`. + image_models_kwargs: Keyword arguments passed to `spatialdata.models.Image2DModel`. + imread_kwargs: Keyword arguments passed to `dask_image.imread.imread`. + + Returns + ------- + :class:`spatialdata.SpatialData` + """ + if "chunks" not in image_models_kwargs: + if isinstance(image_models_kwargs, MappingProxyType): + image_models_kwargs = {} + assert isinstance(image_models_kwargs, dict) + image_models_kwargs["chunks"] = (1, 4096, 4096) + if "scale_factors" not in image_models_kwargs: + if isinstance(image_models_kwargs, MappingProxyType): + image_models_kwargs = {} + assert isinstance(image_models_kwargs, dict) + image_models_kwargs["scale_factors"] = [2, 2, 2, 2] + + path = Path(path) + dataset_id = _get_dataset_id(path, region) + + # Read the points + transcripts = pd.read_csv(path / f"{dataset_id}{MolecularCartographyKeys.POINTS_SUFFIX}", sep="\t", header=None) + transcripts.columns = ["x", "y", "z", MolecularCartographyKeys.FEATURE_KEY, "unnamed"] + + transcripts = PointsModel.parse(transcripts, feature_key=MolecularCartographyKeys.FEATURE_KEY) + transcripts_name = f"{dataset_id}_points" + + # Read the images + images_paths = list(path.glob(f"{dataset_id}_*.tiff")) + c_coords = [image_path.stem.split("_")[-1] for image_path in images_paths] + + image = Image2DModel.parse( + da.concatenate([imread(image_path, **imread_kwargs) for image_path in images_paths], axis=0), + dims=("c", "y", "x"), + c_coords=c_coords, + rgb=None, + **image_models_kwargs, + ) + image_name = f"{dataset_id}_image" + + return SpatialData(images={image_name: image}, points={transcripts_name: transcripts}) + + +def _get_dataset_id(path: Path, region: str) -> str: + _dataset_ids = [path.name[:-12] for path in path.glob("*_results.txt")] + region_to_id = {dataset_id.split("_")[-1]: dataset_id for dataset_id in _dataset_ids} + + assert region in region_to_id, f"Region {region} not found. Must be one of {list(region_to_id.keys())}" + + return region_to_id[region]