From 902cfbd90ac40358652a60d251c288b3942d0934 Mon Sep 17 00:00:00 2001 From: mkeller <7525285+keller-mark@users.noreply.github.com> Date: Fri, 31 Mar 2023 16:38:08 -0400 Subject: [PATCH 1/2] FISH-based data template --- templates/fish-based.ipynb | 1332 ++++++++++++++++++++++++++++++++++++ 1 file changed, 1332 insertions(+) create mode 100644 templates/fish-based.ipynb diff --git a/templates/fish-based.ipynb b/templates/fish-based.ipynb new file mode 100644 index 0000000..e9fa454 --- /dev/null +++ b/templates/fish-based.ipynb @@ -0,0 +1,1332 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "e819ce7a-fb23-4c0a-8efd-1b7cdcb552e3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# TEMP: download/construct \"raw\" dataset for a complete example" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "4b0ace49-3c42-4c11-b3e3-198d5c2dba6f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from ome_zarr.io import parse_url\n", + "from ome_zarr.reader import Reader\n", + "\n", + "from tifffile import imwrite\n", + "from os.path import join\n", + "import zarr\n", + "from anndata import read_zarr, AnnData\n", + "import numpy as np\n", + "import pandas as pd\n", + "from skimage.draw import polygon2mask\n", + "import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a2a4b8f0-870f-4bbb-8724-437cd79638c3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Adapting http://vitessce.io/#?dataset=codeluppi-2018-via-zarr" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c6ab9d1a-6dbe-4d62-89ca-be41415f369b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cells_adata_zarr = \"data/raw/codeluppi_2018_nature_methods.cells.h5ad.zarr\"\n", + "cells_csv = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018/codeluppi_2018_nature_methods.cells.csv\"\n", + "molecules_adata_zarr = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018-via-zarr/codeluppi_2018_nature_methods.molecules.h5ad.zarr\"\n", + "molecules_csv = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018/codeluppi_2018_nature_methods.molecules.csv\"\n", + "background_ome_zarr = \"https://vitessce-data.storage.googleapis.com/0.0.34/main/codeluppi-2018/codeluppi_2018_nature_methods.image.ome.zarr\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d3f3bc8-1d31-41ac-ac02-65929ca4afdb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Convert OME-Zarr to normal TIFF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3d5309e-1b02-4ffd-b943-57f449d5da49", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# read the image data\n", + "store = parse_url(background_ome_zarr, mode=\"r\")\n", + "reader = Reader(store)\n", + "# nodes may include images, labels etc\n", + "nodes = list(reader())\n", + "# first node will be the image pixel data\n", + "image_node = nodes[0]\n", + "\n", + "dask_data = image_node.data\n", + "\n", + "hires_arr = dask_data[2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32b87d3c-b18a-407a-b0b0-6ec88cedf199", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "RAW_DIR = join(\"data\", \"raw\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b591bf59-3f68-4d56-b0f1-669e04125c26", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "imwrite(\n", + " join(RAW_DIR, 'codeluppi_2018_nature_methods.image.tiff'),\n", + " hires_arr,\n", + " photometric='minisblack',\n", + " metadata={'axes': 'CYX'}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4487ebfd-4480-48dc-8b7d-4a6c6eeab957", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Convert cells.zarr/obsm/X_segmentations to a bitmask TIFF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00b5563b-e70e-4675-a94a-338c56ba1c27", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "bitmask_shape = (hires_arr.shape[2], hires_arr.shape[1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfea1161-616b-425e-942d-b19232036d96", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "bitmask_shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9d6800f-0119-4b5f-8eb8-a009b214387c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "bitmask_arr = np.zeros(bitmask_shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16d6ac9b-a349-43a8-935c-f7c6bec743cf", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cells_store = zarr.open(cells_adata_zarr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e47371-49b4-44cc-85a8-223b106ffc10", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "segmentations_arr = cells_store['obsm/X_segmentations'][()]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7587becd-9cf1-47ac-b412-2cea40c2a298", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "segmentations_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c311d30e-60c4-43ea-84f4-ac0c6add5516", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "for i in tqdm.tqdm(range(segmentations_arr.shape[0])):\n", + " # Re-scale the vertices so they match the background scale\n", + " vertices = np.multiply(segmentations_arr[i], 0.25)\n", + " mask = polygon2mask(bitmask_shape, vertices)\n", + " bitmask_arr[mask] = i+1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9625735f-98fe-4487-8f4e-7322ccbc1ae8", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Update the array axes so they are in CYX order to enable conversion to OME-TIFF.\n", + "bitmask_arr = bitmask_arr.transpose((1, 0)) # (y, x)\n", + "bitmask_arr = bitmask_arr[np.newaxis, :] # (c, y, x)\n", + "bitmask_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae851409-4be2-4878-9b8a-0af18b211d1f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "imwrite(\n", + " join(RAW_DIR, 'codeluppi_2018_nature_methods.bitmask.tiff'),\n", + " bitmask_arr,\n", + " photometric='minisblack',\n", + " metadata={'axes': 'CYX'}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d600e4a6-6445-45ec-a6e2-57b12674fd5b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Convert molecules.zarr to TSV (one row per molecule)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "e24cfd60-3574-4f6a-9109-1f3b8da8c1e6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "molecules_store = zarr.open(molecules_adata_zarr)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "85350784-b365-4766-8796-4420cb1b9251", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "positions_arr = molecules_store['obsm/X_spatial'][()]\n", + "gene_cats_arr = molecules_store['obs/__categories/Gene'][()]\n", + "gene_codes_arr = molecules_store['obs/Gene'][()]\n", + "genes_arr = [gene_cats_arr[cat_i] for cat_i in gene_codes_arr]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3d3a7763-f600-4209-9c8d-9ab7fee3ac2e", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1976659, 2)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "positions_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "76b7ec3d-b11d-4599-b6a0-ba637c488f2d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "molecules_df = pd.DataFrame(index=range(positions_arr.shape[0]))\n", + "molecules_df[\"x\"] = np.multiply(positions_arr[:, 0], 0.25)\n", + "molecules_df[\"y\"] = np.multiply(positions_arr[:, 1], 0.25)\n", + "molecules_df[\"gene\"] = genes_arr" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "c384a702-57de-4072-9333-4b7b76a73a7d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xygene
04553.755013.00Acta2_Hybridization5
16229.004253.75Acta2_Hybridization5
22100.0011130.50Acta2_Hybridization5
35666.755802.00Acta2_Hybridization5
41584.009388.75Acta2_Hybridization5
\n", + "
" + ], + "text/plain": [ + " x y gene\n", + "0 4553.75 5013.00 Acta2_Hybridization5\n", + "1 6229.00 4253.75 Acta2_Hybridization5\n", + "2 2100.00 11130.50 Acta2_Hybridization5\n", + "3 5666.75 5802.00 Acta2_Hybridization5\n", + "4 1584.00 9388.75 Acta2_Hybridization5" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "molecules_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "331bc482-87c1-43b9-8109-a6e3409d2160", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "molecules_df.index = molecules_df.index.rename(\"index\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "07ed3119-3dea-4812-ae41-b617070c9c53", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "molecules_df.to_csv(join(\"data\", \"raw\", \"codeluppi_2018_nature_methods.molecules.tsv\"), sep=\"\\t\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "592bd5ee-e41b-4b72-b9ce-fdc68c0ec173", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Convert cells.h5ad.zarr to cells.h5ad" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "268e1187-34d5-43a1-95e0-2fa31d3ee7aa", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cells_adata = read_zarr(cells_adata_zarr)\n", + "cells_adata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b6d2e8e-f923-4ff7-87c1-9912fab04ca2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cells_adata.write(join(\"data\", \"raw\", \"codeluppi_2018_nature_methods.cells.h5ad\"))" + ] + }, + { + "cell_type": "markdown", + "id": "4b2a05b1-3bdc-4e4b-84c3-321b83f8e10e", + "metadata": {}, + "source": [ + "# Template: visualize FISH-based data with Vitessce" + ] + }, + { + "cell_type": "markdown", + "id": "a73738aa-5f47-41ab-8ff6-503e0abf3657", + "metadata": { + "tags": [] + }, + "source": [ + "## Code to change\n", + "\n", + "Places where you will need to edit the code are marked by `# TODO(template)` comments." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "89b4c5b8-bce9-4dfb-b9c4-f227fa0ae231", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "from os.path import join\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " FileType as ft,\n", + " AnnDataWrapper,\n", + " OmeTiffWrapper,\n", + " MultiImageWrapper,\n", + " BASE_URL_PLACEHOLDER,\n", + ")\n", + "from vitessce.data_utils import (\n", + " rgb_img_to_ome_tiff,\n", + " multiplex_img_to_ome_tiff,\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")\n", + "from anndata import read_h5ad, AnnData\n", + "import pandas as pd\n", + "import numpy as np\n", + "from tifffile import imread\n", + "from skimage.draw import disk" + ] + }, + { + "cell_type": "markdown", + "id": "40c0b1be-6a8f-4515-82fe-84973956336e", + "metadata": {}, + "source": [ + "## Variables to fill in" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "26a63bc6-dd16-4516-8fa1-5c160bb62a6c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#=== INPUTS ===\n", + "# TODO(template): specify the path to the .h5ad file containing cells and per-cell metadata\n", + "PATH_TO_INPUT_CELLS_H5AD = join('.', 'data', 'raw', 'codeluppi_2018_nature_methods.cells.h5ad')\n", + "\n", + "# TODO(template): specify the path to the .tsv file containing molecule x-y coordinates\n", + "PATH_TO_INPUT_MOLECULES_TSV = join('.', 'data', 'raw', 'codeluppi_2018_nature_methods.molecules.tsv')\n", + "MOLECULES_TSV_X_COL = \"x\"\n", + "MOLECULES_TSV_Y_COL = \"y\"\n", + "MOLECULES_TSV_GENE_COL = \"gene\"\n", + "\n", + "# TODO(template): specify the file path for the input background TIFF image\n", + "PATH_TO_INPUT_BACKGROUND_TIFF = join('.', 'data', 'raw', 'codeluppi_2018_nature_methods.image.tiff')\n", + "BACKGROUND_CHANNEL_NAMES = ['nuclei', 'polyT']\n", + "# TODO(template): specify the file path for the input cell segmentation bitmask TIFF image (assumed to have the same XY dimensions as the background image)\n", + "PATH_TO_INPUT_BITMASK_TIFF = join('.', 'data', 'raw', 'codeluppi_2018_nature_methods.bitmask.tiff')\n", + "BITMASK_CHANNEL_NAMES = [\"cells\"]\n", + "\n", + "#=== OUTPUTS ===\n", + "# TODO(template)\n", + "PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR = join('.', 'processed_data', 'fish_based', 'output.cells.anndata.zarr')\n", + "PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR = join('.', 'processed_data', 'fish_based', 'output.molecules.anndata.zarr')\n", + "\n", + "# TODO(template): specify some file paths for the converted OME-TIFF image and bitmask files\n", + "PATH_TO_OUTPUT_IMAGE_OME_TIFF = join('.', 'processed_data', 'fish_based', 'image.ome.tif')\n", + "PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF = join('.', 'processed_data', 'fish_based', 'image.pyramid.ome.tif')\n", + "PATH_TO_OUTPUT_BITMASK_OME_TIFF = join('.', 'processed_data', 'fish_based', 'bitmask.ome.tif')\n", + "PATH_TO_OUTPUT_BITMASK_PYRAMIDAL_OME_TIFF = join('.', 'processed_data', 'fish_based', 'bitmask.pyramid.ome.tif')\n", + "\n", + "\n", + "# TODO(template): this folder should not yet exist, but will be created in step 3.\n", + "PATH_TO_EXPORT_DIRECTORY = join('.', 'exported_fish_based_data')\n", + "\n", + "# TODO(template): provide names and descriptions\n", + "CONFIG_NAME = 'My config'\n", + "CONFIG_DESCRIPTION = 'This dataset reveals...'\n", + "DATASET_NAME = 'My dataset'\n", + "IMG_NAME = 'My image'" + ] + }, + { + "cell_type": "markdown", + "id": "6a6fb045-ca75-45c8-9939-119b8e5d2a5f", + "metadata": {}, + "source": [ + "## 1.1 Convert H5AD to AnnData-Zarr" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d9ec5b65-180c-4a64-a4cb-8b259261161e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cells_adata = read_h5ad(PATH_TO_INPUT_CELLS_H5AD)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "254e4711", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ClusterSubclusterRegion
1Inhibitory neuronsInhibitory CPLayer 6
2Inhibitory neuronsInhibitory CPInternal Capsule Caudoputamen
3Inhibitory neuronsInhibitory CPLayer 4
4Inhibitory neuronsInhibitory CPInternal Capsule Caudoputamen
5Inhibitory neuronsInhibitory CPInternal Capsule Caudoputamen
............
4835VasculatureVascular Smooth MuscleLayer 6
4836VasculatureVascular Smooth MuscleLayer 2-3 lateral
4837VasculatureVascular Smooth MuscleWhite matter
4838VasculatureVascular Smooth MuscleLayer 6
4839VasculatureVascular Smooth MuscleHippocampus
\n", + "

4839 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Cluster Subcluster \\\n", + "1 Inhibitory neurons Inhibitory CP \n", + "2 Inhibitory neurons Inhibitory CP \n", + "3 Inhibitory neurons Inhibitory CP \n", + "4 Inhibitory neurons Inhibitory CP \n", + "5 Inhibitory neurons Inhibitory CP \n", + "... ... ... \n", + "4835 Vasculature Vascular Smooth Muscle \n", + "4836 Vasculature Vascular Smooth Muscle \n", + "4837 Vasculature Vascular Smooth Muscle \n", + "4838 Vasculature Vascular Smooth Muscle \n", + "4839 Vasculature Vascular Smooth Muscle \n", + "\n", + " Region \n", + "1 Layer 6 \n", + "2 Internal Capsule Caudoputamen \n", + "3 Layer 4 \n", + "4 Internal Capsule Caudoputamen \n", + "5 Internal Capsule Caudoputamen \n", + "... ... \n", + "4835 Layer 6 \n", + "4836 Layer 2-3 lateral \n", + "4837 White matter \n", + "4838 Layer 6 \n", + "4839 Hippocampus \n", + "\n", + "[4839 rows x 3 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create an integer index starting at 1 (0 is reserved for the background)\n", + "cells_adata.obs.index = list(range(1, cells_adata.shape[0]+1))\n", + "cells_adata.obs" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9e282613-2b00-4a32-b2dd-15d9e9c20846", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "AnnData object with n_obs × n_vars = 4839 × 33\n", + " obs: 'Cluster', 'Subcluster', 'Region'\n", + " var: 'Fluorophore', 'Hybridization'\n", + " obsm: 'X_centroid', 'X_pca', 'X_segmentations', 'X_spatial', 'X_tsne', 'X_umap'\n", + " varm: 'PCs'\n", + " layers: 'X_uint8'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cells_adata" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1546c023-d614-4337-a3d1-f8b04c49c8e1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mkeller/anaconda3/envs/vitessce-tutorial-env/lib/python3.8/site-packages/vitessce/data_utils/anndata.py:153: FutureWarning: X.dtype being converted to np.float32 from uint16. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n", + " adata = AnnData(X=new_X, obs=new_obs, var=new_var, obsm=new_obsm, varm=new_varm, layers=new_layers)\n", + "/Users/mkeller/anaconda3/envs/vitessce-tutorial-env/lib/python3.8/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.\n", + " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n" + ] + } + ], + "source": [ + "cells_adata = optimize_adata(\n", + " cells_adata,\n", + " # TODO(template): Specify the columns and keys that will be used in the visualization.\n", + " obs_cols=[\"Cluster\", \"Subcluster\", \"Region\"],\n", + " var_cols=[\"Fluorophore\", \"Hybridization\"],\n", + " obsm_keys=[\"X_pca\", \"X_tsne\", \"X_umap\", \"X_spatial\"],\n", + " optimize_X=True,\n", + " # Vitessce plays nicely with dense matrices saved with chunking\n", + " # and this one is small enough that dense is not a huge overhead.\n", + " to_dense_X=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7dc753bb-51a8-400b-9721-d39048b5de83", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cells_adata.write_zarr(PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR, chunks=[cells_adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "id": "425f4c97-8ae1-4c77-b5a4-5c3f3d0c4cad", + "metadata": {}, + "source": [ + "## 1.2 Convert TSV to AnnData-Zarr" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2d5a12ec-c121-4076-8357-89efac0d53fb", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mkeller/anaconda3/envs/vitessce-tutorial-env/lib/python3.8/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.\n", + " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n" + ] + } + ], + "source": [ + "molecules_df = pd.read_csv(PATH_TO_INPUT_MOLECULES_TSV, sep=\"\\t\", index_col=0)\n", + "obs_df = molecules_df[[MOLECULES_TSV_GENE_COL]].rename(columns={ MOLECULES_TSV_GENE_COL: \"gene\" })\n", + "xy_arr = molecules_df[[MOLECULES_TSV_X_COL, MOLECULES_TSV_Y_COL]].values\n", + "\n", + "molecules_adata = AnnData(\n", + " obs=obs_df,\n", + " obsm={ \"X_spatial\": xy_arr }\n", + ")\n", + "\n", + "molecules_adata = optimize_adata(\n", + " molecules_adata,\n", + " # TODO(template): Specify the columns and keys that will be used in the visualization.\n", + " obs_cols=[\"gene\"],\n", + " obsm_keys=[\"X_spatial\"],\n", + " remove_X=True\n", + ")\n", + "\n", + "molecules_adata.write_zarr(PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR)" + ] + }, + { + "cell_type": "markdown", + "id": "3dcbd7eb-40f1-446e-be84-080cecc3e4b0", + "metadata": {}, + "source": [ + "## 1.2 Convert TIFFs to OME-TIFFs" + ] + }, + { + "cell_type": "markdown", + "id": "21182034-21ee-4d06-ac24-65924bf76e31", + "metadata": { + "tags": [] + }, + "source": [ + "### 1.2.1 Background image" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8985aed2-9ee2-426a-b2de-b62d43254d89", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2, 12917, 7932)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "img_arr = imread(PATH_TO_INPUT_BACKGROUND_TIFF)\n", + "img_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "52f3a50d-ab6a-4aa4-8984-38bdebf88a4d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# If needed, update the array axes so they are in CYX order to enable conversion to OME-TIFF.\n", + "#img_arr = img_arr.transpose((2, 0, 1))\n", + "#img_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "1acfdeb8-37fa-4a71-a0db-1854e544da57", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "multiplex_img_to_ome_tiff(img_arr, BACKGROUND_CHANNEL_NAMES, PATH_TO_OUTPUT_IMAGE_OME_TIFF, axes=\"CYX\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "b346e9b5-e14a-401c-8ef5-e651e082508b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./processed_data/fish_based/image.ome.tif\n", + "OMETiffReader initializing ./processed_data/fish_based/image.ome.tif\n", + "Reading IFDs\n", + "Populating metadata\n", + "[OME-TIFF] -> ./processed_data/fish_based/image.pyramid.ome.tif [OME-TIFF]\n", + "Tile size = 512 x 512\n", + "\tConverted 1/2 planes (50%)\n", + "\tConverted 2/2 planes (100%)\n", + "Tile size = 512 x 512\n", + "\tConverted 1/2 planes (50%)\n", + "\tConverted 2/2 planes (100%)\n", + "Tile size = 512 x 512\n", + "\tConverted 1/2 planes (50%)\n", + "\tConverted 2/2 planes (100%)\n", + "Tile size = 512 x 512\n", + "\tConverted 1/2 planes (50%)\n", + "\tConverted 2/2 planes (100%)\n", + "Tile size = 495 x 512\n", + "\tConverted 2/2 planes (100%)\n", + "Tile size = 247 x 66\n", + "\tConverted 2/2 planes (100%)\n", + "[done]\n", + "34.427s elapsed (171.08333+2623.3333ms per plane, 864ms overhead)\n" + ] + } + ], + "source": [ + "# For larger images, you will want to comment out the above line and un-comment the line below,\n", + "# to increase the tile size (128 -> 512) and the number of pyramid resolutions (2 -> 6).\n", + "!BF_MAX_MEM=2048m ~/software/bftools/bfconvert -overwrite -tilex 512 -tiley 512 -pyramid-resolutions 6 -pyramid-scale 2 -compression LZW {PATH_TO_OUTPUT_IMAGE_OME_TIFF} {PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF}" + ] + }, + { + "cell_type": "markdown", + "id": "503fc7ec-6afe-4c4e-9835-f2f1c06ec0de", + "metadata": { + "tags": [] + }, + "source": [ + "### 1.2.2 Segmentation bitmask / label image" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b7b747c9-7aa1-41f4-a1d6-3c87dfc2d424", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, 12917, 7932)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bitmask_arr = imread(PATH_TO_INPUT_BITMASK_TIFF)\n", + "bitmask_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "afd20985-50fe-4058-a52e-25653816a527", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# If needed, update the array axes so they are in CYX order to enable conversion to OME-TIFF.\n", + "#bitmask_arr = bitmask_arr.transpose((2, 0, 1))\n", + "#bitmask_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d9c04175-6a7f-4403-9840-4fdb1ee1103f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "multiplex_img_to_ome_tiff(bitmask_arr, BITMASK_CHANNEL_NAMES, PATH_TO_OUTPUT_BITMASK_OME_TIFF, axes=\"CYX\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6dca683a-131f-4366-9f53-81497351771a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./processed_data/fish_based/bitmask.ome.tif\n", + "OMETiffReader initializing ./processed_data/fish_based/bitmask.ome.tif\n", + "Reading IFDs\n", + "Populating metadata\n", + "[OME-TIFF] -> ./processed_data/fish_based/bitmask.pyramid.ome.tif [OME-TIFF]\n", + "Tile size = 512 x 512\n", + "Reading IFDs\n", + "Populating metadata\n", + "\tConverted 1/1 planes (100%)\n", + "Tile size = 512 x 512\n", + "\tConverted 1/1 planes (100%)\n", + "Tile size = 512 x 512\n", + "\tConverted 1/1 planes (100%)\n", + "Tile size = 512 x 512\n", + "\tConverted 1/1 planes (100%)\n", + "Tile size = 495 x 512\n", + "\tConverted 1/1 planes (100%)\n", + "Tile size = 247 x 16\n", + "\tConverted 1/1 planes (100%)\n", + "[done]\n", + "102.176s elapsed (768.3333+16125.833ms per plane, 776ms overhead)\n" + ] + } + ], + "source": [ + "# For larger images, you will want to comment out the above line and un-comment the line below,\n", + "# to increase the tile size (128 -> 512) and the number of pyramid resolutions (2 -> 6).\n", + "!BF_MAX_MEM=2048m ~/software/bftools/bfconvert -overwrite -tilex 512 -tiley 512 -pyramid-resolutions 6 -pyramid-scale 2 -compression LZW {PATH_TO_OUTPUT_BITMASK_OME_TIFF} {PATH_TO_OUTPUT_BITMASK_PYRAMIDAL_OME_TIFF}" + ] + }, + { + "cell_type": "markdown", + "id": "8052199b-b6a1-4d59-8a05-e15662beb457", + "metadata": {}, + "source": [ + "## 2. Configure the visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "b3d1baeb-de12-4cc3-92d1-d5009fa9e79c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name=CONFIG_NAME, description=CONFIG_DESCRIPTION)\n", + "\n", + "dataset = vc.add_dataset(name=DATASET_NAME).add_object(AnnDataWrapper(\n", + " adata_path=PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR,\n", + " # TODO(template): update the arrays of interest and where they are located in the AnnData object.\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/Cluster\"],\n", + " obs_set_names=[\"Cluster\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " coordination_values={\n", + " \"obsType\": \"cell\",\n", + " \"featureType\": \"gene\",\n", + " \"featureValueType\": \"expression\"\n", + " }\n", + ")).add_object(AnnDataWrapper(\n", + " adata_path=PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR,\n", + " # TODO(template): update the arrays of interest and where they are located in the AnnData object.\n", + " obs_locations_path=\"obsm/X_spatial\",\n", + " obs_labels_path=\"obs/gene\",\n", + " coordination_values={\n", + " \"obsType\": \"molecule\"\n", + " }\n", + ")).add_object(MultiImageWrapper([\n", + " OmeTiffWrapper(\n", + " img_path=PATH_TO_OUTPUT_BITMASK_PYRAMIDAL_OME_TIFF,\n", + " name=\"Cell segmentations\",\n", + " is_bitmask=True\n", + " ),\n", + " OmeTiffWrapper(\n", + " img_path=PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF,\n", + " name=\"Background image\",\n", + " is_bitmask=False\n", + " ),\n", + "], use_physical_size_scaling=True))\n", + "\n", + "# TODO(template): Update the views of interest.\n", + "spatial_colored_by_cluster = vc.add_view(cm.SPATIAL, dataset=dataset)\n", + "# TODO: update the mapping to match one of the elements of `obs_embedding_names` above, if necessary.\n", + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "\n", + "layer_controller = vc.add_view(cm.LAYER_CONTROLLER, dataset=dataset).set_props(disableChannelsIfRgbDetected=True)\n", + "spot_set_manager = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "gene_list = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset).set_props(transpose=True)\n", + "\n", + "# TODO(template): Update the layout of views.\n", + "vc.layout(\n", + " (spatial_colored_by_cluster | scatterplot)\n", + " / ((layer_controller | spot_set_manager) | (gene_list | heatmap))\n", + ");" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "c07cb582-6c3d-45d7-937b-e54bd8fc93d8", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# TODO(template): configure view coordinations and initial coordination values\n", + "spatial_views = [\n", + " spatial_colored_by_cluster,\n", + " layer_controller,\n", + "]\n", + "all_views = [\n", + " *spatial_views,\n", + " spot_set_manager,\n", + " gene_list,\n", + " heatmap,\n", + " scatterplot,\n", + "]\n", + "\n", + "spatial_point_layer_value = {\n", + " \"opacity\": 1,\n", + " \"radius\": 20,\n", + " \"visible\": True\n", + "}\n", + "\n", + "spatial_segmentation_layer_value = [{\n", + " \"type\": \"bitmask\",\n", + " \"visible\": True,\n", + " \"index\": 0,\n", + " \"colormap\": None,\n", + " \"transparentColor\": None,\n", + " \"opacity\": 1,\n", + " \"domainType\": \"Min/Max\",\n", + " \"channels\": [\n", + " {\n", + " \"selection\": { \"c\": 0, \"t\": 0, \"z\": 0 },\n", + " \"color\": [255, 0, 0],\n", + " \"visible\": True,\n", + " \"slider\": [0, 1]\n", + " }\n", + " ]\n", + "}]\n", + "\n", + "spatial_image_layer_value = [{\n", + " \"type\": \"raster\",\n", + " \"index\": 0,\n", + " \"colormap\": None,\n", + " \"transparentColor\": None,\n", + " \"opacity\": 1,\n", + " \"domainType\": \"Min/Max\",\n", + " \"channels\": [\n", + " {\n", + " \"selection\": { \"c\": 0, \"t\": 0, \"z\": 0 },\n", + " \"color\": [255, 0, 0],\n", + " \"visible\": True,\n", + " \"slider\": [0, 255]\n", + " },\n", + " {\n", + " \"selection\": { \"c\": 1, \"t\": 0, \"z\": 0 },\n", + " \"color\": [0, 255, 0],\n", + " \"visible\": True,\n", + " \"slider\": [0, 255]\n", + " },\n", + " {\n", + " \"selection\": { \"c\": 2, \"t\": 0, \"z\": 0 },\n", + " \"color\": [0, 0, 255],\n", + " \"visible\": True,\n", + " \"slider\": [0, 255]\n", + " }\n", + " ]\n", + "}]\n", + "\n", + "vc.link_views(spatial_views, [ct.SPATIAL_IMAGE_LAYER, ct.SPATIAL_SEGMENTATION_LAYER, ct.SPATIAL_POINT_LAYER], [spatial_image_layer_value, spatial_segmentation_layer_value, spatial_point_layer_value])\n" + ] + }, + { + "cell_type": "markdown", + "id": "098add3f-634e-492e-918b-eacf24598a1e", + "metadata": {}, + "source": [ + "### Render the widget" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "0b62f4fe-ad8b-458b-9cfe-a54a647138a1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'http://vitessce.io/#?theme=light&url=data:,%7B%22version%22%3A+%221.0.15%22%2C+%22name%22%3A+%22My+config%22%2C+%22description%22%3A+%22This+dataset+reveals...%22%2C+%22datasets%22%3A+%5B%7B%22uid%22%3A+%22A%22%2C+%22name%22%3A+%22My+dataset%22%2C+%22files%22%3A+%5B%7B%22fileType%22%3A+%22anndata.zarr%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F0%2F44170ceb-30d8-49bb-81a2-2a1fec250bea%22%2C+%22options%22%3A+%7B%22obsEmbedding%22%3A+%5B%7B%22path%22%3A+%22obsm%2FX_umap%22%2C+%22dims%22%3A+%5B0%2C+1%5D%2C+%22embeddingType%22%3A+%22UMAP%22%7D%5D%2C+%22obsSets%22%3A+%5B%7B%22name%22%3A+%22Cluster%22%2C+%22path%22%3A+%22obs%2FCluster%22%7D%5D%2C+%22obsFeatureMatrix%22%3A+%7B%22path%22%3A+%22X%22%7D%7D%2C+%22coordinationValues%22%3A+%7B%22obsType%22%3A+%22cell%22%2C+%22featureType%22%3A+%22gene%22%2C+%22featureValueType%22%3A+%22expression%22%7D%7D%2C+%7B%22fileType%22%3A+%22anndata.zarr%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F1%2Ff514c617-14fd-40a5-b43c-dc8f4e1ff454%22%2C+%22options%22%3A+%7B%22obsLocations%22%3A+%7B%22path%22%3A+%22obsm%2FX_spatial%22%7D%2C+%22obsLabels%22%3A+%7B%22path%22%3A+%22obs%2Fgene%22%7D%7D%2C+%22coordinationValues%22%3A+%7B%22obsType%22%3A+%22molecule%22%7D%7D%2C+%7B%22fileType%22%3A+%22raster.json%22%2C+%22options%22%3A+%7B%22schemaVersion%22%3A+%220.0.2%22%2C+%22usePhysicalSizeScaling%22%3A+true%2C+%22images%22%3A+%5B%7B%22name%22%3A+%22Cell+segmentations%22%2C+%22type%22%3A+%22ome-tiff%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F2%2F4ecb04d7-dfa2-488f-8b8b-55688ea7d188%22%2C+%22metadata%22%3A+%7B%22omeTiffOffsetsUrl%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F2%2Fb29f0a86-2809-4fe2-9ade-b7f5b8b9e11f%22%2C+%22isBitmask%22%3A+true%7D%7D%2C+%7B%22name%22%3A+%22Background+image%22%2C+%22type%22%3A+%22ome-tiff%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F2%2Fbc72944b-f9ee-43e4-b88f-90779234b39a%22%2C+%22metadata%22%3A+%7B%22omeTiffOffsetsUrl%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F2%2F49368b36-6741-4530-bfc2-241630a81ef4%22%2C+%22isBitmask%22%3A+false%7D%7D%5D%2C+%22renderLayers%22%3A+%5B%22Cell+segmentations%22%2C+%22Background+image%22%5D%7D%7D%5D%7D%5D%2C+%22coordinationSpace%22%3A+%7B%22dataset%22%3A+%7B%22A%22%3A+%22A%22%7D%2C+%22embeddingType%22%3A+%7B%22A%22%3A+%22UMAP%22%7D%2C+%22spatialImageLayer%22%3A+%7B%22A%22%3A+%5B%7B%22type%22%3A+%22raster%22%2C+%22index%22%3A+0%2C+%22colormap%22%3A+null%2C+%22transparentColor%22%3A+null%2C+%22opacity%22%3A+1%2C+%22domainType%22%3A+%22Min%2FMax%22%2C+%22channels%22%3A+%5B%7B%22selection%22%3A+%7B%22c%22%3A+0%2C+%22t%22%3A+0%2C+%22z%22%3A+0%7D%2C+%22color%22%3A+%5B255%2C+0%2C+0%5D%2C+%22visible%22%3A+true%2C+%22slider%22%3A+%5B0%2C+255%5D%7D%2C+%7B%22selection%22%3A+%7B%22c%22%3A+1%2C+%22t%22%3A+0%2C+%22z%22%3A+0%7D%2C+%22color%22%3A+%5B0%2C+255%2C+0%5D%2C+%22visible%22%3A+true%2C+%22slider%22%3A+%5B0%2C+255%5D%7D%2C+%7B%22selection%22%3A+%7B%22c%22%3A+2%2C+%22t%22%3A+0%2C+%22z%22%3A+0%7D%2C+%22color%22%3A+%5B0%2C+0%2C+255%5D%2C+%22visible%22%3A+true%2C+%22slider%22%3A+%5B0%2C+255%5D%7D%5D%7D%5D%7D%2C+%22spatialSegmentationLayer%22%3A+%7B%22A%22%3A+%5B%7B%22type%22%3A+%22bitmask%22%2C+%22visible%22%3A+true%2C+%22index%22%3A+0%2C+%22colormap%22%3A+null%2C+%22transparentColor%22%3A+null%2C+%22opacity%22%3A+1%2C+%22domainType%22%3A+%22Min%2FMax%22%2C+%22channels%22%3A+%5B%7B%22selection%22%3A+%7B%22c%22%3A+0%2C+%22t%22%3A+0%2C+%22z%22%3A+0%7D%2C+%22color%22%3A+%5B255%2C+0%2C+0%5D%2C+%22visible%22%3A+true%2C+%22slider%22%3A+%5B0%2C+1%5D%7D%5D%7D%5D%7D%2C+%22spatialPointLayer%22%3A+%7B%22A%22%3A+%7B%22opacity%22%3A+1%2C+%22radius%22%3A+20%2C+%22visible%22%3A+true%7D%7D%7D%2C+%22layout%22%3A+%5B%7B%22component%22%3A+%22spatial%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%2C+%22spatialImageLayer%22%3A+%22A%22%2C+%22spatialSegmentationLayer%22%3A+%22A%22%2C+%22spatialPointLayer%22%3A+%22A%22%7D%2C+%22x%22%3A+0.0%2C+%22y%22%3A+0.0%2C+%22w%22%3A+6.0%2C+%22h%22%3A+6.0%7D%2C+%7B%22component%22%3A+%22scatterplot%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%2C+%22embeddingType%22%3A+%22A%22%7D%2C+%22x%22%3A+6.0%2C+%22y%22%3A+0.0%2C+%22w%22%3A+6.0%2C+%22h%22%3A+6.0%7D%2C+%7B%22component%22%3A+%22layerController%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%2C+%22spatialImageLayer%22%3A+%22A%22%2C+%22spatialSegmentationLayer%22%3A+%22A%22%2C+%22spatialPointLayer%22%3A+%22A%22%7D%2C+%22x%22%3A+0.0%2C+%22y%22%3A+6.0%2C+%22w%22%3A+3.0%2C+%22h%22%3A+6.0%2C+%22props%22%3A+%7B%22disableChannelsIfRgbDetected%22%3A+true%7D%7D%2C+%7B%22component%22%3A+%22obsSets%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%7D%2C+%22x%22%3A+3.0%2C+%22y%22%3A+6.0%2C+%22w%22%3A+3.0%2C+%22h%22%3A+6.0%7D%2C+%7B%22component%22%3A+%22featureList%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%7D%2C+%22x%22%3A+6.0%2C+%22y%22%3A+6.0%2C+%22w%22%3A+3.0%2C+%22h%22%3A+6.0%7D%2C+%7B%22component%22%3A+%22heatmap%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%7D%2C+%22x%22%3A+9.0%2C+%22y%22%3A+6.0%2C+%22w%22%3A+3.0%2C+%22h%22%3A+6.0%2C+%22props%22%3A+%7B%22transpose%22%3A+true%7D%7D%5D%2C+%22initStrategy%22%3A+%22auto%22%7D'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vc.web_app()" + ] + }, + { + "cell_type": "markdown", + "id": "da6e4cb6-5c55-4839-8b7a-ca13c6e319ad", + "metadata": {}, + "source": [ + "## 3. Export the configuration and data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fea1850-7098-44ce-823b-30b77ab84b3b", + "metadata": {}, + "outputs": [], + "source": [ + "# TODO(template): The export function does not clear the contents of the `out_dir`.\n", + "# You may want to ensure that this folder does not yet exist:\n", + "os.makedirs(PATH_TO_EXPORT_DIRECTORY, exist_ok=False)\n", + "\n", + "config_dict = vc.export(to=\"files\", base_url=BASE_URL_PLACEHOLDER, out_dir=PATH_TO_EXPORT_DIRECTORY)\n", + "\n", + "# Use `open` to create a new empty file at ./exported_data/vitessce.json\n", + "with open(join(PATH_TO_EXPORT_DIRECTORY, \"vitessce.json\"), \"w\") as f:\n", + " json.dump(config_dict, f)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From ebb7234bd3ab552392fc2b18c9d9583fa7d1e132 Mon Sep 17 00:00:00 2001 From: mkeller <7525285+keller-mark@users.noreply.github.com> Date: Fri, 31 Mar 2023 16:44:00 -0400 Subject: [PATCH 2/2] Remove raw data processing --- templates/fish-based.ipynb | 486 +------------------------------------ 1 file changed, 2 insertions(+), 484 deletions(-) diff --git a/templates/fish-based.ipynb b/templates/fish-based.ipynb index e9fa454..ff65087 100644 --- a/templates/fish-based.ipynb +++ b/templates/fish-based.ipynb @@ -1,487 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "e819ce7a-fb23-4c0a-8efd-1b7cdcb552e3", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# TEMP: download/construct \"raw\" dataset for a complete example" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "4b0ace49-3c42-4c11-b3e3-198d5c2dba6f", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from ome_zarr.io import parse_url\n", - "from ome_zarr.reader import Reader\n", - "\n", - "from tifffile import imwrite\n", - "from os.path import join\n", - "import zarr\n", - "from anndata import read_zarr, AnnData\n", - "import numpy as np\n", - "import pandas as pd\n", - "from skimage.draw import polygon2mask\n", - "import tqdm" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "a2a4b8f0-870f-4bbb-8724-437cd79638c3", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Adapting http://vitessce.io/#?dataset=codeluppi-2018-via-zarr" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "c6ab9d1a-6dbe-4d62-89ca-be41415f369b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "cells_adata_zarr = \"data/raw/codeluppi_2018_nature_methods.cells.h5ad.zarr\"\n", - "cells_csv = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018/codeluppi_2018_nature_methods.cells.csv\"\n", - "molecules_adata_zarr = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018-via-zarr/codeluppi_2018_nature_methods.molecules.h5ad.zarr\"\n", - "molecules_csv = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018/codeluppi_2018_nature_methods.molecules.csv\"\n", - "background_ome_zarr = \"https://vitessce-data.storage.googleapis.com/0.0.34/main/codeluppi-2018/codeluppi_2018_nature_methods.image.ome.zarr\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2d3f3bc8-1d31-41ac-ac02-65929ca4afdb", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Convert OME-Zarr to normal TIFF" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e3d5309e-1b02-4ffd-b943-57f449d5da49", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# read the image data\n", - "store = parse_url(background_ome_zarr, mode=\"r\")\n", - "reader = Reader(store)\n", - "# nodes may include images, labels etc\n", - "nodes = list(reader())\n", - "# first node will be the image pixel data\n", - "image_node = nodes[0]\n", - "\n", - "dask_data = image_node.data\n", - "\n", - "hires_arr = dask_data[2]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "32b87d3c-b18a-407a-b0b0-6ec88cedf199", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "RAW_DIR = join(\"data\", \"raw\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b591bf59-3f68-4d56-b0f1-669e04125c26", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "imwrite(\n", - " join(RAW_DIR, 'codeluppi_2018_nature_methods.image.tiff'),\n", - " hires_arr,\n", - " photometric='minisblack',\n", - " metadata={'axes': 'CYX'}\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4487ebfd-4480-48dc-8b7d-4a6c6eeab957", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Convert cells.zarr/obsm/X_segmentations to a bitmask TIFF" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "00b5563b-e70e-4675-a94a-338c56ba1c27", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "bitmask_shape = (hires_arr.shape[2], hires_arr.shape[1])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfea1161-616b-425e-942d-b19232036d96", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "bitmask_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c9d6800f-0119-4b5f-8eb8-a009b214387c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "bitmask_arr = np.zeros(bitmask_shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16d6ac9b-a349-43a8-935c-f7c6bec743cf", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "cells_store = zarr.open(cells_adata_zarr)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5e47371-49b4-44cc-85a8-223b106ffc10", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "segmentations_arr = cells_store['obsm/X_segmentations'][()]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7587becd-9cf1-47ac-b412-2cea40c2a298", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "segmentations_arr.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c311d30e-60c4-43ea-84f4-ac0c6add5516", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "for i in tqdm.tqdm(range(segmentations_arr.shape[0])):\n", - " # Re-scale the vertices so they match the background scale\n", - " vertices = np.multiply(segmentations_arr[i], 0.25)\n", - " mask = polygon2mask(bitmask_shape, vertices)\n", - " bitmask_arr[mask] = i+1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9625735f-98fe-4487-8f4e-7322ccbc1ae8", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Update the array axes so they are in CYX order to enable conversion to OME-TIFF.\n", - "bitmask_arr = bitmask_arr.transpose((1, 0)) # (y, x)\n", - "bitmask_arr = bitmask_arr[np.newaxis, :] # (c, y, x)\n", - "bitmask_arr.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ae851409-4be2-4878-9b8a-0af18b211d1f", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "imwrite(\n", - " join(RAW_DIR, 'codeluppi_2018_nature_methods.bitmask.tiff'),\n", - " bitmask_arr,\n", - " photometric='minisblack',\n", - " metadata={'axes': 'CYX'}\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d600e4a6-6445-45ec-a6e2-57b12674fd5b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Convert molecules.zarr to TSV (one row per molecule)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "e24cfd60-3574-4f6a-9109-1f3b8da8c1e6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "molecules_store = zarr.open(molecules_adata_zarr)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "85350784-b365-4766-8796-4420cb1b9251", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "positions_arr = molecules_store['obsm/X_spatial'][()]\n", - "gene_cats_arr = molecules_store['obs/__categories/Gene'][()]\n", - "gene_codes_arr = molecules_store['obs/Gene'][()]\n", - "genes_arr = [gene_cats_arr[cat_i] for cat_i in gene_codes_arr]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "3d3a7763-f600-4209-9c8d-9ab7fee3ac2e", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(1976659, 2)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "positions_arr.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "76b7ec3d-b11d-4599-b6a0-ba637c488f2d", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "molecules_df = pd.DataFrame(index=range(positions_arr.shape[0]))\n", - "molecules_df[\"x\"] = np.multiply(positions_arr[:, 0], 0.25)\n", - "molecules_df[\"y\"] = np.multiply(positions_arr[:, 1], 0.25)\n", - "molecules_df[\"gene\"] = genes_arr" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "c384a702-57de-4072-9333-4b7b76a73a7d", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xygene
04553.755013.00Acta2_Hybridization5
16229.004253.75Acta2_Hybridization5
22100.0011130.50Acta2_Hybridization5
35666.755802.00Acta2_Hybridization5
41584.009388.75Acta2_Hybridization5
\n", - "
" - ], - "text/plain": [ - " x y gene\n", - "0 4553.75 5013.00 Acta2_Hybridization5\n", - "1 6229.00 4253.75 Acta2_Hybridization5\n", - "2 2100.00 11130.50 Acta2_Hybridization5\n", - "3 5666.75 5802.00 Acta2_Hybridization5\n", - "4 1584.00 9388.75 Acta2_Hybridization5" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "molecules_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "331bc482-87c1-43b9-8109-a6e3409d2160", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "molecules_df.index = molecules_df.index.rename(\"index\")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "07ed3119-3dea-4812-ae41-b617070c9c53", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "molecules_df.to_csv(join(\"data\", \"raw\", \"codeluppi_2018_nature_methods.molecules.tsv\"), sep=\"\\t\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "592bd5ee-e41b-4b72-b9ce-fdc68c0ec173", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Convert cells.h5ad.zarr to cells.h5ad" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "268e1187-34d5-43a1-95e0-2fa31d3ee7aa", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "cells_adata = read_zarr(cells_adata_zarr)\n", - "cells_adata" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3b6d2e8e-f923-4ff7-87c1-9912fab04ca2", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "cells_adata.write(join(\"data\", \"raw\", \"codeluppi_2018_nature_methods.cells.h5ad\"))" - ] - }, { "cell_type": "markdown", "id": "4b2a05b1-3bdc-4e4b-84c3-321b83f8e10e", @@ -572,11 +90,11 @@ "BITMASK_CHANNEL_NAMES = [\"cells\"]\n", "\n", "#=== OUTPUTS ===\n", - "# TODO(template)\n", + "# TODO(template): specify file paths for the converted AnnData objects\n", "PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR = join('.', 'processed_data', 'fish_based', 'output.cells.anndata.zarr')\n", "PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR = join('.', 'processed_data', 'fish_based', 'output.molecules.anndata.zarr')\n", "\n", - "# TODO(template): specify some file paths for the converted OME-TIFF image and bitmask files\n", + "# TODO(template): specify file paths for the converted OME-TIFF image and bitmask files\n", "PATH_TO_OUTPUT_IMAGE_OME_TIFF = join('.', 'processed_data', 'fish_based', 'image.ome.tif')\n", "PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF = join('.', 'processed_data', 'fish_based', 'image.pyramid.ome.tif')\n", "PATH_TO_OUTPUT_BITMASK_OME_TIFF = join('.', 'processed_data', 'fish_based', 'bitmask.ome.tif')\n",