From 902cfbd90ac40358652a60d251c288b3942d0934 Mon Sep 17 00:00:00 2001
From: mkeller <7525285+keller-mark@users.noreply.github.com>
Date: Fri, 31 Mar 2023 16:38:08 -0400
Subject: [PATCH 1/2] FISH-based data template
---
templates/fish-based.ipynb | 1332 ++++++++++++++++++++++++++++++++++++
1 file changed, 1332 insertions(+)
create mode 100644 templates/fish-based.ipynb
diff --git a/templates/fish-based.ipynb b/templates/fish-based.ipynb
new file mode 100644
index 0000000..e9fa454
--- /dev/null
+++ b/templates/fish-based.ipynb
@@ -0,0 +1,1332 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e819ce7a-fb23-4c0a-8efd-1b7cdcb552e3",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# TEMP: download/construct \"raw\" dataset for a complete example"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "4b0ace49-3c42-4c11-b3e3-198d5c2dba6f",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from ome_zarr.io import parse_url\n",
+ "from ome_zarr.reader import Reader\n",
+ "\n",
+ "from tifffile import imwrite\n",
+ "from os.path import join\n",
+ "import zarr\n",
+ "from anndata import read_zarr, AnnData\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from skimage.draw import polygon2mask\n",
+ "import tqdm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "a2a4b8f0-870f-4bbb-8724-437cd79638c3",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Adapting http://vitessce.io/#?dataset=codeluppi-2018-via-zarr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "c6ab9d1a-6dbe-4d62-89ca-be41415f369b",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "cells_adata_zarr = \"data/raw/codeluppi_2018_nature_methods.cells.h5ad.zarr\"\n",
+ "cells_csv = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018/codeluppi_2018_nature_methods.cells.csv\"\n",
+ "molecules_adata_zarr = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018-via-zarr/codeluppi_2018_nature_methods.molecules.h5ad.zarr\"\n",
+ "molecules_csv = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018/codeluppi_2018_nature_methods.molecules.csv\"\n",
+ "background_ome_zarr = \"https://vitessce-data.storage.googleapis.com/0.0.34/main/codeluppi-2018/codeluppi_2018_nature_methods.image.ome.zarr\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2d3f3bc8-1d31-41ac-ac02-65929ca4afdb",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Convert OME-Zarr to normal TIFF"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e3d5309e-1b02-4ffd-b943-57f449d5da49",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# read the image data\n",
+ "store = parse_url(background_ome_zarr, mode=\"r\")\n",
+ "reader = Reader(store)\n",
+ "# nodes may include images, labels etc\n",
+ "nodes = list(reader())\n",
+ "# first node will be the image pixel data\n",
+ "image_node = nodes[0]\n",
+ "\n",
+ "dask_data = image_node.data\n",
+ "\n",
+ "hires_arr = dask_data[2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "32b87d3c-b18a-407a-b0b0-6ec88cedf199",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "RAW_DIR = join(\"data\", \"raw\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b591bf59-3f68-4d56-b0f1-669e04125c26",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "imwrite(\n",
+ " join(RAW_DIR, 'codeluppi_2018_nature_methods.image.tiff'),\n",
+ " hires_arr,\n",
+ " photometric='minisblack',\n",
+ " metadata={'axes': 'CYX'}\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4487ebfd-4480-48dc-8b7d-4a6c6eeab957",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Convert cells.zarr/obsm/X_segmentations to a bitmask TIFF"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "00b5563b-e70e-4675-a94a-338c56ba1c27",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "bitmask_shape = (hires_arr.shape[2], hires_arr.shape[1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cfea1161-616b-425e-942d-b19232036d96",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "bitmask_shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c9d6800f-0119-4b5f-8eb8-a009b214387c",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "bitmask_arr = np.zeros(bitmask_shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "16d6ac9b-a349-43a8-935c-f7c6bec743cf",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "cells_store = zarr.open(cells_adata_zarr)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c5e47371-49b4-44cc-85a8-223b106ffc10",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "segmentations_arr = cells_store['obsm/X_segmentations'][()]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7587becd-9cf1-47ac-b412-2cea40c2a298",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "segmentations_arr.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c311d30e-60c4-43ea-84f4-ac0c6add5516",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "for i in tqdm.tqdm(range(segmentations_arr.shape[0])):\n",
+ " # Re-scale the vertices so they match the background scale\n",
+ " vertices = np.multiply(segmentations_arr[i], 0.25)\n",
+ " mask = polygon2mask(bitmask_shape, vertices)\n",
+ " bitmask_arr[mask] = i+1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9625735f-98fe-4487-8f4e-7322ccbc1ae8",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Update the array axes so they are in CYX order to enable conversion to OME-TIFF.\n",
+ "bitmask_arr = bitmask_arr.transpose((1, 0)) # (y, x)\n",
+ "bitmask_arr = bitmask_arr[np.newaxis, :] # (c, y, x)\n",
+ "bitmask_arr.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ae851409-4be2-4878-9b8a-0af18b211d1f",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "imwrite(\n",
+ " join(RAW_DIR, 'codeluppi_2018_nature_methods.bitmask.tiff'),\n",
+ " bitmask_arr,\n",
+ " photometric='minisblack',\n",
+ " metadata={'axes': 'CYX'}\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d600e4a6-6445-45ec-a6e2-57b12674fd5b",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Convert molecules.zarr to TSV (one row per molecule)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "e24cfd60-3574-4f6a-9109-1f3b8da8c1e6",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "molecules_store = zarr.open(molecules_adata_zarr)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "85350784-b365-4766-8796-4420cb1b9251",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "positions_arr = molecules_store['obsm/X_spatial'][()]\n",
+ "gene_cats_arr = molecules_store['obs/__categories/Gene'][()]\n",
+ "gene_codes_arr = molecules_store['obs/Gene'][()]\n",
+ "genes_arr = [gene_cats_arr[cat_i] for cat_i in gene_codes_arr]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "3d3a7763-f600-4209-9c8d-9ab7fee3ac2e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(1976659, 2)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "positions_arr.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "76b7ec3d-b11d-4599-b6a0-ba637c488f2d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "molecules_df = pd.DataFrame(index=range(positions_arr.shape[0]))\n",
+ "molecules_df[\"x\"] = np.multiply(positions_arr[:, 0], 0.25)\n",
+ "molecules_df[\"y\"] = np.multiply(positions_arr[:, 1], 0.25)\n",
+ "molecules_df[\"gene\"] = genes_arr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "c384a702-57de-4072-9333-4b7b76a73a7d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " x | \n",
+ " y | \n",
+ " gene | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 4553.75 | \n",
+ " 5013.00 | \n",
+ " Acta2_Hybridization5 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 6229.00 | \n",
+ " 4253.75 | \n",
+ " Acta2_Hybridization5 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2100.00 | \n",
+ " 11130.50 | \n",
+ " Acta2_Hybridization5 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 5666.75 | \n",
+ " 5802.00 | \n",
+ " Acta2_Hybridization5 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1584.00 | \n",
+ " 9388.75 | \n",
+ " Acta2_Hybridization5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " x y gene\n",
+ "0 4553.75 5013.00 Acta2_Hybridization5\n",
+ "1 6229.00 4253.75 Acta2_Hybridization5\n",
+ "2 2100.00 11130.50 Acta2_Hybridization5\n",
+ "3 5666.75 5802.00 Acta2_Hybridization5\n",
+ "4 1584.00 9388.75 Acta2_Hybridization5"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "molecules_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "331bc482-87c1-43b9-8109-a6e3409d2160",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "molecules_df.index = molecules_df.index.rename(\"index\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "07ed3119-3dea-4812-ae41-b617070c9c53",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "molecules_df.to_csv(join(\"data\", \"raw\", \"codeluppi_2018_nature_methods.molecules.tsv\"), sep=\"\\t\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "592bd5ee-e41b-4b72-b9ce-fdc68c0ec173",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Convert cells.h5ad.zarr to cells.h5ad"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "268e1187-34d5-43a1-95e0-2fa31d3ee7aa",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "cells_adata = read_zarr(cells_adata_zarr)\n",
+ "cells_adata"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3b6d2e8e-f923-4ff7-87c1-9912fab04ca2",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "cells_adata.write(join(\"data\", \"raw\", \"codeluppi_2018_nature_methods.cells.h5ad\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4b2a05b1-3bdc-4e4b-84c3-321b83f8e10e",
+ "metadata": {},
+ "source": [
+ "# Template: visualize FISH-based data with Vitessce"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a73738aa-5f47-41ab-8ff6-503e0abf3657",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Code to change\n",
+ "\n",
+ "Places where you will need to edit the code are marked by `# TODO(template)` comments."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "89b4c5b8-bce9-4dfb-b9c4-f227fa0ae231",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import json\n",
+ "from os.path import join\n",
+ "from vitessce import (\n",
+ " VitessceConfig,\n",
+ " Component as cm,\n",
+ " CoordinationType as ct,\n",
+ " FileType as ft,\n",
+ " AnnDataWrapper,\n",
+ " OmeTiffWrapper,\n",
+ " MultiImageWrapper,\n",
+ " BASE_URL_PLACEHOLDER,\n",
+ ")\n",
+ "from vitessce.data_utils import (\n",
+ " rgb_img_to_ome_tiff,\n",
+ " multiplex_img_to_ome_tiff,\n",
+ " optimize_adata,\n",
+ " VAR_CHUNK_SIZE,\n",
+ ")\n",
+ "from anndata import read_h5ad, AnnData\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from tifffile import imread\n",
+ "from skimage.draw import disk"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "40c0b1be-6a8f-4515-82fe-84973956336e",
+ "metadata": {},
+ "source": [
+ "## Variables to fill in"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "26a63bc6-dd16-4516-8fa1-5c160bb62a6c",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "#=== INPUTS ===\n",
+ "# TODO(template): specify the path to the .h5ad file containing cells and per-cell metadata\n",
+ "PATH_TO_INPUT_CELLS_H5AD = join('.', 'data', 'raw', 'codeluppi_2018_nature_methods.cells.h5ad')\n",
+ "\n",
+ "# TODO(template): specify the path to the .tsv file containing molecule x-y coordinates and gene labels, and the names of its x, y, and gene columns\n",
+ "PATH_TO_INPUT_MOLECULES_TSV = join('.', 'data', 'raw', 'codeluppi_2018_nature_methods.molecules.tsv')\n",
+ "MOLECULES_TSV_X_COL = \"x\"\n",
+ "MOLECULES_TSV_Y_COL = \"y\"\n",
+ "MOLECULES_TSV_GENE_COL = \"gene\"\n",
+ "\n",
+ "# TODO(template): specify the file path for the input background TIFF image\n",
+ "PATH_TO_INPUT_BACKGROUND_TIFF = join('.', 'data', 'raw', 'codeluppi_2018_nature_methods.image.tiff')\n",
+ "BACKGROUND_CHANNEL_NAMES = ['nuclei', 'polyT']\n",
+ "# TODO(template): specify the file path for the input cell segmentation bitmask TIFF image (assumed to have the same XY dimensions as the background image)\n",
+ "PATH_TO_INPUT_BITMASK_TIFF = join('.', 'data', 'raw', 'codeluppi_2018_nature_methods.bitmask.tiff')\n",
+ "BITMASK_CHANNEL_NAMES = [\"cells\"]\n",
+ "\n",
+ "#=== OUTPUTS ===\n",
+ "# TODO(template): specify the output paths for the converted cells and molecules AnnData-Zarr stores\n",
+ "PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR = join('.', 'processed_data', 'fish_based', 'output.cells.anndata.zarr')\n",
+ "PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR = join('.', 'processed_data', 'fish_based', 'output.molecules.anndata.zarr')\n",
+ "\n",
+ "# TODO(template): specify some file paths for the converted OME-TIFF image and bitmask files\n",
+ "PATH_TO_OUTPUT_IMAGE_OME_TIFF = join('.', 'processed_data', 'fish_based', 'image.ome.tif')\n",
+ "PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF = join('.', 'processed_data', 'fish_based', 'image.pyramid.ome.tif')\n",
+ "PATH_TO_OUTPUT_BITMASK_OME_TIFF = join('.', 'processed_data', 'fish_based', 'bitmask.ome.tif')\n",
+ "PATH_TO_OUTPUT_BITMASK_PYRAMIDAL_OME_TIFF = join('.', 'processed_data', 'fish_based', 'bitmask.pyramid.ome.tif')\n",
+ "\n",
+ "\n",
+ "# TODO(template): this folder should not yet exist, but will be created in step 3.\n",
+ "PATH_TO_EXPORT_DIRECTORY = join('.', 'exported_fish_based_data')\n",
+ "\n",
+ "# TODO(template): provide names and descriptions\n",
+ "CONFIG_NAME = 'My config'\n",
+ "CONFIG_DESCRIPTION = 'This dataset reveals...'\n",
+ "DATASET_NAME = 'My dataset'\n",
+ "IMG_NAME = 'My image'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6a6fb045-ca75-45c8-9939-119b8e5d2a5f",
+ "metadata": {},
+ "source": [
+ "## 1.1 Convert H5AD to AnnData-Zarr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d9ec5b65-180c-4a64-a4cb-8b259261161e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "cells_adata = read_h5ad(PATH_TO_INPUT_CELLS_H5AD)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "254e4711",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Cluster | \n",
+ " Subcluster | \n",
+ " Region | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " Inhibitory neurons | \n",
+ " Inhibitory CP | \n",
+ " Layer 6 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Inhibitory neurons | \n",
+ " Inhibitory CP | \n",
+ " Internal Capsule Caudoputamen | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Inhibitory neurons | \n",
+ " Inhibitory CP | \n",
+ " Layer 4 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Inhibitory neurons | \n",
+ " Inhibitory CP | \n",
+ " Internal Capsule Caudoputamen | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Inhibitory neurons | \n",
+ " Inhibitory CP | \n",
+ " Internal Capsule Caudoputamen | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 4835 | \n",
+ " Vasculature | \n",
+ " Vascular Smooth Muscle | \n",
+ " Layer 6 | \n",
+ "
\n",
+ " \n",
+ " | 4836 | \n",
+ " Vasculature | \n",
+ " Vascular Smooth Muscle | \n",
+ " Layer 2-3 lateral | \n",
+ "
\n",
+ " \n",
+ " | 4837 | \n",
+ " Vasculature | \n",
+ " Vascular Smooth Muscle | \n",
+ " White matter | \n",
+ "
\n",
+ " \n",
+ " | 4838 | \n",
+ " Vasculature | \n",
+ " Vascular Smooth Muscle | \n",
+ " Layer 6 | \n",
+ "
\n",
+ " \n",
+ " | 4839 | \n",
+ " Vasculature | \n",
+ " Vascular Smooth Muscle | \n",
+ " Hippocampus | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4839 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Cluster Subcluster \\\n",
+ "1 Inhibitory neurons Inhibitory CP \n",
+ "2 Inhibitory neurons Inhibitory CP \n",
+ "3 Inhibitory neurons Inhibitory CP \n",
+ "4 Inhibitory neurons Inhibitory CP \n",
+ "5 Inhibitory neurons Inhibitory CP \n",
+ "... ... ... \n",
+ "4835 Vasculature Vascular Smooth Muscle \n",
+ "4836 Vasculature Vascular Smooth Muscle \n",
+ "4837 Vasculature Vascular Smooth Muscle \n",
+ "4838 Vasculature Vascular Smooth Muscle \n",
+ "4839 Vasculature Vascular Smooth Muscle \n",
+ "\n",
+ " Region \n",
+ "1 Layer 6 \n",
+ "2 Internal Capsule Caudoputamen \n",
+ "3 Layer 4 \n",
+ "4 Internal Capsule Caudoputamen \n",
+ "5 Internal Capsule Caudoputamen \n",
+ "... ... \n",
+ "4835 Layer 6 \n",
+ "4836 Layer 2-3 lateral \n",
+ "4837 White matter \n",
+ "4838 Layer 6 \n",
+ "4839 Hippocampus \n",
+ "\n",
+ "[4839 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Create an integer index starting at 1 (0 is reserved for the background)\n",
+ "cells_adata.obs.index = list(range(1, cells_adata.shape[0]+1))\n",
+ "cells_adata.obs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "9e282613-2b00-4a32-b2dd-15d9e9c20846",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "AnnData object with n_obs × n_vars = 4839 × 33\n",
+ " obs: 'Cluster', 'Subcluster', 'Region'\n",
+ " var: 'Fluorophore', 'Hybridization'\n",
+ " obsm: 'X_centroid', 'X_pca', 'X_segmentations', 'X_spatial', 'X_tsne', 'X_umap'\n",
+ " varm: 'PCs'\n",
+ " layers: 'X_uint8'"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cells_adata"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "1546c023-d614-4337-a3d1-f8b04c49c8e1",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mkeller/anaconda3/envs/vitessce-tutorial-env/lib/python3.8/site-packages/vitessce/data_utils/anndata.py:153: FutureWarning: X.dtype being converted to np.float32 from uint16. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n",
+ " adata = AnnData(X=new_X, obs=new_obs, var=new_var, obsm=new_obsm, varm=new_varm, layers=new_layers)\n",
+ "/Users/mkeller/anaconda3/envs/vitessce-tutorial-env/lib/python3.8/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.\n",
+ " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n"
+ ]
+ }
+ ],
+ "source": [
+ "cells_adata = optimize_adata(\n",
+ " cells_adata,\n",
+ " # TODO(template): Specify the columns and keys that will be used in the visualization.\n",
+ " obs_cols=[\"Cluster\", \"Subcluster\", \"Region\"],\n",
+ " var_cols=[\"Fluorophore\", \"Hybridization\"],\n",
+ " obsm_keys=[\"X_pca\", \"X_tsne\", \"X_umap\", \"X_spatial\"],\n",
+ " optimize_X=True,\n",
+ " # Vitessce plays nicely with dense matrices saved with chunking\n",
+ " # and this one is small enough that dense is not a huge overhead.\n",
+ " to_dense_X=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "7dc753bb-51a8-400b-9721-d39048b5de83",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "cells_adata.write_zarr(PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR, chunks=[cells_adata.shape[0], VAR_CHUNK_SIZE])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "425f4c97-8ae1-4c77-b5a4-5c3f3d0c4cad",
+ "metadata": {},
+ "source": [
+ "## 1.2 Convert TSV to AnnData-Zarr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "2d5a12ec-c121-4076-8357-89efac0d53fb",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mkeller/anaconda3/envs/vitessce-tutorial-env/lib/python3.8/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.\n",
+ " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n"
+ ]
+ }
+ ],
+ "source": [
+ "molecules_df = pd.read_csv(PATH_TO_INPUT_MOLECULES_TSV, sep=\"\\t\", index_col=0)\n",
+ "obs_df = molecules_df[[MOLECULES_TSV_GENE_COL]].rename(columns={ MOLECULES_TSV_GENE_COL: \"gene\" })\n",
+ "xy_arr = molecules_df[[MOLECULES_TSV_X_COL, MOLECULES_TSV_Y_COL]].values\n",
+ "\n",
+ "molecules_adata = AnnData(\n",
+ " obs=obs_df,\n",
+ " obsm={ \"X_spatial\": xy_arr }\n",
+ ")\n",
+ "\n",
+ "molecules_adata = optimize_adata(\n",
+ " molecules_adata,\n",
+ " # TODO(template): Specify the columns and keys that will be used in the visualization.\n",
+ " obs_cols=[\"gene\"],\n",
+ " obsm_keys=[\"X_spatial\"],\n",
+ " remove_X=True\n",
+ ")\n",
+ "\n",
+ "molecules_adata.write_zarr(PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3dcbd7eb-40f1-446e-be84-080cecc3e4b0",
+ "metadata": {},
+ "source": [
+ "## 1.3 Convert TIFFs to OME-TIFFs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "21182034-21ee-4d06-ac24-65924bf76e31",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### 1.3.1 Background image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "8985aed2-9ee2-426a-b2de-b62d43254d89",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(2, 12917, 7932)"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "img_arr = imread(PATH_TO_INPUT_BACKGROUND_TIFF)\n",
+ "img_arr.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "52f3a50d-ab6a-4aa4-8984-38bdebf88a4d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# If needed, update the array axes so they are in CYX order to enable conversion to OME-TIFF.\n",
+ "#img_arr = img_arr.transpose((2, 0, 1))\n",
+ "#img_arr.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "1acfdeb8-37fa-4a71-a0db-1854e544da57",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "multiplex_img_to_ome_tiff(img_arr, BACKGROUND_CHANNEL_NAMES, PATH_TO_OUTPUT_IMAGE_OME_TIFF, axes=\"CYX\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "b346e9b5-e14a-401c-8ef5-e651e082508b",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "./processed_data/fish_based/image.ome.tif\n",
+ "OMETiffReader initializing ./processed_data/fish_based/image.ome.tif\n",
+ "Reading IFDs\n",
+ "Populating metadata\n",
+ "[OME-TIFF] -> ./processed_data/fish_based/image.pyramid.ome.tif [OME-TIFF]\n",
+ "Tile size = 512 x 512\n",
+ "\tConverted 1/2 planes (50%)\n",
+ "\tConverted 2/2 planes (100%)\n",
+ "Tile size = 512 x 512\n",
+ "\tConverted 1/2 planes (50%)\n",
+ "\tConverted 2/2 planes (100%)\n",
+ "Tile size = 512 x 512\n",
+ "\tConverted 1/2 planes (50%)\n",
+ "\tConverted 2/2 planes (100%)\n",
+ "Tile size = 512 x 512\n",
+ "\tConverted 1/2 planes (50%)\n",
+ "\tConverted 2/2 planes (100%)\n",
+ "Tile size = 495 x 512\n",
+ "\tConverted 2/2 planes (100%)\n",
+ "Tile size = 247 x 66\n",
+ "\tConverted 2/2 planes (100%)\n",
+ "[done]\n",
+ "34.427s elapsed (171.08333+2623.3333ms per plane, 864ms overhead)\n"
+ ]
+ }
+ ],
+ "source": [
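+ "# Convert the OME-TIFF into a tiled, pyramidal OME-TIFF using the Bio-Formats `bfconvert` command-line tool.\n",
+ "# TODO(template): update the path to the `bfconvert` executable; the tile size (512) and number of pyramid resolutions (6) used here suit larger images and may be reduced for smaller ones.\n",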
+ "!BF_MAX_MEM=2048m ~/software/bftools/bfconvert -overwrite -tilex 512 -tiley 512 -pyramid-resolutions 6 -pyramid-scale 2 -compression LZW {PATH_TO_OUTPUT_IMAGE_OME_TIFF} {PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "503fc7ec-6afe-4c4e-9835-f2f1c06ec0de",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### 1.3.2 Segmentation bitmask / label image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "b7b747c9-7aa1-41f4-a1d6-3c87dfc2d424",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(1, 12917, 7932)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bitmask_arr = imread(PATH_TO_INPUT_BITMASK_TIFF)\n",
+ "bitmask_arr.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "afd20985-50fe-4058-a52e-25653816a527",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# If needed, update the array axes so they are in CYX order to enable conversion to OME-TIFF.\n",
+ "#bitmask_arr = bitmask_arr.transpose((2, 0, 1))\n",
+ "#bitmask_arr.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "d9c04175-6a7f-4403-9840-4fdb1ee1103f",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "multiplex_img_to_ome_tiff(bitmask_arr, BITMASK_CHANNEL_NAMES, PATH_TO_OUTPUT_BITMASK_OME_TIFF, axes=\"CYX\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "6dca683a-131f-4366-9f53-81497351771a",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "./processed_data/fish_based/bitmask.ome.tif\n",
+ "OMETiffReader initializing ./processed_data/fish_based/bitmask.ome.tif\n",
+ "Reading IFDs\n",
+ "Populating metadata\n",
+ "[OME-TIFF] -> ./processed_data/fish_based/bitmask.pyramid.ome.tif [OME-TIFF]\n",
+ "Tile size = 512 x 512\n",
+ "Reading IFDs\n",
+ "Populating metadata\n",
+ "\tConverted 1/1 planes (100%)\n",
+ "Tile size = 512 x 512\n",
+ "\tConverted 1/1 planes (100%)\n",
+ "Tile size = 512 x 512\n",
+ "\tConverted 1/1 planes (100%)\n",
+ "Tile size = 512 x 512\n",
+ "\tConverted 1/1 planes (100%)\n",
+ "Tile size = 495 x 512\n",
+ "\tConverted 1/1 planes (100%)\n",
+ "Tile size = 247 x 16\n",
+ "\tConverted 1/1 planes (100%)\n",
+ "[done]\n",
+ "102.176s elapsed (768.3333+16125.833ms per plane, 776ms overhead)\n"
+ ]
+ }
+ ],
+ "source": [
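+ "# Convert the bitmask OME-TIFF into a tiled, pyramidal OME-TIFF using the Bio-Formats `bfconvert` command-line tool.\n",
+ "# TODO(template): update the path to the `bfconvert` executable; the tile size (512) and number of pyramid resolutions (6) used here suit larger images and may be reduced for smaller ones.\n",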
+ "!BF_MAX_MEM=2048m ~/software/bftools/bfconvert -overwrite -tilex 512 -tiley 512 -pyramid-resolutions 6 -pyramid-scale 2 -compression LZW {PATH_TO_OUTPUT_BITMASK_OME_TIFF} {PATH_TO_OUTPUT_BITMASK_PYRAMIDAL_OME_TIFF}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8052199b-b6a1-4d59-8a05-e15662beb457",
+ "metadata": {},
+ "source": [
+ "## 2. Configure the visualization"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "b3d1baeb-de12-4cc3-92d1-d5009fa9e79c",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "vc = VitessceConfig(schema_version=\"1.0.15\", name=CONFIG_NAME, description=CONFIG_DESCRIPTION)\n",
+ "\n",
+ "dataset = vc.add_dataset(name=DATASET_NAME).add_object(AnnDataWrapper(\n",
+ " adata_path=PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR,\n",
+ " # TODO(template): update the arrays of interest and where they are located in the AnnData object.\n",
+ " obs_embedding_paths=[\"obsm/X_umap\"],\n",
+ " obs_embedding_names=[\"UMAP\"],\n",
+ " obs_set_paths=[\"obs/Cluster\"],\n",
+ " obs_set_names=[\"Cluster\"],\n",
+ " obs_feature_matrix_path=\"X\",\n",
+ " coordination_values={\n",
+ " \"obsType\": \"cell\",\n",
+ " \"featureType\": \"gene\",\n",
+ " \"featureValueType\": \"expression\"\n",
+ " }\n",
+ ")).add_object(AnnDataWrapper(\n",
+ " adata_path=PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR,\n",
+ " # TODO(template): update the arrays of interest and where they are located in the AnnData object.\n",
+ " obs_locations_path=\"obsm/X_spatial\",\n",
+ " obs_labels_path=\"obs/gene\",\n",
+ " coordination_values={\n",
+ " \"obsType\": \"molecule\"\n",
+ " }\n",
+ ")).add_object(MultiImageWrapper([\n",
+ " OmeTiffWrapper(\n",
+ " img_path=PATH_TO_OUTPUT_BITMASK_PYRAMIDAL_OME_TIFF,\n",
+ " name=\"Cell segmentations\",\n",
+ " is_bitmask=True\n",
+ " ),\n",
+ " OmeTiffWrapper(\n",
+ " img_path=PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF,\n",
+ " name=\"Background image\",\n",
+ " is_bitmask=False\n",
+ " ),\n",
+ "], use_physical_size_scaling=True))\n",
+ "\n",
+ "# TODO(template): Update the views of interest.\n",
+ "spatial_colored_by_cluster = vc.add_view(cm.SPATIAL, dataset=dataset)\n",
+ "# TODO(template): update the mapping to match one of the elements of `obs_embedding_names` above, if necessary.\n",
+ "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n",
+ "\n",
+ "layer_controller = vc.add_view(cm.LAYER_CONTROLLER, dataset=dataset).set_props(disableChannelsIfRgbDetected=True)\n",
+ "spot_set_manager = vc.add_view(cm.OBS_SETS, dataset=dataset)\n",
+ "gene_list = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n",
+ "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset).set_props(transpose=True)\n",
+ "\n",
+ "# TODO(template): Update the layout of views.\n",
+ "vc.layout(\n",
+ " (spatial_colored_by_cluster | scatterplot)\n",
+ " / ((layer_controller | spot_set_manager) | (gene_list | heatmap))\n",
+ ");"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "c07cb582-6c3d-45d7-937b-e54bd8fc93d8",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# TODO(template): configure view coordinations and initial coordination values\n",
+ "spatial_views = [\n",
+ " spatial_colored_by_cluster,\n",
+ " layer_controller,\n",
+ "]\n",
+ "all_views = [\n",
+ " *spatial_views,\n",
+ " spot_set_manager,\n",
+ " gene_list,\n",
+ " heatmap,\n",
+ " scatterplot,\n",
+ "]\n",
+ "\n",
+ "spatial_point_layer_value = {\n",
+ " \"opacity\": 1,\n",
+ " \"radius\": 20,\n",
+ " \"visible\": True\n",
+ "}\n",
+ "\n",
+ "spatial_segmentation_layer_value = [{\n",
+ " \"type\": \"bitmask\",\n",
+ " \"visible\": True,\n",
+ " \"index\": 0,\n",
+ " \"colormap\": None,\n",
+ " \"transparentColor\": None,\n",
+ " \"opacity\": 1,\n",
+ " \"domainType\": \"Min/Max\",\n",
+ " \"channels\": [\n",
+ " {\n",
+ " \"selection\": { \"c\": 0, \"t\": 0, \"z\": 0 },\n",
+ " \"color\": [255, 0, 0],\n",
+ " \"visible\": True,\n",
+ " \"slider\": [0, 1]\n",
+ " }\n",
+ " ]\n",
+ "}]\n",
+ "\n",
+ "spatial_image_layer_value = [{\n",
+ " \"type\": \"raster\",\n",
+ " \"index\": 0,\n",
+ " \"colormap\": None,\n",
+ " \"transparentColor\": None,\n",
+ " \"opacity\": 1,\n",
+ " \"domainType\": \"Min/Max\",\n",
+ " \"channels\": [\n",
+ " {\n",
+ " \"selection\": { \"c\": 0, \"t\": 0, \"z\": 0 },\n",
+ " \"color\": [255, 0, 0],\n",
+ " \"visible\": True,\n",
+ " \"slider\": [0, 255]\n",
+ " },\n",
+ " {\n",
+ " \"selection\": { \"c\": 1, \"t\": 0, \"z\": 0 },\n",
+ " \"color\": [0, 255, 0],\n",
+ " \"visible\": True,\n",
+ " \"slider\": [0, 255]\n",
+ " },\n",
+ " {\n",
+ " \"selection\": { \"c\": 2, \"t\": 0, \"z\": 0 },\n",
+ " \"color\": [0, 0, 255],\n",
+ " \"visible\": True,\n",
+ " \"slider\": [0, 255]\n",
+ " }\n",
+ " ]\n",
+ "}]\n",
+ "\n",
+ "vc.link_views(spatial_views, [ct.SPATIAL_IMAGE_LAYER, ct.SPATIAL_SEGMENTATION_LAYER, ct.SPATIAL_POINT_LAYER], [spatial_image_layer_value, spatial_segmentation_layer_value, spatial_point_layer_value])\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "098add3f-634e-492e-918b-eacf24598a1e",
+ "metadata": {},
+ "source": [
+ "### Render the widget"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "0b62f4fe-ad8b-458b-9cfe-a54a647138a1",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'http://vitessce.io/#?theme=light&url=data:,%7B%22version%22%3A+%221.0.15%22%2C+%22name%22%3A+%22My+config%22%2C+%22description%22%3A+%22This+dataset+reveals...%22%2C+%22datasets%22%3A+%5B%7B%22uid%22%3A+%22A%22%2C+%22name%22%3A+%22My+dataset%22%2C+%22files%22%3A+%5B%7B%22fileType%22%3A+%22anndata.zarr%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F0%2F44170ceb-30d8-49bb-81a2-2a1fec250bea%22%2C+%22options%22%3A+%7B%22obsEmbedding%22%3A+%5B%7B%22path%22%3A+%22obsm%2FX_umap%22%2C+%22dims%22%3A+%5B0%2C+1%5D%2C+%22embeddingType%22%3A+%22UMAP%22%7D%5D%2C+%22obsSets%22%3A+%5B%7B%22name%22%3A+%22Cluster%22%2C+%22path%22%3A+%22obs%2FCluster%22%7D%5D%2C+%22obsFeatureMatrix%22%3A+%7B%22path%22%3A+%22X%22%7D%7D%2C+%22coordinationValues%22%3A+%7B%22obsType%22%3A+%22cell%22%2C+%22featureType%22%3A+%22gene%22%2C+%22featureValueType%22%3A+%22expression%22%7D%7D%2C+%7B%22fileType%22%3A+%22anndata.zarr%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F1%2Ff514c617-14fd-40a5-b43c-dc8f4e1ff454%22%2C+%22options%22%3A+%7B%22obsLocations%22%3A+%7B%22path%22%3A+%22obsm%2FX_spatial%22%7D%2C+%22obsLabels%22%3A+%7B%22path%22%3A+%22obs%2Fgene%22%7D%7D%2C+%22coordinationValues%22%3A+%7B%22obsType%22%3A+%22molecule%22%7D%7D%2C+%7B%22fileType%22%3A+%22raster.json%22%2C+%22options%22%3A+%7B%22schemaVersion%22%3A+%220.0.2%22%2C+%22usePhysicalSizeScaling%22%3A+true%2C+%22images%22%3A+%5B%7B%22name%22%3A+%22Cell+segmentations%22%2C+%22type%22%3A+%22ome-tiff%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F2%2F4ecb04d7-dfa2-488f-8b8b-55688ea7d188%22%2C+%22metadata%22%3A+%7B%22omeTiffOffsetsUrl%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F2%2Fb29f0a86-2809-4fe2-9ade-b7f5b8b9e11f%22%2C+%22isBitmask%22%3A+true%7D%7D%2C+%7B%22name%22%3A+%22Background+image%22%2C+%22type%22%3A+%22ome-tiff%22%2C+%22url%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA%2F2%2Fbc72944b-f9ee-43e4-b88f-90779234b39a%22%2C+%22metadata%22%3A+%7B%22omeTiffOffsetsUrl%22%3A+%22http%3A%2F%2Flocalhost%3A8000%2FA
%2F2%2F49368b36-6741-4530-bfc2-241630a81ef4%22%2C+%22isBitmask%22%3A+false%7D%7D%5D%2C+%22renderLayers%22%3A+%5B%22Cell+segmentations%22%2C+%22Background+image%22%5D%7D%7D%5D%7D%5D%2C+%22coordinationSpace%22%3A+%7B%22dataset%22%3A+%7B%22A%22%3A+%22A%22%7D%2C+%22embeddingType%22%3A+%7B%22A%22%3A+%22UMAP%22%7D%2C+%22spatialImageLayer%22%3A+%7B%22A%22%3A+%5B%7B%22type%22%3A+%22raster%22%2C+%22index%22%3A+0%2C+%22colormap%22%3A+null%2C+%22transparentColor%22%3A+null%2C+%22opacity%22%3A+1%2C+%22domainType%22%3A+%22Min%2FMax%22%2C+%22channels%22%3A+%5B%7B%22selection%22%3A+%7B%22c%22%3A+0%2C+%22t%22%3A+0%2C+%22z%22%3A+0%7D%2C+%22color%22%3A+%5B255%2C+0%2C+0%5D%2C+%22visible%22%3A+true%2C+%22slider%22%3A+%5B0%2C+255%5D%7D%2C+%7B%22selection%22%3A+%7B%22c%22%3A+1%2C+%22t%22%3A+0%2C+%22z%22%3A+0%7D%2C+%22color%22%3A+%5B0%2C+255%2C+0%5D%2C+%22visible%22%3A+true%2C+%22slider%22%3A+%5B0%2C+255%5D%7D%2C+%7B%22selection%22%3A+%7B%22c%22%3A+2%2C+%22t%22%3A+0%2C+%22z%22%3A+0%7D%2C+%22color%22%3A+%5B0%2C+0%2C+255%5D%2C+%22visible%22%3A+true%2C+%22slider%22%3A+%5B0%2C+255%5D%7D%5D%7D%5D%7D%2C+%22spatialSegmentationLayer%22%3A+%7B%22A%22%3A+%5B%7B%22type%22%3A+%22bitmask%22%2C+%22visible%22%3A+true%2C+%22index%22%3A+0%2C+%22colormap%22%3A+null%2C+%22transparentColor%22%3A+null%2C+%22opacity%22%3A+1%2C+%22domainType%22%3A+%22Min%2FMax%22%2C+%22channels%22%3A+%5B%7B%22selection%22%3A+%7B%22c%22%3A+0%2C+%22t%22%3A+0%2C+%22z%22%3A+0%7D%2C+%22color%22%3A+%5B255%2C+0%2C+0%5D%2C+%22visible%22%3A+true%2C+%22slider%22%3A+%5B0%2C+1%5D%7D%5D%7D%5D%7D%2C+%22spatialPointLayer%22%3A+%7B%22A%22%3A+%7B%22opacity%22%3A+1%2C+%22radius%22%3A+20%2C+%22visible%22%3A+true%7D%7D%7D%2C+%22layout%22%3A+%5B%7B%22component%22%3A+%22spatial%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%2C+%22spatialImageLayer%22%3A+%22A%22%2C+%22spatialSegmentationLayer%22%3A+%22A%22%2C+%22spatialPointLayer%22%3A+%22A%22%7D%2C+%22x%22%3A+0.0%2C+%22y%22%3A+0.0%2C+%22w%22%3A+6.0%2C+%22h%22%3A+6.0%7D%2C+%7B%22compon
ent%22%3A+%22scatterplot%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%2C+%22embeddingType%22%3A+%22A%22%7D%2C+%22x%22%3A+6.0%2C+%22y%22%3A+0.0%2C+%22w%22%3A+6.0%2C+%22h%22%3A+6.0%7D%2C+%7B%22component%22%3A+%22layerController%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%2C+%22spatialImageLayer%22%3A+%22A%22%2C+%22spatialSegmentationLayer%22%3A+%22A%22%2C+%22spatialPointLayer%22%3A+%22A%22%7D%2C+%22x%22%3A+0.0%2C+%22y%22%3A+6.0%2C+%22w%22%3A+3.0%2C+%22h%22%3A+6.0%2C+%22props%22%3A+%7B%22disableChannelsIfRgbDetected%22%3A+true%7D%7D%2C+%7B%22component%22%3A+%22obsSets%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%7D%2C+%22x%22%3A+3.0%2C+%22y%22%3A+6.0%2C+%22w%22%3A+3.0%2C+%22h%22%3A+6.0%7D%2C+%7B%22component%22%3A+%22featureList%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%7D%2C+%22x%22%3A+6.0%2C+%22y%22%3A+6.0%2C+%22w%22%3A+3.0%2C+%22h%22%3A+6.0%7D%2C+%7B%22component%22%3A+%22heatmap%22%2C+%22coordinationScopes%22%3A+%7B%22dataset%22%3A+%22A%22%7D%2C+%22x%22%3A+9.0%2C+%22y%22%3A+6.0%2C+%22w%22%3A+3.0%2C+%22h%22%3A+6.0%2C+%22props%22%3A+%7B%22transpose%22%3A+true%7D%7D%5D%2C+%22initStrategy%22%3A+%22auto%22%7D'"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vc.web_app()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "da6e4cb6-5c55-4839-8b7a-ca13c6e319ad",
+ "metadata": {},
+ "source": [
+ "## 3. Export the configuration and data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5fea1850-7098-44ce-823b-30b77ab84b3b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# TODO(template): The export function does not clear the contents of the `out_dir`.\n",
+ "# To avoid mixing in stale files, the next line raises FileExistsError if this folder already exists:\n",
+ "os.makedirs(PATH_TO_EXPORT_DIRECTORY, exist_ok=False)\n",
+ "\n",
+ "config_dict = vc.export(to=\"files\", base_url=BASE_URL_PLACEHOLDER, out_dir=PATH_TO_EXPORT_DIRECTORY)\n",
+ "\n",
+ "# Write the exported configuration to `vitessce.json` inside PATH_TO_EXPORT_DIRECTORY\n",
+ "with open(join(PATH_TO_EXPORT_DIRECTORY, \"vitessce.json\"), \"w\") as f:\n",
+ " json.dump(config_dict, f)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
From ebb7234bd3ab552392fc2b18c9d9583fa7d1e132 Mon Sep 17 00:00:00 2001
From: mkeller <7525285+keller-mark@users.noreply.github.com>
Date: Fri, 31 Mar 2023 16:44:00 -0400
Subject: [PATCH 2/2] Remove raw data processing
---
templates/fish-based.ipynb | 486 +------------------------------------
1 file changed, 2 insertions(+), 484 deletions(-)
diff --git a/templates/fish-based.ipynb b/templates/fish-based.ipynb
index e9fa454..ff65087 100644
--- a/templates/fish-based.ipynb
+++ b/templates/fish-based.ipynb
@@ -1,487 +1,5 @@
{
"cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e819ce7a-fb23-4c0a-8efd-1b7cdcb552e3",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# TEMP: download/construct \"raw\" dataset for a complete example"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "4b0ace49-3c42-4c11-b3e3-198d5c2dba6f",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "from ome_zarr.io import parse_url\n",
- "from ome_zarr.reader import Reader\n",
- "\n",
- "from tifffile import imwrite\n",
- "from os.path import join\n",
- "import zarr\n",
- "from anndata import read_zarr, AnnData\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "from skimage.draw import polygon2mask\n",
- "import tqdm"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "a2a4b8f0-870f-4bbb-8724-437cd79638c3",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Adapting http://vitessce.io/#?dataset=codeluppi-2018-via-zarr"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "c6ab9d1a-6dbe-4d62-89ca-be41415f369b",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "cells_adata_zarr = \"data/raw/codeluppi_2018_nature_methods.cells.h5ad.zarr\"\n",
- "cells_csv = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018/codeluppi_2018_nature_methods.cells.csv\"\n",
- "molecules_adata_zarr = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018-via-zarr/codeluppi_2018_nature_methods.molecules.h5ad.zarr\"\n",
- "molecules_csv = \"https://s3.amazonaws.com/vitessce-data/0.0.33/main/codeluppi-2018/codeluppi_2018_nature_methods.molecules.csv\"\n",
- "background_ome_zarr = \"https://vitessce-data.storage.googleapis.com/0.0.34/main/codeluppi-2018/codeluppi_2018_nature_methods.image.ome.zarr\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "2d3f3bc8-1d31-41ac-ac02-65929ca4afdb",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Convert OME-Zarr to normal TIFF"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e3d5309e-1b02-4ffd-b943-57f449d5da49",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# read the image data\n",
- "store = parse_url(background_ome_zarr, mode=\"r\")\n",
- "reader = Reader(store)\n",
- "# nodes may include images, labels etc\n",
- "nodes = list(reader())\n",
- "# first node will be the image pixel data\n",
- "image_node = nodes[0]\n",
- "\n",
- "dask_data = image_node.data\n",
- "\n",
- "hires_arr = dask_data[2]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "32b87d3c-b18a-407a-b0b0-6ec88cedf199",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "RAW_DIR = join(\"data\", \"raw\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b591bf59-3f68-4d56-b0f1-669e04125c26",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "imwrite(\n",
- " join(RAW_DIR, 'codeluppi_2018_nature_methods.image.tiff'),\n",
- " hires_arr,\n",
- " photometric='minisblack',\n",
- " metadata={'axes': 'CYX'}\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4487ebfd-4480-48dc-8b7d-4a6c6eeab957",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Convert cells.zarr/obsm/X_segmentations to a bitmask TIFF"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "00b5563b-e70e-4675-a94a-338c56ba1c27",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "bitmask_shape = (hires_arr.shape[2], hires_arr.shape[1])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "cfea1161-616b-425e-942d-b19232036d96",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "bitmask_shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c9d6800f-0119-4b5f-8eb8-a009b214387c",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "bitmask_arr = np.zeros(bitmask_shape)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "16d6ac9b-a349-43a8-935c-f7c6bec743cf",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "cells_store = zarr.open(cells_adata_zarr)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c5e47371-49b4-44cc-85a8-223b106ffc10",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "segmentations_arr = cells_store['obsm/X_segmentations'][()]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "7587becd-9cf1-47ac-b412-2cea40c2a298",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "segmentations_arr.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c311d30e-60c4-43ea-84f4-ac0c6add5516",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "for i in tqdm.tqdm(range(segmentations_arr.shape[0])):\n",
- " # Re-scale the vertices so they match the background scale\n",
- " vertices = np.multiply(segmentations_arr[i], 0.25)\n",
- " mask = polygon2mask(bitmask_shape, vertices)\n",
- " bitmask_arr[mask] = i+1"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9625735f-98fe-4487-8f4e-7322ccbc1ae8",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Update the array axes so they are in CYX order to enable conversion to OME-TIFF.\n",
- "bitmask_arr = bitmask_arr.transpose((1, 0)) # (y, x)\n",
- "bitmask_arr = bitmask_arr[np.newaxis, :] # (c, y, x)\n",
- "bitmask_arr.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ae851409-4be2-4878-9b8a-0af18b211d1f",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "imwrite(\n",
- " join(RAW_DIR, 'codeluppi_2018_nature_methods.bitmask.tiff'),\n",
- " bitmask_arr,\n",
- " photometric='minisblack',\n",
- " metadata={'axes': 'CYX'}\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d600e4a6-6445-45ec-a6e2-57b12674fd5b",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Convert molecules.zarr to TSV (one row per molecule)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "id": "e24cfd60-3574-4f6a-9109-1f3b8da8c1e6",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "molecules_store = zarr.open(molecules_adata_zarr)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "85350784-b365-4766-8796-4420cb1b9251",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "positions_arr = molecules_store['obsm/X_spatial'][()]\n",
- "gene_cats_arr = molecules_store['obs/__categories/Gene'][()]\n",
- "gene_codes_arr = molecules_store['obs/Gene'][()]\n",
- "genes_arr = [gene_cats_arr[cat_i] for cat_i in gene_codes_arr]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "3d3a7763-f600-4209-9c8d-9ab7fee3ac2e",
- "metadata": {
- "tags": []
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(1976659, 2)"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "positions_arr.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "76b7ec3d-b11d-4599-b6a0-ba637c488f2d",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "molecules_df = pd.DataFrame(index=range(positions_arr.shape[0]))\n",
- "molecules_df[\"x\"] = np.multiply(positions_arr[:, 0], 0.25)\n",
- "molecules_df[\"y\"] = np.multiply(positions_arr[:, 1], 0.25)\n",
- "molecules_df[\"gene\"] = genes_arr"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "id": "c384a702-57de-4072-9333-4b7b76a73a7d",
- "metadata": {
- "tags": []
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " x | \n",
- " y | \n",
- " gene | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 4553.75 | \n",
- " 5013.00 | \n",
- " Acta2_Hybridization5 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 6229.00 | \n",
- " 4253.75 | \n",
- " Acta2_Hybridization5 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 2100.00 | \n",
- " 11130.50 | \n",
- " Acta2_Hybridization5 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 5666.75 | \n",
- " 5802.00 | \n",
- " Acta2_Hybridization5 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 1584.00 | \n",
- " 9388.75 | \n",
- " Acta2_Hybridization5 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " x y gene\n",
- "0 4553.75 5013.00 Acta2_Hybridization5\n",
- "1 6229.00 4253.75 Acta2_Hybridization5\n",
- "2 2100.00 11130.50 Acta2_Hybridization5\n",
- "3 5666.75 5802.00 Acta2_Hybridization5\n",
- "4 1584.00 9388.75 Acta2_Hybridization5"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "molecules_df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "id": "331bc482-87c1-43b9-8109-a6e3409d2160",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "molecules_df.index = molecules_df.index.rename(\"index\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "id": "07ed3119-3dea-4812-ae41-b617070c9c53",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "molecules_df.to_csv(join(\"data\", \"raw\", \"codeluppi_2018_nature_methods.molecules.tsv\"), sep=\"\\t\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "592bd5ee-e41b-4b72-b9ce-fdc68c0ec173",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Convert cells.h5ad.zarr to cells.h5ad"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "268e1187-34d5-43a1-95e0-2fa31d3ee7aa",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "cells_adata = read_zarr(cells_adata_zarr)\n",
- "cells_adata"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3b6d2e8e-f923-4ff7-87c1-9912fab04ca2",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "cells_adata.write(join(\"data\", \"raw\", \"codeluppi_2018_nature_methods.cells.h5ad\"))"
- ]
- },
{
"cell_type": "markdown",
"id": "4b2a05b1-3bdc-4e4b-84c3-321b83f8e10e",
@@ -572,11 +90,11 @@
"BITMASK_CHANNEL_NAMES = [\"cells\"]\n",
"\n",
"#=== OUTPUTS ===\n",
- "# TODO(template)\n",
+ "# TODO(template): specify file paths for the converted AnnData objects\n",
"PATH_TO_OUTPUT_CELLS_ANNDATA_ZARR = join('.', 'processed_data', 'fish_based', 'output.cells.anndata.zarr')\n",
"PATH_TO_OUTPUT_MOLECULES_ANNDATA_ZARR = join('.', 'processed_data', 'fish_based', 'output.molecules.anndata.zarr')\n",
"\n",
- "# TODO(template): specify some file paths for the converted OME-TIFF image and bitmask files\n",
+ "# TODO(template): specify file paths for the converted OME-TIFF image and bitmask files\n",
"PATH_TO_OUTPUT_IMAGE_OME_TIFF = join('.', 'processed_data', 'fish_based', 'image.ome.tif')\n",
"PATH_TO_OUTPUT_IMAGE_PYRAMIDAL_OME_TIFF = join('.', 'processed_data', 'fish_based', 'image.pyramid.ome.tif')\n",
"PATH_TO_OUTPUT_BITMASK_OME_TIFF = join('.', 'processed_data', 'fish_based', 'bitmask.ome.tif')\n",