From 3cc5bfaa28c9058c8872eb9624095aee17c45a0e Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 6 May 2024 11:21:03 +0200 Subject: [PATCH 1/9] (chore): change variable names --- vitessce/wrappers.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/vitessce/wrappers.py b/vitessce/wrappers.py index 19b0d460..96167884 100644 --- a/vitessce/wrappers.py +++ b/vitessce/wrappers.py @@ -894,12 +894,12 @@ def image_file_def_creator(base_url): class AnnDataWrapper(AbstractWrapper): - def __init__(self, adata_path=None, adata_url=None, obs_feature_matrix_path=None, feature_filter_path=None, initial_feature_filter_path=None, obs_set_paths=None, obs_set_names=None, obs_locations_path=None, obs_segmentations_path=None, obs_embedding_paths=None, obs_embedding_names=None, obs_embedding_dims=None, obs_spots_path=None, obs_points_path=None, request_init=None, feature_labels_path=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, obs_labels_paths=None, obs_labels_names=None, **kwargs): + def __init__(self, base_path=None, base_url=None, obs_feature_matrix_path=None, feature_filter_path=None, initial_feature_filter_path=None, obs_set_paths=None, obs_set_names=None, obs_locations_path=None, obs_segmentations_path=None, obs_embedding_paths=None, obs_embedding_names=None, obs_embedding_dims=None, obs_spots_path=None, obs_points_path=None, request_init=None, feature_labels_path=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, obs_labels_paths=None, obs_labels_names=None, **kwargs): """ Wrap an AnnData object by creating an instance of the ``AnnDataWrapper`` class. - :param str adata_path: A path to an AnnData object written to a Zarr store containing single-cell experiment data. - :param str adata_url: A remote url pointing to a zarr-backed AnnData store. + :param str base_path: A path to an AnnData object written to a Zarr store containing single-cell experiment data. 
+ :param str base_url: A remote url pointing to a zarr-backed AnnData store. :param str obs_feature_matrix_path: Location of the expression (cell x gene) matrix, like `X` or `obsm/highly_variable_genes_subset` :param str feature_filter_path: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_path` if obs_feature_matrix_path points to a subset of `X` of the full `var` list. :param str initial_feature_filter_path: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_path` if obs_feature_matrix_path points to a subset of `X` of the full `var` list. @@ -924,15 +924,15 @@ def __init__(self, adata_path=None, adata_url=None, obs_feature_matrix_path=None """ super().__init__(**kwargs) self._repr = make_repr(locals()) - self._adata_path = adata_path - self._adata_url = adata_url - if adata_url is not None and (adata_path is not None): + self._path = base_path + self._url = base_url + if base_url is not None and (base_path is not None): raise ValueError( - "Did not expect adata_url to be provided with adata_path") - if adata_url is None and (adata_path is None): + "Did not expect base_url to be provided with base_path") + if base_url is None and (base_path is None): raise ValueError( - "Expected either adata_url or adata_path to be provided") - if adata_path is not None: + "Expected either base_url or base_path to be provided") + if base_path is not None: self.is_remote = False self.zarr_folder = 'anndata.zarr' else: @@ -952,7 +952,7 @@ def __init__(self, adata_path=None, adata_url=None, obs_feature_matrix_path=None self._spatial_spots_obsm = obs_spots_path self._spatial_points_obsm = obs_points_path self._request_init = request_init - self._gene_alias = feature_labels_path + self._feature_labels = feature_labels_path # Support legacy provision of single obs labels path if (obs_labels_path is not None): self._obs_labels_paths = [obs_labels_path] @@ -970,22 +970,22 @@ def convert_and_save(self, dataset_uid, obj_i, 
base_dir=None): file_def_creator = self.make_file_def_creator( dataset_uid, obj_i) - routes = self.make_anndata_routes(dataset_uid, obj_i) + routes = self.make_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) self.routes += routes - def make_anndata_routes(self, dataset_uid, obj_i): + def make_routes(self, dataset_uid, obj_i): if self.is_remote: return [] else: - return self.get_local_dir_route(dataset_uid, obj_i, self._adata_path, self.local_dir_uid) + return self.get_local_dir_route(dataset_uid, obj_i, self._path, self.local_dir_uid) def get_zarr_url(self, base_url="", dataset_uid="", obj_i=""): if self.is_remote: - return self._adata_url + return self._url else: - return self.get_local_dir_url(base_url, dataset_uid, obj_i, self._adata_path, self.local_dir_uid) + return self.get_local_dir_url(base_url, dataset_uid, obj_i, self._path, self.local_dir_uid) def make_file_def_creator(self, dataset_uid, obj_i): def get_anndata_zarr(base_url): @@ -1046,9 +1046,9 @@ def get_anndata_zarr(base_url): options["obsFeatureMatrix"]["featureFilterPath"] = self._gene_var_filter if self._matrix_gene_var_filter is not None: options["obsFeatureMatrix"]["initialFeatureFilterPath"] = self._matrix_gene_var_filter - if self._gene_alias is not None: + if self._feature_labels is not None: options["featureLabels"] = { - "path": self._gene_alias + "path": self._feature_labels } if self._obs_labels_paths is not None: if self._obs_labels_names is not None and len(self._obs_labels_paths) == len(self._obs_labels_names): From bd6fccf79493c126ffec2cf0ab9bb029839af086 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 6 May 2024 11:26:01 +0200 Subject: [PATCH 2/9] (chore): fix notebooks --- ...rowser_to_vitessce_config_conversion.ipynb | 432 ++++++------ docs/notebooks/data_export_files.ipynb | 470 ++++++------- docs/notebooks/data_export_s3.ipynb | 458 ++++++------ docs/notebooks/widget_brain.ipynb | 588 ++++++++-------- .../widget_brain_with_base_dir.ipynb | 656 
+++++++++--------- docs/notebooks/widget_genomic_profiles.ipynb | 438 ++++++------ docs/notebooks/widget_loom.ipynb | 436 ++++++------ docs/notebooks/widget_pbmc.ipynb | 416 +++++------ docs/notebooks/widget_pbmc_remote.ipynb | 304 ++++---- tests/test_wrappers.py | 8 +- 10 files changed, 2103 insertions(+), 2103 deletions(-) diff --git a/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb b/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb index 3ca1e0cf..8535fbac 100644 --- a/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb +++ b/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb @@ -1,217 +1,217 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "710bc947", - "metadata": {}, - "source": [ - "# Load UCSC Cell Browser project in Vitessce" - ] - }, - { - "cell_type": "markdown", - "id": "fad939f6-bd8b-46f8-8dd1-f0816d8ca5b3", - "metadata": {}, - "source": [ - "This notebook shows you how to use the `convert_cell_browser_project_to_anndata` function, which allows you to take an existing project, published in https://cells.ucsc.edu/ and:\n", - "1. Convert it into the AnnData format that is supported by Vitessce\n", - "2. Save the AnnData object as a Zarr store\n", - "3. Configure Vitessce with the AnnData-Zarr store\n", - "4. Render a Vitessce widget based on the config (step 3) directly in the notebook.\n", - "\n", - "The dataset that you choose to convert needs to be a valid UCSC Cell Browser \"project\", accessible from https://cells.ucsc.edu/, with a configuration available in https://github.com/ucscGenomeBrowser/cellbrowser-confs\n", - "\n", - "The `convert_cell_browser_project_to_anndata` function takes the name of that project as an input. For example, to convert this project, https://cells.ucsc.edu/?ds=adultPancreas, you will neeed to pass `\"adultPancreas\"` as the project name." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "532fea6a-69d4-4cac-8afb-6d334dbe7ca1", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import json\n", - "from os.path import join\n", - "from vitessce import (\n", - " convert_cell_browser_project_to_anndata,\n", - " AnnDataWrapper,\n", - " VitessceConfig,\n", - ")\n", - "from vitessce.data_utils import VAR_CHUNK_SIZE" - ] - }, - { - "cell_type": "markdown", - "id": "a8077cfd-abc2-488d-9d91-83bc29a0bbe9", - "metadata": {}, - "source": [ - "## 1. Convert UCSC Cell Browser project to a format that is supported by Vitessce\n", - "#### Output:\n", - "An AnnData object\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "743c2d61-d98c-4e8d-a821-d5fe0ec2d93b", - "metadata": {}, - "outputs": [], - "source": [ - "## 3. Convert UCSC Cell Browser project to a Vitessce view config" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "6fb3e7dc-baf8-49e9-9d24-264bcd668b49", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Converting CellBrowser config for project adultPancreas to Anndata-Zarr object\n", - "Successfully fetched configuration: https://cells.ucsc.edu/adultPancreas/dataset.json.\n", - "CellBrowser config is valid. Proceeding further with conversion.\n", - "Downloading expression matrix ...\n", - "Successfully downloaded expression matrix https://cells.ucsc.edu/adultPancreas/exprMatrix.tsv.gz.\n", - "Loading expression matrix into Anndata object ...\n", - "This dataset uses the format identifier|symbol for the ad.obs gene names (e.g. “ENSG0123123.3|HOX3”). 
We are keeping only the symbol.\n", - "Adding cell metadata to Anndata object ...\n", - "Successfully downloaded metadata meta.tsv.\n", - "Successful extraction of the following coordinates and URLS: {'X_tsne': 'tMinusSNE.coords.tsv.gz'}\n", - "Adding X_tsne to Anndata object ...\n", - "X_tsne successfully added.\n", - "Done adding coordinates to the Anndata object.\n", - "Filtering out all non-marker genes from Anndata object ...\n", - "Successfully filtered out all non-marker genes from Anndata object.\n", - "About to write the Anndata object to the Zarr store. The following properties will be saved:\n", - " Obs columns: ['cluster', 'age', 'age_unit', 'Key', 'experiment_name', 'fragAnalyzerRange', 'nCells', 'ng_ul', 'plate_nr', 'sample_recieve_date', 'chip_type', 'c1_chip_id', 'enrichment_method', 'capture_position', 'gene_body_coverage', 'intron_exon_ratio', 'mapped_reads', 'total_reads', 'n_genes']\n", - " Obsm keys: ['X_tsne']\n", - " Var columns: ['gene', 'n_cells']\n", - "obsm X_tsne is an instance of DataFrame, converting it to numpy array.\n" - ] - } - ], - "source": [ - "# Example run, coverting \"adultPancreas\" project:\n", - "adata = convert_cell_browser_project_to_anndata(project_name=\"adultPancreas\", keep_only_marker_genes=True)" - ] - }, - { - "cell_type": "markdown", - "id": "cf3cfcbe-4048-4a60-8988-b8c0eace23e2", - "metadata": {}, - "source": [ - "## 2. Save the AnnData object as a Zarr store" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "8835ab53-2ee3-490e-a68c-c2d8952277a9", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"out.adata.zarr\")\n", - "os.makedirs(os.path.dirname(zarr_filepath), exist_ok=True)\n", - "adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "id": "d61667b4-dc32-4376-bff1-b4a5bf74140f", - "metadata": {}, - "source": [ - "## 3. 
Configure Vitessce with the AnnData-Zarr store" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "259c1804-2e67-4a92-bc90-5ba5e3dba7b3", - "metadata": {}, - "outputs": [], - "source": [ - "anndata_wrapper_inst = AnnDataWrapper(\n", - " adata_path=zarr_filepath,\n", - " obs_feature_matrix_path=\"X\",\n", - " obs_embedding_paths=[\"obsm/X_tsne\"],\n", - " obs_embedding_names=[\"t-SNE\"],\n", - " obs_set_paths=[\"obs/cluster\", \"obs/age\"],\n", - " obs_set_names=[\"cluster\", \"age\"],\n", - ")\n", - "vc = VitessceConfig(schema_version=\"1.0.15\", name=\"Vitessce configuration for CellBrowser project adultPancreas\")\n", - "anndata_wrapper_inst.auto_view_config(vc)" - ] - }, - { - "cell_type": "markdown", - "id": "22e7d2fd-2c2e-4ce5-b551-7809cdc6568e", - "metadata": {}, - "source": [ - "## 4. Render the Vitessce widget" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "cb9cb8e3-8ef4-49d9-b0a0-ba2f0fc80637", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e5878bf30e1f4428a14604731928972d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VitessceWidget(config={'version': '1.0.15', 'name': 'Vitessce configuration for CellBrowser project adultPancr…" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "774b8156-5cc6-4d17-884b-595957366230", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.0" - } - }, - "nbformat": 4, 
- "nbformat_minor": 5 -} + "cells": [ + { + "cell_type": "markdown", + "id": "710bc947", + "metadata": {}, + "source": [ + "# Load UCSC Cell Browser project in Vitessce" + ] + }, + { + "cell_type": "markdown", + "id": "fad939f6-bd8b-46f8-8dd1-f0816d8ca5b3", + "metadata": {}, + "source": [ + "This notebook shows you how to use the `convert_cell_browser_project_to_anndata` function, which allows you to take an existing project, published in https://cells.ucsc.edu/ and:\n", + "1. Convert it into the AnnData format that is supported by Vitessce\n", + "2. Save the AnnData object as a Zarr store\n", + "3. Configure Vitessce with the AnnData-Zarr store\n", + "4. Render a Vitessce widget based on the config (step 3) directly in the notebook.\n", + "\n", + "The dataset that you choose to convert needs to be a valid UCSC Cell Browser \"project\", accessible from https://cells.ucsc.edu/, with a configuration available in https://github.com/ucscGenomeBrowser/cellbrowser-confs\n", + "\n", + "The `convert_cell_browser_project_to_anndata` function takes the name of that project as an input. For example, to convert this project, https://cells.ucsc.edu/?ds=adultPancreas, you will need to pass `\"adultPancreas\"` as the project name." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "532fea6a-69d4-4cac-8afb-6d334dbe7ca1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "from os.path import join\n", + "from vitessce import (\n", + " convert_cell_browser_project_to_anndata,\n", + " AnnDataWrapper,\n", + " VitessceConfig,\n", + ")\n", + "from vitessce.data_utils import VAR_CHUNK_SIZE" + ] + }, + { + "cell_type": "markdown", + "id": "a8077cfd-abc2-488d-9d91-83bc29a0bbe9", + "metadata": {}, + "source": [ + "## 1. 
Convert UCSC Cell Browser project to a format that is supported by Vitessce\n", + "#### Output:\n", + "An AnnData object\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "743c2d61-d98c-4e8d-a821-d5fe0ec2d93b", + "metadata": {}, + "outputs": [], + "source": [ + "## 3. Convert UCSC Cell Browser project to a Vitessce view config" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "6fb3e7dc-baf8-49e9-9d24-264bcd668b49", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Converting CellBrowser config for project adultPancreas to Anndata-Zarr object\n", + "Successfully fetched configuration: https://cells.ucsc.edu/adultPancreas/dataset.json.\n", + "CellBrowser config is valid. Proceeding further with conversion.\n", + "Downloading expression matrix ...\n", + "Successfully downloaded expression matrix https://cells.ucsc.edu/adultPancreas/exprMatrix.tsv.gz.\n", + "Loading expression matrix into Anndata object ...\n", + "This dataset uses the format identifier|symbol for the ad.obs gene names (e.g. “ENSG0123123.3|HOX3”). We are keeping only the symbol.\n", + "Adding cell metadata to Anndata object ...\n", + "Successfully downloaded metadata meta.tsv.\n", + "Successful extraction of the following coordinates and URLS: {'X_tsne': 'tMinusSNE.coords.tsv.gz'}\n", + "Adding X_tsne to Anndata object ...\n", + "X_tsne successfully added.\n", + "Done adding coordinates to the Anndata object.\n", + "Filtering out all non-marker genes from Anndata object ...\n", + "Successfully filtered out all non-marker genes from Anndata object.\n", + "About to write the Anndata object to the Zarr store. 
The following properties will be saved:\n", + " Obs columns: ['cluster', 'age', 'age_unit', 'Key', 'experiment_name', 'fragAnalyzerRange', 'nCells', 'ng_ul', 'plate_nr', 'sample_recieve_date', 'chip_type', 'c1_chip_id', 'enrichment_method', 'capture_position', 'gene_body_coverage', 'intron_exon_ratio', 'mapped_reads', 'total_reads', 'n_genes']\n", + " Obsm keys: ['X_tsne']\n", + " Var columns: ['gene', 'n_cells']\n", + "obsm X_tsne is an instance of DataFrame, converting it to numpy array.\n" + ] + } + ], + "source": [ + "# Example run, converting \"adultPancreas\" project:\n", + "adata = convert_cell_browser_project_to_anndata(project_name=\"adultPancreas\", keep_only_marker_genes=True)" + ] + }, + { + "cell_type": "markdown", + "id": "cf3cfcbe-4048-4a60-8988-b8c0eace23e2", + "metadata": {}, + "source": [ + "## 2. Save the AnnData object as a Zarr store" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "8835ab53-2ee3-490e-a68c-c2d8952277a9", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"out.adata.zarr\")\n", + "os.makedirs(os.path.dirname(zarr_filepath), exist_ok=True)\n", + "adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "id": "d61667b4-dc32-4376-bff1-b4a5bf74140f", + "metadata": {}, + "source": [ + "## 3. 
Configure Vitessce with the AnnData-Zarr store" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "259c1804-2e67-4a92-bc90-5ba5e3dba7b3", + "metadata": {}, + "outputs": [], + "source": [ + "anndata_wrapper_inst = AnnDataWrapper(\n", + " base_path=zarr_filepath,\n", + " obs_feature_matrix_path=\"X\",\n", + " obs_embedding_paths=[\"obsm/X_tsne\"],\n", + " obs_embedding_names=[\"t-SNE\"],\n", + " obs_set_paths=[\"obs/cluster\", \"obs/age\"],\n", + " obs_set_names=[\"cluster\", \"age\"],\n", + ")\n", + "vc = VitessceConfig(schema_version=\"1.0.15\", name=\"Vitessce configuration for CellBrowser project adultPancreas\")\n", + "anndata_wrapper_inst.auto_view_config(vc)" + ] + }, + { + "cell_type": "markdown", + "id": "22e7d2fd-2c2e-4ce5-b551-7809cdc6568e", + "metadata": {}, + "source": [ + "## 4. Render the Vitessce widget" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "cb9cb8e3-8ef4-49d9-b0a0-ba2f0fc80637", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e5878bf30e1f4428a14604731928972d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VitessceWidget(config={'version': '1.0.15', 'name': 'Vitessce configuration for CellBrowser project adultPancr…" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "774b8156-5cc6-4d17-884b-595957366230", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + 
"nbformat_minor": 5 +} \ No newline at end of file diff --git a/docs/notebooks/data_export_files.ipynb b/docs/notebooks/data_export_files.ipynb index d2b17c6e..299cf174 100644 --- a/docs/notebooks/data_export_files.ipynb +++ b/docs/notebooks/data_export_files.ipynb @@ -1,236 +1,236 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Data Preparation Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Export data to local files" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import json\n", - "from urllib.parse import quote_plus\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceWidget,\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Download and process data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", - "if not isfile(adata_filepath):\n", - " os.makedirs(\"data\", exist_ok=True)\n", - " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)\n", - "\n", - "adata = read_h5ad(adata_filepath)\n", - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", - "if not isdir(zarr_filepath):\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"CellType\"],\n", - " obsm_keys=[\"X_umap\"],\n", - " var_cols=[\"top_highly_variable\"],\n", - " optimize_X=True,\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Create the Vitessce configuration" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set up the configuration by adding the views and datasets of interest." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", - "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " adata_path=zarr_filepath,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " feature_filter_path=\"var/top_highly_variable\"\n", - "))\n", - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"X_umap\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", - "vc.layout((scatterplot | (cell_sets / genes)) / heatmap);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Export files to a local directory\n", - "\n", - "The `.export(to='files')` method on the view config instance will export files to the specified directory `out_dir`. The `base_url` parameter is required so that the file URLs in the view config point to the location where you ultimately intend to serve the files." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config_dict = vc.export(to='files', base_url='http://localhost:3000', out_dir='./test')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. 
Serve the files" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that the files have been saved to the `./test` directory, they can be served by any static web server.\n", - "\n", - "If you would like to serve the files locally, we recommend [http-server](https://github.com/http-party/http-server) which can be installed with NPM or Homebrew:\n", - "```sh\n", - "cd test\n", - "http-server ./ --cors -p 3000\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6. View on vitessce.io\n", - "\n", - "The returned view config dict can be converted to a URL, and if the files are served on the internet (rather than locally), this URL can be used to share the interactive visualizations with colleagues." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vitessce_url = \"http://vitessce.io/?url=data:,\" + quote_plus(json.dumps(config_dict))\n", - "import webbrowser\n", - "webbrowser.open(vitessce_url)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Data Preparation Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Export data to local files" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, 
+ "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "from urllib.parse import quote_plus\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceWidget,\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download and process data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", + "if not isfile(adata_filepath):\n", + " os.makedirs(\"data\", exist_ok=True)\n", + " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)\n", + "\n", + "adata = read_h5ad(adata_filepath)\n", + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", + "if not isdir(zarr_filepath):\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"CellType\"],\n", + " obsm_keys=[\"X_umap\"],\n", + " var_cols=[\"top_highly_variable\"],\n", + " optimize_X=True,\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Create the Vitessce configuration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up the configuration by adding the views and datasets of interest." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", + " base_path=zarr_filepath,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " feature_filter_path=\"var/top_highly_variable\"\n", + "))\n", + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"X_umap\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", + "vc.layout((scatterplot | (cell_sets / genes)) / heatmap);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Export files to a local directory\n", + "\n", + "The `.export(to='files')` method on the view config instance will export files to the specified directory `out_dir`. The `base_url` parameter is required so that the file URLs in the view config point to the location where you ultimately intend to serve the files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_dict = vc.export(to='files', base_url='http://localhost:3000', out_dir='./test')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Serve the files" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that the files have been saved to the `./test` directory, they can be served by any static web server.\n", + "\n", + "If you would like to serve the files locally, we recommend [http-server](https://github.com/http-party/http-server) which can be installed with NPM or Homebrew:\n", + "```sh\n", + "cd test\n", + "http-server ./ --cors -p 3000\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. View on vitessce.io\n", + "\n", + "The returned view config dict can be converted to a URL, and if the files are served on the internet (rather than locally), this URL can be used to share the interactive visualizations with colleagues." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vitessce_url = \"http://vitessce.io/?url=data:,\" + quote_plus(json.dumps(config_dict))\n", + "import webbrowser\n", + "webbrowser.open(vitessce_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/notebooks/data_export_s3.ipynb b/docs/notebooks/data_export_s3.ipynb index c84a0e43..3ae539c4 100644 --- a/docs/notebooks/data_export_s3.ipynb +++ b/docs/notebooks/data_export_s3.ipynb @@ -1,230 +1,230 @@ { - "cells": [ - { - "cell_type": "markdown", - 
"metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Data Preparation Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Export data to AWS S3" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import boto3\n", - "import json\n", - "from urllib.parse import quote_plus\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceWidget,\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Download and process data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", - "if not isfile(adata_filepath):\n", - " os.makedirs(\"data\", exist_ok=True)\n", - " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)\n", - "\n", - "adata = read_h5ad(adata_filepath)\n", - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", - "if not isdir(zarr_filepath):\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"CellType\"],\n", - " obsm_keys=[\"X_umap\"],\n", - " var_cols=[\"top_highly_variable\"],\n", - " optimize_X=True,\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Create the Vitessce configuration" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set up the configuration by adding the views and datasets of interest." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", - "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " adata_path=zarr_filepath,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " feature_filter_path=\"var/top_highly_variable\"\n", - "))\n", - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", - "vc.layout((scatterplot | (cell_sets / genes)) / heatmap);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Create a `boto3` resource with S3 credentials" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "s3 = boto3.resource(\n", - " service_name='s3',\n", - " aws_access_key_id=os.environ['VITESSCE_S3_ACCESS_KEY_ID'],\n", - " aws_secret_access_key=os.environ['VITESSCE_S3_SECRET_ACCESS_KEY'],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Upload files to S3\n", - "\n", - "The `.export(to='S3')` method on the view config instance will upload all data objects to the specified bucket. Then, the processed view config will be returned as a `dict`, with the file URLs filled in, pointing to the S3 bucket files. For more information about configuring the S3 bucket so that files are accessible over the internet, visit the \"Hosting Data\" page of our core documentation site." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config_dict = vc.export(to='S3', s3=s3, bucket_name='vitessce-export-examples', prefix='test')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6. View on vitessce.io\n", - "\n", - "The returned view config dict can be converted to a URL, and can be used to share the interactive visualizations with colleagues." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vitessce_url = \"http://vitessce.io/?url=data:,\" + quote_plus(json.dumps(config_dict))\n", - "import webbrowser\n", - "webbrowser.open(vitessce_url)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Data Preparation Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Export data to AWS S3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import boto3\n", + "import json\n", + "from urllib.parse import quote_plus\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceWidget,\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download and process data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", + "if not isfile(adata_filepath):\n", + " os.makedirs(\"data\", exist_ok=True)\n", + " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)\n", + "\n", + "adata = read_h5ad(adata_filepath)\n", + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", + "if not isdir(zarr_filepath):\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"CellType\"],\n", + " obsm_keys=[\"X_umap\"],\n", + " var_cols=[\"top_highly_variable\"],\n", + 
" optimize_X=True,\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Create the Vitessce configuration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up the configuration by adding the views and datasets of interest." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", + " base_path=zarr_filepath,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " feature_filter_path=\"var/top_highly_variable\"\n", + "))\n", + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", + "vc.layout((scatterplot | (cell_sets / genes)) / heatmap);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create a `boto3` resource with S3 credentials" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s3 = boto3.resource(\n", + " service_name='s3',\n", + " aws_access_key_id=os.environ['VITESSCE_S3_ACCESS_KEY_ID'],\n", + " aws_secret_access_key=os.environ['VITESSCE_S3_SECRET_ACCESS_KEY'],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Upload files to S3\n", + "\n", + "The `.export(to='S3')` method on the view config instance will upload all data objects to the specified bucket. 
Then, the processed view config will be returned as a `dict`, with the file URLs filled in, pointing to the S3 bucket files. For more information about configuring the S3 bucket so that files are accessible over the internet, visit the \"Hosting Data\" page of our core documentation site." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_dict = vc.export(to='S3', s3=s3, bucket_name='vitessce-export-examples', prefix='test')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. View on vitessce.io\n", + "\n", + "The returned view config dict can be converted to a URL, and can be used to share the interactive visualizations with colleagues." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vitessce_url = \"http://vitessce.io/?url=data:,\" + quote_plus(json.dumps(config_dict))\n", + "import webbrowser\n", + "webbrowser.open(vitessce_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/notebooks/widget_brain.ipynb b/docs/notebooks/widget_brain.ipynb index 759bd928..340e7de7 100644 --- a/docs/notebooks/widget_brain.ipynb +++ b/docs/notebooks/widget_brain.ipynb @@ -1,295 +1,295 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, 
- "source": [ - "# Visualization of single-cell RNA seq data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Download the data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", - "if not isfile(adata_filepath):\n", - " os.makedirs(\"data\", exist_ok=True)\n", - " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Load the data\n", - "\n", - "Note: this function may print a `FutureWarning`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_h5ad(adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## 3.1. Preprocess the Data For Visualization\n", - "\n", - "This dataset contains 25,587 genes. 
We prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3.2 Save the Data to Zarr store\n", - "\n", - "We want to convert the original `h5ad` file to a [Zarr](https://zarr.readthedocs.io/en/stable/) store, which Vitessce is able to load. We can use the `optimize_adata` function to ensure that all arrays and dataframe columns that we intend to use in our visualization are in the optimal format to be loaded by Vitessce. This function will cast arrays to numerical data types that take up less space (as long as the values allow). Note: unused arrays and columns (i.e., not specified in any of the parameters to `optimize_adata`) will not be copied into the new AnnData object." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", - "if not isdir(zarr_filepath):\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"CellType\"],\n", - " obsm_keys=[\"X_umap\"],\n", - " optimize_X=True,\n", - " var_cols=[\"top_highly_variable\"],\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. 
Create the Vitessce widget configuration\n", - "\n", - "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.1. Instantiate a `VitessceConfig` object\n", - "\n", - "Use the `VitessceConfig` constructor to create an instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.2. Add a dataset to the `VitessceConfig` instance\n", - "\n", - "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", - "\n", - "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", - "\n", - "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " adata_path=zarr_filepath,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " initial_feature_filter_path=\"var/top_highly_variable\"\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.3. Add visualizations to the `VitessceConfig` instance\n", - "\n", - "Now that we have added a dataset, we can configure visualizations. The `.add_view` method adds a view (i.e. visualization or controller component) to the configuration.\n", - "\n", - "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", - "\n", - "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.4. Define the visualization layout\n", - "\n", - "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Create the widget\n", - "\n", - "The `vc.widget()` method returns the configured widget instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of single-cell RNA seq data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download the data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", + "if not isfile(adata_filepath):\n", + " os.makedirs(\"data\", exist_ok=True)\n", + " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Load the data\n", + "\n", + "Note: this function may print a `FutureWarning`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_h5ad(adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## 3.1. Preprocess the Data For Visualization\n", + "\n", + "This dataset contains 25,587 genes. We prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.2 Save the Data to Zarr store\n", + "\n", + "We want to convert the original `h5ad` file to a [Zarr](https://zarr.readthedocs.io/en/stable/) store, which Vitessce is able to load. We can use the `optimize_adata` function to ensure that all arrays and dataframe columns that we intend to use in our visualization are in the optimal format to be loaded by Vitessce. This function will cast arrays to numerical data types that take up less space (as long as the values allow). Note: unused arrays and columns (i.e., not specified in any of the parameters to `optimize_adata`) will not be copied into the new AnnData object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", + "if not isdir(zarr_filepath):\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"CellType\"],\n", + " obsm_keys=[\"X_umap\"],\n", + " optimize_X=True,\n", + " var_cols=[\"top_highly_variable\"],\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create the Vitessce widget configuration\n", + "\n", + "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.1. Instantiate a `VitessceConfig` object\n", + "\n", + "Use the `VitessceConfig` constructor to create an instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.2. Add a dataset to the `VitessceConfig` instance\n", + "\n", + "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", + "\n", + "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", + "\n", + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", + " base_path=zarr_filepath,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " initial_feature_filter_path=\"var/top_highly_variable\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.3. 
Add visualizations to the `VitessceConfig` instance\n", + "\n", + "Now that we have added a dataset, we can configure visualizations. The `.add_view` method adds a view (i.e. visualization or controller component) to the configuration.\n", + "\n", + "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", + "\n", + "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.4. Define the visualization layout\n", + "\n", + "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Create the widget\n", + "\n", + "The `vc.widget()` method returns the configured widget instance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/notebooks/widget_brain_with_base_dir.ipynb b/docs/notebooks/widget_brain_with_base_dir.ipynb index 015a3e67..269dd2e6 100644 --- a/docs/notebooks/widget_brain_with_base_dir.ipynb +++ b/docs/notebooks/widget_brain_with_base_dir.ipynb @@ -1,329 +1,329 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Configure relative to a base_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - " BASE_URL_PLACEHOLDER,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Define a `base_dir`\n", - "\n", - "We will define a `base_dir` inside which our data will live. We will provide this to `VitessceConfig` in order to construct a configuration that contains URL paths relative to this directory." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "BASE_DIR = \"data\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Download the data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_relative_filepath = \"habib17.processed.h5ad\" # Relative to BASE_DIR\n", - "adata_filepath = join(BASE_DIR, adata_relative_filepath)\n", - "if not isfile(adata_filepath):\n", - " os.makedirs(BASE_DIR, exist_ok=True)\n", - " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. 
Load the data\n", - "\n", - "Note: this function may print a `FutureWarning`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_h5ad(adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## 4.1. Preprocess the Data For Visualization\n", - "\n", - "This dataset contains 25,587 genes. We prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4.2 Save the Data to Zarr store\n", - "\n", - "We want to convert the original `h5ad` file to a [Zarr](https://zarr.readthedocs.io/en/stable/) store, which Vitessce is able to load. We can use the `optimize_adata` function to ensure that all arrays and dataframe columns that we intend to use in our visualization are in the optimal format to be loaded by Vitessce. This function will cast arrays to numerical data types that take up less space (as long as the values allow). Note: unused arrays and columns (i.e., not specified in any of the parameters to `optimize_adata`) will not be copied into the new AnnData object." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_relative_filepath = \"habib17.processed.zarr\" # Relative to BASE_DIR\n", - "zarr_filepath = join(BASE_DIR, zarr_relative_filepath)\n", - "if not isdir(zarr_filepath):\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"CellType\"],\n", - " obsm_keys=[\"X_umap\"],\n", - " optimize_X=True,\n", - " var_cols=[\"top_highly_variable\"],\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Create the Vitessce widget configuration\n", - "\n", - "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.1. Instantiate a `VitessceConfig` object\n", - "\n", - "Use the `VitessceConfig` constructor to create an instance. In this case, we want to construct our configuration using local data that is relative to a particular directory, so we provide the `base_dir` parameter.\n", - "\n", - "Note: This `base_dir` parameter is optional. When it is omitted, local data paths are assumed to be relative to the current working directory." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', base_dir=BASE_DIR)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.2. Add a dataset to the `VitessceConfig` instance\n", - "\n", - "In Vitessce, a dataset is a container for one file per data type. 
The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", - "\n", - "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", - "\n", - "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " adata_path=zarr_relative_filepath, # Relative to BASE_DIR (because we specified base_dir in the VitessceConfig constructor)\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " initial_feature_filter_path=\"var/top_highly_variable\"\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.3. Add visualizations to the `VitessceConfig` instance\n", - "\n", - "Now that we have added a dataset, we can configure visualizations. The `.add_view` method adds a view (i.e. visualization or controller component) to the configuration.\n", - "\n", - "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", - "\n", - "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.4. Define the visualization layout\n", - "\n", - "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6. Create the widget\n", - "\n", - "The `vc.widget()` method returns the configured widget instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 7. Check the URLs in the configuration\n", - "\n", - "We can check that the data URLs in the configuration respected the specified `base_dir`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config_dict = vc.to_dict(base_url=BASE_URL_PLACEHOLDER)\n", - "config_dict" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Configure relative to a base_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + " BASE_URL_PLACEHOLDER,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Define a `base_dir`\n", + "\n", + "We will define a `base_dir` inside which our data will live. We will provide this to `VitessceConfig` in order to construct a configuration that contains URL paths relative to this directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "BASE_DIR = \"data\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Download the data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_relative_filepath = \"habib17.processed.h5ad\" # Relative to BASE_DIR\n", + "adata_filepath = join(BASE_DIR, adata_relative_filepath)\n", + "if not isfile(adata_filepath):\n", + " os.makedirs(BASE_DIR, exist_ok=True)\n", + " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Load the data\n", + "\n", + "Note: this function may print a `FutureWarning`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_h5ad(adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## 4.1. Preprocess the Data For Visualization\n", + "\n", + "This dataset contains 25,587 genes. We prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4.2 Save the Data to Zarr store\n", + "\n", + "We want to convert the original `h5ad` file to a [Zarr](https://zarr.readthedocs.io/en/stable/) store, which Vitessce is able to load. We can use the `optimize_adata` function to ensure that all arrays and dataframe columns that we intend to use in our visualization are in the optimal format to be loaded by Vitessce. This function will cast arrays to numerical data types that take up less space (as long as the values allow). Note: unused arrays and columns (i.e., not specified in any of the parameters to `optimize_adata`) will not be copied into the new AnnData object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_relative_filepath = \"habib17.processed.zarr\" # Relative to BASE_DIR\n", + "zarr_filepath = join(BASE_DIR, zarr_relative_filepath)\n", + "if not isdir(zarr_filepath):\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"CellType\"],\n", + " obsm_keys=[\"X_umap\"],\n", + " optimize_X=True,\n", + " var_cols=[\"top_highly_variable\"],\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Create the Vitessce widget configuration\n", + "\n", + "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.1. Instantiate a `VitessceConfig` object\n", + "\n", + "Use the `VitessceConfig` constructor to create an instance. In this case, we want to construct our configuration using local data that is relative to a particular directory, so we provide the `base_dir` parameter.\n", + "\n", + "Note: This `base_dir` parameter is optional. When it is omitted, local data paths are assumed to be relative to the current working directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', base_dir=BASE_DIR)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.2. Add a dataset to the `VitessceConfig` instance\n", + "\n", + "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", + "\n", + "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", + "\n", + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", + " base_path=zarr_relative_filepath, # Relative to BASE_DIR (because we specified base_dir in the VitessceConfig constructor)\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " initial_feature_filter_path=\"var/top_highly_variable\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.3. Add visualizations to the `VitessceConfig` instance\n", + "\n", + "Now that we have added a dataset, we can configure visualizations. The `.add_view` method adds a view (i.e. visualization or controller component) to the configuration.\n", + "\n", + "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", + "\n", + "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.4. Define the visualization layout\n", + "\n", + "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Create the widget\n", + "\n", + "The `vc.widget()` method returns the configured widget instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Check the URLs in the configuration\n", + "\n", + "We can check that the data URLs in the configuration respected the specified `base_dir`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_dict = vc.to_dict(base_url=BASE_URL_PLACEHOLDER)\n", + "config_dict" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/notebooks/widget_genomic_profiles.ipynb b/docs/notebooks/widget_genomic_profiles.ipynb index 1794598c..37c00cc5 100644 --- a/docs/notebooks/widget_genomic_profiles.ipynb +++ b/docs/notebooks/widget_genomic_profiles.ipynb @@ -1,220 +1,220 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization of genomic profiles" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. 
Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from vitessce import (\n", - " VitessceConfig,\n", - " ViewType as vt,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - " MultivecZarrWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " adata_to_multivec_zarr,\n", - ")\n", - "from os.path import join\n", - "from scipy.io import mmread\n", - "import pandas as pd\n", - "import numpy as np\n", - "from anndata import AnnData" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Load the data\n", - "\n", - "In this step, we load the raw data that has been downloaded from the HuBMAP portal https://portal.hubmapconsortium.org/browse/dataset/210d118a14c8624b6bb9610a9062656e" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mtx = mmread(join('data', 'snapatac', 'filtered_cell_by_bin.mtx')).toarray()\n", - "barcodes_df = pd.read_csv(join('data', 'snapatac', 'barcodes.txt'), header=None)\n", - "bins_df = pd.read_csv(join('data', 'snapatac', 'bins.txt'), header=None, names=[\"interval\"])\n", - "clusters_df = pd.read_csv(join('data', 'snapatac', 'umap_coords_clusters.csv'), index_col=0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Convert the data to Vitessce-compatible formats\n", - "\n", - "Vitessce can load AnnData objects saved to Zarr formats efficiently." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# The genome assembly is GRCh38 but the chromosome names in the bin names do not start with the \"chr\" prefix.\n", - "# This is incompatible with the chromosome names from `negspy`, so we need to append the prefix.\n", - "bins_df[\"interval\"] = bins_df[\"interval\"].apply(lambda x: \"chr\" + x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "obs = clusters_df[[\"cluster\"]]\n", - "obs[\"cluster\"] = obs[\"cluster\"].astype(str)\n", - "obsm = { \"X_umap\": clusters_df[[\"umap.1\", \"umap.2\"]].values }\n", - "adata = AnnData(X=mtx, obs=obs, var=bins_df, obsm=obsm)\n", - "adata" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "multivec_zarr_path = join(\"data\", \"HBM485.TBWH.322.multivec.zarr\")\n", - "adata_zarr_path = join(\"data\", \"HBM485.TBWH.322.adata.zarr\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Sort cluster IDs\n", - "cluster_ids = obs[\"cluster\"].unique().tolist()\n", - "cluster_ids.sort(key=int)\n", - "# Save genomic profiles to multivec-zarr format.\n", - "adata_to_multivec_zarr(adata, multivec_zarr_path, obs_set_col=\"cluster\", obs_set_name=\"Cluster\", obs_set_vals=cluster_ids)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Save anndata object to AnnData-Zarr format.\n", - "adata.write_zarr(adata_zarr_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## 4. 
Make a Vitessce configuration\n", - "\n", - "We need to tell Vitessce about the data that we want to load and the visualization components that we want to include in the widget.\n", - "For this dataset, we want to add the `GENOMIC_PROFILES` component, which renders genome browser tracks with [HiGlass](http://higlass.io)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='HuBMAP snATAC-seq')\n", - "dataset = vc.add_dataset(name='HBM485.TBWH.322').add_object(MultivecZarrWrapper(\n", - " zarr_path=multivec_zarr_path\n", - ")).add_object(AnnDataWrapper(\n", - " adata_path=adata_zarr_path,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/cluster\"],\n", - " obs_set_names=[\"Cluster\"],\n", - "))\n", - "\n", - "genomic_profiles = vc.add_view(vt.GENOMIC_PROFILES, dataset=dataset)\n", - "scatter = vc.add_view(vt.SCATTERPLOT, dataset=dataset, mapping = \"UMAP\")\n", - "cell_sets = vc.add_view(vt.OBS_SETS, dataset=dataset)\n", - "\n", - "vc.layout(genomic_profiles / (scatter | cell_sets));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. 
Create the widget" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget(height=800)\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of genomic profiles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vitessce import (\n", + " VitessceConfig,\n", + " ViewType as vt,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + " MultivecZarrWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " adata_to_multivec_zarr,\n", + ")\n", + "from os.path import join\n", + "from scipy.io import mmread\n", + "import pandas as pd\n", + "import numpy as np\n", + "from anndata import AnnData" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Load the data\n", + "\n", + "In this step, we load the raw data that has been downloaded from the HuBMAP portal https://portal.hubmapconsortium.org/browse/dataset/210d118a14c8624b6bb9610a9062656e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mtx = mmread(join('data', 'snapatac', 'filtered_cell_by_bin.mtx')).toarray()\n", + "barcodes_df = pd.read_csv(join('data', 'snapatac', 'barcodes.txt'), header=None)\n", + "bins_df = pd.read_csv(join('data', 'snapatac', 'bins.txt'), header=None, names=[\"interval\"])\n", + "clusters_df = pd.read_csv(join('data', 'snapatac', 'umap_coords_clusters.csv'), index_col=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Convert the data to Vitessce-compatible formats\n", + "\n", + "Vitessce can load AnnData objects saved to Zarr formats efficiently." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The genome assembly is GRCh38 but the chromosome names in the bin names do not start with the \"chr\" prefix.\n", + "# This is incompatible with the chromosome names from `negspy`, so we need to append the prefix.\n", + "bins_df[\"interval\"] = bins_df[\"interval\"].apply(lambda x: \"chr\" + x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obs = clusters_df[[\"cluster\"]]\n", + "obs[\"cluster\"] = obs[\"cluster\"].astype(str)\n", + "obsm = { \"X_umap\": clusters_df[[\"umap.1\", \"umap.2\"]].values }\n", + "adata = AnnData(X=mtx, obs=obs, var=bins_df, obsm=obsm)\n", + "adata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "multivec_zarr_path = join(\"data\", \"HBM485.TBWH.322.multivec.zarr\")\n", + "adata_zarr_path = join(\"data\", \"HBM485.TBWH.322.adata.zarr\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [], + "source": [ + "# Sort cluster IDs\n", + "cluster_ids = obs[\"cluster\"].unique().tolist()\n", + "cluster_ids.sort(key=int)\n", + "# Save genomic profiles to multivec-zarr format.\n", + "adata_to_multivec_zarr(adata, multivec_zarr_path, obs_set_col=\"cluster\", obs_set_name=\"Cluster\", obs_set_vals=cluster_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save anndata object to AnnData-Zarr format.\n", + "adata.write_zarr(adata_zarr_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## 4. Make a Vitessce configuration\n", + "\n", + "We need to tell Vitessce about the data that we want to load and the visualization components that we want to include in the widget.\n", + "For this dataset, we want to add the `GENOMIC_PROFILES` component, which renders genome browser tracks with [HiGlass](http://higlass.io)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='HuBMAP snATAC-seq')\n", + "dataset = vc.add_dataset(name='HBM485.TBWH.322').add_object(MultivecZarrWrapper(\n", + " zarr_path=multivec_zarr_path\n", + ")).add_object(AnnDataWrapper(\n", + " base_path=adata_zarr_path,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/cluster\"],\n", + " obs_set_names=[\"Cluster\"],\n", + "))\n", + "\n", + "genomic_profiles = vc.add_view(vt.GENOMIC_PROFILES, dataset=dataset)\n", + "scatter = vc.add_view(vt.SCATTERPLOT, dataset=dataset, mapping = \"UMAP\")\n", + "cell_sets = vc.add_view(vt.OBS_SETS, dataset=dataset)\n", + "\n", + "vc.layout(genomic_profiles / (scatter | cell_sets));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Create the widget" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget(height=800)\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/notebooks/widget_loom.ipynb b/docs/notebooks/widget_loom.ipynb index 0c4f958b..ad47c182 100644 --- a/docs/notebooks/widget_loom.ipynb +++ b/docs/notebooks/widget_loom.ipynb @@ -1,219 +1,219 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization of a Loom file" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_loom\n", - "import numpy as np\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " to_diamond,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. 
Download data\n", - "\n", - "Download `osmFISH_SScortex_mouse_all_cells.loom` from http://loom.linnarssonlab.org/." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "loom_filepath = join(\"data\", \"osmFISH_SScortex_mouse_all_cells.loom\")\n", - "if not isfile(loom_filepath):\n", - " os.makedirs(\"data\", exist_ok=True)\n", - " urlretrieve('http://loom.linnarssonlab.org/clone/osmFISH/osmFISH_SScortex_mouse_all_cells.loom', loom_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Open Loom file with AnnData's read_loom" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_loom(loom_filepath, obsm_names={\"tSNE\": [\"_tSNE_1\", \"_tSNE_2\"], \"spatial\": [\"X\", \"Y\"]})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Generate pseudo-segmentations as diamond-shaped polygons centered on the spatial coordinate of each cell, and store in `adata.obsm[\"segmentations\"]`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "num_cells = adata.obs.shape[0]\n", - "adata.obsm[\"segmentations\"] = np.zeros((num_cells, 4, 2))\n", - "radius = 100\n", - "for i in range(num_cells):\n", - " adata.obsm[\"segmentations\"][i, :, :] = to_diamond(adata.obsm['spatial'][i, 0], adata.obsm['spatial'][i, 1], radius)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save the AnnData object to a Zarr store:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"osmFISH_SScortex_mouse_all_cells.zarr\")\n", - "if not isdir(zarr_filepath) or True:\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"ClusterName\"],\n", - " obsm_keys=[\"tSNE\", \"spatial\", \"segmentations\"],\n", - " 
optimize_X=True,\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Configure Vitessce\n", - "\n", - "Create a Vitessce view config." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Loom Example', description='osmFISH dataset of the mouse cortex including all cells')\n", - "w = AnnDataWrapper(adata_path=zarr_filepath, obs_set_paths=[\"obs/ClusterName\"], obs_set_names=[\"Clusters\"], obs_locations_path=\"obsm/spatial\", obs_segmentations_path=\"obsm/segmentations\", obs_embedding_paths=[\"obsm/tSNE\"])\n", - "dataset = vc.add_dataset(name='SScortex').add_object(w)\n", - "\n", - "tsne = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"tSNE\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "spatial = vc.add_view(cm.SPATIAL, dataset=dataset)\n", - "\n", - "spatial_segmentation_layer_value = {\n", - " \"opacity\": 1,\n", - " \"radius\": 0,\n", - " \"visible\": True,\n", - " \"stroked\": False\n", - "}\n", - "\n", - "vc.link_views([spatial], [ct.SPATIAL_ZOOM, ct.SPATIAL_TARGET_X, ct.SPATIAL_TARGET_Y, ct.SPATIAL_SEGMENTATION_LAYER], [-6.43, 10417.69, 24885.55, spatial_segmentation_layer_value])\n", - "vc.layout(spatial | (tsne / cell_sets));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Render the widget" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A widget can be created with the `.widget()` method on the config instance." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of a Loom file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_loom\n", + "import numpy as np\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " to_diamond,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download data\n", + "\n", + "Download `osmFISH_SScortex_mouse_all_cells.loom` from http://loom.linnarssonlab.org/." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "loom_filepath = join(\"data\", \"osmFISH_SScortex_mouse_all_cells.loom\")\n", + "if not isfile(loom_filepath):\n", + " os.makedirs(\"data\", exist_ok=True)\n", + " urlretrieve('http://loom.linnarssonlab.org/clone/osmFISH/osmFISH_SScortex_mouse_all_cells.loom', loom_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Open Loom file with AnnData's read_loom" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_loom(loom_filepath, obsm_names={\"tSNE\": [\"_tSNE_1\", \"_tSNE_2\"], \"spatial\": [\"X\", \"Y\"]})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generate pseudo-segmentations as diamond-shaped polygons centered on the spatial coordinate of each cell, and store in `adata.obsm[\"segmentations\"]`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_cells = adata.obs.shape[0]\n", + "adata.obsm[\"segmentations\"] = np.zeros((num_cells, 4, 2))\n", + "radius = 100\n", + "for i in range(num_cells):\n", + " adata.obsm[\"segmentations\"][i, :, :] = to_diamond(adata.obsm['spatial'][i, 0], adata.obsm['spatial'][i, 1], radius)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save the AnnData object to a Zarr store:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"osmFISH_SScortex_mouse_all_cells.zarr\")\n", + "if not isdir(zarr_filepath) or True:\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"ClusterName\"],\n", + " obsm_keys=[\"tSNE\", \"spatial\", \"segmentations\"],\n", + " optimize_X=True,\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Configure Vitessce\n", + "\n", + "Create a Vitessce view config." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Loom Example', description='osmFISH dataset of the mouse cortex including all cells')\n", + "w = AnnDataWrapper(base_path=zarr_filepath, obs_set_paths=[\"obs/ClusterName\"], obs_set_names=[\"Clusters\"], obs_locations_path=\"obsm/spatial\", obs_segmentations_path=\"obsm/segmentations\", obs_embedding_paths=[\"obsm/tSNE\"])\n", + "dataset = vc.add_dataset(name='SScortex').add_object(w)\n", + "\n", + "tsne = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"tSNE\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "spatial = vc.add_view(cm.SPATIAL, dataset=dataset)\n", + "\n", + "spatial_segmentation_layer_value = {\n", + " \"opacity\": 1,\n", + " \"radius\": 0,\n", + " \"visible\": True,\n", + " \"stroked\": False\n", + "}\n", + "\n", + "vc.link_views([spatial], [ct.SPATIAL_ZOOM, ct.SPATIAL_TARGET_X, ct.SPATIAL_TARGET_Y, ct.SPATIAL_SEGMENTATION_LAYER], [-6.43, 10417.69, 24885.55, spatial_segmentation_layer_value])\n", + "vc.layout(spatial | (tsne / cell_sets));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Render the widget" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A widget can be created with the `.widget()` method on the config instance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/notebooks/widget_pbmc.ipynb b/docs/notebooks/widget_pbmc.ipynb index f972949c..0cfeaa89 100644 --- a/docs/notebooks/widget_pbmc.ipynb +++ b/docs/notebooks/widget_pbmc.ipynb @@ -1,209 +1,209 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization of 3k PBMC reference" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Download the dataset\n", - "\n", - "Download `pbmc3k_final.h5ad` from https://seurat.nygenome.org/pbmc3k_final.h5ad" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_filepath = join(\"data\", \"pbmc3k_final.h5ad\")\n", - "if not isfile(adata_filepath):\n", - " os.makedirs(\"data\", exist_ok=True)\n", - " urlretrieve('https://seurat.nygenome.org/pbmc3k_final.h5ad', adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Load the dataset\n", - "\n", - "Load the dataset using AnnData's `read_h5ad` function." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_h5ad(adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3.1 Save the AnnData object to Zarr" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"pbmc3k_final.zarr\")\n", - "if not isdir(zarr_filepath):\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"leiden\"],\n", - " obsm_keys=[\"X_umap\", \"X_pca\"],\n", - " optimize_X=True,\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Create a Vitessce view config\n", - "\n", - "Define the data and views you would like to include in the widget." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", - "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(\n", - " adata_path=zarr_filepath,\n", - " obs_set_paths=[\"obs/leiden\"],\n", - " obs_set_names=[\"Leiden\"],\n", - " obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"],\n", - " obs_embedding_names=[\"UMAP\", \"PCA\"],\n", - " obs_feature_matrix_path=\"X\"\n", - "))\n", - "\n", - "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "pca = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"PCA\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", - "\n", - "vc.layout((umap / pca) | ((cell_sets | genes) / heatmap));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. 
Create the Vitessce widget" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A widget can be created with the `.widget()` method on the config instance. Here, the `proxy=True` parameter allows this widget to be used in a cloud notebook environment, such as Binder." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of 3k PBMC reference" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download the dataset\n", + "\n", + "Download `pbmc3k_final.h5ad` from https://seurat.nygenome.org/pbmc3k_final.h5ad" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_filepath = join(\"data\", \"pbmc3k_final.h5ad\")\n", + "if not isfile(adata_filepath):\n", + " os.makedirs(\"data\", exist_ok=True)\n", + " urlretrieve('https://seurat.nygenome.org/pbmc3k_final.h5ad', adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Load the dataset\n", + "\n", + "Load the dataset using AnnData's `read_h5ad` function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_h5ad(adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.1 Save the AnnData object to Zarr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"pbmc3k_final.zarr\")\n", + "if not isdir(zarr_filepath):\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"leiden\"],\n", + " obsm_keys=[\"X_umap\", \"X_pca\"],\n", + " optimize_X=True,\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create a Vitessce view config\n", + "\n", + "Define the data and views you would like to include in the widget." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", + "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(\n", + " base_path=zarr_filepath,\n", + " obs_set_paths=[\"obs/leiden\"],\n", + " obs_set_names=[\"Leiden\"],\n", + " obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"],\n", + " obs_embedding_names=[\"UMAP\", \"PCA\"],\n", + " obs_feature_matrix_path=\"X\"\n", + "))\n", + "\n", + "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "pca = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"PCA\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", + "\n", + "vc.layout((umap / pca) | ((cell_sets | genes) / heatmap));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Create the Vitessce widget" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A widget can be created with the `.widget()` method on the config instance. Here, the `proxy=True` parameter allows this widget to be used in a cloud notebook environment, such as Binder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/notebooks/widget_pbmc_remote.ipynb b/docs/notebooks/widget_pbmc_remote.ipynb index d238dca7..33a4d63f 100644 --- a/docs/notebooks/widget_pbmc_remote.ipynb +++ b/docs/notebooks/widget_pbmc_remote.ipynb @@ -1,153 +1,153 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization of 3k PBMC reference from Remote Zarr Store" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Set the URL for the Remote Dataset\n", - "\n", - "For this example, we already have uploaded the `pbmc3k` dataset as a zarr store from the [scanpy docs](https://scanpy.readthedocs.io/en/stable/api/scanpy.datasets.pbmc3k.html) to the cloud." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "url = 'https://storage.googleapis.com/vitessce-demo-data/anndata-test/pbmc3k_processed.zarr/'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Create a Vitessce view config\n", - "\n", - "Define the data and views you would like to include in the widget." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", - "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(adata_url=url, obs_set_paths=[\"obs/louvain\"], obs_set_names=[\"Louvain\"], obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"], obs_embedding_names=[\"UMAP\", \"PCA\"], obs_feature_matrix_path=\"X\"))\n", - "\n", - "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "pca = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"PCA\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", - "\n", - "vc.layout((umap / pca) | ((cell_sets | genes) / heatmap));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Create the Vitessce widget" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A widget can be created with the `.widget()` method on the config instance. Here, the `proxy=True` parameter allows this widget to be used in a cloud notebook environment, such as Binder." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of 3k PBMC reference from Remote Zarr Store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Set the URL for the Remote Dataset\n", + "\n", + "For this example, we already have uploaded the `pbmc3k` dataset as a zarr store from the [scanpy docs](https://scanpy.readthedocs.io/en/stable/api/scanpy.datasets.pbmc3k.html) to the cloud." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://storage.googleapis.com/vitessce-demo-data/anndata-test/pbmc3k_processed.zarr/'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Create a Vitessce view config\n", + "\n", + "Define the data and views you would like to include in the widget." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", + "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(base_url=url, obs_set_paths=[\"obs/louvain\"], obs_set_names=[\"Louvain\"], obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"], obs_embedding_names=[\"UMAP\", \"PCA\"], obs_feature_matrix_path=\"X\"))\n", + "\n", + "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "pca = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"PCA\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", + "\n", + "vc.layout((umap / pca) | ((cell_sets | genes) / heatmap));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create the Vitessce widget" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A widget can be created with the `.widget()` method on the config instance. Here, the `proxy=True` parameter allows this widget to be used in a cloud notebook environment, such as Binder." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py index d351cb1c..21bc35b0 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -208,8 +208,8 @@ def test_obs_segmentations_ome_zarr(self): }) def test_anndata(self): - adata_path = data_path / 'test.h5ad.zarr' - w = AnnDataWrapper(adata_path, + base_path = data_path / 'test.h5ad.zarr' + w = AnnDataWrapper(base_path, obs_set_paths=['obs/CellType'], obs_set_names=['Cell Type'], obs_labels_names=['Cell Label'], obs_labels_paths=['obs/CellLabel'], obs_embedding_paths=['obsm/X_umap'], obs_embedding_names=['UMAP']) @@ -225,8 +225,8 @@ def test_anndata(self): }}) def test_anndata_with_base_dir(self): - adata_path = 'test.h5ad.zarr' - w = AnnDataWrapper(adata_path, obs_set_paths=['obs/CellType'], obs_set_names=['Cell Type'], obs_embedding_paths=[ + base_path = 'test.h5ad.zarr' + w = AnnDataWrapper(base_path, obs_set_paths=['obs/CellType'], obs_set_names=['Cell Type'], obs_embedding_paths=[ 'obsm/X_umap'], obs_embedding_names=['UMAP']) w.base_dir = data_path w.local_dir_uid = 'anndata.zarr' From c1b8e8152554f99faab1795b2b2a1ca3192a3b8b Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 6 
May 2024 11:31:12 +0200 Subject: [PATCH 3/9] (refactor): `path`->`elem` --- ...rowser_to_vitessce_config_conversion.ipynb | 6 +- docs/notebooks/data_export_files.ipynb | 8 +- docs/notebooks/data_export_s3.ipynb | 8 +- docs/notebooks/web_app_brain.ipynb | 532 +++++++++--------- docs/notebooks/widget_brain.ipynb | 10 +- .../widget_brain_with_base_dir.ipynb | 10 +- docs/notebooks/widget_genomic_profiles.ipynb | 4 +- docs/notebooks/widget_loom.ipynb | 2 +- docs/notebooks/widget_pbmc.ipynb | 6 +- docs/notebooks/widget_pbmc_remote.ipynb | 2 +- docs/notebooks/widget_shortcut.ipynb | 350 ++++++------ tests/test_wrappers.py | 8 +- vitessce/wrappers.py | 62 +- 13 files changed, 505 insertions(+), 503 deletions(-) diff --git a/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb b/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb index 8535fbac..8fcf4ab0 100644 --- a/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb +++ b/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb @@ -139,10 +139,10 @@ "source": [ "anndata_wrapper_inst = AnnDataWrapper(\n", " base_path=zarr_filepath,\n", - " obs_feature_matrix_path=\"X\",\n", - " obs_embedding_paths=[\"obsm/X_tsne\"],\n", + " obs_feature_matrix_elem=\"X\",\n", + " obs_embedding_elems=[\"obsm/X_tsne\"],\n", " obs_embedding_names=[\"t-SNE\"],\n", - " obs_set_paths=[\"obs/cluster\", \"obs/age\"],\n", + " obs_set_elems=[\"obs/cluster\", \"obs/age\"],\n", " obs_set_names=[\"cluster\", \"age\"],\n", ")\n", "vc = VitessceConfig(schema_version=\"1.0.15\", name=\"Vitessce configuration for CellBrowser project adultPancreas\")\n", diff --git a/docs/notebooks/data_export_files.ipynb b/docs/notebooks/data_export_files.ipynb index 299cf174..b5ca784c 100644 --- a/docs/notebooks/data_export_files.ipynb +++ b/docs/notebooks/data_export_files.ipynb @@ -125,12 +125,12 @@ "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", "dataset = 
vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", " base_path=zarr_filepath,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_elems=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_elems=[\"obs/CellType\"],\n", " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " feature_filter_path=\"var/top_highly_variable\"\n", + " obs_feature_matrix_elem=\"X\",\n", + " feature_filter_elem=\"var/top_highly_variable\"\n", "))\n", "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"X_umap\")\n", "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", diff --git a/docs/notebooks/data_export_s3.ipynb b/docs/notebooks/data_export_s3.ipynb index 3ae539c4..67d24cc1 100644 --- a/docs/notebooks/data_export_s3.ipynb +++ b/docs/notebooks/data_export_s3.ipynb @@ -126,12 +126,12 @@ "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", " base_path=zarr_filepath,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_elems=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_elems=[\"obs/CellType\"],\n", " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " feature_filter_path=\"var/top_highly_variable\"\n", + " obs_feature_matrix_elem=\"X\",\n", + " feature_filter_elem=\"var/top_highly_variable\"\n", "))\n", "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", diff --git a/docs/notebooks/web_app_brain.ipynb b/docs/notebooks/web_app_brain.ipynb index 85d7905e..9bb2b872 100644 --- a/docs/notebooks/web_app_brain.ipynb +++ b/docs/notebooks/web_app_brain.ipynb @@ -1,267 +1,267 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": 
{ - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Web App Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization of single-cell RNA seq data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Download the data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.makedirs(\"data\", exist_ok=True)\n", - "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", - "urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Load the data\n", - "\n", - "Note: this function may print a `FutureWarning`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_h5ad(adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3.1. Preprocess the Data For Visualization\n", - "\n", - "This dataset contains 25,587 genes. 
In order to visualize it efficiently, we convert it to CSC sparse format so that we can make fast requests for gene data. We also prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Create the Vitessce widget configuration\n", - "\n", - "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.1. Instantiate a `VitessceConfig` object\n", - "\n", - "Use the `VitessceConfig(name, description)` constructor to create an instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.2. Add a dataset to the `VitessceConfig` instance\n", - "\n", - "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", - "\n", - "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. 
For example, the `AnnDataWrapper` class knows how to convert AnnData objects to the corresponding Vitessce data types.\n", - "\n", - "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `cell_set_obs_cols` to tell Vitessce which columns of the `obs` dataframe correspond to cell sets." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " adata,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " feature_filter_path=\"var/top_highly_variable\"\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.3. Add visualizations to the `VitessceConfig` instance\n", - "\n", - "Now that we have added a dataset, we can configure visualizations. The `.add_view(dataset, component_type)` method adds a view (i.e. visualization or controller component) to the configuration.\n", - "\n", - "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", - "\n", - "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.4. Define the visualization layout\n", - "\n", - "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Launch the web application\n", - "\n", - "The `vc.web_app()` method serves the processed data locally and opens a web browser to `http://vitessce.io/?url={config_as_json}`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc.web_app()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + 
"metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Web App Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of single-cell RNA seq data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download the data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs(\"data\", exist_ok=True)\n", + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", + "urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Load the data\n", + "\n", + "Note: this function may print a `FutureWarning`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_h5ad(adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.1. Preprocess the Data For Visualization\n", + "\n", + "This dataset contains 25,587 genes. 
In order to visualize it efficiently, we convert it to CSC sparse format so that we can make fast requests for gene data. We also prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create the Vitessce widget configuration\n", + "\n", + "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.1. Instantiate a `VitessceConfig` object\n", + "\n", + "Use the `VitessceConfig(name, description)` constructor to create an instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.2. Add a dataset to the `VitessceConfig` instance\n", + "\n", + "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", + "\n", + "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. 
For example, the `AnnDataWrapper` class knows how to convert AnnData objects to the corresponding Vitessce data types.\n", + "\n", + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_elems` to tell Vitessce which columns of the `obs` dataframe correspond to cell sets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", + " adata,\n", + " obs_embedding_elems=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_elems=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_elem=\"X\",\n", + " feature_filter_elem=\"var/top_highly_variable\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.3. Add visualizations to the `VitessceConfig` instance\n", + "\n", + "Now that we have added a dataset, we can configure visualizations. The `.add_view(component_type, dataset)` method adds a view (i.e. visualization or controller component) to the configuration.\n", + "\n", + "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", + "\n", + "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.4. Define the visualization layout\n", + "\n", + "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Launch the web application\n", + "\n", + "The `vc.web_app()` method serves the processed data locally and opens a web browser to `http://vitessce.io/?url={config_as_json}`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc.web_app()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git 
a/docs/notebooks/widget_brain.ipynb b/docs/notebooks/widget_brain.ipynb index 340e7de7..282fcbf1 100644 --- a/docs/notebooks/widget_brain.ipynb +++ b/docs/notebooks/widget_brain.ipynb @@ -180,7 +180,7 @@ "\n", "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", "\n", - "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_elems` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." 
] }, { @@ -191,12 +191,12 @@ "source": [ "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", " base_path=zarr_filepath,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_elems=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_elems=[\"obs/CellType\"],\n", " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " initial_feature_filter_path=\"var/top_highly_variable\"\n", + " obs_feature_matrix_elem=\"X\",\n", + " initial_feature_filter_elem=\"var/top_highly_variable\"\n", " )\n", ")" ] diff --git a/docs/notebooks/widget_brain_with_base_dir.ipynb b/docs/notebooks/widget_brain_with_base_dir.ipynb index 269dd2e6..c8497610 100644 --- a/docs/notebooks/widget_brain_with_base_dir.ipynb +++ b/docs/notebooks/widget_brain_with_base_dir.ipynb @@ -202,7 +202,7 @@ "\n", "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", "\n", - "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_elems` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." 
] }, { @@ -213,12 +213,12 @@ "source": [ "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", " base_path=zarr_relative_filepath, # Relative to BASE_DIR (because we specified base_dir in the VitessceConfig constructor)\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_elems=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_elems=[\"obs/CellType\"],\n", " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " initial_feature_filter_path=\"var/top_highly_variable\"\n", + " obs_feature_matrix_elem=\"X\",\n", + " initial_feature_filter_elem=\"var/top_highly_variable\"\n", " )\n", ")" ] diff --git a/docs/notebooks/widget_genomic_profiles.ipynb b/docs/notebooks/widget_genomic_profiles.ipynb index 37c00cc5..b2026544 100644 --- a/docs/notebooks/widget_genomic_profiles.ipynb +++ b/docs/notebooks/widget_genomic_profiles.ipynb @@ -158,9 +158,9 @@ " zarr_path=multivec_zarr_path\n", ")).add_object(AnnDataWrapper(\n", " base_path=adata_zarr_path,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_elems=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/cluster\"],\n", + " obs_set_elems=[\"obs/cluster\"],\n", " obs_set_names=[\"Cluster\"],\n", "))\n", "\n", diff --git a/docs/notebooks/widget_loom.ipynb b/docs/notebooks/widget_loom.ipynb index ad47c182..1facc535 100644 --- a/docs/notebooks/widget_loom.ipynb +++ b/docs/notebooks/widget_loom.ipynb @@ -145,7 +145,7 @@ "outputs": [], "source": [ "vc = VitessceConfig(schema_version=\"1.0.15\", name='Loom Example', description='osmFISH dataset of the mouse cortex including all cells')\n", - "w = AnnDataWrapper(base_path=zarr_filepath, obs_set_paths=[\"obs/ClusterName\"], obs_set_names=[\"Clusters\"], obs_locations_path=\"obsm/spatial\", obs_segmentations_path=\"obsm/segmentations\", obs_embedding_paths=[\"obsm/tSNE\"])\n", + "w = 
AnnDataWrapper(base_path=zarr_filepath, obs_set_elems=[\"obs/ClusterName\"], obs_set_names=[\"Clusters\"], obs_locations_elem=\"obsm/spatial\", obs_segmentations_elem=\"obsm/segmentations\", obs_embedding_elems=[\"obsm/tSNE\"])\n", "dataset = vc.add_dataset(name='SScortex').add_object(w)\n", "\n", "tsne = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"tSNE\")\n", diff --git a/docs/notebooks/widget_pbmc.ipynb b/docs/notebooks/widget_pbmc.ipynb index 0cfeaa89..e6e7c365 100644 --- a/docs/notebooks/widget_pbmc.ipynb +++ b/docs/notebooks/widget_pbmc.ipynb @@ -130,11 +130,11 @@ "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(\n", " base_path=zarr_filepath,\n", - " obs_set_paths=[\"obs/leiden\"],\n", + " obs_set_elems=[\"obs/leiden\"],\n", " obs_set_names=[\"Leiden\"],\n", - " obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"],\n", + " obs_embedding_elems=[\"obsm/X_umap\", \"obsm/X_pca\"],\n", " obs_embedding_names=[\"UMAP\", \"PCA\"],\n", - " obs_feature_matrix_path=\"X\"\n", + " obs_feature_matrix_elem=\"X\"\n", "))\n", "\n", "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", diff --git a/docs/notebooks/widget_pbmc_remote.ipynb b/docs/notebooks/widget_pbmc_remote.ipynb index 33a4d63f..5edceb2c 100644 --- a/docs/notebooks/widget_pbmc_remote.ipynb +++ b/docs/notebooks/widget_pbmc_remote.ipynb @@ -79,7 +79,7 @@ "outputs": [], "source": [ "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", - "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(base_url=url, obs_set_paths=[\"obs/louvain\"], obs_set_names=[\"Louvain\"], obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"], obs_embedding_names=[\"UMAP\", \"PCA\"], obs_feature_matrix_path=\"X\"))\n", + "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(base_url=url, obs_set_elems=[\"obs/louvain\"], obs_set_names=[\"Louvain\"], 
obs_embedding_elems=[\"obsm/X_umap\", \"obsm/X_pca\"], obs_embedding_names=[\"UMAP\", \"PCA\"], obs_feature_matrix_elem=\"X\"))\n", "\n", "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", "pca = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"PCA\")\n", diff --git a/docs/notebooks/widget_shortcut.ipynb b/docs/notebooks/widget_shortcut.ipynb index 75f758d3..e29f64df 100644 --- a/docs/notebooks/widget_shortcut.ipynb +++ b/docs/notebooks/widget_shortcut.ipynb @@ -1,176 +1,176 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The from_object shortcut" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "Import the functions and classes that we will be using." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Download the data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.makedirs(\"data\", exist_ok=True)\n", - "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", - "urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. 
Load the data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_h5ad(join(\"data\", \"habib17.processed.h5ad\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3.1. Preprocess the Data For Visualization" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With one line of code, you may create a Vitessce widget based on an automatically inferred configuration." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = VitessceConfig.from_object(AnnDataWrapper(\n", - " adata,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " feature_filter_path=\"var/top_highly_variable\"\n", - "), schema_version=\"1.0.15\").widget(height=800)\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - 
"file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The from_object shortcut" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "Import the functions and classes that we will be using." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download the data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs(\"data\", exist_ok=True)\n", + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", + "urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Load the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_h5ad(join(\"data\", \"habib17.processed.h5ad\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.1. 
Preprocess the Data For Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With one line of code, you may create a Vitessce widget based on an automatically inferred configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = VitessceConfig.from_object(AnnDataWrapper(\n", + " adata,\n", + " obs_embedding_elems=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_elems=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_elem=\"X\",\n", + " feature_filter_elem=\"var/top_highly_variable\"\n", + "), schema_version=\"1.0.15\").widget(height=800)\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/tests/test_wrappers.py 
b/tests/test_wrappers.py index 21bc35b0..0d67af54 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -210,9 +210,9 @@ def test_obs_segmentations_ome_zarr(self): def test_anndata(self): base_path = data_path / 'test.h5ad.zarr' w = AnnDataWrapper(base_path, - obs_set_paths=['obs/CellType'], obs_set_names=['Cell Type'], - obs_labels_names=['Cell Label'], obs_labels_paths=['obs/CellLabel'], - obs_embedding_paths=['obsm/X_umap'], obs_embedding_names=['UMAP']) + obs_set_elems=['obs/CellType'], obs_set_names=['Cell Type'], + obs_labels_names=['Cell Label'], obs_labels_elems=['obs/CellLabel'], + obs_embedding_elems=['obsm/X_umap'], obs_embedding_names=['UMAP']) w.local_dir_uid = 'anndata.zarr' file_def_creator = w.make_file_def_creator('A', 0) @@ -226,7 +226,7 @@ def test_anndata(self): def test_anndata_with_base_dir(self): base_path = 'test.h5ad.zarr' - w = AnnDataWrapper(base_path, obs_set_paths=['obs/CellType'], obs_set_names=['Cell Type'], obs_embedding_paths=[ + w = AnnDataWrapper(base_path, obs_set_elems=['obs/CellType'], obs_set_names=['Cell Type'], obs_embedding_elems=[ 'obsm/X_umap'], obs_embedding_names=['UMAP']) w.base_dir = data_path w.local_dir_uid = 'anndata.zarr' diff --git a/vitessce/wrappers.py b/vitessce/wrappers.py index 96167884..e55c3d3b 100644 --- a/vitessce/wrappers.py +++ b/vitessce/wrappers.py @@ -3,6 +3,7 @@ import tempfile from uuid import uuid4 from pathlib import PurePath, PurePosixPath +import warnings from .constants import ( norm_enum, @@ -894,28 +895,28 @@ def image_file_def_creator(base_url): class AnnDataWrapper(AbstractWrapper): - def __init__(self, base_path=None, base_url=None, obs_feature_matrix_path=None, feature_filter_path=None, initial_feature_filter_path=None, obs_set_paths=None, obs_set_names=None, obs_locations_path=None, obs_segmentations_path=None, obs_embedding_paths=None, obs_embedding_names=None, obs_embedding_dims=None, obs_spots_path=None, obs_points_path=None, request_init=None, 
feature_labels_path=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, obs_labels_paths=None, obs_labels_names=None, **kwargs): + def __init__(self, base_path=None, base_url=None, obs_feature_matrix_elem=None, feature_filter_elem=None, initial_feature_filter_elem=None, obs_set_elems=None, obs_set_names=None, obs_locations_elem=None, obs_segmentations_elem=None, obs_embedding_elems=None, obs_embedding_names=None, obs_embedding_dims=None, obs_spots_elem=None, obs_points_elem=None, request_init=None, feature_labels_elem=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, obs_labels_elems=None, obs_labels_names=None, **kwargs): """ Wrap an AnnData object by creating an instance of the ``AnnDataWrapper`` class. :param str base_path: A path to an AnnData object written to a Zarr store containing single-cell experiment data. :param str base_url: A remote url pointing to a zarr-backed AnnData store. - :param str obs_feature_matrix_path: Location of the expression (cell x gene) matrix, like `X` or `obsm/highly_variable_genes_subset` - :param str feature_filter_path: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_path` if obs_feature_matrix_path points to a subset of `X` of the full `var` list. - :param str initial_feature_filter_path: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_path` if obs_feature_matrix_path points to a subset of `X` of the full `var` list. 
- :param list[str] obs_set_paths: Column names like `['obs/louvain', 'obs/cellType']` for showing cell sets - :param list[str] obs_set_names: Names to display in place of those in `obs_set_paths`, like `['Louvain', 'Cell Type']` - :param str obs_locations_path: Column name in `obsm` that contains centroid coordinates for displaying centroids in the spatial viewer - :param str obs_segmentations_path: Column name in `obsm` that contains polygonal coordinates for displaying outlines in the spatial viewer - :param list[str] obs_embedding_paths: Column names like `['obsm/X_umap', 'obsm/X_pca']` for showing scatterplots + :param str obs_feature_matrix_elem: Location of the expression (cell x gene) matrix, like `X` or `obsm/highly_variable_genes_subset` + :param str feature_filter_elem: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_elem` if obs_feature_matrix_elem points to a subset of `X` of the full `var` list. + :param str initial_feature_filter_elem: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_elem` if obs_feature_matrix_elem points to a subset of `X` of the full `var` list. 
+ :param list[str] obs_set_elems: Column names like `['obs/louvain', 'obs/cellType']` for showing cell sets + :param list[str] obs_set_names: Names to display in place of those in `obs_set_elems`, like `['Louvain', 'Cell Type']` + :param str obs_locations_elem: Column name in `obsm` that contains centroid coordinates for displaying centroids in the spatial viewer + :param str obs_segmentations_elem: Column name in `obsm` that contains polygonal coordinates for displaying outlines in the spatial viewer + :param list[str] obs_embedding_elems: Column names like `['obsm/X_umap', 'obsm/X_pca']` for showing scatterplots :param list[str] obs_embedding_names: Overriding names like `['UMAP', 'PCA']` for displaying above scatterplots :param list[str] obs_embedding_dims: Dimensions along which to get data for the scatterplot, like `[[0, 1], [4, 5]]` where `[0, 1]` is just the normal x and y but `[4, 5]` could be comparing the third and fourth principal components, for example. - :param str obs_spots_path: Column name in `obsm` that contains centroid coordinates for displaying spots in the spatial viewer - :param str obs_points_path: Column name in `obsm` that contains centroid coordinates for displaying points in the spatial viewer + :param str obs_spots_elem: Column name in `obsm` that contains centroid coordinates for displaying spots in the spatial viewer + :param str obs_points_elem: Column name in `obsm` that contains centroid coordinates for displaying points in the spatial viewer :param dict request_init: options to be passed along with every fetch request from the browser, like `{ "header": { "Authorization": "Bearer dsfjalsdfa1431" } }` - :param str feature_labels_path: The name of a column containing feature labels (e.g., alternate gene symbols), instead of the default index in `var` of the AnnData store. 
- :param str obs_labels_path: (DEPRECATED) The name of a column containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. Use `obs_labels_paths` and `obs_labels_names` instead. This arg will be removed in a future release. - :param list[str] obs_labels_paths: The names of columns containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. + :param str feature_labels_elem: The name of a column containing feature labels (e.g., alternate gene symbols), instead of the default index in `var` of the AnnData store. + :param str obs_labels_path: (DEPRECATED) The name of a column containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. Use `obs_labels_elems` and `obs_labels_names` instead. This arg will be removed in a future release. + :param list[str] obs_labels_elems: The names of columns containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. :param list[str] obs_labels_names: The optional display names of columns containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. :param bool convert_to_dense: Whether or not to convert `X` to dense the zarr store (dense is faster but takes more disk space). :param coordination_values: Coordination values for the file definition. 
@@ -939,26 +940,27 @@ def __init__(self, base_path=None, base_url=None, obs_feature_matrix_path=None, self.is_remote = True self.zarr_folder = None self.local_dir_uid = make_unique_filename(".adata.zarr") - self._expression_matrix = obs_feature_matrix_path + self._expression_matrix = obs_feature_matrix_elem self._cell_set_obs_names = obs_set_names self._mappings_obsm_names = obs_embedding_names - self._gene_var_filter = feature_filter_path - self._matrix_gene_var_filter = initial_feature_filter_path - self._cell_set_obs = obs_set_paths - self._spatial_centroid_obsm = obs_locations_path - self._spatial_polygon_obsm = obs_segmentations_path - self._mappings_obsm = obs_embedding_paths + self._gene_var_filter = feature_filter_elem + self._matrix_gene_var_filter = initial_feature_filter_elem + self._cell_set_obs = obs_set_elems + self._spatial_centroid_obsm = obs_locations_elem + self._spatial_polygon_obsm = obs_segmentations_elem + self._mappings_obsm = obs_embedding_elems self._mappings_obsm_dims = obs_embedding_dims - self._spatial_spots_obsm = obs_spots_path - self._spatial_points_obsm = obs_points_path + self._spatial_spots_obsm = obs_spots_elem + self._spatial_points_obsm = obs_points_elem self._request_init = request_init - self._feature_labels = feature_labels_path + self._feature_labels = feature_labels_elem # Support legacy provision of single obs labels path if (obs_labels_path is not None): - self._obs_labels_paths = [obs_labels_path] + warnings.warn("`obs_labels_path` will be deprecated in a future release.", DeprecationWarning) + self._obs_labels_elems = [obs_labels_path] self._obs_labels_names = [obs_labels_path.split('/')[-1]] else: - self._obs_labels_paths = obs_labels_paths + self._obs_labels_elems = obs_labels_elems self._obs_labels_names = obs_labels_names self._convert_to_dense = convert_to_dense self._coordination_values = coordination_values @@ -1050,16 +1052,16 @@ def get_anndata_zarr(base_url): options["featureLabels"] = { "path": 
self._feature_labels } - if self._obs_labels_paths is not None: - if self._obs_labels_names is not None and len(self._obs_labels_paths) == len(self._obs_labels_names): + if self._obs_labels_elems is not None: + if self._obs_labels_names is not None and len(self._obs_labels_elems) == len(self._obs_labels_names): # A name was provided for each path element, so use those values. names = self._obs_labels_names else: # Names were not provided for each path element, # so fall back to using the final part of each path for the names. - names = [labels_path.split('/')[-1] for labels_path in self._obs_labels_paths] + names = [labels_path.split('/')[-1] for labels_path in self._obs_labels_elems] obs_labels = [] - for path, name in zip(self._obs_labels_paths, names): + for path, name in zip(self._obs_labels_elems, names): obs_labels.append({"path": path, "obsLabelsType": name}) options["obsLabels"] = obs_labels if len(options.keys()) > 0: From 79574ff12f0daa49f3c6c2ec4d3c551dfba30bb3 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 7 May 2024 14:47:17 +0200 Subject: [PATCH 4/9] (refactor): small name changes --- vitessce/wrappers.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vitessce/wrappers.py b/vitessce/wrappers.py index f2cde795..5da1aa14 100644 --- a/vitessce/wrappers.py +++ b/vitessce/wrappers.py @@ -986,7 +986,7 @@ def __init__(self, base_path=None, base_url=None, base_store=None, obs_feature_m self.is_remote = False self.is_store = False self.zarr_folder = 'anndata.zarr' - elif adata_url is not None: + elif base_url is not None: self.is_remote = True self.is_store = False self.zarr_folder = None @@ -998,11 +998,11 @@ def __init__(self, base_path=None, base_url=None, base_store=None, obs_feature_m self.local_dir_uid = make_unique_filename(".adata.zarr") self._expression_matrix = obs_feature_matrix_elem - self._cell_set_obs_names = obs_set_names + self._obs_set_names = obs_set_names self._mappings_obsm_names = 
obs_embedding_names self._gene_var_filter = feature_filter_elem self._matrix_gene_var_filter = initial_feature_filter_elem - self._cell_set_obs = obs_set_elems + self._obs_set_elems = obs_set_elems self._spatial_centroid_obsm = obs_locations_elem self._spatial_polygon_obsm = obs_segmentations_elem self._mappings_obsm = obs_embedding_elems @@ -1089,13 +1089,13 @@ def get_anndata_zarr(base_url): if self._mappings_obsm_dims is not None: for dim_i, dim in enumerate(self._mappings_obsm_dims): options["obsEmbedding"][dim_i]['dims'] = dim - if self._cell_set_obs is not None: + if self._obs_set_elems is not None: options["obsSets"] = [] - if self._cell_set_obs_names is not None: - names = self._cell_set_obs_names + if self._obs_set_names is not None: + names = self._obs_set_names else: - names = [obs.split('/')[-1] for obs in self._cell_set_obs] - for obs, name in zip(self._cell_set_obs, names): + names = [obs.split('/')[-1] for obs in self._obs_set_elems] + for obs, name in zip(self._obs_set_elems, names): options["obsSets"].append({ "name": name, "path": obs From b77a9fc0c7d9dcd7368a2b994b3dce7196f4a680 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 16 May 2024 12:30:02 +0200 Subject: [PATCH 5/9] (refactor): revert public changes --- ...rowser_to_vitessce_config_conversion.ipynb | 8 +-- docs/notebooks/data_export_files.ipynb | 14 ++-- docs/notebooks/data_export_s3.ipynb | 10 +-- docs/notebooks/web_app_brain.ipynb | 8 +-- docs/notebooks/widget_brain.ipynb | 12 ++-- .../widget_brain_with_base_dir.ipynb | 16 ++--- docs/notebooks/widget_genomic_profiles.ipynb | 6 +- docs/notebooks/widget_loom.ipynb | 2 +- docs/notebooks/widget_pbmc.ipynb | 8 +-- docs/notebooks/widget_pbmc_remote.ipynb | 2 +- docs/notebooks/widget_shortcut.ipynb | 8 +-- tests/test_wrappers.py | 14 ++-- vitessce/wrappers.py | 64 +++++++++---------- 13 files changed, 86 insertions(+), 86 deletions(-) diff --git a/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb 
b/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb index 8fcf4ab0..1576769f 100644 --- a/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb +++ b/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb @@ -138,11 +138,11 @@ "outputs": [], "source": [ "anndata_wrapper_inst = AnnDataWrapper(\n", - " base_path=zarr_filepath,\n", - " obs_feature_matrix_elem=\"X\",\n", - " obs_embedding_elems=[\"obsm/X_tsne\"],\n", + " adata_path=zarr_filepath,\n", + " obs_feature_matrix_path=\"X\",\n", + " obs_embedding_paths=[\"obsm/X_tsne\"],\n", " obs_embedding_names=[\"t-SNE\"],\n", - " obs_set_elems=[\"obs/cluster\", \"obs/age\"],\n", + " obs_set_paths=[\"obs/cluster\", \"obs/age\"],\n", " obs_set_names=[\"cluster\", \"age\"],\n", ")\n", "vc = VitessceConfig(schema_version=\"1.0.15\", name=\"Vitessce configuration for CellBrowser project adultPancreas\")\n", diff --git a/docs/notebooks/data_export_files.ipynb b/docs/notebooks/data_export_files.ipynb index b5ca784c..757882d6 100644 --- a/docs/notebooks/data_export_files.ipynb +++ b/docs/notebooks/data_export_files.ipynb @@ -124,13 +124,13 @@ "source": [ "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " base_path=zarr_filepath,\n", - " obs_embedding_elems=[\"obsm/X_umap\"],\n", + " adata_path=zarr_filepath,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_elems=[\"obs/CellType\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_elem=\"X\",\n", - " feature_filter_elem=\"var/top_highly_variable\"\n", + " obs_feature_matrix_path=\"X\",\n", + " feature_filter_path=\"var/top_highly_variable\"\n", "))\n", "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"X_umap\")\n", "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", @@ -145,7 
+145,7 @@ "source": [ "## 4. Export files to a local directory\n", "\n", - "The `.export(to='files')` method on the view config instance will export files to the specified directory `out_dir`. The `base_url` parameter is required so that the file URLs in the view config point to the location where you ultimately intend to serve the files." + "The `.export(to='files')` method on the view config instance will export files to the specified directory `out_dir`. The `adata_path` parameter is required so that the file URLs in the view config point to the location where you ultimately intend to serve the files." ] }, { @@ -154,7 +154,7 @@ "metadata": {}, "outputs": [], "source": [ - "config_dict = vc.export(to='files', base_url='http://localhost:3000', out_dir='./test')" + "config_dict = vc.export(to='files', adata_path='http://localhost:3000', out_dir='./test')" ] }, { diff --git a/docs/notebooks/data_export_s3.ipynb b/docs/notebooks/data_export_s3.ipynb index 67d24cc1..9d945ea8 100644 --- a/docs/notebooks/data_export_s3.ipynb +++ b/docs/notebooks/data_export_s3.ipynb @@ -125,13 +125,13 @@ "source": [ "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " base_path=zarr_filepath,\n", - " obs_embedding_elems=[\"obsm/X_umap\"],\n", + " adata_path=zarr_filepath,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_elems=[\"obs/CellType\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_elem=\"X\",\n", - " feature_filter_elem=\"var/top_highly_variable\"\n", + " obs_feature_matrix_path=\"X\",\n", + " feature_filter_path=\"var/top_highly_variable\"\n", "))\n", "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", diff --git 
a/docs/notebooks/web_app_brain.ipynb b/docs/notebooks/web_app_brain.ipynb index 9bb2b872..7e183bd1 100644 --- a/docs/notebooks/web_app_brain.ipynb +++ b/docs/notebooks/web_app_brain.ipynb @@ -157,12 +157,12 @@ "source": [ "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", " adata,\n", - " obs_embedding_elems=[\"obsm/X_umap\"],\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_elems=[\"obs/CellType\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_elem=\"X\",\n", - " feature_filter_elem=\"var/top_highly_variable\"\n", + " obs_feature_matrix_path=\"X\",\n", + " feature_filter_path=\"var/top_highly_variable\"\n", " )\n", ")" ] diff --git a/docs/notebooks/widget_brain.ipynb b/docs/notebooks/widget_brain.ipynb index 282fcbf1..969fb0da 100644 --- a/docs/notebooks/widget_brain.ipynb +++ b/docs/notebooks/widget_brain.ipynb @@ -180,7 +180,7 @@ "\n", "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", "\n", - "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_elems` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." 
] }, { @@ -190,13 +190,13 @@ "outputs": [], "source": [ "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " base_path=zarr_filepath,\n", - " obs_embedding_elems=[\"obsm/X_umap\"],\n", + " adata_path=zarr_filepath,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_elems=[\"obs/CellType\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_elem=\"X\",\n", - " initial_feature_filter_elem=\"var/top_highly_variable\"\n", + " obs_feature_matrix_path=\"X\",\n", + " initial_feature_filter_path=\"var/top_highly_variable\"\n", " )\n", ")" ] diff --git a/docs/notebooks/widget_brain_with_base_dir.ipynb b/docs/notebooks/widget_brain_with_base_dir.ipynb index c8497610..6bec5222 100644 --- a/docs/notebooks/widget_brain_with_base_dir.ipynb +++ b/docs/notebooks/widget_brain_with_base_dir.ipynb @@ -202,7 +202,7 @@ "\n", "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", "\n", - "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_elems` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." 
] }, { @@ -212,13 +212,13 @@ "outputs": [], "source": [ "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " base_path=zarr_relative_filepath, # Relative to BASE_DIR (because we specified base_dir in the VitessceConfig constructor)\n", - " obs_embedding_elems=[\"obsm/X_umap\"],\n", + " adata_path=zarr_relative_filepath, # Relative to BASE_DIR (because we specified base_dir in the VitessceConfig constructor)\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_elems=[\"obs/CellType\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_elem=\"X\",\n", - " initial_feature_filter_elem=\"var/top_highly_variable\"\n", + " obs_feature_matrix_path=\"X\",\n", + " initial_feature_filter_path=\"var/top_highly_variable\"\n", " )\n", ")" ] @@ -300,7 +300,7 @@ "metadata": {}, "outputs": [], "source": [ - "config_dict = vc.to_dict(base_url=BASE_URL_PLACEHOLDER)\n", + "config_dict = vc.to_dict(adata_path=BASE_URL_PLACEHOLDER)\n", "config_dict" ] } @@ -326,4 +326,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/docs/notebooks/widget_genomic_profiles.ipynb b/docs/notebooks/widget_genomic_profiles.ipynb index b2026544..4c33ffea 100644 --- a/docs/notebooks/widget_genomic_profiles.ipynb +++ b/docs/notebooks/widget_genomic_profiles.ipynb @@ -157,10 +157,10 @@ "dataset = vc.add_dataset(name='HBM485.TBWH.322').add_object(MultivecZarrWrapper(\n", " zarr_path=multivec_zarr_path\n", ")).add_object(AnnDataWrapper(\n", - " base_path=adata_zarr_path,\n", - " obs_embedding_elems=[\"obsm/X_umap\"],\n", + " adata_path=adata_zarr_path,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_elems=[\"obs/cluster\"],\n", + " obs_set_paths=[\"obs/cluster\"],\n", " obs_set_names=[\"Cluster\"],\n", "))\n", "\n", diff --git a/docs/notebooks/widget_loom.ipynb b/docs/notebooks/widget_loom.ipynb 
index 1facc535..7ac2d147 100644 --- a/docs/notebooks/widget_loom.ipynb +++ b/docs/notebooks/widget_loom.ipynb @@ -145,7 +145,7 @@ "outputs": [], "source": [ "vc = VitessceConfig(schema_version=\"1.0.15\", name='Loom Example', description='osmFISH dataset of the mouse cortex including all cells')\n", - "w = AnnDataWrapper(base_path=zarr_filepath, obs_set_elems=[\"obs/ClusterName\"], obs_set_names=[\"Clusters\"], obs_locations_elem=\"obsm/spatial\", obs_segmentations_elem=\"obsm/segmentations\", obs_embedding_elems=[\"obsm/tSNE\"])\n", + "w = AnnDataWrapper(adata_path=zarr_filepath, obs_set_paths=[\"obs/ClusterName\"], obs_set_names=[\"Clusters\"], obs_locations_path=\"obsm/spatial\", obs_segmentations_path=\"obsm/segmentations\", obs_embedding_paths=[\"obsm/tSNE\"])\n", "dataset = vc.add_dataset(name='SScortex').add_object(w)\n", "\n", "tsne = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"tSNE\")\n", diff --git a/docs/notebooks/widget_pbmc.ipynb b/docs/notebooks/widget_pbmc.ipynb index e6e7c365..de2be821 100644 --- a/docs/notebooks/widget_pbmc.ipynb +++ b/docs/notebooks/widget_pbmc.ipynb @@ -129,12 +129,12 @@ "source": [ "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(\n", - " base_path=zarr_filepath,\n", - " obs_set_elems=[\"obs/leiden\"],\n", + " adata_path=zarr_filepath,\n", + " obs_set_paths=[\"obs/leiden\"],\n", " obs_set_names=[\"Leiden\"],\n", - " obs_embedding_elems=[\"obsm/X_umap\", \"obsm/X_pca\"],\n", + " obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"],\n", " obs_embedding_names=[\"UMAP\", \"PCA\"],\n", - " obs_feature_matrix_elem=\"X\"\n", + " obs_feature_matrix_path=\"X\"\n", "))\n", "\n", "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", diff --git a/docs/notebooks/widget_pbmc_remote.ipynb b/docs/notebooks/widget_pbmc_remote.ipynb index 5edceb2c..586e8f18 100644 --- a/docs/notebooks/widget_pbmc_remote.ipynb +++ 
b/docs/notebooks/widget_pbmc_remote.ipynb @@ -79,7 +79,7 @@ "outputs": [], "source": [ "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", - "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(base_url=url, obs_set_elems=[\"obs/louvain\"], obs_set_names=[\"Louvain\"], obs_embedding_elems=[\"obsm/X_umap\", \"obsm/X_pca\"], obs_embedding_names=[\"UMAP\", \"PCA\"], obs_feature_matrix_elem=\"X\"))\n", + "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(adata_path=url, obs_set_paths=[\"obs/louvain\"], obs_set_names=[\"Louvain\"], obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"], obs_embedding_names=[\"UMAP\", \"PCA\"], obs_feature_matrix_path=\"X\"))\n", "\n", "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", "pca = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"PCA\")\n", diff --git a/docs/notebooks/widget_shortcut.ipynb b/docs/notebooks/widget_shortcut.ipynb index e29f64df..911f1486 100644 --- a/docs/notebooks/widget_shortcut.ipynb +++ b/docs/notebooks/widget_shortcut.ipynb @@ -120,12 +120,12 @@ "source": [ "vw = VitessceConfig.from_object(AnnDataWrapper(\n", " adata,\n", - " obs_embedding_elems=[\"obsm/X_umap\"],\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_elems=[\"obs/CellType\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_elem=\"X\",\n", - " feature_filter_elem=\"var/top_highly_variable\"\n", + " obs_feature_matrix_path=\"X\",\n", + " feature_filter_path=\"var/top_highly_variable\"\n", "), schema_version=\"1.0.15\").widget(height=800)\n", "vw" ] diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py index 0d67af54..d351cb1c 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -208,11 +208,11 @@ def test_obs_segmentations_ome_zarr(self): }) def test_anndata(self): - base_path = data_path / 'test.h5ad.zarr' - w = 
AnnDataWrapper(base_path, - obs_set_elems=['obs/CellType'], obs_set_names=['Cell Type'], - obs_labels_names=['Cell Label'], obs_labels_elems=['obs/CellLabel'], - obs_embedding_elems=['obsm/X_umap'], obs_embedding_names=['UMAP']) + adata_path = data_path / 'test.h5ad.zarr' + w = AnnDataWrapper(adata_path, + obs_set_paths=['obs/CellType'], obs_set_names=['Cell Type'], + obs_labels_names=['Cell Label'], obs_labels_paths=['obs/CellLabel'], + obs_embedding_paths=['obsm/X_umap'], obs_embedding_names=['UMAP']) w.local_dir_uid = 'anndata.zarr' file_def_creator = w.make_file_def_creator('A', 0) @@ -225,8 +225,8 @@ def test_anndata(self): }}) def test_anndata_with_base_dir(self): - base_path = 'test.h5ad.zarr' - w = AnnDataWrapper(base_path, obs_set_elems=['obs/CellType'], obs_set_names=['Cell Type'], obs_embedding_elems=[ + adata_path = 'test.h5ad.zarr' + w = AnnDataWrapper(adata_path, obs_set_paths=['obs/CellType'], obs_set_names=['Cell Type'], obs_embedding_paths=[ 'obsm/X_umap'], obs_embedding_names=['UMAP']) w.base_dir = data_path w.local_dir_uid = 'anndata.zarr' diff --git a/vitessce/wrappers.py b/vitessce/wrappers.py index 5da1aa14..5d70f08a 100644 --- a/vitessce/wrappers.py +++ b/vitessce/wrappers.py @@ -938,7 +938,7 @@ def image_file_def_creator(base_url): class AnnDataWrapper(AbstractWrapper): - def __init__(self, base_path=None, base_url=None, base_store=None, obs_feature_matrix_elem=None, feature_filter_elem=None, initial_feature_filter_elem=None, obs_set_elems=None, obs_set_names=None, obs_locations_elem=None, obs_segmentations_elem=None, obs_embedding_elems=None, obs_embedding_names=None, obs_embedding_dims=None, obs_spots_elem=None, obs_points_elem=None, request_init=None, feature_labels_elem=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, obs_labels_elems=None, obs_labels_names=None, **kwargs): + def __init__(self, adata_path=None, adata_url=None, adata_store=None, obs_feature_matrix_path=None, feature_filter_path=None, 
initial_feature_filter_path=None, obs_set_paths=None, obs_set_names=None, obs_locations_path=None, obs_segmentations_path=None, obs_embedding_paths=None, obs_embedding_names=None, obs_embedding_dims=None, obs_spots_path=None, obs_points_path=None, request_init=None, feature_labels_path=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, obs_labels_paths=None, obs_labels_names=None, **kwargs): """ Wrap an AnnData object by creating an instance of the ``AnnDataWrapper`` class. @@ -946,22 +946,22 @@ def __init__(self, base_path=None, base_url=None, base_store=None, obs_feature_m :param str base_url: A remote url pointing to a zarr-backed AnnData store. :param base_store: A path to pass to zarr.FSStore, or an existing store instance. :type base_store: str or zarr.Storage - :param str obs_feature_matrix_elem: Location of the expression (cell x gene) matrix, like `X` or `obsm/highly_variable_genes_subset` - :param str feature_filter_elem: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_elem` if obs_feature_matrix_elem points to a subset of `X` of the full `var` list. - :param str initial_feature_filter_elem: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_elem` if obs_feature_matrix_elem points to a subset of `X` of the full `var` list. 
- :param list[str] obs_set_elems: Column names like `['obs/louvain', 'obs/cellType']` for showing cell sets - :param list[str] obs_set_names: Names to display in place of those in `obs_set_elems`, like `['Louvain', 'Cell Type']` - :param str obs_locations_elem: Column name in `obsm` that contains centroid coordinates for displaying centroids in the spatial viewer - :param str obs_segmentations_elem: Column name in `obsm` that contains polygonal coordinates for displaying outlines in the spatial viewer - :param list[str] obs_embedding_elems: Column names like `['obsm/X_umap', 'obsm/X_pca']` for showing scatterplots + :param str obs_feature_matrix_path: Location of the expression (cell x gene) matrix, like `X` or `obsm/highly_variable_genes_subset` + :param str feature_filter_path: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_path` if obs_feature_matrix_path points to a subset of `X` of the full `var` list. + :param str initial_feature_filter_path: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_path` if obs_feature_matrix_path points to a subset of `X` of the full `var` list. 
+ :param list[str] obs_set_paths: Column names like `['obs/louvain', 'obs/cellType']` for showing cell sets + :param list[str] obs_set_names: Names to display in place of those in `obs_set_paths`, like `['Louvain', 'Cell Type']` + :param str obs_locations_path: Column name in `obsm` that contains centroid coordinates for displaying centroids in the spatial viewer + :param str obs_segmentations_path: Column name in `obsm` that contains polygonal coordinates for displaying outlines in the spatial viewer + :param list[str] obs_embedding_paths: Column names like `['obsm/X_umap', 'obsm/X_pca']` for showing scatterplots :param list[str] obs_embedding_names: Overriding names like `['UMAP', 'PCA']` for displaying above scatterplots :param list[str] obs_embedding_dims: Dimensions along which to get data for the scatterplot, like `[[0, 1], [4, 5]]` where `[0, 1]` is just the normal x and y but `[4, 5]` could be comparing the third and fourth principal components, for example. - :param str obs_spots_elem: Column name in `obsm` that contains centroid coordinates for displaying spots in the spatial viewer - :param str obs_points_elem: Column name in `obsm` that contains centroid coordinates for displaying points in the spatial viewer + :param str obs_spots_path: Column name in `obsm` that contains centroid coordinates for displaying spots in the spatial viewer + :param str obs_points_path: Column name in `obsm` that contains centroid coordinates for displaying points in the spatial viewer :param dict request_init: options to be passed along with every fetch request from the browser, like `{ "header": { "Authorization": "Bearer dsfjalsdfa1431" } }` - :param str feature_labels_elem: The name of a column containing feature labels (e.g., alternate gene symbols), instead of the default index in `var` of the AnnData store. 
- :param str obs_labels_path: (DEPRECATED) The name of a column containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. Use `obs_labels_elems` and `obs_labels_names` instead. This arg will be removed in a future release. - :param list[str] obs_labels_elems: The names of columns containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. + :param str feature_labels_path: The name of a column containing feature labels (e.g., alternate gene symbols), instead of the default index in `var` of the AnnData store. + :param str obs_labels_path: (DEPRECATED) The name of a column containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. Use `obs_labels_paths` and `obs_labels_names` instead. This arg will be removed in a future release. + :param list[str] obs_labels_paths: The names of columns containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. :param list[str] obs_labels_names: The optional display names of columns containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. :param bool convert_to_dense: Whether or not to convert `X` to dense the zarr store (dense is faster but takes more disk space). :param coordination_values: Coordination values for the file definition. 
@@ -970,11 +970,11 @@ def __init__(self, base_path=None, base_url=None, base_store=None, obs_feature_m """ super().__init__(**kwargs) self._repr = make_repr(locals()) - self._path = base_path - self._url = base_url - self._base_store = base_store + self._path = adata_path + self._url = adata_url + self._store = adata_store - num_inputs = sum([1 for x in [base_path, base_url, base_store] if x is not None]) + num_inputs = sum([1 for x in [adata_path, adata_url, adata_store] if x is not None]) if num_inputs > 1: raise ValueError( "Expected only one of base_path, base_url, or base_store to be provided") @@ -982,11 +982,11 @@ def __init__(self, base_path=None, base_url=None, base_store=None, obs_feature_m raise ValueError( "Expected one of base_path, base_url, or base_store to be provided") - if base_path is not None: + if adata_path is not None: self.is_remote = False self.is_store = False self.zarr_folder = 'anndata.zarr' - elif base_url is not None: + elif adata_url is not None: self.is_remote = True self.is_store = False self.zarr_folder = None @@ -997,27 +997,27 @@ def __init__(self, base_path=None, base_url=None, base_store=None, obs_feature_m self.zarr_folder = None self.local_dir_uid = make_unique_filename(".adata.zarr") - self._expression_matrix = obs_feature_matrix_elem + self._expression_matrix = obs_feature_matrix_path self._obs_set_names = obs_set_names self._mappings_obsm_names = obs_embedding_names - self._gene_var_filter = feature_filter_elem - self._matrix_gene_var_filter = initial_feature_filter_elem - self._obs_set_elems = obs_set_elems - self._spatial_centroid_obsm = obs_locations_elem - self._spatial_polygon_obsm = obs_segmentations_elem - self._mappings_obsm = obs_embedding_elems + self._gene_var_filter = feature_filter_path + self._matrix_gene_var_filter = initial_feature_filter_path + self._obs_set_elems = obs_set_paths + self._spatial_centroid_obsm = obs_locations_path + self._spatial_polygon_obsm = obs_segmentations_path + self._mappings_obsm = 
obs_embedding_paths self._mappings_obsm_dims = obs_embedding_dims - self._spatial_spots_obsm = obs_spots_elem - self._spatial_points_obsm = obs_points_elem + self._spatial_spots_obsm = obs_spots_path + self._spatial_points_obsm = obs_points_path self._request_init = request_init - self._feature_labels = feature_labels_elem + self._feature_labels = feature_labels_path # Support legacy provision of single obs labels path if (obs_labels_path is not None): warnings.warn("`obs_labels_path` will be deprecated in a future release.", DeprecationWarning) self._obs_labels_elems = [obs_labels_path] self._obs_labels_names = [obs_labels_path.split('/')[-1]] else: - self._obs_labels_elems = obs_labels_elems + self._obs_labels_elems = obs_labels_paths self._obs_labels_names = obs_labels_names self._convert_to_dense = convert_to_dense self._coordination_values = coordination_values @@ -1038,7 +1038,7 @@ def make_routes(self, dataset_uid, obj_i): if self.is_remote: return [] elif self.is_store: - self.register_zarr_store(dataset_uid, obj_i, self._base_store, self.local_dir_uid) + self.register_zarr_store(dataset_uid, obj_i, self._store, self.local_dir_uid) return [] else: return self.get_local_dir_route(dataset_uid, obj_i, self._path, self.local_dir_uid) From e4d80e2277c2bc245e52039d2bbe1857d2c5e18b Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 16 May 2024 15:56:43 +0200 Subject: [PATCH 6/9] (fix): revert `.ipynb` files --- ...rowser_to_vitessce_config_conversion.ipynb | 432 ++++++------ docs/notebooks/data_export_files.ipynb | 470 ++++++------- docs/notebooks/data_export_s3.ipynb | 458 ++++++------ docs/notebooks/web_app_brain.ipynb | 532 +++++++------- docs/notebooks/widget_brain.ipynb | 588 ++++++++-------- .../widget_brain_with_base_dir.ipynb | 654 +++++++++--------- docs/notebooks/widget_genomic_profiles.ipynb | 438 ++++++------ docs/notebooks/widget_loom.ipynb | 436 ++++++------ docs/notebooks/widget_pbmc.ipynb | 416 +++++------ docs/notebooks/widget_pbmc_remote.ipynb | 
304 ++++---- docs/notebooks/widget_shortcut.ipynb | 350 +++++----- 11 files changed, 2539 insertions(+), 2539 deletions(-) diff --git a/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb b/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb index 1576769f..3ca1e0cf 100644 --- a/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb +++ b/docs/notebooks/cellbrowser_to_vitessce_config_conversion.ipynb @@ -1,217 +1,217 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "710bc947", - "metadata": {}, - "source": [ - "# Load UCSC Cell Browser project in Vitessce" - ] - }, - { - "cell_type": "markdown", - "id": "fad939f6-bd8b-46f8-8dd1-f0816d8ca5b3", - "metadata": {}, - "source": [ - "This notebook shows you how to use the `convert_cell_browser_project_to_anndata` function, which allows you to take an existing project, published in https://cells.ucsc.edu/ and:\n", - "1. Convert it into the AnnData format that is supported by Vitessce\n", - "2. Save the AnnData object as a Zarr store\n", - "3. Configure Vitessce with the AnnData-Zarr store\n", - "4. Render a Vitessce widget based on the config (step 3) directly in the notebook.\n", - "\n", - "The dataset that you choose to convert needs to be a valid UCSC Cell Browser \"project\", accessible from https://cells.ucsc.edu/, with a configuration available in https://github.com/ucscGenomeBrowser/cellbrowser-confs\n", - "\n", - "The `convert_cell_browser_project_to_anndata` function takes the name of that project as an input. For example, to convert this project, https://cells.ucsc.edu/?ds=adultPancreas, you will neeed to pass `\"adultPancreas\"` as the project name." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "532fea6a-69d4-4cac-8afb-6d334dbe7ca1", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import json\n", - "from os.path import join\n", - "from vitessce import (\n", - " convert_cell_browser_project_to_anndata,\n", - " AnnDataWrapper,\n", - " VitessceConfig,\n", - ")\n", - "from vitessce.data_utils import VAR_CHUNK_SIZE" - ] - }, - { - "cell_type": "markdown", - "id": "a8077cfd-abc2-488d-9d91-83bc29a0bbe9", - "metadata": {}, - "source": [ - "## 1. Convert UCSC Cell Browser project to a format that is supported by Vitessce\n", - "#### Output:\n", - "An AnnData object\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "743c2d61-d98c-4e8d-a821-d5fe0ec2d93b", - "metadata": {}, - "outputs": [], - "source": [ - "## 3. Convert UCSC Cell Browser project to a Vitessce view config" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "6fb3e7dc-baf8-49e9-9d24-264bcd668b49", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Converting CellBrowser config for project adultPancreas to Anndata-Zarr object\n", - "Successfully fetched configuration: https://cells.ucsc.edu/adultPancreas/dataset.json.\n", - "CellBrowser config is valid. Proceeding further with conversion.\n", - "Downloading expression matrix ...\n", - "Successfully downloaded expression matrix https://cells.ucsc.edu/adultPancreas/exprMatrix.tsv.gz.\n", - "Loading expression matrix into Anndata object ...\n", - "This dataset uses the format identifier|symbol for the ad.obs gene names (e.g. “ENSG0123123.3|HOX3”). 
We are keeping only the symbol.\n", - "Adding cell metadata to Anndata object ...\n", - "Successfully downloaded metadata meta.tsv.\n", - "Successful extraction of the following coordinates and URLS: {'X_tsne': 'tMinusSNE.coords.tsv.gz'}\n", - "Adding X_tsne to Anndata object ...\n", - "X_tsne successfully added.\n", - "Done adding coordinates to the Anndata object.\n", - "Filtering out all non-marker genes from Anndata object ...\n", - "Successfully filtered out all non-marker genes from Anndata object.\n", - "About to write the Anndata object to the Zarr store. The following properties will be saved:\n", - " Obs columns: ['cluster', 'age', 'age_unit', 'Key', 'experiment_name', 'fragAnalyzerRange', 'nCells', 'ng_ul', 'plate_nr', 'sample_recieve_date', 'chip_type', 'c1_chip_id', 'enrichment_method', 'capture_position', 'gene_body_coverage', 'intron_exon_ratio', 'mapped_reads', 'total_reads', 'n_genes']\n", - " Obsm keys: ['X_tsne']\n", - " Var columns: ['gene', 'n_cells']\n", - "obsm X_tsne is an instance of DataFrame, converting it to numpy array.\n" - ] - } - ], - "source": [ - "# Example run, coverting \"adultPancreas\" project:\n", - "adata = convert_cell_browser_project_to_anndata(project_name=\"adultPancreas\", keep_only_marker_genes=True)" - ] - }, - { - "cell_type": "markdown", - "id": "cf3cfcbe-4048-4a60-8988-b8c0eace23e2", - "metadata": {}, - "source": [ - "## 2. Save the AnnData object as a Zarr store" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "8835ab53-2ee3-490e-a68c-c2d8952277a9", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"out.adata.zarr\")\n", - "os.makedirs(os.path.dirname(zarr_filepath), exist_ok=True)\n", - "adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "id": "d61667b4-dc32-4376-bff1-b4a5bf74140f", - "metadata": {}, - "source": [ - "## 3. 
Configure Vitessce with the AnnData-Zarr store" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "259c1804-2e67-4a92-bc90-5ba5e3dba7b3", - "metadata": {}, - "outputs": [], - "source": [ - "anndata_wrapper_inst = AnnDataWrapper(\n", - " adata_path=zarr_filepath,\n", - " obs_feature_matrix_path=\"X\",\n", - " obs_embedding_paths=[\"obsm/X_tsne\"],\n", - " obs_embedding_names=[\"t-SNE\"],\n", - " obs_set_paths=[\"obs/cluster\", \"obs/age\"],\n", - " obs_set_names=[\"cluster\", \"age\"],\n", - ")\n", - "vc = VitessceConfig(schema_version=\"1.0.15\", name=\"Vitessce configuration for CellBrowser project adultPancreas\")\n", - "anndata_wrapper_inst.auto_view_config(vc)" - ] - }, - { - "cell_type": "markdown", - "id": "22e7d2fd-2c2e-4ce5-b551-7809cdc6568e", - "metadata": {}, - "source": [ - "## 4. Render the Vitessce widget" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "cb9cb8e3-8ef4-49d9-b0a0-ba2f0fc80637", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e5878bf30e1f4428a14604731928972d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VitessceWidget(config={'version': '1.0.15', 'name': 'Vitessce configuration for CellBrowser project adultPancr…" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "774b8156-5cc6-4d17-884b-595957366230", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.0" - } - }, - "nbformat": 4, 
- "nbformat_minor": 5 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "id": "710bc947", + "metadata": {}, + "source": [ + "# Load UCSC Cell Browser project in Vitessce" + ] + }, + { + "cell_type": "markdown", + "id": "fad939f6-bd8b-46f8-8dd1-f0816d8ca5b3", + "metadata": {}, + "source": [ + "This notebook shows you how to use the `convert_cell_browser_project_to_anndata` function, which allows you to take an existing project, published in https://cells.ucsc.edu/ and:\n", + "1. Convert it into the AnnData format that is supported by Vitessce\n", + "2. Save the AnnData object as a Zarr store\n", + "3. Configure Vitessce with the AnnData-Zarr store\n", + "4. Render a Vitessce widget based on the config (step 3) directly in the notebook.\n", + "\n", + "The dataset that you choose to convert needs to be a valid UCSC Cell Browser \"project\", accessible from https://cells.ucsc.edu/, with a configuration available in https://github.com/ucscGenomeBrowser/cellbrowser-confs\n", + "\n", + "The `convert_cell_browser_project_to_anndata` function takes the name of that project as an input. For example, to convert this project, https://cells.ucsc.edu/?ds=adultPancreas, you will neeed to pass `\"adultPancreas\"` as the project name." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "532fea6a-69d4-4cac-8afb-6d334dbe7ca1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "from os.path import join\n", + "from vitessce import (\n", + " convert_cell_browser_project_to_anndata,\n", + " AnnDataWrapper,\n", + " VitessceConfig,\n", + ")\n", + "from vitessce.data_utils import VAR_CHUNK_SIZE" + ] + }, + { + "cell_type": "markdown", + "id": "a8077cfd-abc2-488d-9d91-83bc29a0bbe9", + "metadata": {}, + "source": [ + "## 1. 
Convert UCSC Cell Browser project to a format that is supported by Vitessce\n", + "#### Output:\n", + "An AnnData object\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "743c2d61-d98c-4e8d-a821-d5fe0ec2d93b", + "metadata": {}, + "outputs": [], + "source": [ + "## 3. Convert UCSC Cell Browser project to a Vitessce view config" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "6fb3e7dc-baf8-49e9-9d24-264bcd668b49", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Converting CellBrowser config for project adultPancreas to Anndata-Zarr object\n", + "Successfully fetched configuration: https://cells.ucsc.edu/adultPancreas/dataset.json.\n", + "CellBrowser config is valid. Proceeding further with conversion.\n", + "Downloading expression matrix ...\n", + "Successfully downloaded expression matrix https://cells.ucsc.edu/adultPancreas/exprMatrix.tsv.gz.\n", + "Loading expression matrix into Anndata object ...\n", + "This dataset uses the format identifier|symbol for the ad.obs gene names (e.g. “ENSG0123123.3|HOX3”). We are keeping only the symbol.\n", + "Adding cell metadata to Anndata object ...\n", + "Successfully downloaded metadata meta.tsv.\n", + "Successful extraction of the following coordinates and URLS: {'X_tsne': 'tMinusSNE.coords.tsv.gz'}\n", + "Adding X_tsne to Anndata object ...\n", + "X_tsne successfully added.\n", + "Done adding coordinates to the Anndata object.\n", + "Filtering out all non-marker genes from Anndata object ...\n", + "Successfully filtered out all non-marker genes from Anndata object.\n", + "About to write the Anndata object to the Zarr store. 
The following properties will be saved:\n", + " Obs columns: ['cluster', 'age', 'age_unit', 'Key', 'experiment_name', 'fragAnalyzerRange', 'nCells', 'ng_ul', 'plate_nr', 'sample_recieve_date', 'chip_type', 'c1_chip_id', 'enrichment_method', 'capture_position', 'gene_body_coverage', 'intron_exon_ratio', 'mapped_reads', 'total_reads', 'n_genes']\n", + " Obsm keys: ['X_tsne']\n", + " Var columns: ['gene', 'n_cells']\n", + "obsm X_tsne is an instance of DataFrame, converting it to numpy array.\n" + ] + } + ], + "source": [ + "# Example run, coverting \"adultPancreas\" project:\n", + "adata = convert_cell_browser_project_to_anndata(project_name=\"adultPancreas\", keep_only_marker_genes=True)" + ] + }, + { + "cell_type": "markdown", + "id": "cf3cfcbe-4048-4a60-8988-b8c0eace23e2", + "metadata": {}, + "source": [ + "## 2. Save the AnnData object as a Zarr store" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "8835ab53-2ee3-490e-a68c-c2d8952277a9", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"out.adata.zarr\")\n", + "os.makedirs(os.path.dirname(zarr_filepath), exist_ok=True)\n", + "adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "id": "d61667b4-dc32-4376-bff1-b4a5bf74140f", + "metadata": {}, + "source": [ + "## 3. 
Configure Vitessce with the AnnData-Zarr store" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "259c1804-2e67-4a92-bc90-5ba5e3dba7b3", + "metadata": {}, + "outputs": [], + "source": [ + "anndata_wrapper_inst = AnnDataWrapper(\n", + " adata_path=zarr_filepath,\n", + " obs_feature_matrix_path=\"X\",\n", + " obs_embedding_paths=[\"obsm/X_tsne\"],\n", + " obs_embedding_names=[\"t-SNE\"],\n", + " obs_set_paths=[\"obs/cluster\", \"obs/age\"],\n", + " obs_set_names=[\"cluster\", \"age\"],\n", + ")\n", + "vc = VitessceConfig(schema_version=\"1.0.15\", name=\"Vitessce configuration for CellBrowser project adultPancreas\")\n", + "anndata_wrapper_inst.auto_view_config(vc)" + ] + }, + { + "cell_type": "markdown", + "id": "22e7d2fd-2c2e-4ce5-b551-7809cdc6568e", + "metadata": {}, + "source": [ + "## 4. Render the Vitessce widget" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "cb9cb8e3-8ef4-49d9-b0a0-ba2f0fc80637", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e5878bf30e1f4428a14604731928972d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VitessceWidget(config={'version': '1.0.15', 'name': 'Vitessce configuration for CellBrowser project adultPancr…" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "774b8156-5cc6-4d17-884b-595957366230", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, 
+ "nbformat_minor": 5 +} diff --git a/docs/notebooks/data_export_files.ipynb b/docs/notebooks/data_export_files.ipynb index 757882d6..d2b17c6e 100644 --- a/docs/notebooks/data_export_files.ipynb +++ b/docs/notebooks/data_export_files.ipynb @@ -1,236 +1,236 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Data Preparation Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Export data to local files" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import json\n", - "from urllib.parse import quote_plus\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceWidget,\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Download and process data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", - "if not isfile(adata_filepath):\n", - " os.makedirs(\"data\", exist_ok=True)\n", - " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)\n", - "\n", - "adata = read_h5ad(adata_filepath)\n", - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", - "if not isdir(zarr_filepath):\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"CellType\"],\n", - " obsm_keys=[\"X_umap\"],\n", - " var_cols=[\"top_highly_variable\"],\n", - " optimize_X=True,\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Create the Vitessce configuration" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set up the configuration by adding the views and datasets of interest." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", - "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " adata_path=zarr_filepath,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " feature_filter_path=\"var/top_highly_variable\"\n", - "))\n", - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"X_umap\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", - "vc.layout((scatterplot | (cell_sets / genes)) / heatmap);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Export files to a local directory\n", - "\n", - "The `.export(to='files')` method on the view config instance will export files to the specified directory `out_dir`. The `adata_path` parameter is required so that the file URLs in the view config point to the location where you ultimately intend to serve the files." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config_dict = vc.export(to='files', adata_path='http://localhost:3000', out_dir='./test')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. 
Serve the files" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that the files have been saved to the `./test` directory, they can be served by any static web server.\n", - "\n", - "If you would like to serve the files locally, we recommend [http-server](https://github.com/http-party/http-server) which can be installed with NPM or Homebrew:\n", - "```sh\n", - "cd test\n", - "http-server ./ --cors -p 3000\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6. View on vitessce.io\n", - "\n", - "The returned view config dict can be converted to a URL, and if the files are served on the internet (rather than locally), this URL can be used to share the interactive visualizations with colleagues." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vitessce_url = \"http://vitessce.io/?url=data:,\" + quote_plus(json.dumps(config_dict))\n", - "import webbrowser\n", - "webbrowser.open(vitessce_url)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Data Preparation Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Export data to local files" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "from urllib.parse import quote_plus\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceWidget,\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download and process data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", + "if not isfile(adata_filepath):\n", + " os.makedirs(\"data\", exist_ok=True)\n", + " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)\n", + "\n", + "adata = read_h5ad(adata_filepath)\n", + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", + "if not isdir(zarr_filepath):\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"CellType\"],\n", + " obsm_keys=[\"X_umap\"],\n", + " var_cols=[\"top_highly_variable\"],\n", + " optimize_X=True,\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Create the Vitessce configuration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up the configuration by adding the views and datasets of interest." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", + " adata_path=zarr_filepath,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " feature_filter_path=\"var/top_highly_variable\"\n", + "))\n", + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"X_umap\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", + "vc.layout((scatterplot | (cell_sets / genes)) / heatmap);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Export files to a local directory\n", + "\n", + "The `.export(to='files')` method on the view config instance will export files to the specified directory `out_dir`. The `base_url` parameter is required so that the file URLs in the view config point to the location where you ultimately intend to serve the files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_dict = vc.export(to='files', base_url='http://localhost:3000', out_dir='./test')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Serve the files" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that the files have been saved to the `./test` directory, they can be served by any static web server.\n", + "\n", + "If you would like to serve the files locally, we recommend [http-server](https://github.com/http-party/http-server) which can be installed with NPM or Homebrew:\n", + "```sh\n", + "cd test\n", + "http-server ./ --cors -p 3000\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. View on vitessce.io\n", + "\n", + "The returned view config dict can be converted to a URL, and if the files are served on the internet (rather than locally), this URL can be used to share the interactive visualizations with colleagues." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vitessce_url = \"http://vitessce.io/?url=data:,\" + quote_plus(json.dumps(config_dict))\n", + "import webbrowser\n", + "webbrowser.open(vitessce_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/notebooks/data_export_s3.ipynb b/docs/notebooks/data_export_s3.ipynb index 9d945ea8..c84a0e43 100644 --- a/docs/notebooks/data_export_s3.ipynb +++ b/docs/notebooks/data_export_s3.ipynb @@ -1,230 +1,230 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": 
"hidden" - }, - "source": [ - "# Vitessce Data Preparation Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Export data to AWS S3" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import boto3\n", - "import json\n", - "from urllib.parse import quote_plus\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceWidget,\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Download and process data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", - "if not isfile(adata_filepath):\n", - " os.makedirs(\"data\", exist_ok=True)\n", - " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)\n", - "\n", - "adata = read_h5ad(adata_filepath)\n", - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", - "if not isdir(zarr_filepath):\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"CellType\"],\n", - " obsm_keys=[\"X_umap\"],\n", - " var_cols=[\"top_highly_variable\"],\n", - " optimize_X=True,\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Create the Vitessce configuration" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set up the configuration by adding the views and datasets of interest." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", - "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " adata_path=zarr_filepath,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " feature_filter_path=\"var/top_highly_variable\"\n", - "))\n", - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", - "vc.layout((scatterplot | (cell_sets / genes)) / heatmap);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Create a `boto3` resource with S3 credentials" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "s3 = boto3.resource(\n", - " service_name='s3',\n", - " aws_access_key_id=os.environ['VITESSCE_S3_ACCESS_KEY_ID'],\n", - " aws_secret_access_key=os.environ['VITESSCE_S3_SECRET_ACCESS_KEY'],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Upload files to S3\n", - "\n", - "The `.export(to='S3')` method on the view config instance will upload all data objects to the specified bucket. Then, the processed view config will be returned as a `dict`, with the file URLs filled in, pointing to the S3 bucket files. For more information about configuring the S3 bucket so that files are accessible over the internet, visit the \"Hosting Data\" page of our core documentation site." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config_dict = vc.export(to='S3', s3=s3, bucket_name='vitessce-export-examples', prefix='test')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6. View on vitessce.io\n", - "\n", - "The returned view config dict can be converted to a URL, and can be used to share the interactive visualizations with colleagues." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vitessce_url = \"http://vitessce.io/?url=data:,\" + quote_plus(json.dumps(config_dict))\n", - "import webbrowser\n", - "webbrowser.open(vitessce_url)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Data Preparation Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Export data to AWS S3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import boto3\n", + "import json\n", + "from urllib.parse import quote_plus\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceWidget,\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download and process data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", + "if not isfile(adata_filepath):\n", + " os.makedirs(\"data\", exist_ok=True)\n", + " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)\n", + "\n", + "adata = read_h5ad(adata_filepath)\n", + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", + "if not isdir(zarr_filepath):\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"CellType\"],\n", + " obsm_keys=[\"X_umap\"],\n", + " var_cols=[\"top_highly_variable\"],\n", + 
" optimize_X=True,\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Create the Vitessce configuration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up the configuration by adding the views and datasets of interest." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')\n", + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", + " adata_path=zarr_filepath,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " feature_filter_path=\"var/top_highly_variable\"\n", + "))\n", + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", + "vc.layout((scatterplot | (cell_sets / genes)) / heatmap);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create a `boto3` resource with S3 credentials" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s3 = boto3.resource(\n", + " service_name='s3',\n", + " aws_access_key_id=os.environ['VITESSCE_S3_ACCESS_KEY_ID'],\n", + " aws_secret_access_key=os.environ['VITESSCE_S3_SECRET_ACCESS_KEY'],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Upload files to S3\n", + "\n", + "The `.export(to='S3')` method on the view config instance will upload all data objects to the specified bucket. 
Then, the processed view config will be returned as a `dict`, with the file URLs filled in, pointing to the S3 bucket files. For more information about configuring the S3 bucket so that files are accessible over the internet, visit the \"Hosting Data\" page of our core documentation site." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_dict = vc.export(to='S3', s3=s3, bucket_name='vitessce-export-examples', prefix='test')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. View on vitessce.io\n", + "\n", + "The returned view config dict can be converted to a URL, and can be used to share the interactive visualizations with colleagues." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vitessce_url = \"http://vitessce.io/?url=data:,\" + quote_plus(json.dumps(config_dict))\n", + "import webbrowser\n", + "webbrowser.open(vitessce_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/notebooks/web_app_brain.ipynb b/docs/notebooks/web_app_brain.ipynb index 7e183bd1..85d7905e 100644 --- a/docs/notebooks/web_app_brain.ipynb +++ b/docs/notebooks/web_app_brain.ipynb @@ -1,267 +1,267 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Web App Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 
Visualization of single-cell RNA seq data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Download the data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.makedirs(\"data\", exist_ok=True)\n", - "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", - "urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Load the data\n", - "\n", - "Note: this function may print a `FutureWarning`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_h5ad(adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3.1. Preprocess the Data For Visualization\n", - "\n", - "This dataset contains 25,587 genes. In order to visualize it efficiently, we convert it to CSC sparse format so that we can make fast requests for gene data. 
We also prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Create the Vitessce widget configuration\n", - "\n", - "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.1. Instantiate a `VitessceConfig` object\n", - "\n", - "Use the `VitessceConfig(name, description)` constructor to create an instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.2. Add a dataset to the `VitessceConfig` instance\n", - "\n", - "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", - "\n", - "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. 
For example, the `AnnDataWrapper` class knows how to convert AnnData objects to the corresponding Vitessce data types.\n", - "\n", - "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `cell_set_obs_cols` to tell Vitessce which columns of the `obs` dataframe correspond to cell sets." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " adata,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " feature_filter_path=\"var/top_highly_variable\"\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.3. Add visualizations to the `VitessceConfig` instance\n", - "\n", - "Now that we have added a dataset, we can configure visualizations. The `.add_view(dataset, component_type)` method adds a view (i.e. visualization or controller component) to the configuration.\n", - "\n", - "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", - "\n", - "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.4. Define the visualization layout\n", - "\n", - "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Launch the web application\n", - "\n", - "The `vc.web_app()` method serves the processed data locally and opens a web browser to `http://vitessce.io/?url={config_as_json}`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc.web_app()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "cells": [ + { + 
"cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Web App Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of single-cell RNA seq data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download the data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs(\"data\", exist_ok=True)\n", + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", + "urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Load the data\n", + "\n", + "Note: this function may print a `FutureWarning`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_h5ad(adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.1. Preprocess the Data For Visualization\n", + "\n", + "This dataset contains 25,587 genes. 
In order to visualize it efficiently, we convert it to CSC sparse format so that we can make fast requests for gene data. We also prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create the Vitessce widget configuration\n", + "\n", + "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.1. Instantiate a `VitessceConfig` object\n", + "\n", + "Use the `VitessceConfig(name, description)` constructor to create an instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.2. Add a dataset to the `VitessceConfig` instance\n", + "\n", + "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", + "\n", + "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. 
For example, the `AnnDataWrapper` class knows how to convert AnnData objects to the corresponding Vitessce data types.\n", + "\n", + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce which columns of the `obs` dataframe correspond to cell sets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", + " adata,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " feature_filter_path=\"var/top_highly_variable\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.3. Add visualizations to the `VitessceConfig` instance\n", + "\n", + "Now that we have added a dataset, we can configure visualizations. The `.add_view(component_type, dataset)` method adds a view (i.e. visualization or controller component) to the configuration.\n", + "\n", + "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", + "\n", + "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.4. Define the visualization layout\n", + "\n", + "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Launch the web application\n", + "\n", + "The `vc.web_app()` method serves the processed data locally and opens a web browser to `http://vitessce.io/?url={config_as_json}`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc.web_app()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/notebooks/widget_brain.ipynb 
b/docs/notebooks/widget_brain.ipynb index 969fb0da..759bd928 100644 --- a/docs/notebooks/widget_brain.ipynb +++ b/docs/notebooks/widget_brain.ipynb @@ -1,295 +1,295 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization of single-cell RNA seq data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Download the data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", - "if not isfile(adata_filepath):\n", - " os.makedirs(\"data\", exist_ok=True)\n", - " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. 
Load the data\n", - "\n", - "Note: this function may print a `FutureWarning`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_h5ad(adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## 3.1. Preprocess the Data For Visualization\n", - "\n", - "This dataset contains 25,587 genes. We prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3.2 Save the Data to Zarr store\n", - "\n", - "We want to convert the original `h5ad` file to a [Zarr](https://zarr.readthedocs.io/en/stable/) store, which Vitessce is able to load. We can use the `optimize_adata` function to ensure that all arrays and dataframe columns that we intend to use in our visualization are in the optimal format to be loaded by Vitessce. This function will cast arrays to numerical data types that take up less space (as long as the values allow). Note: unused arrays and columns (i.e., not specified in any of the parameters to `optimize_adata`) will not be copied into the new AnnData object." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", - "if not isdir(zarr_filepath):\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"CellType\"],\n", - " obsm_keys=[\"X_umap\"],\n", - " optimize_X=True,\n", - " var_cols=[\"top_highly_variable\"],\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Create the Vitessce widget configuration\n", - "\n", - "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.1. Instantiate a `VitessceConfig` object\n", - "\n", - "Use the `VitessceConfig` constructor to create an instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.2. Add a dataset to the `VitessceConfig` instance\n", - "\n", - "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", - "\n", - "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", - "\n", - "Dataset wrapper classes may require additional parameters to resolve ambiguities. 
For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " adata_path=zarr_filepath,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " initial_feature_filter_path=\"var/top_highly_variable\"\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.3. Add visualizations to the `VitessceConfig` instance\n", - "\n", - "Now that we have added a dataset, we can configure visualizations. The `.add_view` method adds a view (i.e. visualization or controller component) to the configuration.\n", - "\n", - "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", - "\n", - "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.4. 
Define the visualization layout\n", - "\n", - "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Create the widget\n", - "\n", - "The `vc.widget()` method returns the configured widget instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of single-cell RNA seq data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download the data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", + "if not isfile(adata_filepath):\n", + " os.makedirs(\"data\", exist_ok=True)\n", + " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Load the data\n", + "\n", + "Note: this function may print a `FutureWarning`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_h5ad(adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## 3.1. Preprocess the Data For Visualization\n", + "\n", + "This dataset contains 25,587 genes. We prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.2 Save the Data to Zarr store\n", + "\n", + "We want to convert the original `h5ad` file to a [Zarr](https://zarr.readthedocs.io/en/stable/) store, which Vitessce is able to load. We can use the `optimize_adata` function to ensure that all arrays and dataframe columns that we intend to use in our visualization are in the optimal format to be loaded by Vitessce. This function will cast arrays to numerical data types that take up less space (as long as the values allow). Note: unused arrays and columns (i.e., not specified in any of the parameters to `optimize_adata`) will not be copied into the new AnnData object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"habib17.processed.zarr\")\n", + "if not isdir(zarr_filepath):\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"CellType\"],\n", + " obsm_keys=[\"X_umap\"],\n", + " optimize_X=True,\n", + " var_cols=[\"top_highly_variable\"],\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create the Vitessce widget configuration\n", + "\n", + "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.1. Instantiate a `VitessceConfig` object\n", + "\n", + "Use the `VitessceConfig` constructor to create an instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', description='COVID-19 Healthy Donor Brain')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.2. Add a dataset to the `VitessceConfig` instance\n", + "\n", + "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", + "\n", + "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", + "\n", + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", + " adata_path=zarr_filepath,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " initial_feature_filter_path=\"var/top_highly_variable\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.3. 
Add visualizations to the `VitessceConfig` instance\n", + "\n", + "Now that we have added a dataset, we can configure visualizations. The `.add_view` method adds a view (i.e. visualization or controller component) to the configuration.\n", + "\n", + "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", + "\n", + "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.4. Define the visualization layout\n", + "\n", + "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Create the widget\n", + "\n", + "The `vc.widget()` method returns the configured widget instance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/notebooks/widget_brain_with_base_dir.ipynb b/docs/notebooks/widget_brain_with_base_dir.ipynb index 6bec5222..015a3e67 100644 --- a/docs/notebooks/widget_brain_with_base_dir.ipynb +++ b/docs/notebooks/widget_brain_with_base_dir.ipynb @@ -1,329 +1,329 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Configure relative to a base_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - " BASE_URL_PLACEHOLDER,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Define a `base_dir`\n", - "\n", - "We will define a `base_dir` inside which our data will live. We will provide this to `VitessceConfig` in order to construct a configuration that contains URL paths relative to this directory." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "BASE_DIR = \"data\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Download the data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_relative_filepath = \"habib17.processed.h5ad\" # Relative to BASE_DIR\n", - "adata_filepath = join(BASE_DIR, adata_relative_filepath)\n", - "if not isfile(adata_filepath):\n", - " os.makedirs(BASE_DIR, exist_ok=True)\n", - " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. 
Load the data\n", - "\n", - "Note: this function may print a `FutureWarning`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_h5ad(adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## 4.1. Preprocess the Data For Visualization\n", - "\n", - "This dataset contains 25,587 genes. We prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4.2 Save the Data to Zarr store\n", - "\n", - "We want to convert the original `h5ad` file to a [Zarr](https://zarr.readthedocs.io/en/stable/) store, which Vitessce is able to load. We can use the `optimize_adata` function to ensure that all arrays and dataframe columns that we intend to use in our visualization are in the optimal format to be loaded by Vitessce. This function will cast arrays to numerical data types that take up less space (as long as the values allow). Note: unused arrays and columns (i.e., not specified in any of the parameters to `optimize_adata`) will not be copied into the new AnnData object." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_relative_filepath = \"habib17.processed.zarr\" # Relative to BASE_DIR\n", - "zarr_filepath = join(BASE_DIR, zarr_relative_filepath)\n", - "if not isdir(zarr_filepath):\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"CellType\"],\n", - " obsm_keys=[\"X_umap\"],\n", - " optimize_X=True,\n", - " var_cols=[\"top_highly_variable\"],\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Create the Vitessce widget configuration\n", - "\n", - "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.1. Instantiate a `VitessceConfig` object\n", - "\n", - "Use the `VitessceConfig` constructor to create an instance. In this case, we want to construct our configuration using local data that is relative to a particular directory, so we provide the `base_dir` parameter.\n", - "\n", - "Note: This `base_dir` parameter is optional. When it is omitted, local data paths are assumed to be relative to the current working directory." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', base_dir=BASE_DIR)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.2. Add a dataset to the `VitessceConfig` instance\n", - "\n", - "In Vitessce, a dataset is a container for one file per data type. 
The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", - "\n", - "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", - "\n", - "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", - " adata_path=zarr_relative_filepath, # Relative to BASE_DIR (because we specified base_dir in the VitessceConfig constructor)\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " initial_feature_filter_path=\"var/top_highly_variable\"\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.3. Add visualizations to the `VitessceConfig` instance\n", - "\n", - "Now that we have added a dataset, we can configure visualizations. The `.add_view` method adds a view (i.e. visualization or controller component) to the configuration.\n", - "\n", - "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", - "\n", - "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.4. Define the visualization layout\n", - "\n", - "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6. Create the widget\n", - "\n", - "The `vc.widget()` method returns the configured widget instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 7. Check the URLs in the configuration\n", - "\n", - "We can check that the data URLs in the configuration respected the specified `base_dir`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config_dict = vc.to_dict(adata_path=BASE_URL_PLACEHOLDER)\n", - "config_dict" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Configure relative to a base_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + " BASE_URL_PLACEHOLDER,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Define a `base_dir`\n", + "\n", + "We will define a `base_dir` inside which our data will live. We will provide this to `VitessceConfig` in order to construct a configuration that contains URL paths relative to this directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "BASE_DIR = \"data\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Download the data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_relative_filepath = \"habib17.processed.h5ad\" # Relative to BASE_DIR\n", + "adata_filepath = join(BASE_DIR, adata_relative_filepath)\n", + "if not isfile(adata_filepath):\n", + " os.makedirs(BASE_DIR, exist_ok=True)\n", + " urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Load the data\n", + "\n", + "Note: this function may print a `FutureWarning`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_h5ad(adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## 4.1. Preprocess the Data For Visualization\n", + "\n", + "This dataset contains 25,587 genes. We prepare to visualize the top 50 highly variable genes for the heatmap as ranked by dispersion norm, although one may use any boolean array filter for the heatmap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4.2 Save the Data to Zarr store\n", + "\n", + "We want to convert the original `h5ad` file to a [Zarr](https://zarr.readthedocs.io/en/stable/) store, which Vitessce is able to load. We can use the `optimize_adata` function to ensure that all arrays and dataframe columns that we intend to use in our visualization are in the optimal format to be loaded by Vitessce. This function will cast arrays to numerical data types that take up less space (as long as the values allow). Note: unused arrays and columns (i.e., not specified in any of the parameters to `optimize_adata`) will not be copied into the new AnnData object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_relative_filepath = \"habib17.processed.zarr\" # Relative to BASE_DIR\n", + "zarr_filepath = join(BASE_DIR, zarr_relative_filepath)\n", + "if not isdir(zarr_filepath):\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"CellType\"],\n", + " obsm_keys=[\"X_umap\"],\n", + " optimize_X=True,\n", + " var_cols=[\"top_highly_variable\"],\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Create the Vitessce widget configuration\n", + "\n", + "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.1. Instantiate a `VitessceConfig` object\n", + "\n", + "Use the `VitessceConfig` constructor to create an instance. In this case, we want to construct our configuration using local data that is relative to a particular directory, so we provide the `base_dir` parameter.\n", + "\n", + "Note: This `base_dir` parameter is optional. When it is omitted, local data paths are assumed to be relative to the current working directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Habib et al', base_dir=BASE_DIR)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.2. Add a dataset to the `VitessceConfig` instance\n", + "\n", + "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", + "\n", + "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", + "\n", + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = vc.add_dataset(name='Brain').add_object(AnnDataWrapper(\n", + " adata_path=zarr_relative_filepath, # Relative to BASE_DIR (because we specified base_dir in the VitessceConfig constructor)\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " initial_feature_filter_path=\"var/top_highly_variable\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.3. Add visualizations to the `VitessceConfig` instance\n", + "\n", + "Now that we have added a dataset, we can configure visualizations. The `.add_view` method adds a view (i.e. visualization or controller component) to the configuration.\n", + "\n", + "The `Component` enum class (which we have imported as `cm` here) can be used to fill in the `component_type` parameter.\n", + "\n", + "For convenience, the `SCATTERPLOT` component type takes the extra `mapping` keyword argument, which specifies which embedding should be used for mapping cells to (x,y) points on the plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.4. Define the visualization layout\n", + "\n", + "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc.layout((scatterplot | cell_sets) / (heatmap | genes));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Create the widget\n", + "\n", + "The `vc.widget()` method returns the configured widget instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Check the URLs in the configuration\n", + "\n", + "We can check that the data URLs in the configuration respected the specified `base_dir`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_dict = vc.to_dict(base_url=BASE_URL_PLACEHOLDER)\n", + "config_dict" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 } diff --git a/docs/notebooks/widget_genomic_profiles.ipynb b/docs/notebooks/widget_genomic_profiles.ipynb index 4c33ffea..1794598c 100644 --- a/docs/notebooks/widget_genomic_profiles.ipynb +++ b/docs/notebooks/widget_genomic_profiles.ipynb @@ -1,220 +1,220 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization of genomic profiles" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. 
Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from vitessce import (\n", - " VitessceConfig,\n", - " ViewType as vt,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - " MultivecZarrWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " adata_to_multivec_zarr,\n", - ")\n", - "from os.path import join\n", - "from scipy.io import mmread\n", - "import pandas as pd\n", - "import numpy as np\n", - "from anndata import AnnData" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Load the data\n", - "\n", - "In this step, we load the raw data that has been downloaded from the HuBMAP portal https://portal.hubmapconsortium.org/browse/dataset/210d118a14c8624b6bb9610a9062656e" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mtx = mmread(join('data', 'snapatac', 'filtered_cell_by_bin.mtx')).toarray()\n", - "barcodes_df = pd.read_csv(join('data', 'snapatac', 'barcodes.txt'), header=None)\n", - "bins_df = pd.read_csv(join('data', 'snapatac', 'bins.txt'), header=None, names=[\"interval\"])\n", - "clusters_df = pd.read_csv(join('data', 'snapatac', 'umap_coords_clusters.csv'), index_col=0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Convert the data to Vitessce-compatible formats\n", - "\n", - "Vitessce can load AnnData objects saved to Zarr formats efficiently." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# The genome assembly is GRCh38 but the chromosome names in the bin names do not start with the \"chr\" prefix.\n", - "# This is incompatible with the chromosome names from `negspy`, so we need to append the prefix.\n", - "bins_df[\"interval\"] = bins_df[\"interval\"].apply(lambda x: \"chr\" + x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "obs = clusters_df[[\"cluster\"]]\n", - "obs[\"cluster\"] = obs[\"cluster\"].astype(str)\n", - "obsm = { \"X_umap\": clusters_df[[\"umap.1\", \"umap.2\"]].values }\n", - "adata = AnnData(X=mtx, obs=obs, var=bins_df, obsm=obsm)\n", - "adata" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "multivec_zarr_path = join(\"data\", \"HBM485.TBWH.322.multivec.zarr\")\n", - "adata_zarr_path = join(\"data\", \"HBM485.TBWH.322.adata.zarr\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Sort cluster IDs\n", - "cluster_ids = obs[\"cluster\"].unique().tolist()\n", - "cluster_ids.sort(key=int)\n", - "# Save genomic profiles to multivec-zarr format.\n", - "adata_to_multivec_zarr(adata, multivec_zarr_path, obs_set_col=\"cluster\", obs_set_name=\"Cluster\", obs_set_vals=cluster_ids)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Save anndata object to AnnData-Zarr format.\n", - "adata.write_zarr(adata_zarr_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## 4. 
Make a Vitessce configuration\n", - "\n", - "We need to tell Vitessce about the data that we want to load and the visualization components that we want to include in the widget.\n", - "For this dataset, we want to add the `GENOMIC_PROFILES` component, which renders genome browser tracks with [HiGlass](http://higlass.io)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='HuBMAP snATAC-seq')\n", - "dataset = vc.add_dataset(name='HBM485.TBWH.322').add_object(MultivecZarrWrapper(\n", - " zarr_path=multivec_zarr_path\n", - ")).add_object(AnnDataWrapper(\n", - " adata_path=adata_zarr_path,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/cluster\"],\n", - " obs_set_names=[\"Cluster\"],\n", - "))\n", - "\n", - "genomic_profiles = vc.add_view(vt.GENOMIC_PROFILES, dataset=dataset)\n", - "scatter = vc.add_view(vt.SCATTERPLOT, dataset=dataset, mapping = \"UMAP\")\n", - "cell_sets = vc.add_view(vt.OBS_SETS, dataset=dataset)\n", - "\n", - "vc.layout(genomic_profiles / (scatter | cell_sets));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. 
Create the widget" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget(height=800)\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of genomic profiles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vitessce import (\n", + " VitessceConfig,\n", + " ViewType as vt,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + " MultivecZarrWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " adata_to_multivec_zarr,\n", + ")\n", + "from os.path import join\n", + "from scipy.io import mmread\n", + "import pandas as pd\n", + "import numpy as np\n", + "from anndata import AnnData" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Load the data\n", + "\n", + "In this step, we load the raw data that has been downloaded from the HuBMAP portal https://portal.hubmapconsortium.org/browse/dataset/210d118a14c8624b6bb9610a9062656e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mtx = mmread(join('data', 'snapatac', 'filtered_cell_by_bin.mtx')).toarray()\n", + "barcodes_df = pd.read_csv(join('data', 'snapatac', 'barcodes.txt'), header=None)\n", + "bins_df = pd.read_csv(join('data', 'snapatac', 'bins.txt'), header=None, names=[\"interval\"])\n", + "clusters_df = pd.read_csv(join('data', 'snapatac', 'umap_coords_clusters.csv'), index_col=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Convert the data to Vitessce-compatible formats\n", + "\n", + "Vitessce can load AnnData objects saved to Zarr formats efficiently." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The genome assembly is GRCh38 but the chromosome names in the bin names do not start with the \"chr\" prefix.\n", + "# This is incompatible with the chromosome names from `negspy`, so we need to append the prefix.\n", + "bins_df[\"interval\"] = bins_df[\"interval\"].apply(lambda x: \"chr\" + x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obs = clusters_df[[\"cluster\"]]\n", + "obs[\"cluster\"] = obs[\"cluster\"].astype(str)\n", + "obsm = { \"X_umap\": clusters_df[[\"umap.1\", \"umap.2\"]].values }\n", + "adata = AnnData(X=mtx, obs=obs, var=bins_df, obsm=obsm)\n", + "adata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "multivec_zarr_path = join(\"data\", \"HBM485.TBWH.322.multivec.zarr\")\n", + "adata_zarr_path = join(\"data\", \"HBM485.TBWH.322.adata.zarr\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [], + "source": [ + "# Sort cluster IDs\n", + "cluster_ids = obs[\"cluster\"].unique().tolist()\n", + "cluster_ids.sort(key=int)\n", + "# Save genomic profiles to multivec-zarr format.\n", + "adata_to_multivec_zarr(adata, multivec_zarr_path, obs_set_col=\"cluster\", obs_set_name=\"Cluster\", obs_set_vals=cluster_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save anndata object to AnnData-Zarr format.\n", + "adata.write_zarr(adata_zarr_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## 4. Make a Vitessce configuration\n", + "\n", + "We need to tell Vitessce about the data that we want to load and the visualization components that we want to include in the widget.\n", + "For this dataset, we want to add the `GENOMIC_PROFILES` component, which renders genome browser tracks with [HiGlass](http://higlass.io)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='HuBMAP snATAC-seq')\n", + "dataset = vc.add_dataset(name='HBM485.TBWH.322').add_object(MultivecZarrWrapper(\n", + " zarr_path=multivec_zarr_path\n", + ")).add_object(AnnDataWrapper(\n", + " adata_path=adata_zarr_path,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/cluster\"],\n", + " obs_set_names=[\"Cluster\"],\n", + "))\n", + "\n", + "genomic_profiles = vc.add_view(vt.GENOMIC_PROFILES, dataset=dataset)\n", + "scatter = vc.add_view(vt.SCATTERPLOT, dataset=dataset, mapping = \"UMAP\")\n", + "cell_sets = vc.add_view(vt.OBS_SETS, dataset=dataset)\n", + "\n", + "vc.layout(genomic_profiles / (scatter | cell_sets));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Create the widget" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget(height=800)\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/notebooks/widget_loom.ipynb b/docs/notebooks/widget_loom.ipynb index 7ac2d147..0c4f958b 100644 --- a/docs/notebooks/widget_loom.ipynb +++ b/docs/notebooks/widget_loom.ipynb @@ -1,219 +1,219 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization of a Loom file" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_loom\n", - "import numpy as np\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " to_diamond,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. 
Download data\n", - "\n", - "Download `osmFISH_SScortex_mouse_all_cells.loom` from http://loom.linnarssonlab.org/." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "loom_filepath = join(\"data\", \"osmFISH_SScortex_mouse_all_cells.loom\")\n", - "if not isfile(loom_filepath):\n", - " os.makedirs(\"data\", exist_ok=True)\n", - " urlretrieve('http://loom.linnarssonlab.org/clone/osmFISH/osmFISH_SScortex_mouse_all_cells.loom', loom_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Open Loom file with AnnData's read_loom" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_loom(loom_filepath, obsm_names={\"tSNE\": [\"_tSNE_1\", \"_tSNE_2\"], \"spatial\": [\"X\", \"Y\"]})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Generate pseudo-segmentations as diamond-shaped polygons centered on the spatial coordinate of each cell, and store in `adata.obsm[\"segmentations\"]`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "num_cells = adata.obs.shape[0]\n", - "adata.obsm[\"segmentations\"] = np.zeros((num_cells, 4, 2))\n", - "radius = 100\n", - "for i in range(num_cells):\n", - " adata.obsm[\"segmentations\"][i, :, :] = to_diamond(adata.obsm['spatial'][i, 0], adata.obsm['spatial'][i, 1], radius)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save the AnnData object to a Zarr store:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"osmFISH_SScortex_mouse_all_cells.zarr\")\n", - "if not isdir(zarr_filepath) or True:\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"ClusterName\"],\n", - " obsm_keys=[\"tSNE\", \"spatial\", \"segmentations\"],\n", - " 
optimize_X=True,\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Configure Vitessce\n", - "\n", - "Create a Vitessce view config." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='Loom Example', description='osmFISH dataset of the mouse cortex including all cells')\n", - "w = AnnDataWrapper(adata_path=zarr_filepath, obs_set_paths=[\"obs/ClusterName\"], obs_set_names=[\"Clusters\"], obs_locations_path=\"obsm/spatial\", obs_segmentations_path=\"obsm/segmentations\", obs_embedding_paths=[\"obsm/tSNE\"])\n", - "dataset = vc.add_dataset(name='SScortex').add_object(w)\n", - "\n", - "tsne = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"tSNE\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "spatial = vc.add_view(cm.SPATIAL, dataset=dataset)\n", - "\n", - "spatial_segmentation_layer_value = {\n", - " \"opacity\": 1,\n", - " \"radius\": 0,\n", - " \"visible\": True,\n", - " \"stroked\": False\n", - "}\n", - "\n", - "vc.link_views([spatial], [ct.SPATIAL_ZOOM, ct.SPATIAL_TARGET_X, ct.SPATIAL_TARGET_Y, ct.SPATIAL_SEGMENTATION_LAYER], [-6.43, 10417.69, 24885.55, spatial_segmentation_layer_value])\n", - "vc.layout(spatial | (tsne / cell_sets));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Render the widget" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A widget can be created with the `.widget()` method on the config instance." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of a Loom file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_loom\n", + "import numpy as np\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " to_diamond,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download data\n", + "\n", + "Download `osmFISH_SScortex_mouse_all_cells.loom` from http://loom.linnarssonlab.org/." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "loom_filepath = join(\"data\", \"osmFISH_SScortex_mouse_all_cells.loom\")\n", + "if not isfile(loom_filepath):\n", + " os.makedirs(\"data\", exist_ok=True)\n", + " urlretrieve('http://loom.linnarssonlab.org/clone/osmFISH/osmFISH_SScortex_mouse_all_cells.loom', loom_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Open Loom file with AnnData's read_loom" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_loom(loom_filepath, obsm_names={\"tSNE\": [\"_tSNE_1\", \"_tSNE_2\"], \"spatial\": [\"X\", \"Y\"]})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generate pseudo-segmentations as diamond-shaped polygons centered on the spatial coordinate of each cell, and store in `adata.obsm[\"segmentations\"]`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_cells = adata.obs.shape[0]\n", + "adata.obsm[\"segmentations\"] = np.zeros((num_cells, 4, 2))\n", + "radius = 100\n", + "for i in range(num_cells):\n", + " adata.obsm[\"segmentations\"][i, :, :] = to_diamond(adata.obsm['spatial'][i, 0], adata.obsm['spatial'][i, 1], radius)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save the AnnData object to a Zarr store:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"osmFISH_SScortex_mouse_all_cells.zarr\")\n", + "if not isdir(zarr_filepath) or True:\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"ClusterName\"],\n", + " obsm_keys=[\"tSNE\", \"spatial\", \"segmentations\"],\n", + " optimize_X=True,\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Configure Vitessce\n", + "\n", + "Create a Vitessce view config." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='Loom Example', description='osmFISH dataset of the mouse cortex including all cells')\n", + "w = AnnDataWrapper(adata_path=zarr_filepath, obs_set_paths=[\"obs/ClusterName\"], obs_set_names=[\"Clusters\"], obs_locations_path=\"obsm/spatial\", obs_segmentations_path=\"obsm/segmentations\", obs_embedding_paths=[\"obsm/tSNE\"])\n", + "dataset = vc.add_dataset(name='SScortex').add_object(w)\n", + "\n", + "tsne = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"tSNE\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "spatial = vc.add_view(cm.SPATIAL, dataset=dataset)\n", + "\n", + "spatial_segmentation_layer_value = {\n", + " \"opacity\": 1,\n", + " \"radius\": 0,\n", + " \"visible\": True,\n", + " \"stroked\": False\n", + "}\n", + "\n", + "vc.link_views([spatial], [ct.SPATIAL_ZOOM, ct.SPATIAL_TARGET_X, ct.SPATIAL_TARGET_Y, ct.SPATIAL_SEGMENTATION_LAYER], [-6.43, 10417.69, 24885.55, spatial_segmentation_layer_value])\n", + "vc.layout(spatial | (tsne / cell_sets));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Render the widget" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A widget can be created with the `.widget()` method on the config instance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/notebooks/widget_pbmc.ipynb b/docs/notebooks/widget_pbmc.ipynb index de2be821..f972949c 100644 --- a/docs/notebooks/widget_pbmc.ipynb +++ b/docs/notebooks/widget_pbmc.ipynb @@ -1,209 +1,209 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization of 3k PBMC reference" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join, isfile, isdir\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " optimize_adata,\n", - " VAR_CHUNK_SIZE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. 
Download the dataset\n", - "\n", - "Download `pbmc3k_final.h5ad` from https://seurat.nygenome.org/pbmc3k_final.h5ad" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_filepath = join(\"data\", \"pbmc3k_final.h5ad\")\n", - "if not isfile(adata_filepath):\n", - " os.makedirs(\"data\", exist_ok=True)\n", - " urlretrieve('https://seurat.nygenome.org/pbmc3k_final.h5ad', adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Load the dataset\n", - "\n", - "Load the dataset using AnnData's `read_h5ad` function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_h5ad(adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3.1 Save the AnnData object to Zarr" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zarr_filepath = join(\"data\", \"pbmc3k_final.zarr\")\n", - "if not isdir(zarr_filepath):\n", - " adata = optimize_adata(\n", - " adata,\n", - " obs_cols=[\"leiden\"],\n", - " obsm_keys=[\"X_umap\", \"X_pca\"],\n", - " optimize_X=True,\n", - " )\n", - " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Create a Vitessce view config\n", - "\n", - "Define the data and views you would like to include in the widget." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", - "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(\n", - " adata_path=zarr_filepath,\n", - " obs_set_paths=[\"obs/leiden\"],\n", - " obs_set_names=[\"Leiden\"],\n", - " obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"],\n", - " obs_embedding_names=[\"UMAP\", \"PCA\"],\n", - " obs_feature_matrix_path=\"X\"\n", - "))\n", - "\n", - "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "pca = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"PCA\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", - "\n", - "vc.layout((umap / pca) | ((cell_sets | genes) / heatmap));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Create the Vitessce widget" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A widget can be created with the `.widget()` method on the config instance. Here, the `proxy=True` parameter allows this widget to be used in a cloud notebook environment, such as Binder." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of 3k PBMC reference" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join, isfile, isdir\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Download the dataset\n", + "\n", + "Download `pbmc3k_final.h5ad` from https://seurat.nygenome.org/pbmc3k_final.h5ad" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_filepath = join(\"data\", \"pbmc3k_final.h5ad\")\n", + "if not isfile(adata_filepath):\n", + " os.makedirs(\"data\", exist_ok=True)\n", + " urlretrieve('https://seurat.nygenome.org/pbmc3k_final.h5ad', adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Load the dataset\n", + "\n", + "Load the dataset using AnnData's `read_h5ad` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_h5ad(adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.1 Save the AnnData object to Zarr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zarr_filepath = join(\"data\", \"pbmc3k_final.zarr\")\n", + "if not isdir(zarr_filepath):\n", + " adata = optimize_adata(\n", + " adata,\n", + " obs_cols=[\"leiden\"],\n", + " obsm_keys=[\"X_umap\", \"X_pca\"],\n", + " optimize_X=True,\n", + " )\n", + " adata.write_zarr(zarr_filepath, chunks=[adata.shape[0], VAR_CHUNK_SIZE])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create a Vitessce view config\n", + "\n", + "Define the data and views you would like to include in the widget." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", + "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(\n", + " adata_path=zarr_filepath,\n", + " obs_set_paths=[\"obs/leiden\"],\n", + " obs_set_names=[\"Leiden\"],\n", + " obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"],\n", + " obs_embedding_names=[\"UMAP\", \"PCA\"],\n", + " obs_feature_matrix_path=\"X\"\n", + "))\n", + "\n", + "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "pca = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"PCA\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", + "\n", + "vc.layout((umap / pca) | ((cell_sets | genes) / heatmap));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Create the Vitessce widget" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A widget can be created with the `.widget()` method on the config instance. Here, the `proxy=True` parameter allows this widget to be used in a cloud notebook environment, such as Binder." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/notebooks/widget_pbmc_remote.ipynb b/docs/notebooks/widget_pbmc_remote.ipynb index 586e8f18..d238dca7 100644 --- a/docs/notebooks/widget_pbmc_remote.ipynb +++ b/docs/notebooks/widget_pbmc_remote.ipynb @@ -1,153 +1,153 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization of 3k PBMC reference from Remote Zarr Store" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "We need to import the classes and functions that we will be using from the corresponding packages." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Set the URL for the Remote Dataset\n", - "\n", - "For this example, we already have uploaded the `pbmc3k` dataset as a zarr store from the [scanpy docs](https://scanpy.readthedocs.io/en/stable/api/scanpy.datasets.pbmc3k.html) to the cloud." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "url = 'https://storage.googleapis.com/vitessce-demo-data/anndata-test/pbmc3k_processed.zarr/'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Create a Vitessce view config\n", - "\n", - "Define the data and views you would like to include in the widget." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", - "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(adata_path=url, obs_set_paths=[\"obs/louvain\"], obs_set_names=[\"Louvain\"], obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"], obs_embedding_names=[\"UMAP\", \"PCA\"], obs_feature_matrix_path=\"X\"))\n", - "\n", - "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "pca = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"PCA\")\n", - "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", - "\n", - "vc.layout((umap / pca) | ((cell_sets | genes) / heatmap));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Create the Vitessce widget" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A widget can be created with the `.widget()` method on the config instance. Here, the `proxy=True` parameter allows this widget to be used in a cloud notebook environment, such as Binder." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = vc.widget()\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of 3k PBMC reference from Remote Zarr Store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Set the URL for the Remote Dataset\n", + "\n", + "For this example, we already have uploaded the `pbmc3k` dataset as a zarr store from the [scanpy docs](https://scanpy.readthedocs.io/en/stable/api/scanpy.datasets.pbmc3k.html) to the cloud." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://storage.googleapis.com/vitessce-demo-data/anndata-test/pbmc3k_processed.zarr/'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Create a Vitessce view config\n", + "\n", + "Define the data and views you would like to include in the widget." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.15\", name='PBMC Reference')\n", + "dataset = vc.add_dataset(name='PBMC 3k').add_object(AnnDataWrapper(adata_url=url, obs_set_paths=[\"obs/louvain\"], obs_set_names=[\"Louvain\"], obs_embedding_paths=[\"obsm/X_umap\", \"obsm/X_pca\"], obs_embedding_names=[\"UMAP\", \"PCA\"], obs_feature_matrix_path=\"X\"))\n", + "\n", + "umap = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "pca = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"PCA\")\n", + "cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", + "\n", + "vc.layout((umap / pca) | ((cell_sets | genes) / heatmap));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create the Vitessce widget" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A widget can be created with the `.widget()` method on the config instance. Here, the `proxy=True` parameter allows this widget to be used in a cloud notebook environment, such as Binder." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/notebooks/widget_shortcut.ipynb b/docs/notebooks/widget_shortcut.ipynb index 911f1486..75f758d3 100644 --- a/docs/notebooks/widget_shortcut.ipynb +++ b/docs/notebooks/widget_shortcut.ipynb @@ -1,176 +1,176 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nbsphinx": "hidden" - }, - "source": [ - "# Vitessce Widget Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The from_object shortcut" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Import dependencies\n", - "\n", - "Import the functions and classes that we will be using." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from os.path import join\n", - "from urllib.request import urlretrieve\n", - "from anndata import read_h5ad\n", - "import scanpy as sc\n", - "\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " CoordinationType as ct,\n", - " AnnDataWrapper,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. 
Download the data\n", - "\n", - "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.makedirs(\"data\", exist_ok=True)\n", - "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", - "urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Load the data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = read_h5ad(join(\"data\", \"habib17.processed.h5ad\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3.1. Preprocess the Data For Visualization" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_dispersion = adata.var[\"dispersions_norm\"][\n", - " sorted(\n", - " range(len(adata.var[\"dispersions_norm\"])),\n", - " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", - " )[-51:][0]\n", - "]\n", - "adata.var[\"top_highly_variable\"] = (\n", - " adata.var[\"dispersions_norm\"] > top_dispersion\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With one line of code, you may create a Vitessce widget based on an automatically inferred configuration." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vw = VitessceConfig.from_object(AnnDataWrapper(\n", - " adata,\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " obs_embedding_names=[\"UMAP\"],\n", - " obs_set_paths=[\"obs/CellType\"],\n", - " obs_set_names=[\"Cell Type\"],\n", - " obs_feature_matrix_path=\"X\",\n", - " feature_filter_path=\"var/top_highly_variable\"\n", - "), schema_version=\"1.0.15\").widget(height=800)\n", - "vw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The from_object shortcut" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "Import the functions and classes that we will be using." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join\n", + "from urllib.request import urlretrieve\n", + "from anndata import read_h5ad\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " AnnDataWrapper,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Download the data\n", + "\n", + "For this example, we need to download a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs(\"data\", exist_ok=True)\n", + "adata_filepath = join(\"data\", \"habib17.processed.h5ad\")\n", + "urlretrieve('https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad', adata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Load the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = read_h5ad(join(\"data\", \"habib17.processed.h5ad\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.1. Preprocess the Data For Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_dispersion = adata.var[\"dispersions_norm\"][\n", + " sorted(\n", + " range(len(adata.var[\"dispersions_norm\"])),\n", + " key=lambda k: adata.var[\"dispersions_norm\"][k],\n", + " )[-51:][0]\n", + "]\n", + "adata.var[\"top_highly_variable\"] = (\n", + " adata.var[\"dispersions_norm\"] > top_dispersion\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With one line of code, you may create a Vitessce widget based on an automatically inferred configuration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw = VitessceConfig.from_object(AnnDataWrapper(\n", + " adata,\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " obs_embedding_names=[\"UMAP\"],\n", + " obs_set_paths=[\"obs/CellType\"],\n", + " obs_set_names=[\"Cell Type\"],\n", + " obs_feature_matrix_path=\"X\",\n", + " feature_filter_path=\"var/top_highly_variable\"\n", + "), schema_version=\"1.0.15\").widget(height=800)\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 8620d1868abc740738311a32dcba4f9d075ddd5e Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 16 May 2024 15:58:10 +0200 Subject: [PATCH 7/9] (chore): fix last final args --- vitessce/wrappers.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/vitessce/wrappers.py b/vitessce/wrappers.py index 5d70f08a..9ef7e17e 100644 --- a/vitessce/wrappers.py +++ b/vitessce/wrappers.py @@ -938,14 +938,14 @@ def image_file_def_creator(base_url): class AnnDataWrapper(AbstractWrapper): - def __init__(self, adata_path=None, adata_url=None, adata_store=None, obs_feature_matrix_path=None, feature_filter_path=None, initial_feature_filter_path=None, obs_set_paths=None, obs_set_names=None, 
obs_locations_path=None, obs_segmentations_path=None, obs_embedding_paths=None, obs_embedding_names=None, obs_embedding_dims=None, obs_spots_path=None, obs_points_path=None, request_init=None, feature_labels_path=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, obs_labels_paths=None, obs_labels_names=None, **kwargs): + def __init__(self, adata_path=None, adata_url=None, adata_store=None, obs_feature_matrix_path=None, feature_filter_path=None, initial_feature_filter_path=None, obs_set_paths=None, obs_set_names=None, obs_locations_path=None, obs_segmentations_path=None, obs_embedding_paths=None, obs_embedding_names=None, obs_embedding_dims=None, obs_spots_path=None, obs_points_path=None, feature_labels_path=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, obs_labels_paths=None, obs_labels_names=None, **kwargs): """ Wrap an AnnData object by creating an instance of the ``AnnDataWrapper`` class. - :param str base_path: A path to an AnnData object written to a Zarr store containing single-cell experiment data. - :param str base_url: A remote url pointing to a zarr-backed AnnData store. - :param base_store: A path to pass to zarr.FSStore, or an existing store instance. - :type base_store: str or zarr.Storage + :param str adata_path: A path to an AnnData object written to a Zarr store containing single-cell experiment data. + :param str adata_url: A remote url pointing to a zarr-backed AnnData store. + :param adata_store: A path to pass to zarr.FSStore, or an existing store instance. + :type adata_store: str or zarr.Storage :param str obs_feature_matrix_path: Location of the expression (cell x gene) matrix, like `X` or `obsm/highly_variable_genes_subset` :param str feature_filter_path: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_path` if obs_feature_matrix_path points to a subset of `X` of the full `var` list. 
:param str initial_feature_filter_path: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_path` if obs_feature_matrix_path points to a subset of `X` of the full `var` list. @@ -958,7 +958,6 @@ def __init__(self, adata_path=None, adata_url=None, adata_store=None, obs_featur :param list[str] obs_embedding_dims: Dimensions along which to get data for the scatterplot, like `[[0, 1], [4, 5]]` where `[0, 1]` is just the normal x and y but `[4, 5]` could be comparing the third and fourth principal components, for example. :param str obs_spots_path: Column name in `obsm` that contains centroid coordinates for displaying spots in the spatial viewer :param str obs_points_path: Column name in `obsm` that contains centroid coordinates for displaying points in the spatial viewer - :param dict request_init: options to be passed along with every fetch request from the browser, like `{ "header": { "Authorization": "Bearer dsfjalsdfa1431" } }` :param str feature_labels_path: The name of a column containing feature labels (e.g., alternate gene symbols), instead of the default index in `var` of the AnnData store. :param str obs_labels_path: (DEPRECATED) The name of a column containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. Use `obs_labels_paths` and `obs_labels_names` instead. This arg will be removed in a future release. :param list[str] obs_labels_paths: The names of columns containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. 
@@ -977,10 +976,10 @@ def __init__(self, adata_path=None, adata_url=None, adata_store=None, obs_featur num_inputs = sum([1 for x in [adata_path, adata_url, adata_store] if x is not None]) if num_inputs > 1: raise ValueError( - "Expected only one of base_path, base_url, or base_store to be provided") + "Expected only one of adataa_path, adata_url, or adata_store to be provided") if num_inputs == 0: raise ValueError( - "Expected one of base_path, base_url, or base_store to be provided") + "Expected one of adataa_path, adata_url, or adata_store to be provided") if adata_path is not None: self.is_remote = False From 08bd8d7017de10eac6c93c9b88fe53415581d3f1 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 16 May 2024 16:14:33 +0200 Subject: [PATCH 8/9] (fix): name --- vitessce/wrappers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vitessce/wrappers.py b/vitessce/wrappers.py index 9ef7e17e..e6bd6504 100644 --- a/vitessce/wrappers.py +++ b/vitessce/wrappers.py @@ -976,10 +976,10 @@ def __init__(self, adata_path=None, adata_url=None, adata_store=None, obs_featur num_inputs = sum([1 for x in [adata_path, adata_url, adata_store] if x is not None]) if num_inputs > 1: raise ValueError( - "Expected only one of adataa_path, adata_url, or adata_store to be provided") + "Expected only one of adata_path, adata_url, or adata_store to be provided") if num_inputs == 0: raise ValueError( - "Expected one of adataa_path, adata_url, or adata_store to be provided") + "Expected one of adata_path, adata_url, or adata_store to be provided") if adata_path is not None: self.is_remote = False From ed5c494c2cf808925487d22d51f408ce7e3fe9ec Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 16 May 2024 16:16:49 +0200 Subject: [PATCH 9/9] (fix): remove `request_init` completely and fix `setup.cfg` --- setup.cfg | 15 ++++++++++----- vitessce/wrappers.py | 1 - 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/setup.cfg b/setup.cfg index b20c7454..7ddeb2c0 
100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,11 +7,16 @@ per-file-ignores = vitessce/__init__.py: F401 vitessce/data_utils/__init__.py: F401 ignore = - E501, # Ignore line too long - W605, # Ignore invalid escape sequence '\*' - W503, # Ignore line break before binary operator: Skim down the left edge to understand intent. - E127 # Ignore continuation line over-indented for visual indent - E128 # Ignore continuation line under-indented for visual indent + # Ignore line too long + E501, + # Ignore invalid escape sequence '\*' + W605, + # Ignore line break before binary operator: Skim down the left edge to understand intent. + W503, + # Ignore continuation line over-indented for visual indent + E127 + # Ignore continuation line under-indented for visual indent + E128 exclude = ./js/node_modules/, ./docs/notebooks/.ipynb_checkpoints/, diff --git a/vitessce/wrappers.py b/vitessce/wrappers.py index e6bd6504..e579e36b 100644 --- a/vitessce/wrappers.py +++ b/vitessce/wrappers.py @@ -1008,7 +1008,6 @@ def __init__(self, adata_path=None, adata_url=None, adata_store=None, obs_featur self._mappings_obsm_dims = obs_embedding_dims self._spatial_spots_obsm = obs_spots_path self._spatial_points_obsm = obs_points_path - self._request_init = request_init self._feature_labels = feature_labels_path # Support legacy provision of single obs labels path if (obs_labels_path is not None):