From 56bd44b2c108d38b10de6cc2feab9bcb664ea7d1 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Tue, 28 May 2024 12:39:52 -0700 Subject: [PATCH 01/29] Add setup instructions to integrate soma spatial --- api/python/cellxgene_census/pyproject.toml | 3 +- .../SPATIAL-README.md | 5 + tools/cellxgene_census_builder/pyproject.toml | 9 +- .../spatial_dataset_ingest.ipynb | 123 ++++++++++++++++++ 4 files changed, 136 insertions(+), 4 deletions(-) create mode 100644 tools/cellxgene_census_builder/SPATIAL-README.md create mode 100644 tools/cellxgene_census_builder/spatial_dev_tools/spatial_dataset_ingest.ipynb diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml index 1ec0b29e1..d42e819ba 100644 --- a/api/python/cellxgene_census/pyproject.toml +++ b/api/python/cellxgene_census/pyproject.toml @@ -31,7 +31,8 @@ dependencies= [ # NOTE: the tiledbsoma version must be >= to the version used in the Census builder, to # ensure that the assets are readable (tiledbsoma supports backward compatible reading). # Make sure this version does not fall behind the builder's tiledbsoma version. 
- "tiledbsoma~=1.11.4", + # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@6569fc0#egg=tiledbsoma&subdirectory=apis/python/", "anndata", "numpy>=1.21,<2.0", "requests", diff --git a/tools/cellxgene_census_builder/SPATIAL-README.md b/tools/cellxgene_census_builder/SPATIAL-README.md new file mode 100644 index 000000000..19224eeaf --- /dev/null +++ b/tools/cellxgene_census_builder/SPATIAL-README.md @@ -0,0 +1,5 @@ +## Development Environment Setup and Run + +- Install `cellxgene_census` package [from source](../../api/python/cellxgene_census/README.md) +- `pip install -e tools/cellxgene_census_builder` +- [Dev tools for spatial](./spatial_dev_tools/) contains scripts and notebooks to aid development and testing diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 5f684c238..5a2d018d1 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -25,7 +25,8 @@ classifiers = [ ] dependencies= [ "typing_extensions==4.10.0", - "pyarrow==15.0.2", + # TODO (spatial): This was unpinned to have the builder work on MacOS. Pin it when it is time to release or MacOS issue is fixed + "pyarrow", "pandas[performance]==2.2.1", "anndata==0.10.6", "numpy==1.26.4", @@ -34,8 +35,10 @@ dependencies= [ # recent cellxgene-census _readers_ are able to read the results of a Census build (writer). 
# The compatibility matrix is defined here: # https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md - "tiledbsoma==1.9.3", - "cellxgene-census==1.12.0", + # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@6569fc0#egg=tiledbsoma&subdirectory=apis/python/", + # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released + # "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", "scipy==1.12.0", "fsspec[http]==2024.3.1", diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/spatial_dataset_ingest.ipynb new file mode 100644 index 000000000..256b29be4 --- /dev/null +++ b/tools/cellxgene_census_builder/spatial_dev_tools/spatial_dataset_ingest.ipynb @@ -0,0 +1,123 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "8502ec5c-26ac-423e-89f1-d624b91e384c", + "metadata": {}, + "outputs": [], + "source": [ + "import tiledbsoma\n", + "from tiledbsoma.experimental.ingest import from_cxg_spatial_h5ad" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1424f61e-f47a-485a-9765-7598f8e469fa", + "metadata": {}, + "outputs": [], + "source": [ + "from_cxg_spatial_h5ad(\n", + " input_h5ad_path=\"/Users/psridharan/Downloads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad\",\n", + " experiment_uri=\"soma-spatial\",\n", + " measurement_name=\"RNA\",\n", + " scene_name=\"c63d5cb4-1046-4948-a188-e6af50ef90f4\",\n", + " uns_keys=[],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7919de5f-7709-46c1-b2d0-c4288a880a76", + "metadata": {}, + "outputs": [], + "source": [ + "sp = tiledbsoma.open(\"soma-spatial\")\n", + "sp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "7f08df07-2027-494f-b383-b9e739a614fd", + "metadata": {}, + "outputs": [], + "source": [ + "sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35ba9213-37f1-40b7-a922-f31e75258240", + "metadata": {}, + "outputs": [], + "source": [ + "sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"img\"][\"hires\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a792279-b560-4246-b853-8735f85e2bb3", + "metadata": {}, + "outputs": [], + "source": [ + "hires_ndarray = sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"img\"][\"hires\"].read()\n", + "hires_ndarray.to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "616715c4-0178-4f27-831f-63d6c024cf1a", + "metadata": {}, + "outputs": [], + "source": [ + "sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"obsl\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f596b70a-528b-43c1-aa75-429d13cc164b", + "metadata": {}, + "outputs": [], + "source": [ + "obsl_df = sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"obsl\"]\n", + "obsl_df.read().concat().to_pandas()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c144a87-72ba-4924-b87b-e4552b00a9ae", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 76c2a8a44cd12c245676d700d241873ab4db9463 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Tue, 28 May 2024 14:30:18 -0700 Subject: [PATCH 02/29] Install `git` in census builder docker container --- 
tools/cellxgene_census_builder/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/cellxgene_census_builder/Dockerfile b/tools/cellxgene_census_builder/Dockerfile index a5b7cd40c..0d586a69f 100644 --- a/tools/cellxgene_census_builder/Dockerfile +++ b/tools/cellxgene_census_builder/Dockerfile @@ -6,12 +6,14 @@ ARG COMMIT_SHA ENV COMMIT_SHA=${COMMIT_SHA} # Ubuntu 22 contains only the python3.11 RC as of 2023-12-21, so use deadsnakes +# TODO (spatial): `git` is added to this dockerfile to be able to install python packages from github. Remove when it is not needed. RUN apt update && \ apt install -y software-properties-common && \ add-apt-repository -y ppa:deadsnakes/ppa && \ apt update && \ apt -y full-upgrade && \ apt -y install python3.11 python3.11-venv python3-pip awscli && \ + apt -y install git && \ apt-get clean # set python3.11 as default From a38c2154d5e9224440a4905c90bcbea2692b4b22 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Tue, 28 May 2024 14:59:33 -0700 Subject: [PATCH 03/29] Add back pyarrow pin on builder for testing --- tools/cellxgene_census_builder/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 5a2d018d1..7db1cff1f 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ dependencies= [ "typing_extensions==4.10.0", # TODO (spatial): This was unpinned to have the builder work on MacOS. 
Pin it when it is time to release or MacOS issue is fixed - "pyarrow", + "pyarrow==15.0.2", "pandas[performance]==2.2.1", "anndata==0.10.6", "numpy==1.26.4", From 8ae84af247429210c3d45256c6f5ac8f198003c8 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Tue, 28 May 2024 16:24:24 -0700 Subject: [PATCH 04/29] Modify builder pins to test builder unit tests on GH --- tools/cellxgene_census_builder/pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 7db1cff1f..85945d5a2 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -36,9 +36,10 @@ dependencies= [ # The compatibility matrix is defined here: # https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@6569fc0#egg=tiledbsoma&subdirectory=apis/python/", + # "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@6569fc0#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma==1.9.3", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released - # "cellxgene-census==1.12.0", + "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", "scipy==1.12.0", "fsspec[http]==2024.3.1", From b3301831adc22029fb0cf860447231e85ef9252d Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Tue, 28 May 2024 17:01:00 -0700 Subject: [PATCH 05/29] Remove cellxgene_census package dependency for testing --- tools/cellxgene_census_builder/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 85945d5a2..e5b4e8ad6 
100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -39,7 +39,7 @@ dependencies= [ # "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@6569fc0#egg=tiledbsoma&subdirectory=apis/python/", "tiledbsoma==1.9.3", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released - "cellxgene-census==1.12.0", + # "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", "scipy==1.12.0", "fsspec[http]==2024.3.1", From 6655196fae116d08d213f2efa545f891c27270f4 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Tue, 28 May 2024 17:14:52 -0700 Subject: [PATCH 06/29] Pin to tiledbsoma commit to test builder unit tests --- tools/cellxgene_census_builder/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index e5b4e8ad6..05c3200e9 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -36,8 +36,8 @@ dependencies= [ # The compatibility matrix is defined here: # https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - # "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@6569fc0#egg=tiledbsoma&subdirectory=apis/python/", - "tiledbsoma==1.9.3", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@7c456b6#egg=tiledbsoma&subdirectory=apis/python/", + # "tiledbsoma==1.9.3", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released # "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", From e91b2fa6cfb0f380bc75779f8c544e111ca827ab Mon Sep 17 00:00:00 2001 From: Prathap 
Sridharan Date: Tue, 28 May 2024 17:31:44 -0700 Subject: [PATCH 07/29] Pin to tiledbsoma git commit for 1.9.5 to test --- tools/cellxgene_census_builder/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 05c3200e9..5addb8b8b 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -36,7 +36,7 @@ dependencies= [ # The compatibility matrix is defined here: # https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@7c456b6#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@3ed7453#egg=tiledbsoma&subdirectory=apis/python/", # "tiledbsoma==1.9.3", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released # "cellxgene-census==1.12.0", From 4115601c12580130a20a3472103c43e15ec971cf Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Tue, 28 May 2024 17:49:56 -0700 Subject: [PATCH 08/29] Unpin pyarrow in builder to test builder unit tests --- tools/cellxgene_census_builder/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 5addb8b8b..5ee87837f 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ dependencies= [ "typing_extensions==4.10.0", # TODO (spatial): This was unpinned to have the builder work on MacOS. 
Pin it when it is time to release or MacOS issue is fixed - "pyarrow==15.0.2", + "pyarrow", "pandas[performance]==2.2.1", "anndata==0.10.6", "numpy==1.26.4", From 1c7f98511e17ca109117892ae75967b97576dc96 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Tue, 28 May 2024 18:06:59 -0700 Subject: [PATCH 09/29] Pin to tiledbsoma git commit for 1.10.2 to test --- tools/cellxgene_census_builder/SPATIAL-README.md | 3 +++ tools/cellxgene_census_builder/pyproject.toml | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/cellxgene_census_builder/SPATIAL-README.md b/tools/cellxgene_census_builder/SPATIAL-README.md index 19224eeaf..7f739b3a9 100644 --- a/tools/cellxgene_census_builder/SPATIAL-README.md +++ b/tools/cellxgene_census_builder/SPATIAL-README.md @@ -2,4 +2,7 @@ - Install `cellxgene_census` package [from source](../../api/python/cellxgene_census/README.md) - `pip install -e tools/cellxgene_census_builder` + + **NOTE:** When running the builder on MacOS, unpin `pyarrow` in [census builder pyproject.toml](./pyproject.toml) + - [Dev tools for spatial](./spatial_dev_tools/) contains scripts and notebooks to aid development and testing diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 5ee87837f..1e2c370ee 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -25,8 +25,7 @@ classifiers = [ ] dependencies= [ "typing_extensions==4.10.0", - # TODO (spatial): This was unpinned to have the builder work on MacOS. 
Pin it when it is time to release or MacOS issue is fixed - "pyarrow", + "pyarrow==15.0.2", "pandas[performance]==2.2.1", "anndata==0.10.6", "numpy==1.26.4", @@ -36,8 +35,7 @@ dependencies= [ # The compatibility matrix is defined here: # https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@3ed7453#egg=tiledbsoma&subdirectory=apis/python/", - # "tiledbsoma==1.9.3", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@b1f7fd5#egg=tiledbsoma&subdirectory=apis/python/", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released # "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", From 4aa689e9549fd1b217315b7419b904368f592cdc Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Tue, 28 May 2024 18:23:16 -0700 Subject: [PATCH 10/29] Pin to tiledbsoma git commit for 1.11.1 to test --- tools/cellxgene_census_builder/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 1e2c370ee..7b3ba50c9 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -35,7 +35,7 @@ dependencies= [ # The compatibility matrix is defined here: # https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@b1f7fd5#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@78ff65f#egg=tiledbsoma&subdirectory=apis/python/", # TODO 
(spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released # "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", From 327cee6293317686d1ed98b7d32db8d7381aaf77 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Thu, 30 May 2024 16:27:23 -0700 Subject: [PATCH 11/29] Pin tiledbsoma to 16f481f - head of spatial branch --- api/python/cellxgene_census/pyproject.toml | 2 +- tools/cellxgene_census_builder/pyproject.toml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml index d42e819ba..1c5fcb8bb 100644 --- a/api/python/cellxgene_census/pyproject.toml +++ b/api/python/cellxgene_census/pyproject.toml @@ -32,7 +32,7 @@ dependencies= [ # ensure that the assets are readable (tiledbsoma supports backward compatible reading). # Make sure this version does not fall behind the builder's tiledbsoma version. # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@6569fc0#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@16f481f#egg=tiledbsoma&subdirectory=apis/python/", "anndata", "numpy>=1.21,<2.0", "requests", diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 7b3ba50c9..c3a6013b8 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ ] dependencies= [ "typing_extensions==4.10.0", - "pyarrow==15.0.2", + "pyarrow", "pandas[performance]==2.2.1", "anndata==0.10.6", "numpy==1.26.4", @@ -35,7 +35,7 @@ dependencies= [ # The compatibility matrix is defined here: # https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md # TODO 
(spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@78ff65f#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@16f481f#egg=tiledbsoma&subdirectory=apis/python/", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released # "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", From 97ef790afe97f8bbf2e3dceebc7c9363b1ae24f5 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Thu, 30 May 2024 16:39:50 -0700 Subject: [PATCH 12/29] Pin pyarrow back to 15.0.2 --- tools/cellxgene_census_builder/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index c3a6013b8..98fcb39bc 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ ] dependencies= [ "typing_extensions==4.10.0", - "pyarrow", + "pyarrow==15.0.2", "pandas[performance]==2.2.1", "anndata==0.10.6", "numpy==1.26.4", From 3567dc745b6747a97e6334a5f920bc19db1c2a52 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Mon, 3 Jun 2024 10:39:30 -0700 Subject: [PATCH 13/29] Pin tiledbsoma to fc5f8e7 to fix census builder tests --- api/python/cellxgene_census/pyproject.toml | 2 +- tools/cellxgene_census_builder/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml index 1c5fcb8bb..e246dafba 100644 --- a/api/python/cellxgene_census/pyproject.toml +++ b/api/python/cellxgene_census/pyproject.toml @@ -32,7 +32,7 @@ dependencies= [ # ensure that the assets are readable (tiledbsoma supports backward compatible 
reading). # Make sure this version does not fall behind the builder's tiledbsoma version. # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@16f481f#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@fc5f8e7#egg=tiledbsoma&subdirectory=apis/python/", "anndata", "numpy>=1.21,<2.0", "requests", diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 98fcb39bc..c2deed373 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -35,7 +35,7 @@ dependencies= [ # The compatibility matrix is defined here: # https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@16f481f#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@fc5f8e7#egg=tiledbsoma&subdirectory=apis/python/", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released # "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", From ba5ac8f4eb3143785ab28db7f2fda664fe1ad8a1 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Tue, 4 Jun 2024 14:51:18 -0700 Subject: [PATCH 14/29] Add comments to notebook --- .../tiledbsoma_spatial_dataset_ingest.ipynb | 628 ++++++++++++++++++ 1 file changed, 628 insertions(+) create mode 100644 tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb 
b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb new file mode 100644 index 000000000..753d4959e --- /dev/null +++ b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb @@ -0,0 +1,628 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "8502ec5c-26ac-423e-89f1-d624b91e384c", + "metadata": {}, + "outputs": [], + "source": [ + "import tiledbsoma\n", + "from tiledbsoma.experimental.ingest import from_cxg_spatial_h5ad" + ] + }, + { + "cell_type": "markdown", + "id": "236d24e6-0c4d-476e-b328-33e0eedd8fc8", + "metadata": {}, + "source": [ + "# Ingest CXG spatial h5ad file and create SOMA object on disk" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1424f61e-f47a-485a-9765-7598f8e469fa", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":119: FutureWarning: SparseDataset is deprecated and will be removed in late 2024. It has been replaced by the public classes CSRDataset and CSCDataset.\n", + "\n", + "For instance checks, use `isinstance(X, (anndata.experimental.CSRDataset, anndata.experimental.CSCDataset))` instead.\n", + "\n", + "For creation, use `anndata.experimental.sparse_dataset(X)` instead.\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "'soma-spatial'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# NOTE: Replace value of `my_h5ad_path` with an appropriate path to a cellxgene h5ad spatial file\n", + "my_h5ad_path = \"../../../ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad\"\n", + "\n", + "from_cxg_spatial_h5ad(\n", + " input_h5ad_path=my_h5ad_path,\n", + " experiment_uri=\"soma-spatial\",\n", + " measurement_name=\"RNA\",\n", + " scene_name=\"c63d5cb4-1046-4948-a188-e6af50ef90f4\",\n", + " uns_keys=[],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": 
"f738c098-d7a3-4f4b-b2ba-77d4c7109045", + "metadata": {}, + "source": [ + "# Inspect the structure of the SOMA object on disk" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7919de5f-7709-46c1-b2d0-c4288a880a76", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sp = tiledbsoma.open(\"soma-spatial\")\n", + "sp" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7f08df07-2027-494f-b383-b9e739a614fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "35ba9213-37f1-40b7-a922-f31e75258240", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"img\"][\"hires\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5a792279-b560-4246-b853-8735f85e2bb3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[[237, 237, 237, ..., 237, 237, 237],\n", + " [237, 237, 237, ..., 237, 237, 237],\n", + " [237, 237, 237, ..., 237, 237, 237],\n", + " ...,\n", + " [237, 237, 237, ..., 234, 234, 233],\n", + " [237, 237, 237, ..., 234, 234, 233],\n", + " [237, 237, 237, ..., 234, 234, 233]],\n", + "\n", + " [[241, 241, 241, ..., 240, 240, 240],\n", + " [241, 241, 241, ..., 240, 240, 240],\n", + " [241, 241, 241, ..., 240, 240, 240],\n", + " ...,\n", + " [241, 241, 241, ..., 237, 237, 237],\n", + " [241, 241, 241, ..., 237, 237, 236],\n", + " [241, 241, 241, ..., 237, 237, 236]],\n", + "\n", + " [[240, 240, 240, ..., 243, 243, 243],\n", + " 
[240, 240, 240, ..., 243, 243, 243],\n", + " [240, 240, 240, ..., 243, 243, 243],\n", + " ...,\n", + " [240, 240, 240, ..., 240, 240, 239],\n", + " [240, 240, 240, ..., 240, 240, 238],\n", + " [240, 240, 240, ..., 240, 240, 238]]], dtype=uint8)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hires_ndarray = sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"img\"][\"hires\"].read()\n", + "hires_ndarray.to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "616715c4-0178-4f27-831f-63d6c024cf1a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"obsl\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f596b70a-528b-43c1-aa75-429d13cc164b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yxsoma_joinidobs_idin_tissuearray_rowarray_colsamplen_countslog_counts...cell_typeassaydiseaseorganismsextissueself_reported_ethnicitydevelopment_stageobservation_joinid_soma_geometry
0269207475ACGCCTGACACGCGCT-1000WS_PLA_S9101770NaNNaN...unknownVisium Spatial Gene ExpressionnormalHomo sapiensunknowndecidua basalisunknownCarnegie stage 23RtI>4TTINp8.912657
1269254351ACAGGAGGCGCAGCCG-1020WS_PLA_S9101770NaNNaN...unknownVisium Spatial Gene ExpressionnormalHomo sapiensunknowndecidua basalisunknownCarnegie stage 231NhO+d24#28.912657
2269350897AGTGGGAGTATACACG-1060WS_PLA_S9101770NaNNaN...unknownVisium Spatial Gene ExpressionnormalHomo sapiensunknowndecidua basalisunknownCarnegie stage 23)WcdK+K-I&8.912657
32697791956CGCAATCGATCATTAG-10240WS_PLA_S9101770NaNNaN...unknownVisium Spatial Gene ExpressionnormalHomo sapiensunknowndecidua basalisunknownCarnegie stage 23dXpvvuF)=I8.912657
426982769AAATGTATCTTATCCC-10260WS_PLA_S9101770NaNNaN...unknownVisium Spatial Gene ExpressionnormalHomo sapiensunknowndecidua basalisunknownCarnegie stage 23P&gHd-$`ro8.912657
..................................................................
4987201113284754TTAGGATGGGAGGGTA-1147127WS_PLA_S91017703707.08.217978...uterine smooth muscle cellVisium Spatial Gene ExpressionnormalHomo sapiensunknowndecidua basalisunknownCarnegie stage 23Qf1g!`$s)c8.912657
4988201116144178TCCGGCTGTCGGGTCG-1159127WS_PLA_S91017702860.07.958577...uterine smooth muscle cellVisium Spatial Gene ExpressionnormalHomo sapiensunknowndecidua basalisunknownCarnegie stage 23C<G$z5^2qT8.912657
4989201118534702TTAACTGATCGTTTGG-1069127WS_PLA_S9101770NaNNaN...unknownVisium Spatial Gene ExpressionnormalHomo sapiensunknowndecidua basalisunknownCarnegie stage 23fbCxa)n{M}8.912657
4990201119004678TGTTCTCATACTATAG-1071127WS_PLA_S9101770NaNNaN...unknownVisium Spatial Gene ExpressionnormalHomo sapiensunknowndecidua basalisunknownCarnegie stage 23v+}g+r2P+{8.912657
4991201120444881TTGAAGTGCATCTACA-1077127WS_PLA_S9101770NaNNaN...unknownVisium Spatial Gene ExpressionnormalHomo sapiensunknowndecidua basalisunknownCarnegie stage 23+t<L)+OTQX8.912657
\n", + "

4992 rows × 114 columns

\n", + "
" + ], + "text/plain": [ + " y x soma_joinid obs_id in_tissue array_row \\\n", + "0 269 207 475 ACGCCTGACACGCGCT-1 0 0 \n", + "1 269 254 351 ACAGGAGGCGCAGCCG-1 0 2 \n", + "2 269 350 897 AGTGGGAGTATACACG-1 0 6 \n", + "3 269 779 1956 CGCAATCGATCATTAG-1 0 24 \n", + "4 269 827 69 AAATGTATCTTATCCC-1 0 26 \n", + "... ... ... ... ... ... ... \n", + "4987 2011 1328 4754 TTAGGATGGGAGGGTA-1 1 47 \n", + "4988 2011 1614 4178 TCCGGCTGTCGGGTCG-1 1 59 \n", + "4989 2011 1853 4702 TTAACTGATCGTTTGG-1 0 69 \n", + "4990 2011 1900 4678 TGTTCTCATACTATAG-1 0 71 \n", + "4991 2011 2044 4881 TTGAAGTGCATCTACA-1 0 77 \n", + "\n", + " array_col sample n_counts log_counts ... \\\n", + "0 0 WS_PLA_S9101770 NaN NaN ... \n", + "1 0 WS_PLA_S9101770 NaN NaN ... \n", + "2 0 WS_PLA_S9101770 NaN NaN ... \n", + "3 0 WS_PLA_S9101770 NaN NaN ... \n", + "4 0 WS_PLA_S9101770 NaN NaN ... \n", + "... ... ... ... ... ... \n", + "4987 127 WS_PLA_S9101770 3707.0 8.217978 ... \n", + "4988 127 WS_PLA_S9101770 2860.0 7.958577 ... \n", + "4989 127 WS_PLA_S9101770 NaN NaN ... \n", + "4990 127 WS_PLA_S9101770 NaN NaN ... \n", + "4991 127 WS_PLA_S9101770 NaN NaN ... \n", + "\n", + " cell_type assay disease \\\n", + "0 unknown Visium Spatial Gene Expression normal \n", + "1 unknown Visium Spatial Gene Expression normal \n", + "2 unknown Visium Spatial Gene Expression normal \n", + "3 unknown Visium Spatial Gene Expression normal \n", + "4 unknown Visium Spatial Gene Expression normal \n", + "... ... ... ... 
\n", + "4987 uterine smooth muscle cell Visium Spatial Gene Expression normal \n", + "4988 uterine smooth muscle cell Visium Spatial Gene Expression normal \n", + "4989 unknown Visium Spatial Gene Expression normal \n", + "4990 unknown Visium Spatial Gene Expression normal \n", + "4991 unknown Visium Spatial Gene Expression normal \n", + "\n", + " organism sex tissue self_reported_ethnicity \\\n", + "0 Homo sapiens unknown decidua basalis unknown \n", + "1 Homo sapiens unknown decidua basalis unknown \n", + "2 Homo sapiens unknown decidua basalis unknown \n", + "3 Homo sapiens unknown decidua basalis unknown \n", + "4 Homo sapiens unknown decidua basalis unknown \n", + "... ... ... ... ... \n", + "4987 Homo sapiens unknown decidua basalis unknown \n", + "4988 Homo sapiens unknown decidua basalis unknown \n", + "4989 Homo sapiens unknown decidua basalis unknown \n", + "4990 Homo sapiens unknown decidua basalis unknown \n", + "4991 Homo sapiens unknown decidua basalis unknown \n", + "\n", + " development_stage observation_joinid _soma_geometry \n", + "0 Carnegie stage 23 RtI>4TTINp 8.912657 \n", + "1 Carnegie stage 23 1NhO+d24#2 8.912657 \n", + "2 Carnegie stage 23 )WcdK+K-I& 8.912657 \n", + "3 Carnegie stage 23 dXpvvuF)=I 8.912657 \n", + "4 Carnegie stage 23 P&gHd-$`ro 8.912657 \n", + "... ... ... ... 
\n", + "4987 Carnegie stage 23 Qf1g!`$s)c 8.912657 \n", + "4988 Carnegie stage 23 C Date: Tue, 4 Jun 2024 16:03:56 -0700 Subject: [PATCH 15/29] Create notebook to demo census object creation --- .../spatial_dev_tools/blocklist.txt | 0 .../census_spatial_dataset_ingest.ipynb | 122 +++++++++++++++++ .../spatial_dataset_ingest.ipynb | 123 ------------------ .../tiledbsoma_spatial_dataset_ingest.ipynb | 18 +-- 4 files changed, 131 insertions(+), 132 deletions(-) create mode 100644 tools/cellxgene_census_builder/spatial_dev_tools/blocklist.txt create mode 100644 tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb delete mode 100644 tools/cellxgene_census_builder/spatial_dev_tools/spatial_dataset_ingest.ipynb diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/blocklist.txt b/tools/cellxgene_census_builder/spatial_dev_tools/blocklist.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb new file mode 100644 index 000000000..400c348f9 --- /dev/null +++ b/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb @@ -0,0 +1,122 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1bbac1b0-ded5-4244-9800-1a8bb278fd15", + "metadata": {}, + "source": [ + "# Ingest CXG spatial h5ad files from a directory and create Census object" + ] + }, + { + "cell_type": "markdown", + "id": "e01b8234-5c45-4960-b1dd-3d1eb82fa233", + "metadata": {}, + "source": [ + "## Create a manifest file containing the location of spatial datasets to load" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "2a7d2e30-1e16-4296-921b-392ff351ea30", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from cellxgene_census_builder.build_soma.manifest import load_manifest" + ] + }, + { + "cell_type": "code", + 
"execution_count": 2, + "id": "868539a7-e4ed-46ac-a7c4-2b54fcff21fa", + "metadata": {}, + "outputs": [], + "source": [ + "def create_manifest_csv_file(spatial_datasets_dir, manifest_file_path):\n", + " file_ids = [os.path.splitext(filename)[0] for filename in os.listdir(spatial_datasets_dir)]\n", + " file_paths = [os.path.join(spatial_datasets_dir, filename) for filename in os.listdir(spatial_datasets_dir)]\n", + " manifest_content = \"\\n\".join([\", \".join(pair) for pair in zip(file_ids, file_paths, strict=False)])\n", + "\n", + " with open(manifest_file_path, \"w\") as f:\n", + " f.write(manifest_content.strip())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e28e1076-3b59-48c2-b963-c9597e7f495b", + "metadata": {}, + "outputs": [], + "source": [ + "# NOTE: Replace value of `spatial_datasets_dir` with a valid path to a folder containing datasets\n", + "spatial_datasets_dir = \"../../../ps_stuff/spatial_test_datasets\"\n", + "manifest_file_path = \"./manifest.csv\"\n", + "blocklist_file_path = \"./blocklist.txt\"\n", + "\n", + "create_manifest_csv_file(spatial_datasets_dir, manifest_file_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6fc3de32-56fe-4758-9426-15b92668a72f", + "metadata": {}, + "outputs": [], + "source": [ + "datasets = load_manifest(manifest_file_path, blocklist_file_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "dcc16556-5cee-4616-af86-9146cbc7fa36", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Dataset(dataset_id='c6f6e674-b59d-46cf-8525-73f64f9eef8c', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " 
Dataset(dataset_id='fa3893cb-d420-42ac-8263-09719a26102e', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/fa3893cb-d420-42ac-8263-09719a26102e.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='07998bf8-d070-41bb-a584-f8bdd1193aef', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='9624a105-319c-4abf-b10b-d96ce1650100', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/9624a105-319c-4abf-b10b-d96ce1650100.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='53e343af-979c-4525-a705-1b9d1a1fee14', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='6ab91271-5f48-4e98-92ef-d02ee21e63e1', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', 
collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='c63d5cb4-1046-4948-a188-e6af50ef90f4', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='1bb92cf8-ab3f-4bb0-a722-b241b5d377ed', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1)]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "datasets" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/spatial_dataset_ingest.ipynb deleted file mode 100644 index 256b29be4..000000000 --- a/tools/cellxgene_census_builder/spatial_dev_tools/spatial_dataset_ingest.ipynb +++ /dev/null @@ -1,123 +0,0 @@ -{ - "cells": [ - { - "cell_type": 
"code", - "execution_count": null, - "id": "8502ec5c-26ac-423e-89f1-d624b91e384c", - "metadata": {}, - "outputs": [], - "source": [ - "import tiledbsoma\n", - "from tiledbsoma.experimental.ingest import from_cxg_spatial_h5ad" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1424f61e-f47a-485a-9765-7598f8e469fa", - "metadata": {}, - "outputs": [], - "source": [ - "from_cxg_spatial_h5ad(\n", - " input_h5ad_path=\"/Users/psridharan/Downloads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad\",\n", - " experiment_uri=\"soma-spatial\",\n", - " measurement_name=\"RNA\",\n", - " scene_name=\"c63d5cb4-1046-4948-a188-e6af50ef90f4\",\n", - " uns_keys=[],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7919de5f-7709-46c1-b2d0-c4288a880a76", - "metadata": {}, - "outputs": [], - "source": [ - "sp = tiledbsoma.open(\"soma-spatial\")\n", - "sp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f08df07-2027-494f-b383-b9e739a614fd", - "metadata": {}, - "outputs": [], - "source": [ - "sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "35ba9213-37f1-40b7-a922-f31e75258240", - "metadata": {}, - "outputs": [], - "source": [ - "sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"img\"][\"hires\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5a792279-b560-4246-b853-8735f85e2bb3", - "metadata": {}, - "outputs": [], - "source": [ - "hires_ndarray = sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"img\"][\"hires\"].read()\n", - "hires_ndarray.to_numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "616715c4-0178-4f27-831f-63d6c024cf1a", - "metadata": {}, - "outputs": [], - "source": [ - "sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"obsl\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f596b70a-528b-43c1-aa75-429d13cc164b", - "metadata": {}, - 
"outputs": [], - "source": [ - "obsl_df = sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"obsl\"]\n", - "obsl_df.read().concat().to_pandas()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3c144a87-72ba-4924-b87b-e4552b00a9ae", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb index 753d4959e..95eb16764 100644 --- a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb +++ b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb @@ -1,5 +1,13 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "80a83763-0c07-439c-aee5-2cc3de405946", + "metadata": {}, + "source": [ + "# Ingest CXG spatial h5ad file and create SOMA object on disk" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -11,14 +19,6 @@ "from tiledbsoma.experimental.ingest import from_cxg_spatial_h5ad" ] }, - { - "cell_type": "markdown", - "id": "236d24e6-0c4d-476e-b328-33e0eedd8fc8", - "metadata": {}, - "source": [ - "# Ingest CXG spatial h5ad file and create SOMA object on disk" - ] - }, { "cell_type": "code", "execution_count": 2, @@ -66,7 +66,7 @@ "id": "f738c098-d7a3-4f4b-b2ba-77d4c7109045", "metadata": {}, "source": [ - "# Inspect the structure of the SOMA object on disk" + "## Inspect the structure of the SOMA object on disk" ] }, { From 
0f1f20ad02aad5d96c6d1393873b65b803dc2a52 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Wed, 5 Jun 2024 12:26:18 -0700 Subject: [PATCH 16/29] Use absolute file path for contents of manifest file --- .../census_spatial_dataset_ingest.ipynb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb index 400c348f9..e78e55bba 100644 --- a/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb +++ b/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb @@ -52,7 +52,7 @@ "outputs": [], "source": [ "# NOTE: Replace value of `spatial_datasets_dir` with a valid path to a folder containing datasets\n", - "spatial_datasets_dir = \"../../../ps_stuff/spatial_test_datasets\"\n", + "spatial_datasets_dir = \"/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets\"\n", "manifest_file_path = \"./manifest.csv\"\n", "blocklist_file_path = \"./blocklist.txt\"\n", "\n", @@ -78,14 +78,14 @@ { "data": { "text/plain": [ - "[Dataset(dataset_id='c6f6e674-b59d-46cf-8525-73f64f9eef8c', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", - " Dataset(dataset_id='fa3893cb-d420-42ac-8263-09719a26102e', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/fa3893cb-d420-42ac-8263-09719a26102e.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, 
schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", - " Dataset(dataset_id='07998bf8-d070-41bb-a584-f8bdd1193aef', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", - " Dataset(dataset_id='9624a105-319c-4abf-b10b-d96ce1650100', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/9624a105-319c-4abf-b10b-d96ce1650100.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", - " Dataset(dataset_id='53e343af-979c-4525-a705-1b9d1a1fee14', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", - " Dataset(dataset_id='6ab91271-5f48-4e98-92ef-d02ee21e63e1', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", - " Dataset(dataset_id='c63d5cb4-1046-4948-a188-e6af50ef90f4', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad', 
dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", - " Dataset(dataset_id='1bb92cf8-ab3f-4bb0-a722-b241b5d377ed', dataset_asset_h5ad_uri='../../../ps_stuff/spatial_test_datasets/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1)]" + "[Dataset(dataset_id='c6f6e674-b59d-46cf-8525-73f64f9eef8c', dataset_asset_h5ad_uri='/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='fa3893cb-d420-42ac-8263-09719a26102e', dataset_asset_h5ad_uri='/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/fa3893cb-d420-42ac-8263-09719a26102e.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='07998bf8-d070-41bb-a584-f8bdd1193aef', dataset_asset_h5ad_uri='/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, 
cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='9624a105-319c-4abf-b10b-d96ce1650100', dataset_asset_h5ad_uri='/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/9624a105-319c-4abf-b10b-d96ce1650100.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='53e343af-979c-4525-a705-1b9d1a1fee14', dataset_asset_h5ad_uri='/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='6ab91271-5f48-4e98-92ef-d02ee21e63e1', dataset_asset_h5ad_uri='/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " Dataset(dataset_id='c63d5cb4-1046-4948-a188-e6af50ef90f4', dataset_asset_h5ad_uri='/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1),\n", + " 
Dataset(dataset_id='1bb92cf8-ab3f-4bb0-a722-b241b5d377ed', dataset_asset_h5ad_uri='/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1)]" ] }, "execution_count": 5, From 5c81aaa23922960aa2b676e46159dbd67196dbe4 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Wed, 5 Jun 2024 15:03:27 -0700 Subject: [PATCH 17/29] Add "EFO:0010961" to the list of allowed assays --- .../src/cellxgene_census_builder/build_soma/globals.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py index e6b00cdfe..801717f98 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py @@ -329,6 +329,7 @@ "EFO:0010713", # 10x immune profiling "EFO:0010714", # 10x TCR enrichment "EFO:0010715", # 10x Ig enrichment + "EFO:0010961", # Visium Spatial Gene Expression "EFO:0010964", # barcoded plate-based single cell RNA-seq "EFO:0011025", # 10x 5' v1 "EFO:0022396", # TruSeq From ce4b525f421cacc9a464ffdcd89f5f06941ad34e Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Wed, 5 Jun 2024 15:22:00 -0700 Subject: [PATCH 18/29] Add comments for clarity in pyproject.toml --- api/python/cellxgene_census/pyproject.toml | 2 ++ tools/cellxgene_census_builder/pyproject.toml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml index e246dafba..c39d62cb4 100644 --- a/api/python/cellxgene_census/pyproject.toml +++ 
b/api/python/cellxgene_census/pyproject.toml @@ -31,6 +31,8 @@ dependencies= [ # NOTE: the tiledbsoma version must be >= to the version used in the Census builder, to # ensure that the assets are readable (tiledbsoma supports backward compatible reading). # Make sure this version does not fall behind the builder's tiledbsoma version. + # TODO (spatial): tiledbsoma pin to a PyPI release is temporarily commented out in favor git commit pin + # "tiledbsoma==1.11.4", # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@fc5f8e7#egg=tiledbsoma&subdirectory=apis/python/", "anndata", diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index c2deed373..41ec7cef3 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -34,6 +34,8 @@ dependencies= [ # recent cellxgene-census _readers_ are able to read the results of a Census build (writer). 
# The compatibility matrix is defined here: # https://github.com/TileDB-Inc/TileDB/blob/dev/format_spec/FORMAT_SPEC.md + # TODO (spatial): tiledbsoma pin to a PyPI release is temporarily commented out in favor git commit pin + # "tiledbsoma==1.9.3", # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@fc5f8e7#egg=tiledbsoma&subdirectory=apis/python/", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released From 1f654b6c5e0cf7c478467d2a755e8c8e299e4a31 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Wed, 5 Jun 2024 15:37:24 -0700 Subject: [PATCH 19/29] Fix filepaths in notebook --- .../spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb index 95eb16764..42424c98d 100644 --- a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb +++ b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb @@ -50,7 +50,9 @@ ], "source": [ "# NOTE: Replace value of `my_h5ad_path` with an appropriate path to a cellxgene h5ad spatial file\n", - "my_h5ad_path = \"../../../ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad\"\n", + "my_h5ad_path = (\n", + " \"/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad\"\n", + ")\n", "\n", "from_cxg_spatial_h5ad(\n", " input_h5ad_path=my_h5ad_path,\n", From 4712e95596398d3bbb468fa88c1a501f50af233a Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Wed, 5 Jun 2024 16:52:34 -0700 Subject: [PATCH 20/29] Make census 
builder run without errors on spatial datasets --- .../SPATIAL-README.md | 3 +- .../census_spatial_dataset_ingest.ipynb | 378 +++++++++++++++++- .../build_soma/validate_soma.py | 15 +- 3 files changed, 381 insertions(+), 15 deletions(-) diff --git a/tools/cellxgene_census_builder/SPATIAL-README.md b/tools/cellxgene_census_builder/SPATIAL-README.md index 7f739b3a9..b0937ece6 100644 --- a/tools/cellxgene_census_builder/SPATIAL-README.md +++ b/tools/cellxgene_census_builder/SPATIAL-README.md @@ -1,8 +1,9 @@ ## Development Environment Setup and Run -- Install `cellxgene_census` package [from source](../../api/python/cellxgene_census/README.md) - `pip install -e tools/cellxgene_census_builder` **NOTE:** When running the builder on MacOS, unpin `pyarrow` in [census builder pyproject.toml](./pyproject.toml) +- `pip install -e api/python/cellxgene_census` + - [Dev tools for spatial](./spatial_dev_tools/) contains scripts and notebooks to aid development and testing diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb index e78e55bba..3ec790807 100644 --- a/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb +++ b/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb @@ -47,21 +47,33 @@ { "cell_type": "code", "execution_count": 3, - "id": "e28e1076-3b59-48c2-b963-c9597e7f495b", + "id": "72ef927a-c18d-4215-a136-adf67c4da881", "metadata": {}, "outputs": [], "source": [ - "# NOTE: Replace value of `spatial_datasets_dir` with a valid path to a folder containing datasets\n", + "# NOTE: Change these variables to point to appropriate file paths on your machine\n", "spatial_datasets_dir = \"/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets\"\n", - "manifest_file_path = \"./manifest.csv\"\n", - "blocklist_file_path = \"./blocklist.txt\"\n", - "\n", - 
"create_manifest_csv_file(spatial_datasets_dir, manifest_file_path)" + "manifest_file_path = (\n", + " \"/Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/manifest.csv\"\n", + ")\n", + "blocklist_file_path = (\n", + " \"/Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/blocklist.txt\"\n", + ")" ] }, { "cell_type": "code", "execution_count": 4, + "id": "d5e2f07f-75a3-4e02-8295-be87b1043a4b", + "metadata": {}, + "outputs": [], + "source": [ + "create_manifest_csv_file(spatial_datasets_dir, manifest_file_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "6fc3de32-56fe-4758-9426-15b92668a72f", "metadata": {}, "outputs": [], @@ -71,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "dcc16556-5cee-4616-af86-9146cbc7fa36", "metadata": {}, "outputs": [ @@ -88,7 +100,7 @@ " Dataset(dataset_id='1bb92cf8-ab3f-4bb0-a722-b241b5d377ed', dataset_asset_h5ad_uri='/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad', dataset_version_id='', dataset_h5ad_path='', dataset_title='', citation='', collection_id='', collection_name='', collection_doi='', asset_h5ad_filesize=-1, cell_count=-1, mean_genes_per_cell=-1.0, schema_version='', dataset_total_cell_count=0, soma_joinid=-1)]" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -96,6 +108,356 @@ "source": [ "datasets" ] + }, + { + "cell_type": "markdown", + "id": "e0acd774-76a4-4de2-b219-c4adcd55e54d", + "metadata": {}, + "source": [ + "## Run the census builder to ingest the spatial datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ba536c4c-c5e0-4630-baeb-488b2c314e63", + "metadata": {}, + "outputs": [], + "source": [ + "# NOTE: Change these variables to point to appropriate file paths on your machine\n", + "census_builder_working_dir = 
\"/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds\"\n", + "census_build_tag = \"census_spatial\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f61079cd-6c7d-437b-bd4e-50f196e6bdaa", + "metadata": {}, + "outputs": [], + "source": [ + "# Ensure the working directory does not already contain a build tag with the same name\n", + "! rm -rf {census_builder_working_dir}/logs\n", + "! rm -rf {census_builder_working_dir}/{census_build_tag}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "289e277b-ee53-4baa-ab6f-b458356389e4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-06-05 16:47:41 5875 DEBUG Setting NUMEXPR_MAX_THREADS environment variable to \"5\"\n", + "2024-06-05 16:47:41 5875 DEBUG Setting OMP_NUM_THREADS environment variable to \"1\"\n", + "2024-06-05 16:47:41 5875 DEBUG Setting OPENBLAS_NUM_THREADS environment variable to \"1\"\n", + "2024-06-05 16:47:41 5875 DEBUG Setting MKL_NUM_THREADS environment variable to \"1\"\n", + "2024-06-05 16:47:41 5875 DEBUG Setting VECLIB_MAXIMUM_THREADS environment variable to \"1\"\n", + "2024-06-05 16:47:41 5875 INFO CensusBuildArgs(working_dir=PosixPath('/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds'), config=CensusBuildConfig(verbose=1, dashboard=True, log_dir='logs', log_file='build.log', reports_dir='reports', consolidate=True, dryrun=False, cellxgene_census_S3_path='s3://cellxgene-data-public/cell-census', cellxgene_census_default_mirror_S3_path='s3://cellxgene-census-public-us-west-2/cell-census', cellxgene_census_S3_replica_path=None, logs_S3_path='s3://cellxgene-data-public-logs/builder', build_tag='census_spatial', max_worker_processes=48, host_validation_disable=False, host_validation_min_physical_memory=549755813888, host_validation_min_swap_memory=2199023255552, host_validation_min_free_disk_space=1979120929996, release_cleanup_days=32, 
dataset_id_blocklist_uri='https://raw.githubusercontent.com/chanzuckerberg/cellxgene-census/main/tools/cellxgene_census_builder/dataset_blocklist.txt', user_agent_prefix='census-builder-', user_agent_environment='unknown', manifest=<_io.TextIOWrapper name='/Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/manifest.csv' mode='r' encoding='UTF-8'>, test_first_n=0), state=CensusBuildState())\n", + "2024-06-05 16:47:41 5875 INFO System memory: mem-used=16267870208 (47.3%), max-mem-used=16267870208 (47.3%), mem-total=34359738368 load-avg=(2.72, 2.47, 2.51)\n", + "2024-06-05 16:47:41 5875 INFO Starting process resource logger with period 15.0\n", + "2024-06-05 16:47:43 5875 INFO Dask client created: \n", + "2024-06-05 16:47:43 5875 INFO Dask client using cluster: LocalCluster(b67349d8, 'tcp://127.0.0.1:62818', workers=10, threads=10)\n", + "2024-06-05 16:47:43 5875 INFO Dashboard link: http://127.0.0.1:8787/status\n", + "2024-06-05 16:47:43 5875 INFO Build step 1 - get source assets - started\n", + "2024-06-05 16:47:43 5875 INFO Loading manifest from file\n", + "2024-06-05 16:47:44 5875 INFO Dataset blocklist found, containing 5 ids.\n", + "2024-06-05 16:47:44 5875 INFO After blocklist and dedup, will load 8 datasets.\n", + "2024-06-05 16:47:44 5875 INFO Starting asset staging to /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads\n", + "2024-06-05 16:47:45 5882 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, bytes=42809187\n", + "2024-06-05 16:47:45 5880 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, bytes=126790774\n", + "2024-06-05 16:47:45 5881 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, bytes=49260402\n", + "2024-06-05 
16:47:45 5885 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, bytes=28385363\n", + "2024-06-05 16:47:45 5883 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, bytes=147321916\n", + "2024-06-05 16:47:45 5878 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, bytes=1112747498\n", + "2024-06-05 16:47:46 5886 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, bytes=625411595\n", + "2024-06-05 16:47:46 5879 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, bytes=637135389\n", + "2024-06-05 16:47:46 5875 INFO Build step 1 - get source assets - finished\n", + "2024-06-05 16:47:46 5875 INFO Build step 2 - Create root collection - started\n", + "2024-06-05 16:47:46 5875 INFO homo_sapiens: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens\n", + "2024-06-05 16:47:46 5875 INFO mus_musculus: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus\n", + "2024-06-05 16:47:46 5875 INFO Build step 2 - Create root collection - finished\n", + "2024-06-05 16:47:46 5875 INFO Build step 3 - accumulate obs and var axes - started\n", + "2024-06-05 16:47:46 5882 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/homo_sapiens - found 4992 cells\n", + "2024-06-05 16:47:46 5885 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/homo_sapiens - found 4992 cells\n", + "2024-06-05 16:47:46 5880 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/homo_sapiens - found 4992 cells\n", + "2024-06-05 16:47:46 5879 DEBUG 
1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/homo_sapiens - found 4992 cells\n", + "2024-06-05 16:47:46 5881 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/homo_sapiens - found 4992 cells\n", + "2024-06-05 16:47:46 5878 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/homo_sapiens - found 4992 cells\n", + "2024-06-05 16:47:46 5887 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/homo_sapiens - found 4992 cells\n", + "2024-06-05 16:47:46 5886 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/homo_sapiens - found 4992 cells\n", + "2024-06-05 16:47:46 5882 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/mus_musculus - found 0 cells\n", + "2024-06-05 16:47:46 5882 DEBUG mus_musculus - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-05 16:47:46 5878 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/mus_musculus - found 0 cells\n", + "2024-06-05 16:47:46 5878 DEBUG mus_musculus - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-05 16:47:46 5885 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/mus_musculus - found 0 cells\n", + "2024-06-05 16:47:46 5885 DEBUG mus_musculus - H5AD has no data after filtering, skipping 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-05 16:47:46 5887 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/mus_musculus - found 0 cells\n", + "2024-06-05 16:47:46 5887 DEBUG mus_musculus - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-05 16:47:46 5879 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/mus_musculus - found 0 cells\n", + "2024-06-05 16:47:46 5879 DEBUG mus_musculus - H5AD has no data after filtering, skipping 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-05 16:47:46 5886 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/mus_musculus - found 0 cells\n", + "2024-06-05 16:47:46 5886 DEBUG mus_musculus - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-05 16:47:46 5880 DEBUG 
9624a105-319c-4abf-b10b-d96ce1650100/mus_musculus - found 0 cells\n", + "2024-06-05 16:47:46 5880 DEBUG mus_musculus - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-05 16:47:46 5881 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/mus_musculus - found 0 cells\n", + "2024-06-05 16:47:46 5881 DEBUG mus_musculus - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-05 16:47:46 5875 INFO Build step 3 - axis accumulation complete\n", + "2024-06-05 16:47:46 5875 INFO Build step 3 - accumulate obs and var axes - finished\n", + "2024-06-05 16:47:46 5875 INFO Scaling cluster to 1 workers.\n", + "2024-06-05 16:47:46 5875 INFO Build step 4 - Populate X layers - started\n", + "2024-06-05 16:47:46 5875 INFO homo_sapiens: create X layers\n", + "2024-06-05 16:47:47 5875 INFO mus_musculus: create X layers\n", + "2024-06-05 16:47:47 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0\n", + "2024-06-05 16:47:47 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0, chunk 0 of 1\n", + "2024-06-05 16:47:52 5878 INFO dispatch_X_chunk [exit, 4.99s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0\n", + "2024-06-05 16:47:52 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0\n", + "2024-06-05 16:47:52 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0, chunk 0 of 1\n", + "2024-06-05 16:47:56 5878 INFO dispatch_X_chunk [exit, 4.33s]: 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0\n", + "2024-06-05 16:47:56 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0\n", + "2024-06-05 16:47:56 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0, chunk 0 of 1\n", + "2024-06-05 16:47:56 5875 INFO System memory: mem-used=17571299328 (51.1%), max-mem-used=17571299328 (51.1%), mem-total=34359738368 load-avg=(2.74, 2.49, 2.51)\n", + "2024-06-05 16:47:59 5878 INFO dispatch_X_chunk [exit, 2.89s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0\n", + "2024-06-05 16:47:59 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0\n", + "2024-06-05 16:47:59 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0, chunk 0 of 1\n", + "2024-06-05 16:48:02 5878 INFO dispatch_X_chunk [exit, 3.50s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0\n", + "2024-06-05 16:48:02 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0\n", + "2024-06-05 16:48:02 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0, chunk 0 of 1\n", + "2024-06-05 16:48:04 5878 INFO dispatch_X_chunk [exit, 1.90s]: 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0\n", + "2024-06-05 16:48:04 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0\n", + "2024-06-05 16:48:04 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0, chunk 0 of 1\n", + "2024-06-05 16:48:08 5878 INFO dispatch_X_chunk [exit, 3.13s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0\n", + "2024-06-05 16:48:08 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0\n", + "2024-06-05 16:48:08 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0, chunk 0 of 1\n", + "2024-06-05 16:48:11 5878 INFO dispatch_X_chunk [exit, 3.48s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0\n", + "2024-06-05 16:48:11 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0\n", + "2024-06-05 16:48:11 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0, chunk 0 of 1\n", + "2024-06-05 16:48:11 5875 INFO System memory: mem-used=18162663424 (52.9%), max-mem-used=18162663424 (52.9%), mem-total=34359738368 load-avg=(3.45, 2.66, 2.57)\n", + "2024-06-05 16:48:15 5878 INFO dispatch_X_chunk [exit, 3.51s]: 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0\n", + "2024-06-05 16:48:15 5875 INFO Save presence matrix for homo_sapiens - start\n", + "2024-06-05 16:48:15 5875 INFO Save presence matrix for homo_sapiens - finish\n", + "2024-06-05 16:48:15 5875 INFO Save presence matrix for mus_musculus - start\n", + "2024-06-05 16:48:15 5875 INFO Save presence matrix for mus_musculus - finish\n", + "2024-06-05 16:48:15 5875 INFO Build step 4 - Populate X layers - finished\n", + "2024-06-05 16:48:15 5875 INFO Build step 5 - Save axis and summary info - started\n", + "2024-06-05 16:48:15 5875 INFO homo_sapiens: writing obs dataframe\n", + "2024-06-05 16:48:15 5875 DEBUG experiment homo_sapiens obs = (39936, 30)\n", + "2024-06-05 16:48:15 5875 INFO homo_sapiens: writing var dataframe\n", + "2024-06-05 16:48:15 5875 DEBUG experiment homo_sapiens var = (36972, 6)\n", + "2024-06-05 16:48:15 5875 INFO mus_musculus: writing obs dataframe\n", + "2024-06-05 16:48:15 5875 INFO mus_musculus: empty obs dataframe\n", + "2024-06-05 16:48:15 5875 INFO mus_musculus: writing var dataframe\n", + "2024-06-05 16:48:15 5875 INFO mus_musculus: empty var dataframe\n", + "2024-06-05 16:48:15 5875 INFO Creating dataset_manifest\n", + "2024-06-05 16:48:15 5875 INFO Creating census_summary_cell_counts\n", + "2024-06-05 16:48:15 5875 INFO Creating census summary\n", + "2024-06-05 16:48:15 5875 INFO Create census organisms dataframe\n", + "2024-06-05 16:48:15 5875 INFO Build step 5 - Save axis and summary info - finished\n", + "2024-06-05 16:48:15 5875 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", + "2024-06-05 16:48:15 5875 INFO tiledb_soma_1969_work_around: deleting bounding box from 
file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/raw\n", + "2024-06-05 16:48:15 5875 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/normalized\n", + "2024-06-05 16:48:15 5875 INFO Scaling cluster to 10 workers.\n", + "2024-06-05 16:48:15 5875 INFO Consolidate: found 21 TileDB objects to consolidate\n", + "2024-06-05 16:48:15 5875 INFO Consolidate: 21 consolidation jobs queued\n", + "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/datasets\n", + "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/datasets\n", + "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/summary_cell_counts\n", + "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/summary_cell_counts\n", + "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/summary\n", + "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/summary\n", + "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/organisms\n", + "2024-06-05 16:48:15 5878 INFO 
Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/organisms\n", + "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info\n", + "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info\n", + "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/obs\n", + "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] finish, 0.10 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/obs\n", + "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/var\n", + "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/var\n", + "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", + "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] finish, 0.05 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", + "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] start, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/raw\n", + "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X\n", + "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X\n", + "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA\n", + "2024-06-05 16:48:17 5940 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens\n", + "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA\n", + "2024-06-05 16:48:17 5940 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens\n", + "2024-06-05 16:48:17 5945 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA/var\n", + "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms\n", + "2024-06-05 16:48:17 5940 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/obs\n", + "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] finish, 0.01 
seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms\n", + "2024-06-05 16:48:18 5945 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA/var\n", + "2024-06-05 16:48:18 5945 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA/X\n", + "2024-06-05 16:48:18 5943 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA\n", + "2024-06-05 16:48:18 5945 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA/X\n", + "2024-06-05 16:48:18 5945 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data\n", + "2024-06-05 16:48:18 5945 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data\n", + "2024-06-05 16:48:18 5943 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA\n", + "2024-06-05 16:48:18 5943 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus\n", + "2024-06-05 16:48:18 5943 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus\n", + "2024-06-05 16:48:18 5940 INFO Consolidate[vacuum=True] finish, 0.05 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/obs\n", + "2024-06-05 16:48:18 5940 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms\n", + "2024-06-05 16:48:18 5940 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms\n", + "2024-06-05 16:48:18 5948 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/normalized\n", + "2024-06-05 16:48:19 5878 INFO Consolidate[vacuum=True] finish, 3.46 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/raw\n", + "2024-06-05 16:48:21 5948 INFO Consolidate[vacuum=True] finish, 3.27 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/normalized\n", + "2024-06-05 16:48:21 5875 INFO Validation of SOMA objects - start\n", + "2024-06-05 16:48:21 5875 DEBUG validate_directory_structure [enter]\n", + "2024-06-05 16:48:21 5875 INFO validate_directory_structure [exit, 0.00s]\n", + "2024-06-05 16:48:22 5875 DEBUG validate_relative_path [enter]\n", + "2024-06-05 16:48:22 5875 INFO validate_relative_path [exit, 0.02s]\n", + "2024-06-05 16:48:22 5875 DEBUG validate_axis_dataframes_schema [enter]\n", + "2024-06-05 16:48:22 5875 INFO validate_axis_dataframes_schema [exit, 0.03s]\n", + "2024-06-05 16:48:22 5875 DEBUG validate_manifest_contents [enter]\n", + "2024-06-05 16:48:22 5875 INFO validate_manifest_contents [exit, 0.00s]\n", + "2024-06-05 16:48:22 5942 DEBUG _validate_X_layers_raw_contents [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-05 16:48:22 5947 
DEBUG _validate_X_layers_raw_contents [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-05 16:48:22 5945 DEBUG _validate_X_layers_raw_contents [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-05 16:48:22 5878 DEBUG _validate_X_layers_raw_contents [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-05 16:48:22 5941 DEBUG _validate_X_layers_raw_contents [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-05 16:48:22 5940 DEBUG _validate_X_layers_raw_contents [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-05 16:48:22 5946 DEBUG _validate_X_layers_raw_contents [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-05 16:48:22 5944 DEBUG _validate_X_layers_raw_contents [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-05 16:48:22 5948 DEBUG _validate_X_layers_presence_general [enter]\n", + "2024-06-05 16:48:22 5943 DEBUG validate_internal_consistency [enter]\n", + "2024-06-05 16:48:22 5948 INFO _validate_X_layers_presence_general [exit, 0.19s]\n", + "2024-06-05 16:48:22 5948 DEBUG _validate_axis_dataframes [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-05 16:48:22 5943 INFO validate_internal_consistency [exit, 0.53s]\n", + "2024-06-05 16:48:22 5943 DEBUG _validate_X_layers_presence [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-05 16:48:22 5943 INFO _validate_X_layers_presence [exit, 0.15s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-05 16:48:22 5943 DEBUG _validate_X_layers_presence [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.11s]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.09s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: 
9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.17s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.08s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-05 16:48:23 5948 INFO _validate_axis_dataframes [exit, 0.66s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-05 16:48:23 5948 DEBUG _validate_axis_dataframes [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.13s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-05 16:48:23 5878 INFO _validate_X_layers_raw_contents [exit, 1.31s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.05s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.09s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [0, 32000)\n", + "2024-06-05 16:48:23 5948 INFO _validate_axis_dataframes [exit, 0.37s]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-05 16:48:23 5948 DEBUG _validate_axis_dataframes [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-05 16:48:24 5948 INFO _validate_axis_dataframes [exit, 0.37s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-05 16:48:24 5948 DEBUG _validate_axis_dataframes [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-05 16:48:24 
5948 INFO _validate_axis_dataframes [exit, 0.27s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-05 16:48:24 5948 DEBUG _validate_axis_dataframes [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-05 16:48:24 5944 INFO _validate_X_layers_raw_contents [exit, 2.34s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-05 16:48:24 5945 INFO _validate_X_layers_raw_contents [exit, 2.45s]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-05 16:48:24 5948 INFO _validate_axis_dataframes [exit, 0.28s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-05 16:48:24 5948 DEBUG _validate_axis_dataframes [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-05 16:48:24 5940 INFO _validate_X_layers_raw_contents [exit, 2.55s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-05 16:48:24 5942 INFO _validate_X_layers_raw_contents [exit, 2.63s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-05 16:48:24 5941 INFO _validate_X_layers_raw_contents [exit, 2.63s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-05 16:48:24 5948 INFO _validate_axis_dataframes [exit, 0.23s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-05 16:48:24 5948 DEBUG _validate_axis_dataframes [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-05 16:48:25 5946 INFO _validate_X_layers_raw_contents [exit, 2.76s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-05 16:48:25 5947 INFO _validate_X_layers_raw_contents [exit, 2.89s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-05 16:48:25 5948 INFO _validate_axis_dataframes [exit, 0.21s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-05 16:48:25 5948 DEBUG _validate_axis_dataframes [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-05 16:48:25 5948 INFO _validate_axis_dataframes [exit, 0.18s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-05 16:48:25 5948 DEBUG validate_soma_bounding_box [enter]\n", + "2024-06-05 16:48:25 5948 INFO validate_soma_bounding_box [exit, 0.08s]\n", + 
"2024-06-05 16:48:25 5948 DEBUG validate_X_layers_schema [enter]\n", + "2024-06-05 16:48:25 5948 INFO validate_X_layers_schema [exit, 0.05s]\n", + "2024-06-05 16:48:25 5944 DEBUG validate_axis_dataframes_global_ids [enter]\n", + "2024-06-05 16:48:25 5944 INFO validate_axis_dataframes_global_ids [exit, 0.08s]\n", + "2024-06-05 16:48:26 5875 INFO System memory: mem-used=21406154752 (62.3%), max-mem-used=21406154752 (62.3%), mem-total=34359738368 load-avg=(15.26, 5.28, 3.51)\n", + "2024-06-05 16:48:31 5943 INFO _validate_X_layers_normalized [exit, 7.41s]: homo_sapiens rows [0, 32000)\n", + "2024-06-05 16:48:31 5943 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [32000, 64000)\n", + "2024-06-05 16:48:32 5943 INFO _validate_X_layers_normalized [exit, 1.90s]: homo_sapiens rows [32000, 64000)\n", + "2024-06-05 16:48:32 5943 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, raw, rows [0, 96000)\n", + "2024-06-05 16:48:35 5943 INFO _validate_X_layers_has_unique_coords [exit, 2.12s]: homo_sapiens, raw, rows [0, 96000)\n", + "2024-06-05 16:48:35 5943 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, normalized, rows [0, 96000)\n", + "2024-06-05 16:48:37 5943 INFO _validate_X_layers_has_unique_coords [exit, 1.92s]: homo_sapiens, normalized, rows [0, 96000)\n", + "2024-06-05 16:48:37 5875 DEBUG validate_consolidation [enter]\n", + "2024-06-05 16:48:37 5875 INFO validate_consolidation [exit, 0.11s]\n", + "2024-06-05 16:48:37 5875 INFO Validation & consolidation complete.\n", + "2024-06-05 16:48:38,278 - distributed.scheduler - WARNING - Removing worker 'tcp://127.0.0.1:62983' caused the cluster to lose already computed task(s), which will be recomputed elsewhere: {'assert_all-1c2e7cb8-a6b6-47bd-9804-9c565ae29cb2'} (stimulus_id='handle-worker-cleanup-1717631318.278749')\n", + "2024-06-05 16:48:38 5875 INFO Dask cluster shut down\n", + "2024-06-05 16:48:38 5875 INFO Fini\n" + ] + } + ], + "source": [ + "! 
python -m cellxgene_census_builder.build_soma -v --build-tag {census_build_tag} {census_builder_working_dir} build --manifest {manifest_file_path}" + ] + }, + { + "cell_type": "markdown", + "id": "2ab2bf1a-edd5-49c0-8c23-7465e4e9f9b5", + "metadata": {}, + "source": [ + "## Query the generated census object\n", + "NOTE: Currently the demo shows summary cell counts but the demo will showcase spatial queries once that is available" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a82ad4cb-b71b-40b0-9954-a1cae985c985", + "metadata": {}, + "outputs": [], + "source": [ + "import cellxgene_census" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a685b68b-b6d1-4ab9-a6be-33ba48f365fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " organism category label \\\n", + "0 Homo sapiens all na \n", + "1 Homo sapiens assay Visium Spatial Gene Expression \n", + "2 Homo sapiens cell_type neuronal receptor cell \n", + "3 Homo sapiens cell_type epithelial cell \n", + "4 Homo sapiens cell_type blood cell \n", + ".. ... ... ... \n", + "62 Homo sapiens tissue primary visual cortex \n", + "63 Homo sapiens tissue_general heart \n", + "64 Homo sapiens tissue_general brain \n", + "65 Homo sapiens tissue_general placenta \n", + "66 Homo sapiens tissue_general liver \n", + "\n", + " ontology_term_id total_cell_count unique_cell_count \n", + "0 na 39936 39936 \n", + "1 EFO:0010961 39936 39936 \n", + "2 CL:0000006 3 3 \n", + "3 CL:0000066 335 335 \n", + "4 CL:0000081 193 193 \n", + ".. ... ... ... 
\n", + "62 UBERON:0002436 4992 4992 \n", + "63 UBERON:0000948 9984 9984 \n", + "64 UBERON:0000955 9984 9984 \n", + "65 UBERON:0001987 9984 9984 \n", + "66 UBERON:0002107 9984 9984 \n", + "\n", + "[67 rows x 6 columns]\n" + ] + } + ], + "source": [ + "with cellxgene_census.open_soma(uri=f\"{census_builder_working_dir}/{census_build_tag}/soma\") as census:\n", + " census_summary_cell_counts = census[\"census_info\"][\"summary_cell_counts\"].read().concat().to_pandas()\n", + " census_summary_cell_counts = census_summary_cell_counts.drop(columns=[\"soma_joinid\"])\n", + " print(census_summary_cell_counts)" + ] } ], "metadata": { diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/validate_soma.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/validate_soma.py index 2aea90943..197013a52 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/validate_soma.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/validate_soma.py @@ -138,11 +138,13 @@ def validate_all_soma_objects_exist(soma_path: str, experiment_specifications: l ) # verify required dataset fields are set - df: pd.DataFrame = census_info[CENSUS_DATASETS_NAME].read().concat().to_pandas() - assert (df["collection_id"] != "").all() - assert (df["collection_name"] != "").all() - assert (df["dataset_title"] != "").all() - assert (df["dataset_version_id"] != "").all() + # TODO (spatial): The below 5 lines should be uncommented once datasets are available via `/datasets` endpoint + # df: pd.DataFrame = census_info[CENSUS_DATASETS_NAME].read().concat().to_pandas() + + # assert (df["collection_id"] != "").all() + # assert (df["collection_name"] != "").all() + # assert (df["dataset_title"] != "").all() + # assert (df["dataset_version_id"] != "").all() # there should be an experiment for each builder census_data = census[CENSUS_DATA_NAME] @@ -836,7 +838,8 @@ def load_datasets_from_census(assets_path: str, 
soma_path: str) -> list[Dataset] with soma.Collection.open(soma_path, context=SOMA_TileDB_Context()) as census: df = census[CENSUS_INFO_NAME][CENSUS_DATASETS_NAME].read().concat().to_pandas() df["dataset_asset_h5ad_uri"] = df.dataset_h5ad_path.map(lambda p: urlcat(assets_path, p)) - assert df.dataset_version_id.is_unique + # TODO (spatial): The below 1 assert should be uncommented once datasets are available via `/datasets` endpoint + # assert df.dataset_version_id.is_unique assert df.dataset_id.is_unique df["asset_h5ad_filesize"] = df.dataset_asset_h5ad_uri.map(lambda p: os.path.getsize(p)) datasets = Dataset.from_dataframe(df) From e7e6b0765d56ca665cb157a5c7b90f9ffcd8dafe Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Fri, 7 Jun 2024 15:37:52 -0700 Subject: [PATCH 21/29] Add census_data and census_spatial collections --- .../census_spatial_dataset_ingest.ipynb | 1163 +++++++++++++---- .../build_soma/build_soma.py | 12 +- .../build_soma/experiment_builder.py | 17 +- .../build_soma/experiment_specs.py | 21 +- .../build_soma/globals.py | 10 +- .../build_soma/validate_soma.py | 65 +- 6 files changed, 984 insertions(+), 304 deletions(-) diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb index 3ec790807..8ea0520cf 100644 --- a/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb +++ b/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb @@ -126,7 +126,7 @@ "source": [ "# NOTE: Change these variables to point to appropriate file paths on your machine\n", "census_builder_working_dir = \"/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds\"\n", - "census_build_tag = \"census_spatial\"" + "census_build_tag = \"test-spatial-build\"" ] }, { @@ -151,241 +151,327 @@ "name": "stdout", "output_type": "stream", "text": [ - "2024-06-05 16:47:41 5875 DEBUG Setting 
NUMEXPR_MAX_THREADS environment variable to \"5\"\n", - "2024-06-05 16:47:41 5875 DEBUG Setting OMP_NUM_THREADS environment variable to \"1\"\n", - "2024-06-05 16:47:41 5875 DEBUG Setting OPENBLAS_NUM_THREADS environment variable to \"1\"\n", - "2024-06-05 16:47:41 5875 DEBUG Setting MKL_NUM_THREADS environment variable to \"1\"\n", - "2024-06-05 16:47:41 5875 DEBUG Setting VECLIB_MAXIMUM_THREADS environment variable to \"1\"\n", - "2024-06-05 16:47:41 5875 INFO CensusBuildArgs(working_dir=PosixPath('/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds'), config=CensusBuildConfig(verbose=1, dashboard=True, log_dir='logs', log_file='build.log', reports_dir='reports', consolidate=True, dryrun=False, cellxgene_census_S3_path='s3://cellxgene-data-public/cell-census', cellxgene_census_default_mirror_S3_path='s3://cellxgene-census-public-us-west-2/cell-census', cellxgene_census_S3_replica_path=None, logs_S3_path='s3://cellxgene-data-public-logs/builder', build_tag='census_spatial', max_worker_processes=48, host_validation_disable=False, host_validation_min_physical_memory=549755813888, host_validation_min_swap_memory=2199023255552, host_validation_min_free_disk_space=1979120929996, release_cleanup_days=32, dataset_id_blocklist_uri='https://raw.githubusercontent.com/chanzuckerberg/cellxgene-census/main/tools/cellxgene_census_builder/dataset_blocklist.txt', user_agent_prefix='census-builder-', user_agent_environment='unknown', manifest=<_io.TextIOWrapper name='/Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/manifest.csv' mode='r' encoding='UTF-8'>, test_first_n=0), state=CensusBuildState())\n", - "2024-06-05 16:47:41 5875 INFO System memory: mem-used=16267870208 (47.3%), max-mem-used=16267870208 (47.3%), mem-total=34359738368 load-avg=(2.72, 2.47, 2.51)\n", - "2024-06-05 16:47:41 5875 INFO Starting process resource logger with period 15.0\n", - "2024-06-05 16:47:43 5875 INFO Dask client created: \n", - "2024-06-05 16:47:43 
5875 INFO Dask client using cluster: LocalCluster(b67349d8, 'tcp://127.0.0.1:62818', workers=10, threads=10)\n", - "2024-06-05 16:47:43 5875 INFO Dashboard link: http://127.0.0.1:8787/status\n", - "2024-06-05 16:47:43 5875 INFO Build step 1 - get source assets - started\n", - "2024-06-05 16:47:43 5875 INFO Loading manifest from file\n", - "2024-06-05 16:47:44 5875 INFO Dataset blocklist found, containing 5 ids.\n", - "2024-06-05 16:47:44 5875 INFO After blocklist and dedup, will load 8 datasets.\n", - "2024-06-05 16:47:44 5875 INFO Starting asset staging to /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads\n", - "2024-06-05 16:47:45 5882 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, bytes=42809187\n", - "2024-06-05 16:47:45 5880 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, bytes=126790774\n", - "2024-06-05 16:47:45 5881 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, bytes=49260402\n", - "2024-06-05 16:47:45 5885 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, bytes=28385363\n", - "2024-06-05 16:47:45 5883 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, bytes=147321916\n", - "2024-06-05 16:47:45 5878 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, bytes=1112747498\n", - "2024-06-05 16:47:46 5886 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, bytes=625411595\n", - "2024-06-05 16:47:46 5879 DEBUG Copy 
complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, bytes=637135389\n", - "2024-06-05 16:47:46 5875 INFO Build step 1 - get source assets - finished\n", - "2024-06-05 16:47:46 5875 INFO Build step 2 - Create root collection - started\n", - "2024-06-05 16:47:46 5875 INFO homo_sapiens: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens\n", - "2024-06-05 16:47:46 5875 INFO mus_musculus: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus\n", - "2024-06-05 16:47:46 5875 INFO Build step 2 - Create root collection - finished\n", - "2024-06-05 16:47:46 5875 INFO Build step 3 - accumulate obs and var axes - started\n", - "2024-06-05 16:47:46 5882 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/homo_sapiens - found 4992 cells\n", - "2024-06-05 16:47:46 5885 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/homo_sapiens - found 4992 cells\n", - "2024-06-05 16:47:46 5880 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/homo_sapiens - found 4992 cells\n", - "2024-06-05 16:47:46 5879 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/homo_sapiens - found 4992 cells\n", - "2024-06-05 16:47:46 5881 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/homo_sapiens - found 4992 cells\n", - "2024-06-05 16:47:46 5878 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/homo_sapiens - found 4992 cells\n", - "2024-06-05 16:47:46 5887 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/homo_sapiens - found 4992 cells\n", - "2024-06-05 16:47:46 5886 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/homo_sapiens - found 4992 cells\n", - "2024-06-05 16:47:46 5882 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/mus_musculus - found 0 cells\n", - "2024-06-05 16:47:46 5882 DEBUG mus_musculus - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-05 16:47:46 5878 DEBUG 
53e343af-979c-4525-a705-1b9d1a1fee14/mus_musculus - found 0 cells\n", - "2024-06-05 16:47:46 5878 DEBUG mus_musculus - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-05 16:47:46 5885 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/mus_musculus - found 0 cells\n", - "2024-06-05 16:47:46 5885 DEBUG mus_musculus - H5AD has no data after filtering, skipping 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-05 16:47:46 5887 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/mus_musculus - found 0 cells\n", - "2024-06-05 16:47:46 5887 DEBUG mus_musculus - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-05 16:47:46 5879 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/mus_musculus - found 0 cells\n", - "2024-06-05 16:47:46 5879 DEBUG mus_musculus - H5AD has no data after filtering, skipping 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-05 16:47:46 5886 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/mus_musculus - found 0 cells\n", - "2024-06-05 16:47:46 5886 DEBUG mus_musculus - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-05 16:47:46 5880 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/mus_musculus - found 0 cells\n", - "2024-06-05 16:47:46 5880 DEBUG mus_musculus - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-05 16:47:46 5881 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/mus_musculus - found 0 cells\n", - "2024-06-05 16:47:46 5881 DEBUG mus_musculus - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-05 16:47:46 5875 INFO Build step 3 - axis accumulation complete\n", - "2024-06-05 16:47:46 5875 INFO Build step 3 - accumulate obs and var axes - finished\n", - "2024-06-05 16:47:46 5875 INFO Scaling cluster to 1 workers.\n", - "2024-06-05 16:47:46 5875 INFO Build step 4 - Populate X layers - started\n", - "2024-06-05 16:47:46 5875 INFO homo_sapiens: create X 
layers\n", - "2024-06-05 16:47:47 5875 INFO mus_musculus: create X layers\n", - "2024-06-05 16:47:47 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0\n", - "2024-06-05 16:47:47 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0, chunk 0 of 1\n", - "2024-06-05 16:47:52 5878 INFO dispatch_X_chunk [exit, 4.99s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0\n", - "2024-06-05 16:47:52 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0\n", - "2024-06-05 16:47:52 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0, chunk 0 of 1\n", - "2024-06-05 16:47:56 5878 INFO dispatch_X_chunk [exit, 4.33s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0\n", - "2024-06-05 16:47:56 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0\n", - "2024-06-05 16:47:56 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0, chunk 0 of 1\n", - "2024-06-05 16:47:56 5875 INFO System memory: mem-used=17571299328 (51.1%), max-mem-used=17571299328 (51.1%), mem-total=34359738368 load-avg=(2.74, 2.49, 2.51)\n", - "2024-06-05 16:47:59 5878 INFO dispatch_X_chunk [exit, 2.89s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0\n", - 
"2024-06-05 16:47:59 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0\n", - "2024-06-05 16:47:59 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0, chunk 0 of 1\n", - "2024-06-05 16:48:02 5878 INFO dispatch_X_chunk [exit, 3.50s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0\n", - "2024-06-05 16:48:02 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0\n", - "2024-06-05 16:48:02 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0, chunk 0 of 1\n", - "2024-06-05 16:48:04 5878 INFO dispatch_X_chunk [exit, 1.90s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0\n", - "2024-06-05 16:48:04 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0\n", - "2024-06-05 16:48:04 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0, chunk 0 of 1\n", - "2024-06-05 16:48:08 5878 INFO dispatch_X_chunk [exit, 3.13s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0\n", - "2024-06-05 16:48:08 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0\n", - "2024-06-05 16:48:08 5878 INFO processing X 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0, chunk 0 of 1\n", - "2024-06-05 16:48:11 5878 INFO dispatch_X_chunk [exit, 3.48s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0\n", - "2024-06-05 16:48:11 5878 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0\n", - "2024-06-05 16:48:11 5878 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0, chunk 0 of 1\n", - "2024-06-05 16:48:11 5875 INFO System memory: mem-used=18162663424 (52.9%), max-mem-used=18162663424 (52.9%), mem-total=34359738368 load-avg=(3.45, 2.66, 2.57)\n", - "2024-06-05 16:48:15 5878 INFO dispatch_X_chunk [exit, 3.51s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0\n", - "2024-06-05 16:48:15 5875 INFO Save presence matrix for homo_sapiens - start\n", - "2024-06-05 16:48:15 5875 INFO Save presence matrix for homo_sapiens - finish\n", - "2024-06-05 16:48:15 5875 INFO Save presence matrix for mus_musculus - start\n", - "2024-06-05 16:48:15 5875 INFO Save presence matrix for mus_musculus - finish\n", - "2024-06-05 16:48:15 5875 INFO Build step 4 - Populate X layers - finished\n", - "2024-06-05 16:48:15 5875 INFO Build step 5 - Save axis and summary info - started\n", - "2024-06-05 16:48:15 5875 INFO homo_sapiens: writing obs dataframe\n", - "2024-06-05 16:48:15 5875 DEBUG experiment homo_sapiens obs = (39936, 30)\n", - "2024-06-05 16:48:15 5875 INFO homo_sapiens: writing var dataframe\n", - "2024-06-05 16:48:15 5875 DEBUG experiment homo_sapiens var = (36972, 6)\n", - "2024-06-05 16:48:15 5875 INFO mus_musculus: writing obs dataframe\n", - "2024-06-05 16:48:15 
5875 INFO mus_musculus: empty obs dataframe\n", - "2024-06-05 16:48:15 5875 INFO mus_musculus: writing var dataframe\n", - "2024-06-05 16:48:15 5875 INFO mus_musculus: empty var dataframe\n", - "2024-06-05 16:48:15 5875 INFO Creating dataset_manifest\n", - "2024-06-05 16:48:15 5875 INFO Creating census_summary_cell_counts\n", - "2024-06-05 16:48:15 5875 INFO Creating census summary\n", - "2024-06-05 16:48:15 5875 INFO Create census organisms dataframe\n", - "2024-06-05 16:48:15 5875 INFO Build step 5 - Save axis and summary info - finished\n", - "2024-06-05 16:48:15 5875 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", - "2024-06-05 16:48:15 5875 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/raw\n", - "2024-06-05 16:48:15 5875 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/normalized\n", - "2024-06-05 16:48:15 5875 INFO Scaling cluster to 10 workers.\n", - "2024-06-05 16:48:15 5875 INFO Consolidate: found 21 TileDB objects to consolidate\n", - "2024-06-05 16:48:15 5875 INFO Consolidate: 21 consolidation jobs queued\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/datasets\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/datasets\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/summary_cell_counts\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/summary_cell_counts\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/summary\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/summary\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/organisms\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info/organisms\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_info\n", - "2024-06-05 16:48:15 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/obs\n", - "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] finish, 0.10 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/obs\n", - "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] start, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/var\n", - "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/var\n", - "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", - "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] finish, 0.05 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", - "2024-06-05 16:48:16 5878 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/raw\n", - "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X\n", - "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X\n", - "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA\n", - "2024-06-05 16:48:17 5940 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens\n", - "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] finish, 0.03 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA\n", - "2024-06-05 16:48:17 5940 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens\n", - "2024-06-05 16:48:17 5945 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA/var\n", - "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms\n", - "2024-06-05 16:48:17 5940 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/obs\n", - "2024-06-05 16:48:17 5943 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms\n", - "2024-06-05 16:48:18 5945 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA/var\n", - "2024-06-05 16:48:18 5945 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA/X\n", - "2024-06-05 16:48:18 5943 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA\n", - "2024-06-05 16:48:18 5945 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA/X\n", - "2024-06-05 16:48:18 5945 INFO Consolidate[vacuum=True] 
start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data\n", - "2024-06-05 16:48:18 5945 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data\n", - "2024-06-05 16:48:18 5943 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms/RNA\n", - "2024-06-05 16:48:18 5943 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus\n", - "2024-06-05 16:48:18 5943 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus\n", - "2024-06-05 16:48:18 5940 INFO Consolidate[vacuum=True] finish, 0.05 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/obs\n", - "2024-06-05 16:48:18 5940 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms\n", - "2024-06-05 16:48:18 5940 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/mus_musculus/ms\n", - "2024-06-05 16:48:18 5948 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/normalized\n", - "2024-06-05 16:48:19 5878 INFO Consolidate[vacuum=True] finish, 3.46 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/raw\n", - "2024-06-05 16:48:21 5948 INFO Consolidate[vacuum=True] 
finish, 3.27 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/census_spatial/soma/census_data/homo_sapiens/ms/RNA/X/normalized\n", - "2024-06-05 16:48:21 5875 INFO Validation of SOMA objects - start\n", - "2024-06-05 16:48:21 5875 DEBUG validate_directory_structure [enter]\n", - "2024-06-05 16:48:21 5875 INFO validate_directory_structure [exit, 0.00s]\n", - "2024-06-05 16:48:22 5875 DEBUG validate_relative_path [enter]\n", - "2024-06-05 16:48:22 5875 INFO validate_relative_path [exit, 0.02s]\n", - "2024-06-05 16:48:22 5875 DEBUG validate_axis_dataframes_schema [enter]\n", - "2024-06-05 16:48:22 5875 INFO validate_axis_dataframes_schema [exit, 0.03s]\n", - "2024-06-05 16:48:22 5875 DEBUG validate_manifest_contents [enter]\n", - "2024-06-05 16:48:22 5875 INFO validate_manifest_contents [exit, 0.00s]\n", - "2024-06-05 16:48:22 5942 DEBUG _validate_X_layers_raw_contents [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-05 16:48:22 5947 DEBUG _validate_X_layers_raw_contents [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-05 16:48:22 5945 DEBUG _validate_X_layers_raw_contents [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-05 16:48:22 5878 DEBUG _validate_X_layers_raw_contents [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-05 16:48:22 5941 DEBUG _validate_X_layers_raw_contents [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-05 16:48:22 5940 DEBUG _validate_X_layers_raw_contents [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-05 16:48:22 5946 DEBUG _validate_X_layers_raw_contents [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-05 16:48:22 5944 DEBUG _validate_X_layers_raw_contents [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-05 16:48:22 5948 DEBUG _validate_X_layers_presence_general [enter]\n", - "2024-06-05 16:48:22 5943 DEBUG validate_internal_consistency [enter]\n", - "2024-06-05 16:48:22 5948 INFO 
_validate_X_layers_presence_general [exit, 0.19s]\n", - "2024-06-05 16:48:22 5948 DEBUG _validate_axis_dataframes [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-05 16:48:22 5943 INFO validate_internal_consistency [exit, 0.53s]\n", - "2024-06-05 16:48:22 5943 DEBUG _validate_X_layers_presence [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-05 16:48:22 5943 INFO _validate_X_layers_presence [exit, 0.15s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-05 16:48:22 5943 DEBUG _validate_X_layers_presence [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.11s]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.09s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.17s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.08s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-05 16:48:23 5948 INFO _validate_axis_dataframes [exit, 0.66s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-05 16:48:23 5948 DEBUG _validate_axis_dataframes [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.13s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-05 16:48:23 5878 INFO 
_validate_X_layers_raw_contents [exit, 1.31s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.05s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_presence [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-05 16:48:23 5943 INFO _validate_X_layers_presence [exit, 0.09s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-05 16:48:23 5943 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [0, 32000)\n", - "2024-06-05 16:48:23 5948 INFO _validate_axis_dataframes [exit, 0.37s]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-05 16:48:23 5948 DEBUG _validate_axis_dataframes [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-05 16:48:24 5948 INFO _validate_axis_dataframes [exit, 0.37s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-05 16:48:24 5948 DEBUG _validate_axis_dataframes [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-05 16:48:24 5948 INFO _validate_axis_dataframes [exit, 0.27s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-05 16:48:24 5948 DEBUG _validate_axis_dataframes [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-05 16:48:24 5944 INFO _validate_X_layers_raw_contents [exit, 2.34s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-05 16:48:24 5945 INFO _validate_X_layers_raw_contents [exit, 2.45s]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-05 16:48:24 5948 INFO _validate_axis_dataframes [exit, 0.28s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-05 16:48:24 5948 DEBUG _validate_axis_dataframes [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-05 16:48:24 5940 INFO _validate_X_layers_raw_contents [exit, 2.55s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-05 16:48:24 5942 INFO _validate_X_layers_raw_contents [exit, 2.63s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-05 16:48:24 5941 INFO _validate_X_layers_raw_contents [exit, 
2.63s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-05 16:48:24 5948 INFO _validate_axis_dataframes [exit, 0.23s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-05 16:48:24 5948 DEBUG _validate_axis_dataframes [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-05 16:48:25 5946 INFO _validate_X_layers_raw_contents [exit, 2.76s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-05 16:48:25 5947 INFO _validate_X_layers_raw_contents [exit, 2.89s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-05 16:48:25 5948 INFO _validate_axis_dataframes [exit, 0.21s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-05 16:48:25 5948 DEBUG _validate_axis_dataframes [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-05 16:48:25 5948 INFO _validate_axis_dataframes [exit, 0.18s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-05 16:48:25 5948 DEBUG validate_soma_bounding_box [enter]\n", - "2024-06-05 16:48:25 5948 INFO validate_soma_bounding_box [exit, 0.08s]\n", - "2024-06-05 16:48:25 5948 DEBUG validate_X_layers_schema [enter]\n", - "2024-06-05 16:48:25 5948 INFO validate_X_layers_schema [exit, 0.05s]\n", - "2024-06-05 16:48:25 5944 DEBUG validate_axis_dataframes_global_ids [enter]\n", - "2024-06-05 16:48:25 5944 INFO validate_axis_dataframes_global_ids [exit, 0.08s]\n", - "2024-06-05 16:48:26 5875 INFO System memory: mem-used=21406154752 (62.3%), max-mem-used=21406154752 (62.3%), mem-total=34359738368 load-avg=(15.26, 5.28, 3.51)\n", - "2024-06-05 16:48:31 5943 INFO _validate_X_layers_normalized [exit, 7.41s]: homo_sapiens rows [0, 32000)\n", - "2024-06-05 16:48:31 5943 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [32000, 64000)\n", - "2024-06-05 16:48:32 5943 INFO _validate_X_layers_normalized [exit, 1.90s]: homo_sapiens rows [32000, 64000)\n", - "2024-06-05 16:48:32 5943 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, raw, rows [0, 96000)\n", - "2024-06-05 16:48:35 5943 INFO 
_validate_X_layers_has_unique_coords [exit, 2.12s]: homo_sapiens, raw, rows [0, 96000)\n", - "2024-06-05 16:48:35 5943 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, normalized, rows [0, 96000)\n", - "2024-06-05 16:48:37 5943 INFO _validate_X_layers_has_unique_coords [exit, 1.92s]: homo_sapiens, normalized, rows [0, 96000)\n", - "2024-06-05 16:48:37 5875 DEBUG validate_consolidation [enter]\n", - "2024-06-05 16:48:37 5875 INFO validate_consolidation [exit, 0.11s]\n", - "2024-06-05 16:48:37 5875 INFO Validation & consolidation complete.\n", - "2024-06-05 16:48:38,278 - distributed.scheduler - WARNING - Removing worker 'tcp://127.0.0.1:62983' caused the cluster to lose already computed task(s), which will be recomputed elsewhere: {'assert_all-1c2e7cb8-a6b6-47bd-9804-9c565ae29cb2'} (stimulus_id='handle-worker-cleanup-1717631318.278749')\n", - "2024-06-05 16:48:38 5875 INFO Dask cluster shut down\n", - "2024-06-05 16:48:38 5875 INFO Fini\n" + "2024-06-07 15:32:39 39783 DEBUG Setting NUMEXPR_MAX_THREADS environment variable to \"5\"\n", + "2024-06-07 15:32:39 39783 DEBUG Setting OMP_NUM_THREADS environment variable to \"1\"\n", + "2024-06-07 15:32:39 39783 DEBUG Setting OPENBLAS_NUM_THREADS environment variable to \"1\"\n", + "2024-06-07 15:32:39 39783 DEBUG Setting MKL_NUM_THREADS environment variable to \"1\"\n", + "2024-06-07 15:32:39 39783 DEBUG Setting VECLIB_MAXIMUM_THREADS environment variable to \"1\"\n", + "2024-06-07 15:32:39 39783 INFO CensusBuildArgs(working_dir=PosixPath('/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds'), config=CensusBuildConfig(verbose=1, dashboard=True, log_dir='logs', log_file='build.log', reports_dir='reports', consolidate=True, dryrun=False, cellxgene_census_S3_path='s3://cellxgene-data-public/cell-census', cellxgene_census_default_mirror_S3_path='s3://cellxgene-census-public-us-west-2/cell-census', cellxgene_census_S3_replica_path=None, logs_S3_path='s3://cellxgene-data-public-logs/builder', 
build_tag='test-spatial-build', max_worker_processes=48, host_validation_disable=False, host_validation_min_physical_memory=549755813888, host_validation_min_swap_memory=2199023255552, host_validation_min_free_disk_space=1979120929996, release_cleanup_days=32, dataset_id_blocklist_uri='https://raw.githubusercontent.com/chanzuckerberg/cellxgene-census/main/tools/cellxgene_census_builder/dataset_blocklist.txt', user_agent_prefix='census-builder-', user_agent_environment='unknown', manifest=<_io.TextIOWrapper name='/Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/manifest.csv' mode='r' encoding='UTF-8'>, test_first_n=0), state=CensusBuildState())\n", + "2024-06-07 15:32:39 39783 INFO System memory: mem-used=16792043520 (48.9%), max-mem-used=16792043520 (48.9%), mem-total=34359738368 load-avg=(1.69, 2.01, 2.01)\n", + "2024-06-07 15:32:39 39783 INFO Starting process resource logger with period 15.0\n", + "2024-06-07 15:32:41 39783 INFO Dask client created: \n", + "2024-06-07 15:32:41 39783 INFO Dask client using cluster: LocalCluster(de4babdf, 'tcp://127.0.0.1:65400', workers=10, threads=10)\n", + "2024-06-07 15:32:41 39783 INFO Dashboard link: http://127.0.0.1:8787/status\n", + "2024-06-07 15:32:41 39783 INFO Build step 1 - get source assets - started\n", + "2024-06-07 15:32:41 39783 INFO Loading manifest from file\n", + "2024-06-07 15:32:42 39783 INFO Dataset blocklist found, containing 5 ids.\n", + "2024-06-07 15:32:42 39783 INFO After blocklist and dedup, will load 8 datasets.\n", + "2024-06-07 15:32:42 39783 INFO Starting asset staging to /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads\n", + "2024-06-07 15:32:42 39790 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, bytes=42809187\n", + "2024-06-07 15:32:42 39792 DEBUG Copy complete, 
url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, bytes=147321916\n", + "2024-06-07 15:32:43 39791 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, bytes=28385363\n", + "2024-06-07 15:32:43 39786 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, bytes=49260402\n", + "2024-06-07 15:32:43 39794 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, bytes=126790774\n", + "2024-06-07 15:32:43 39788 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, bytes=637135389\n", + "2024-06-07 15:32:43 39793 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, bytes=625411595\n", + "2024-06-07 15:32:43 39787 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, bytes=1112747498\n", + "2024-06-07 15:32:43 39783 INFO Build step 1 - get source assets - finished\n", + "2024-06-07 15:32:43 39783 INFO Build step 2 - Create root collection - started\n", + "2024-06-07 15:32:43 39783 INFO homo_sapiens: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens\n", + "2024-06-07 15:32:43 39783 INFO mus_musculus: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus\n", + "2024-06-07 15:32:43 39783 INFO homo_sapiens: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens\n", + 
"2024-06-07 15:32:43 39783 INFO mus_musculus: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus\n", + "2024-06-07 15:32:43 39783 INFO Build step 2 - Create root collection - finished\n", + "2024-06-07 15:32:43 39783 INFO Build step 3 - accumulate obs and var axes - started\n", + "2024-06-07 15:32:43 39790 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/homo_sapiens - found 4992 cells\n", + "2024-06-07 15:32:43 39788 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/homo_sapiens - found 4992 cells\n", + "2024-06-07 15:32:43 39786 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/homo_sapiens - found 4992 cells\n", + "2024-06-07 15:32:43 39793 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/homo_sapiens - found 4992 cells\n", + "2024-06-07 15:32:43 39791 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/homo_sapiens - found 4992 cells\n", + "2024-06-07 15:32:43 39795 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/homo_sapiens - found 4992 cells\n", + "2024-06-07 15:32:43 39794 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/homo_sapiens - found 4992 cells\n", + "2024-06-07 15:32:43 39787 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/homo_sapiens - found 4992 cells\n", + "2024-06-07 15:32:43 39790 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:43 39790 DEBUG mus_musculus - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 15:32:43 39788 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:43 39788 DEBUG mus_musculus - H5AD has no data after filtering, skipping 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 15:32:43 39786 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/homo_sapiens - found 0 cells\n", + "2024-06-07 15:32:43 39786 DEBUG homo_sapiens - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 15:32:43 39793 DEBUG 
c6f6e674-b59d-46cf-8525-73f64f9eef8c/homo_sapiens - found 0 cells\n", + "2024-06-07 15:32:43 39793 DEBUG homo_sapiens - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 15:32:43 39795 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:43 39795 DEBUG mus_musculus - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 15:32:43 39791 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:43 39791 DEBUG mus_musculus - H5AD has no data after filtering, skipping 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 15:32:43 39787 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/homo_sapiens - found 0 cells\n", + "2024-06-07 15:32:43 39787 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 15:32:43 39794 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/homo_sapiens - found 0 cells\n", + "2024-06-07 15:32:43 39794 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 15:32:43 39790 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/homo_sapiens - found 0 cells\n", + "2024-06-07 15:32:43 39790 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 15:32:44 39795 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/homo_sapiens - found 0 cells\n", + "2024-06-07 15:32:44 39795 DEBUG homo_sapiens - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 15:32:44 39786 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39786 DEBUG mus_musculus - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 15:32:44 39788 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/mus_musculus - found 0 cells\n", + "2024-06-07 
15:32:44 39788 DEBUG mus_musculus - H5AD has no data after filtering, skipping 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 15:32:44 39793 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39793 DEBUG mus_musculus - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 15:32:44 39791 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39791 DEBUG mus_musculus - H5AD has no data after filtering, skipping 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 15:32:44 39787 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39787 DEBUG mus_musculus - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 15:32:44 39794 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39794 DEBUG mus_musculus - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 15:32:44 39790 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39790 DEBUG mus_musculus - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 15:32:44 39795 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39795 DEBUG mus_musculus - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 15:32:44 39786 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39786 DEBUG mus_musculus - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 15:32:44 39788 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/homo_sapiens - found 0 cells\n", + "2024-06-07 15:32:44 39788 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 
1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 15:32:44 39791 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/homo_sapiens - found 0 cells\n", + "2024-06-07 15:32:44 39791 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 15:32:44 39793 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39793 DEBUG mus_musculus - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 15:32:44 39787 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39787 DEBUG mus_musculus - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 15:32:44 39794 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/mus_musculus - found 0 cells\n", + "2024-06-07 15:32:44 39794 DEBUG mus_musculus - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 15:32:44 39783 INFO Build step 3 - axis accumulation complete\n", + "2024-06-07 15:32:44 39783 INFO Build step 3 - accumulate obs and var axes - finished\n", + "2024-06-07 15:32:44 39783 INFO Scaling cluster to 1 workers.\n", + "2024-06-07 15:32:44 39783 INFO Build step 4 - Populate X layers - started\n", + "2024-06-07 15:32:44 39783 INFO homo_sapiens: create X layers\n", + "2024-06-07 15:32:44 39783 INFO mus_musculus: create X layers\n", + "2024-06-07 15:32:44 39783 INFO homo_sapiens: create X layers\n", + "2024-06-07 15:32:44 39783 INFO mus_musculus: create X layers\n", + "2024-06-07 15:32:44 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0\n", + "2024-06-07 15:32:44 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 
15:32:49 39786 INFO dispatch_X_chunk [exit, 5.02s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0\n", + "2024-06-07 15:32:49 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0\n", + "2024-06-07 15:32:49 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 15:32:54 39786 INFO dispatch_X_chunk [exit, 4.33s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0\n", + "2024-06-07 15:32:54 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0\n", + "2024-06-07 15:32:54 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 15:32:54 39783 INFO System memory: mem-used=18183700480 (52.9%), max-mem-used=18183700480 (52.9%), mem-total=34359738368 load-avg=(2.34, 2.12, 2.05)\n", + "2024-06-07 15:32:57 39786 INFO dispatch_X_chunk [exit, 2.93s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0\n", + "2024-06-07 15:32:57 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0\n", + "2024-06-07 15:32:57 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 15:33:00 39786 INFO 
dispatch_X_chunk [exit, 3.70s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0\n", + "2024-06-07 15:33:00 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0\n", + "2024-06-07 15:33:00 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 15:33:02 39786 INFO dispatch_X_chunk [exit, 1.95s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0\n", + "2024-06-07 15:33:02 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0\n", + "2024-06-07 15:33:02 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 15:33:06 39786 INFO dispatch_X_chunk [exit, 3.04s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0\n", + "2024-06-07 15:33:06 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0\n", + "2024-06-07 15:33:06 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 15:33:09 39783 INFO System memory: mem-used=18928959488 (55.1%), max-mem-used=18928959488 (55.1%), mem-total=34359738368 load-avg=(2.19, 2.1, 2.04)\n", + "2024-06-07 15:33:09 39786 INFO dispatch_X_chunk [exit, 3.45s]: 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0\n", + "2024-06-07 15:33:09 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0\n", + "2024-06-07 15:33:09 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 15:33:13 39786 INFO dispatch_X_chunk [exit, 3.55s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0\n", + "2024-06-07 15:33:13 39783 INFO Save presence matrix for homo_sapiens - start\n", + "2024-06-07 15:33:13 39783 INFO Save presence matrix for homo_sapiens - finish\n", + "2024-06-07 15:33:13 39783 INFO Save presence matrix for mus_musculus - start\n", + "2024-06-07 15:33:13 39783 INFO Save presence matrix for mus_musculus - finish\n", + "2024-06-07 15:33:13 39783 INFO Save presence matrix for homo_sapiens - start\n", + "2024-06-07 15:33:13 39783 INFO Save presence matrix for homo_sapiens - finish\n", + "2024-06-07 15:33:13 39783 INFO Save presence matrix for mus_musculus - start\n", + "2024-06-07 15:33:13 39783 INFO Save presence matrix for mus_musculus - finish\n", + "2024-06-07 15:33:13 39783 INFO Build step 4 - Populate X layers - finished\n", + "2024-06-07 15:33:13 39783 INFO Build step 5 - Save axis and summary info - started\n", + "2024-06-07 15:33:13 39783 INFO homo_sapiens: writing obs dataframe\n", + "2024-06-07 15:33:13 39783 INFO homo_sapiens: empty obs dataframe\n", + "2024-06-07 15:33:13 39783 INFO homo_sapiens: writing var dataframe\n", + "2024-06-07 15:33:13 39783 INFO homo_sapiens: empty var dataframe\n", + "2024-06-07 15:33:13 39783 INFO mus_musculus: writing obs dataframe\n", + "2024-06-07 15:33:13 39783 INFO mus_musculus: 
empty obs dataframe\n", + "2024-06-07 15:33:13 39783 INFO mus_musculus: writing var dataframe\n", + "2024-06-07 15:33:13 39783 INFO mus_musculus: empty var dataframe\n", + "2024-06-07 15:33:13 39783 INFO homo_sapiens: writing obs dataframe\n", + "2024-06-07 15:33:13 39783 DEBUG experiment homo_sapiens obs = (39936, 30)\n", + "2024-06-07 15:33:13 39783 INFO homo_sapiens: writing var dataframe\n", + "2024-06-07 15:33:13 39783 DEBUG experiment homo_sapiens var = (36972, 6)\n", + "2024-06-07 15:33:13 39783 INFO mus_musculus: writing obs dataframe\n", + "2024-06-07 15:33:13 39783 INFO mus_musculus: empty obs dataframe\n", + "2024-06-07 15:33:13 39783 INFO mus_musculus: writing var dataframe\n", + "2024-06-07 15:33:13 39783 INFO mus_musculus: empty var dataframe\n", + "2024-06-07 15:33:13 39783 INFO Creating dataset_manifest\n", + "2024-06-07 15:33:13 39783 INFO Creating census_summary_cell_counts\n", + "2024-06-07 15:33:13 39783 INFO Creating census summary\n", + "2024-06-07 15:33:13 39783 INFO Create census organisms dataframe\n", + "2024-06-07 15:33:13 39783 INFO Build step 5 - Save axis and summary info - finished\n", + "2024-06-07 15:33:13 39783 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", + "2024-06-07 15:33:13 39783 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/raw\n", + "2024-06-07 15:33:13 39783 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/normalized\n", + "2024-06-07 15:33:13 39783 INFO Scaling cluster to 10 workers.\n", + "2024-06-07 15:33:13 39783 INFO Consolidate: found 36 TileDB objects to 
consolidate\n", + "2024-06-07 15:33:13 39783 INFO Consolidate: 36 consolidation jobs queued\n", + "2024-06-07 15:33:13 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/datasets\n", + "2024-06-07 15:33:13 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/datasets\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary_cell_counts\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary_cell_counts\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/organisms\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/organisms\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/obs\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/obs\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/var\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/var\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/X\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/X\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms\n", + "2024-06-07 15:33:14 
39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/obs\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/obs\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/var\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/var\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/X\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/X\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/obs\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.12 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/obs\n", + "2024-06-07 15:33:14 39786 INFO 
Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/var\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/var\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", + "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/raw\n", + "2024-06-07 15:33:16 39866 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X\n", + "2024-06-07 15:33:16 39863 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms\n", + "2024-06-07 15:33:16 39864 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens\n", + "2024-06-07 15:33:16 39863 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms\n", + "2024-06-07 15:33:16 39864 INFO Consolidate[vacuum=True] finish, 0.02 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens\n", + "2024-06-07 15:33:16 39866 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X\n", + "2024-06-07 15:33:16 39861 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA\n", + "2024-06-07 15:33:16 39868 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial\n", + "2024-06-07 15:33:16 39860 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/spatial\n", + "2024-06-07 15:33:16 39860 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/spatial\n", + "2024-06-07 15:33:16 39863 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/spatial\n", + "2024-06-07 15:33:16 39861 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA\n", + "2024-06-07 15:33:16 39862 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/var\n", + "2024-06-07 15:33:16 39861 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms\n", + 
"2024-06-07 15:33:16 39866 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA\n", + "2024-06-07 15:33:16 39864 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/obs\n", + "2024-06-07 15:33:16 39868 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial\n", + "2024-06-07 15:33:16 39861 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms\n", + "2024-06-07 15:33:16 39860 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus\n", + "2024-06-07 15:33:16 39860 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus\n", + "2024-06-07 15:33:16 39866 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA\n", + "2024-06-07 15:33:16 39863 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/spatial\n", + "2024-06-07 15:33:16 39864 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/obs\n", + "2024-06-07 15:33:16 39862 INFO Consolidate[vacuum=True] finish, 0.03 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/var\n", + "2024-06-07 15:33:16 39862 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/X\n", + "2024-06-07 15:33:16 39862 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/X\n", + "2024-06-07 15:33:16 39863 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/normalized\n", + "2024-06-07 15:33:18 39786 INFO Consolidate[vacuum=True] finish, 3.72 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/raw\n", + "2024-06-07 15:33:20 39863 INFO Consolidate[vacuum=True] finish, 3.39 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/normalized\n", + "2024-06-07 15:33:20 39783 INFO Validation of SOMA objects - start\n", + "2024-06-07 15:33:20 39783 DEBUG validate_directory_structure [enter]\n", + "2024-06-07 15:33:20 39783 INFO validate_directory_structure [exit, 0.00s]\n", + "2024-06-07 15:33:20 39783 DEBUG validate_relative_path [enter]\n", + "2024-06-07 15:33:20 39783 INFO validate_relative_path [exit, 0.03s]\n", + "2024-06-07 15:33:20 39783 DEBUG validate_axis_dataframes_schema [enter]\n", + "2024-06-07 15:33:20 39783 INFO validate_axis_dataframes_schema [exit, 0.03s]\n", + "2024-06-07 15:33:20 39783 DEBUG validate_manifest_contents [enter]\n", + "2024-06-07 15:33:20 39783 INFO validate_manifest_contents [exit, 0.00s]\n", + "2024-06-07 15:33:20 39864 DEBUG _validate_X_layers_raw_contents 
[enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 15:33:20 39861 DEBUG _validate_X_layers_raw_contents [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 15:33:20 39786 DEBUG _validate_X_layers_raw_contents [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 15:33:20 39866 DEBUG _validate_X_layers_raw_contents [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 15:33:20 39868 DEBUG _validate_X_layers_raw_contents [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 15:33:20 39867 DEBUG _validate_X_layers_raw_contents [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 15:33:20 39865 DEBUG _validate_X_layers_raw_contents [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 15:33:20 39863 DEBUG _validate_X_layers_raw_contents [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 15:33:20 39862 DEBUG _validate_X_layers_presence_general [enter]\n", + "2024-06-07 15:33:20 39860 DEBUG validate_internal_consistency [enter]\n", + "2024-06-07 15:33:20 39864 INFO _validate_X_layers_raw_contents [exit, 0.07s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 15:33:20 39866 INFO _validate_X_layers_raw_contents [exit, 0.08s]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 15:33:20 39786 INFO _validate_X_layers_raw_contents [exit, 0.08s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 15:33:20 39861 INFO _validate_X_layers_raw_contents [exit, 0.10s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 15:33:20 39867 INFO _validate_X_layers_raw_contents [exit, 0.09s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 15:33:20 39868 INFO _validate_X_layers_raw_contents [exit, 0.10s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 15:33:20 39865 INFO _validate_X_layers_raw_contents [exit, 0.11s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 15:33:20 39863 INFO _validate_X_layers_raw_contents [exit, 0.11s]: 
c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 15:33:20 39862 INFO _validate_X_layers_presence_general [exit, 0.13s]\n", + "2024-06-07 15:33:20 39860 INFO validate_internal_consistency [exit, 0.20s]\n", + "2024-06-07 15:33:20 39860 DEBUG _validate_X_layers_presence [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 15:33:20 39863 DEBUG _validate_axis_dataframes [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 15:33:20 39860 INFO _validate_X_layers_presence [exit, 0.05s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 15:33:20 39860 DEBUG _validate_X_layers_presence [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.06s]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.06s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.08s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.06s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.05s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.05s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 15:33:21 39860 DEBUG 
_validate_X_layers_presence [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 15:33:21 39863 INFO _validate_axis_dataframes [exit, 0.41s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 15:33:21 39863 DEBUG _validate_axis_dataframes [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.05s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [0, 32000)\n", + "2024-06-07 15:33:21 39860 INFO _validate_X_layers_normalized [exit, 0.02s]: homo_sapiens rows [0, 32000)\n", + "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [32000, 64000)\n", + "2024-06-07 15:33:21 39860 INFO _validate_X_layers_normalized [exit, 0.01s]: homo_sapiens rows [32000, 64000)\n", + "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [0, 32000)\n", + "2024-06-07 15:33:21 39863 INFO _validate_axis_dataframes [exit, 0.39s]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 15:33:21 39863 DEBUG _validate_axis_dataframes [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 15:33:22 39863 INFO _validate_axis_dataframes [exit, 0.35s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 15:33:22 39863 DEBUG _validate_axis_dataframes [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 15:33:22 39863 INFO _validate_axis_dataframes [exit, 0.32s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 15:33:22 39863 DEBUG _validate_axis_dataframes [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 15:33:22 39863 INFO _validate_axis_dataframes [exit, 0.34s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 15:33:22 39863 DEBUG _validate_axis_dataframes [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 15:33:23 39863 INFO _validate_axis_dataframes [exit, 0.32s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", 
+ "2024-06-07 15:33:23 39863 DEBUG _validate_axis_dataframes [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 15:33:23 39863 INFO _validate_axis_dataframes [exit, 0.35s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 15:33:23 39863 DEBUG _validate_axis_dataframes [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 15:33:23 39863 INFO _validate_axis_dataframes [exit, 0.46s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 15:33:23 39863 DEBUG validate_soma_bounding_box [enter]\n", + "2024-06-07 15:33:24 39863 INFO validate_soma_bounding_box [exit, 0.21s]\n", + "2024-06-07 15:33:24 39863 DEBUG validate_axis_dataframes_global_ids [enter]\n", + "2024-06-07 15:33:24 39863 INFO validate_axis_dataframes_global_ids [exit, 0.19s]\n", + "2024-06-07 15:33:24 39863 DEBUG validate_X_layers_schema [enter]\n", + "2024-06-07 15:33:24 39783 INFO System memory: mem-used=20338524160 (59.2%), max-mem-used=20338524160 (59.2%), mem-total=34359738368 load-avg=(6.93, 3.14, 2.42)\n", + "2024-06-07 15:33:24 39863 INFO validate_X_layers_schema [exit, 0.10s]\n", + "2024-06-07 15:33:28 39860 INFO _validate_X_layers_normalized [exit, 7.09s]: homo_sapiens rows [0, 32000)\n", + "2024-06-07 15:33:28 39860 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [32000, 64000)\n", + "2024-06-07 15:33:30 39860 INFO _validate_X_layers_normalized [exit, 2.00s]: homo_sapiens rows [32000, 64000)\n", + "2024-06-07 15:33:30 39860 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, raw, rows [0, 96000)\n", + "2024-06-07 15:33:30 39860 INFO _validate_X_layers_has_unique_coords [exit, 0.01s]: homo_sapiens, raw, rows [0, 96000)\n", + "2024-06-07 15:33:30 39860 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, normalized, rows [0, 96000)\n", + "2024-06-07 15:33:30 39860 INFO _validate_X_layers_has_unique_coords [exit, 0.01s]: homo_sapiens, normalized, rows [0, 96000)\n", + "2024-06-07 15:33:30 39860 DEBUG 
_validate_X_layers_has_unique_coords [enter]: homo_sapiens, raw, rows [0, 96000)\n", + "2024-06-07 15:33:32 39860 INFO _validate_X_layers_has_unique_coords [exit, 2.13s]: homo_sapiens, raw, rows [0, 96000)\n", + "2024-06-07 15:33:32 39860 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, normalized, rows [0, 96000)\n", + "2024-06-07 15:33:34 39860 INFO _validate_X_layers_has_unique_coords [exit, 1.92s]: homo_sapiens, normalized, rows [0, 96000)\n", + "2024-06-07 15:33:34 39783 DEBUG validate_consolidation [enter]\n", + "2024-06-07 15:33:34 39783 INFO validate_consolidation [exit, 0.16s]\n", + "2024-06-07 15:33:34 39783 INFO Validation & consolidation complete.\n", + "2024-06-07 15:33:35,857 - distributed.scheduler - WARNING - Removing worker 'tcp://127.0.0.1:49198' caused the cluster to lose already computed task(s), which will be recomputed elsewhere: {'assert_all-02830fee-7826-4829-b307-f07daafc4098'} (stimulus_id='handle-worker-cleanup-1717799615.857165')\n", + "2024-06-07 15:33:36 39783 INFO Dask cluster shut down\n", + "2024-06-07 15:33:36 39783 INFO Fini\n" ] } ], @@ -398,65 +484,592 @@ "id": "2ab2bf1a-edd5-49c0-8c23-7465e4e9f9b5", "metadata": {}, "source": [ - "## Query the generated census object\n", - "NOTE: Currently the demo shows summary cell counts but the demo will show case spatial queries once that is availabe" + "## Inspect the census object\n", + "**Work-In-Progress** \n", + "\n", + "_Note that there is a `census_data` and `census_spatial` collection side by side and the `census_spatial` collection also contains a `spatial` collection along with `obs` and `ms`_" ] }, { "cell_type": "code", "execution_count": 10, - "id": "a82ad4cb-b71b-40b0-9954-a1cae985c985", + "id": "10e13cb5-9291-4fb1-99f4-776b870261f4", "metadata": {}, "outputs": [], "source": [ - "import cellxgene_census" + "import tiledbsoma" ] }, { "cell_type": "code", "execution_count": 11, + "id": "ec5d1ffd-ba81-4386-bc26-574652ffdecc", + "metadata": {}, + "outputs": [ + { 
+ "data": { + "text/plain": [ + "'/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soma_root_collection_uri = f\"{census_builder_working_dir}/{census_build_tag}/soma\"\n", + "soma_root_collection_uri" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "51c1e38d-d767-4870-94c6-a9c00630deb1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soma_root_collection = tiledbsoma.open(soma_root_collection_uri)\n", + "soma_root_collection" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8de7aafb-30d3-40d7-96c0-5c7a2fe65b02", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soma_root_collection[\"census_spatial\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5a78859a-db6a-4ecd-95ff-8c397e8e0875", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soma_root_collection[\"census_spatial\"][\"homo_sapiens\"]" + ] + }, + { + "cell_type": "markdown", + "id": "a05a3395-b510-4c6b-a5ff-0485535e63aa", + "metadata": {}, + "source": [ + "# Query the census object\n", + "**Work-In-Progress**" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a82ad4cb-b71b-40b0-9954-a1cae985c985", + "metadata": {}, + "outputs": [], + "source": [ + "import cellxgene_census\n", + "\n", + "census = cellxgene_census.open_soma(uri=soma_root_collection_uri)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "id": "a685b68b-b6d1-4ab9-a6be-33ba48f365fa", "metadata": {}, 
"outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " organism category label \\\n", - "0 Homo sapiens all na \n", - "1 Homo sapiens assay Visium Spatial Gene Expression \n", - "2 Homo sapiens cell_type neuronal receptor cell \n", - "3 Homo sapiens cell_type epithelial cell \n", - "4 Homo sapiens cell_type blood cell \n", - ".. ... ... ... \n", - "62 Homo sapiens tissue primary visual cortex \n", - "63 Homo sapiens tissue_general heart \n", - "64 Homo sapiens tissue_general brain \n", - "65 Homo sapiens tissue_general placenta \n", - "66 Homo sapiens tissue_general liver \n", - "\n", - " ontology_term_id total_cell_count unique_cell_count \n", - "0 na 39936 39936 \n", - "1 EFO:0010961 39936 39936 \n", - "2 CL:0000006 3 3 \n", - "3 CL:0000066 335 335 \n", - "4 CL:0000081 193 193 \n", - ".. ... ... ... \n", - "62 UBERON:0002436 4992 4992 \n", - "63 UBERON:0000948 9984 9984 \n", - "64 UBERON:0000955 9984 9984 \n", - "65 UBERON:0001987 9984 9984 \n", - "66 UBERON:0002107 9984 9984 \n", - "\n", - "[67 rows x 6 columns]\n" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
soma_joiniddataset_idassay_ontology_term_idcell_typetissueraw_sumnnzraw_mean_nnzraw_variance_nnzn_measured_vars
00c6f6e674-b59d-46cf-8525-73f64f9eef8cEFO:0010961unknowncaudate lobe of liver4113.013762.989099232.39915421082
11c6f6e674-b59d-46cf-8525-73f64f9eef8cEFO:0010961periportal region hepatocytecaudate lobe of liver9551.025513.744022669.16307721082
22c6f6e674-b59d-46cf-8525-73f64f9eef8cEFO:0010961unknowncaudate lobe of liver5874.017953.272423349.79029321082
33c6f6e674-b59d-46cf-8525-73f64f9eef8cEFO:0010961periportal region hepatocytecaudate lobe of liver9659.023684.078970826.86617421082
44c6f6e674-b59d-46cf-8525-73f64f9eef8cEFO:0010961hepatocytecaudate lobe of liver6293.018463.408992342.38493821082
.................................
39931399311bb92cf8-ab3f-4bb0-a722-b241b5d377edEFO:0010961periportal region hepatocytecaudate lobe of liver14401.032914.375874712.89484620713
39932399321bb92cf8-ab3f-4bb0-a722-b241b5d377edEFO:0010961unknowncaudate lobe of liver3369.011402.95526398.84347520713
39933399331bb92cf8-ab3f-4bb0-a722-b241b5d377edEFO:0010961periportal region hepatocytecaudate lobe of liver9040.025833.499806334.45613920713
39934399341bb92cf8-ab3f-4bb0-a722-b241b5d377edEFO:0010961unknowncaudate lobe of liver2679.09732.75334094.60576320713
39935399351bb92cf8-ab3f-4bb0-a722-b241b5d377edEFO:0010961unknowncaudate lobe of liver996.04272.33255329.81872720713
\n", + "

39936 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " soma_joinid dataset_id \\\n", + "0 0 c6f6e674-b59d-46cf-8525-73f64f9eef8c \n", + "1 1 c6f6e674-b59d-46cf-8525-73f64f9eef8c \n", + "2 2 c6f6e674-b59d-46cf-8525-73f64f9eef8c \n", + "3 3 c6f6e674-b59d-46cf-8525-73f64f9eef8c \n", + "4 4 c6f6e674-b59d-46cf-8525-73f64f9eef8c \n", + "... ... ... \n", + "39931 39931 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed \n", + "39932 39932 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed \n", + "39933 39933 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed \n", + "39934 39934 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed \n", + "39935 39935 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed \n", + "\n", + " assay_ontology_term_id cell_type \\\n", + "0 EFO:0010961 unknown \n", + "1 EFO:0010961 periportal region hepatocyte \n", + "2 EFO:0010961 unknown \n", + "3 EFO:0010961 periportal region hepatocyte \n", + "4 EFO:0010961 hepatocyte \n", + "... ... ... \n", + "39931 EFO:0010961 periportal region hepatocyte \n", + "39932 EFO:0010961 unknown \n", + "39933 EFO:0010961 periportal region hepatocyte \n", + "39934 EFO:0010961 unknown \n", + "39935 EFO:0010961 unknown \n", + "\n", + " tissue raw_sum nnz raw_mean_nnz raw_variance_nnz \\\n", + "0 caudate lobe of liver 4113.0 1376 2.989099 232.399154 \n", + "1 caudate lobe of liver 9551.0 2551 3.744022 669.163077 \n", + "2 caudate lobe of liver 5874.0 1795 3.272423 349.790293 \n", + "3 caudate lobe of liver 9659.0 2368 4.078970 826.866174 \n", + "4 caudate lobe of liver 6293.0 1846 3.408992 342.384938 \n", + "... ... ... ... ... ... \n", + "39931 caudate lobe of liver 14401.0 3291 4.375874 712.894846 \n", + "39932 caudate lobe of liver 3369.0 1140 2.955263 98.843475 \n", + "39933 caudate lobe of liver 9040.0 2583 3.499806 334.456139 \n", + "39934 caudate lobe of liver 2679.0 973 2.753340 94.605763 \n", + "39935 caudate lobe of liver 996.0 427 2.332553 29.818727 \n", + "\n", + " n_measured_vars \n", + "0 21082 \n", + "1 21082 \n", + "2 21082 \n", + "3 21082 \n", + "4 21082 \n", + "... ... 
\n", + "39931 20713 \n", + "39932 20713 \n", + "39933 20713 \n", + "39934 20713 \n", + "39935 20713 \n", + "\n", + "[39936 rows x 10 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "obs_columns = [\n", + " \"soma_joinid\",\n", + " \"dataset_id\",\n", + " \"assay_ontology_term_id\",\n", + " \"cell_type\",\n", + " \"tissue\",\n", + " \"raw_sum\",\n", + " \"nnz\",\n", + " \"raw_mean_nnz\",\n", + " \"raw_variance_nnz\",\n", + " \"n_measured_vars\",\n", + "]\n", + "obs_df = census[\"census_spatial\"][\"homo_sapiens\"].obs.read(column_names=obs_columns).concat().to_pandas()\n", + "obs_df" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ad894083-a79b-4395-a836-e3136a28f502", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
soma_joinidfeature_idfeature_namefeature_lengthnnzn_measured_obs
00ENSG00000243485MIR1302-2HG102114992
11ENSG00000237613FAM138A121900
22ENSG00000186092OR4F5261800
33ENSG00000238009ENSG00000238009.637265139936
44ENSG00000239945ENSG00000239945.1131900
.....................
3696736967ENSG00000280081LINC01667416914992
3696836968ENSG00000235609ENSG00000235609.759291479984
3696936969ENSG00000265590CFAP298-TCP10L1932600
3697036970ENSG00000249624IFNAR2-IL10RB3943659984
3697136971ENSG00000249209ENSG00000249209.210781259984
\n", + "

36972 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " soma_joinid feature_id feature_name feature_length nnz \\\n", + "0 0 ENSG00000243485 MIR1302-2HG 1021 1 \n", + "1 1 ENSG00000237613 FAM138A 1219 0 \n", + "2 2 ENSG00000186092 OR4F5 2618 0 \n", + "3 3 ENSG00000238009 ENSG00000238009.6 3726 51 \n", + "4 4 ENSG00000239945 ENSG00000239945.1 1319 0 \n", + "... ... ... ... ... ... \n", + "36967 36967 ENSG00000280081 LINC01667 4169 1 \n", + "36968 36968 ENSG00000235609 ENSG00000235609.7 5929 147 \n", + "36969 36969 ENSG00000265590 CFAP298-TCP10L 19326 0 \n", + "36970 36970 ENSG00000249624 IFNAR2-IL10RB 3943 65 \n", + "36971 36971 ENSG00000249209 ENSG00000249209.2 1078 125 \n", + "\n", + " n_measured_obs \n", + "0 4992 \n", + "1 0 \n", + "2 0 \n", + "3 39936 \n", + "4 0 \n", + "... ... \n", + "36967 4992 \n", + "36968 9984 \n", + "36969 0 \n", + "36970 9984 \n", + "36971 9984 \n", + "\n", + "[36972 rows x 6 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "with cellxgene_census.open_soma(uri=f\"{census_builder_working_dir}/{census_build_tag}/soma\") as census:\n", - " census_summary_cell_counts = census[\"census_info\"][\"summary_cell_counts\"].read().concat().to_pandas()\n", - " census_summary_cell_counts = census_summary_cell_counts.drop(columns=[\"soma_joinid\"])\n", - " print(census_summary_cell_counts)" + "var_df = census[\"census_spatial\"][\"homo_sapiens\"].ms[\"RNA\"].var.read().concat().to_pandas()\n", + "\n", + "var_df" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "f1b7ec72-de3c-4e35-9301-7a5c332c1c5c", + "metadata": {}, + "outputs": [], + "source": [ + "census.close()" ] } ], diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/build_soma.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/build_soma.py index b563c993b..20a2fa6c0 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/build_soma.py +++ 
b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/build_soma.py @@ -25,6 +25,7 @@ from .globals import ( CENSUS_DATA_NAME, CENSUS_INFO_NAME, + CENSUS_SPATIAL_NAME, SOMA_TileDB_Context, ) from .manifest import load_manifest @@ -153,7 +154,7 @@ def populate_root_collection(root_collection: soma.Collection) -> soma.Collectio root_collection.metadata["git_commit_sha"] = sha # Create sub-collections for experiments, etc. - for n in [CENSUS_INFO_NAME, CENSUS_DATA_NAME]: + for n in [CENSUS_INFO_NAME, CENSUS_DATA_NAME, CENSUS_SPATIAL_NAME]: root_collection.add_new_collection(n) return root_collection @@ -198,7 +199,14 @@ def build_step2_create_root_collection(soma_path: str, experiment_builders: list populate_root_collection(root_collection) for e in experiment_builders: - e.create(census_data=root_collection[CENSUS_DATA_NAME]) + # TODO (spatial): Confirm the decision that we are clearly separating + # experiments containing spatial assays from experiments not containing + # spatial assays. 
That is, an experiment should never contain assays from + # spatial and non-spatial modalities + if e.specification.is_exclusively_spatial(): + e.create(census_data=root_collection[CENSUS_SPATIAL_NAME]) + else: + e.create(census_data=root_collection[CENSUS_DATA_NAME]) logger.info("Build step 2 - Create root collection - finished") return root_collection diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_builder.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_builder.py index 1410c2a63..c04d871e1 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_builder.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_builder.py @@ -25,6 +25,7 @@ from .anndata import AnnDataFilterSpec, AnnDataProxy, open_anndata from .datasets import Dataset from .globals import ( + ALLOWED_SPATIAL_ASSAYS, CENSUS_OBS_PLATFORM_CONFIG, CENSUS_OBS_TABLE_SPEC, CENSUS_VAR_PLATFORM_CONFIG, @@ -109,6 +110,10 @@ def create( """Factory method. Do not instantiate the class directly.""" return cls(name, label, anndata_cell_filter_spec, organism_ontology_term_id) + def is_exclusively_spatial(self) -> bool: + """Returns True if the experiment specification EXCLUSIVELY involves spatial assays.""" + return self.anndata_cell_filter_spec["assay_ontology_term_ids"] == ALLOWED_SPATIAL_ASSAYS + class ExperimentBuilder: """Class that embodies the operators and state to build an Experiment. 
@@ -143,7 +148,7 @@ def anndata_cell_filter_spec(self) -> AnnDataFilterSpec: return self.specification.anndata_cell_filter_spec def create(self, census_data: soma.Collection) -> None: - """Create experiment within the specified Collection with a single Measurement.""" + """Create experiment within the specified Collection.""" logger.info(f"{self.name}: create experiment at {urlcat(census_data.uri, self.name)}") self.experiment = census_data.add_new_collection(self.name, soma.Experiment) @@ -155,6 +160,10 @@ def create(self, census_data: soma.Collection) -> None: # make measurement and add to ms collection ms.add_new_collection(MEASUREMENT_RNA_NAME, soma.Measurement) + # create `spatial` + if self.specification.is_exclusively_spatial(): + self.experiment.add_new_collection("spatial") + def write_obs_dataframe(self) -> None: logger.info(f"{self.name}: writing obs dataframe") assert self.experiment is not None @@ -661,7 +670,7 @@ def read_and_dispatch_partial_h5ad( if d.dataset_id in eb.dataset_obs_joinid_start for chunk in range(0, eb.dataset_n_obs[d.dataset_id], REDUCE_X_MAJOR_ROW_STRIDE) ] - per_eb_results[eb.name] = ( + per_eb_results[eb.experiment_uri] = ( dask.bag.from_sequence(read_file_chunks) .starmap(read_and_dispatch_partial_h5ad, global_var_joinids=global_var_joinids) .foldby("dataset_id", reduce_X_stats_binop) @@ -685,12 +694,12 @@ def populate_X_layers( per_eb_results = _reduce_X_matrices(assets_path, datasets, experiment_builders) for eb in experiment_builders: - if eb.name not in per_eb_results: + if eb.experiment_uri not in per_eb_results: continue # add per-dataset stats to each per-dataset XReduction eb_result: list[XReduction] = [] - for dataset_id, xreduction in per_eb_results[eb.name]: + for dataset_id, xreduction in per_eb_results[eb.experiment_uri]: assert dataset_id == xreduction["dataset_id"] d = datasets_by_id[dataset_id] eb_result.extend( diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_specs.py 
b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_specs.py index d287c4ece..d470cbe23 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_specs.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_specs.py @@ -1,7 +1,7 @@ import functools from .experiment_builder import ExperimentBuilder, ExperimentSpecification -from .globals import RNA_SEQ +from .globals import ALLOWED_SPATIAL_ASSAYS, RNA_SEQ @functools.cache @@ -30,6 +30,25 @@ def make_experiment_specs() -> list[ExperimentSpecification]: }, organism_ontology_term_id="NCBITaxon:10090", ), + # Experiments for spatial assays + ExperimentSpecification.create( + name="homo_sapiens", + label="Homo sapiens", + anndata_cell_filter_spec={ + "organism_ontology_term_id": "NCBITaxon:9606", + "assay_ontology_term_ids": ALLOWED_SPATIAL_ASSAYS, + }, + organism_ontology_term_id="NCBITaxon:9606", + ), + ExperimentSpecification.create( + name="mus_musculus", + label="Mus musculus", + anndata_cell_filter_spec={ + "organism_ontology_term_id": "NCBITaxon:10090", + "assay_ontology_term_ids": ALLOWED_SPATIAL_ASSAYS, + }, + organism_ontology_term_id="NCBITaxon:10090", + ), ] diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py index 801717f98..1dfc67bd2 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py @@ -52,6 +52,9 @@ # top-level SOMA collection CENSUS_DATA_NAME = "census_data" +# top-level SOMA collection +CENSUS_SPATIAL_NAME = "census_spatial" + # "census_info"/"summary_cell_counts" SOMA Dataframe CENSUS_SUMMARY_CELL_COUNTS_NAME = "summary_cell_counts" # object name @@ -329,7 +332,6 @@ "EFO:0010713", # 10x immune profiling "EFO:0010714", # 10x TCR enrichment 
"EFO:0010715", # 10x Ig enrichment - "EFO:0010961", # Visium Spatial Gene Expression "EFO:0010964", # barcoded plate-based single cell RNA-seq "EFO:0011025", # 10x 5' v1 "EFO:0022396", # TruSeq @@ -353,6 +355,12 @@ "EFO:0700016", # Smart-seq v4 ] +# list of EFO terms that correspond to SPATIAL modality/measurement. These terms +# define the inclusive filter applied to obs.assay_ontology_term_id. All other +ALLOWED_SPATIAL_ASSAYS = [ + "EFO:0010961", # Visium Spatial Gene Expression +] + # Full-gene assays have special handling in the "normalized" X layers FULL_GENE_ASSAY = [ "EFO:0003755", # FL-cDNA diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/validate_soma.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/validate_soma.py index 197013a52..ec2d8c11c 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/validate_soma.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/validate_soma.py @@ -38,6 +38,7 @@ CENSUS_OBS_STATS_COLUMNS, CENSUS_OBS_TABLE_SPEC, CENSUS_SCHEMA_VERSION, + CENSUS_SPATIAL_NAME, CENSUS_SUMMARY_CELL_COUNTS_NAME, CENSUS_SUMMARY_CELL_COUNTS_TABLE_SPEC, CENSUS_SUMMARY_NAME, @@ -82,9 +83,18 @@ def assert_all(__iterable: Iterable[object]) -> bool: return r +def get_census_data_collection_name(eb: ExperimentSpecification) -> str: + return CENSUS_SPATIAL_NAME if eb.is_exclusively_spatial() else CENSUS_DATA_NAME + + +def get_experiment_uri(base_uri: str, eb: ExperimentSpecification) -> str: + census_data_collection_name = get_census_data_collection_name(eb) + return urlcat(base_uri, census_data_collection_name, eb.name) + + def open_experiment(base_uri: str, eb: ExperimentSpecification) -> soma.Experiment: """Helper function that knows the Census schema path conventions.""" - return soma.Experiment.open(urlcat(base_uri, CENSUS_DATA_NAME, eb.name), mode="r") + return soma.Experiment.open(get_experiment_uri(base_uri, eb), mode="r") def 
get_experiment_shape(base_uri: str, specs: list[ExperimentSpecification]) -> dict[str, tuple[int, int]]: @@ -230,9 +240,10 @@ def validate_axis_dataframes_global_ids( .concat() .to_pandas() ) - assert eb_info[eb.name].n_obs == len(census_obs_df) == exp.obs.count - assert (len(census_obs_df) == 0) or (census_obs_df.soma_joinid.max() + 1 == eb_info[eb.name].n_obs) - assert eb_info[eb.name].dataset_ids == set(census_obs_df.dataset_id.unique()) + eb_info_key = get_experiment_uri(soma_path, eb) + assert eb_info[eb_info_key].n_obs == len(census_obs_df) == exp.obs.count + assert (len(census_obs_df) == 0) or (census_obs_df.soma_joinid.max() + 1 == eb_info[eb_info_key].n_obs) + assert eb_info[eb_info_key].dataset_ids == set(census_obs_df.dataset_id.unique()) # Validate that all obs soma_joinids are unique and in the range [0, n). obs_unique_joinids = np.unique(census_obs_df.soma_joinid.to_numpy()) @@ -254,13 +265,13 @@ def validate_axis_dataframes_global_ids( del census_obs_df, obs_unique_joinids # var - n_vars = len(eb_info[eb.name].vars) + n_vars = len(eb_info[eb_info_key].vars) census_var_df = ( exp.ms[MEASUREMENT_RNA_NAME].var.read(column_names=["feature_id", "soma_joinid"]).concat().to_pandas() ) assert n_vars == len(census_var_df) == exp.ms[MEASUREMENT_RNA_NAME].var.count - assert eb_info[eb.name].vars == set(census_var_df.feature_id.array) + assert eb_info[eb_info_key].vars == set(census_var_df.feature_id.array) assert (len(census_var_df) == 0) or (census_var_df.soma_joinid.max() + 1 == n_vars) # Validate that all var soma_joinids are unique and in the range [0, n). 
@@ -289,7 +300,7 @@ def _validate_axis_dataframes( eb_info: dict[str, EbInfo] = {} for eb in experiment_specifications: with soma.Collection.open(soma_path, context=SOMA_TileDB_Context()) as census: - census_data = census[CENSUS_DATA_NAME] + census_data_collection = census[get_census_data_collection_name(eb)] dataset_id = dataset.dataset_id ad = open_anndata( dataset, @@ -298,8 +309,16 @@ def _validate_axis_dataframes( var_column_names=CXG_VAR_COLUMNS_READ, filter_spec=eb.anndata_cell_filter_spec, ) - eb_info[eb.name] = EbInfo() - se = census_data[eb.name] + se = census_data_collection[eb.name] + + # NOTE: Since we are validating data for each experiment, we + # use the experiment uri as the key for the data that must be validated. + # Using just the experiment spec name would cause collisions as in the case + # of spatial and non-spatial experiments with the same name (experiment spec name) + # but stored under different census root collections + eb_info_key = get_experiment_uri(soma_path, eb) + eb_info[eb_info_key] = EbInfo() + dataset_obs = ( se.obs.read( column_names=list(CENSUS_OBS_TABLE_SPEC.field_names()), @@ -326,11 +345,13 @@ def _validate_axis_dataframes( if isinstance(dataset_obs[key].dtype, pd.CategoricalDtype): dataset_obs[key] = dataset_obs[key].astype(dataset_obs[key].cat.categories.dtype) - assert len(dataset_obs) == len(ad.obs), f"{dataset.dataset_id}/{eb.name} obs length mismatch" + assert ( + len(dataset_obs) == len(ad.obs) + ), f"{dataset.dataset_id}/{eb.name} obs length mismatch soma experiment obs len: {len(dataset_obs)} != anndata obs len: {len(ad.obs)}" if ad.n_obs > 0: - eb_info[eb.name].n_obs += ad.n_obs - eb_info[eb.name].dataset_ids.add(dataset_id) - eb_info[eb.name].vars |= set(ad.var.index.array) + eb_info[eb_info_key].n_obs += ad.n_obs + eb_info[eb_info_key].dataset_ids.add(dataset_id) + eb_info[eb_info_key].vars |= set(ad.var.index.array) ad_obs = ad.obs[list(set(CXG_OBS_TERM_COLUMNS) - set(CENSUS_OBS_STATS_COLUMNS))].reset_index( 
drop=True ) @@ -343,11 +364,11 @@ def _validate_axis_dataframes( def reduce_eb_info(results: Sequence[dict[str, EbInfo]]) -> dict[str, EbInfo]: eb_info = {} for res in results: - for name, info in res.items(): - if name not in eb_info: - eb_info[name] = copy.copy(info) + for eb_info_key, info in res.items(): + if eb_info_key not in eb_info: + eb_info[eb_info_key] = copy.copy(info) else: - eb_info[name].update(info) + eb_info[eb_info_key].update(info) return eb_info eb_info = ( @@ -815,8 +836,9 @@ def validate_X_layers_schema( with open_experiment(soma_path, eb) as exp: assert soma.Collection.exists(exp.ms[MEASUREMENT_RNA_NAME].X.uri) - n_obs = eb_info[eb.name].n_obs - n_vars = eb_info[eb.name].n_vars + eb_info_key = get_experiment_uri(soma_path, eb) + n_obs = eb_info[eb_info_key].n_obs + n_vars = eb_info[eb_info_key].n_vars assert n_obs == exp.obs.count assert n_vars == exp.ms[MEASUREMENT_RNA_NAME].var.count @@ -1011,8 +1033,9 @@ def get_sparse_arrays(C: soma.Collection) -> list[soma.SparseNDArray]: # first, confirm we set shape correctly, as the code uses it as the max bounding box for eb in experiment_specifications: with open_experiment(soma_path, eb) as exp: - n_obs = eb_info[eb.name].n_obs - n_vars = eb_info[eb.name].n_vars + eb_info_key = get_experiment_uri(soma_path, eb) + n_obs = eb_info[eb_info_key].n_obs + n_vars = eb_info[eb_info_key].n_vars for layer_name in exp.ms[MEASUREMENT_RNA_NAME].X: assert exp.ms[MEASUREMENT_RNA_NAME].X[layer_name].shape == (n_obs, n_vars) if "feature_dataset_presence_matrix" in exp.ms[MEASUREMENT_RNA_NAME]: From 44c1109e70d53b27c0be06c710438536eef46630 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Fri, 7 Jun 2024 16:24:51 -0700 Subject: [PATCH 22/29] Update notebook --- .../census_spatial_dataset_ingest.ipynb | 642 +++++++++--------- 1 file changed, 321 insertions(+), 321 deletions(-) diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb 
b/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb index 8ea0520cf..f89cf825e 100644 --- a/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb +++ b/tools/cellxgene_census_builder/spatial_dev_tools/census_spatial_dataset_ingest.ipynb @@ -151,327 +151,327 @@ "name": "stdout", "output_type": "stream", "text": [ - "2024-06-07 15:32:39 39783 DEBUG Setting NUMEXPR_MAX_THREADS environment variable to \"5\"\n", - "2024-06-07 15:32:39 39783 DEBUG Setting OMP_NUM_THREADS environment variable to \"1\"\n", - "2024-06-07 15:32:39 39783 DEBUG Setting OPENBLAS_NUM_THREADS environment variable to \"1\"\n", - "2024-06-07 15:32:39 39783 DEBUG Setting MKL_NUM_THREADS environment variable to \"1\"\n", - "2024-06-07 15:32:39 39783 DEBUG Setting VECLIB_MAXIMUM_THREADS environment variable to \"1\"\n", - "2024-06-07 15:32:39 39783 INFO CensusBuildArgs(working_dir=PosixPath('/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds'), config=CensusBuildConfig(verbose=1, dashboard=True, log_dir='logs', log_file='build.log', reports_dir='reports', consolidate=True, dryrun=False, cellxgene_census_S3_path='s3://cellxgene-data-public/cell-census', cellxgene_census_default_mirror_S3_path='s3://cellxgene-census-public-us-west-2/cell-census', cellxgene_census_S3_replica_path=None, logs_S3_path='s3://cellxgene-data-public-logs/builder', build_tag='test-spatial-build', max_worker_processes=48, host_validation_disable=False, host_validation_min_physical_memory=549755813888, host_validation_min_swap_memory=2199023255552, host_validation_min_free_disk_space=1979120929996, release_cleanup_days=32, dataset_id_blocklist_uri='https://raw.githubusercontent.com/chanzuckerberg/cellxgene-census/main/tools/cellxgene_census_builder/dataset_blocklist.txt', user_agent_prefix='census-builder-', user_agent_environment='unknown', manifest=<_io.TextIOWrapper 
name='/Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/manifest.csv' mode='r' encoding='UTF-8'>, test_first_n=0), state=CensusBuildState())\n", - "2024-06-07 15:32:39 39783 INFO System memory: mem-used=16792043520 (48.9%), max-mem-used=16792043520 (48.9%), mem-total=34359738368 load-avg=(1.69, 2.01, 2.01)\n", - "2024-06-07 15:32:39 39783 INFO Starting process resource logger with period 15.0\n", - "2024-06-07 15:32:41 39783 INFO Dask client created: \n", - "2024-06-07 15:32:41 39783 INFO Dask client using cluster: LocalCluster(de4babdf, 'tcp://127.0.0.1:65400', workers=10, threads=10)\n", - "2024-06-07 15:32:41 39783 INFO Dashboard link: http://127.0.0.1:8787/status\n", - "2024-06-07 15:32:41 39783 INFO Build step 1 - get source assets - started\n", - "2024-06-07 15:32:41 39783 INFO Loading manifest from file\n", - "2024-06-07 15:32:42 39783 INFO Dataset blocklist found, containing 5 ids.\n", - "2024-06-07 15:32:42 39783 INFO After blocklist and dedup, will load 8 datasets.\n", - "2024-06-07 15:32:42 39783 INFO Starting asset staging to /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads\n", - "2024-06-07 15:32:42 39790 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, bytes=42809187\n", - "2024-06-07 15:32:42 39792 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, bytes=147321916\n", - "2024-06-07 15:32:43 39791 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, bytes=28385363\n", - "2024-06-07 15:32:43 39786 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, bytes=49260402\n", - "2024-06-07 15:32:43 39794 DEBUG Copy complete, 
url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, bytes=126790774\n", - "2024-06-07 15:32:43 39788 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, bytes=637135389\n", - "2024-06-07 15:32:43 39793 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, bytes=625411595\n", - "2024-06-07 15:32:43 39787 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, bytes=1112747498\n", - "2024-06-07 15:32:43 39783 INFO Build step 1 - get source assets - finished\n", - "2024-06-07 15:32:43 39783 INFO Build step 2 - Create root collection - started\n", - "2024-06-07 15:32:43 39783 INFO homo_sapiens: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens\n", - "2024-06-07 15:32:43 39783 INFO mus_musculus: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus\n", - "2024-06-07 15:32:43 39783 INFO homo_sapiens: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens\n", - "2024-06-07 15:32:43 39783 INFO mus_musculus: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus\n", - "2024-06-07 15:32:43 39783 INFO Build step 2 - Create root collection - finished\n", - "2024-06-07 15:32:43 39783 INFO Build step 3 - accumulate obs and var axes - started\n", - "2024-06-07 15:32:43 39790 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/homo_sapiens - found 4992 cells\n", - "2024-06-07 15:32:43 39788 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/homo_sapiens - found 
4992 cells\n", - "2024-06-07 15:32:43 39786 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/homo_sapiens - found 4992 cells\n", - "2024-06-07 15:32:43 39793 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/homo_sapiens - found 4992 cells\n", - "2024-06-07 15:32:43 39791 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/homo_sapiens - found 4992 cells\n", - "2024-06-07 15:32:43 39795 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/homo_sapiens - found 4992 cells\n", - "2024-06-07 15:32:43 39794 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/homo_sapiens - found 4992 cells\n", - "2024-06-07 15:32:43 39787 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/homo_sapiens - found 4992 cells\n", - "2024-06-07 15:32:43 39790 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:43 39790 DEBUG mus_musculus - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-07 15:32:43 39788 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:43 39788 DEBUG mus_musculus - H5AD has no data after filtering, skipping 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-07 15:32:43 39786 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/homo_sapiens - found 0 cells\n", - "2024-06-07 15:32:43 39786 DEBUG homo_sapiens - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-07 15:32:43 39793 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/homo_sapiens - found 0 cells\n", - "2024-06-07 15:32:43 39793 DEBUG homo_sapiens - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-07 15:32:43 39795 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:43 39795 DEBUG mus_musculus - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-07 15:32:43 39791 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:43 39791 DEBUG 
mus_musculus - H5AD has no data after filtering, skipping 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-07 15:32:43 39787 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/homo_sapiens - found 0 cells\n", - "2024-06-07 15:32:43 39787 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-07 15:32:43 39794 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/homo_sapiens - found 0 cells\n", - "2024-06-07 15:32:43 39794 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-07 15:32:43 39790 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/homo_sapiens - found 0 cells\n", - "2024-06-07 15:32:43 39790 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-07 15:32:44 39795 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/homo_sapiens - found 0 cells\n", - "2024-06-07 15:32:44 39795 DEBUG homo_sapiens - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-07 15:32:44 39786 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39786 DEBUG mus_musculus - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-07 15:32:44 39788 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39788 DEBUG mus_musculus - H5AD has no data after filtering, skipping 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-07 15:32:44 39793 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39793 DEBUG mus_musculus - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-07 15:32:44 39791 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39791 DEBUG mus_musculus - H5AD has no data after filtering, skipping 
07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-07 15:32:44 39787 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39787 DEBUG mus_musculus - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-07 15:32:44 39794 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39794 DEBUG mus_musculus - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-07 15:32:44 39790 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39790 DEBUG mus_musculus - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-07 15:32:44 39795 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39795 DEBUG mus_musculus - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-07 15:32:44 39786 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39786 DEBUG mus_musculus - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-07 15:32:44 39788 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/homo_sapiens - found 0 cells\n", - "2024-06-07 15:32:44 39788 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-07 15:32:44 39791 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/homo_sapiens - found 0 cells\n", - "2024-06-07 15:32:44 39791 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-07 15:32:44 39793 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39793 DEBUG mus_musculus - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-07 15:32:44 39787 DEBUG 
53e343af-979c-4525-a705-1b9d1a1fee14/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39787 DEBUG mus_musculus - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-07 15:32:44 39794 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/mus_musculus - found 0 cells\n", - "2024-06-07 15:32:44 39794 DEBUG mus_musculus - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-07 15:32:44 39783 INFO Build step 3 - axis accumulation complete\n", - "2024-06-07 15:32:44 39783 INFO Build step 3 - accumulate obs and var axes - finished\n", - "2024-06-07 15:32:44 39783 INFO Scaling cluster to 1 workers.\n", - "2024-06-07 15:32:44 39783 INFO Build step 4 - Populate X layers - started\n", - "2024-06-07 15:32:44 39783 INFO homo_sapiens: create X layers\n", - "2024-06-07 15:32:44 39783 INFO mus_musculus: create X layers\n", - "2024-06-07 15:32:44 39783 INFO homo_sapiens: create X layers\n", - "2024-06-07 15:32:44 39783 INFO mus_musculus: create X layers\n", - "2024-06-07 15:32:44 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0\n", - "2024-06-07 15:32:44 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0, chunk 0 of 1\n", - "2024-06-07 15:32:49 39786 INFO dispatch_X_chunk [exit, 5.02s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0\n", - "2024-06-07 15:32:49 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0\n", - "2024-06-07 15:32:49 39786 INFO processing X 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0, chunk 0 of 1\n", - "2024-06-07 15:32:54 39786 INFO dispatch_X_chunk [exit, 4.33s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0\n", - "2024-06-07 15:32:54 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0\n", - "2024-06-07 15:32:54 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0, chunk 0 of 1\n", - "2024-06-07 15:32:54 39783 INFO System memory: mem-used=18183700480 (52.9%), max-mem-used=18183700480 (52.9%), mem-total=34359738368 load-avg=(2.34, 2.12, 2.05)\n", - "2024-06-07 15:32:57 39786 INFO dispatch_X_chunk [exit, 2.93s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0\n", - "2024-06-07 15:32:57 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0\n", - "2024-06-07 15:32:57 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0, chunk 0 of 1\n", - "2024-06-07 15:33:00 39786 INFO dispatch_X_chunk [exit, 3.70s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0\n", - "2024-06-07 15:33:00 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0\n", - "2024-06-07 15:33:00 39786 INFO processing X 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0, chunk 0 of 1\n", - "2024-06-07 15:33:02 39786 INFO dispatch_X_chunk [exit, 1.95s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0\n", - "2024-06-07 15:33:02 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0\n", - "2024-06-07 15:33:02 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0, chunk 0 of 1\n", - "2024-06-07 15:33:06 39786 INFO dispatch_X_chunk [exit, 3.04s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0\n", - "2024-06-07 15:33:06 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0\n", - "2024-06-07 15:33:06 39786 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0, chunk 0 of 1\n", - "2024-06-07 15:33:09 39783 INFO System memory: mem-used=18928959488 (55.1%), max-mem-used=18928959488 (55.1%), mem-total=34359738368 load-avg=(2.19, 2.1, 2.04)\n", - "2024-06-07 15:33:09 39786 INFO dispatch_X_chunk [exit, 3.45s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0\n", - "2024-06-07 15:33:09 39786 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0\n", - "2024-06-07 15:33:09 39786 INFO processing X 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0, chunk 0 of 1\n", - "2024-06-07 15:33:13 39786 INFO dispatch_X_chunk [exit, 3.55s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0\n", - "2024-06-07 15:33:13 39783 INFO Save presence matrix for homo_sapiens - start\n", - "2024-06-07 15:33:13 39783 INFO Save presence matrix for homo_sapiens - finish\n", - "2024-06-07 15:33:13 39783 INFO Save presence matrix for mus_musculus - start\n", - "2024-06-07 15:33:13 39783 INFO Save presence matrix for mus_musculus - finish\n", - "2024-06-07 15:33:13 39783 INFO Save presence matrix for homo_sapiens - start\n", - "2024-06-07 15:33:13 39783 INFO Save presence matrix for homo_sapiens - finish\n", - "2024-06-07 15:33:13 39783 INFO Save presence matrix for mus_musculus - start\n", - "2024-06-07 15:33:13 39783 INFO Save presence matrix for mus_musculus - finish\n", - "2024-06-07 15:33:13 39783 INFO Build step 4 - Populate X layers - finished\n", - "2024-06-07 15:33:13 39783 INFO Build step 5 - Save axis and summary info - started\n", - "2024-06-07 15:33:13 39783 INFO homo_sapiens: writing obs dataframe\n", - "2024-06-07 15:33:13 39783 INFO homo_sapiens: empty obs dataframe\n", - "2024-06-07 15:33:13 39783 INFO homo_sapiens: writing var dataframe\n", - "2024-06-07 15:33:13 39783 INFO homo_sapiens: empty var dataframe\n", - "2024-06-07 15:33:13 39783 INFO mus_musculus: writing obs dataframe\n", - "2024-06-07 15:33:13 39783 INFO mus_musculus: empty obs dataframe\n", - "2024-06-07 15:33:13 39783 INFO mus_musculus: writing var dataframe\n", - "2024-06-07 15:33:13 39783 INFO mus_musculus: empty var dataframe\n", - "2024-06-07 15:33:13 39783 INFO homo_sapiens: writing obs dataframe\n", - "2024-06-07 15:33:13 39783 DEBUG experiment homo_sapiens obs = (39936, 30)\n", - "2024-06-07 15:33:13 39783 INFO homo_sapiens: writing 
var dataframe\n", - "2024-06-07 15:33:13 39783 DEBUG experiment homo_sapiens var = (36972, 6)\n", - "2024-06-07 15:33:13 39783 INFO mus_musculus: writing obs dataframe\n", - "2024-06-07 15:33:13 39783 INFO mus_musculus: empty obs dataframe\n", - "2024-06-07 15:33:13 39783 INFO mus_musculus: writing var dataframe\n", - "2024-06-07 15:33:13 39783 INFO mus_musculus: empty var dataframe\n", - "2024-06-07 15:33:13 39783 INFO Creating dataset_manifest\n", - "2024-06-07 15:33:13 39783 INFO Creating census_summary_cell_counts\n", - "2024-06-07 15:33:13 39783 INFO Creating census summary\n", - "2024-06-07 15:33:13 39783 INFO Create census organisms dataframe\n", - "2024-06-07 15:33:13 39783 INFO Build step 5 - Save axis and summary info - finished\n", - "2024-06-07 15:33:13 39783 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", - "2024-06-07 15:33:13 39783 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/raw\n", - "2024-06-07 15:33:13 39783 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/normalized\n", - "2024-06-07 15:33:13 39783 INFO Scaling cluster to 10 workers.\n", - "2024-06-07 15:33:13 39783 INFO Consolidate: found 36 TileDB objects to consolidate\n", - "2024-06-07 15:33:13 39783 INFO Consolidate: 36 consolidation jobs queued\n", - "2024-06-07 15:33:13 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/datasets\n", - "2024-06-07 15:33:13 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/datasets\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary_cell_counts\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary_cell_counts\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/organisms\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/organisms\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/obs\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/obs\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/var\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/var\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/X\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/X\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens\n", - 
"2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/obs\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/obs\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/var\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/var\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/X\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/X\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/obs\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.12 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/obs\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/var\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/var\n", - "2024-06-07 15:33:14 39786 INFO 
Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", - "2024-06-07 15:33:14 39786 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/raw\n", - "2024-06-07 15:33:16 39866 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X\n", - "2024-06-07 15:33:16 39863 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms\n", - "2024-06-07 15:33:16 39864 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens\n", - "2024-06-07 15:33:16 39863 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms\n", - "2024-06-07 15:33:16 39864 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens\n", - "2024-06-07 15:33:16 39866 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X\n", - "2024-06-07 15:33:16 39861 INFO Consolidate[vacuum=True] start, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA\n", - "2024-06-07 15:33:16 39868 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial\n", - "2024-06-07 15:33:16 39860 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/spatial\n", - "2024-06-07 15:33:16 39860 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/spatial\n", - "2024-06-07 15:33:16 39863 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/spatial\n", - "2024-06-07 15:33:16 39861 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA\n", - "2024-06-07 15:33:16 39862 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/var\n", - "2024-06-07 15:33:16 39861 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms\n", - "2024-06-07 15:33:16 39866 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA\n", - "2024-06-07 15:33:16 39864 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/obs\n", - "2024-06-07 15:33:16 
39868 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial\n", - "2024-06-07 15:33:16 39861 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms\n", - "2024-06-07 15:33:16 39860 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus\n", - "2024-06-07 15:33:16 39860 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus\n", - "2024-06-07 15:33:16 39866 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA\n", - "2024-06-07 15:33:16 39863 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/spatial\n", - "2024-06-07 15:33:16 39864 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/obs\n", - "2024-06-07 15:33:16 39862 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/var\n", - "2024-06-07 15:33:16 39862 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/X\n", - "2024-06-07 15:33:16 39862 INFO Consolidate[vacuum=True] finish, 0.00 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/X\n", - "2024-06-07 15:33:16 39863 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/normalized\n", - "2024-06-07 15:33:18 39786 INFO Consolidate[vacuum=True] finish, 3.72 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/raw\n", - "2024-06-07 15:33:20 39863 INFO Consolidate[vacuum=True] finish, 3.39 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/normalized\n", - "2024-06-07 15:33:20 39783 INFO Validation of SOMA objects - start\n", - "2024-06-07 15:33:20 39783 DEBUG validate_directory_structure [enter]\n", - "2024-06-07 15:33:20 39783 INFO validate_directory_structure [exit, 0.00s]\n", - "2024-06-07 15:33:20 39783 DEBUG validate_relative_path [enter]\n", - "2024-06-07 15:33:20 39783 INFO validate_relative_path [exit, 0.03s]\n", - "2024-06-07 15:33:20 39783 DEBUG validate_axis_dataframes_schema [enter]\n", - "2024-06-07 15:33:20 39783 INFO validate_axis_dataframes_schema [exit, 0.03s]\n", - "2024-06-07 15:33:20 39783 DEBUG validate_manifest_contents [enter]\n", - "2024-06-07 15:33:20 39783 INFO validate_manifest_contents [exit, 0.00s]\n", - "2024-06-07 15:33:20 39864 DEBUG _validate_X_layers_raw_contents [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-07 15:33:20 39861 DEBUG _validate_X_layers_raw_contents [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-07 15:33:20 39786 DEBUG _validate_X_layers_raw_contents [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-07 15:33:20 39866 DEBUG _validate_X_layers_raw_contents [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-07 15:33:20 39868 
DEBUG _validate_X_layers_raw_contents [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-07 15:33:20 39867 DEBUG _validate_X_layers_raw_contents [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-07 15:33:20 39865 DEBUG _validate_X_layers_raw_contents [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-07 15:33:20 39863 DEBUG _validate_X_layers_raw_contents [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-07 15:33:20 39862 DEBUG _validate_X_layers_presence_general [enter]\n", - "2024-06-07 15:33:20 39860 DEBUG validate_internal_consistency [enter]\n", - "2024-06-07 15:33:20 39864 INFO _validate_X_layers_raw_contents [exit, 0.07s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-07 15:33:20 39866 INFO _validate_X_layers_raw_contents [exit, 0.08s]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-07 15:33:20 39786 INFO _validate_X_layers_raw_contents [exit, 0.08s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-07 15:33:20 39861 INFO _validate_X_layers_raw_contents [exit, 0.10s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-07 15:33:20 39867 INFO _validate_X_layers_raw_contents [exit, 0.09s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-07 15:33:20 39868 INFO _validate_X_layers_raw_contents [exit, 0.10s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-07 15:33:20 39865 INFO _validate_X_layers_raw_contents [exit, 0.11s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-07 15:33:20 39863 INFO _validate_X_layers_raw_contents [exit, 0.11s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-07 15:33:20 39862 INFO _validate_X_layers_presence_general [exit, 0.13s]\n", - "2024-06-07 15:33:20 39860 INFO validate_internal_consistency [exit, 0.20s]\n", - "2024-06-07 15:33:20 39860 DEBUG _validate_X_layers_presence [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-07 15:33:20 39863 DEBUG _validate_axis_dataframes [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-07 15:33:20 39860 
INFO _validate_X_layers_presence [exit, 0.05s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-07 15:33:20 39860 DEBUG _validate_X_layers_presence [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.06s]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.06s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.08s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.06s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.05s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.05s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_presence [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-07 15:33:21 39863 INFO _validate_axis_dataframes [exit, 0.41s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", - "2024-06-07 15:33:21 39863 DEBUG _validate_axis_dataframes [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-07 15:33:21 39860 INFO _validate_X_layers_presence [exit, 0.05s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_normalized 
[enter]: homo_sapiens rows [0, 32000)\n", - "2024-06-07 15:33:21 39860 INFO _validate_X_layers_normalized [exit, 0.02s]: homo_sapiens rows [0, 32000)\n", - "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [32000, 64000)\n", - "2024-06-07 15:33:21 39860 INFO _validate_X_layers_normalized [exit, 0.01s]: homo_sapiens rows [32000, 64000)\n", - "2024-06-07 15:33:21 39860 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [0, 32000)\n", - "2024-06-07 15:33:21 39863 INFO _validate_axis_dataframes [exit, 0.39s]: fa3893cb-d420-42ac-8263-09719a26102e\n", - "2024-06-07 15:33:21 39863 DEBUG _validate_axis_dataframes [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-07 15:33:22 39863 INFO _validate_axis_dataframes [exit, 0.35s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", - "2024-06-07 15:33:22 39863 DEBUG _validate_axis_dataframes [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-07 15:33:22 39863 INFO _validate_axis_dataframes [exit, 0.32s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", - "2024-06-07 15:33:22 39863 DEBUG _validate_axis_dataframes [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-07 15:33:22 39863 INFO _validate_axis_dataframes [exit, 0.34s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", - "2024-06-07 15:33:22 39863 DEBUG _validate_axis_dataframes [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-07 15:33:23 39863 INFO _validate_axis_dataframes [exit, 0.32s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", - "2024-06-07 15:33:23 39863 DEBUG _validate_axis_dataframes [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-07 15:33:23 39863 INFO _validate_axis_dataframes [exit, 0.35s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", - "2024-06-07 15:33:23 39863 DEBUG _validate_axis_dataframes [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-07 15:33:23 39863 INFO _validate_axis_dataframes [exit, 0.46s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", - "2024-06-07 15:33:23 39863 
DEBUG validate_soma_bounding_box [enter]\n", - "2024-06-07 15:33:24 39863 INFO validate_soma_bounding_box [exit, 0.21s]\n", - "2024-06-07 15:33:24 39863 DEBUG validate_axis_dataframes_global_ids [enter]\n", - "2024-06-07 15:33:24 39863 INFO validate_axis_dataframes_global_ids [exit, 0.19s]\n", - "2024-06-07 15:33:24 39863 DEBUG validate_X_layers_schema [enter]\n", - "2024-06-07 15:33:24 39783 INFO System memory: mem-used=20338524160 (59.2%), max-mem-used=20338524160 (59.2%), mem-total=34359738368 load-avg=(6.93, 3.14, 2.42)\n", - "2024-06-07 15:33:24 39863 INFO validate_X_layers_schema [exit, 0.10s]\n", - "2024-06-07 15:33:28 39860 INFO _validate_X_layers_normalized [exit, 7.09s]: homo_sapiens rows [0, 32000)\n", - "2024-06-07 15:33:28 39860 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [32000, 64000)\n", - "2024-06-07 15:33:30 39860 INFO _validate_X_layers_normalized [exit, 2.00s]: homo_sapiens rows [32000, 64000)\n", - "2024-06-07 15:33:30 39860 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, raw, rows [0, 96000)\n", - "2024-06-07 15:33:30 39860 INFO _validate_X_layers_has_unique_coords [exit, 0.01s]: homo_sapiens, raw, rows [0, 96000)\n", - "2024-06-07 15:33:30 39860 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, normalized, rows [0, 96000)\n", - "2024-06-07 15:33:30 39860 INFO _validate_X_layers_has_unique_coords [exit, 0.01s]: homo_sapiens, normalized, rows [0, 96000)\n", - "2024-06-07 15:33:30 39860 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, raw, rows [0, 96000)\n", - "2024-06-07 15:33:32 39860 INFO _validate_X_layers_has_unique_coords [exit, 2.13s]: homo_sapiens, raw, rows [0, 96000)\n", - "2024-06-07 15:33:32 39860 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, normalized, rows [0, 96000)\n", - "2024-06-07 15:33:34 39860 INFO _validate_X_layers_has_unique_coords [exit, 1.92s]: homo_sapiens, normalized, rows [0, 96000)\n", - "2024-06-07 15:33:34 39783 DEBUG 
validate_consolidation [enter]\n", - "2024-06-07 15:33:34 39783 INFO validate_consolidation [exit, 0.16s]\n", - "2024-06-07 15:33:34 39783 INFO Validation & consolidation complete.\n", - "2024-06-07 15:33:35,857 - distributed.scheduler - WARNING - Removing worker 'tcp://127.0.0.1:49198' caused the cluster to lose already computed task(s), which will be recomputed elsewhere: {'assert_all-02830fee-7826-4829-b307-f07daafc4098'} (stimulus_id='handle-worker-cleanup-1717799615.857165')\n", - "2024-06-07 15:33:36 39783 INFO Dask cluster shut down\n", - "2024-06-07 15:33:36 39783 INFO Fini\n" + "2024-06-07 16:23:00 45577 DEBUG Setting NUMEXPR_MAX_THREADS environment variable to \"5\"\n", + "2024-06-07 16:23:00 45577 DEBUG Setting OMP_NUM_THREADS environment variable to \"1\"\n", + "2024-06-07 16:23:00 45577 DEBUG Setting OPENBLAS_NUM_THREADS environment variable to \"1\"\n", + "2024-06-07 16:23:00 45577 DEBUG Setting MKL_NUM_THREADS environment variable to \"1\"\n", + "2024-06-07 16:23:00 45577 DEBUG Setting VECLIB_MAXIMUM_THREADS environment variable to \"1\"\n", + "2024-06-07 16:23:00 45577 INFO CensusBuildArgs(working_dir=PosixPath('/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds'), config=CensusBuildConfig(verbose=1, dashboard=True, log_dir='logs', log_file='build.log', reports_dir='reports', consolidate=True, dryrun=False, cellxgene_census_S3_path='s3://cellxgene-data-public/cell-census', cellxgene_census_default_mirror_S3_path='s3://cellxgene-census-public-us-west-2/cell-census', cellxgene_census_S3_replica_path=None, logs_S3_path='s3://cellxgene-data-public-logs/builder', build_tag='test-spatial-build', max_worker_processes=48, host_validation_disable=False, host_validation_min_physical_memory=549755813888, host_validation_min_swap_memory=2199023255552, host_validation_min_free_disk_space=1979120929996, release_cleanup_days=32, 
dataset_id_blocklist_uri='https://raw.githubusercontent.com/chanzuckerberg/cellxgene-census/main/tools/cellxgene_census_builder/dataset_blocklist.txt', user_agent_prefix='census-builder-', user_agent_environment='unknown', manifest=<_io.TextIOWrapper name='/Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/manifest.csv' mode='r' encoding='UTF-8'>, test_first_n=0), state=CensusBuildState())\n", + "2024-06-07 16:23:00 45577 INFO System memory: mem-used=14976499712 (43.6%), max-mem-used=14976499712 (43.6%), mem-total=34359738368 load-avg=(5.61, 4.8, 4.98)\n", + "2024-06-07 16:23:00 45577 INFO Starting process resource logger with period 15.0\n", + "2024-06-07 16:23:03 45577 INFO Dask client created: \n", + "2024-06-07 16:23:03 45577 INFO Dask client using cluster: LocalCluster(a6ad3214, 'tcp://127.0.0.1:51063', workers=10, threads=10)\n", + "2024-06-07 16:23:03 45577 INFO Dashboard link: http://127.0.0.1:8787/status\n", + "2024-06-07 16:23:03 45577 INFO Build step 1 - get source assets - started\n", + "2024-06-07 16:23:03 45577 INFO Loading manifest from file\n", + "2024-06-07 16:23:03 45577 INFO Dataset blocklist found, containing 5 ids.\n", + "2024-06-07 16:23:03 45577 INFO After blocklist and dedup, will load 8 datasets.\n", + "2024-06-07 16:23:03 45577 INFO Starting asset staging to /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads\n", + "2024-06-07 16:23:03 45584 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, bytes=49260402\n", + "2024-06-07 16:23:03 45580 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, bytes=28385363\n", + "2024-06-07 16:23:03 45589 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, bytes=126790774\n", + 
"2024-06-07 16:23:04 45585 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, bytes=147321916\n", + "2024-06-07 16:23:04 45582 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, bytes=637135389\n", + "2024-06-07 16:23:04 45581 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, bytes=42809187\n", + "2024-06-07 16:23:04 45587 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, bytes=625411595\n", + "2024-06-07 16:23:04 45586 DEBUG Copy complete, url=/Users/psridharan/code/cellxgene-census/ps_stuff/spatial_test_datasets/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, bytes=1112747498\n", + "2024-06-07 16:23:04 45577 INFO Build step 1 - get source assets - finished\n", + "2024-06-07 16:23:04 45577 INFO Build step 2 - Create root collection - started\n", + "2024-06-07 16:23:04 45577 INFO homo_sapiens: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens\n", + "2024-06-07 16:23:04 45577 INFO mus_musculus: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus\n", + "2024-06-07 16:23:04 45577 INFO homo_sapiens: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens\n", + "2024-06-07 16:23:04 45577 INFO mus_musculus: create experiment at /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus\n", + "2024-06-07 16:23:04 45577 INFO Build step 2 - Create root collection - finished\n", + "2024-06-07 16:23:04 45577 INFO Build step 3 - accumulate 
obs and var axes - started\n", + "2024-06-07 16:23:04 45588 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/homo_sapiens - found 4992 cells\n", + "2024-06-07 16:23:04 45581 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/homo_sapiens - found 4992 cells\n", + "2024-06-07 16:23:04 45582 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/homo_sapiens - found 4992 cells\n", + "2024-06-07 16:23:04 45586 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/homo_sapiens - found 4992 cells\n", + "2024-06-07 16:23:04 45580 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/homo_sapiens - found 4992 cells\n", + "2024-06-07 16:23:04 45589 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/homo_sapiens - found 4992 cells\n", + "2024-06-07 16:23:04 45587 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/homo_sapiens - found 4992 cells\n", + "2024-06-07 16:23:04 45584 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/homo_sapiens - found 4992 cells\n", + "2024-06-07 16:23:04 45588 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/homo_sapiens - found 0 cells\n", + "2024-06-07 16:23:04 45588 DEBUG homo_sapiens - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 16:23:04 45581 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45581 DEBUG mus_musculus - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 16:23:04 45586 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45586 DEBUG mus_musculus - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 16:23:04 45580 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45587 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/homo_sapiens - found 0 cells\n", + "2024-06-07 16:23:04 45580 DEBUG mus_musculus - H5AD has no data after filtering, skipping 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 16:23:04 45582 DEBUG 
1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/homo_sapiens - found 0 cells\n", + "2024-06-07 16:23:04 45582 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 16:23:04 45587 DEBUG homo_sapiens - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 16:23:04 45584 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45584 DEBUG mus_musculus - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 16:23:04 45589 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45589 DEBUG mus_musculus - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 16:23:04 45588 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45588 DEBUG mus_musculus - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 16:23:04 45581 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/homo_sapiens - found 0 cells\n", + "2024-06-07 16:23:04 45581 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 16:23:04 45586 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/homo_sapiens - found 0 cells\n", + "2024-06-07 16:23:04 45586 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 16:23:04 45580 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45580 DEBUG mus_musculus - H5AD has no data after filtering, skipping 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 16:23:04 45582 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45582 DEBUG mus_musculus - H5AD has no data after filtering, skipping 
1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 16:23:04 45587 DEBUG c6f6e674-b59d-46cf-8525-73f64f9eef8c/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45587 DEBUG mus_musculus - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 16:23:04 45584 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45584 DEBUG mus_musculus - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 16:23:04 45589 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:04 45589 DEBUG mus_musculus - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 16:23:05 45588 DEBUG c63d5cb4-1046-4948-a188-e6af50ef90f4/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:05 45588 DEBUG mus_musculus - H5AD has no data after filtering, skipping c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 16:23:05 45581 DEBUG 6ab91271-5f48-4e98-92ef-d02ee21e63e1/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:05 45581 DEBUG mus_musculus - H5AD has no data after filtering, skipping 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 16:23:05 45586 DEBUG 53e343af-979c-4525-a705-1b9d1a1fee14/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:05 45586 DEBUG mus_musculus - H5AD has no data after filtering, skipping 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 16:23:05 45580 DEBUG 07998bf8-d070-41bb-a584-f8bdd1193aef/homo_sapiens - found 0 cells\n", + "2024-06-07 16:23:05 45580 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 16:23:05 45582 DEBUG 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:05 45582 DEBUG mus_musculus - H5AD has no data after filtering, skipping 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 16:23:05 45587 DEBUG 
c6f6e674-b59d-46cf-8525-73f64f9eef8c/mus_musculus - found 0 cells\n", + "2024-06-07 16:23:05 45587 DEBUG mus_musculus - H5AD has no data after filtering, skipping c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 16:23:05 45589 DEBUG 9624a105-319c-4abf-b10b-d96ce1650100/homo_sapiens - found 0 cells\n", + "2024-06-07 16:23:05 45589 DEBUG homo_sapiens - H5AD has no data after filtering, skipping 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 16:23:05 45584 DEBUG fa3893cb-d420-42ac-8263-09719a26102e/homo_sapiens - found 0 cells\n", + "2024-06-07 16:23:05 45584 DEBUG homo_sapiens - H5AD has no data after filtering, skipping fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 16:23:05 45577 INFO Build step 3 - axis accumulation complete\n", + "2024-06-07 16:23:05 45577 INFO Build step 3 - accumulate obs and var axes - finished\n", + "2024-06-07 16:23:05 45577 INFO Scaling cluster to 1 workers.\n", + "2024-06-07 16:23:05 45577 INFO Build step 4 - Populate X layers - started\n", + "2024-06-07 16:23:05 45577 INFO homo_sapiens: create X layers\n", + "2024-06-07 16:23:05 45577 INFO mus_musculus: create X layers\n", + "2024-06-07 16:23:05 45577 INFO homo_sapiens: create X layers\n", + "2024-06-07 16:23:05 45577 INFO mus_musculus: create X layers\n", + "2024-06-07 16:23:05 45580 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0\n", + "2024-06-07 16:23:05 45580 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 16:23:10 45580 INFO dispatch_X_chunk [exit, 5.05s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/1bb92cf8-ab3f-4bb0-a722-b241b5d377ed.h5ad, 0\n", + "2024-06-07 16:23:10 45580 DEBUG dispatch_X_chunk [enter]: 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0\n", + "2024-06-07 16:23:10 45580 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 16:23:15 45580 INFO dispatch_X_chunk [exit, 4.46s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c63d5cb4-1046-4948-a188-e6af50ef90f4.h5ad, 0\n", + "2024-06-07 16:23:15 45580 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0\n", + "2024-06-07 16:23:15 45580 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 16:23:15 45577 INFO System memory: mem-used=17120067584 (49.8%), max-mem-used=17120067584 (49.8%), mem-total=34359738368 load-avg=(5.74, 4.89, 5.01)\n", + "2024-06-07 16:23:18 45580 INFO dispatch_X_chunk [exit, 3.00s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/6ab91271-5f48-4e98-92ef-d02ee21e63e1.h5ad, 0\n", + "2024-06-07 16:23:18 45580 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0\n", + "2024-06-07 16:23:18 45580 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 16:23:22 45580 INFO dispatch_X_chunk [exit, 3.58s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/53e343af-979c-4525-a705-1b9d1a1fee14.h5ad, 0\n", + "2024-06-07 16:23:22 45580 DEBUG dispatch_X_chunk [enter]: 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0\n", + "2024-06-07 16:23:22 45580 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 16:23:23 45580 INFO dispatch_X_chunk [exit, 1.89s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/9624a105-319c-4abf-b10b-d96ce1650100.h5ad, 0\n", + "2024-06-07 16:23:23 45580 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0\n", + "2024-06-07 16:23:24 45580 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 16:23:27 45580 INFO dispatch_X_chunk [exit, 3.04s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/07998bf8-d070-41bb-a584-f8bdd1193aef.h5ad, 0\n", + "2024-06-07 16:23:27 45580 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0\n", + "2024-06-07 16:23:27 45580 INFO processing X /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 16:23:30 45580 INFO dispatch_X_chunk [exit, 3.47s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/fa3893cb-d420-42ac-8263-09719a26102e.h5ad, 0\n", + "2024-06-07 16:23:30 45580 DEBUG dispatch_X_chunk [enter]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0\n", + "2024-06-07 16:23:30 45580 INFO processing X 
/Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0, chunk 0 of 1\n", + "2024-06-07 16:23:30 45577 INFO System memory: mem-used=17663524864 (51.4%), max-mem-used=17663524864 (51.4%), mem-total=34359738368 load-avg=(5.93, 4.97, 5.04)\n", + "2024-06-07 16:23:34 45580 INFO dispatch_X_chunk [exit, 3.54s]: /Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/h5ads/c6f6e674-b59d-46cf-8525-73f64f9eef8c.h5ad, 0\n", + "2024-06-07 16:23:34 45577 INFO Save presence matrix for homo_sapiens - start\n", + "2024-06-07 16:23:34 45577 INFO Save presence matrix for homo_sapiens - finish\n", + "2024-06-07 16:23:34 45577 INFO Save presence matrix for mus_musculus - start\n", + "2024-06-07 16:23:34 45577 INFO Save presence matrix for mus_musculus - finish\n", + "2024-06-07 16:23:34 45577 INFO Save presence matrix for homo_sapiens - start\n", + "2024-06-07 16:23:34 45577 INFO Save presence matrix for homo_sapiens - finish\n", + "2024-06-07 16:23:34 45577 INFO Save presence matrix for mus_musculus - start\n", + "2024-06-07 16:23:34 45577 INFO Save presence matrix for mus_musculus - finish\n", + "2024-06-07 16:23:34 45577 INFO Build step 4 - Populate X layers - finished\n", + "2024-06-07 16:23:34 45577 INFO Build step 5 - Save axis and summary info - started\n", + "2024-06-07 16:23:34 45577 INFO homo_sapiens: writing obs dataframe\n", + "2024-06-07 16:23:34 45577 INFO homo_sapiens: empty obs dataframe\n", + "2024-06-07 16:23:34 45577 INFO homo_sapiens: writing var dataframe\n", + "2024-06-07 16:23:34 45577 INFO homo_sapiens: empty var dataframe\n", + "2024-06-07 16:23:34 45577 INFO mus_musculus: writing obs dataframe\n", + "2024-06-07 16:23:34 45577 INFO mus_musculus: empty obs dataframe\n", + "2024-06-07 16:23:34 45577 INFO mus_musculus: writing var dataframe\n", + "2024-06-07 16:23:34 45577 INFO mus_musculus: empty var dataframe\n", + "2024-06-07 16:23:34 45577 INFO 
homo_sapiens: writing obs dataframe\n", + "2024-06-07 16:23:34 45577 DEBUG experiment homo_sapiens obs = (39936, 30)\n", + "2024-06-07 16:23:34 45577 INFO homo_sapiens: writing var dataframe\n", + "2024-06-07 16:23:34 45577 DEBUG experiment homo_sapiens var = (36972, 6)\n", + "2024-06-07 16:23:34 45577 INFO mus_musculus: writing obs dataframe\n", + "2024-06-07 16:23:34 45577 INFO mus_musculus: empty obs dataframe\n", + "2024-06-07 16:23:34 45577 INFO mus_musculus: writing var dataframe\n", + "2024-06-07 16:23:34 45577 INFO mus_musculus: empty var dataframe\n", + "2024-06-07 16:23:34 45577 INFO Creating dataset_manifest\n", + "2024-06-07 16:23:34 45577 INFO Creating census_summary_cell_counts\n", + "2024-06-07 16:23:34 45577 INFO Creating census summary\n", + "2024-06-07 16:23:34 45577 INFO Create census organisms dataframe\n", + "2024-06-07 16:23:34 45577 INFO Build step 5 - Save axis and summary info - finished\n", + "2024-06-07 16:23:34 45577 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", + "2024-06-07 16:23:34 45577 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/raw\n", + "2024-06-07 16:23:34 45577 INFO tiledb_soma_1969_work_around: deleting bounding box from file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/normalized\n", + "2024-06-07 16:23:34 45577 INFO Scaling cluster to 10 workers.\n", + "2024-06-07 16:23:34 45577 INFO Consolidate: found 36 TileDB objects to consolidate\n", + "2024-06-07 16:23:34 45577 INFO Consolidate: 36 consolidation jobs queued\n", + "2024-06-07 16:23:34 45580 INFO Consolidate[vacuum=True] start, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/datasets\n", + "2024-06-07 16:23:34 45580 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/datasets\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary_cell_counts\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.05 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary_cell_counts\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/summary\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/organisms\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info/organisms\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_info\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/obs\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/obs\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/var\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.07 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/var\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/X\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA/X\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms/RNA\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.01 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens/ms\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/homo_sapiens\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/obs\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/obs\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/var\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/var\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/X\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA/X\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA\n", + "2024-06-07 
16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms/RNA\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus/ms\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data/mus_musculus\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_data\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/obs\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.12 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/obs\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/var\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/var\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] finish, 0.04 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/feature_dataset_presence_matrix\n", + "2024-06-07 16:23:35 45580 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/raw\n", + "2024-06-07 16:23:36 45648 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X\n", + "2024-06-07 16:23:36 45648 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X\n", + "2024-06-07 16:23:36 45648 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA\n", + "2024-06-07 16:23:36 45648 INFO Consolidate[vacuum=True] finish, 0.03 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA\n", + "2024-06-07 16:23:36 45648 INFO Consolidate[vacuum=True] start, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms\n", + "2024-06-07 16:23:36 45647 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/spatial\n", + "2024-06-07 16:23:37 45648 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms\n", + "2024-06-07 16:23:37 45648 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/obs\n", + "2024-06-07 16:23:37 45649 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/var\n", + "2024-06-07 16:23:37 45647 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/spatial\n", + "2024-06-07 16:23:37 45648 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/obs\n", + "2024-06-07 16:23:37 45648 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA\n", + "2024-06-07 16:23:37 45647 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens\n", + "2024-06-07 16:23:37 45649 INFO Consolidate[vacuum=True] finish, 0.01 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/var\n", + "2024-06-07 16:23:37 45647 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens\n", + "2024-06-07 16:23:37 45649 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/X\n", + "2024-06-07 16:23:37 45648 INFO Consolidate[vacuum=True] finish, 0.01 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA\n", + "2024-06-07 16:23:37 45647 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/spatial\n", + "2024-06-07 16:23:37 45649 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms/RNA/X\n", + "2024-06-07 16:23:37 45648 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms\n", + "2024-06-07 16:23:37 45649 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus\n", + "2024-06-07 16:23:37 45648 INFO Consolidate[vacuum=True] finish, 0.00 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/ms\n", + "2024-06-07 16:23:37 45647 INFO Consolidate[vacuum=True] finish, 0.01 seconds, 
uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus/spatial\n", + "2024-06-07 16:23:37 45647 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial\n", + "2024-06-07 16:23:37 45649 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/mus_musculus\n", + "2024-06-07 16:23:37 45647 INFO Consolidate[vacuum=True] finish, 0.02 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial\n", + "2024-06-07 16:23:37 45653 INFO Consolidate[vacuum=True] start, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/normalized\n", + "2024-06-07 16:23:39 45580 INFO Consolidate[vacuum=True] finish, 3.65 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/raw\n", + "2024-06-07 16:23:40 45653 INFO Consolidate[vacuum=True] finish, 3.49 seconds, uri=file:///Users/psridharan/code/cellxgene-census/ps_stuff/census-builds/test-spatial-build/soma/census_spatial/homo_sapiens/ms/RNA/X/normalized\n", + "2024-06-07 16:23:40 45577 INFO Validation of SOMA objects - start\n", + "2024-06-07 16:23:40 45577 DEBUG validate_directory_structure [enter]\n", + "2024-06-07 16:23:40 45577 INFO validate_directory_structure [exit, 0.00s]\n", + "2024-06-07 16:23:40 45577 DEBUG validate_relative_path [enter]\n", + "2024-06-07 16:23:41 45577 INFO validate_relative_path [exit, 0.03s]\n", + "2024-06-07 16:23:41 45577 DEBUG validate_axis_dataframes_schema [enter]\n", + "2024-06-07 16:23:41 45577 INFO validate_axis_dataframes_schema [exit, 0.03s]\n", + "2024-06-07 16:23:41 45577 DEBUG 
validate_manifest_contents [enter]\n", + "2024-06-07 16:23:41 45577 INFO validate_manifest_contents [exit, 0.00s]\n", + "2024-06-07 16:23:41 45648 DEBUG _validate_X_layers_raw_contents [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 16:23:41 45647 DEBUG _validate_X_layers_raw_contents [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 16:23:41 45653 DEBUG _validate_X_layers_raw_contents [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 16:23:41 45652 DEBUG _validate_X_layers_raw_contents [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 16:23:41 45654 DEBUG _validate_X_layers_raw_contents [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 16:23:41 45644 DEBUG _validate_X_layers_raw_contents [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 16:23:41 45651 DEBUG _validate_X_layers_raw_contents [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 16:23:41 45645 DEBUG _validate_X_layers_raw_contents [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 16:23:41 45580 DEBUG _validate_X_layers_presence_general [enter]\n", + "2024-06-07 16:23:41 45649 DEBUG _validate_axis_dataframes [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 16:23:41 45648 INFO _validate_X_layers_raw_contents [exit, 0.08s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 16:23:41 45647 INFO _validate_X_layers_raw_contents [exit, 0.08s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 16:23:41 45644 INFO _validate_X_layers_raw_contents [exit, 0.09s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 16:23:41 45654 INFO _validate_X_layers_raw_contents [exit, 0.10s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 16:23:41 45648 DEBUG validate_internal_consistency [enter]\n", + "2024-06-07 16:23:41 45653 INFO _validate_X_layers_raw_contents [exit, 0.11s]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 16:23:41 45651 INFO 
_validate_X_layers_raw_contents [exit, 0.11s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 16:23:41 45580 INFO _validate_X_layers_presence_general [exit, 0.10s]\n", + "2024-06-07 16:23:41 45652 INFO _validate_X_layers_raw_contents [exit, 0.12s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 16:23:41 45645 INFO _validate_X_layers_raw_contents [exit, 0.12s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 16:23:41 45648 INFO validate_internal_consistency [exit, 0.15s]\n", + "2024-06-07 16:23:41 45649 INFO _validate_axis_dataframes [exit, 0.37s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 16:23:41 45649 DEBUG _validate_axis_dataframes [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 16:23:41 45649 INFO _validate_axis_dataframes [exit, 0.29s]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 16:23:41 45649 DEBUG _validate_axis_dataframes [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 16:23:42 45649 INFO _validate_axis_dataframes [exit, 0.29s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 16:23:42 45649 DEBUG _validate_axis_dataframes [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 16:23:42 45649 INFO _validate_axis_dataframes [exit, 0.29s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 16:23:42 45649 DEBUG _validate_axis_dataframes [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 16:23:42 45649 INFO _validate_axis_dataframes [exit, 0.29s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 16:23:42 45649 DEBUG _validate_axis_dataframes [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 16:23:43 45649 INFO _validate_axis_dataframes [exit, 0.27s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 16:23:43 45649 DEBUG _validate_axis_dataframes [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 16:23:43 45649 INFO _validate_axis_dataframes [exit, 0.27s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 
16:23:43 45649 DEBUG _validate_axis_dataframes [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 16:23:43 45649 INFO _validate_axis_dataframes [exit, 0.30s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 16:23:43 45649 DEBUG _validate_X_layers_presence [enter]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 16:23:43 45649 INFO _validate_X_layers_presence [exit, 0.05s]: c6f6e674-b59d-46cf-8525-73f64f9eef8c\n", + "2024-06-07 16:23:43 45649 DEBUG _validate_X_layers_presence [enter]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 16:23:43 45649 INFO _validate_X_layers_presence [exit, 0.07s]: fa3893cb-d420-42ac-8263-09719a26102e\n", + "2024-06-07 16:23:43 45649 DEBUG _validate_X_layers_presence [enter]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 16:23:43 45649 INFO _validate_X_layers_presence [exit, 0.05s]: 07998bf8-d070-41bb-a584-f8bdd1193aef\n", + "2024-06-07 16:23:43 45649 DEBUG _validate_X_layers_presence [enter]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 16:23:43 45649 INFO _validate_X_layers_presence [exit, 0.05s]: 9624a105-319c-4abf-b10b-d96ce1650100\n", + "2024-06-07 16:23:43 45649 DEBUG _validate_X_layers_presence [enter]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 16:23:43 45649 INFO _validate_X_layers_presence [exit, 0.05s]: 53e343af-979c-4525-a705-1b9d1a1fee14\n", + "2024-06-07 16:23:43 45649 DEBUG _validate_X_layers_presence [enter]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 16:23:43 45649 INFO _validate_X_layers_presence [exit, 0.05s]: 6ab91271-5f48-4e98-92ef-d02ee21e63e1\n", + "2024-06-07 16:23:43 45649 DEBUG _validate_X_layers_presence [enter]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 16:23:44 45649 INFO _validate_X_layers_presence [exit, 0.05s]: c63d5cb4-1046-4948-a188-e6af50ef90f4\n", + "2024-06-07 16:23:44 45649 DEBUG _validate_X_layers_presence [enter]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 16:23:44 45649 INFO 
_validate_X_layers_presence [exit, 0.08s]: 1bb92cf8-ab3f-4bb0-a722-b241b5d377ed\n", + "2024-06-07 16:23:44 45649 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [0, 32000)\n", + "2024-06-07 16:23:44 45649 INFO _validate_X_layers_normalized [exit, 0.01s]: homo_sapiens rows [0, 32000)\n", + "2024-06-07 16:23:44 45649 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [32000, 64000)\n", + "2024-06-07 16:23:44 45649 INFO _validate_X_layers_normalized [exit, 0.01s]: homo_sapiens rows [32000, 64000)\n", + "2024-06-07 16:23:44 45649 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [0, 32000)\n", + "2024-06-07 16:23:44 45653 DEBUG validate_soma_bounding_box [enter]\n", + "2024-06-07 16:23:44 45645 DEBUG validate_X_layers_schema [enter]\n", + "2024-06-07 16:23:44 45647 DEBUG validate_axis_dataframes_global_ids [enter]\n", + "2024-06-07 16:23:44 45653 INFO validate_soma_bounding_box [exit, 0.28s]\n", + "2024-06-07 16:23:44 45645 INFO validate_X_layers_schema [exit, 0.15s]\n", + "2024-06-07 16:23:44 45647 INFO validate_axis_dataframes_global_ids [exit, 0.21s]\n", + "2024-06-07 16:23:45 45577 INFO System memory: mem-used=20379811840 (59.3%), max-mem-used=20379811840 (59.3%), mem-total=34359738368 load-avg=(8.24, 5.55, 5.25)\n", + "2024-06-07 16:23:51 45649 INFO _validate_X_layers_normalized [exit, 7.09s]: homo_sapiens rows [0, 32000)\n", + "2024-06-07 16:23:51 45649 DEBUG _validate_X_layers_normalized [enter]: homo_sapiens rows [32000, 64000)\n", + "2024-06-07 16:23:53 45649 INFO _validate_X_layers_normalized [exit, 1.97s]: homo_sapiens rows [32000, 64000)\n", + "2024-06-07 16:23:53 45649 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, raw, rows [0, 96000)\n", + "2024-06-07 16:23:53 45649 INFO _validate_X_layers_has_unique_coords [exit, 0.01s]: homo_sapiens, raw, rows [0, 96000)\n", + "2024-06-07 16:23:53 45649 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, normalized, rows [0, 96000)\n", + 
"2024-06-07 16:23:53 45649 INFO _validate_X_layers_has_unique_coords [exit, 0.01s]: homo_sapiens, normalized, rows [0, 96000)\n", + "2024-06-07 16:23:53 45649 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, raw, rows [0, 96000)\n", + "2024-06-07 16:23:55 45649 INFO _validate_X_layers_has_unique_coords [exit, 2.19s]: homo_sapiens, raw, rows [0, 96000)\n", + "2024-06-07 16:23:55 45649 DEBUG _validate_X_layers_has_unique_coords [enter]: homo_sapiens, normalized, rows [0, 96000)\n", + "2024-06-07 16:23:57 45649 INFO _validate_X_layers_has_unique_coords [exit, 1.95s]: homo_sapiens, normalized, rows [0, 96000)\n", + "2024-06-07 16:23:57 45577 DEBUG validate_consolidation [enter]\n", + "2024-06-07 16:23:57 45577 INFO validate_consolidation [exit, 0.17s]\n", + "2024-06-07 16:23:57 45577 INFO Validation & consolidation complete.\n", + "2024-06-07 16:23:58,641 - distributed.scheduler - WARNING - Removing worker 'tcp://127.0.0.1:51249' caused the cluster to lose already computed task(s), which will be recomputed elsewhere: {'assert_all-f91667c6-1504-4d54-aba4-98e04eb74308'} (stimulus_id='handle-worker-cleanup-1717802638.641124')\n", + "2024-06-07 16:23:59 45577 INFO Dask cluster shut down\n", + "2024-06-07 16:23:59 45577 INFO Fini\n" ] } ], From 1854f41000a9a996f6826d6252d7b6436f4392d3 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Wed, 12 Jun 2024 10:28:27 -0700 Subject: [PATCH 23/29] Pin tiledbsoma to commit 5069714 for latest spatial --- api/python/cellxgene_census/pyproject.toml | 2 +- tools/cellxgene_census_builder/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml index c39d62cb4..fe3be1308 100644 --- a/api/python/cellxgene_census/pyproject.toml +++ b/api/python/cellxgene_census/pyproject.toml @@ -34,7 +34,7 @@ dependencies= [ # TODO (spatial): tiledbsoma pin to a PyPI release is temporarily commented out in favor git commit 
pin # "tiledbsoma==1.11.4", # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@fc5f8e7#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@5069714#egg=tiledbsoma&subdirectory=apis/python/", "anndata", "numpy>=1.21,<2.0", "requests", diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 41ec7cef3..246b61b6c 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -37,7 +37,7 @@ dependencies= [ # TODO (spatial): tiledbsoma pin to a PyPI release is temporarily commented out in favor git commit pin # "tiledbsoma==1.9.3", # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@fc5f8e7#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@5069714#egg=tiledbsoma&subdirectory=apis/python/", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released # "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", From 14d682e31051e48dcc149968955fe72560c39e32 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Wed, 12 Jun 2024 10:48:05 -0700 Subject: [PATCH 24/29] Update tiledbsoma spatial notebook --- .../tiledbsoma_spatial_dataset_ingest.ipynb | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb index 42424c98d..37bf28da6 100644 --- 
a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb +++ b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb @@ -194,7 +194,8 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 7, @@ -211,6 +212,27 @@ "execution_count": 8, "id": "f596b70a-528b-43c1-aa75-429d13cc164b", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"obsl\"][\"loc\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c0767815-4f13-468c-99b9-82f6680c8337", + "metadata": {}, "outputs": [ { "data": { @@ -595,14 +617,14 @@ "[4992 rows x 114 columns]" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "obsl_df = sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"obsl\"]\n", - "obsl_df.read().concat().to_pandas()" + "obsl_loc_df = sp.spatial[\"c63d5cb4-1046-4948-a188-e6af50ef90f4\"][\"obsl\"][\"loc\"]\n", + "obsl_loc_df.read().concat().to_pandas()" ] } ], From bf0fdea26b2667f768c83e18080549460bae0ddb Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Wed, 12 Jun 2024 11:25:41 -0700 Subject: [PATCH 25/29] Update tiledbsoma spatial notebook --- .../tiledbsoma_spatial_dataset_ingest.ipynb | 66 +++++++++++++++---- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb index 37bf28da6..acab16226 100644 --- a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb +++ b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb @@ -99,6 +99,50 @@ { "cell_type": "code", "execution_count": 4, + "id": 
"bcb874f0-d1eb-4e09-ab77-b77116e60592", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sp.ms" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8298ad0e-4413-45e8-9161-ef550f58a29e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sp.spatial" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "7f08df07-2027-494f-b383-b9e739a614fd", "metadata": {}, "outputs": [ @@ -111,7 +155,7 @@ " 'obsl': 'file:///Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/soma-spatial/spatial/c63d5cb4-1046-4948-a188-e6af50ef90f4/obsl' (unopened)>" ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -122,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "id": "35ba9213-37f1-40b7-a922-f31e75258240", "metadata": {}, "outputs": [ @@ -132,7 +176,7 @@ "" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -143,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "5a792279-b560-4246-b853-8735f85e2bb3", "metadata": {}, "outputs": [ @@ -175,7 +219,7 @@ " [240, 240, 240, ..., 240, 240, 238]]], dtype=uint8)" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -187,7 +231,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "616715c4-0178-4f27-831f-63d6c024cf1a", "metadata": {}, "outputs": [ @@ -198,7 +242,7 @@ " 'loc': 'file:///Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/soma-spatial/spatial/c63d5cb4-1046-4948-a188-e6af50ef90f4/obsl/loc' (unopened)>" ] }, - 
"execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -209,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "f596b70a-528b-43c1-aa75-429d13cc164b", "metadata": {}, "outputs": [ @@ -219,7 +263,7 @@ "" ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -230,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "c0767815-4f13-468c-99b9-82f6680c8337", "metadata": {}, "outputs": [ @@ -617,7 +661,7 @@ "[4992 rows x 114 columns]" ] }, - "execution_count": 9, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } From ebbe1b1bf279e9e9248dfe1f4e39c500468e6885 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Wed, 12 Jun 2024 13:03:31 -0700 Subject: [PATCH 26/29] Update tiledbsoma spatial notebook --- .../tiledbsoma_spatial_dataset_ingest.ipynb | 51 ++++++++++++++----- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb index acab16226..5125e8a47 100644 --- a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb +++ b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb @@ -121,6 +121,31 @@ { "cell_type": "code", "execution_count": 5, + "id": "2a4f16a9-0fbd-4d31-b8f4-d58149ac2605", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sp.ms[\"RNA\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "8298ad0e-4413-45e8-9161-ef550f58a29e", "metadata": {}, "outputs": [ @@ -131,7 +156,7 @@ " 'c63d5cb4-1046-4948-a188-e6af50ef90f4': 
'file:///Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/soma-spatial/spatial/c63d5cb4-1046-4948-a188-e6af50ef90f4' (unopened)>" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -142,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "7f08df07-2027-494f-b383-b9e739a614fd", "metadata": {}, "outputs": [ @@ -155,7 +180,7 @@ " 'obsl': 'file:///Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/soma-spatial/spatial/c63d5cb4-1046-4948-a188-e6af50ef90f4/obsl' (unopened)>" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -166,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "35ba9213-37f1-40b7-a922-f31e75258240", "metadata": {}, "outputs": [ @@ -176,7 +201,7 @@ "" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -187,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "5a792279-b560-4246-b853-8735f85e2bb3", "metadata": {}, "outputs": [ @@ -219,7 +244,7 @@ " [240, 240, 240, ..., 240, 240, 238]]], dtype=uint8)" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -231,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "616715c4-0178-4f27-831f-63d6c024cf1a", "metadata": {}, "outputs": [ @@ -242,7 +267,7 @@ " 'loc': 'file:///Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/soma-spatial/spatial/c63d5cb4-1046-4948-a188-e6af50ef90f4/obsl/loc' (unopened)>" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -253,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": 
"f596b70a-528b-43c1-aa75-429d13cc164b", "metadata": {}, "outputs": [ @@ -263,7 +288,7 @@ "" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -274,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "c0767815-4f13-468c-99b9-82f6680c8337", "metadata": {}, "outputs": [ @@ -661,7 +686,7 @@ "[4992 rows x 114 columns]" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } From ff734985d088d66216af833dd5422d30a1109161 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Wed, 12 Jun 2024 13:40:19 -0700 Subject: [PATCH 27/29] Pin tiledbsoma to commit 69d699e for latest spatial --- api/python/cellxgene_census/pyproject.toml | 2 +- tools/cellxgene_census_builder/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml index fe3be1308..b702b7c30 100644 --- a/api/python/cellxgene_census/pyproject.toml +++ b/api/python/cellxgene_census/pyproject.toml @@ -34,7 +34,7 @@ dependencies= [ # TODO (spatial): tiledbsoma pin to a PyPI release is temporarily commented out in favor git commit pin # "tiledbsoma==1.11.4", # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@5069714#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@69d699e#egg=tiledbsoma&subdirectory=apis/python/", "anndata", "numpy>=1.21,<2.0", "requests", diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index 246b61b6c..af1ba40ed 100644 --- a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -37,7 +37,7 @@ dependencies= [ # TODO (spatial): tiledbsoma 
pin to a PyPI release is temporarily commented out in favor git commit pin # "tiledbsoma==1.9.3", # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@5069714#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@69d699e#egg=tiledbsoma&subdirectory=apis/python/", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released # "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", From a5c21a16cd5c44f94016865169c56cb27cef3769 Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Thu, 13 Jun 2024 16:57:31 -0700 Subject: [PATCH 28/29] Pin tiledbsoma to commit 9eb540f for latest spatial --- api/python/cellxgene_census/pyproject.toml | 2 +- tools/cellxgene_census_builder/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml index b702b7c30..c055487b2 100644 --- a/api/python/cellxgene_census/pyproject.toml +++ b/api/python/cellxgene_census/pyproject.toml @@ -34,7 +34,7 @@ dependencies= [ # TODO (spatial): tiledbsoma pin to a PyPI release is temporarily commented out in favor git commit pin # "tiledbsoma==1.11.4", # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@69d699e#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@9eb540f#egg=tiledbsoma&subdirectory=apis/python/", "anndata", "numpy>=1.21,<2.0", "requests", diff --git a/tools/cellxgene_census_builder/pyproject.toml b/tools/cellxgene_census_builder/pyproject.toml index af1ba40ed..fd6127ee3 100644 --- 
a/tools/cellxgene_census_builder/pyproject.toml +++ b/tools/cellxgene_census_builder/pyproject.toml @@ -37,7 +37,7 @@ dependencies= [ # TODO (spatial): tiledbsoma pin to a PyPI release is temporarily commented out in favor git commit pin # "tiledbsoma==1.9.3", # TODO (spatial): Pin tiledbsoma dependency to an actual released version after tiledbsoma spatial code has been released - "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@69d699e#egg=tiledbsoma&subdirectory=apis/python/", + "tiledbsoma @ git+https://github.com/single-cell-data/TileDB-SOMA.git@9eb540f#egg=tiledbsoma&subdirectory=apis/python/", # TODO (spatial): Uncomment line below to install "cellxgene-census" at an appropriate version when tiledbsoma spatial code has been released # "cellxgene-census==1.12.0", "cellxgene-ontology-guide==0.6.1", From 2cef2bb2489b576a170ad4cdda93751966a7e06a Mon Sep 17 00:00:00 2001 From: Prathap Sridharan Date: Thu, 13 Jun 2024 17:05:20 -0700 Subject: [PATCH 29/29] Update tiledbsoma spatial notebook --- .../tiledbsoma_spatial_dataset_ingest.ipynb | 336 +++++++++++++++++- 1 file changed, 321 insertions(+), 15 deletions(-) diff --git a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb index 5125e8a47..df9860fa1 100644 --- a/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb +++ b/tools/cellxgene_census_builder/spatial_dev_tools/tiledbsoma_spatial_dataset_ingest.ipynb @@ -80,9 +80,10 @@ { "data": { "text/plain": [ - "" ] }, @@ -127,7 +128,8 @@ { "data": { "text/plain": [ - "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sp.ms[\"RNA\"].var_scene" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3efc6723-be0e-4743-900d-53ad0d803701", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
soma_joinidscene_iddata
00c63d5cb4-1046-4948-a188-e6af50ef90f4True
13c63d5cb4-1046-4948-a188-e6af50ef90f4True
27c63d5cb4-1046-4948-a188-e6af50ef90f4True
312c63d5cb4-1046-4948-a188-e6af50ef90f4True
413c63d5cb4-1046-4948-a188-e6af50ef90f4True
............
2274733148c63d5cb4-1046-4948-a188-e6af50ef90f4True
2274833149c63d5cb4-1046-4948-a188-e6af50ef90f4True
2274933151c63d5cb4-1046-4948-a188-e6af50ef90f4True
2275033153c63d5cb4-1046-4948-a188-e6af50ef90f4True
2275133156c63d5cb4-1046-4948-a188-e6af50ef90f4True
\n", + "

22752 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " soma_joinid scene_id data\n", + "0 0 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "1 3 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "2 7 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "3 12 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "4 13 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "... ... ... ...\n", + "22747 33148 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "22748 33149 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "22749 33151 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "22750 33153 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "22751 33156 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "\n", + "[22752 rows x 3 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "var_scene_df = sp.ms[\"RNA\"].var_scene\n", + "var_scene_df.read().concat().to_pandas()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "66ec6095-2013-46d0-a630-6ee0a879c056", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sp.obs_scene" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c4989946-47c0-4c50-9da4-e8e02a0454f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
soma_joinidscene_iddata
00c63d5cb4-1046-4948-a188-e6af50ef90f4True
11c63d5cb4-1046-4948-a188-e6af50ef90f4True
22c63d5cb4-1046-4948-a188-e6af50ef90f4True
33c63d5cb4-1046-4948-a188-e6af50ef90f4True
44c63d5cb4-1046-4948-a188-e6af50ef90f4True
............
49874987c63d5cb4-1046-4948-a188-e6af50ef90f4True
49884988c63d5cb4-1046-4948-a188-e6af50ef90f4True
49894989c63d5cb4-1046-4948-a188-e6af50ef90f4True
49904990c63d5cb4-1046-4948-a188-e6af50ef90f4True
49914991c63d5cb4-1046-4948-a188-e6af50ef90f4True
\n", + "

4992 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " soma_joinid scene_id data\n", + "0 0 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "1 1 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "2 2 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "3 3 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "4 4 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "... ... ... ...\n", + "4987 4987 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "4988 4988 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "4989 4989 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "4990 4990 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "4991 4991 c63d5cb4-1046-4948-a188-e6af50ef90f4 True\n", + "\n", + "[4992 rows x 3 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "obs_scene_df = sp.obs_scene\n", + "obs_scene_df.read().concat().to_pandas()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "id": "8298ad0e-4413-45e8-9161-ef550f58a29e", "metadata": {}, "outputs": [ @@ -156,7 +462,7 @@ " 'c63d5cb4-1046-4948-a188-e6af50ef90f4': 'file:///Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/soma-spatial/spatial/c63d5cb4-1046-4948-a188-e6af50ef90f4' (unopened)>" ] }, - "execution_count": 6, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -167,7 +473,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "id": "7f08df07-2027-494f-b383-b9e739a614fd", "metadata": {}, "outputs": [ @@ -180,7 +486,7 @@ " 'obsl': 'file:///Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/soma-spatial/spatial/c63d5cb4-1046-4948-a188-e6af50ef90f4/obsl' (unopened)>" ] }, - "execution_count": 7, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -191,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "id": "35ba9213-37f1-40b7-a922-f31e75258240", "metadata": {}, "outputs": [ @@ -201,7 
+507,7 @@ "" ] }, - "execution_count": 8, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -212,7 +518,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "id": "5a792279-b560-4246-b853-8735f85e2bb3", "metadata": {}, "outputs": [ @@ -244,7 +550,7 @@ " [240, 240, 240, ..., 240, 240, 238]]], dtype=uint8)" ] }, - "execution_count": 9, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -256,7 +562,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "id": "616715c4-0178-4f27-831f-63d6c024cf1a", "metadata": {}, "outputs": [ @@ -267,7 +573,7 @@ " 'loc': 'file:///Users/psridharan/code/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools/soma-spatial/spatial/c63d5cb4-1046-4948-a188-e6af50ef90f4/obsl/loc' (unopened)>" ] }, - "execution_count": 10, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -278,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 15, "id": "f596b70a-528b-43c1-aa75-429d13cc164b", "metadata": {}, "outputs": [ @@ -288,7 +594,7 @@ "" ] }, - "execution_count": 11, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -299,7 +605,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "id": "c0767815-4f13-468c-99b9-82f6680c8337", "metadata": {}, "outputs": [ @@ -686,7 +992,7 @@ "[4992 rows x 114 columns]" ] }, - "execution_count": 12, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" }