diff --git a/docs/resource_sources.rst b/docs/resource_sources.rst index ce59dad1e..656314d01 100644 --- a/docs/resource_sources.rst +++ b/docs/resource_sources.rst @@ -6,7 +6,6 @@ gnomAD data is available through `multiple cloud providers' public datasets prog The functions in the :doc:`gnomad.resources ` package can be configured to load data from different sources. If Hail determines that is is running in a cloud provider's Spark environment, resources will default to being read from that cloud provider's datasets program. -For example, resource will be read from Azure Open Datasets if Hail determines that it is running on an Azure HDInsight cluster. Otherwise, resources will default to being read from Google Cloud Public Datasets. This can be configured using the ``GNOMAD_DEFAULT_PUBLIC_RESOURCE_SOURCE`` environment variable. diff --git a/gnomad/resources/config.py b/gnomad/resources/config.py index 4d40535bc..1c82597f1 100644 --- a/gnomad/resources/config.py +++ b/gnomad/resources/config.py @@ -14,7 +14,6 @@ class GnomadPublicResourceSource(Enum): GNOMAD = "gnomAD" GOOGLE_CLOUD_PUBLIC_DATASETS = "Google Cloud Public Datasets" REGISTRY_OF_OPEN_DATA_ON_AWS = "Registry of Open Data on AWS" - AZURE_OPEN_DATASETS = "Azure Open Datasets" def get_default_public_resource_source() -> Union[GnomadPublicResourceSource, str]: @@ -25,7 +24,6 @@ def get_default_public_resource_source() -> Union[GnomadPublicResourceSource, st - If the ``GNOMAD_DEFAULT_PUBLIC_RESOURCE_SOURCE`` environment variable is set, use the source configured there. - Otherwise, if Hail determines that is is running in a cloud provider's Spark environment, use the source from that cloud provider. - For example, use Azure Open Datasets if running on an Azure HDInsight cluster. - Otherwise, use Google Cloud Public Datasets. :returns: Default resource source @@ -54,7 +52,6 @@ def get_default_public_resource_source() -> Union[GnomadPublicResourceSource, st cloud_spark_provider = guess_cloud_spark_provider() default_resource_sources_by_provider = { "dataproc": GnomadPublicResourceSource.GOOGLE_CLOUD_PUBLIC_DATASETS, - "hdinsight": GnomadPublicResourceSource.AZURE_OPEN_DATASETS, } if cloud_spark_provider: try: diff --git a/gnomad/resources/resource_utils.py b/gnomad/resources/resource_utils.py index 2c6eb64a9..db56768ab 100644 --- a/gnomad/resources/resource_utils.py +++ b/gnomad/resources/resource_utils.py @@ -592,9 +592,6 @@ def _get_path(self) -> str: if resource_source == GnomadPublicResourceSource.REGISTRY_OF_OPEN_DATA_ON_AWS: return f"s3a://gnomad-public-us-east-1{relative_path}" - if resource_source == GnomadPublicResourceSource.AZURE_OPEN_DATASETS: - return f"wasbs://dataset@datasetgnomad.blob.core.windows.net{relative_path}" - return ( f"{resource_source.rstrip('/')}{relative_path}" # pylint: disable=no-member ) diff --git a/tests/resources/test_resource_utils.py b/tests/resources/test_resource_utils.py index 4d389a9e7..31a4113b7 100644 --- a/tests/resources/test_resource_utils.py +++ b/tests/resources/test_resource_utils.py @@ -119,10 +119,6 @@ class TestDefaultPublicResourceSource: GnomadPublicResourceSource.REGISTRY_OF_OPEN_DATA_ON_AWS, "s3a://gnomad-public-us-east-1/example.ht", ), - ( - GnomadPublicResourceSource.AZURE_OPEN_DATASETS, - "wasbs://dataset@datasetgnomad.blob.core.windows.net/example.ht", - ), ( "gs://my-bucket/gnomad-resources", "gs://my-bucket/gnomad-resources/example.ht", @@ -154,7 +150,6 @@ def test_read_from_default_source(self, default_source, expected_path): "Registry of Open Data on AWS", GnomadPublicResourceSource.REGISTRY_OF_OPEN_DATA_ON_AWS, ), - ("Azure Open Datasets", GnomadPublicResourceSource.AZURE_OPEN_DATASETS), ("gs://my-bucket/gnomad-resources", "gs://my-bucket/gnomad-resources"), ], ) @@ -172,7 +167,6 @@ def test_get_default_source_from_environment( "cloud_spark_provider,expected_default_source", [ ("dataproc", GnomadPublicResourceSource.GOOGLE_CLOUD_PUBLIC_DATASETS), - ("hdinsight", GnomadPublicResourceSource.AZURE_OPEN_DATASETS), ("unknown", GnomadPublicResourceSource.GOOGLE_CLOUD_PUBLIC_DATASETS), (None, GnomadPublicResourceSource.GOOGLE_CLOUD_PUBLIC_DATASETS), ], @@ -198,7 +192,7 @@ def test_default_source_from_environment_overrides_cloud_spark_provider(self): with ( patch( "hail.utils.guess_cloud_spark_provider", - return_value="hdinsight", + return_value="dataproc", create=True, ), patch.dict( @@ -255,16 +249,6 @@ def gnomad_public_resource_test_parameters( GnomadPublicResourceSource.REGISTRY_OF_OPEN_DATA_ON_AWS, f"s3a://gnomad-public-us-east-1{path}", ), - ( - f"gs://gnomad-public{path}", - GnomadPublicResourceSource.AZURE_OPEN_DATASETS, - f"wasbs://dataset@datasetgnomad.blob.core.windows.net{path}", - ), - ( - f"gs://gnomad-public-requester-pays{path}", - GnomadPublicResourceSource.AZURE_OPEN_DATASETS, - f"wasbs://dataset@datasetgnomad.blob.core.windows.net{path}", - ), ( f"gs://gnomad-public{path}", "gs://my-bucket/gnomad-resources",