From b5c326ccc9de18d593dd4a4ae4f610bf08c1cbf3 Mon Sep 17 00:00:00 2001 From: Jing Date: Mon, 15 Jul 2024 14:28:54 -0400 Subject: [PATCH 1/4] update first six cells --- docs/nb/get_data.ipynb | 220 +++++++++++++++++++++-------------------- 1 file changed, 113 insertions(+), 107 deletions(-) diff --git a/docs/nb/get_data.ipynb b/docs/nb/get_data.ipynb index 203c64b9..0141286e 100644 --- a/docs/nb/get_data.ipynb +++ b/docs/nb/get_data.ipynb @@ -13,7 +13,7 @@ "id": "e212234f", "metadata": {}, "source": [ - "This notebook describes and provides example code to\n", + "This notebook describes and provides example code to:\n", "\n", "1. Filter NMDC metadata to obtain IDs and fetch attributes, using API endpoints.\n", "2. Download collected metadata to files, and data objects to files.\n", @@ -88,32 +88,57 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "c0691d07-51f7-46e3-a57a-b98c72e636b1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'has_output': ['nmdc:bb7a9edac41c31f6d36c34f6bfa7491a'],\n", - " 'started_at_time': '2021-01-21T23:31:33Z',\n", - " 'execution_resource': 'EMSL-RZR',\n", - " 'has_input': ['emsl:output_747989'],\n", - " 'was_informed_by': 'emsl:747989',\n", - " 'git_url': 'https://github.com/microbiomedata/enviroMS',\n", - " 'id': 'nmdc:f2a40483485c45baaf30160d0ca2ac40',\n", - " 'used': '12T_FTICR_B',\n", - " 'type': 'nmdc:NomAnalysisActivity',\n", - " 'ended_at_time': '2021-01-21T23:31:33Z'}" + "{'id': 'nmdc:bsm-13-amrnys72',\n", + " 'name': 'Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_25-Nov-14',\n", + " 'description': 'Sterilized sand packs were incubated back in the ground and collected at time point T4.',\n", + " 'env_broad_scale': {'has_raw_value': 'ENVO:01000253',\n", + " 'term': {'id': 'ENVO:01000253'}},\n", + " 'env_local_scale': {'has_raw_value': 'ENVO:01000621',\n", + " 'term': {'id': 'ENVO:01000621'}},\n", + " 'env_medium': {'has_raw_value': 'ENVO:01000017',\n", + " 'term': {'id': 'ENVO:01000017'}},\n", + " 'type': 'nmdc:Biosample',\n", + " 'collection_date': {'has_raw_value': '2014-11-25'},\n", + " 'depth': {'has_raw_value': '0.5',\n", + " 'has_numeric_value': 0.5,\n", + " 'has_unit': 'meter'},\n", + " 'geo_loc_name': {'has_raw_value': 'USA: Columbia River, Washington'},\n", + " 'lat_lon': {'has_raw_value': '46.37228379 -119.2717467',\n", + " 'latitude': 46.37228379,\n", + " 'longitude': -119.2717467},\n", + " 'ecosystem': 'Engineered',\n", + " 'ecosystem_category': 'Artificial ecosystem',\n", + " 'ecosystem_type': 'Sand microcosm',\n", + " 'ecosystem_subtype': 'Unclassified',\n", + " 'specific_ecosystem': 'Unclassified',\n", + " 'add_date': '2015-05-28',\n", + " 'community': 'microbial communities',\n", + " 'habitat': 'sand microcosm',\n", + " 'location': 'groundwater-surface water interaction zone in Washington, USA',\n", + " 'mod_date': '2021-06-17',\n", + " 'ncbi_taxonomy_name': 'sediment metagenome',\n", + " 'sample_collection_site': 'sand microcosm',\n", + " 'part_of': ['nmdc:sty-11-aygzgv51'],\n", + " 'alternative_identifiers': ['img.taxon:3300042754'],\n", + " 'insdc_biosample_identifiers': ['biosample:SAMN06343877'],\n", + " 'samp_name': 'GW-RW T4_25-Nov-14',\n", + " 'gold_biosample_identifiers': ['gold:Gb0115231']}" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "get_json(\"/nmdcschema/ids/nmdc:f2a40483485c45baaf30160d0ca2ac40\")" + "get_json(\"/nmdcschema/ids/nmdc:bsm-13-amrnys72\")" ] }, { @@ -126,57 +151,57 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "99fae5ca-af59-4156-ad5a-7946376780c5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'env_local_scale': {'has_raw_value': 'ENVO:01000621'},\n", - " 'collection_date': {'has_raw_value': '2014-09-23'},\n", - " 'add_date': '2015-05-28',\n", - " 'geo_loc_name': {'has_raw_value': 'USA: Columbia River, Washington'},\n", - " 'location': 'groundwater-surface water interaction zone in Washington, USA',\n", - " 'mod_date': '2021-06-17',\n", - " 'description': 'Sterilized sand packs were incubated back in the ground and collected at time point T2.',\n", + "{'id': 'nmdc:bsm-13-w2cwcx50',\n", + " 'name': 'Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T4_12-Aug-14',\n", + " 'description': 'Sterilized sand packs were incubated back in the ground and collected at time point T4.',\n", + " 'env_broad_scale': {'has_raw_value': 'ENVO:01000253',\n", + " 'term': {'id': 'ENVO:01000253'}},\n", + " 'env_local_scale': {'has_raw_value': 'ENVO:01000621',\n", + " 'term': {'id': 'ENVO:01000621'}},\n", + " 'env_medium': {'has_raw_value': 'ENVO:01000017',\n", + " 'term': {'id': 'ENVO:01000017'}},\n", + " 'type': 'nmdc:Biosample',\n", + " 'collection_date': {'has_raw_value': '2014-08-12'},\n", " 'depth': {'has_raw_value': '0.5',\n", " 'has_numeric_value': 0.5,\n", " 'has_unit': 'meter'},\n", - " 'part_of': ['gold:Gs0114663'],\n", - " 'ncbi_taxonomy_name': 'sediment metagenome',\n", - " 'GOLD_sample_identifiers': ['gold:Gb0115217'],\n", + " 'geo_loc_name': {'has_raw_value': 'USA: Columbia River, Washington'},\n", + " 'lat_lon': {'has_raw_value': '46.37228379 -119.2717467',\n", + " 'latitude': 46.37228379,\n", + " 'longitude': -119.2717467},\n", + " 'ecosystem': 'Engineered',\n", " 'ecosystem_category': 'Artificial ecosystem',\n", " 'ecosystem_type': 'Sand microcosm',\n", - " 'env_broad_scale': {'has_raw_value': 'ENVO:01000253'},\n", - " 'sample_collection_site': 'sand microcosm',\n", - " 'id': 'gold:Gb0115217',\n", - " 'identifier': 'GW-RW T2_23-Sept-14',\n", " 'ecosystem_subtype': 'Unclassified',\n", - " 'depth2': {'has_raw_value': '1.0',\n", - " 'has_numeric_value': 1,\n", - " 'has_unit': 'meter'},\n", " 'specific_ecosystem': 'Unclassified',\n", - " 'INSDC_biosample_identifiers': ['biosample:SAMN06343863'],\n", + " 'add_date': '2015-05-28',\n", " 'community': 'microbial communities',\n", - " 'name': 'Sand microcosm microbial communities from a hyporheic zone in Columbia River, Washington, USA - GW-RW T2_23-Sept-14',\n", - " 'alternative_identifiers': ['img.taxon:3300042741'],\n", - " 'lat_lon': {'has_raw_value': '46.37228379 -119.2717467',\n", - " 'latitude': 46.37228379,\n", - " 'longitude': -119.2717467},\n", " 'habitat': 'sand microcosm',\n", - " 'ecosystem': 'Engineered',\n", - " 'env_medium': {'has_raw_value': 'ENVO:01000017'},\n", - " 'type': 'nmdc:Biosample'}" + " 'location': 'groundwater-surface water interaction zone in Washington, USA',\n", + " 'mod_date': '2021-06-17',\n", + " 'ncbi_taxonomy_name': 'sediment metagenome',\n", + " 'sample_collection_site': 'sand microcosm',\n", + " 'part_of': ['nmdc:sty-11-aygzgv51'],\n", + " 'alternative_identifiers': ['img.taxon:3300042748'],\n", + " 'insdc_biosample_identifiers': ['biosample:SAMN06343871'],\n", + " 'samp_name': 'GW-RW T4_12-Aug-14',\n", + " 'gold_biosample_identifiers': ['gold:Gb0115225']}" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "get_json(\"/nmdcschema/biosample_set/gold:Gb0115217\")" + "get_json(\"/nmdcschema/biosample_set/nmdc:bsm-13-w2cwcx50\")" ] }, { @@ -189,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "756a2985-90e1-44df-8be7-84a67f737e87", "metadata": {}, "outputs": [ @@ -199,7 +224,7 @@ "19" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -227,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "ffec39f5-f35b-4f27-bdc9-97090929412f", "metadata": {}, "outputs": [ @@ -239,7 +264,7 @@ "Studies filter:\n", "\n", "{'count': 3,\n", - " 'db_response_time_ms': 12,\n", + " 'db_response_time_ms': 1,\n", " 'mongo_filter_dict': {'ecosystem_type': 'Soil'},\n", " 'mongo_sort_list': None,\n", " 'page': 1,\n", @@ -248,80 +273,61 @@ " 'ecosystem_category': 'Terrestrial',\n", " 'ecosystem_subtype': 'Unclassified',\n", " 'ecosystem_type': 'Soil',\n", - " 'id': 'gold:Gs0128850'},\n", + " 'id': 'nmdc:sty-11-076c9980'},\n", " {'ecosystem': 'Environmental',\n", " 'ecosystem_category': 'Terrestrial',\n", " 'ecosystem_subtype': 'Meadow',\n", " 'ecosystem_type': 'Soil',\n", - " 'id': 'gold:Gs0135149'},\n", + " 'id': 'nmdc:sty-11-dcqce727'},\n", " {'ecosystem': 'Environmental',\n", " 'ecosystem_category': 'Terrestrial',\n", " 'ecosystem_subtype': 'Unclassified',\n", " 'ecosystem_type': 'Soil',\n", - " 'id': 'gold:Gs0154044'}]\n", + " 'id': 'nmdc:sty-11-r2h77870'}]\n", "\n", "Data Objects filter and sort:\n", "\n", - "{'count': 11556,\n", - " 'db_response_time_ms': 362,\n", + "{'count': 0,\n", + " 'db_response_time_ms': 233,\n", " 'mongo_filter_dict': {'description': {'$regex': 'GFF'}},\n", " 'mongo_sort_list': [['file_size_bytes', -1]],\n", " 'page': 1,\n", " 'per_page': 25}\n", - "[{'description': 'Prodigal GFF file for gold:Gp0208583',\n", - " 'file_size_bytes': 3013162589,\n", - " 'id': 'nmdc:ef4512eba3c1bc0a3c99d1ee7a78270b',\n", - " 'url': 'https://data.microbiomedata.org/data/nmdc:mga06f4615/annotation/nmdc_mga06f4615_prodigal.gff'},\n", - " {'description': 'Prodigal GFF file for gold:Gp0116338',\n", - " 'file_size_bytes': 2898136903,\n", - " 'id': 'nmdc:14917611a1d1fe3b4f5dc97ae9ffcb86',\n", - " 'url': 'https://data.microbiomedata.org/data/nmdc:mga0m894/annotation/nmdc_mga0m894_prodigal.gff'},\n", - " {'description': 'Prodigal GFF file for gold:Gp0208578',\n", - " 'file_size_bytes': 2837716826,\n", - " 'id': 'nmdc:a206b25e328af837ea708d6a7acb4e9f',\n", - " 'url': 'https://data.microbiomedata.org/data/nmdc:mga0x8zm48/annotation/nmdc_mga0x8zm48_prodigal.gff'},\n", - " {'description': 'Prodigal GFF file for gold:Gp0116337',\n", - " 'file_size_bytes': 2789133841,\n", - " 'id': 'nmdc:0fba5e13ecb1c51f9298e89d1bfca222',\n", - " 'url': 'https://data.microbiomedata.org/data/nmdc:mga0kt39/annotation/nmdc_mga0kt39_prodigal.gff'},\n", - " {'description': 'Prodigal GFF file for gold:Gp0208581',\n", - " 'file_size_bytes': 2728019630,\n", - " 'id': 'nmdc:274b9d4e3f71858c94e129a93f5a1232',\n", - " 'url': 'https://data.microbiomedata.org/data/nmdc:mga0qfj577/annotation/nmdc_mga0qfj577_prodigal.gff'}]\n", + "[]\n", "\n", "Activities filter and sort:\n", "\n", - "{'count': 2054,\n", - " 'db_response_time_ms': 2591,\n", + "{'count': 7046,\n", + " 'db_response_time_ms': 3989,\n", " 'mongo_filter_dict': {'execution_resource': {'$regex': 'NERSC'},\n", " 'started_at_time': {'$gt': '2022-01-01'}},\n", " 'page': 1,\n", " 'per_page': 25}\n", - "[{'ended_at_time': '2022-05-31T12:31:18-07:00',\n", - " 'execution_resource': 'NERSC-Cori',\n", - " 'id': 'nmdc:683c4a7adaae08cf5456f7b80bb6f4d3',\n", - " 'started_at_time': '2022-05-31T12:31:18-07:00',\n", - " 'type': 'nmdc:MetatranscriptomeAssembly'},\n", - " {'ended_at_time': '2022-05-31T12:29:49-07:00',\n", - " 'execution_resource': 'NERSC-Cori',\n", - " 'id': 'nmdc:6305a511f040e8bef679b8a2e439329e',\n", - " 'started_at_time': '2022-05-31T12:29:49-07:00',\n", - " 'type': 'nmdc:MetatranscriptomeAssembly'},\n", - " {'ended_at_time': '2022-05-31T12:29:02-07:00',\n", - " 'execution_resource': 'NERSC-Cori',\n", - " 'id': 'nmdc:17b505f7781a3f0e932e8f39f4190068',\n", - " 'started_at_time': '2022-05-31T12:29:02-07:00',\n", - " 'type': 'nmdc:MetatranscriptomeAssembly'},\n", - " {'ended_at_time': '2022-05-31T12:28:04-07:00',\n", - " 'execution_resource': 'NERSC-Cori',\n", - " 'id': 'nmdc:b502fd974951d11591564592ecff731c',\n", - " 'started_at_time': '2022-05-31T12:28:04-07:00',\n", - " 'type': 'nmdc:MetatranscriptomeAssembly'},\n", - " {'ended_at_time': '2022-05-31T12:26:24-07:00',\n", - " 'execution_resource': 'NERSC-Cori',\n", - " 'id': 'nmdc:839560f9650622f232c262d8cf7a9db9',\n", - " 'started_at_time': '2022-05-31T12:26:24-07:00',\n", - " 'type': 'nmdc:MetatranscriptomeAssembly'}]\n" + "[{'ended_at_time': '2024-07-15T10:51:55.589472+00:00',\n", + " 'execution_resource': 'NERSC-Perlmutter',\n", + " 'id': 'nmdc:wfrbt-11-ay3tc236.1',\n", + " 'started_at_time': '2024-07-15T03:31:07.781447+00:00',\n", + " 'type': 'nmdc:ReadBasedTaxonomyAnalysisActivity'},\n", + " {'ended_at_time': '2024-07-14T09:42:44.870934+00:00',\n", + " 'execution_resource': 'NERSC-Perlmutter',\n", + " 'id': 'nmdc:wfrbt-11-z7786992.1',\n", + " 'started_at_time': '2024-07-13T21:07:41.419236+00:00',\n", + " 'type': 'nmdc:ReadBasedTaxonomyAnalysisActivity'},\n", + " {'ended_at_time': '2024-07-14T09:30:12.258176+00:00',\n", + " 'execution_resource': 'NERSC-Perlmutter',\n", + " 'id': 'nmdc:wfrbt-11-db7m7x43.1',\n", + " 'started_at_time': '2024-07-13T21:07:35.352313+00:00',\n", + " 'type': 'nmdc:ReadBasedTaxonomyAnalysisActivity'},\n", + " {'ended_at_time': '2024-07-14T09:03:40.446429+00:00',\n", + " 'execution_resource': 'NERSC-Perlmutter',\n", + " 'id': 'nmdc:wfrbt-11-zhawc650.1',\n", + " 'started_at_time': '2024-07-13T21:05:44.023046+00:00',\n", + " 'type': 'nmdc:ReadBasedTaxonomyAnalysisActivity'},\n", + " {'ended_at_time': '2024-07-14T09:02:57.875885+00:00',\n", + " 'execution_resource': 'NERSC-Perlmutter',\n", + " 'id': 'nmdc:wfrbt-11-he896a62.1',\n", + " 'started_at_time': '2024-07-13T21:06:02.938818+00:00',\n", + " 'type': 'nmdc:ReadBasedTaxonomyAnalysisActivity'}]\n" ] } ], @@ -393,7 +399,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "id": "c66df5a7-8302-4453-8d4c-68fe8d85fe56", "metadata": {}, "outputs": [ @@ -409,10 +415,10 @@ { "data": { "text/plain": [ - "b'{\"GOLD_sample_identifiers\": [\"gold:Gb0110680\"], \"INSDC_biosample_identifiers\": [\"BIOSAMPLE:SAMN08902828\"], \"add_date\": \"2015-02-26\", \"collection_date\": {\"has_raw_value\": \"2014-09-03\"}, \"depth\": {\"has_maximum_numeric_value\": 0.2, \"has_minimum_numeric_value\": 0.1, \"has_numeric_value\": 0.1, \"has_raw_value\": \"0.1 to 0.2 meters\", \"has_unit\": \"metre\"}, \"depth2\": {\"has_numeric_value\": 0.2, \"has_raw_value\": \"0.2 meters\", \"has_unit\": \"metre\"}, \"description\": \"Grasslands soil microbial communities from the Angelo Coastal Reserve, plot 2. There is a duplicate submission for this entry in NCBI. The NCBI identifiers for a duplicate are PRJNA449266 and SAMN08902828\", \"ecosystem\": \"Environmental\", \"ecosystem_category\": \"Terrestrial\", \"ecosystem_subtype\": \"Grasslands\", \"ecosystem_type\": \"Soil\", \"elev\": {\"has_numeric_value\": 432, \"has_raw_value\": \"432 meters\", \"has_unit\": \"metre\"}, \"env_broad_scale\": {\"has_raw_value\": \"grassland biome [ENVO:01000177]\", \"term\": {\"id\": \"ENVO:01000177\", \"name\": \"grassland biome\"}}, \"env_local_scale\": {\"has_raw_value\": \"biosphere reserve [ENVO:00000376]\", \"term\": {\"id\": \"ENVO:00000376\", \"name\": \"biosphere reserve\"}}, \"env_medium\": {\"has_raw_value\": \"grassland soil [ENVO:00005750]\", \"term\": {\"id\": \"ENVO:00005750\", \"name\": \"grassland soil\"}}, \"geo_loc_name\": {\"has_raw_value\": \"USA: California: Angelo Coastal Reserve\"}, \"habitat\": \"Grasslands soil\", \"id\": \"gold:Gb0110680\", \"identifier\": \"14_0903_02_20cm\", \"lat_lon\": {\"has_raw_value\": \"39.7392 -123.6308\", \"latitude\": 39.7392, \"longitude\": -123.6308}, \"location\": \"USA: California: Angelo Coastal Reserve\", \"mod_date\": \"2022-08-02\", \"name\": \"Grasslands soil microbial communities from the Angelo Coastal Reserve, California, USA - 14_0903_02_20cm\", \"ncbi_taxonomy_name\": \"soil metagenome\", \"part_of\": [\"gold:Gs0110119\"], \"sample_collection_site\": \"grassland soil\", \"specific_ecosystem\": \"Unclassified\"}\\n'" + "b'{\"add_date\": \"2015-02-26\", \"collection_date\": {\"has_raw_value\": \"2014-09-03\"}, \"depth\": {\"has_maximum_numeric_value\": 0.4, \"has_minimum_numeric_value\": 0.3, \"has_numeric_value\": 0.3, \"has_raw_value\": \"0.3 to 0.4 meters\", \"has_unit\": \"metre\"}, \"description\": \"Grasslands soil microbial communities from the Angelo Coastal Reserve, plot 9. There is a duplicate submission for this entry in NCBI. The NCBI identifiers for a duplicate are PRJNA449266 and SAMN08902854\", \"ecosystem\": \"Environmental\", \"ecosystem_category\": \"Terrestrial\", \"ecosystem_subtype\": \"Grasslands\", \"ecosystem_type\": \"Soil\", \"elev\": 432, \"env_broad_scale\": {\"has_raw_value\": \"grassland biome [ENVO:01000177]\", \"term\": {\"id\": \"ENVO:01000177\"}}, \"env_local_scale\": {\"has_raw_value\": \"biosphere reserve [ENVO:00000376]\", \"term\": {\"id\": \"ENVO:00000376\"}}, \"env_medium\": {\"has_raw_value\": \"grassland soil [ENVO:00005750]\", \"term\": {\"id\": \"ENVO:00005750\"}}, \"geo_loc_name\": {\"has_raw_value\": \"USA: California: Angelo Coastal Reserve\"}, \"habitat\": \"Grasslands soil\", \"id\": \"nmdc:bsm-11-04qjyv47\", \"lat_lon\": {\"has_raw_value\": \"39.7392 -123.6308\", \"latitude\": 39.7392, \"longitude\": -123.6308}, \"location\": \"USA: California: Angelo Coastal Reserve\", \"mod_date\": \"2022-08-02\", \"name\": \"Grasslands soil microbial communities from the Angelo Coastal Reserve, California, USA - 14_0903_09_40cm\", \"ncbi_taxonomy_name\": \"soil metagenome\", \"part_of\": [\"nmdc:sty-11-zs2syx06\"], \"sample_collection_site\": \"grassland soil\", \"specific_ecosystem\": \"Unclassified\", \"insdc_biosample_identifiers\": [\"biosample:SAMN08902854\"], \"samp_name\": \"14_0903_09_40cm\", \"gold_biosample_identifiers\": [\"gold:Gb0110688\"], \"type\": \"nmdc:Biosample\"}\\n'" ] }, - "execution_count": 6, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -426,14 +432,14 @@ "all_results = []\n", "while cursor is not None:\n", " json_response = get_json(\n", - " f\"/biosamples?filter=part_of:gold:Gs0110119&cursor={cursor}\"\n", + " f\"/biosamples?filter=part_of:nmdc:sty-11-zs2syx06&cursor={cursor}\"\n", " )\n", " m, rs = meta(json_response), results(json_response)\n", " cursor = m['next_cursor']\n", " print(\"fetched\", len(rs), f\"results out of {m['count']} total\")\n", " all_results.extend(rs)\n", "\n", - "path = \"~/biosamples_part_of_gold:Gs0110119.jsonl\"\n", + "path = \"~/biosamples_part_of_nmdc:sty-11-zs2syx06.jsonl\"\n", "\n", "write_jsonlines_file(\n", " Path(path).expanduser(),\n", @@ -669,9 +675,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python (nmdc-runtime)", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "nmdc-runtime" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -683,7 +689,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.10.14" } }, "nbformat": 4, From 1cbf4b3e06cfba5513c2f3511120ba7ba53fe07f Mon Sep 17 00:00:00 2001 From: Jing Date: Wed, 17 Jul 2024 14:30:33 -0400 Subject: [PATCH 2/4] update remaining ids --- docs/nb/get_data.ipynb | 41 +++++++++++------------------------------ 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/docs/nb/get_data.ipynb b/docs/nb/get_data.ipynb index 0141286e..732ae586 100644 --- a/docs/nb/get_data.ipynb +++ b/docs/nb/get_data.ipynb @@ -462,42 +462,23 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "id": "5f201881-9fe6-444e-97e7-5d34a541e4a1", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c81d0322f3a744b8b545836a07c31238", + "model_id": "d6ac56a3dab8439fbde6f3f7454d91d0", "version_major": 2, "version_minor": 0 }, "text/plain": [ - " 0%| | 0/22 [00:00 omics processing activity emsl:705701 > nmdc:NomAnalysisActivity activity > data object nmdc:2a779b0132303d5999c6f7c99915fd34 from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134A_CHCl3_15Oct18_IAT_p1_1_01_35922.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:705702 > nmdc:NomAnalysisActivity activity > data object nmdc:9f0d52cc46d247b8d2ba12d5842b9fb6 from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134A_H2O_15Oct18_IAT_p1_1_01_35893.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:705703 > nmdc:NomAnalysisActivity activity > data object nmdc:e3449444d03be27addeaca224ce9a3a3 from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134A_MeOH_15Oct18_IAT_p1_1_01_35910.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:705704 > nmdc:NomAnalysisActivity activity > data object nmdc:ed4d444af3672b33bf43a1d5b6dd1ca9 from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134B_CHCl3_15Oct18_IAT_p1_1_01_35927.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:705705 > nmdc:NomAnalysisActivity activity > data object nmdc:4296e45e09241a4ac76202d4f1f40458 from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134B_H2O_15Oct18_IAT_p1_1_01_35898.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:705706 > nmdc:NomAnalysisActivity activity > data object nmdc:031d764e78bd55a9247ee11fcf407587 from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134B_H2O_SPE_15Oct18_IAT_p05_1_01_35903.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:705707 > nmdc:NomAnalysisActivity activity > data object nmdc:f2f58fc563ac5836762826b0e6db6f48 from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134B_MeOH_15Oct18_IAT_p1_1_01_35915.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:713600 > nmdc:NomAnalysisActivity activity > data object nmdc:d7bda0fca4304e2699eed4be527c81de from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134_r3_H2O_30Nov18_IATp1_1_01_37902.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:715134 > nmdc:NomAnalysisActivity activity > data object nmdc:f905c863ed0369cc3f83a5cbc46561d4 from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134_H2O_3_IATp1_07Dec18_1_01_38232.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:717358 > nmdc:NomAnalysisActivity activity > data object nmdc:2d795aad78f1ddba9e3f7add02fc259f from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134_H2O_1_12Dec18_IATp1_1_01_38495.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:717359 > nmdc:NomAnalysisActivity activity > data object nmdc:ce403bfa29fdce24d6047570f0336c48 from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134_H2O_2_12Dec18_IATp1_1_01_38496.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:717360 > nmdc:NomAnalysisActivity activity > data object nmdc:119d7c652e0c29e32c5066cc987b17ff from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134_H2O_3_12Dec18_IATp1_1_01_38497.csv...\n", - "downloading biosample igsn:IEWFS000A > omics processing activity emsl:717365 > nmdc:NomAnalysisActivity activity > data object nmdc:4b649d353b2c2385ab042682ba516d14 from https://nmdcdemo.emsl.pnnl.gov/nom/results/Brodie_134_H2O_1_13Dec18_IATp15_1_01_38565.csv...\n" - ] } ], "source": [ @@ -509,7 +490,7 @@ "\n", " return local_filename\n", "\n", - "id_biosample = \"igsn:IEWFS000A\"\n", + "id_biosample = \"igsn:IEWFS000A\" #UPDATE\n", "rs_ompro = results(get_json(f\"/activities?filter=type:nmdc:OmicsProcessing,has_input:{id_biosample}\"))\n", "for id_ompro in tqdm([d[\"id\"] for d in rs_ompro]):\n", " rs_act = results(get_json(f\"/activities?filter=was_informed_by:{id_ompro}\"))\n", @@ -539,7 +520,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "id": "4796adea-af8f-431c-88a0-0631234c991d", "metadata": {}, "outputs": [ @@ -550,23 +531,23 @@ "fetched 25 results out of 60 total\n", "fetched 25 results out of 60 total\n", "fetched 10 results out of 60 total\n", - "[{'id': 'gold:Gb0110680',\n", + "[{'id': 'nmdc:bsm-11-04qjyv47',\n", " 'lat_lon': {'has_raw_value': '39.7392 -123.6308',\n", " 'latitude': 39.7392,\n", " 'longitude': -123.6308}},\n", - " {'id': 'gold:Gb0110681',\n", + " {'id': 'nmdc:bsm-11-05082t91',\n", " 'lat_lon': {'has_raw_value': '39.7392 -123.6308',\n", " 'latitude': 39.7392,\n", " 'longitude': -123.6308}},\n", - " {'id': 'gold:Gb0110682',\n", + " {'id': 'nmdc:bsm-11-2fjtje68',\n", " 'lat_lon': {'has_raw_value': '39.7392 -123.6308',\n", " 'latitude': 39.7392,\n", " 'longitude': -123.6308}},\n", - " {'id': 'gold:Gb0110683',\n", + " {'id': 'nmdc:bsm-11-3zrd9503',\n", " 'lat_lon': {'has_raw_value': '39.7392 -123.6308',\n", " 'latitude': 39.7392,\n", " 'longitude': -123.6308}},\n", - " {'id': 'gold:Gb0110684',\n", + " {'id': 'nmdc:bsm-11-4x1n6x51',\n", " 'lat_lon': {'has_raw_value': '39.7392 -123.6308',\n", " 'latitude': 39.7392,\n", " 'longitude': -123.6308}}]\n" @@ -578,7 +559,7 @@ "all_results = []\n", "while cursor is not None:\n", " json_response = get_json(\n", - " f\"/biosamples?filter=part_of:gold:Gs0110119&cursor={cursor}\"\n", + " f\"/biosamples?filter=part_of:nmdc:sty-11-zs2syx06&cursor={cursor}\"\n", " )\n", " m, rs = meta(json_response), results(json_response)\n", " cursor = m['next_cursor']\n", From ecd08741a6b831d3cf49dff855ee6c1461fae9e7 Mon Sep 17 00:00:00 2001 From: Jing Date: Fri, 19 Jul 2024 10:54:26 -0400 Subject: [PATCH 3/4] fix id in last cell --- docs/nb/get_data.ipynb | 183 +++++++++++------------------------------ 1 file changed, 46 insertions(+), 137 deletions(-) diff --git a/docs/nb/get_data.ipynb b/docs/nb/get_data.ipynb index 732ae586..1f922ddd 100644 --- a/docs/nb/get_data.ipynb +++ b/docs/nb/get_data.ipynb @@ -88,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "c0691d07-51f7-46e3-a57a-b98c72e636b1", "metadata": {}, "outputs": [ @@ -106,9 +106,7 @@ " 'term': {'id': 'ENVO:01000017'}},\n", " 'type': 'nmdc:Biosample',\n", " 'collection_date': {'has_raw_value': '2014-11-25'},\n", - " 'depth': {'has_raw_value': '0.5',\n", - " 'has_numeric_value': 0.5,\n", - " 'has_unit': 'meter'},\n", + " 'depth': {'has_raw_value': '0.5', 'has_numeric_value': 0.5, 'has_unit': 'm'},\n", " 'geo_loc_name': {'has_raw_value': 'USA: Columbia River, Washington'},\n", " 'lat_lon': {'has_raw_value': '46.37228379 -119.2717467',\n", " 'latitude': 46.37228379,\n", @@ -132,7 +130,7 @@ " 'gold_biosample_identifiers': ['gold:Gb0115231']}" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -151,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "99fae5ca-af59-4156-ad5a-7946376780c5", "metadata": {}, "outputs": [ @@ -169,9 +167,7 @@ " 'term': {'id': 'ENVO:01000017'}},\n", " 'type': 'nmdc:Biosample',\n", " 'collection_date': {'has_raw_value': '2014-08-12'},\n", - " 'depth': {'has_raw_value': '0.5',\n", - " 'has_numeric_value': 0.5,\n", - " 'has_unit': 'meter'},\n", + " 'depth': {'has_raw_value': '0.5', 'has_numeric_value': 0.5, 'has_unit': 'm'},\n", " 'geo_loc_name': {'has_raw_value': 'USA: Columbia River, Washington'},\n", " 'lat_lon': {'has_raw_value': '46.37228379 -119.2717467',\n", " 'latitude': 46.37228379,\n", @@ -195,7 +191,7 @@ " 'gold_biosample_identifiers': ['gold:Gb0115225']}" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -214,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "756a2985-90e1-44df-8be7-84a67f737e87", "metadata": {}, "outputs": [ @@ -224,7 +220,7 @@ "19" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -252,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "ffec39f5-f35b-4f27-bdc9-97090929412f", "metadata": {}, "outputs": [ @@ -288,7 +284,7 @@ "Data Objects filter and sort:\n", "\n", "{'count': 0,\n", - " 'db_response_time_ms': 233,\n", + " 'db_response_time_ms': 243,\n", " 'mongo_filter_dict': {'description': {'$regex': 'GFF'}},\n", " 'mongo_sort_list': [['file_size_bytes', -1]],\n", " 'page': 1,\n", @@ -296,38 +292,33 @@ "[]\n", "\n", "Activities filter and sort:\n", - "\n", - "{'count': 7046,\n", - " 'db_response_time_ms': 3989,\n", - " 'mongo_filter_dict': {'execution_resource': {'$regex': 'NERSC'},\n", - " 'started_at_time': {'$gt': '2022-01-01'}},\n", - " 'page': 1,\n", - " 'per_page': 25}\n", - "[{'ended_at_time': '2024-07-15T10:51:55.589472+00:00',\n", - " 'execution_resource': 'NERSC-Perlmutter',\n", - " 'id': 'nmdc:wfrbt-11-ay3tc236.1',\n", - " 'started_at_time': '2024-07-15T03:31:07.781447+00:00',\n", - " 'type': 'nmdc:ReadBasedTaxonomyAnalysisActivity'},\n", - " {'ended_at_time': '2024-07-14T09:42:44.870934+00:00',\n", - " 'execution_resource': 'NERSC-Perlmutter',\n", - " 'id': 'nmdc:wfrbt-11-z7786992.1',\n", - " 'started_at_time': '2024-07-13T21:07:41.419236+00:00',\n", - " 'type': 'nmdc:ReadBasedTaxonomyAnalysisActivity'},\n", - " {'ended_at_time': '2024-07-14T09:30:12.258176+00:00',\n", - " 'execution_resource': 'NERSC-Perlmutter',\n", - " 'id': 'nmdc:wfrbt-11-db7m7x43.1',\n", - " 'started_at_time': '2024-07-13T21:07:35.352313+00:00',\n", - " 'type': 'nmdc:ReadBasedTaxonomyAnalysisActivity'},\n", - " {'ended_at_time': '2024-07-14T09:03:40.446429+00:00',\n", - " 'execution_resource': 'NERSC-Perlmutter',\n", - " 'id': 'nmdc:wfrbt-11-zhawc650.1',\n", - " 'started_at_time': '2024-07-13T21:05:44.023046+00:00',\n", - " 'type': 'nmdc:ReadBasedTaxonomyAnalysisActivity'},\n", - " {'ended_at_time': '2024-07-14T09:02:57.875885+00:00',\n", - " 'execution_resource': 'NERSC-Perlmutter',\n", - " 'id': 'nmdc:wfrbt-11-he896a62.1',\n", - " 'started_at_time': '2024-07-13T21:06:02.938818+00:00',\n", - " 'type': 'nmdc:ReadBasedTaxonomyAnalysisActivity'}]\n" + "\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 27\u001b[0m\n\u001b[1;32m 20\u001b[0m pprint([pick(\n\u001b[1;32m 21\u001b[0m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdescription\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfile_size_bytes\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mid\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124murl\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 22\u001b[0m , r\n\u001b[1;32m 23\u001b[0m ) \u001b[38;5;28;01mfor\u001b[39;00m r \u001b[38;5;129;01min\u001b[39;00m results(json_response)][:\u001b[38;5;241m5\u001b[39m])\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mActivities filter and sort:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 27\u001b[0m json_response \u001b[38;5;241m=\u001b[39m \u001b[43mget_json\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/activities?\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfilter=started_at_time:>2022-01-01\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 30\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m,\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexecution_resource.search:NERSC\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 32\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m&\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 33\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msort=ended_at_time:desc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 34\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 35\u001b[0m pprint(meta(json_response))\n\u001b[1;32m 36\u001b[0m pprint([\n\u001b[1;32m 37\u001b[0m pick([\n\u001b[1;32m 38\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mid\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 44\u001b[0m ) \u001b[38;5;28;01mfor\u001b[39;00m r \u001b[38;5;129;01min\u001b[39;00m results(json_response)][:\u001b[38;5;241m5\u001b[39m]\n\u001b[1;32m 45\u001b[0m )\n", + "Cell \u001b[0;32mIn[1], line 17\u001b[0m, in \u001b[0;36mget_json\u001b[0;34m(path, host, **kwargs)\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_json\u001b[39m(path, host\u001b[38;5;241m=\u001b[39mHOST, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 17\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43mrequests\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhost\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 18\u001b[0m r\u001b[38;5;241m.\u001b[39mraise_for_status()\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r\u001b[38;5;241m.\u001b[39mjson()\n", + "File \u001b[0;32m~/nmdc/nmdc-runtime/venv/lib/python3.10/site-packages/requests/api.py:73\u001b[0m, in \u001b[0;36mget\u001b[0;34m(url, params, **kwargs)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget\u001b[39m(url, params\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 63\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a GET request.\u001b[39;00m\n\u001b[1;32m 64\u001b[0m \n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;124;03m :rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mget\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/nmdc/nmdc-runtime/venv/lib/python3.10/site-packages/requests/api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/nmdc/nmdc-runtime/venv/lib/python3.10/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n", + "File \u001b[0;32m~/nmdc/nmdc-runtime/venv/lib/python3.10/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n", + "File \u001b[0;32m~/nmdc/nmdc-runtime/venv/lib/python3.10/site-packages/requests/adapters.py:667\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 664\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 666\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 667\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 668\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 669\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 670\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 671\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 672\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 673\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 674\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 675\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 676\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 677\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 678\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 679\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 681\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 682\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n", + "File \u001b[0;32m~/nmdc/nmdc-runtime/venv/lib/python3.10/site-packages/urllib3/connectionpool.py:789\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 786\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 788\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 789\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 790\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 791\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 792\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 793\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[1;32m 805\u001b[0m clean_exit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/nmdc/nmdc-runtime/venv/lib/python3.10/site-packages/urllib3/connectionpool.py:536\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 535\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n", + "File \u001b[0;32m~/nmdc/nmdc-runtime/venv/lib/python3.10/site-packages/urllib3/connection.py:464\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 461\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresponse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 463\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 464\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 466\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 467\u001b[0m assert_header_parsing(httplib_response\u001b[38;5;241m.\u001b[39mmsg)\n", + "File \u001b[0;32m/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/http/client.py:1375\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1374\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1375\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1376\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[1;32m 1377\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n", + "File \u001b[0;32m/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/http/client.py:318\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 318\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[1;32m 320\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/http/client.py:279\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 279\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 280\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[1;32m 281\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/socket.py:705\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 704\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 705\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 707\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/ssl.py:1307\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1304\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1305\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1306\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1307\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1308\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1309\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n", + "File \u001b[0;32m/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/ssl.py:1163\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1161\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1163\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1164\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -399,30 +390,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "c66df5a7-8302-4453-8d4c-68fe8d85fe56", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "fetched 25 results out of 60 total\n", - "fetched 25 results out of 60 total\n", - "fetched 10 results out of 60 total\n" - ] - }, - { - "data": { - "text/plain": [ - "b'{\"add_date\": \"2015-02-26\", \"collection_date\": {\"has_raw_value\": \"2014-09-03\"}, \"depth\": {\"has_maximum_numeric_value\": 0.4, \"has_minimum_numeric_value\": 0.3, \"has_numeric_value\": 0.3, \"has_raw_value\": \"0.3 to 0.4 meters\", \"has_unit\": \"metre\"}, \"description\": \"Grasslands soil microbial communities from the Angelo Coastal Reserve, plot 9. There is a duplicate submission for this entry in NCBI. The NCBI identifiers for a duplicate are PRJNA449266 and SAMN08902854\", \"ecosystem\": \"Environmental\", \"ecosystem_category\": \"Terrestrial\", \"ecosystem_subtype\": \"Grasslands\", \"ecosystem_type\": \"Soil\", \"elev\": 432, \"env_broad_scale\": {\"has_raw_value\": \"grassland biome [ENVO:01000177]\", \"term\": {\"id\": \"ENVO:01000177\"}}, \"env_local_scale\": {\"has_raw_value\": \"biosphere reserve [ENVO:00000376]\", \"term\": {\"id\": \"ENVO:00000376\"}}, \"env_medium\": {\"has_raw_value\": \"grassland soil [ENVO:00005750]\", \"term\": {\"id\": \"ENVO:00005750\"}}, \"geo_loc_name\": {\"has_raw_value\": \"USA: California: Angelo Coastal Reserve\"}, \"habitat\": \"Grasslands soil\", \"id\": \"nmdc:bsm-11-04qjyv47\", \"lat_lon\": {\"has_raw_value\": \"39.7392 -123.6308\", \"latitude\": 39.7392, \"longitude\": -123.6308}, \"location\": \"USA: California: Angelo Coastal Reserve\", \"mod_date\": \"2022-08-02\", \"name\": \"Grasslands soil microbial communities from the Angelo Coastal Reserve, California, USA - 14_0903_09_40cm\", \"ncbi_taxonomy_name\": \"soil metagenome\", \"part_of\": [\"nmdc:sty-11-zs2syx06\"], \"sample_collection_site\": \"grassland soil\", \"specific_ecosystem\": \"Unclassified\", \"insdc_biosample_identifiers\": [\"biosample:SAMN08902854\"], \"samp_name\": \"14_0903_09_40cm\", \"gold_biosample_identifiers\": [\"gold:Gb0110688\"], \"type\": \"nmdc:Biosample\"}\\n'" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "def write_jsonlines_file(path, all_results):\n", " with open(path, \"w\") as f:\n", @@ -462,25 +433,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "5f201881-9fe6-444e-97e7-5d34a541e4a1", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d6ac56a3dab8439fbde6f3f7454d91d0", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "0it [00:00, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "def download_file(url, directory=\"~/\"):\n", " local_filename = url.split('/')[-1]\n", @@ -490,7 +446,7 @@ "\n", " return local_filename\n", "\n", - "id_biosample = \"igsn:IEWFS000A\" #UPDATE\n", + "id_biosample = \"nmdc:bsm-11-mbnqn650\"\n", "rs_ompro = results(get_json(f\"/activities?filter=type:nmdc:OmicsProcessing,has_input:{id_biosample}\"))\n", "for id_ompro in tqdm([d[\"id\"] for d in rs_ompro]):\n", " rs_act = results(get_json(f\"/activities?filter=was_informed_by:{id_ompro}\"))\n", @@ -579,7 +535,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "0e0168df-9256-498e-b652-8d7bd535d582", "metadata": {}, "outputs": [ @@ -587,47 +543,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Index,m/z,Calibrated m/z,Calculated m/z,Peak Height,Resolving Power,S/N,Ion Charge,m/z Error (ppm),m/z Error Score,Isotopologue Similarity,Confidence Score,DBE,H/C,O/C,Heteroatom Class,Ion Type,Is Isotopologue,Mono Isotopic Index,Molecular Formula,C,H,O,S,N\n", - "3,888.4887543119316,888.4883146532127,888.4878482725782,3085144.7556615113,115985.22008880168,8.926135177919196,-1,-0.5249150401183273,0.21637236558431258,0.0,0.12982341935058755,21.0,1.290909090909091,0.12727272727272726,N1 S1 O7,de-protonated,0,,C55 H71 O7 S1 N1,55,71,7,1,1\n", - "4,885.3918562712611,885.3914165190189,885.3914239992979,3271768.2235989384,116390.47009957765,9.466085953040722,-1,0.008448555979629386,0.999603533623901,0.0,0.5997621201743406,16.0,1.3478260869565217,0.3695652173913043,O17,de-protonated,0,,C46 H62 O17,46,62,17,,\n", - "6,877.03548957106,877.0350496100808,877.035237742867,3413803.0920212218,93999.77607505002,9.877030182866147,-1,0.2145099513797522,0.7744236389320375,0.0,0.4646541833592225,32.0,0.5238095238095238,0.47619047619047616,S1 O20,de-protonated,0,,C42 H22 O20 S1,42,22,20,1,\n", - "7,871.5868128663766,871.5863728035725,871.5857118408668,2525810.108505442,118234.47009250808,7.307832937466206,-1,-0.758345044867053,0.04096943704460999,0.0,0.024581662226765992,23.0,1.2903225806451613,0.016129032258064516,S1 O1,de-protonated,0,,C62 H80 O1 S1,62,80,1,1,\n", - "8,869.7066517807076,869.7062116890468,869.7062208994556,2533183.851969944,118489.97010116033,7.329167116619603,-1,0.010590252923018975,0.999377119309806,0.0,0.5996262715858836,7.0,1.7818181818181817,0.09090909090909091,S1 O5,de-protonated,0,,C55 H98 O5 S1,55,98,5,1,\n", - "11,862.1881537094312,862.18771353458,862.18698735984,2500237.533314158,95618.37607241525,7.233844751794849,-1,-0.8422473901817746,0.019428704336239354,0.0,0.011657222601743612,19.0,1.0789473684210527,0.5263157894736842,N1 S1 O20,de-protonated,0,,C38 H41 O20 S1 N1,38,41,20,1,1\n", - "12,841.990360022908,841.9899198796957,841.9907299023058,2370457.287293944,97912.17607420433,6.858356367570994,-1,0.9620326939315207,0.005847710725539058,0.0,0.003508626435323435,35.0,0.325,0.525,N1 O21,de-protonated,0,,C40 H13 O21 N1,40,13,21,,1\n", - "13,838.2477426554628,838.2473025589466,838.2480119249798,2449834.394824783,122936.47008928134,7.088015216009881,-1,0.846248393230588,0.018713029385901064,0.0,0.011227817631540639,33.0,0.7884615384615384,0.15384615384615385,N1 S1 O8,de-protonated,0,,C52 H41 O8 S1 N1,52,41,8,1,1\n", - "14,830.2707204394443,830.2702804850616,830.2699291394654,2775378.1338916654,82745.31339509624,8.029898871842809,-1,-0.423170325523773,0.3697786516541105,0.0,0.2218671909924663,17.0,1.225,0.4,N1 S1 O16,de-protonated,0,,C40 H49 O16 S1 N1,40,49,16,1,1\n", - "15,820.3453445049055,820.3449048083498,820.3455960048715,2743261.455279875,100495.57607049527,7.936976873862471,-1,0.8425674801075833,0.019370581727126596,0.0,0.011622349036275957,5.0,1.7878787878787878,0.6666666666666666,N1 O22,de-protonated,0,,C33 H59 O22 N1,33,59,22,,1\n", - "17,799.3685325405778,799.3680936843464,799.367398784561,3118598.9451273754,103132.77607188243,9.022926946568637,-1,-0.8693121415769531,0.015020289920773781,0.0,0.009012173952464269,23.0,1.12,0.14,S1 O7,de-protonated,0,,C50 H56 O7 S1,50,56,7,1,\n", - "18,780.4733701715519,780.4729324155969,780.4725921312933,2463579.1177946297,105629.57606904797,7.127782314453726,-1,-0.43599776224277864,0.3478172613374125,0.0,0.2086903568024475,8.0,1.6904761904761905,0.23809523809523808,N1 S1 O10,de-protonated,0,,C42 H71 O10 S1 N1,42,71,10,1,1\n", - "22,653.5153865174419,653.5149645932129,653.5150488223496,2773339.3701903233,157687.97009893385,8.024000192255532,-1,0.12888630016707295,0.9118433198656568,0.0,0.547105991919394,8.0,1.6666666666666667,0.11904761904761904,O5,de-protonated,0,,C42 H70 O5,42,70,5,,\n", - "35,450.26572507976397,450.2653590871265,450.2649821433228,21202835.25855894,228867.72008897978,61.345378794865574,-1,-0.8371599361487543,0.010186573708662465,0.766236582456902,0.31260657720795826,11.0,1.3214285714285714,0.14285714285714285,N1 O4,de-protonated,0,,C28 H37 O4 N1,28,37,4,,1\n", - "37,283.2645638441444,283.2642719869493,283.2642538675084,4273299.506825067,291039.1760681066,12.36377936032282,-1,-0.06396656369622783,0.9775246355422157,0.0,0.5865147813253294,1.0,2.0,0.1111111111111111,O2,de-protonated,0,,C18 H36 O2,18,36,2,,\n", - "38,273.0618794463733,273.06159294231406,273.06159087568057,2432007.4278552043,377392.22020885994,7.036437112035749,-1,-0.007568378593162039,0.9996818264361175,0.0,0.5998090958616704,5.0,1.2727272727272727,0.7272727272727273,O8,de-protonated,0,,C11 H14 O8,11,14,8,,\n", - "39,255.23321441074,255.2329374887667,255.23295373855242,6820572.259571557,323002.9760412817,19.73370937230056,-1,0.06366648772467076,0.9777326510838139,0.0,0.5866395906502884,1.0,2.0,0.125,O2,de-protonated,0,,C16 H32 O2,16,32,2,,\n", - "0,893.9807192795649,893.9802798081946,,2925279.2994384016,92217.97607563925,8.463602368102553,-1,,,,,,,,unassigned,,,,,,,,,\n", - "1,891.2401722007025,891.239732632407,,3414246.805595051,92501.5760653175,9.87831396293287,-1,,,,,,,,unassigned,,,,,,,,,\n", - "2,889.1174652085989,889.1170255699338,,2955400.261042048,77268.8133940652,8.550750231900409,-1,,,,,,,,unassigned,,,,,,,,,\n", - "5,884.5134572608324,884.5130174836548,,5254152.640152923,116506.4700934881,15.201645441550752,-1,,,,,,,,unassigned,,,,,,,,,\n", - "9,864.2012204090391,864.2007802514113,,3030984.065495402,119244.47008147289,8.769434056889162,-1,,,,,,,,unassigned,,,,,,,,,\n", - "10,862.8953391929115,862.89489902369,,3055128.0467927232,119424.97008838368,8.83928894470198,-1,,,,,,,,unassigned,,,,,,,,,\n", - "16,803.9459525978525,803.9455135240407,,5601357.72374313,170909.29346210908,16.20620106407045,-1,,,,,,,,unassigned,,,,,,,,,\n", - "19,752.2242722248513,752.223836720794,,3273358.7560336525,136995.72010069533,9.470687781687843,-1,,,,,,,,unassigned,,,,,,,,,\n", - "20,750.7742477178517,750.7738123490101,,2904412.8971422175,137259.97009147579,8.40323037835044,-1,,,,,,,,unassigned,,,,,,,,,\n", - "21,653.5198929566078,653.519471031556,,3485404.0251306985,210248.96017618917,10.084190513553223,-1,,,,,,,,unassigned,,,,,,,,,\n", - "23,619.4881452383327,619.4877300537637,,6028785.5745970225,95057.26862897968,17.442862251046506,-1,,,,,,,,unassigned,,,,,,,,,\n", - "24,619.4845026976315,619.4840875138403,,6626103.056320465,221800.29347879274,19.171058821471128,-1,,,,,,,,unassigned,,,,,,,,,\n", - "25,618.4823634993785,618.4819485300792,,67607054.28433818,166619.97011002325,195.60498884712098,-1,,,,,,,,unassigned,,,,,,,,,\n", - "26,605.4715778756023,605.4711657741412,,7625159.61822322,226933.62680355,22.06159220910761,-1,,,,,,,,unassigned,,,,,,,,,\n", - "27,605.4681309396186,605.4677188389377,,6698551.287281573,97257.26863065724,19.380670607684152,-1,,,,,,,,unassigned,,,,,,,,,\n", - "28,604.4669880341766,604.4665761605637,,90288493.75827102,170483.22009523786,261.2283585131924,-1,,,,,,,,unassigned,,,,,,,,,\n", - "29,591.4558841306729,591.4554752911807,,3570435.8944568695,232311.29346066722,10.330210075080675,-1,,,,,,,,unassigned,,,,,,,,,\n", - "30,591.4525206690176,591.4521118303297,,4196033.12055477,232312.62681350167,12.140227383614969,-1,,,,,,,,unassigned,,,,,,,,,\n", - "31,591.4362071597824,591.4357983249955,,4325198.308595796,99565.84004350418,12.513936243343444,-1,,,,,,,,unassigned,,,,,,,,,\n", - "32,590.4511390833583,590.450730484585,,33051017.328779608,174529.72008540362,95.62528562170476,-1,,,,,,,,unassigned,,,,,,,,,\n", - "33,574.4557295997837,574.4553249571502,,57270445.86539885,179389.47009937343,165.6984621406013,-1,,,,,,,,unassigned,,,,,,,,,\n", - "34,509.29372586837684,509.2933397523243,,3016102.9919072534,115624.41149005273,8.726379197243881,-1,,,,,,,,unassigned,,,,,,,,,\n", - "36,311.1007284631918,311.10042248349583,,2602658.391103224,220831.98006834995,7.530175230287296,-1,,,,,,,,unassigned,,,,,,,,,\n" + "76eedf046a56c17d91606a89109be631\n", + "\n" ] } ], @@ -639,19 +556,11 @@ "\n", " return b.getvalue()\n", "\n", - "b = load_bytes(get_json(\"/nmdcschema/data_object_set/nmdc:4b649d353b2c2385ab042682ba516d14\")[\"url\"])\n", + "b = load_bytes(get_json(\"/nmdcschema/data_object_set/nmdc:dobj-11-p2174y95\")[\"url\"])\n", "\n", "for line in b.decode('utf-8').split(\"\\n\"):\n", " print(line)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "adcf941b-8b8f-4ebe-909d-1ba8d9976aa5", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From ec2b30ab2635b2c382c3f157cf4b7181f0c871d0 Mon Sep 17 00:00:00 2001 From: aclum Date: Mon, 21 Oct 2024 14:39:42 -0700 Subject: [PATCH 4/4] Update docs/nb/get_data.ipynb --- docs/nb/get_data.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/nb/get_data.ipynb b/docs/nb/get_data.ipynb index 1f922ddd..be8a8a80 100644 --- a/docs/nb/get_data.ipynb +++ b/docs/nb/get_data.ipynb @@ -285,7 +285,7 @@ "\n", "{'count': 0,\n", " 'db_response_time_ms': 243,\n", - " 'mongo_filter_dict': {'description': {'$regex': 'GFF'}},\n", + " 'mongo_filter_dict': {'data_object_type': {'$regex': 'GFF'}},\n", " 'mongo_sort_list': [['file_size_bytes', -1]],\n", " 'page': 1,\n", " 'per_page': 25}\n",