Skip to content

Commit b3d2597

Browse files
author
Jennifer Tran
committed
fix: update naming and simplify logic
1 parent c5a0031 commit b3d2597

File tree

1 file changed

+33
-41
lines changed

1 file changed

+33
-41
lines changed

transformation-scripts/collection-and-item-workflows-ingest.ipynb

+33-41
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
" \"combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO\",\n",
6565
"]\n",
6666
"\n",
67-
"json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n",
67+
"collection_json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n",
6868
"filtered_list = [\n",
6969
" item\n",
7070
"    for item in collection_json_file_paths\n",
@@ -110,26 +110,14 @@
110110
"metadata": {},
111111
"outputs": [],
112112
"source": [
113-
"def load_json_files_from_directory(directory):\n",
114-
" json_files = []\n",
115-
" for filename in os.listdir(directory):\n",
116-
" if filename.endswith(\".json\"):\n",
117-
" json_files.append(filename)\n",
118-
" return json_files\n",
119-
"\n",
120-
"\n",
121113
"def find_matching_file_names(collections_list, discovery_items_list):\n",
122114
" matching_file_names = []\n",
123115
" for collection_filename in collections_list:\n",
124-
" collection_json = load_json_file(\n",
125-
" os.path.join(collections_files, collection_filename)\n",
126-
" )\n",
116+
" collection_json = load_json_file(collection_filename)\n",
127117
" id1 = collection_json.get(\"id\")\n",
128118
" if id1 is not None:\n",
129119
" for discovery_items_filename in discovery_items_list:\n",
130-
" item_json = load_json_file(\n",
131-
" os.path.join(discovery_items_files, discovery_items_filename)\n",
132-
" )\n",
120+
" item_json = load_json_file(discovery_items_filename)\n",
133121
" if isinstance(item_json, list):\n",
134122
" if len(item_json) > 0:\n",
135123
" collection2 = item_json[0].get(\"collection\")\n",
@@ -140,7 +128,6 @@
140128
" if collection2 == id1:\n",
141129
" # Found a match\n",
142130
" matching_file_names.append(discovery_items_filename)\n",
143-
" # Further processing or comparison can be done here\n",
144131
" break\n",
145132
" return matching_file_names\n",
146133
"\n",
@@ -157,12 +144,15 @@
157144
" else \"../ingestion-data/production/discovery-items/\"\n",
158145
")\n",
159146
"\n",
160-
"# Load JSON files from directories\n",
161-
"json_files_dir1 = load_json_files_from_directory(collections_files)\n",
162-
"json_files_dir2 = load_json_files_from_directory(discovery_items_files)\n",
163-
"\n",
147+
"discovery_items_json_file_paths = (\n",
148+
"    glob.glob(\"../ingestion-data/staging/discovery-items/*.json\")\n",
149+
" if testing_mode\n",
150+
"    else glob.glob(\"../ingestion-data/production/discovery-items/*.json\")\n",
151+
")\n",
164152
"# Find matching file names\n",
165-
"matching_file_names = find_matching_file_names(json_files_dir1, json_files_dir2)\n",
153+
"matching_file_names = find_matching_file_names(\n",
154+
"    collection_json_file_paths, discovery_items_json_file_paths\n",
155+
")\n",
166156
"\n",
167157
"# for file_pair in matching_file_names:\n",
168158
"# print(\"Match found:\")\n",
@@ -195,6 +185,7 @@
195185
"mcp_prod_user_pool_id = \"CHANGE ME\"\n",
196186
"mcp_prod_identity_pool_id = \"CHANGE ME\"\n",
197187
"\n",
188+
"print(f\"TESTING MODE? {testing_mode}\")\n",
198189
"if testing_mode:\n",
199190
" STAC_INGESTOR_API = f\"{test_endpoint}/api/ingest/\"\n",
200191
" VEDA_STAC_API = f\"{test_endpoint}/api/stac/\"\n",
@@ -219,6 +210,7 @@
219210
"outputs": [],
220211
"source": [
221212
"TOKEN = \"REPLACE ME\"\n",
213+
"\n",
222214
"authorization_header = f\"Bearer {TOKEN}\"\n",
223215
"headers = {\n",
224216
" \"Authorization\": authorization_header,\n",
@@ -265,20 +257,22 @@
265257
" )\n",
266258
"\n",
267259
"\n",
268-
"def ingest_item(item):\n",
260+
"def ingest_discovery_item(discovery_item):\n",
269261
" discovery_url = f\"{WORKFLOWS_API}/discovery\"\n",
270262
" try:\n",
271-
" response = requests.post(discovery_url, json=item, headers=headers)\n",
263+
" response = requests.post(\n",
264+
"            discovery_url, json=discovery_item, headers=headers\n",
265+
" )\n",
272266
" response.raise_for_status()\n",
273267
" if response.status_code == 201:\n",
274268
" print(f\"Request was successful. \")\n",
275269
" else:\n",
276270
" print(\n",
277-
" f\"Kicking off discovery for {item} failed. Request failed with status code: {response.status_code}\"\n",
271+
"                f\"Kicking off discovery for {discovery_item} failed. Request failed with status code: {response.status_code}\"\n",
278272
" )\n",
279273
" except requests.RequestException as e:\n",
280274
" print(\n",
281-
" f\"Kicking off discovery for {item} failed. An error occurred during the request: {e}\"\n",
275+
"            f\"Kicking off discovery for {discovery_item} failed. An error occurred during the request: {e}\"\n",
282276
" )\n",
283277
" except Exception as e:\n",
284278
" print(\n",
@@ -300,7 +294,7 @@
300294
"outputs": [],
301295
"source": [
302296
"test_file_paths_and_collection_ids = [file_paths_and_collection_ids[0]]\n",
303-
"test_discovery_item = [f\"{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n",
297+
"test_discovery_item = [f\"../ingestion-data/staging/discovery-items/{file_paths_and_collection_ids[0].get('collectionId')}.json\"]\n",
304298
"\n",
305299
"print(test_discovery_item)\n",
306300
"print(test_file_paths_and_collection_ids)\n",
@@ -315,7 +309,10 @@
315309
" test_discovery_item\n",
316310
" if testing_mode\n",
317311
" else discovery_items_to_process\n",
318-
")"
312+
")\n",
313+
"\n",
314+
"print(file_paths_and_collection_ids)\n",
315+
"print(discovery_items_to_process)"
319316
]
320317
},
321318
{
@@ -354,27 +351,22 @@
354351
"metadata": {},
355352
"outputs": [],
356353
"source": [
357-
"for item in discovery_items_to_process:\n",
358-
" if testing_mode:\n",
359-
" file_path = f\"../ingestion-data/staging/discovery_items/{item}\"\n",
360-
" else:\n",
361-
" file_path = f\"../ingestion-data/production/discovery_items/{item}\"\n",
362-
"\n",
354+
"for discovery_item in discovery_items_to_process:\n",
363355
" try:\n",
364-
" with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
365-
" item = json.load(file)\n",
356+
" with open(discovery_item, \"r\", encoding=\"utf-8\") as file:\n",
357+
" discovery_item_json = json.load(file)\n",
366358
"\n",
367359
" # Publish the updated collection to the target ingestion `api/collections` endpoint\n",
368-
" if isinstance(item_json, list):\n",
369-
" for single_item in item_json:\n",
370-
" ingest_item(single_item)\n",
360+
" if isinstance(discovery_item_json, list):\n",
361+
" for single_discovery_item in discovery_item_json:\n",
362+
" ingest_discovery_item(single_discovery_item)\n",
371363
" else:\n",
372-
" ingest_item(item)\n",
364+
" ingest_discovery_item(discovery_item_json)\n",
373365
"\n",
374366
" except requests.RequestException as e:\n",
375-
" print(f\"An error occurred for collectionId {collection_id}: {e}\")\n",
367+
" print(f\"An error occurred for discovery item {discovery_item}: {e}\")\n",
376368
" except Exception as e:\n",
377-
" print(f\"An unexpected error occurred for collectionId {collection_id}: {e}\")"
369+
" print(f\"An unexpected error occurred for discovery item {discovery_item}: {e}\")"
378370
]
379371
}
380372
],

0 commit comments

Comments
 (0)