64 | 64 | "    \"combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO\",\n",
65 | 65 | "]\n",
66 | 66 | "\n",
67 |    | - "json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n",
   | 67 | + "collection_json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n",
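   |    | + "# Filter the glob results down to the collections listed above\n",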
68 | 68 | "filtered_list = [\n",
69 | 69 | "    item\n",
70 |    | - "    for item in json_file_paths\n",
   | 70 | + "    for item in collection_json_file_paths\n",

110 | 110 | "metadata": {},
111 | 111 | "outputs": [],
112 | 112 | "source": [
113 |     | - "def load_json_files_from_directory(directory):\n",
114 |     | - "    json_files = []\n",
115 |     | - "    for filename in os.listdir(directory):\n",
116 |     | - "        if filename.endswith(\".json\"):\n",
117 |     | - "            json_files.append(filename)\n",
118 |     | - "    return json_files\n",
119 |     | - "\n",
120 |     | - "\n",
121 | 113 | "def find_matching_file_names(collections_list, discovery_items_list):\n",
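    |     | + "    # Collect discovery-item files whose records reference one of the given collections' ids\n",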
122 | 114 | "    matching_file_names = []\n",
123 | 115 | "    for collection_filename in collections_list:\n",
124 |     | - "        collection_json = load_json_file(\n",
125 |     | - "            os.path.join(collections_files, collection_filename)\n",
126 |     | - "        )\n",
    | 116 | + "        collection_json = load_json_file(collection_filename)\n",
127 | 117 | "        id1 = collection_json.get(\"id\")\n",
128 | 118 | "        if id1 is not None:\n",
129 | 119 | "            for discovery_items_filename in discovery_items_list:\n",
130 |     | - "                item_json = load_json_file(\n",
131 |     | - "                    os.path.join(discovery_items_files, discovery_items_filename)\n",
132 |     | - "                )\n",
    | 120 | + "                item_json = load_json_file(discovery_items_filename)\n",
133 | 121 | "                if isinstance(item_json, list):\n",
134 | 122 | "                    if len(item_json) > 0:\n",
135 | 123 | "                        collection2 = item_json[0].get(\"collection\")\n",

140 | 128 | "                if collection2 == id1:\n",
141 | 129 | "                    # Found a match\n",
142 | 130 | "                    matching_file_names.append(discovery_items_filename)\n",
143 |     | - "                    # Further processing or comparison can be done here\n",
144 | 131 | "                    break\n",
145 | 132 | "    return matching_file_names\n",
146 | 133 | "\n",

157 | 144 | "    else \"../ingestion-data/production/discovery-items/\"\n",
158 | 145 | ")\n",
159 | 146 | "\n",
160 |     | - "# Load JSON files from directories\n",
161 |     | - "json_files_dir1 = load_json_files_from_directory(collections_files)\n",
162 |     | - "json_files_dir2 = load_json_files_from_directory(discovery_items_files)\n",
163 |     | - "\n",
    | 147 | + "discovery_items_json_file_paths = (\n",
    | 148 | + "    glob.glob(\"../ingestion-data/staging/discovery-items/*.json\")\n",
    | 149 | + "    if testing_mode\n",
    | 150 | + "    else glob.glob(\"../ingestion-data/production/discovery-items/*.json\")\n",
    | 151 | + ")\n",
164 | 152 | "# Find matching file names\n",
165 |     | - "matching_file_names = find_matching_file_names(json_files_dir1, json_files_dir2)\n",
    | 153 | + "matching_file_names = find_matching_file_names(\n",
    | 154 | + "    collection_json_file_paths, discovery_items_json_file_paths\n",
    | 155 | + ")\n",
166 | 156 | "\n",
167 | 157 | "# for file_pair in matching_file_names:\n",
168 | 158 | "#     print(\"Match found:\")\n",

195 | 185 | "mcp_prod_user_pool_id = \"CHANGE ME\"\n",
196 | 186 | "mcp_prod_identity_pool_id = \"CHANGE ME\"\n",
197 | 187 | "\n",
    | 188 | + "print(f\"TESTING MODE? {testing_mode}\")\n",
198 | 189 | "if testing_mode:\n",
199 | 190 | "    STAC_INGESTOR_API = f\"{test_endpoint}/api/ingest/\"\n",
200 | 191 | "    VEDA_STAC_API = f\"{test_endpoint}/api/stac/\"\n",

219 | 210 | "outputs": [],
220 | 211 | "source": [
221 | 212 | "TOKEN = \"REPLACE ME\"\n",
    | 213 | + "\n",
222 | 214 | "authorization_header = f\"Bearer {TOKEN}\"\n",
223 | 215 | "headers = {\n",
224 | 216 | "    \"Authorization\": authorization_header,\n",

265 | 257 | "        )\n",
266 | 258 | "\n",
267 | 259 | "\n",
268 |     | - "def ingest_item(item):\n",
    | 260 | + "def ingest_discovery_item(discovery_item):\n",
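    |     | + "    # POST a single discovery payload to the workflows API's /discovery endpoint\n",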
269 | 261 | "    discovery_url = f\"{WORKFLOWS_API}/discovery\"\n",
270 | 262 | "    try:\n",
271 |     | - "        response = requests.post(discovery_url, json=item, headers=headers)\n",
    | 263 | + "        response = requests.post(\n",
    | 264 | + "            discovery_url, json=discovery_item, headers=headers\n",
    | 265 | + "        )\n",
272 | 266 | "        response.raise_for_status()\n",
273 | 267 | "        if response.status_code == 201:\n",
274 |     | - "            print(f\"Request was successful. \")\n",
    | 268 | + "            print(\"Request was successful.\")\n",
275 | 269 | "        else:\n",
276 | 270 | "            print(\n",
277 |     | - "                f\"Kicking off discovery for {item} failed. Request failed with status code: {response.status_code}\"\n",
    | 271 | + "                f\"Kicking off discovery for {discovery_item} failed. Request failed with status code: {response.status_code}\"\n",
278 | 272 | "            )\n",
279 | 273 | "    except requests.RequestException as e:\n",
280 | 274 | "        print(\n",
281 |     | - "            f\"Kicking off discovery for {item} failed. An error occurred during the request: {e}\"\n",
    | 275 | + "            f\"Kicking off discovery for {discovery_item} failed. An error occurred during the request: {e}\"\n",
282 | 276 | "        )\n",
283 | 277 | "    except Exception as e:\n",
284 | 278 | "        print(\n",

300 | 294 | "outputs": [],
301 | 295 | "source": [
302 | 296 | "test_file_paths_and_collection_ids = [file_paths_and_collection_ids[0]]\n",
303 |     | - "test_discovery_item = [f\"{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n",
    | 297 | + "test_discovery_item = [f\"../ingestion-data/staging/discovery-items/{file_paths_and_collection_ids[0].get('collectionId')}.json\"]\n",
304 | 298 | "\n",
305 | 299 | "print(test_discovery_item)\n",
306 | 300 | "print(test_file_paths_and_collection_ids)\n",

315 | 309 | "    test_discovery_item\n",
316 | 310 | "    if testing_mode\n",
317 | 311 | "    else discovery_items_to_process\n",
318 |     | - ")"
    | 312 | + ")\n",
    | 313 | + "\n",
    | 314 | + "print(file_paths_and_collection_ids)\n",
    | 315 | + "print(discovery_items_to_process)"
319 | 316 | ]
320 | 317 | },
321 | 318 | {

354 | 351 | "metadata": {},
355 | 352 | "outputs": [],
356 | 353 | "source": [
357 |     | - "for item in discovery_items_to_process:\n",
358 |     | - "    if testing_mode:\n",
359 |     | - "        file_path = f\"../ingestion-data/staging/discovery_items/{item}\"\n",
360 |     | - "    else:\n",
361 |     | - "        file_path = f\"../ingestion-data/production/discovery_items/{item}\"\n",
362 |     | - "\n",
    | 354 | + "for discovery_item in discovery_items_to_process:\n",
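    |     | + "    # Each discovery_item is a path to a discovery-item JSON file\n",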
363 | 355 | "    try:\n",
364 |     | - "        with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
365 |     | - "            item = json.load(file)\n",
    | 356 | + "        with open(discovery_item, \"r\", encoding=\"utf-8\") as file:\n",
    | 357 | + "            discovery_item_json = json.load(file)\n",
366 | 358 | "\n",
367 |     | - "        # Publish the updated collection to the target ingestion `api/collections` endpoint\n",
    | 359 | + "        # Kick off discovery for each record via the workflows API\n",
368 |     | - "        if isinstance(item_json, list):\n",
369 |     | - "            for single_item in item_json:\n",
370 |     | - "                ingest_item(single_item)\n",
    | 360 | + "        if isinstance(discovery_item_json, list):\n",
    | 361 | + "            for single_discovery_item in discovery_item_json:\n",
    | 362 | + "                ingest_discovery_item(single_discovery_item)\n",
371 | 363 | "        else:\n",
372 |     | - "            ingest_item(item)\n",
    | 364 | + "            ingest_discovery_item(discovery_item_json)\n",
373 | 365 | "\n",
374 | 366 | "    except requests.RequestException as e:\n",
375 |     | - "        print(f\"An error occurred for collectionId {collection_id}: {e}\")\n",
    | 367 | + "        print(f\"An error occurred for discovery item {discovery_item}: {e}\")\n",
376 | 368 | "    except Exception as e:\n",
377 |     | - "        print(f\"An unexpected error occurred for collectionId {collection_id}: {e}\")"
    | 369 | + "        print(f\"An unexpected error occurred for discovery item {discovery_item}: {e}\")"
378 | 370 | ]
379 | 371 | }
380 | 372 | ],