Skip to content

Commit b11bc9f

Browse files
committed
Update AWS url for coordinates collection
1 parent 0e6a07a commit b11bc9f

File tree

2 files changed

+24
-27
lines changed

2 files changed

+24
-27
lines changed

caribou/data_collector/components/provider/provider_retriever.py

+23-26
Original file line numberDiff line numberDiff line change
@@ -80,43 +80,40 @@ def retrieve_aws_regions(self) -> dict[str, dict[str, Any]]:
8080
# Then we get the list of all regions from the AWS regions page
8181
# To get the location of the regions (Based on location name)
8282
amazon_region_page = requests.get(AMAZON_REGION_URL, timeout=5)
83-
8483
amazon_region_page_soup = BeautifulSoup(amazon_region_page.content, "html.parser")
8584

8685
regions = {}
87-
8886
tables = amazon_region_page_soup.find_all("table")
8987

9088
if len(tables) == 0:
9189
raise ValueError("Could not find any tables on the AWS regions page")
9290

93-
for table in tables:
94-
if not table.find_previous("h3").text.strip() == "Available Regions":
91+
# Process the first table (which is the regions table)
92+
table = tables[0]
93+
table_rows = table.find_all("tr")[1:] # Skip header row
94+
95+
for table_row in table_rows:
96+
table_cells = table_row.find_all("td")
97+
if len(table_cells) < 2: # We only need first two columns (Code and Name)
9598
continue
99+
100+
region_code = table_cells[0].text.strip()
101+
region_name = table_cells[1].text.strip()
96102

97-
table_rows = table.find_all("tr")
98-
99-
for table_row in table_rows:
100-
table_cells = table_row.find_all("td")
101-
if len(table_cells) != 3:
102-
continue
103-
region_code = table_cells[0].text.strip()
104-
region_name = table_cells[1].text.strip()
105-
106-
if region_code not in all_enabled_regions:
107-
# Skip regions that are not enabled for the current account
108-
continue
109-
110-
coordinates = self.retrieve_location(region_name)
111-
regions[f"{Provider.AWS.value}:{region_code}"] = {
112-
"name": region_name,
113-
"provider": Provider.AWS.value,
114-
"code": region_code,
115-
"latitude": coordinates[0],
116-
"longitude": coordinates[1],
117-
}
118-
self._aws_region_name_to_code[region_name] = region_code
103+
if region_code not in all_enabled_regions:
104+
# Skip regions that are not enabled for the current account
105+
continue
119106

107+
coordinates = self.retrieve_location(region_name)
108+
print(f"Coordinates for {region_name}: {coordinates}")
109+
regions[f"{Provider.AWS.value}:{region_code}"] = {
110+
"name": region_name,
111+
"provider": Provider.AWS.value,
112+
"code": region_code,
113+
"latitude": coordinates[0],
114+
"longitude": coordinates[1],
115+
}
116+
self._aws_region_name_to_code[region_name] = region_code
120117
return regions
121118

122119
def retrieve_integrationtest_regions(self) -> dict[str, dict[str, Any]]:
+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
AMAZON_REGION_URL = "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions" # pylint: disable=line-too-long
1+
AMAZON_REGION_URL = "https://docs.aws.amazon.com/global-infrastructure/latest/regions/aws-regions.html#available-regions" # pylint: disable=line-too-long
22
CLOUD_PING = "https://www.cloudping.co/grid/"

0 commit comments

Comments
 (0)