Skip to content

Update AWS url for coordinates collection #345

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ jobs:
runs-on: ubuntu-latest
if: ${{ !github.event.pull_request.draft }}
timeout-minutes: 5
permissions:
pull-requests: write
steps:
- uses: actions/checkout@v2
- name: Setup poetry
Expand All @@ -54,6 +56,7 @@ jobs:
poetry run pytest --cache-clear --cov=caribou/ --cov-fail-under=80
poetry run pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov-fail-under=80 --cov=caribou caribou/tests | tee pytest-coverage.txt
- name: Pytest coverage comment
if: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
uses: MishaKav/pytest-coverage-comment@main
with:
pytest-coverage-path: ./pytest-coverage.txt
Expand Down
46 changes: 22 additions & 24 deletions caribou/data_collector/components/provider/provider_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,33 +90,31 @@ def retrieve_aws_regions(self) -> dict[str, dict[str, Any]]:
if len(tables) == 0:
raise ValueError("Could not find any tables on the AWS regions page")

for table in tables:
if not table.find_previous("h3").text.strip() == "Available Regions":
# Process the first table (which is the regions table)
table = tables[0]
table_rows = table.find_all("tr")[1:] # Skip header row

for table_row in table_rows:
table_cells = table_row.find_all("td")
if len(table_cells) < 2: # We only need first two columns (Code and Name)
continue

table_rows = table.find_all("tr")

for table_row in table_rows:
table_cells = table_row.find_all("td")
if len(table_cells) != 3:
continue
region_code = table_cells[0].text.strip()
region_name = table_cells[1].text.strip()

if region_code not in all_enabled_regions:
# Skip regions that are not enabled for the current account
continue

coordinates = self.retrieve_location(region_name)
regions[f"{Provider.AWS.value}:{region_code}"] = {
"name": region_name,
"provider": Provider.AWS.value,
"code": region_code,
"latitude": coordinates[0],
"longitude": coordinates[1],
}
self._aws_region_name_to_code[region_name] = region_code
region_code = table_cells[0].text.strip()
region_name = table_cells[1].text.strip()

if region_code not in all_enabled_regions:
# Skip regions that are not enabled for the current account
continue

coordinates = self.retrieve_location(region_name)
regions[f"{Provider.AWS.value}:{region_code}"] = {
"name": region_name,
"provider": Provider.AWS.value,
"code": region_code,
"latitude": coordinates[0],
"longitude": coordinates[1],
}
self._aws_region_name_to_code[region_name] = region_code
return regions

def retrieve_integrationtest_regions(self) -> dict[str, dict[str, Any]]:
Expand Down
2 changes: 1 addition & 1 deletion caribou/data_collector/utils/constants.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
AMAZON_REGION_URL = "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions" # pylint: disable=line-too-long
AMAZON_REGION_URL = "https://docs.aws.amazon.com/global-infrastructure/latest/regions/aws-regions.html#available-regions" # pylint: disable=line-too-long
CLOUD_PING = "https://www.cloudping.co/grid/"
Original file line number Diff line number Diff line change
Expand Up @@ -207,12 +207,42 @@ def test_retrieve_aws_regions(self, mock_googlemaps_client, mock_requests_get):
mock_html_content = """
<html>
<body>
<h3>Available Regions</h3>
<table>
<tr><td>us-east-1</td><td>US East (N. Virginia)</td><td>Some other data</td></tr>
<tr><td>eu-west-1</td><td>EU (Ireland)</td><td>Some other data</td></tr>
<tr><td>no-nons-1</td><td>Nowhere</td><td>Some other data</td></tr>
</table>
<div class="table-container">
<div class="table-contents disable-scroll">
<table id="w136aab7c13b7">
<thead>
<tr>
<th>Code</th>
<th>Name</th>
<th>AZs</th>
<th>Geography</th>
<th>Opt-in status</th>
</tr>
</thead>
<tr>
<td tabindex="-1">us-east-1</td>
<td tabindex="-1">US East (N. Virginia)</td>
<td tabindex="-1">6</td>
<td tabindex="-1">United States of America</td>
<td tabindex="-1">Not required</td>
</tr>
<tr>
<td tabindex="-1">us-west-1</td>
<td tabindex="-1">US West (N. California)</td>
<td tabindex="-1">3</td>
<td tabindex="-1">United States of America</td>
<td tabindex="-1">Not required</td>
</tr>
<tr>
<td tabindex="-1">eu-west-1</td>
<td tabindex="-1">EU (Ireland)</td>
<td tabindex="-1">3</td>
<td tabindex="-1">Ireland</td>
<td tabindex="-1">Not required</td>
</tr>
</table>
</div>
</div>
</body>
</html>
"""
Expand All @@ -232,16 +262,26 @@ def test_retrieve_aws_regions(self, mock_googlemaps_client, mock_requests_get):
mock_str_to_bool.return_value = False
provider_retriever = ProviderRetriever(None) # Assuming None can be passed as a dummy RemoteClient

provider_retriever._retrieve_enabled_aws_regions = Mock(return_value=["us-east-1", "eu-west-1"])
provider_retriever._retrieve_enabled_aws_regions = Mock(return_value=["us-east-1", "us-west-1"])

regions = provider_retriever.retrieve_aws_regions()

# Verify the regions dictionary structure
self.assertIn("aws:us-east-1", regions)
self.assertIn("aws:eu-west-1", regions)
self.assertNotIn("aws:no-nons-1", regions)
self.assertEqual(regions["aws:us-east-1"]["name"], "US East (N. Virginia)")
self.assertEqual(regions["aws:us-east-1"]["latitude"], 37.7749)
self.assertEqual(regions["aws:us-east-1"]["longitude"], -122.4194)
self.assertIn("aws:us-west-1", regions)
self.assertNotIn("aws:eu-west-1", regions) # Should not be included as it's not in enabled regions

# Verify the region data structure
us_east_region = regions["aws:us-east-1"]
self.assertEqual(us_east_region["name"], "US East (N. Virginia)")
self.assertEqual(us_east_region["provider"], "aws")
self.assertEqual(us_east_region["code"], "us-east-1")
self.assertEqual(us_east_region["latitude"], 37.7749)
self.assertEqual(us_east_region["longitude"], -122.4194)

# Verify the region name to code mapping
self.assertEqual(provider_retriever._aws_region_name_to_code["US East (N. Virginia)"], "us-east-1")
self.assertEqual(provider_retriever._aws_region_name_to_code["US West (N. California)"], "us-west-1")

@patch("requests.get")
@patch("caribou.data_collector.components.provider.provider_retriever.boto3.client")
Expand Down