Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/confluence-importer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ jobs:
run: uv sync --dev
- name: Lint with ruff
run: uv run ruff check .
- name: Lint with mypy
run: uv run mypy .

test:
runs-on: ubuntu-latest
Expand Down
22 changes: 15 additions & 7 deletions services/confluence-importer/confluence_importer/c4.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Module for interacting with the C4 API to manage Confluence content."""

from typing import TypedDict
import requests

from confluence_importer.logger import logger
Expand All @@ -19,13 +20,13 @@ def clear_previous_ingests() -> None:

for index, item in enumerate(files):
num_items = len(files)
file_name = item.get("fileName")
file_name = item["fileName"]

is_confluence_page_file = file_name.startswith("confluence_page_") and file_name.endswith(".md")

if is_confluence_page_file:
try:
delete_confluence_page(item.get("id"))
delete_confluence_page(item["id"])
except Exception as e:
deletion_counter["error"] += 1
logger.error(
Expand Down Expand Up @@ -58,7 +59,7 @@ def clear_previous_ingests() -> None:
)


def delete_confluence_page(file_id):
def delete_confluence_page(file_id: int) -> None:
"""Deletes a file from the C4 bucket by its ID.

Args:
Expand All @@ -67,7 +68,14 @@ def delete_confluence_page(file_id):
requests.delete(f"{c4_base_url}/api/buckets/{bucket_id}/files/{file_id}", headers={"x-api-key": config.c4_token})


def fetch_bucket_files_list():
class C4BucketFileItem(TypedDict):
    """Shape of one file entry in the C4 bucket file listing.

    Instances are the elements of the ``items`` list read from the bucket
    files response in ``fetch_bucket_files_list``. Only the keys this module
    actually reads are declared.
    """

    # Identifier handed to ``delete_confluence_page`` to remove the file.
    id: int
    # File name; ``clear_previous_ingests`` matches it against the
    # ``confluence_page_*.md`` pattern. The camelCase spelling is the key as it
    # is read from the response payload, so it must not be renamed.
    fileName: str


def fetch_bucket_files_list() -> list[C4BucketFileItem]:
"""Fetches the list of all files in the C4 bucket.

Returns:
Expand All @@ -76,14 +84,14 @@ def fetch_bucket_files_list():
page = 1
batch_size = 50

items: list[str] = []
items: list[C4BucketFileItem] = []

while True:
logger.debug("Fetching partial list of files from c4 ", bucket_id=bucket_id, page=page)
response = requests.get(f"{c4_base_url}/api/buckets/{bucket_id}/files", headers={"x-api-key": config.c4_token})

total = response.json().get("total")
items_in_page = response.json().get("items")
total = response.json().get("total", 0)
items_in_page = response.json().get("items", [])

items.extend(items_in_page)

Expand Down
2 changes: 1 addition & 1 deletion services/confluence-importer/confluence_importer/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ class Config(BaseSettings):
c4_token: str


config = Config()
config = Config() # type: ignore[call-arg]
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

confluence_url = config.confluence_url

confluence_api = Confluence(url=confluence_url, token=config.confluence_token)
confluence_api = Confluence(url=confluence_url, token=config.confluence_token) # type: ignore[no-untyped-call]


@dataclass
Expand All @@ -32,7 +32,7 @@ def get_page(page_id: int) -> ConfluencePage:
Returns:
A ConfluencePage dataclass containing the page information and content as HTML
"""
page = confluence_api.get_page_by_id(page_id, expand="body.storage,history.lastUpdated")
page = confluence_api.get_page_by_id(page_id, expand="body.storage,history.lastUpdated") # type: ignore[no-untyped-call]

return ConfluencePage(
page_id,
Expand Down Expand Up @@ -67,7 +67,7 @@ def get_pages_for_space(space_key: str) -> Generator[ConfluencePage]:
content_type="page",
expand="body.storage,history.lastUpdated",
status="current",
)
) # type: ignore[no-untyped-call]

len_result = 0
for r in result:
Expand Down
30 changes: 20 additions & 10 deletions services/confluence-importer/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Main module for the Confluence to C4 synchronization process."""

from dataclasses import dataclass
from confluence_importer import confluence
from confluence_importer.c4 import clear_previous_ingests, import_confluence_page
from confluence_importer.markdown import html_to_markdown
Expand All @@ -11,7 +12,15 @@
page_ids = config.confluence_page_ids_to_import


def process_confluence_spaces(page_import_counter):
@dataclass
class PageImportCounter:
    """Mutable tally of page import outcomes for one synchronization run.

    A single instance is created in ``main`` and passed to the processing
    functions, which increment the fields in place.

    Attributes:
        error: Number of pages whose import raised an exception.
        success: Number of pages imported without an exception.
    """

    error: int = 0
    success: int = 0


def process_confluence_spaces(page_import_counter: PageImportCounter) -> None:
"""Processes all Confluence spaces specified in the configuration.

Fetches all pages from each space and imports them into C4.
Expand All @@ -29,10 +38,10 @@ def process_confluence_spaces(page_import_counter):
try:
page_markdown = html_to_markdown(page)
import_confluence_page(page.id, page_markdown)
page_import_counter["success"] += 1
page_import_counter.success += 1
logger.info("Import Confluence page", space_key=space_key, page_id=page.id, page_count=f"{index}")
except Exception as e:
page_import_counter["error"] += 1
page_import_counter.error += 1
logger.error(
"Error importing Confluence page",
error=str(e),
Expand All @@ -45,7 +54,7 @@ def process_confluence_spaces(page_import_counter):
logger.info("Import of all Confluence Spaces completed")


def process_individual_pages(page_import_counter):
def process_individual_pages(page_import_counter: PageImportCounter) -> None:
"""Processes individual Confluence pages specified in the configuration.

Fetches each page by ID and imports it into C4.
Expand All @@ -61,26 +70,26 @@ def process_individual_pages(page_import_counter):
page = confluence.get_page(page_id)
page_markdown = html_to_markdown(page)
import_confluence_page(page_id, page_markdown)
page_import_counter["success"] += 1
page_import_counter.success += 1
logger.info("Import Confluence page", page_id=page_id, progress=f"{index + 1}/{num_pages}")
except Exception as e:
page_import_counter["error"] += 1
page_import_counter.error += 1
logger.error(
"Error importing Confluence page", error=str(e), page_id=page_id, progress=f"{index + 1}/{num_pages}"
)

logger.info("Import of individual Confluence pages completed")


def log_final_results(page_import_counter):
def log_final_results(page_import_counter: PageImportCounter) -> None:
"""Logs the final results of the import process.

Outputs either a success message or an error message based on the import counter.

Args:
page_import_counter: PageImportCounter holding the counts of successful and failed imports
"""
if page_import_counter["error"] > 0:
if page_import_counter.error > 0:
logger.error(
"Synchronization Confluence to c4 completed with errors! See log for more information.",
page_import_counter=page_import_counter,
Expand All @@ -89,7 +98,7 @@ def log_final_results(page_import_counter):
logger.info("Synchronization Confluence to c4 completed.", page_import_counter)


def main():
def main() -> None:
"""Main entry point for the Confluence to C4 synchronization process.

Orchestrates the entire import process:
Expand All @@ -102,7 +111,8 @@ def main():

clear_previous_ingests()

page_import_counter = {"error": 0, "success": 0}
page_import_counter = PageImportCounter()

process_confluence_spaces(page_import_counter)
process_individual_pages(page_import_counter)
log_final_results(page_import_counter)
Expand Down
11 changes: 11 additions & 0 deletions services/confluence-importer/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,18 @@ convention = "google"

[dependency-groups]
dev = [
"mypy>=1.17.1",
"pip>=25.2",
"pytest>=8.4.1",
"pytest-mock>=3.14.1",
"ruff>=0.12.7",
]

[tool.mypy]
strict = true
install_types = true
non_interactive = true
ignore_missing_imports = true
exclude = [
".cache",
]
74 changes: 47 additions & 27 deletions services/confluence-importer/tests/test_c4.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
class TestC4:
"""Tests for the c4 module functionality."""

def test_delete_confluence_page(self, mocker: MockerFixture):
def test_delete_confluence_page(self, mocker: MockerFixture) -> None:
"""Test that delete_confluence_page correctly calls the C4 API.

Args:
Expand All @@ -24,17 +24,17 @@ def test_delete_confluence_page(self, mocker: MockerFixture):
mocker.patch("confluence_importer.c4.c4_base_url", "http://test-url")
mocker.patch("confluence_importer.c4.bucket_id", "test-bucket")
mocker.patch("confluence_importer.c4.config.c4_token", "test-token")
file_id = "test-file-id"
file_id = 23

# act
delete_confluence_page(file_id)

# assert
mock_requests.delete.assert_called_once_with(
"http://test-url/api/buckets/test-bucket/files/test-file-id", headers={"x-api-key": "test-token"}
"http://test-url/api/buckets/test-bucket/files/23", headers={"x-api-key": "test-token"}
)

def test_fetch_bucket_files_list_single_page(self, mocker: MockerFixture):
def test_fetch_bucket_files_list_single_page(self, mocker: MockerFixture) -> None:
"""Test that fetch_bucket_files_list correctly handles a single page of results.

Args:
Expand All @@ -51,8 +51,8 @@ def test_fetch_bucket_files_list_single_page(self, mocker: MockerFixture):
mock_response.json.return_value = {
"total": 2,
"items": [
{"id": "file1", "fileName": "confluence_page_1.md"},
{"id": "file2", "fileName": "confluence_page_2.md"},
{"id": 1, "fileName": "confluence_page_1.md"},
{"id": 2, "fileName": "confluence_page_2.md"},
],
}
mock_requests.get.return_value = mock_response
Expand All @@ -65,11 +65,11 @@ def test_fetch_bucket_files_list_single_page(self, mocker: MockerFixture):
"http://test-url/api/buckets/test-bucket/files", headers={"x-api-key": "test-token"}
)
assert len(result) == 2
assert result[0]["id"] == "file1"
assert result[1]["id"] == "file2"
assert result[0]["id"] == 1
assert result[1]["id"] == 2
mock_logger.info.assert_called_once()

def test_fetch_bucket_files_list_multiple_pages(self, mocker: MockerFixture):
def test_fetch_bucket_files_list_multiple_pages(self, mocker: MockerFixture) -> None:
"""Test that fetch_bucket_files_list correctly handles multiple pages of results.

Args:
Expand All @@ -86,13 +86,13 @@ def test_fetch_bucket_files_list_multiple_pages(self, mocker: MockerFixture):
first_response.json.return_value = {
"total": 3,
"items": [
{"id": "file1", "fileName": "confluence_page_1.md"},
{"id": "file2", "fileName": "confluence_page_2.md"},
{"id": 1, "fileName": "confluence_page_1.md"},
{"id": 2, "fileName": "confluence_page_2.md"},
],
}

second_response = mocker.MagicMock()
second_response.json.return_value = {"total": 3, "items": [{"id": "file3", "fileName": "confluence_page_3.md"}]}
second_response.json.return_value = {"total": 3, "items": [{"id": 3, "fileName": "confluence_page_3.md"}]}

mock_requests.get.return_value = first_response

Expand All @@ -104,10 +104,10 @@ def test_fetch_bucket_files_list_multiple_pages(self, mocker: MockerFixture):
"http://test-url/api/buckets/test-bucket/files", headers={"x-api-key": "test-token"}
)
assert len(result) == 2
assert result[0]["id"] == "file1"
assert result[1]["id"] == "file2"
assert result[0]["id"] == 1
assert result[1]["id"] == 2

def test_import_confluence_page_success(self, mocker: MockerFixture):
def test_import_confluence_page_success(self, mocker: MockerFixture) -> None:
"""Test that import_confluence_page correctly handles successful API responses.

Args:
Expand Down Expand Up @@ -140,7 +140,7 @@ def test_import_confluence_page_success(self, mocker: MockerFixture):
mock_logger.debug.assert_called_once()
mock_logger.error.assert_not_called()

def test_import_confluence_page_error(self, mocker: MockerFixture):
def test_import_confluence_page_error(self, mocker: MockerFixture) -> None:
"""Test that import_confluence_page correctly handles error API responses.

Args:
Expand Down Expand Up @@ -173,7 +173,7 @@ def test_import_confluence_page_error(self, mocker: MockerFixture):
mock_logger.debug.assert_not_called()
mock_logger.error.assert_called_once()

def test_clear_previous_ingests(self, mocker: MockerFixture):
def test_clear_previous_ingests(self, mocker: MockerFixture) -> None:
"""Test that clear_previous_ingests correctly deletes Confluence pages from C4.

Args:
Expand All @@ -183,9 +183,9 @@ def test_clear_previous_ingests(self, mocker: MockerFixture):
mock_fetch_bucket_files = mocker.patch(
"confluence_importer.c4.fetch_bucket_files_list",
return_value=[
{"id": "file1", "fileName": "confluence_page_1.md"},
{"id": "file2", "fileName": "other_file.txt"},
{"id": "file3", "fileName": "confluence_page_2.md"},
{"id": 1, "fileName": "confluence_page_1.md"},
{"id": 2, "fileName": "other_file.txt"},
{"id": 3, "fileName": "confluence_page_2.md"},
],
)
mock_delete_confluence_page = mocker.patch("confluence_importer.c4.delete_confluence_page")
Expand All @@ -198,11 +198,31 @@ def test_clear_previous_ingests(self, mocker: MockerFixture):
# assert
mock_fetch_bucket_files.assert_called_once()
assert mock_delete_confluence_page.call_count == 2
mock_delete_confluence_page.assert_any_call("file1")
mock_delete_confluence_page.assert_any_call("file3")
mock_delete_confluence_page.assert_any_call(1)
mock_delete_confluence_page.assert_any_call(3)
mock_logger.info.assert_called()

def test_clear_previous_ingests_with_error(self, mocker: MockerFixture):
def test_clear_previous_ingests_with_empty_list(self, mocker: MockerFixture) -> None:
"""Test that clear_previous_ingests works correctly with empty bucket files list.

Args:
mocker: Pytest fixture for mocking
"""
# arrange
mock_fetch_bucket_files = mocker.patch("confluence_importer.c4.fetch_bucket_files_list", return_value=[])
mock_delete_confluence_page = mocker.patch("confluence_importer.c4.delete_confluence_page")
mock_logger = mocker.patch("confluence_importer.c4.logger")
mocker.patch("confluence_importer.c4.bucket_id", "test-bucket")

# act
clear_previous_ingests()

# assert
mock_fetch_bucket_files.assert_called_once()
mock_delete_confluence_page.assert_not_called()
mock_logger.info.assert_called()

def test_clear_previous_ingests_with_error(self, mocker: MockerFixture) -> None:
"""Test that clear_previous_ingests correctly handles errors during deletion.

Args:
Expand All @@ -212,13 +232,13 @@ def test_clear_previous_ingests_with_error(self, mocker: MockerFixture):
mock_fetch_bucket_files = mocker.patch(
"confluence_importer.c4.fetch_bucket_files_list",
return_value=[
{"id": "file1", "fileName": "confluence_page_1.md"},
{"id": "file2", "fileName": "confluence_page_2.md"},
{"id": 1, "fileName": "confluence_page_1.md"},
{"id": 2, "fileName": "confluence_page_2.md"},
],
)

def delete_side_effect(file_id):
if file_id == "file2":
def delete_side_effect(file_id: int) -> None:
if file_id == 2:
raise Exception("Delete failed")

mock_delete_confluence_page = mocker.patch(
Expand Down
Loading