15 changes: 11 additions & 4 deletions tests/integration/common.py
@@ -35,15 +35,18 @@ def _create_key_pair(vault_dir: Path):
     )


-def _render_metadata_files(metadata_directory: Path) -> None:
+def _render_working_dir_metadata(datastore_dir: str) -> None:
+    metadata_directory = Path(f"{datastore_dir}_working")
     template_dir = Path("tests/integration/resources/templates")
     template_path = template_dir / "built_metadata_template.json"
     template = open(template_path, "r", encoding="utf-8").read()
     for filename in os.listdir(metadata_directory):
         if not filename.endswith(".json"):
             continue
         dataset_name = filename[:-12]  # Remove __DRAFT.json
-        content = template.replace("DATASET_NAME", dataset_name)
+        content = template.replace("DATASET_NAME", dataset_name).replace(
+            "ORIGIN", "working_dir"
+        )
         with open(metadata_directory / filename, "w") as f:
             f.write(content)

@@ -57,7 +60,11 @@ def _render_metadata_all(metadata_all_file: Path) -> None:
     rendered_data_structures = []
     for dataset_name in metadata_all["dataStructures"]:
         rendered_data_structures.append(
-            json.loads(template.replace("DATASET_NAME", dataset_name))
+            json.loads(
+                template.replace("DATASET_NAME", dataset_name).replace(
+                    "ORIGIN", "metadata_all"
+                )
+            )
         )
     metadata_all["dataStructures"] = rendered_data_structures
     with open(metadata_all_file, "w") as f:
@@ -99,7 +106,7 @@ def prepare_datastore(datastore_dir: str, *, package_to_input: bool = False):
     """
     if package_to_input:
         _package_to_input(datastore_dir)
-    _render_metadata_files(Path(f"{datastore_dir}_working"))
+    _render_working_dir_metadata(datastore_dir)
     metadata_dir = f"{datastore_dir}/datastore"
     for filename in os.listdir(metadata_dir):
         if "metadata_all" in filename:
@@ -13,7 +13,7 @@
     {
         "name": "DRAFT_CHANGE",
         "description": "Andre publisering",
-        "operation": "PATCH_METADATA",
+        "operation": "CHANGE",
         "releaseStatus": "DRAFT"
     },
     {
@@ -4,7 +4,7 @@
     "languageCode": "no",
     "temporality": "FIXED",
     "sensitivityLevel": "PERSON_GENERAL",
-    "subjectFields": ["mock"],
+    "subjectFields": ["ORIGIN"],
     "temporalCoverage": { "start": 0, "stop": 1 },
     "identifierVariables": [{
         "variableRole": "Identifier",
51 changes: 44 additions & 7 deletions tests/integration/test_datastore.py
@@ -16,6 +16,7 @@
 )
 from job_executor.adapter.fs import LocalStorageAdapter
 from job_executor.adapter.fs.models.datastore_versions import DatastoreVersion
+from job_executor.adapter.fs.models.metadata import Metadata
 from job_executor.domain import datastores
 from job_executor.domain.models import JobContext
 from tests.integration.common import (
@@ -68,11 +69,20 @@ def generate_job_parameters(
 ) -> JobParameters:
     match operation:
         case Operation.BUMP:
+            if bump_manifesto is None:
+                raise AssertionError(
+                    "Could not generate job without a bump manifesto"
+                )
+            bump_to_version = "1.0.1"
+            if bump_manifesto.update_type == "MAJOR":
+                bump_to_version = "2.0.0"
+            if bump_manifesto.update_type == "MINOR":
+                bump_to_version = "1.1.0"
             return JobParameters(
                 bump_manifesto=bump_manifesto,
                 description="some description",
                 bump_from_version="1.0.0",
-                bump_to_version="2.0.0", # todo
+                bump_to_version=bump_to_version,
                 target=target,
                 operation=operation,
             )
@@ -112,13 +122,27 @@ def generate_job_parameters(
     )


+def _get_metadata_from_draft(
+    job_context: JobContext, dataset_name: str
+) -> Metadata:
+    datastore_dir = job_context.local_storage.datastore_dir
+    return next(
+        ds
+        for ds in datastore_dir.get_metadata_all_draft().data_structures
+        if ds.name == dataset_name
+    )
+
+
 def test_import_built_patch(mocked_datastore_api: MockedDatastoreApi):
     DATASET_NAME = "BUILT_PATCH_METADATA"
     job_context = generate_job_context(
         operation=Operation.PATCH_METADATA,
         target=DATASET_NAME,
     )
+    released_metadata = _get_metadata_from_draft(job_context, DATASET_NAME)
     datastores.patch_metadata(job_context)
+    patched_metadata = _get_metadata_from_draft(job_context, DATASET_NAME)
+    assert released_metadata != patched_metadata
     assert mocked_datastore_api.update_job_status.call_count == 2
     metadata_all_draft = (
         job_context.local_storage.datastore_dir.get_metadata_all_draft()
@@ -137,12 +161,7 @@ def test_import_built_add(mocked_datastore_api: MockedDatastoreApi):
     )
     datastores.add(job_context)
     assert mocked_datastore_api.update_job_status.call_count == 2
-    metadata_all_draft = (
-        job_context.local_storage.datastore_dir.get_metadata_all_draft()
-    )
-    assert DATASET_NAME in [
-        ds.name for ds in metadata_all_draft.data_structures
-    ]
+    assert _get_metadata_from_draft(job_context, DATASET_NAME)
     assert not os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT.json")
     assert not os.path.exists(
         DATASTORE_DIR / f"data/{DATASET_NAME}__DRAFT.parquet"
@@ -156,7 +175,10 @@ def test_import_built_change(mocked_datastore_api: MockedDatastoreApi):
         operation=Operation.CHANGE,
         target=DATASET_NAME,
     )
+    released_metadata = _get_metadata_from_draft(job_context, DATASET_NAME)
     datastores.change(job_context)
+    changed_metadata = _get_metadata_from_draft(job_context, DATASET_NAME)
+    assert changed_metadata != released_metadata
     assert mocked_datastore_api.update_job_status.call_count == 2
     metadata_all_draft = (
         job_context.local_storage.datastore_dir.get_metadata_all_draft()
@@ -193,6 +215,9 @@ def test_bump_patch(mocked_datastore_api: MockedDatastoreApi):
     )
     datastores.bump_version(bump_job_context)
     assert mocked_datastore_api.update_job_status.call_count == 4
+    assert os.path.exists(
+        DATASTORE_DIR / "datastore" / "metadata_all__1_0_1.json"
+    )


 def test_bump_minor(mocked_datastore_api: MockedDatastoreApi):
@@ -218,6 +243,12 @@
     )
     datastores.bump_version(bump_job_context)
     assert mocked_datastore_api.update_job_status.call_count == 4
+    assert os.path.exists(
+        DATASTORE_DIR / "datastore" / "metadata_all__1_1_0.json"
+    )
+    assert os.path.exists(
+        DATASTORE_DIR / "datastore" / "data_versions__1_1.json"
+    )


 def test_bump_major(mocked_datastore_api: MockedDatastoreApi):
@@ -243,6 +274,12 @@
     )
     datastores.bump_version(bump_job_context)
     assert mocked_datastore_api.update_job_status.call_count == 4
+    assert os.path.exists(
+        DATASTORE_DIR / "datastore" / "metadata_all__2_0_0.json"
+    )
+    assert os.path.exists(
+        DATASTORE_DIR / "datastore" / "data_versions__2_0.json"
+    )


 def test_delete_draft(mocked_datastore_api: MockedDatastoreApi):
67 changes: 48 additions & 19 deletions tests/integration/test_import.py
@@ -38,7 +38,12 @@ class MockedDatastoreApi:
     update_description: MagicMock


-@pytest.fixture(autouse=True)
+@dataclass
+class MockedPseudonymService:
+    pseudonymize: MagicMock
+
+
+@pytest.fixture
 def mocked_datastore_api(mocker) -> MockedDatastoreApi:
     return MockedDatastoreApi(
         update_job_status=mocker.patch(
@@ -53,18 +58,20 @@ def mocked_datastore_api(mocker) -> MockedDatastoreApi:


 @pytest.fixture(autouse=True)
-def mocked_pseudonym_service(mocker):
-    return mocker.patch(
-        "job_executor.adapter.pseudonym_service.pseudonymize",
-        return_value={
-            "00000000001": 1,
-            "00000000002": 2,
-            "00000000003": 3,
-            "00000000004": 4,
-            "00000000005": 5,
-            "00000000006": 6,
-            "00000000007": 7,
-        },
+def mocked_pseudonym_service(mocker) -> MockedPseudonymService:
+    return MockedPseudonymService(
+        pseudonymize=mocker.patch(
+            "job_executor.adapter.pseudonym_service.pseudonymize",
+            return_value={
+                "00000000001": 1,
+                "00000000002": 2,
+                "00000000003": 3,
+                "00000000004": 4,
+                "00000000005": 5,
+                "00000000006": 6,
+                "00000000007": 7,
+            },
+        )
     )


@@ -119,6 +126,7 @@ def test_import_add(mocked_datastore_api: MockedDatastoreApi):
     assert mocked_datastore_api.update_job_status.call_count == 6
     assert mocked_datastore_api.update_description.call_count == 1
     assert os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT.json")
+    assert os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT.parquet")


 def test_import_change(mocked_datastore_api: MockedDatastoreApi):
@@ -131,9 +139,13 @@
     assert mocked_datastore_api.update_job_status.call_count == 6
     assert mocked_datastore_api.update_description.call_count == 1
     assert os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT.json")
+    assert os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT.parquet")


-def test_import_add_no_pseudo(mocked_datastore_api: MockedDatastoreApi):
+def test_import_add_no_pseudo(
+    mocked_datastore_api: MockedDatastoreApi,
+    mocked_pseudonym_service: MockedPseudonymService,
+):
     DATASET_NAME = "IMPORTABLE_ADD_NO_PSEUDO"
     add_no_pseudo_context = generate_job_context(
         operation=Operation.ADD,
@@ -142,7 +154,9 @@ def test_import_add_no_pseudo(mocked_datastore_api: MockedDatastoreApi):
     build_dataset_worker.run_worker(add_no_pseudo_context, Queue())
     assert mocked_datastore_api.update_job_status.call_count == 5
     assert mocked_datastore_api.update_description.call_count == 1
+    assert mocked_pseudonym_service.pseudonymize.call_count == 0
     assert os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT.json")
+    assert os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT.parquet")


 def test_import_add_partitioned(mocked_datastore_api: MockedDatastoreApi):
@@ -155,6 +169,7 @@
     assert mocked_datastore_api.update_job_status.call_count == 6
     assert mocked_datastore_api.update_description.call_count == 1
     assert os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT.json")
+    assert os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT")


 def test_import_add_invalid(mocked_datastore_api: MockedDatastoreApi):
@@ -167,16 +182,30 @@
     assert mocked_datastore_api.update_job_status.call_count == 3
     assert mocked_datastore_api.update_description.call_count == 0
     assert os.path.exists(INPUT_DIR / f"archive/{DATASET_NAME}.tar")
+    assert not os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT.json")
+    assert not os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT")


+@pytest.fixture
+def mocked_offline_datastore_api(mocker) -> MockedDatastoreApi:
+    return MockedDatastoreApi(
+        update_job_status=mocker.patch(
+            "job_executor.adapter.datastore_api.update_job_status",
+            side_effect=Exception("offline"),
+        ),
+        update_description=mocker.patch(
+            "job_executor.adapter.datastore_api.update_description",
+            side_effect=Exception("offline"),
+        ),
+    )
+
+
 def test_import_add_datastore_api_down(
-    mocked_datastore_api: MockedDatastoreApi,
+    mocked_offline_datastore_api: MockedDatastoreApi,
 ):
     DATASET_NAME = "IMPORTABLE_ADD"
     add_datastore_api_down_context = generate_job_context(
         operation=Operation.ADD, target=DATASET_NAME
     )
-    build_dataset_worker.run_worker(add_datastore_api_down_context, Queue())
-    assert mocked_datastore_api.update_job_status.call_count == 6
-    assert mocked_datastore_api.update_description.call_count == 1
-    assert os.path.exists(WORKING_DIR / f"{DATASET_NAME}__DRAFT.json")
+    with pytest.raises(Exception, match="offline"):
+        build_dataset_worker.run_worker(add_datastore_api_down_context, Queue())
15 changes: 13 additions & 2 deletions tests/integration/test_rollback.py
@@ -61,6 +61,13 @@ def selected_datastore(request):

 @pytest.fixture(autouse=True)
 def set_up_resources(selected_datastore):
+    """
+    Because we want to set up a different datastore directory
+    for each test, we parameterize this autouse fixture with
+    a "selected_datastore" parameter.
+    We always move the selected datastore to DATASTORE_DIR
+    when setting up tests so this global can be reused.
+    """
     assert selected_datastore
     backup_resources()
     if selected_datastore != DATASTORE_DIR:
@@ -130,8 +137,8 @@ def test_rollback_first_bump(mocked_datastore_api: MockedDatastoreApi):
             target="DATASTORE",
             bump_manifesto=bump_manifesto,
             description="some description",
-            bump_from_version="1.0.0",
-            bump_to_version="2.0.0",
+            bump_from_version="0.0.0",
+            bump_to_version="1.0.0",
         ),
     )
     rollback.fix_interrupted_job(job)
@@ -152,6 +159,10 @@
         ]
         for file in os.listdir(metadata_dir)
     )
+    assert not any(
+        file in ["metadata_all__1_0_0.json", "data_versions__1_0.json"]
+        for file in os.listdir(metadata_dir)
+    )


 @pytest.mark.parametrize(