Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions integration-tests/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@

# Make the fixtures defined in `tests/fixtures/` globally available without imports.
pytest_plugins = [
"tests.fixtures.datasets",
"tests.fixtures.integration_test_logs",
"tests.fixtures.sample_datasets",
"tests.fixtures.path_configs",
"tests.fixtures.package_instance",
"tests.fixtures.package_test_config",
Expand Down
4 changes: 2 additions & 2 deletions integration-tests/tests/data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ To access a dataset from within the test system, the following rules should be o
@pytest.fixture(scope="session")
def dataset_name(
integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestDataset:
) -> SampleDataset:
"""Returns an object corresponding to the `dataset_name` test dataset."""
return IntegrationTestDataset(
return SampleDataset(
dataset_root_dir=integration_test_path_config.test_data_dir / "dataset_name",
)
```
Expand Down
11 changes: 11 additions & 0 deletions integration-tests/tests/data/text_singlefile/logs/simple.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
2015-03-23 05:48:30,122 TEST1
2015-03-23 05:48:30,122Z TEST2
2015-03-23 05:48:30,122 Z TEST3
2015-03-23 05:48:30,122+00 TEST4
2015-03-23 05:48:30,122+00Z TEST5
2015-03-23 05:48:30,122 +00 TEST6
2015-03-23 05:48:30,122 +00Z TEST7
2015-03-23 05:48:30,122UTC+00 TEST8
2015-03-23 05:48:30,122UTC+00Z TEST9
2015-03-23 05:48:30,122 UTC+00 TEST10
2015-03-23 05:48:30,122 UTC+00Z TEST11
12 changes: 12 additions & 0 deletions integration-tests/tests/data/text_singlefile/metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"dataset_name": "text_singlefile",
"unstructured": true,
"timestamp_key": null,
"begin_ts": 1427089710122,
"end_ts": 1427089710122,
"logs_subdir": "logs",
"file_names": [
"simple.txt"
],
"single_match_wildcard_query": "TEST1"
}
28 changes: 0 additions & 28 deletions integration-tests/tests/fixtures/datasets.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,28 +1,57 @@
"""Session-scoped test log fixtures shared across integration tests."""
"""Session-scoped fixtures for test datasets downloaded on-demand from external URLs."""

import logging
import pathlib
import subprocess
from dataclasses import dataclass, field, InitVar
from pathlib import Path

import pytest

from tests.utils.config import (
IntegrationTestLogs,
IntegrationTestPathConfig,
)
from tests.utils.classes import IntegrationTestPathConfig
from tests.utils.utils import (
get_binary_path,
remove_path,
validate_dir_exists,
)

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class DownloadedDataset:
    """Describes a log dataset that is fetched as a tarball for use in integration tests."""

    #: Dataset name; surrounding whitespace is stripped during initialization.
    name: str
    #: URL of the dataset tarball.
    tarball_url: str
    #: Init-only path config that supplies the `downloaded_logs_dir` root.
    integration_test_path_config: InitVar[IntegrationTestPathConfig]
    #: Tarball location, derived as `<downloaded_logs_dir>/<name>.tar.gz`.
    tarball_path: Path = field(init=False, repr=True)
    #: Extraction directory, derived as `<downloaded_logs_dir>/<name>`.
    extraction_dir: Path = field(init=False, repr=True)
    #: Optional number of log events in the downloaded logs.
    num_log_events: int | None = None

    def __post_init__(self, integration_test_path_config: IntegrationTestPathConfig) -> None:
        """
        Normalize `name` and derive the tarball and extraction paths from it.

        :param integration_test_path_config: Supplies `downloaded_logs_dir`, which is checked with
            `validate_dir_exists` before any path is derived.
        :raise ValueError: If `name` is empty once surrounding whitespace is stripped.
        """
        stripped_name = self.name.strip()
        if not stripped_name:
            err_msg = "`name` cannot be empty."
            raise ValueError(err_msg)

        logs_root = integration_test_path_config.downloaded_logs_dir
        validate_dir_exists(logs_root)

        # The dataclass is frozen, so the derived fields have to be written through
        # `object.__setattr__` rather than normal attribute assignment.
        derived_fields = {
            "name": stripped_name,
            "tarball_path": logs_root / f"{stripped_name}.tar.gz",
            "extraction_dir": logs_root / stripped_name,
        }
        for attr_name, attr_value in derived_fields.items():
            object.__setattr__(self, attr_name, attr_value)
Comment thread
quinntaylormitchell marked this conversation as resolved.


@pytest.fixture(scope="session")
def hive_24hr(
request: pytest.FixtureRequest,
integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestLogs:
) -> DownloadedDataset:
"""Provides shared `hive_24hr` test logs."""
return _download_and_extract_gzip_dataset(
request=request,
Expand All @@ -36,7 +65,7 @@ def hive_24hr(
def postgresql(
request: pytest.FixtureRequest,
integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestLogs:
) -> DownloadedDataset:
"""Provides shared `postgresql` test logs."""
return _download_and_extract_gzip_dataset(
request=request,
Expand All @@ -46,51 +75,15 @@ def postgresql(
)


@pytest.fixture(scope="session")
def simple_unstructured(
    request: pytest.FixtureRequest,
    integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestLogs:
    """
    Writes a small unstructured log file to disk and returns the metadata describing it.

    The extraction directory is removed and recreated on every call, then a single
    `simple_unstructured.log` file with 11 log events is written into it.

    :param request: Provides access to the pytest cache.
    :param integration_test_path_config: Passed through to `IntegrationTestLogs`.
    :return: The `IntegrationTestLogs` instance for the generated dataset.
    """
    name = "simple_unstructured"
    # Each event carries the same timestamp, followed by a different timezone suffix.
    log_lines = (
        "2015-03-23 05:48:30,122 TEST1\n",
        "2015-03-23 05:48:30,122Z TEST2\n",
        "2015-03-23 05:48:30,122 Z TEST3\n",
        "2015-03-23 05:48:30,122+00 TEST4\n",
        "2015-03-23 05:48:30,122+00Z TEST5\n",
        "2015-03-23 05:48:30,122 +00 TEST6\n",
        "2015-03-23 05:48:30,122 +00Z TEST7\n",
        "2015-03-23 05:48:30,122UTC+00 TEST8\n",
        "2015-03-23 05:48:30,122UTC+00Z TEST9\n",
        "2015-03-23 05:48:30,122 UTC+00 TEST10\n",
        "2015-03-23 05:48:30,122 UTC+00Z TEST11\n",
    )

    integration_test_logs = IntegrationTestLogs(
        name=name,
        tarball_url=f"{name}.tar.gz",
        integration_test_path_config=integration_test_path_config,
        num_log_events=11,
    )

    # Start from a clean directory; `exist_ok=False` makes a failed removal surface immediately.
    target_dir = integration_test_logs.extraction_dir
    remove_path(target_dir)
    target_dir.mkdir(parents=True, exist_ok=False)

    log_file_path = target_dir / f"{name}.log"
    with log_file_path.open("w") as log_file:
        log_file.write("".join(log_lines))

    logger.info("Set up logs for dataset `%s`.", name)
    request.config.cache.set(name, True)
    return integration_test_logs


def _download_and_extract_gzip_dataset(
request: pytest.FixtureRequest,
integration_test_path_config: IntegrationTestPathConfig,
name: str,
tarball_url: str,
keep_leading_dir: bool = False,
) -> IntegrationTestLogs:
) -> DownloadedDataset:
"""
Download and extract a gzip-compressed dataset tarball for setting up the `IntegrationTestLogs`
Download and extract a gzip-compressed dataset tarball for setting up the `DownloadedDataset`
fixture. Adjust its file permissions for test use.

:param request: Provides access to the pytest cache.
Expand All @@ -99,24 +92,24 @@ def _download_and_extract_gzip_dataset(
:param tarball_url: Dataset tarball URL.
:param keep_leading_dir: Whether to preserve the top-level directory during tarball extraction.
Defaults to False to avoid an unnecessary extra directory level.
:return: An IntegrationTestLogs instance providing metadata for the downloaded logs.
:return: A DownloadedDataset instance providing metadata for the downloaded logs.
:raises subprocess.CalledProcessError: If `curl`, `tar`, or `chmod` fails.
"""
integration_test_logs = IntegrationTestLogs(
downloaded_dataset = DownloadedDataset(
name=name,
tarball_url=tarball_url,
integration_test_path_config=integration_test_path_config,
)
if request.config.cache.get(name, False):
logger.info("Test logs `%s` are up-to-date. Skipping download.", name)
return integration_test_logs
return downloaded_dataset

remove_path(integration_test_logs.tarball_path)
remove_path(integration_test_logs.extraction_dir)
integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)
remove_path(downloaded_dataset.tarball_path)
remove_path(downloaded_dataset.extraction_dir)
downloaded_dataset.extraction_dir.mkdir(parents=True, exist_ok=False)

tarball_path_str = str(integration_test_logs.tarball_path)
extract_path_str = str(integration_test_logs.extraction_dir)
tarball_path_str = str(downloaded_dataset.tarball_path)
extract_path_str = str(downloaded_dataset.extraction_dir)

# fmt: off
curl_cmd = [
Expand Down Expand Up @@ -151,4 +144,4 @@ def _download_and_extract_gzip_dataset(

logger.info("Downloaded and extracted uncompressed logs for dataset `%s`.", name)
request.config.cache.set(name, True)
return integration_test_logs
return downloaded_dataset
9 changes: 5 additions & 4 deletions integration-tests/tests/fixtures/path_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

import pytest

from tests.utils.classes import IntegrationTestPathConfig
from tests.utils.config import (
ClpCorePathConfig,
IntegrationTestPathConfig,
PackagePathConfig,
)
from tests.utils.utils import resolve_path_env_var
Expand All @@ -18,9 +18,10 @@ def clp_core_path_config() -> ClpCorePathConfig:

@pytest.fixture(scope="session")
def integration_test_path_config() -> IntegrationTestPathConfig:
"""Provides paths for the integration-test directory and its contents."""
"""Provides paths relevant to all integration tests."""
return IntegrationTestPathConfig(
test_root_dir=resolve_path_env_var("CLP_BUILD_DIR") / "integration-tests"
clp_build_dir=resolve_path_env_var("CLP_BUILD_DIR"),
integration_tests_project_root=resolve_path_env_var("INTEGRATION_TESTS_PROJECT_ROOT"),
)


Expand All @@ -34,5 +35,5 @@ def fixt_package_path_config(
package_test_scripts_dir=(
resolve_path_env_var("INTEGRATION_TESTS_PROJECT_ROOT") / "tests" / "package_tests"
),
test_root_dir=integration_test_path_config.test_root_dir,
test_cache_dir=integration_test_path_config.test_cache_dir,
)
38 changes: 38 additions & 0 deletions integration-tests/tests/fixtures/sample_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Session-scoped fixtures for sample datasets stored in the integration-tests data folder."""

import pytest

from tests.utils.classes import (
IntegrationTestPathConfig,
SampleDataset,
)


@pytest.fixture(scope="session")
def json_multifile(
    integration_test_path_config: IntegrationTestPathConfig,
) -> SampleDataset:
    """
    Provides the `json_multifile` sample dataset.

    :param integration_test_path_config: Supplies `test_data_dir`, under which the dataset lives.
    :return: A `SampleDataset` rooted at `<test_data_dir>/json_multifile`.
    """
    dataset_root_dir = integration_test_path_config.test_data_dir / "json_multifile"
    return SampleDataset(dataset_root_dir=dataset_root_dir)


@pytest.fixture(scope="session")
def text_multifile(
    integration_test_path_config: IntegrationTestPathConfig,
) -> SampleDataset:
    """
    Provides the `text_multifile` sample dataset.

    :param integration_test_path_config: Supplies `test_data_dir`, under which the dataset lives.
    :return: A `SampleDataset` rooted at `<test_data_dir>/text_multifile`.
    """
    dataset_root_dir = integration_test_path_config.test_data_dir / "text_multifile"
    return SampleDataset(dataset_root_dir=dataset_root_dir)


@pytest.fixture(scope="session")
def text_singlefile(
    integration_test_path_config: IntegrationTestPathConfig,
) -> SampleDataset:
    """
    Provides the `text_singlefile` sample dataset.

    :param integration_test_path_config: Supplies `test_data_dir`, under which the dataset lives.
    :return: A `SampleDataset` rooted at `<test_data_dir>/text_singlefile`.
    """
    dataset_root_dir = integration_test_path_config.test_data_dir / "text_singlefile"
    return SampleDataset(dataset_root_dir=dataset_root_dir)

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Loading
Loading