Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions integration-tests/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@

# Make the fixtures defined in `tests/fixtures/` globally available without imports.
pytest_plugins = [
"tests.fixtures.datasets",
"tests.fixtures.integration_test_logs",
"tests.fixtures.sample_datasets",
Comment thread
quinntaylormitchell marked this conversation as resolved.
"tests.fixtures.path_configs",
"tests.fixtures.package_instance",
"tests.fixtures.package_test_config",
Expand Down
4 changes: 2 additions & 2 deletions integration-tests/tests/data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ To access a dataset from within the test system, the following rules should be o
@pytest.fixture(scope="session")
def dataset_name(
integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestDataset:
) -> SampleDataset:
"""Returns an object corresponding to the `dataset_name` test dataset."""
return IntegrationTestDataset(
return SampleDataset(
dataset_root_dir=integration_test_path_config.test_data_dir / "dataset_name",
)
```
Expand Down
11 changes: 11 additions & 0 deletions integration-tests/tests/data/text_singlefile/logs/simple.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
2015-03-23 05:48:30,122 TEST1
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Bill-hbrhbr can we check if the test cases match the ones in cpp?

std::vector<ExpectedCatSequenceTransformation> const timezone_transformations{

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just fyi: the contents of simple.txt in this PR match the test logs introduced in #2192.

2015-03-23 05:48:30,122Z TEST2
2015-03-23 05:48:30,122 Z TEST3
2015-03-23 05:48:30,122+00 TEST4
2015-03-23 05:48:30,122+00Z TEST5
2015-03-23 05:48:30,122 +00 TEST6
2015-03-23 05:48:30,122 +00Z TEST7
2015-03-23 05:48:30,122UTC+00 TEST8
2015-03-23 05:48:30,122UTC+00Z TEST9
2015-03-23 05:48:30,122 UTC+00 TEST10
2015-03-23 05:48:30,122 UTC+00Z TEST11
12 changes: 12 additions & 0 deletions integration-tests/tests/data/text_singlefile/metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"dataset_name": "text_singlefile",
"unstructured": true,
"timestamp_key": null,
"begin_ts": 1427089710122,
"end_ts": 1427089710122,
"logs_subdir": "logs",
"file_names": [
"simple.txt"
],
"single_match_wildcard_query": "TEST1"
}
28 changes: 0 additions & 28 deletions integration-tests/tests/fixtures/datasets.py
Comment thread
quinntaylormitchell marked this conversation as resolved.
Outdated

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,28 +1,57 @@
"""Session-scoped test log fixtures shared across integration tests."""
"""Session-scoped fixtures for test datasets downloaded on-demand from external URLs."""

import logging
import pathlib
import subprocess
from dataclasses import dataclass, field, InitVar
from pathlib import Path

import pytest

from tests.utils.config import (
IntegrationTestLogs,
IntegrationTestPathConfig,
)
from tests.utils.classes import IntegrationTestPathConfig
from tests.utils.utils import (
get_binary_path,
remove_path,
validate_dir_exists,
)

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class DownloadedDataset:
    """
    Metadata for a set of downloaded logs used by the integration tests.

    `tarball_path` and `extraction_dir` are not accepted by the constructor; they are derived in
    `__post_init__` from `name` and the `downloaded_logs_dir` of the given path config.
    """

    #: Dataset name, stored with leading and trailing whitespace stripped.
    name: str
    #: URL of the dataset tarball.
    tarball_url: str
    integration_test_path_config: InitVar[IntegrationTestPathConfig]
    #: Tarball location: `<downloaded_logs_dir>/<name>.tar.gz`.
    tarball_path: Path = field(init=False, repr=True)
    #: Extraction location: `<downloaded_logs_dir>/<name>`.
    extraction_dir: Path = field(init=False, repr=True)
    #: Optional number of log events in the downloaded logs.
    num_log_events: int | None = None

    def __post_init__(self, integration_test_path_config: IntegrationTestPathConfig) -> None:
        """
        Normalizes `name` and derives the tarball and extraction paths from it.

        :param integration_test_path_config: Supplies `downloaded_logs_dir`, which is checked with
            `validate_dir_exists` before any path is derived.
        :raises ValueError: If `name` is empty once surrounding whitespace is stripped.
        """
        stripped_name = self.name.strip()
        if not stripped_name:
            err_msg = "`name` cannot be empty."
            raise ValueError(err_msg)

        logs_dir = integration_test_path_config.downloaded_logs_dir
        validate_dir_exists(logs_dir)

        # The dataclass is frozen, so attributes have to be assigned via `object.__setattr__`.
        derived_attrs = {
            "name": stripped_name,
            "tarball_path": logs_dir / f"{stripped_name}.tar.gz",
            "extraction_dir": logs_dir / stripped_name,
        }
        for attr_name, attr_value in derived_attrs.items():
            object.__setattr__(self, attr_name, attr_value)
Comment thread
quinntaylormitchell marked this conversation as resolved.
Outdated


@pytest.fixture(scope="session")
def hive_24hr(
request: pytest.FixtureRequest,
integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestLogs:
) -> DownloadedDataset:
"""Provides shared `hive_24hr` test logs."""
return _download_and_extract_gzip_dataset(
request=request,
Expand All @@ -36,7 +65,7 @@ def hive_24hr(
def postgresql(
request: pytest.FixtureRequest,
integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestLogs:
) -> DownloadedDataset:
"""Provides shared `postgresql` test logs."""
return _download_and_extract_gzip_dataset(
request=request,
Expand All @@ -46,51 +75,15 @@ def postgresql(
)


@pytest.fixture(scope="session")
def simple_unstructured(
    request: pytest.FixtureRequest,
    integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestLogs:
    """
    Provides a simple unstructured test log.

    Recreates the dataset's extraction directory, writes a single `simple_unstructured.log` file
    containing 11 log events into it, and records the dataset as set up in the pytest cache.

    :param request: Provides access to the pytest cache.
    :param integration_test_path_config: Passed to `IntegrationTestLogs` to derive dataset paths.
    :return: An `IntegrationTestLogs` instance describing the generated logs.
    """
    name = "simple_unstructured"
    integration_test_logs = IntegrationTestLogs(
        name=name,
        tarball_url=f"{name}.tar.gz",
        integration_test_path_config=integration_test_path_config,
        num_log_events=11,
    )
    # Start from a clean directory; `exist_ok=False` makes a leftover directory fail loudly.
    remove_path(integration_test_logs.extraction_dir)
    integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)

    # Call `open` on the path instance and pin the encoding so that the written bytes do not depend
    # on the platform's locale.
    log_file_path = integration_test_logs.extraction_dir / f"{name}.log"
    with log_file_path.open("w", encoding="utf-8") as f:
        # Every event shares one timestamp and differs only in the suffix that follows it.
        f.write(
            "2015-03-23 05:48:30,122 TEST1\n"
            "2015-03-23 05:48:30,122Z TEST2\n"
            "2015-03-23 05:48:30,122 Z TEST3\n"
            "2015-03-23 05:48:30,122+00 TEST4\n"
            "2015-03-23 05:48:30,122+00Z TEST5\n"
            "2015-03-23 05:48:30,122 +00 TEST6\n"
            "2015-03-23 05:48:30,122 +00Z TEST7\n"
            "2015-03-23 05:48:30,122UTC+00 TEST8\n"
            "2015-03-23 05:48:30,122UTC+00Z TEST9\n"
            "2015-03-23 05:48:30,122 UTC+00 TEST10\n"
            "2015-03-23 05:48:30,122 UTC+00Z TEST11\n"
        )

    logger.info("Set up logs for dataset `%s`.", name)
    request.config.cache.set(name, True)
    return integration_test_logs


def _download_and_extract_gzip_dataset(
request: pytest.FixtureRequest,
integration_test_path_config: IntegrationTestPathConfig,
name: str,
tarball_url: str,
keep_leading_dir: bool = False,
) -> IntegrationTestLogs:
) -> DownloadedDataset:
"""
Download and extract a gzip-compressed dataset tarball for setting up the `IntegrationTestLogs`
Download and extract a gzip-compressed dataset tarball for setting up the `DownloadedDataset`
fixture. Adjust its file permissions for test use.

:param request: Provides access to the pytest cache.
Expand All @@ -99,24 +92,24 @@ def _download_and_extract_gzip_dataset(
:param tarball_url: Dataset tarball URL.
:param keep_leading_dir: Whether to preserve the top-level directory during tarball extraction.
Defaults to False to avoid an unnecessary extra directory level.
:return: An IntegrationTestLogs instance providing metadata for the downloaded logs.
:return: A DownloadedDataset instance providing metadata for the downloaded logs.
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
:raises subprocess.CalledProcessError: If `curl`, `tar`, or `chmod` fails.
"""
integration_test_logs = IntegrationTestLogs(
downloaded_dataset = DownloadedDataset(
name=name,
tarball_url=tarball_url,
integration_test_path_config=integration_test_path_config,
)
if request.config.cache.get(name, False):
logger.info("Test logs `%s` are up-to-date. Skipping download.", name)
return integration_test_logs
return downloaded_dataset

remove_path(integration_test_logs.tarball_path)
remove_path(integration_test_logs.extraction_dir)
integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)
remove_path(downloaded_dataset.tarball_path)
remove_path(downloaded_dataset.extraction_dir)
downloaded_dataset.extraction_dir.mkdir(parents=True, exist_ok=False)

tarball_path_str = str(integration_test_logs.tarball_path)
extract_path_str = str(integration_test_logs.extraction_dir)
tarball_path_str = str(downloaded_dataset.tarball_path)
extract_path_str = str(downloaded_dataset.extraction_dir)

# fmt: off
curl_cmd = [
Expand Down Expand Up @@ -151,4 +144,4 @@ def _download_and_extract_gzip_dataset(

logger.info("Downloaded and extracted uncompressed logs for dataset `%s`.", name)
request.config.cache.set(name, True)
return integration_test_logs
return downloaded_dataset
9 changes: 5 additions & 4 deletions integration-tests/tests/fixtures/path_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

import pytest

from tests.utils.classes import IntegrationTestPathConfig
from tests.utils.config import (
ClpCorePathConfig,
IntegrationTestPathConfig,
PackagePathConfig,
)
from tests.utils.utils import resolve_path_env_var
Expand All @@ -18,9 +18,10 @@ def clp_core_path_config() -> ClpCorePathConfig:

@pytest.fixture(scope="session")
def integration_test_path_config() -> IntegrationTestPathConfig:
"""Provides paths for the integration-test directory and its contents."""
"""Provides paths relevant to all integration tests."""
return IntegrationTestPathConfig(
test_root_dir=resolve_path_env_var("CLP_BUILD_DIR") / "integration-tests"
clp_build_dir=resolve_path_env_var("CLP_BUILD_DIR"),
integration_tests_project_root=resolve_path_env_var("INTEGRATION_TESTS_PROJECT_ROOT"),
)


Expand All @@ -34,5 +35,5 @@ def fixt_package_path_config(
package_test_scripts_dir=(
resolve_path_env_var("INTEGRATION_TESTS_PROJECT_ROOT") / "tests" / "package_tests"
),
test_root_dir=integration_test_path_config.test_root_dir,
test_cache_dir=integration_test_path_config.test_cache_dir,
)
38 changes: 38 additions & 0 deletions integration-tests/tests/fixtures/sample_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Session-scoped fixtures for sample datasets stored in the integration-tests data folder."""

import pytest

from tests.utils.classes import (
IntegrationTestPathConfig,
SampleDataset,
)


@pytest.fixture(scope="session")
def json_multifile(integration_test_path_config: IntegrationTestPathConfig) -> SampleDataset:
    """Builds the `SampleDataset` rooted at `json_multifile` inside the test data directory."""
    dataset_root_dir = integration_test_path_config.test_data_dir / "json_multifile"
    return SampleDataset(dataset_root_dir=dataset_root_dir)


@pytest.fixture(scope="session")
def text_multifile(integration_test_path_config: IntegrationTestPathConfig) -> SampleDataset:
    """Builds the `SampleDataset` rooted at `text_multifile` inside the test data directory."""
    dataset_root_dir = integration_test_path_config.test_data_dir / "text_multifile"
    return SampleDataset(dataset_root_dir=dataset_root_dir)


@pytest.fixture(scope="session")
def text_singlefile(integration_test_path_config: IntegrationTestPathConfig) -> SampleDataset:
    """Builds the `SampleDataset` rooted at `text_singlefile` inside the test data directory."""
    dataset_root_dir = integration_test_path_config.test_data_dir / "text_singlefile"
    return SampleDataset(dataset_root_dir=dataset_root_dir)

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Loading
Loading