Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 72 additions & 1 deletion lib/ramble/ramble/experiment_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,14 @@
# option. This file may not be copied, modified, or distributed
# except according to those terms.

import os
from enum import Enum

from ramble.namespace import namespace
from ramble.software_info import SoftwareInfo
from ramble.util.file_util import get_newest_experiment_file

import spack.util.spack_json as sjson


# Can use auto() once we're at >= python 3.11
Expand All @@ -28,6 +33,7 @@ class ExperimentStatus(str, Enum):

_OUTPUT_MAPPING = {
"name": "name",
"status": "EXPERIMENT_STATUS",
namespace.n_repeats: "N_REPEATS",
"keys": "keys",
"contexts": "CONTEXTS",
Expand All @@ -38,13 +44,16 @@ class ExperimentStatus(str, Enum):
namespace.variants: "VARIANTS",
"experiment_chain": "EXPERIMENT_CHAIN",
"success_criteria": "SUCCESS_CRITERIA",
"object_definitions": "OBJECT_DEFINITIONS",
}


# TODO: would be better to use dataclass after 3.6 support is dropped
class ExperimentResult:
"""Class containing results and related metadata of an experiment"""

cache_file_name = "ramble_results_cache.json"

def __init__(self, app_inst):
"""Build up the result from the given app instance"""
self._app_inst = app_inst
Expand All @@ -60,8 +69,51 @@ def __init__(self, app_inst):
self.raw_variables = {}
self.variables = {}
self.variants = []
self.object_definitions = []

def read_cache(self, workspace, app_inst) -> bool:
    """Populate this result from the experiment's cache file, if it is usable.

    The cache is treated as a miss (and this object is left untouched) when:

    * the cache file does not exist,
    * a file reported by ``get_newest_experiment_file`` is newer than the cache,
    * the cache file cannot be read or parsed,
    * the cached object definitions are missing or differ from the current
      object inventory of ``app_inst``.

    Args:
        workspace: Workspace forwarded to ``app_inst.object_inventory``
        app_inst: Application instance providing the experiment run
                  directory and the current object inventory

    Returns:
        (bool): True if the cache was loaded into this object, False otherwise
    """
    experiment_dir = app_inst.expander.experiment_run_dir
    cache_file = os.path.join(experiment_dir, self.cache_file_name)

    if not os.path.isfile(cache_file):
        return False
    cache_timestamp = os.path.getmtime(cache_file)

    # Only the timestamp matters here; the path of the newest file is unused.
    _, file_timestamp = get_newest_experiment_file(experiment_dir)

    if file_timestamp is not None and cache_timestamp < file_timestamp:
        return False

    # An unreadable or truncated cache is a cache miss, not a fatal error:
    # the caller can always regenerate the results.
    try:
        with open(cache_file) as f:
            cache_dict = sjson.load(f)
    except (OSError, ValueError):
        return False

    if not isinstance(cache_dict, dict):
        return False

    object_inventory = app_inst.object_inventory(workspace=workspace)
    object_key = _OUTPUT_MAPPING["object_definitions"]
    if object_key not in cache_dict or object_inventory != cache_dict[object_key]:
        return False

    self.from_dict(cache_dict)
    return True

def write_cache(self, app_inst):
    """Write this result to the cache file in the experiment run directory.

    The output of ``to_dict()`` is written as JSON. Entries under the
    software key are converted with each package's ``to_dict()`` first.

    The data is written to a temporary sibling file and then moved over the
    cache file, so an interrupted write never leaves a truncated cache
    behind. The temporary name keeps the cache file's ``ramble_`` prefix.

    Args:
        app_inst: Application instance providing the experiment run directory
    """
    experiment_dir = app_inst.expander.experiment_run_dir
    cache_file = os.path.join(experiment_dir, self.cache_file_name)

    out_dict = self.to_dict()

    # Replace the software entry with a new dict of serialized packages
    # instead of mutating whatever mapping to_dict() handed back.
    software_key = _OUTPUT_MAPPING["software"]
    software_packages = out_dict.get(software_key, {})
    out_dict[software_key] = {
        key: [pkg.to_dict() for pkg in pkg_list] for key, pkg_list in software_packages.items()
    }

    tmp_file = cache_file + ".tmp"
    try:
        with open(tmp_file, "w") as f:
            sjson.dump(out_dict, f)
        os.replace(tmp_file, cache_file)
    finally:
        # After a successful replace the temp file is gone; this only
        # cleans up when the dump or the replace failed.
        if os.path.exists(tmp_file):
            os.unlink(tmp_file)

def finalize(self, workspace):
app_inst = self._app_inst
self.name = app_inst.expander.experiment_namespace

Expand All @@ -86,6 +138,25 @@ def finalize(self):

self.variants = sorted(app_inst.experiment_variants().as_set())

self.object_definitions = app_inst.object_inventory(workspace)

def from_dict(self, in_dict: dict):
    """Restore this result's attributes from a dictionary.

    Every attribute listed in ``_OUTPUT_MAPPING`` whose output key is present
    in ``in_dict`` is set on this object. The software entry, when present,
    is then rebuilt into lists of ``SoftwareInfo`` objects.

    Args:
        in_dict (dict): Input dictionary of results from a cache
    """
    for attr_name, dict_key in _OUTPUT_MAPPING.items():
        if dict_key not in in_dict:
            continue
        setattr(self, attr_name, in_dict[dict_key])

    software_key = _OUTPUT_MAPPING["software"]
    if software_key not in in_dict:
        return

    # Software was copied above in its serialized form; replace it with
    # reconstructed SoftwareInfo objects.
    self.software = {}
    for group, serialized_pkgs in in_dict[software_key].items():
        restored = []
        for pkg_conf in serialized_pkgs:
            restored.append(SoftwareInfo(**pkg_conf))
        self.software[group] = restored

def to_dict(self):
"""Generate a dict for encoders (json, yaml) and uploaders.

Expand Down
14 changes: 11 additions & 3 deletions lib/ramble/ramble/test/end_to_end/experiment_hashes.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,17 @@ def test_experiment_hashes(mutable_config, mutable_mock_workspace_path, workspac
with open(experiment_inventory) as f:
data = sjson.load(f)

assert "application_definition" in data
assert data["application_definition"] != ""
assert data["application_definition"] is not None
assert "object_definitions" in data
assert data["object_definitions"] is not None
assert data["object_definitions"] != []

found_app = False
for definition in data["object_definitions"]:
if definition["type"] == "applications":
found_app = True
assert definition["digest"] != ""
assert definition["digest"] is not None
assert found_app

# Test Attributes
expected_attrs = {"variables", "modifiers", "env_vars", "internals", "chained_experiments"}
Expand Down
19 changes: 17 additions & 2 deletions lib/ramble/ramble/test/experiment_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,25 @@

"""Tests on the ExperimentResult class"""

import pytest

from ramble.experiment_result import ExperimentResult


def test_to_dict(mutable_mock_apps_repo):
@pytest.fixture
def mock_workspace():
    """Provide a minimal stand-in workspace exposing ``path``, ``name`` and ``get_path()``."""

    class MockWorkspace:
        """Bare-bones workspace double with a fixed path and name."""

        def __init__(self):
            self.path = "/mock/workspace/path"
            self.name = "mock_workspace"

        def get_path(self):
            """Return the mock workspace path."""
            return self.path

    workspace = MockWorkspace()
    return workspace


def test_to_dict(mutable_mock_apps_repo, mock_workspace):
basic_app_inst = mutable_mock_apps_repo.get("basic")
basic_app_inst.set_variables_and_variants(
{"workload_name": "test_wl", "experiment_status": "placeholder", "test_var": "my_var"},
Expand All @@ -20,7 +35,7 @@ def test_to_dict(mutable_mock_apps_repo):
)
basic_app_inst.set_status("UNKNOWN")
exp_res = ExperimentResult(basic_app_inst)
exp_res.finalize()
exp_res.finalize(mock_workspace)
res_dict = exp_res.to_dict()

assert "name" in res_dict
Expand Down
124 changes: 124 additions & 0 deletions lib/ramble/ramble/test/test_experiment_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Copyright 2022-2025 The Ramble Authors
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

"""
Tests for the experiment_result module.
"""

import os
import time

import pytest

from ramble.experiment_result import ExperimentResult
from ramble.util.file_util import get_newest_experiment_file

# pylint: disable=protected-access


@pytest.fixture
def experiment_result(tmpdir):
    """Build an ExperimentResult pre-populated with fixed test values."""
    populated = ExperimentResult(mock_app_inst(tmpdir))

    # Applied in order, one attribute at a time.
    preset_fields = (
        ("name", "test_experiment"),
        ("status", "SUCCESS"),
        ("raw_variables", {"VAR1": "VALUE1"}),
        ("variables", {"VAR1": "VALUE1"}),
        ("variants", ["variant1", "variant2"]),
        ("object_definitions", [{"name": "test_object", "type": "APPLICATION"}]),
    )
    for attr_name, value in preset_fields:
        setattr(populated, attr_name, value)

    return populated


def mock_app_inst(tmpdir):
    """Build a stand-in application instance rooted at ``tmpdir``.

    The returned object exposes ``expander.experiment_run_dir`` (the string
    form of ``tmpdir``) and an ``object_inventory`` method returning a fixed
    single-entry inventory.
    """
    run_dir = str(tmpdir)

    class MockExpander:
        """Mock expander class."""

        def __init__(self):
            self.experiment_run_dir = run_dir

    class MockAppInst:
        """Mock application instance class."""

        def __init__(self):
            self.expander = MockExpander()

        def object_inventory(self, workspace):  # pylint: disable=unused-argument
            """Returns a mock object inventory."""
            entry = {"name": "test_object", "type": "APPLICATION"}
            return [entry]

    instance = MockAppInst()
    return instance


def test_write_read_cache(experiment_result, tmpdir):
    """A result written to the cache is read back with the same field values."""
    app_inst = mock_app_inst(tmpdir)
    experiment_result.write_cache(app_inst)

    restored = ExperimentResult(app_inst)
    cache_hit = restored.read_cache(None, app_inst)
    assert cache_hit

    # Every field populated by the fixture must survive the round trip.
    round_tripped_fields = (
        "name",
        "status",
        "raw_variables",
        "variables",
        "variants",
        "object_definitions",
    )
    for field_name in round_tripped_fields:
        assert getattr(restored, field_name) == getattr(experiment_result, field_name)


def test_get_newest_experiment_file(tmpdir):
    """Test the get_newest_experiment_file function."""
    file1 = tmpdir.join("file1.txt")
    file1.write("content")
    file2 = tmpdir.join("file2.txt")
    file2.write("content")
    subdir = tmpdir.mkdir("subdir")
    file3 = subdir.join("file3.txt")
    file3.write("content")

    # Assign explicit, well-separated modification times instead of sleeping
    # between writes: the ordering then does not depend on the filesystem's
    # timestamp granularity, and the test does not waste wall-clock time.
    base_time = time.time()
    for position, path in enumerate((file1, file2, file3)):
        stamp = base_time + 10 * position
        os.utime(str(path), (stamp, stamp))

    newest_file, timestamp = get_newest_experiment_file(str(tmpdir))
    assert newest_file == str(file3)
    assert timestamp == os.path.getmtime(str(file3))


def test_read_cache_stale(experiment_result, tmpdir):
    """Test that read_cache returns False if the cache is stale."""
    app_inst = mock_app_inst(tmpdir)
    experiment_result.write_cache(app_inst)

    # Make a file that is newer than the cache. Its modification time is set
    # explicitly relative to the cache file rather than relying on a sleep,
    # so the result does not depend on filesystem timestamp granularity.
    new_file = tmpdir.join("new_file.txt")
    new_file.write("content")
    cache_path = os.path.join(str(tmpdir), ExperimentResult.cache_file_name)
    newer_stamp = os.path.getmtime(cache_path) + 10
    os.utime(str(new_file), (newer_stamp, newer_stamp))

    new_result = ExperimentResult(app_inst)
    assert not new_result.read_cache(None, app_inst)


def test_read_cache_object_inventory_changed(experiment_result, tmpdir):
    """A cache written under one object inventory is rejected under another."""
    original_app_inst = mock_app_inst(tmpdir)
    experiment_result.write_cache(original_app_inst)

    base_cls = type(original_app_inst)

    class MockAppInstNewInventory(base_cls):
        """Mock application instance with a different object inventory."""

        def object_inventory(self, workspace):  # pylint: disable=unused-argument
            """Returns a different mock object inventory."""
            return [{"name": "new_object", "type": "APPLICATION"}]

    changed_app_inst = MockAppInstNewInventory()
    fresh_result = ExperimentResult(changed_app_inst)

    cache_hit = fresh_result.read_cache(None, changed_app_inst)
    assert not cache_hit
25 changes: 25 additions & 0 deletions lib/ramble/ramble/util/file_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# except according to those terms.

import os
from pathlib import Path

_DRY_RUN_PATH_PREFIX = os.path.join("dry-run", "path", "to")

Expand Down Expand Up @@ -39,3 +40,27 @@ def create_symlink(base, link):
os.unlink(link)

os.symlink(base, link)


def get_newest_experiment_file(base_directory):
    """Given a base directory, determine the newest file in the directory (and
    its subdirectories) and return the file path and its timestamp in seconds.

    Files whose base name starts with ``ramble_`` are ignored. Entries that
    disappear or cannot be stat'ed while the directory is being scanned are
    skipped rather than aborting the search.

    Args:
        base_directory (str): Directory to search newest file for

    Returns:
        (str): Path to newest file (or None if not found)
        (float): Modification time of the file in seconds (or None if no file
                 is found)
    """
    newest_path = None
    newest_mtime = None

    for candidate in Path(base_directory).rglob("*"):
        if candidate.name.startswith("ramble_"):
            continue

        # Read the modification time once per file, so the returned
        # timestamp is the one the file was selected by.
        try:
            if not candidate.is_file():
                continue
            mtime = os.path.getmtime(candidate)
        except OSError:
            # The file vanished (or became unreadable) mid-scan.
            continue

        # Strict comparison keeps the first file seen when timestamps tie.
        if newest_mtime is None or mtime > newest_mtime:
            newest_path, newest_mtime = candidate, mtime

    if newest_path is None:
        return None, None

    return str(newest_path), newest_mtime
Loading