4 changes: 3 additions & 1 deletion krkn_ai/algorithm/genetic.py
@@ -95,7 +95,7 @@ def __init__(
# Track run metadata for results summary
self.start_time: Optional[datetime.datetime] = None
self.end_time: Optional[datetime.datetime] = None
- self.seed: Optional[int] = None  # Seed can be set externally if needed
+ self.seed: Optional[int] = self.config.seed
self.completed_generations: int = 0

if self.config.population_size < 2:
@@ -276,6 +276,8 @@ def _check_and_stop(self, cur_generation: int, elapsed_time: float) -> bool:
cur_generation,
format_duration(elapsed_time),
)
+ self.completed_generations = cur_generation
+ self.end_time = datetime.datetime.now(datetime.timezone.utc)
return True
return False

4 changes: 2 additions & 2 deletions krkn_ai/chaos_engines/krkn_runner.py
@@ -90,7 +90,7 @@ def __check_runner_availability(self):
def run(self, scenario: BaseScenario, generation_id: int) -> CommandRunResult:
logger.info("Running scenario: %s", scenario)

- start_time = datetime.datetime.now()
+ start_time = datetime.datetime.now(datetime.timezone.utc)

# Generate command krkn executor command
log, returncode, run_uuid = None, None, None
@@ -127,7 +127,7 @@ def run(self, scenario: BaseScenario, generation_id: int) -> CommandRunResult:
# Stop watching application urls for health checks
health_check_watcher.stop()

- end_time = datetime.datetime.now()
+ end_time = datetime.datetime.now(datetime.timezone.utc)

# calculate fitness scores
fitness_result: FitnessResult = FitnessResult()
182 changes: 182 additions & 0 deletions tests/unit/reporter/test_json_summary_reporter.py
@@ -0,0 +1,182 @@
"""
Unit tests for JSONSummaryReporter
"""

import os
import json
import datetime

from krkn_ai.reporter.json_summary_reporter import JSONSummaryReporter
from krkn_ai.models.app import CommandRunResult, FitnessResult
from krkn_ai.models.scenario.scenario_dummy import DummyScenario


class TestJSONSummaryReporter:
"""Test JSONSummaryReporter core functionality"""

def _create_results(self, gen_id, start_score, count, scenario, now):
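"""Build `count` CommandRunResult entries keyed by scenario_id (gen_id*100 + i), with fitness scores start_score, start_score + 10, ..."""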
results = {}
for i in range(count):
sid = (gen_id * 100) + i
score = float(start_score + (i * 10))
res = CommandRunResult(
generation_id=gen_id,
scenario_id=sid,
scenario=scenario,
cmd="test",
log="test",
returncode=0,
start_time=now,
end_time=now,
fitness_result=FitnessResult(fitness_score=score),
)
results[sid] = res
return results

def test_generate_summary_content(self, minimal_config):
"""Test summary dictionary content and calculations"""
now = datetime.datetime(2023, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc)
cc = minimal_config.cluster_components
scenario = DummyScenario(cluster_components=cc)

gen0 = self._create_results(0, 0, 3, scenario, now)
gen1 = self._create_results(1, 40, 3, scenario, now)

pop = {**gen0, **gen1}
best = [gen0[2], gen1[102]]

reporter = JSONSummaryReporter(
run_uuid="test-run",
config=minimal_config,
seen_population=pop,
best_of_generation=best,
start_time=now,
end_time=now + datetime.timedelta(seconds=100),
completed_generations=2,
seed=123
)
Comment on lines +48 to +57


Action required

5. GA metadata always empty 🐞 Bug ✓ Correctness

• The new unit tests pass end_time and completed_generations into JSONSummaryReporter, so duration/generation metadata looks correct in tests.
• In the real code path (GeneticAlgorithm.save()), end_time remains None and completed_generations remains 0, because they are initialized but never updated in the generation loop or stop logic.
• As a result, results.json will report duration_seconds=0.0 and generations_completed=0 for real runs, which breaks the correctness of run reporting/analytics.
Agent Prompt
## Issue description
`GeneticAlgorithm` passes `end_time` and `completed_generations` into `JSONSummaryReporter`, but these fields are never updated during the algorithm run. This causes `results.json` to report incorrect run metadata (duration stays 0, generations_completed stays 0) in real executions.

## Issue Context
The new unit tests for `JSONSummaryReporter` pass explicit `start_time/end_time/completed_generations`, so they don’t exercise the real producer of these values (`GeneticAlgorithm`).

## Fix Focus Areas
- krkn_ai/algorithm/genetic.py[120-190]
- krkn_ai/algorithm/genetic.py[260-280]
- krkn_ai/algorithm/genetic.py[682-700]
- krkn_ai/reporter/json_summary_reporter.py[66-70]

## Implementation notes
- Set `self.completed_generations` when a generation completes (e.g., after evaluation, or when stopping).
- Set `self.end_time` when stopping is triggered (ideally timezone-aware and consistent with `self.start_time`).
- Consider setting `self.seed = self.config.seed` (or pass `config.seed`) so seed is recorded.
- Add an integration-style test that runs a minimal/mocked `GeneticAlgorithm.simulate()` then `save()` and asserts `results.json` has non-zero duration and correct generations_completed.
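
A minimal sketch (under assumptions, not the project's actual code) of how GeneticAlgorithm could keep this metadata in sync; the attributes mirror those shown in the diff above, while the simulate() structure, the max_duration_seconds config field, and the omitted evaluation step are assumptions:

```python
import datetime
from typing import Optional


class GeneticAlgorithmSketch:
    """Hedged sketch only; the real GeneticAlgorithm carries far more state and logic."""

    def __init__(self, config):
        self.config = config
        self.start_time: Optional[datetime.datetime] = None
        self.end_time: Optional[datetime.datetime] = None
        self.seed: Optional[int] = config.seed            # record the configured seed up front
        self.completed_generations: int = 0

    def simulate(self) -> None:
        self.start_time = datetime.datetime.now(datetime.timezone.utc)
        for generation in range(self.config.generations):
            # ... evaluate and evolve the population for this generation (omitted) ...
            self.completed_generations = generation + 1    # update as each generation finishes
            elapsed = (datetime.datetime.now(datetime.timezone.utc) - self.start_time).total_seconds()
            if self._check_and_stop(generation, elapsed):  # early-stop path records end_time
                break
        if self.end_time is None:                          # normal-exit path
            self.end_time = datetime.datetime.now(datetime.timezone.utc)

    def _check_and_stop(self, cur_generation: int, elapsed_time: float) -> bool:
        # Assumed stop condition: a configurable wall-clock budget (max_duration_seconds).
        if elapsed_time >= getattr(self.config, "max_duration_seconds", float("inf")):
            self.end_time = datetime.datetime.now(datetime.timezone.utc)
            return True
        return False
```

Keeping end_time timezone-aware (UTC) matches the timezone-aware start_time and the krkn_runner.py change above, so the duration_seconds subtraction stays consistent.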



summary = reporter.generate_summary()

assert summary["run_id"] == "test-run"
assert summary["seed"] == 123
assert summary["start_time"] == now.isoformat()
assert summary["duration_seconds"] == 100.0

assert summary["config"]["generations"] == minimal_config.generations
assert summary["config"]["population_size"] == minimal_config.population_size

assert summary["summary"]["total_scenarios_executed"] == 6
assert summary["summary"]["best_fitness_score"] == 60.0
assert summary["summary"]["average_fitness_score"] == 30.0
assert summary["summary"]["generations_completed"] == 2

assert len(summary["fitness_progression"]) == 2
assert summary["fitness_progression"][0]["average"] == 10.0
assert summary["fitness_progression"][0]["best"] == 20.0
assert summary["fitness_progression"][1]["average"] == 50.0
assert summary["fitness_progression"][1]["best"] == 60.0

Comment on lines 59 to 79


Action required

1. generate_summary assertions incomplete 📎 Requirement gap ✓ Correctness

• The test_generate_summary_content test asserts only a small subset of the required generate_summary() output (e.g., run_id, duration_seconds, a couple of summary stats, and a few progression values).
• It does not validate required run metadata (e.g., seed, timestamps), the config summary, or the broader statistics/schema expectations, so regressions to the results.json structure or key computed fields could still pass CI.
• As written, the test suite does not meet the checklist's requirement to assert the unified summary dictionary's required keys and correct values.
Agent Prompt
## Issue description
`generate_summary()` unit tests do not validate the required output schema and key computed fields described in the compliance checklist.

## Issue Context
Downstream consumers rely on `results.json` schema stability. Partial assertions (only a few keys) can allow breaking changes to pass CI.

## Fix Focus Areas
- tests/unit/reporter/test_json_summary_reporter.py[34-66]
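
A hedged sketch of the schema-level assertion this comment asks for, intended as an additional method on TestJSONSummaryReporter; the test name and the exact set of required keys are assumptions inferred from the fields already exercised in this file, not a confirmed contract:

```python
def test_generate_summary_schema(self, minimal_config):
    """Sketch: assert the top-level summary keys (key list is an assumption)."""
    reporter = JSONSummaryReporter(
        run_uuid="schema",
        config=minimal_config,
        seen_population={},
        best_of_generation=[],
    )
    summary = reporter.generate_summary()
    expected_keys = {
        "run_id", "seed", "start_time", "duration_seconds",
        "config", "summary", "fitness_progression", "best_scenarios",
    }
    # Every expected key must be present; extra keys are tolerated here.
    assert expected_keys.issubset(summary.keys())
```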


def test_best_scenarios_ranking(self, minimal_config):
"""Test ranking logic and top 10 truncation"""
now = datetime.datetime.now(datetime.timezone.utc)
cc = minimal_config.cluster_components
scenario = DummyScenario(cluster_components=cc)

pop = self._create_results(0, 0, 15, scenario, now)

reporter = JSONSummaryReporter(
run_uuid="test",
config=minimal_config,
seen_population=pop,
best_of_generation=[],
)

best = reporter.generate_summary()["best_scenarios"]
assert len(best) == 10

for i in range(10):
item = best[i]
assert item["rank"] == i + 1
expected_score = float(140 - (i * 10))
assert item["fitness_score"] == expected_score
assert "scenario_id" in item
assert "generation" in item
assert "scenario_type" in item
assert "parameters" in item

def test_edge_cases(self, minimal_config):
"""Test single generation and zero fitness cases"""
now = datetime.datetime.now(datetime.timezone.utc)
cc = minimal_config.cluster_components
scenario = DummyScenario(cluster_components=cc)

gen0 = self._create_results(0, 50, 2, scenario, now)
reporter = JSONSummaryReporter(
run_uuid="single",
config=minimal_config,
seen_population=gen0,
best_of_generation=[gen0[1]],
completed_generations=1
)
summary = reporter.generate_summary()
assert len(summary["fitness_progression"]) == 1
assert summary["fitness_progression"][0]["best"] == 60.0

res_zero = CommandRunResult(
generation_id=0,
scenario_id=999,
scenario=scenario,
cmd="test",
log="test",
returncode=0,
start_time=now,
end_time=now,
fitness_result=FitnessResult(fitness_score=0.0),
)
reporter = JSONSummaryReporter(
run_uuid="zero",
config=minimal_config,
seen_population={999: res_zero},
best_of_generation=[],
)
summary = reporter.generate_summary()
assert summary["summary"]["best_fitness_score"] == 0.0
assert summary["summary"]["average_fitness_score"] == 0.0

Comment on lines 95 to 146


Action required

2. Top-10 ranking checks partial 📎 Requirement gap ✓ Correctness

• The best-scenarios test checks only the list length and the first entry's rank/fitness; it does not validate full sorting, rank increments, truncation correctness across all returned entries, or the required fields for each item.
• This leaves gaps where incorrect ordering, missing fields, or wrong ranks for entries 2–10 would not be caught.
Agent Prompt
## Issue description
`_build_best_scenarios()` behavior (sorting/ranking/truncation and required fields) is only partially validated.

## Issue Context
Incorrect ordering or missing fields in `best_scenarios` can break downstream consumers even if the top item looks correct.

## Fix Focus Areas
- tests/unit/reporter/test_json_summary_reporter.py[67-86]
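
A minimal sketch of the fuller ordering checks this comment describes, reusing the _create_results helper and DummyScenario already in this file (intended as another method on TestJSONSummaryReporter; only the test name and run_uuid are new):

```python
def test_best_scenarios_fully_sorted(self, minimal_config):
    """Sketch: verify descending fitness order, contiguous ranks, truncation, and fields."""
    now = datetime.datetime.now(datetime.timezone.utc)
    scenario = DummyScenario(cluster_components=minimal_config.cluster_components)
    pop = self._create_results(0, 0, 15, scenario, now)

    reporter = JSONSummaryReporter(
        run_uuid="rank",
        config=minimal_config,
        seen_population=pop,
        best_of_generation=[],
    )
    best = reporter.generate_summary()["best_scenarios"]

    assert len(best) == 10                                        # 15 inputs truncated to top 10
    scores = [item["fitness_score"] for item in best]
    assert scores == sorted(scores, reverse=True)                 # fully descending order
    assert [item["rank"] for item in best] == list(range(1, 11))  # contiguous ranks 1..10
    for item in best:
        for field in ("scenario_id", "generation", "scenario_type", "parameters"):
            assert field in item
```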


def test_empty_population(self, minimal_config):
"""Test summary behavior with no results"""
reporter = JSONSummaryReporter(
run_uuid="empty",
config=minimal_config,
seen_population={},
best_of_generation=[],
)
summary = reporter.generate_summary()
assert summary["summary"]["total_scenarios_executed"] == 0
assert summary["best_scenarios"] == []
assert summary["summary"]["best_fitness_score"] == 0.0
assert summary["summary"]["average_fitness_score"] == 0.0

def test_save_json_consistency(self, minimal_config, temp_output_dir):
"""Test that save method output matches generated summary"""
now = datetime.datetime.now(datetime.timezone.utc)
cc = minimal_config.cluster_components
scenario = DummyScenario(cluster_components=cc)
pop = self._create_results(0, 10, 1, scenario, now)

Comment on lines 147 to 167


Action required

3. Missing minimal/zero-fitness edge cases 📎 Requirement gap ⛯ Reliability

• The tests include an empty-population case, but do not cover the other required edge cases: a single-generation best_of_generation and explicit zero-fitness scenarios.
• Without these cases, failures or malformed output in minimal or degenerate runs can slip through CI.
Agent Prompt
## Issue description
Required edge cases (single generation and zero fitness) are not covered.

## Issue Context
Minimal or degenerate runs are common in testing and early stopping; output must remain stable and error-free.

## Fix Focus Areas
- tests/unit/reporter/test_json_summary_reporter.py[87-97]
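
A hedged sketch of the single-generation, zero-fitness case this comment calls for, built from the same models and constructor arguments used elsewhere in this file (only the test name and run_uuid are new):

```python
def test_single_generation_zero_fitness(self, minimal_config):
    """Sketch: one generation whose only scenario scored 0.0 still yields stable output."""
    now = datetime.datetime.now(datetime.timezone.utc)
    scenario = DummyScenario(cluster_components=minimal_config.cluster_components)
    res = CommandRunResult(
        generation_id=0,
        scenario_id=1,
        scenario=scenario,
        cmd="test",
        log="test",
        returncode=0,
        start_time=now,
        end_time=now,
        fitness_result=FitnessResult(fitness_score=0.0),
    )
    reporter = JSONSummaryReporter(
        run_uuid="single-zero",
        config=minimal_config,
        seen_population={1: res},
        best_of_generation=[res],
        completed_generations=1,
    )
    summary = reporter.generate_summary()
    assert len(summary["fitness_progression"]) == 1
    assert summary["summary"]["best_fitness_score"] == 0.0
    assert summary["summary"]["average_fitness_score"] == 0.0
```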


reporter = JSONSummaryReporter(
run_uuid="save-test",
config=minimal_config,
seen_population=pop,
best_of_generation=[],
)

expected_summary = reporter.generate_summary()
reporter.save(temp_output_dir)

path = os.path.join(temp_output_dir, "results.json")
assert os.path.exists(path)
with open(path, "r") as f:
saved_content = json.load(f)
assert saved_content == expected_summary