Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions cmd/run.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
package cmd

import (
"bytes"
"errors"
"fmt"
"io"
"log"
"os"
"strings"
"time"

"github.com/fatih/color"
"github.com/krkn-chaos/krknctl/pkg/config"
"github.com/krkn-chaos/krknctl/pkg/provider/factory"
"github.com/krkn-chaos/krknctl/pkg/provider/models"
"github.com/krkn-chaos/krknctl/pkg/resiliency"
"github.com/krkn-chaos/krknctl/pkg/scenarioorchestrator"
"github.com/krkn-chaos/krknctl/pkg/scenarioorchestrator/utils"
"github.com/krkn-chaos/krknctl/pkg/typing"
commonutils "github.com/krkn-chaos/krknctl/pkg/utils"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"log"
"os"
"strings"
"time"
)

// 🤖 Assisted with Claude Code (claude.ai/code)
Expand Down Expand Up @@ -303,6 +307,13 @@ func NewRunCommand(factory *factory.ProviderFactory, scenarioOrchestrator *scena
}

if !runDetached {

// Here we are using an io.MultiWriter to multiplex the container's stdout and stderr to both
// the terminal stdout and a bytes.Buffer. This allows us to capture the logs for parsing later.
// The container stdout and stderr will be written to both the terminal stdout and the bytes.Buffer.
var logBuf bytes.Buffer
mw := io.MultiWriter(os.Stdout, &logBuf)

commChan := make(chan *string)
go func() {
for msg := range commChan {
Expand All @@ -311,14 +322,25 @@ func NewRunCommand(factory *factory.ProviderFactory, scenarioOrchestrator *scena
spinner.Stop()
}()

_, err = (*scenarioOrchestrator).RunAttached(quayImageURI+":"+scenarioDetail.Name, containerName, environment, false, volumes, os.Stdout, os.Stderr, &commChan, conn, registrySettings)
_, err = (*scenarioOrchestrator).RunAttached(quayImageURI+":"+scenarioDetail.Name, containerName, environment, false, volumes, mw, mw, &commChan, conn, registrySettings)
if err != nil {
var staterr *utils.ExitError
if errors.As(err, &staterr) {
os.Exit(staterr.ExitStatus)
}
return err
}

// Parse resiliency report from captured logs and generate report
if rep, perr := resiliency.ParseResiliencyReport(logBuf.Bytes()); perr == nil {
if err := resiliency.GenerateAndWriteReport(
[]resiliency.DetailedScenarioReport{*rep},
"resiliency-report.json",
); err != nil {
log.Printf("Error generating resiliency report: %v", err)
}
}

scenarioDuration := time.Since(startTime)
fmt.Printf("%s ran for %s\n", scenarioDetail.Name, scenarioDuration.String())
} else {
Expand All @@ -343,7 +365,7 @@ func NewRunCommand(factory *factory.ProviderFactory, scenarioOrchestrator *scena
func printHelp(scenario models.ScenarioDetail) {
boldWhite := color.New(color.FgHiWhite, color.Bold).SprintFunc()
for _, f := range scenario.Fields {

enum := ""
if f.Type == typing.Enum {
enum = strings.Replace(*f.AllowedValues, *f.Separator, "|", -1)
Expand Down
25 changes: 19 additions & 6 deletions cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,23 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"strings"
"time"

"github.com/briandowns/spinner"
"github.com/krkn-chaos/krknctl/pkg/config"
"github.com/krkn-chaos/krknctl/pkg/provider"
"github.com/krkn-chaos/krknctl/pkg/provider/factory"
"github.com/krkn-chaos/krknctl/pkg/provider/models"
"github.com/krkn-chaos/krknctl/pkg/resiliency"
orchestratorModels "github.com/krkn-chaos/krknctl/pkg/scenarioorchestrator/models"
"github.com/krkn-chaos/krknctl/pkg/typing"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"io"
"net/http"
"net/url"
"os"
"strings"
"time"
)

// 🤖 Assisted with Claude Code (claude.ai/code)
Expand Down Expand Up @@ -122,6 +124,17 @@ func ParseFlags(scenarioDetail *models.ScenarioDetail, args []string, scenarioCo
}

}

// Set RESILIENCY_ENABLED_MODE based on PROMETHEUS_URL using resiliency helper
if cfg, err := config.LoadConfig(); err == nil {
promURL := ""
if prom, ok := environment["PROMETHEUS_URL"]; ok {
promURL = prom.value
}
mode := resiliency.ComputeResiliencyMode(promURL, cfg)
environment[cfg.EnvResiliencyEnabledMode] = ParsedField{value: mode, secret: false}
}

return &environment, &volumes, nil
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ type Config struct {
GithubReleaseAPIDeprecated string `json:"github_release_api_deprecated"`
TableFieldMaxLength int `json:"table_field_max_length"`
TableMaxStepScenarioLength int `json:"table_max_step_scenario_length"`
ResiliencyEnabledMode string `json:"resiliency_enabled_mode"`
ResiliencyReportRegex string `json:"resiliency_report_regex"`
EnvResiliencyEnabledMode string `json:"env_resiliency_enabled_mode"`
EnvKrknAlertsYamlContent string `json:"env_krkn_alerts_yaml_content"`
}

//go:embed config.json
Expand Down
6 changes: 5 additions & 1 deletion pkg/config/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,14 @@
"env_private_registry_token": "KRKNCTL_PRIVATE_REGISTRY_TOKEN",
"env_private_registry_scenarios": "KRKNCTL_PRIVATE_REGISTRY_SCENARIOS",
"env_private_registry_insecure": "KRKNCTL_PRIVATE_REGISTRY_INSECURE",
"env_resiliency_enabled_mode": "RESILIENCY_ENABLED_MODE",
"env_krkn_alerts_yaml_content": "KRKN_ALERTS_YAML_CONTENT",
"github_latest_release": "https://github.com/krkn-chaos/krknctl/releases/latest",
"github_latest_release_api": "https://api.github.com/repos/krkn-chaos/krknctl/releases/latest",
"github_release_api": "https://api.github.com/repos/krkn-chaos/krknctl/releases/tags",
"github_release_api_deprecated": "[DEPRECATED]",
"table_field_max_length": 20,
"table_max_step_scenario_length": 7
"table_max_step_scenario_length": 7,
"resiliency_enabled_mode": "controller",
"resiliency_report_regex": "KRKN_RESILIENCY_REPORT_JSON:\\s*(\\{.*)"
}
241 changes: 241 additions & 0 deletions pkg/resiliency/resiliency.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
package resiliency

// Package resiliency centralises all logic related to parsing and aggregating
// resiliency reports that are emitted by the krkn engine. A report is printed
// on a single log line prefixed by the token `KRKN_RESILIENCY_REPORT_JSON:`
// and contains a JSON payload describing the outcome of one(for krknctl) or more chaos
// scenarios executed by the engine.
//
// The goal of this package is to provide:
// 1. A small set of structs mirroring the JSON schema.
// 2. A parser capable of extracting and unmarshalling the payload from a
// raw block of log text.
// 3. Helper utilities to merge many individual reports into a single final
// summary which is later persisted to `resiliency-report.json` by the
// CLI.

import (
"encoding/json"
"errors"
"fmt"
"os"
"regexp"
"sync"

"github.com/krkn-chaos/krknctl/pkg/config"
)

// ----------------------------------------------------------------------------
// Data structures
// ----------------------------------------------------------------------------

type OverallResiliencyReport struct {
Scenarios map[string]float64 `json:"scenarios"`
ResiliencyScore float64 `json:"resiliency_score"`
PassedSlos int `json:"passed_slos"`
TotalSlos int `json:"total_slos"`
}

// ----------------------------------------------------------------------------

type DetailedScenarioReport struct {
OverallReport OverallResiliencyReport
ScenarioWeights map[string]float64 `json:"scenario_weights,omitempty"`
}

// ----------------------------------------------------------------------------

type FinalReport struct {
Scenarios map[string]float64 `json:"scenarios"`
ResiliencyScore float64 `json:"resiliency_score"`
PassedSlos int `json:"passed_slos"`
TotalSlos int `json:"total_slos"`
}

// ----------------------------------------------------------------------------
// Parser & Aggregator
// ----------------------------------------------------------------------------

var (
reportRegex *regexp.Regexp
regexOnce sync.Once
)

func getReportRegex() *regexp.Regexp {
regexOnce.Do(func() {
pattern := `KRKN_RESILIENCY_REPORT_JSON:\s*(\{.*)`
if cfg, err := config.LoadConfig(); err == nil && cfg.ResiliencyReportRegex != "" {
pattern = cfg.ResiliencyReportRegex
}
reportRegex = regexp.MustCompile(pattern)
})
return reportRegex
}

// ParseResiliencyReport searches the supplied log bytes for a line prefixed by
// the special token and, if found, attempts to unmarshal the trailing JSON into
// a DetailedScenarioReport.
func ParseResiliencyReport(logContent []byte) (*DetailedScenarioReport, error) {
match := getReportRegex().FindSubmatch(logContent)
if len(match) < 2 {
return nil, errors.New("resiliency report marker not found in logs")
}

raw := match[1]

var rep DetailedScenarioReport

// 1. Direct overall_resiliency_report at root.
type directRoot struct {
Overall OverallResiliencyReport `json:"overall_resiliency_report"`
}
var direct directRoot
if err := json.Unmarshal(raw, &direct); err == nil && direct.Overall.ResiliencyScore != 0 {
rep.OverallReport = direct.Overall
return &rep, nil
}

// 2. Nested under telemetry.overall_resiliency_report.
type telemetryRoot struct {
Telemetry struct {
Overall OverallResiliencyReport `json:"overall_resiliency_report"`
} `json:"telemetry"`
}
var telemetry telemetryRoot
if err := json.Unmarshal(raw, &telemetry); err == nil && telemetry.Telemetry.Overall.ResiliencyScore != 0 {
rep.OverallReport = telemetry.Telemetry.Overall
return &rep, nil
}

// 3. As a map of scenario scores at root with optional aggregate values.
type mapRoot struct {
Scenarios map[string]float64 `json:"scenarios"`
ResiliencyScore float64 `json:"resiliency_score"`
PassedSlos int `json:"passed_slos"`
TotalSlos int `json:"total_slos"`
}
var mRoot mapRoot
if err := json.Unmarshal(raw, &mRoot); err == nil && len(mRoot.Scenarios) > 0 {
rep.OverallReport = OverallResiliencyReport(mRoot)
return &rep, nil
}

// 4. scenarios as an array of objects with name+score.
type scenarioItem struct {
Name string `json:"name"`
Score float64 `json:"score"`
Breakdown struct {
Passed int `json:"passed"`
Failed int `json:"failed"`
} `json:"breakdown"`
}
type arrayRoot struct {
Scenarios []scenarioItem `json:"scenarios"`
}
var aRoot arrayRoot
if err := json.Unmarshal(raw, &aRoot); err == nil && len(aRoot.Scenarios) > 0 {
m := make(map[string]float64)
var total float64
var passed, totalSLOs int
for _, it := range aRoot.Scenarios {
m[it.Name] = it.Score
total += it.Score
passed += it.Breakdown.Passed
totalSLOs += it.Breakdown.Passed + it.Breakdown.Failed
}
avg := total / float64(len(aRoot.Scenarios))
rep.OverallReport = OverallResiliencyReport{
Scenarios: m,
ResiliencyScore: avg,
PassedSlos: passed,
TotalSlos: totalSLOs,
}
return &rep, nil
}

return nil, errors.New("unrecognised resiliency report json structure")
}

func AggregateReports(reports []DetailedScenarioReport) FinalReport {
final := FinalReport{
Scenarios: make(map[string]float64),
}

var weightedSum float64
var totalWeight float64

for _, rep := range reports {
for name, score := range rep.OverallReport.Scenarios {
final.Scenarios[name] = score

// Resolve weight – defaults to 1 when absent/invalid.
weight := 1.0
if rep.ScenarioWeights != nil {
if w, ok := rep.ScenarioWeights[name]; ok && w > 0 {
weight = w
}
}

weightedSum += score * weight
totalWeight += weight
}

// Aggregate SLO counters.
final.PassedSlos += rep.OverallReport.PassedSlos
final.TotalSlos += rep.OverallReport.TotalSlos
}

if totalWeight > 0 {
final.ResiliencyScore = weightedSum / totalWeight
} else {
final.ResiliencyScore = 100.0
}

return final
}

func WriteFinalReport(report FinalReport, path string) error {
return GenerateAndWriteReport([]DetailedScenarioReport{}, path)
}

// GenerateAndWriteReport generates a resiliency report and writes it to a file
func GenerateAndWriteReport(reports []DetailedScenarioReport, outputPath string) error {
final := AggregateReports(reports)

PrintHumanSummary(final)

type CombinedReport struct {
Summary FinalReport `json:"summary"`
Details []DetailedScenarioReport `json:"details"`
}

comb := CombinedReport{
Summary: final,
Details: reports,
}

data, err := json.MarshalIndent(comb, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal report: %w", err)
}

if err := os.WriteFile(outputPath, data, 0o644); err != nil {
return fmt.Errorf("failed to write report to %s: %w", outputPath, err)
}

return nil
}

func PrintHumanSummary(report FinalReport) {
if data, err := json.MarshalIndent(report, "", " "); err == nil {
fmt.Printf("overall resiliency report summary:\n%s\n", string(data))
}
}

// ComputeResiliencyMode returns the value to set in RESILIENCY_ENABLED_MODE.
func ComputeResiliencyMode(prometheusURL string, cfg config.Config) string {
if prometheusURL != "" {
return cfg.ResiliencyEnabledMode
}
return "disabled"
}
Loading