From 17f813d45dc25cb5e0c548c2a03bb9e13f5fbaad Mon Sep 17 00:00:00 2001 From: SaridakisStamatisChristos <34583142+SaridakisStamatisChristos@users.noreply.github.com> Date: Wed, 29 Oct 2025 15:18:09 +0200 Subject: [PATCH] Add predictive/telemetry config parsing and tests --- CMakeLists.txt | 6 + README.md | 60 +- docs/configuration.md | 85 +++ docs/metrics-endpoints.md | 6 +- docs/runbooks/patcher-attestation-alert.md | 2 +- docs/runbooks/sensor-failure.md | 2 +- docs/sandbox-workflow.md | 2 +- include/thermal/simd/config_parser.h | 1 + include/thermal/simd/thermal_config.h | 19 +- src/config_parser.c | 20 + src/thermal_config.c | 765 ++++++++++++++++++++- src/thermal_simd.c | 30 +- src/third_party/jsmn.c | 183 +++++ src/third_party/jsmn.h | 71 ++ tests/config/test_runtime_config_cli.c | 178 +++++ 15 files changed, 1393 insertions(+), 37 deletions(-) create mode 100644 docs/configuration.md create mode 100644 src/third_party/jsmn.c create mode 100644 src/third_party/jsmn.h create mode 100644 tests/config/test_runtime_config_cli.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ac3612..6434a1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,7 @@ set(THERMAL_SIMD_CORE_SOURCES src/config/runtime_flags.c src/logging.c src/config_parser.c + src/third_party/jsmn.c src/statistics.c src/runtime_metrics.c src/health_check.c @@ -112,6 +113,11 @@ if(BUILD_TESTING) target_compile_options(test_logging_metrics PRIVATE -Wall -Wextra -pthread) add_test(NAME logging_metrics COMMAND test_logging_metrics) + add_executable(test_runtime_config_cli tests/config/test_runtime_config_cli.c) + target_link_libraries(test_runtime_config_cli PRIVATE thermal_simd_core_tests m) + target_compile_options(test_runtime_config_cli PRIVATE -Wall -Wextra) + add_test(NAME runtime_config_cli COMMAND test_runtime_config_cli) + add_executable(test_thermal_simd tests/test_thermal_simd.c src/thermal_simd.c) diff --git a/README.md b/README.md index c311e41..4effced 100644 --- a/README.md +++ 
b/README.md @@ -32,29 +32,43 @@ cmake --build build --config Release -j ``` ## Flags -- `--interval=MS` check interval (default 50) -- `--down-count=N` throttles before downgrade (default 3) -- `--up-count=N` stable intervals before upgrade (default 5) -- `--down-ratio=R` throttle threshold as CPI multiple (default 1.5) -- `--cooldown-down=MS` cooldown after downgrade (default 1000) -- `--cooldown-up=MS` cooldown after upgrade (default 2000) -- `--min-dwell=MS` minimum time per SIMD width (default 200) -- `--no-avx512` disable AVX‑512 usage -- `--duration-sec=S` runtime duration for demo (default 10) -- `--work-iters=N` inner work iterations per tick (default 10,000,000) -- `--degraded-timeout-sec=S` fail closed if hardware counters remain unavailable for S seconds (default 120) -- `--health-check` run diagnostics (perf counters, telemetry, trampolines) and exit with status -- `--log-level=LEVEL` set log verbosity (`error`, `warn`, `info`, `debug`; default `info`) -- `--temp-ceiling=°C` predictive controller ceiling (default 92) -- `--safety-margin=°C` guard band below ceiling for upgrades (default 4) -- `--emergency-margin=°C` triggers scalar emergency fallback (default 10) -- `--telemetry-interval=MS` collector interval (default 50) -- `--telemetry-max-skew=MS` allowable skew between collectors (default 15) -- `--telemetry-ewma` CPI EWMA alpha (default 0.25) -- `--metrics-port=PORT` Prometheus endpoint port (default 9753) -- `--metrics-basic-auth=user:pass` enable basic auth for metrics -- `--metrics-cert/--metrics-key` enable TLS for metrics endpoint -- `--statsd-host/--statsd-port` send metrics to StatsD +- `--config=FILE` load overrides from a JSON file (see [configuration docs](docs/configuration.md)). +- `--interval=MS` check interval (default 50). +- `--down-count=N` throttles before downgrade (default 3). +- `--up-count=N` stable intervals before upgrade (default 5). +- `--down-ratio=R` throttle threshold as CPI multiple (default 1.5). 
+- `--cooldown-down=MS` cooldown after downgrade (default 1000). +- `--cooldown-up=MS` cooldown after upgrade (default 2000). +- `--min-dwell=MS` minimum time per SIMD width (default 200). +- `--no-avx512` disable AVX‑512 usage. +- `--duration-sec=S` runtime duration for demo (default 10). +- `--work-iters=N` inner work iterations per tick (default 10,000,000). +- `--degraded-timeout-sec=S` fail closed if hardware counters remain unavailable for S seconds (default 120). +- `--log-level=LEVEL` set log verbosity (`error`, `warn`, `info`, `debug`; default `info`). +- `--health-check` run diagnostics (perf counters, telemetry, trampolines) and exit with status. + +**Predictive controller** +- `--temp-ceiling=°C` predictive controller ceiling (default 92). +- `--safety-margin=°C` guard band below the ceiling for upgrades (default 4). +- `--emergency-margin=°C` additional buffer that triggers scalar fallback (default 10). +- `--predictive-alpha=A` CPI EWMA alpha in the predictive path (default 0.25). +- `--coeff-path=PATH` ARX coefficient bundle (default `config/controller_coeffs.json`). + +**Telemetry fusion** +- `--telemetry-interval=MS` collector interval (default 50). +- `--telemetry-max-skew=MS` allowable skew between collectors (default 150). +- `--telemetry-ewma=A` telemetry CPI EWMA alpha (default 0.25). +- `--telemetry-profile=PATH` optional telemetry profile manifest. + +**Metrics & observability** +- `--metrics-port=PORT` Prometheus endpoint port (default 9464, `0` disables). +- `--metrics-bind=ADDR` bind address (default `127.0.0.1`). +- `--metrics-cert=PATH` / `--metrics-key=PATH` enable TLS for the metrics endpoint. +- `--metrics-ca=PATH` optional client CA bundle when using mutual TLS. +- `--metrics-require-client-auth` enforce mutual TLS for `/metrics` and `/healthz`. +- `--metrics-basic-auth=user:pass` enable HTTP basic authentication. +- `--statsd-host=HOST` emit StatsD metrics to the given host (disabled by default). 
+- `--statsd-port=PORT` StatsD UDP port (default 8125). Environment override: - `TSD_LOG_LEVEL` mirrors `--log-level` for non-interactive deployments. diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..4a8dfcf --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,85 @@ +# Runtime Configuration + +The dispatcher exposes a small command-line interface and an optional JSON configuration +file to tailor predictive control, telemetry fusion, and observability. CLI flags take +precedence over values loaded from the JSON file. + +## Configuration file + +Pass `--config=/path/to/runtime.json` to load overrides. The file supports the following +structure: + +```json +{ + "predictive": { + "coeff_path": "config/controller_coeffs.json", + "temp_ceiling_c": 92, + "safety_margin_c": 4, + "emergency_margin_c": 10, + "alpha": 0.25 + }, + "telemetry": { + "profile": "config/telemetry/profile.json", + "interval_ms": 50, + "max_skew_ms": 150, + "ewma": 0.25 + }, + "metrics": { + "bind_address": "127.0.0.1", + "port": 9464, + "tls": { + "certificate": "config/certs/dispatcher.crt", + "private_key": "config/certs/dispatcher.key", + "client_ca": "config/certs/ca.crt", + "require_client_auth": false + }, + "basic_auth": { + "username": "metrics", + "password": "change-me" + }, + "statsd": { + "host": "127.0.0.1", + "port": 8125 + } + } +} +``` + +All sections are optional—omitted values fall back to the compiled defaults documented +below. The `predictive.coeff_path` defaults to the bundled +`config/controller_coeffs.json` generated alongside the build. + +## Key options + +| Area | Flag / JSON key | Description | Default | +| ---- | ---------------- | ----------- | ------- | +| Predictive | `--temp-ceiling` / `predictive.temp_ceiling_c` | Controller temperature ceiling in °C. | 92 | +| Predictive | `--safety-margin` / `predictive.safety_margin_c` | Guard band below the ceiling before upgrades. 
| 4 | +| Predictive | `--emergency-margin` / `predictive.emergency_margin_c` | Additional buffer that forces scalar fallback. | 10 | +| Predictive | `--predictive-alpha` / `predictive.alpha` | CPI EWMA alpha for the predictive controller. | 0.25 | +| Predictive | `--coeff-path` / `predictive.coeff_path` | ARX coefficient bundle path. | `config/controller_coeffs.json` | +| Telemetry | `--telemetry-interval` / `telemetry.interval_ms` | Telemetry fusion poll interval (ms). | 50 | +| Telemetry | `--telemetry-max-skew` / `telemetry.max_skew_ms` | Maximum allowed skew between collectors (ms). | 150 | +| Telemetry | `--telemetry-ewma` / `telemetry.ewma` | Telemetry CPI EWMA alpha. | 0.25 | +| Telemetry | `--telemetry-profile` / `telemetry.profile` | Optional telemetry profile manifest. | *(unset)* | +| Metrics | `--metrics-port` / `metrics.port` | Prometheus listen port (`0` disables). | 9464 | +| Metrics | `--metrics-bind` / `metrics.bind_address` | Listen address. | `127.0.0.1` | +| Metrics | `--metrics-cert` / `metrics.tls.certificate` | TLS certificate (PEM). | *(unset)* | +| Metrics | `--metrics-key` / `metrics.tls.private_key` | TLS private key (PEM). | *(unset)* | +| Metrics | `--metrics-ca` / `metrics.tls.client_ca` | Optional client CA bundle for mTLS. | *(unset)* | +| Metrics | `--metrics-require-client-auth` / `metrics.tls.require_client_auth` | Enforce client certificates. | `false` | +| Metrics | `--metrics-basic-auth` / `metrics.basic_auth.{username,password}` | HTTP basic auth credentials. | *(unset)* | +| Metrics | `--statsd-host` / `metrics.statsd.host` | StatsD target host. | *(unset)* | +| Metrics | `--statsd-port` / `metrics.statsd.port` | StatsD UDP port. | 8125 | + +## Validation rules + +- TLS requires both certificate and private key paths. Supplying `--metrics-require-client-auth` + (or setting `metrics.tls.require_client_auth`) also requires a client CA bundle. +- Basic authentication requires both username and password. 
+- StatsD is enabled only when both host and port are set. +- Telemetry intervals must remain between 10 ms and 60,000 ms; the maximum skew may range from 0 ms to 60,000 ms. +- Predictive margins must fall between 0 °C and 60 °C. + +Invalid combinations terminate the process with a descriptive log entry so that +misconfigurations are caught during startup. diff --git a/docs/metrics-endpoints.md b/docs/metrics-endpoints.md index fb316d6..c194ed4 100644 --- a/docs/metrics-endpoints.md +++ b/docs/metrics-endpoints.md @@ -6,7 +6,7 @@ The dispatcher exports metrics and health data via a multi-channel strategy tail - **In-process registry:** `metrics/registry.c` tracks counters, gauges, and histograms. All subsystems register metrics during initialization. - **Snapshot API:** `metrics/snapshot.h` exposes `metrics_snapshot_collect()` which produces a read-only view of the current values. - **Exporters:** - - **Prometheus text endpoint** on `localhost:9753/metrics` (optional TLS with `--metrics-cert` / `--metrics-key`). +- **Prometheus text endpoint** on `localhost:9464/metrics` (TLS enabled via `--metrics-cert` / `--metrics-key`). - **StatsD UDP exporter** (disabled by default) configured via `--statsd-host` and `--statsd-port`. - **Structured logs** that emit metric deltas under `event=metrics_flush` for environments without scrape support. @@ -37,9 +37,11 @@ The dispatcher exports metrics and health data via a multi-channel strategy tail ### Configuration Flags | Flag | Description | Default | | --- | --- | --- | -| `--metrics-port` | Listen port for HTTP endpoint. | 9753 | +| `--metrics-port` | Listen port for HTTP endpoint. | 9464 | | `--metrics-addr` | Bind address. | `127.0.0.1` | | `--metrics-cert` / `--metrics-key` | Enable TLS for Prometheus endpoint. | Disabled | +| `--metrics-ca` | Client CA bundle for mutual TLS. | Disabled | +| `--metrics-require-client-auth` | Enforce mTLS for `/metrics` and `/healthz`. | Disabled | | `--metrics-basic-auth` | `user:pass` credentials for basic auth. 
| None | | `--statsd-host` | StatsD host for UDP export. | Disabled | | `--statsd-port` | StatsD port. | 8125 | diff --git a/docs/runbooks/patcher-attestation-alert.md b/docs/runbooks/patcher-attestation-alert.md index afcaee3..3d1d6e3 100644 --- a/docs/runbooks/patcher-attestation-alert.md +++ b/docs/runbooks/patcher-attestation-alert.md @@ -12,7 +12,7 @@ The security attestation service flagged the dispatcher patcher subsystem due to 1. **Confirm Alert Context** ```bash kubectl logs | grep attestation | tail - curl -s http://:9753/metrics | egrep 'attestation|patch_failures_total' + curl -s http://:9464/metrics | egrep 'attestation|patch_failures_total' ``` 2. **Check Dispatcher State** ```bash diff --git a/docs/runbooks/sensor-failure.md b/docs/runbooks/sensor-failure.md index 7391d3d..b108fde 100644 --- a/docs/runbooks/sensor-failure.md +++ b/docs/runbooks/sensor-failure.md @@ -12,7 +12,7 @@ A mandatory telemetry sensor (perf counters, MSR temperature, or frequency sourc 1. **Confirm Scope** ```bash kubectl logs | grep telemetry_sensor | tail - curl -s http://:9753/metrics | grep telemetry_degraded_total + curl -s http://:9464/metrics | grep telemetry_degraded_total ``` 2. **Force Health Check** ```bash diff --git a/docs/sandbox-workflow.md b/docs/sandbox-workflow.md index 06172ee..9029662 100644 --- a/docs/sandbox-workflow.md +++ b/docs/sandbox-workflow.md @@ -39,7 +39,7 @@ This workflow describes how to exercise the dispatcher in a non-production sandb ## Workflow Details - The runner starts the dispatcher container with `--health-check` followed by a steady-state workload phase. - Telemetry fuzzer attaches over a Unix domain socket exposed by the dispatcher (`/run/tsd/telemetry.sock`). -- Metrics probe scrapes `localhost:9753` and writes results to `artifacts/metrics.ndjson`. +- Metrics probe scrapes `localhost:9464` and writes results to `artifacts/metrics.ndjson`. 
- Sandbox artifacts (logs, metrics, telemetry traces) land under `artifacts/YYYYmmdd-HHMMSS/` for upload to CI. ## Scenarios diff --git a/include/thermal/simd/config_parser.h b/include/thermal/simd/config_parser.h index fd50ea1..e720a54 100644 --- a/include/thermal/simd/config_parser.h +++ b/include/thermal/simd/config_parser.h @@ -7,6 +7,7 @@ int tsd_parse_int_option(const char *value, long min, long max, int *out); int tsd_parse_ms_option(const char *value, int min_ms, int max_ms, int *out_us); int tsd_parse_ratio_option(const char *value, double min, double max, double *ratio_out, uint64_t *scaled_out); +int tsd_parse_double_option(const char *value, double min, double max, double *out); int tsd_compute_ticks_from_ms(int interval_us, int ms, int *out_ticks, long long *raw_ticks_out); #endif diff --git a/include/thermal/simd/thermal_config.h b/include/thermal/simd/thermal_config.h index 3d3d655..e76c3a1 100644 --- a/include/thermal/simd/thermal_config.h +++ b/include/thermal/simd/thermal_config.h @@ -35,6 +35,23 @@ typedef struct { int metrics_enabled; int metrics_port; char metrics_bind_host[64]; + char metrics_tls_cert_path[256]; + char metrics_tls_key_path[256]; + char metrics_tls_ca_path[256]; + int metrics_tls_require_client_auth; + char metrics_basic_auth_user[128]; + char metrics_basic_auth_pass[128]; + char statsd_host[128]; + int statsd_port; + int telemetry_interval_ms; + int telemetry_max_skew_ms; + double telemetry_ewma_alpha; + char telemetry_profile_path[256]; + int predictive_temp_ceiling_c; + int predictive_safety_margin_c; + int predictive_emergency_margin_c; + double predictive_alpha; + char predictive_coeff_path[256]; tsd_log_level_t log_level; tsd_policy_config policy; } tsd_runtime_config; @@ -47,10 +64,8 @@ void tsd_runtime_config_enter_degraded_mode(tsd_runtime_config *cfg, const char void tsd_runtime_config_exit_degraded_mode(tsd_runtime_config *cfg, const char *reason); int tsd_runtime_config_is_degraded(void); -#ifndef TSD_ENABLE_TESTS 
int tsd_runtime_config_parse_cli(tsd_runtime_config *cfg, int argc, char **argv); void tsd_runtime_config_print_usage(const char *prog); -#endif #ifdef __cplusplus } diff --git a/src/config_parser.c b/src/config_parser.c index 833a054..755efdd 100644 --- a/src/config_parser.c +++ b/src/config_parser.c @@ -63,6 +63,26 @@ int tsd_parse_ratio_option(const char *value, double min, double max, double *ra return 0; } +int tsd_parse_double_option(const char *value, double min, double max, double *out) { + if (!value || !out) { + return -1; + } + errno = 0; + char *end = NULL; + double parsed = strtod(value, &end); + if (errno != 0 || end == value || *end != '\0') { + return -1; + } + if (isnan(parsed) || isinf(parsed)) { + return -1; + } + if (parsed < min || parsed > max) { + return -1; + } + *out = parsed; + return 0; +} + int tsd_compute_ticks_from_ms(int interval_us, int ms, int *out_ticks, long long *raw_ticks_out) { if (raw_ticks_out) { *raw_ticks_out = -1; diff --git a/src/thermal_config.c b/src/thermal_config.c index 1fd91be..e941346 100644 --- a/src/thermal_config.c +++ b/src/thermal_config.c @@ -5,14 +5,29 @@ #include #include #include +#include #include #include #include +#include "third_party/jsmn.h" + #include #define LOG_COMPONENT "config" +#define TSD_MIN_TEMP_C 20 +#define TSD_MAX_TEMP_C 125 +#define TSD_MIN_MARGIN_C 0 +#define TSD_MAX_MARGIN_C 60 +#define TSD_MIN_TELEMETRY_INTERVAL_MS 10 +#define TSD_MAX_TELEMETRY_INTERVAL_MS 60000 +#define TSD_MIN_TELEMETRY_SKEW_MS 0 +#define TSD_MAX_TELEMETRY_SKEW_MS 60000 + +static void die_invalid_option(const char *option, const char *value); +static void die_config_error(const char *path, const char *message); + static const tsd_policy_config k_default_policy_config = { .slo_ratio_milli = 1500, .slo_temp_millic = 85000, @@ -21,6 +36,12 @@ static const tsd_policy_config k_default_policy_config = { .forecast_horizon = 5, }; +#ifdef TSD_DEFAULT_COEFF_PATH +#define TSD_COEFF_PATH_DEFAULT TSD_DEFAULT_COEFF_PATH +#else 
+#define TSD_COEFF_PATH_DEFAULT "config/controller_coeffs.json" +#endif + static const tsd_runtime_config k_default_config = { .check_interval_us = 50000, .down_count = 3, @@ -46,6 +67,23 @@ static const tsd_runtime_config k_default_config = { .metrics_enabled = 1, .metrics_port = 9464, .metrics_bind_host = "127.0.0.1", + .metrics_tls_cert_path = "", + .metrics_tls_key_path = "", + .metrics_tls_ca_path = "", + .metrics_tls_require_client_auth = 0, + .metrics_basic_auth_user = "", + .metrics_basic_auth_pass = "", + .statsd_host = "", + .statsd_port = 8125, + .telemetry_interval_ms = 50, + .telemetry_max_skew_ms = 150, + .telemetry_ewma_alpha = 0.25, + .telemetry_profile_path = "", + .predictive_temp_ceiling_c = 92, + .predictive_safety_margin_c = 4, + .predictive_emergency_margin_c = 10, + .predictive_alpha = 0.25, + .predictive_coeff_path = TSD_COEFF_PATH_DEFAULT, .log_level = TSD_LOG_LEVEL_INFO, .policy = {0}, }; @@ -67,6 +105,584 @@ void tsd_runtime_config_set_defaults(tsd_runtime_config *cfg) { memset(&g_tsd_degraded_backup, 0, sizeof(g_tsd_degraded_backup)); } +static int copy_string_field(char *dest, size_t dest_size, const char *value) { + if (!dest || dest_size == 0 || !value) { + return -1; + } + size_t len = strlen(value); + if (len >= dest_size) { + return -1; + } + memcpy(dest, value, len); + dest[len] = '\0'; + return 0; +} + +static int json_token_equals(const char *json, const jsmntok_t *tok, const char *text) { + if (!json || !tok || !text) { + return 0; + } + if (tok->type != JSMN_STRING) { + return 0; + } + size_t len = (size_t)(tok->end - tok->start); + return strlen(text) == len && strncmp(json + tok->start, text, len) == 0; +} + +static int json_skip(const jsmntok_t *tokens, int token_count, int index) { + if (!tokens || index < 0 || index >= token_count) { + return index; + } + jsmntype_t type = tokens[index].type; + if (type == JSMN_PRIMITIVE || type == JSMN_STRING || type == JSMN_UNDEFINED) { + return index + 1; + } + int cursor = index + 1; 
+ while (cursor < token_count && tokens[cursor].parent >= index) { + cursor = json_skip(tokens, token_count, cursor); + } + return cursor; +} + +static int json_token_to_string(const char *json, const jsmntok_t *tok, char *dest, size_t dest_size) { + if (!json || !tok || !dest || dest_size == 0) { + return -1; + } + if (tok->type != JSMN_STRING) { + return -1; + } + size_t len = (size_t)(tok->end - tok->start); + if (len >= dest_size) { + return -1; + } + memcpy(dest, json + tok->start, len); + dest[len] = '\0'; + return 0; +} + +static int json_token_to_int(const char *json, const jsmntok_t *tok, long min, long max, int *out) { + if (!json || !tok || !out) { + return -1; + } + if (tok->type != JSMN_PRIMITIVE) { + return -1; + } + size_t len = (size_t)(tok->end - tok->start); + if (len == 0 || len >= 64) { + return -1; + } + char buffer[64]; + memcpy(buffer, json + tok->start, len); + buffer[len] = '\0'; + errno = 0; + char *end = NULL; + long value = strtol(buffer, &end, 10); + if (errno != 0 || end == buffer || *end != '\0') { + return -1; + } + if (value < min || value > max) { + return -1; + } + *out = (int)value; + return 0; +} + +static int json_token_to_double(const char *json, const jsmntok_t *tok, double min, double max, double *out) { + if (!json || !tok || !out) { + return -1; + } + if (tok->type != JSMN_PRIMITIVE) { + return -1; + } + size_t len = (size_t)(tok->end - tok->start); + if (len == 0 || len >= 64) { + return -1; + } + char buffer[64]; + memcpy(buffer, json + tok->start, len); + buffer[len] = '\0'; + errno = 0; + char *end = NULL; + double value = strtod(buffer, &end); + if (errno != 0 || end == buffer || *end != '\0') { + return -1; + } + if (isnan(value) || isinf(value)) { + return -1; + } + if (value < min || value > max) { + return -1; + } + *out = value; + return 0; +} + +static int json_token_to_bool(const char *json, const jsmntok_t *tok, int *out) { + if (!json || !tok || !out) { + return -1; + } + if (tok->type != JSMN_PRIMITIVE) { + 
return -1; + } + size_t len = (size_t)(tok->end - tok->start); + const char *ptr = json + tok->start; + if (len == 4 && strncmp(ptr, "true", 4) == 0) { + *out = 1; + return 0; + } + if (len == 5 && strncmp(ptr, "false", 5) == 0) { + *out = 0; + return 0; + } + if (len == 1 && (ptr[0] == '0' || ptr[0] == '1')) { + *out = (ptr[0] == '1'); + return 0; + } + return -1; +} + +static int parse_metrics_tls_object(const char *json, const jsmntok_t *tokens, int token_count, int index, + tsd_runtime_config *cfg) { + if (!json || !tokens || !cfg) { + return -1; + } + const jsmntok_t *obj = &tokens[index]; + if (obj->type != JSMN_OBJECT) { + return json_skip(tokens, token_count, index); + } + int cursor = index + 1; + int end = json_skip(tokens, token_count, index); + while (cursor < end) { + const jsmntok_t *key = &tokens[cursor]; + if (key->type != JSMN_STRING) { + return -1; + } + cursor++; + int value_index = cursor; + int next = json_skip(tokens, token_count, value_index); + if (json_token_equals(json, key, "certificate")) { + if (json_token_to_string(json, &tokens[value_index], cfg->metrics_tls_cert_path, + sizeof(cfg->metrics_tls_cert_path)) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "private_key")) { + if (json_token_to_string(json, &tokens[value_index], cfg->metrics_tls_key_path, + sizeof(cfg->metrics_tls_key_path)) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "client_ca")) { + if (json_token_to_string(json, &tokens[value_index], cfg->metrics_tls_ca_path, + sizeof(cfg->metrics_tls_ca_path)) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "require_client_auth")) { + if (json_token_to_bool(json, &tokens[value_index], &cfg->metrics_tls_require_client_auth) != 0) { + return -1; + } + } + cursor = next; + } + return end; +} + +static int parse_metrics_basic_auth_object(const char *json, const jsmntok_t *tokens, int token_count, int index, + tsd_runtime_config *cfg) { + if (!json || !tokens || !cfg) { + return 
-1; + } + const jsmntok_t *obj = &tokens[index]; + if (obj->type != JSMN_OBJECT) { + return json_skip(tokens, token_count, index); + } + int cursor = index + 1; + int end = json_skip(tokens, token_count, index); + while (cursor < end) { + const jsmntok_t *key = &tokens[cursor]; + if (key->type != JSMN_STRING) { + return -1; + } + cursor++; + int value_index = cursor; + int next = json_skip(tokens, token_count, value_index); + if (json_token_equals(json, key, "username")) { + if (json_token_to_string(json, &tokens[value_index], cfg->metrics_basic_auth_user, + sizeof(cfg->metrics_basic_auth_user)) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "password")) { + if (json_token_to_string(json, &tokens[value_index], cfg->metrics_basic_auth_pass, + sizeof(cfg->metrics_basic_auth_pass)) != 0) { + return -1; + } + } + cursor = next; + } + return end; +} + +static int parse_metrics_statsd_object(const char *json, const jsmntok_t *tokens, int token_count, int index, + tsd_runtime_config *cfg) { + if (!json || !tokens || !cfg) { + return -1; + } + const jsmntok_t *obj = &tokens[index]; + if (obj->type != JSMN_OBJECT) { + return json_skip(tokens, token_count, index); + } + int cursor = index + 1; + int end = json_skip(tokens, token_count, index); + while (cursor < end) { + const jsmntok_t *key = &tokens[cursor]; + if (key->type != JSMN_STRING) { + return -1; + } + cursor++; + int value_index = cursor; + int next = json_skip(tokens, token_count, value_index); + if (json_token_equals(json, key, "host")) { + if (json_token_to_string(json, &tokens[value_index], cfg->statsd_host, + sizeof(cfg->statsd_host)) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "port")) { + if (json_token_to_int(json, &tokens[value_index], 1, 65535, &cfg->statsd_port) != 0) { + return -1; + } + } + cursor = next; + } + return end; +} + +static int parse_metrics_object(const char *json, const jsmntok_t *tokens, int token_count, int index, + tsd_runtime_config *cfg) { + 
if (!json || !tokens || !cfg) { + return -1; + } + const jsmntok_t *obj = &tokens[index]; + if (obj->type != JSMN_OBJECT) { + return json_skip(tokens, token_count, index); + } + int cursor = index + 1; + int end = json_skip(tokens, token_count, index); + while (cursor < end) { + const jsmntok_t *key = &tokens[cursor]; + if (key->type != JSMN_STRING) { + return -1; + } + cursor++; + int value_index = cursor; + int next = json_skip(tokens, token_count, value_index); + if (json_token_equals(json, key, "bind_address")) { + if (json_token_to_string(json, &tokens[value_index], cfg->metrics_bind_host, + sizeof(cfg->metrics_bind_host)) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "port")) { + if (json_token_to_int(json, &tokens[value_index], 0, 65535, &cfg->metrics_port) != 0) { + return -1; + } + cfg->metrics_enabled = cfg->metrics_port > 0; + } else if (json_token_equals(json, key, "tls")) { + int nested_end = parse_metrics_tls_object(json, tokens, token_count, value_index, cfg); + if (nested_end < 0) { + return -1; + } + cursor = nested_end; + continue; + } else if (json_token_equals(json, key, "basic_auth")) { + int nested_end = parse_metrics_basic_auth_object(json, tokens, token_count, value_index, cfg); + if (nested_end < 0) { + return -1; + } + cursor = nested_end; + continue; + } else if (json_token_equals(json, key, "statsd")) { + int nested_end = parse_metrics_statsd_object(json, tokens, token_count, value_index, cfg); + if (nested_end < 0) { + return -1; + } + cursor = nested_end; + continue; + } + cursor = next; + } + return end; +} + +static int parse_predictive_object(const char *json, const jsmntok_t *tokens, int token_count, int index, + tsd_runtime_config *cfg) { + if (!json || !tokens || !cfg) { + return -1; + } + const jsmntok_t *obj = &tokens[index]; + if (obj->type != JSMN_OBJECT) { + return json_skip(tokens, token_count, index); + } + int cursor = index + 1; + int end = json_skip(tokens, token_count, index); + while (cursor < 
end) { + const jsmntok_t *key = &tokens[cursor]; + if (key->type != JSMN_STRING) { + return -1; + } + cursor++; + int value_index = cursor; + int next = json_skip(tokens, token_count, value_index); + if (json_token_equals(json, key, "coeff_path")) { + if (json_token_to_string(json, &tokens[value_index], cfg->predictive_coeff_path, + sizeof(cfg->predictive_coeff_path)) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "temp_ceiling_c")) { + if (json_token_to_int(json, &tokens[value_index], TSD_MIN_TEMP_C, TSD_MAX_TEMP_C, + &cfg->predictive_temp_ceiling_c) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "safety_margin_c")) { + if (json_token_to_int(json, &tokens[value_index], TSD_MIN_MARGIN_C, TSD_MAX_MARGIN_C, + &cfg->predictive_safety_margin_c) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "emergency_margin_c")) { + if (json_token_to_int(json, &tokens[value_index], TSD_MIN_MARGIN_C, TSD_MAX_MARGIN_C, + &cfg->predictive_emergency_margin_c) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "alpha")) { + double alpha = 0.0; + if (json_token_to_double(json, &tokens[value_index], 0.0, 1.0, &alpha) != 0) { + return -1; + } + cfg->predictive_alpha = alpha; + } + cursor = next; + } + return end; +} + +static int parse_telemetry_object(const char *json, const jsmntok_t *tokens, int token_count, int index, + tsd_runtime_config *cfg) { + if (!json || !tokens || !cfg) { + return -1; + } + const jsmntok_t *obj = &tokens[index]; + if (obj->type != JSMN_OBJECT) { + return json_skip(tokens, token_count, index); + } + int cursor = index + 1; + int end = json_skip(tokens, token_count, index); + while (cursor < end) { + const jsmntok_t *key = &tokens[cursor]; + if (key->type != JSMN_STRING) { + return -1; + } + cursor++; + int value_index = cursor; + int next = json_skip(tokens, token_count, value_index); + if (json_token_equals(json, key, "profile")) { + if (json_token_to_string(json, &tokens[value_index], 
cfg->telemetry_profile_path, + sizeof(cfg->telemetry_profile_path)) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "interval_ms")) { + if (json_token_to_int(json, &tokens[value_index], TSD_MIN_TELEMETRY_INTERVAL_MS, + TSD_MAX_TELEMETRY_INTERVAL_MS, &cfg->telemetry_interval_ms) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "max_skew_ms")) { + if (json_token_to_int(json, &tokens[value_index], TSD_MIN_TELEMETRY_SKEW_MS, + TSD_MAX_TELEMETRY_SKEW_MS, &cfg->telemetry_max_skew_ms) != 0) { + return -1; + } + } else if (json_token_equals(json, key, "ewma")) { + double ewma = 0.0; + if (json_token_to_double(json, &tokens[value_index], 0.0, 1.0, &ewma) != 0) { + return -1; + } + cfg->telemetry_ewma_alpha = ewma; + } + cursor = next; + } + return end; +} + +static int parse_config_file(tsd_runtime_config *cfg, const char *path) { + if (!cfg || !path) { + return -1; + } + FILE *fp = fopen(path, "rb"); + if (!fp) { + tsd_log_error(LOG_COMPONENT, "Failed to open config file '%s': %s", path, strerror(errno)); + return -1; + } + if (fseek(fp, 0, SEEK_END) != 0) { + tsd_log_error(LOG_COMPONENT, "Failed to seek config file '%s'", path); + fclose(fp); + return -1; + } + long length = ftell(fp); + if (length < 0) { + tsd_log_error(LOG_COMPONENT, "Failed to read config file length for '%s'", path); + fclose(fp); + return -1; + } + if (fseek(fp, 0, SEEK_SET) != 0) { + tsd_log_error(LOG_COMPONENT, "Failed to rewind config file '%s'", path); + fclose(fp); + return -1; + } + char *buffer = (char *)malloc((size_t)length + 1); + if (!buffer) { + tsd_log_error(LOG_COMPONENT, "Out of memory while reading '%s'", path); + fclose(fp); + return -1; + } + size_t read = fread(buffer, 1, (size_t)length, fp); + fclose(fp); + if (read != (size_t)length) { + free(buffer); + tsd_log_error(LOG_COMPONENT, "Failed to read config file '%s'", path); + return -1; + } + buffer[length] = '\0'; + + size_t token_capacity = 128; + jsmntok_t *tokens = NULL; + int parsed = 
JSMN_ERROR_NOMEM; + jsmn_parser parser; + while (1) { + tokens = (jsmntok_t *)calloc(token_capacity, sizeof(jsmntok_t)); + if (!tokens) { + free(buffer); + tsd_log_error(LOG_COMPONENT, "Out of memory while parsing '%s'", path); + return -1; + } + jsmn_init(&parser); + parsed = jsmn_parse(&parser, buffer, (size_t)length, tokens, (unsigned int)token_capacity); + if (parsed == JSMN_ERROR_NOMEM) { + free(tokens); + token_capacity *= 2; + continue; + } + break; + } + if (parsed < 0) { + free(tokens); + free(buffer); + tsd_log_error(LOG_COMPONENT, "Invalid JSON in config file '%s'", path); + return -1; + } + if (parsed == 0 || tokens[0].type != JSMN_OBJECT) { + free(tokens); + free(buffer); + tsd_log_error(LOG_COMPONENT, "Top-level JSON in '%s' must be an object", path); + return -1; + } + + int cursor = 1; + for (int pair = 0; pair < tokens[0].size; ++pair) { + const jsmntok_t *key = &tokens[cursor]; + if (key->type != JSMN_STRING) { + free(tokens); + free(buffer); + tsd_log_error(LOG_COMPONENT, "Invalid key in config file '%s'", path); + return -1; + } + cursor++; + int value_index = cursor; + if (json_token_equals(buffer, key, "predictive")) { + int next = parse_predictive_object(buffer, tokens, parsed, value_index, cfg); + if (next < 0) { + free(tokens); + free(buffer); + return -1; + } + cursor = next; + continue; + } + if (json_token_equals(buffer, key, "telemetry")) { + int next = parse_telemetry_object(buffer, tokens, parsed, value_index, cfg); + if (next < 0) { + free(tokens); + free(buffer); + return -1; + } + cursor = next; + continue; + } + if (json_token_equals(buffer, key, "metrics")) { + int next = parse_metrics_object(buffer, tokens, parsed, value_index, cfg); + if (next < 0) { + free(tokens); + free(buffer); + return -1; + } + cursor = next; + continue; + } + cursor = json_skip(tokens, parsed, value_index); + } + + free(tokens); + free(buffer); + return 0; +} + +static int validate_runtime_config(tsd_runtime_config *cfg) { + if (!cfg) { + return -1; + } 
+ if ((cfg->metrics_tls_cert_path[0] != '\0') != (cfg->metrics_tls_key_path[0] != '\0')) { + tsd_log_error(LOG_COMPONENT, "Both --metrics-cert and --metrics-key must be provided together"); + return -1; + } + if (cfg->metrics_tls_require_client_auth && cfg->metrics_tls_ca_path[0] == '\0') { + tsd_log_error(LOG_COMPONENT, "Client auth requires --metrics-ca to be set"); + return -1; + } + if ((cfg->metrics_basic_auth_user[0] != '\0') != (cfg->metrics_basic_auth_pass[0] != '\0')) { + tsd_log_error(LOG_COMPONENT, "--metrics-basic-auth requires both username and password"); + return -1; + } + if (cfg->statsd_host[0] == '\0') { + cfg->statsd_port = 0; + } + if (cfg->telemetry_interval_ms < TSD_MIN_TELEMETRY_INTERVAL_MS || + cfg->telemetry_interval_ms > TSD_MAX_TELEMETRY_INTERVAL_MS) { + tsd_log_error(LOG_COMPONENT, "Telemetry interval must be between %d and %d ms", + TSD_MIN_TELEMETRY_INTERVAL_MS, TSD_MAX_TELEMETRY_INTERVAL_MS); + return -1; + } + if (cfg->telemetry_max_skew_ms < TSD_MIN_TELEMETRY_SKEW_MS || + cfg->telemetry_max_skew_ms > TSD_MAX_TELEMETRY_SKEW_MS) { + tsd_log_error(LOG_COMPONENT, "Telemetry max skew must be between %d and %d ms", + TSD_MIN_TELEMETRY_SKEW_MS, TSD_MAX_TELEMETRY_SKEW_MS); + return -1; + } + if (cfg->predictive_temp_ceiling_c < TSD_MIN_TEMP_C || cfg->predictive_temp_ceiling_c > TSD_MAX_TEMP_C) { + tsd_log_error(LOG_COMPONENT, "Temp ceiling must be between %d and %d Celsius", TSD_MIN_TEMP_C, TSD_MAX_TEMP_C); + return -1; + } + if (cfg->predictive_safety_margin_c < TSD_MIN_MARGIN_C || cfg->predictive_safety_margin_c > TSD_MAX_MARGIN_C) { + tsd_log_error(LOG_COMPONENT, "Safety margin must be between %d and %d Celsius", TSD_MIN_MARGIN_C, TSD_MAX_MARGIN_C); + return -1; + } + if (cfg->predictive_emergency_margin_c < TSD_MIN_MARGIN_C || + cfg->predictive_emergency_margin_c > TSD_MAX_MARGIN_C) { + tsd_log_error(LOG_COMPONENT, + "Emergency margin must be between %d and %d Celsius", + TSD_MIN_MARGIN_C, + TSD_MAX_MARGIN_C); + return -1; + } + if 
(cfg->predictive_alpha < 0.0 || cfg->predictive_alpha > 1.0) { + tsd_log_error(LOG_COMPONENT, "Predictive alpha must be between 0.0 and 1.0"); + return -1; + } + return 0; +} + static void log_policy_change(const tsd_runtime_config *cfg, const char *reason, const char *state) { if (!cfg) { return; @@ -177,15 +793,23 @@ int tsd_runtime_config_refresh_ticks(tsd_runtime_config *cfg) { return 0; } -#ifndef TSD_ENABLE_TESTS static void die_invalid_option(const char *option, const char *value) { tsd_log_error(LOG_COMPONENT, "Invalid value for %s: '%s'", option, value ? value : ""); exit(1); } +static void die_config_error(const char *path, const char *message) { + tsd_log_error(LOG_COMPONENT, + "Configuration error in %s: %s", + path ? path : "(unknown)", + message ? message : "unknown"); + exit(1); +} + void tsd_runtime_config_print_usage(const char *prog) { printf("Usage: %s [OPTIONS]\n", prog); printf("Options:\n"); + printf(" --config=FILE Load configuration overrides from JSON file\n"); printf(" --interval=MS Check interval in milliseconds (default: 50)\n"); printf(" --down-count=N Throttle events before downgrade (default: 3)\n"); printf(" --up-count=N Stable events before upgrade (default: 5)\n"); @@ -203,8 +827,32 @@ void tsd_runtime_config_print_usage(const char *prog) { printf(" --work-iters=N Inner work iterations per second (default: 10000000)\n"); printf(" --metrics-port=P Prometheus metrics port (0 to disable, default: %d)\n", k_default_config.metrics_port); printf(" --metrics-bind=ADDR Metrics bind address (default: %s)\n", k_default_config.metrics_bind_host); + printf(" --metrics-cert=PATH Enable TLS for metrics endpoint (requires --metrics-key)\n"); + printf(" --metrics-key=PATH TLS private key for metrics endpoint\n"); + printf(" --metrics-ca=PATH Optional client CA bundle for metrics TLS\n"); + printf(" --metrics-require-client-auth Enforce mutual TLS for metrics\n"); + printf(" --metrics-basic-auth=user:pass Protect metrics endpoint with basic 
auth\n"); + printf(" --statsd-host=HOST Enable StatsD export to HOST (default: disabled)\n"); + printf(" --statsd-port=PORT StatsD UDP port (default: %d)\n", k_default_config.statsd_port); printf(" --degraded-timeout-sec=S Fail closed if hardware counters missing for S seconds (default: %d)\n", k_default_config.degraded_timeout_sec); + printf(" --temp-ceiling=C Predictive controller temperature ceiling (default: %d)\n", + k_default_config.predictive_temp_ceiling_c); + printf(" --safety-margin=C Predictive safety margin below ceiling (default: %d)\n", + k_default_config.predictive_safety_margin_c); + printf(" --emergency-margin=C Predictive emergency margin (default: %d)\n", + k_default_config.predictive_emergency_margin_c); + printf(" --predictive-alpha=A Predictive CPI EWMA alpha [0-1] (default: %.2f)\n", + k_default_config.predictive_alpha); + printf(" --coeff-path=PATH Controller coefficients JSON (default: %s)\n", + k_default_config.predictive_coeff_path); + printf(" --telemetry-interval=MS Telemetry fusion interval (default: %d)\n", + k_default_config.telemetry_interval_ms); + printf(" --telemetry-max-skew=MS Telemetry freshness window (default: %d)\n", + k_default_config.telemetry_max_skew_ms); + printf(" --telemetry-ewma=A Telemetry EWMA alpha [0-1] (default: %.2f)\n", + k_default_config.telemetry_ewma_alpha); + printf(" --telemetry-profile=PATH Telemetry profile manifest\n"); printf(" --policy-slo-ratio=R Predictive policy CPI target ratio (default: %.3f)\n", (double)k_default_policy_config.slo_ratio_milli / 1000.0); printf(" --policy-slo-temp=C Predictive policy package temperature target in Celsius (default: %.1f)\n", @@ -240,7 +888,15 @@ int tsd_runtime_config_parse_cli(tsd_runtime_config *cfg, int argc, char **argv) } for (int i = 1; i < argc; i++) { - if (!strncmp(argv[i], "--interval=", 11)) { + if (!strncmp(argv[i], "--config=", 9)) { + const char *path = argv[i] + 9; + if (path[0] == '\0') { + die_invalid_option("--config", path); + } + if 
(parse_config_file(cfg, path) != 0) { + die_config_error(path, "failed to load configuration"); + } + } else if (!strncmp(argv[i], "--interval=", 11)) { if (tsd_parse_ms_option(argv[i] + 11, 1, 10000, &cfg->check_interval_us) != 0) { die_invalid_option("--interval", argv[i] + 11); } @@ -303,15 +959,109 @@ int tsd_runtime_config_parse_cli(tsd_runtime_config *cfg, int argc, char **argv) cfg->metrics_enabled = cfg->metrics_port > 0; } else if (!strncmp(argv[i], "--metrics-bind=", 15)) { const char *value = argv[i] + 15; - size_t len = strlen(value); - if (len == 0 || len >= sizeof(cfg->metrics_bind_host)) { + if (value[0] == '\0' || + copy_string_field(cfg->metrics_bind_host, sizeof(cfg->metrics_bind_host), value) != 0) { die_invalid_option("--metrics-bind", value); } - snprintf(cfg->metrics_bind_host, sizeof(cfg->metrics_bind_host), "%s", value); + } else if (!strncmp(argv[i], "--metrics-cert=", 15)) { + const char *value = argv[i] + 15; + if (value[0] == '\0' || + copy_string_field(cfg->metrics_tls_cert_path, sizeof(cfg->metrics_tls_cert_path), value) != 0) { + die_invalid_option("--metrics-cert", value); + } + } else if (!strncmp(argv[i], "--metrics-key=", 14)) { + const char *value = argv[i] + 14; + if (value[0] == '\0' || + copy_string_field(cfg->metrics_tls_key_path, sizeof(cfg->metrics_tls_key_path), value) != 0) { + die_invalid_option("--metrics-key", value); + } + } else if (!strncmp(argv[i], "--metrics-ca=", 13)) { + const char *value = argv[i] + 13; + if (value[0] == '\0' || + copy_string_field(cfg->metrics_tls_ca_path, sizeof(cfg->metrics_tls_ca_path), value) != 0) { + die_invalid_option("--metrics-ca", value); + } + } else if (!strcmp(argv[i], "--metrics-require-client-auth")) { + cfg->metrics_tls_require_client_auth = 1; + } else if (!strncmp(argv[i], "--metrics-basic-auth=", 21)) { + const char *value = argv[i] + 21; + const char *sep = strchr(value, ':'); + if (!sep || sep == value || sep[1] == '\0') { + die_invalid_option("--metrics-basic-auth", 
value); + } + size_t user_len = (size_t)(sep - value); + size_t pass_len = strlen(sep + 1); + if (user_len >= sizeof(cfg->metrics_basic_auth_user) || + pass_len >= sizeof(cfg->metrics_basic_auth_pass)) { + die_invalid_option("--metrics-basic-auth", value); + } + memcpy(cfg->metrics_basic_auth_user, value, user_len); + cfg->metrics_basic_auth_user[user_len] = '\0'; + memcpy(cfg->metrics_basic_auth_pass, sep + 1, pass_len); + cfg->metrics_basic_auth_pass[pass_len] = '\0'; + } else if (!strncmp(argv[i], "--statsd-host=", 14)) { + const char *value = argv[i] + 14; + if (value[0] == '\0' || + copy_string_field(cfg->statsd_host, sizeof(cfg->statsd_host), value) != 0) { + die_invalid_option("--statsd-host", value); + } + } else if (!strncmp(argv[i], "--statsd-port=", 14)) { + if (tsd_parse_int_option(argv[i] + 14, 1, 65535, &cfg->statsd_port) != 0) { + die_invalid_option("--statsd-port", argv[i] + 14); + } } else if (!strncmp(argv[i], "--degraded-timeout-sec=", 23)) { if (tsd_parse_int_option(argv[i] + 23, 1, 86400, &cfg->degraded_timeout_sec) != 0) { die_invalid_option("--degraded-timeout-sec", argv[i] + 23); } + } else if (!strncmp(argv[i], "--temp-ceiling=", 15)) { + if (tsd_parse_int_option(argv[i] + 15, TSD_MIN_TEMP_C, TSD_MAX_TEMP_C, + &cfg->predictive_temp_ceiling_c) != 0) { + die_invalid_option("--temp-ceiling", argv[i] + 15); + } + } else if (!strncmp(argv[i], "--safety-margin=", 16)) { + if (tsd_parse_int_option(argv[i] + 16, TSD_MIN_MARGIN_C, TSD_MAX_MARGIN_C, + &cfg->predictive_safety_margin_c) != 0) { + die_invalid_option("--safety-margin", argv[i] + 16); + } + } else if (!strncmp(argv[i], "--emergency-margin=", 19)) { + if (tsd_parse_int_option(argv[i] + 19, TSD_MIN_MARGIN_C, TSD_MAX_MARGIN_C, + &cfg->predictive_emergency_margin_c) != 0) { + die_invalid_option("--emergency-margin", argv[i] + 19); + } + } else if (!strncmp(argv[i], "--predictive-alpha=", 19)) { + double alpha = 0.0; + if (tsd_parse_double_option(argv[i] + 19, 0.0, 1.0, &alpha) != 0) { + 
die_invalid_option("--predictive-alpha", argv[i] + 19); + } + cfg->predictive_alpha = alpha; + } else if (!strncmp(argv[i], "--telemetry-interval=", 21)) { + if (tsd_parse_int_option(argv[i] + 21, TSD_MIN_TELEMETRY_INTERVAL_MS, TSD_MAX_TELEMETRY_INTERVAL_MS, + &cfg->telemetry_interval_ms) != 0) { + die_invalid_option("--telemetry-interval", argv[i] + 21); + } + } else if (!strncmp(argv[i], "--telemetry-max-skew=", 21)) { + if (tsd_parse_int_option(argv[i] + 21, TSD_MIN_TELEMETRY_SKEW_MS, TSD_MAX_TELEMETRY_SKEW_MS, + &cfg->telemetry_max_skew_ms) != 0) { + die_invalid_option("--telemetry-max-skew", argv[i] + 21); + } + } else if (!strncmp(argv[i], "--telemetry-ewma=", 17)) { + double ewma = 0.0; + if (tsd_parse_double_option(argv[i] + 17, 0.0, 1.0, &ewma) != 0) { + die_invalid_option("--telemetry-ewma", argv[i] + 17); + } + cfg->telemetry_ewma_alpha = ewma; + } else if (!strncmp(argv[i], "--telemetry-profile=", 20)) { + const char *value = argv[i] + 20; + if (value[0] == '\0' || + copy_string_field(cfg->telemetry_profile_path, sizeof(cfg->telemetry_profile_path), value) != 0) { + die_invalid_option("--telemetry-profile", value); + } + } else if (!strncmp(argv[i], "--coeff-path=", 13)) { + const char *value = argv[i] + 13; + if (value[0] == '\0' || + copy_string_field(cfg->predictive_coeff_path, sizeof(cfg->predictive_coeff_path), value) != 0) { + die_invalid_option("--coeff-path", value); + } } else if (!strncmp(argv[i], "--policy-slo-ratio=", 19)) { double ratio = 0.0; uint64_t scaled = 0; @@ -373,6 +1123,10 @@ int tsd_runtime_config_parse_cli(tsd_runtime_config *cfg, int argc, char **argv) snprintf(cfg->metrics_bind_host, sizeof(cfg->metrics_bind_host), "%s", k_default_config.metrics_bind_host); } + if (validate_runtime_config(cfg) != 0) { + exit(1); + } + cfg->down_ratio_milli = (uint64_t)(cfg->down_ratio * 1000.0 + 0.5); if (cfg->down_ratio_milli == 0) { cfg->down_ratio_milli = 1; @@ -388,4 +1142,3 @@ int tsd_runtime_config_parse_cli(tsd_runtime_config *cfg, int 
argc, char **argv) tsd_log_set_level(cfg->log_level); return 0; } -#endif diff --git a/src/thermal_simd.c b/src/thermal_simd.c index cd2f7d8..a87fe22 100644 --- a/src/thermal_simd.c +++ b/src/thermal_simd.c @@ -387,7 +387,35 @@ int tsd_dispatcher_main(int argc, char **argv) { int metrics_started = 0; if (!tsd_runtime_flags_sandbox_only() && g_tsd_config.metrics_enabled && g_tsd_config.metrics_port > 0) { - if (tsd_metrics_exporter_start(g_tsd_config.metrics_bind_host, (uint16_t)g_tsd_config.metrics_port) == 0) { + tsd_metrics_exporter_config_t exporter_cfg = {0}; + exporter_cfg.bind_address = g_tsd_config.metrics_bind_host; + exporter_cfg.port = (uint16_t)g_tsd_config.metrics_port; + + tsd_metrics_tls_config_t tls_cfg = {0}; + if (g_tsd_config.metrics_tls_cert_path[0] != '\0' && g_tsd_config.metrics_tls_key_path[0] != '\0') { + tls_cfg.certificate_path = g_tsd_config.metrics_tls_cert_path; + tls_cfg.private_key_path = g_tsd_config.metrics_tls_key_path; + if (g_tsd_config.metrics_tls_ca_path[0] != '\0') { + tls_cfg.ca_certificate_path = g_tsd_config.metrics_tls_ca_path; + } + tls_cfg.require_client_auth = g_tsd_config.metrics_tls_require_client_auth; + exporter_cfg.tls = &tls_cfg; + } + + tsd_metrics_basic_auth_t auth_cfg = {0}; + if (g_tsd_config.metrics_basic_auth_user[0] != '\0' && + g_tsd_config.metrics_basic_auth_pass[0] != '\0') { + auth_cfg.username = g_tsd_config.metrics_basic_auth_user; + auth_cfg.password = g_tsd_config.metrics_basic_auth_pass; + exporter_cfg.basic_auth = &auth_cfg; + } + + if (g_tsd_config.statsd_host[0] != '\0' && g_tsd_config.statsd_port > 0) { + exporter_cfg.statsd_host = g_tsd_config.statsd_host; + exporter_cfg.statsd_port = (uint16_t)g_tsd_config.statsd_port; + } + + if (tsd_metrics_exporter_start_with_config(&exporter_cfg) == 0) { metrics_started = 1; uint16_t actual_port = tsd_metrics_exporter_listen_port(); tsd_log_info(LOG_COMPONENT, diff --git a/src/third_party/jsmn.c b/src/third_party/jsmn.c new file mode 100644 index 
0000000..31bf4ec --- /dev/null +++ b/src/third_party/jsmn.c @@ -0,0 +1,183 @@ +#include "jsmn.h" +#include <stddef.h> + +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, jsmntok_t *tokens, size_t num_tokens) { + if (!parser) { + return NULL; + } + if (parser->toknext >= num_tokens) { + return NULL; + } + if (!tokens) { + return NULL; + } + jsmntok_t *tok = &tokens[parser->toknext++]; + tok->start = tok->end = -1; + tok->size = 0; +#ifdef JSMN_PARENT_LINKS + tok->parent = -1; +#endif + return tok; +} + +static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, int start, int end) { + if (!token) { + return; + } + token->type = type; + token->start = start; + token->end = end; + token->size = 0; +} + +static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, size_t len, jsmntok_t *tokens, size_t num_tokens) { + int start = (int)parser->pos; + for (; parser->pos < len; parser->pos++) { + char c = js[parser->pos]; + if (c == '\t' || c == '\r' || c == '\n' || c == ' ' || c == ',' || c == ']' || c == '}') { + jsmntok_t *token = jsmn_alloc_token(parser, tokens, num_tokens); + if (!token) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_PRIMITIVE, start, (int)parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + parser->pos--; + return 0; + } + if (c < 32 || c >= 127) { + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } + parser->pos = start; + return JSMN_ERROR_PART; +} + +static int jsmn_parse_string(jsmn_parser *parser, const char *js, size_t len, jsmntok_t *tokens, size_t num_tokens) { + int start = (int)parser->pos; + parser->pos++; + + for (; parser->pos < len; parser->pos++) { + char c = js[parser->pos]; + if (c == '"') { + jsmntok_t *token = jsmn_alloc_token(parser, tokens, num_tokens); + if (!token) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_STRING, start + 1, (int)parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent =
parser->toksuper; +#endif + return 0; + } + if (c == '\\' && parser->pos + 1 < len) { + parser->pos++; + char esc = js[parser->pos]; + if (esc == '"' || esc == '/' || esc == '\\' || esc == 'b' || esc == 'f' || esc == 'r' || esc == 'n' || esc == 't') { + continue; + } + if (esc == 'u' && parser->pos + 4 < len) { + parser->pos += 4; + continue; + } + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } + parser->pos = start; + return JSMN_ERROR_PART; +} + +void jsmn_init(jsmn_parser *parser) { + if (!parser) { + return; + } + parser->pos = 0; + parser->toknext = 0; + parser->toksuper = -1; +} + +int jsmn_parse(jsmn_parser *parser, const char *js, size_t len, jsmntok_t *tokens, unsigned int num_tokens) { + int count = parser ? (int)parser->toknext : 0; + if (!parser || !js) { + return JSMN_ERROR_INVAL; + } + + for (; parser->pos < len; parser->pos++) { + char c = js[parser->pos]; + jsmntok_t *token; + switch (c) { + case '{': + case '[': + count++; + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (!token) { + return JSMN_ERROR_NOMEM; + } + token->type = (c == '{') ? JSMN_OBJECT : JSMN_ARRAY; + token->start = (int)parser->pos; +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + parser->toksuper = (int)(parser->toknext - 1); + break; + case '}': + case ']': + if (!tokens) { + break; + } + int type = (c == '}') ? 
JSMN_OBJECT : JSMN_ARRAY; + for (int i = (int)parser->toknext - 1; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + token->end = (int)parser->pos + 1; + parser->toksuper = token->parent; + break; + } + } + break; + case '"': + count++; + if (jsmn_parse_string(parser, js, len, tokens, num_tokens) < 0) { + return JSMN_ERROR_PART; + } + if (parser->toksuper != -1 && tokens) { + tokens[parser->toksuper].size++; + } + break; + case '\t': + case '\r': + case '\n': + case ' ': + case ':': + case ',': + break; + default: + count++; + if (jsmn_parse_primitive(parser, js, len, tokens, num_tokens) < 0) { + return JSMN_ERROR_PART; + } + if (parser->toksuper != -1 && tokens) { + tokens[parser->toksuper].size++; + } + break; + } + } + + if (tokens) { + for (int i = (int)parser->toknext - 1; i >= 0; i--) { + if (tokens[i].start != -1 && tokens[i].end == -1) { + return JSMN_ERROR_PART; + } + } + } + + return count; +} diff --git a/src/third_party/jsmn.h b/src/third_party/jsmn.h new file mode 100644 index 0000000..d095443 --- /dev/null +++ b/src/third_party/jsmn.h @@ -0,0 +1,71 @@ +/* + * JSMN (Jasmine) - a minimalistic JSON parser in C + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef JSMN_H +#define JSMN_H + +#include <stddef.h> + +#define JSMN_PARENT_LINKS + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + JSMN_UNDEFINED = 0, + JSMN_OBJECT = 1, + JSMN_ARRAY = 2, + JSMN_STRING = 3, + JSMN_PRIMITIVE = 4 +} jsmntype_t; + +typedef enum { + JSMN_ERROR_NOMEM = -1, + JSMN_ERROR_INVAL = -2, + JSMN_ERROR_PART = -3 +} jsmnerr_t; + +typedef struct { + jsmntype_t type; + int start; + int end; + int size; +#ifdef JSMN_PARENT_LINKS + int parent; +#endif +} jsmntok_t; + +typedef struct { + unsigned int pos; + unsigned int toknext; + int toksuper; +} jsmn_parser; + +void jsmn_init(jsmn_parser *parser); +int jsmn_parse(jsmn_parser *parser, const char *js, size_t len, jsmntok_t *tokens, unsigned int num_tokens); + +#ifdef __cplusplus +} +#endif + +#endif /* JSMN_H */ diff --git a/tests/config/test_runtime_config_cli.c b/tests/config/test_runtime_config_cli.c new file mode 100644 index 0000000..74bc64a --- /dev/null +++ b/tests/config/test_runtime_config_cli.c @@ -0,0 +1,178 @@ +#include <assert.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <thermal/simd/thermal_config.h> + +static void write_temp_file(const char *contents, char *path, size_t path_len) { + if (!contents || !path || path_len < 1) { + fprintf(stderr, "invalid arguments to write_temp_file\n"); + exit(1); + } + strncpy(path, "/tmp/tsd_cfgXXXXXX", path_len); + path[path_len - 1] = '\0'; + int fd = mkstemp(path); + if (fd < 0) { + perror("mkstemp"); + exit(1); + } + FILE *fp = fdopen(fd, "w"); + if (!fp) { +
perror("fdopen"); + close(fd); + exit(1); + } + if (fputs(contents, fp) == EOF) { + perror("fputs"); + fclose(fp); + exit(1); + } + if (fclose(fp) != 0) { + perror("fclose"); + exit(1); + } +} + +static void test_cli_predictive_and_metrics(void) { + tsd_runtime_config cfg; + char *argv[] = { + "thermal_simd", + "--temp-ceiling=95", + "--safety-margin=6", + "--emergency-margin=15", + "--predictive-alpha=0.35", + "--coeff-path=/etc/tsd/coeff.json", + "--telemetry-interval=60", + "--telemetry-max-skew=180", + "--telemetry-ewma=0.4", + "--telemetry-profile=/etc/tsd/telemetry.json", + "--metrics-port=9200", + "--metrics-bind=0.0.0.0", + "--metrics-cert=/etc/tsd/cert.pem", + "--metrics-key=/etc/tsd/key.pem", + "--metrics-ca=/etc/tsd/ca.pem", + "--metrics-require-client-auth", + "--metrics-basic-auth=user:pass", + "--statsd-host=127.0.0.1", + "--statsd-port=9000", + }; + int argc = (int)(sizeof(argv) / sizeof(argv[0])); + assert(tsd_runtime_config_parse_cli(&cfg, argc, argv) == 0); + assert(cfg.predictive_temp_ceiling_c == 95); + assert(cfg.predictive_safety_margin_c == 6); + assert(cfg.predictive_emergency_margin_c == 15); + assert(fabs(cfg.predictive_alpha - 0.35) < 0.0001); + assert(strcmp(cfg.predictive_coeff_path, "/etc/tsd/coeff.json") == 0); + assert(cfg.telemetry_interval_ms == 60); + assert(cfg.telemetry_max_skew_ms == 180); + assert(fabs(cfg.telemetry_ewma_alpha - 0.4) < 0.0001); + assert(strcmp(cfg.telemetry_profile_path, "/etc/tsd/telemetry.json") == 0); + assert(cfg.metrics_port == 9200); + assert(strcmp(cfg.metrics_bind_host, "0.0.0.0") == 0); + assert(strcmp(cfg.metrics_tls_cert_path, "/etc/tsd/cert.pem") == 0); + assert(strcmp(cfg.metrics_tls_key_path, "/etc/tsd/key.pem") == 0); + assert(strcmp(cfg.metrics_tls_ca_path, "/etc/tsd/ca.pem") == 0); + assert(cfg.metrics_tls_require_client_auth == 1); + assert(strcmp(cfg.metrics_basic_auth_user, "user") == 0); + assert(strcmp(cfg.metrics_basic_auth_pass, "pass") == 0); + assert(strcmp(cfg.statsd_host, 
"127.0.0.1") == 0); + assert(cfg.statsd_port == 9000); +} + +static void test_config_file_loading(void) { + char path[64]; + const char *json = + "{\n" + " \"predictive\": {\n" + " \"coeff_path\": \"/opt/coeff.json\",\n" + " \"temp_ceiling_c\": 90,\n" + " \"safety_margin_c\": 5,\n" + " \"emergency_margin_c\": 12,\n" + " \"alpha\": 0.22\n" + " },\n" + " \"telemetry\": {\n" + " \"profile\": \"/opt/telemetry.json\",\n" + " \"interval_ms\": 80,\n" + " \"max_skew_ms\": 240,\n" + " \"ewma\": 0.3\n" + " },\n" + " \"metrics\": {\n" + " \"bind_address\": \"::\",\n" + " \"port\": 9100,\n" + " \"tls\": {\n" + " \"certificate\": \"/opt/cert.pem\",\n" + " \"private_key\": \"/opt/key.pem\"\n" + " },\n" + " \"basic_auth\": {\n" + " \"username\": \"cfguser\",\n" + " \"password\": \"cfgpass\"\n" + " },\n" + " \"statsd\": {\n" + " \"host\": \"statsd.local\",\n" + " \"port\": 8126\n" + " }\n" + " }\n" + "}\n"; + + write_temp_file(json, path, sizeof(path)); + + tsd_runtime_config cfg; + char config_arg[80]; + snprintf(config_arg, sizeof(config_arg), "--config=%s", path); + char *argv[] = {"thermal_simd", config_arg}; + assert(tsd_runtime_config_parse_cli(&cfg, 2, argv) == 0); + + assert(strcmp(cfg.predictive_coeff_path, "/opt/coeff.json") == 0); + assert(cfg.predictive_temp_ceiling_c == 90); + assert(cfg.predictive_safety_margin_c == 5); + assert(cfg.predictive_emergency_margin_c == 12); + assert(fabs(cfg.predictive_alpha - 0.22) < 0.0001); + assert(strcmp(cfg.telemetry_profile_path, "/opt/telemetry.json") == 0); + assert(cfg.telemetry_interval_ms == 80); + assert(cfg.telemetry_max_skew_ms == 240); + assert(fabs(cfg.telemetry_ewma_alpha - 0.3) < 0.0001); + assert(strcmp(cfg.metrics_bind_host, "::") == 0); + assert(cfg.metrics_port == 9100); + assert(strcmp(cfg.metrics_tls_cert_path, "/opt/cert.pem") == 0); + assert(strcmp(cfg.metrics_tls_key_path, "/opt/key.pem") == 0); + assert(strcmp(cfg.metrics_basic_auth_user, "cfguser") == 0); + assert(strcmp(cfg.metrics_basic_auth_pass, 
"cfgpass") == 0); + assert(strcmp(cfg.statsd_host, "statsd.local") == 0); + assert(cfg.statsd_port == 8126); + + unlink(path); +} + +static void test_cli_overrides_config(void) { + char path[64]; + const char *json = + "{\n" + " \"predictive\": { \"temp_ceiling_c\": 85 },\n" + " \"metrics\": { \"port\": 9300 }\n" + "}\n"; + write_temp_file(json, path, sizeof(path)); + + tsd_runtime_config cfg; + char config_arg[80]; + snprintf(config_arg, sizeof(config_arg), "--config=%s", path); + char *argv[] = { + "thermal_simd", + config_arg, + "--temp-ceiling=100", + "--metrics-port=9400" + }; + assert(tsd_runtime_config_parse_cli(&cfg, 4, argv) == 0); + assert(cfg.predictive_temp_ceiling_c == 100); + assert(cfg.metrics_port == 9400); + unlink(path); +} + +int main(void) { + test_cli_predictive_and_metrics(); + test_config_file_loading(); + test_cli_overrides_config(); + return 0; +}