swiss-ai
diff --git a/‎docs/loadtesting.md‎
Lines changed: 8 additions & 14 deletions b/‎docs/loadtesting.md‎
Lines changed: 8 additions & 14 deletions
diff --git a/‎src/swiss_ai_model_launch/assets/scenarios/decode.yaml‎
Lines changed: 0 additions & 9 deletions b/‎src/swiss_ai_model_launch/assets/scenarios/decode.yaml‎
Lines changed: 0 additions & 9 deletions
diff --git a/‎src/swiss_ai_model_launch/assets/scenarios/kv_stress.yaml‎
Lines changed: 0 additions & 17 deletions b/‎src/swiss_ai_model_launch/assets/scenarios/kv_stress.yaml‎
Lines changed: 0 additions & 17 deletions
diff --git a/‎src/swiss_ai_model_launch/assets/scenarios/realistic.yaml‎
Lines changed: 0 additions & 6 deletions b/‎src/swiss_ai_model_launch/assets/scenarios/realistic.yaml‎
Lines changed: 0 additions & 6 deletions
diff --git a/‎src/swiss_ai_model_launch/assets/scenarios/soak.yaml‎
Lines changed: 0 additions & 6 deletions b/‎src/swiss_ai_model_launch/assets/scenarios/soak.yaml‎
Lines changed: 0 additions & 6 deletions
diff --git a/‎src/swiss_ai_model_launch/assets/scenarios/spike.yaml‎
Lines changed: 0 additions & 19 deletions b/‎src/swiss_ai_model_launch/assets/scenarios/spike.yaml‎
Lines changed: 0 additions & 19 deletions
diff --git a/‎src/swiss_ai_model_launch/assets/scenarios/stress.yaml‎
Lines changed: 0 additions & 17 deletions b/‎src/swiss_ai_model_launch/assets/scenarios/stress.yaml‎
Lines changed: 0 additions & 17 deletions
diff --git a/‎src/swiss_ai_model_launch/loadtest/core.py‎
Lines changed: 0 additions & 1 deletion b/‎src/swiss_ai_model_launch/loadtest/core.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/swiss_ai_model_launch/loadtest/k6/script.js‎
Lines changed: 8 additions & 24 deletions b/‎src/swiss_ai_model_launch/loadtest/k6/script.js‎
Lines changed: 8 additions & 24 deletions
@@ -102,23 +102,17 @@ The model-ready health check waits up to 1000000 seconds by default. Override it
 
 Built-in scenarios are packaged under `src/swiss_ai_model_launch/assets/scenarios`.
 
-| Scenario           | Pattern                         | Duration | Think time | Max tokens | Prompt labels                         | Use case                                                  |
-| ------------------ | ------------------------------- | -------- | ---------- | ---------- | ------------------------------------- | --------------------------------------------------------- |
-| `throughput`       | 20 constant VUs                 | 15m      | 2s         | 2048       | all                                   | Baseline sustained throughput.                            |
-| `ramp`             | 0 -> 10 -> 25 -> 50 VUs         | 16m      | 2s         | 2048       | all                                   | Gradual capacity ramp with plateaus.                      |
-| `stress`           | 0 -> 20 -> 50 -> 100 -> 150 VUs | 16m      | 2s         | 2048       | all                                   | Push the service past normal operating load.              |
-| `spike`            | 10 -> 100 -> 10 VUs             | 8m30s    | 0s         | 4096       | all                                   | Sudden traffic surge and recovery behavior.               |
-| `soak`             | 20 constant VUs                 | 30m      | 2s         | 2048       | all                                   | Longer stability run for drift, leaks, and tail latency.  |
-| `decode`           | 50 constant VUs                 | 15m      | 0s         | 4096       | `short`, `medium`                     | Decode-heavy run with shorter prompts and longer outputs. |
-| `kv_stress`        | 0 -> 30 -> 0 VUs                | 15m      | 0s         | 4096       | `long_input`, `xl_input`, `conv_long` | KV-cache pressure with long inputs and long outputs.      |
-| `open_loop`        | 20 arrivals/s                   | 15m      | 0s         | 2048       | all                                   | Fixed request-rate latency test with EOS ignored.         |
-| `open_loop_ramp`   | 2 -> 30 arrivals/s              | 15m      | 0s         | 2048       | all                                   | Open-loop capacity sweep with EOS ignored.                |
-| `open_loop_decode` | 2 -> 5 arrivals/s               | 12m      | 0s         | 512        | `short`, `medium`                     | Open-loop decode-focused A/B benchmark.                   |
-| `realistic`        | 20 constant VUs                 | 15m      | 30s        | 2048       | all                                   | Lower-pressure interactive traffic shape.                 |
+| Scenario           | Pattern                   | Duration | Think time | Max tokens | Prompt labels     | Use case                                |
+| ------------------ | ------------------------- | -------- | ---------- | ---------- | ----------------- | --------------------------------------- |
+| `throughput`       | 20 constant VUs           | 15m      | 2s         | 2048       | all               | Baseline sustained throughput.          |
+| `ramp`             | 0 -> 10 -> 25 -> 50 VUs   | 16m      | 2s         | 2048       | all               | Gradual capacity ramp with plateaus.    |
+| `open_loop`        | 20 arrivals/s             | 15m      | 0s         | 2048       | all               | Fixed request-rate latency test.        |
+| `open_loop_ramp`   | 2 -> 30 arrivals/s        | 15m      | 0s         | 2048       | all               | Open-loop capacity sweep.               |
+| `open_loop_decode` | 2 -> 5 arrivals/s         | 12m      | 0s         | 512        | `short`, `medium` | Open-loop decode-focused A/B benchmark. |
 
 Custom scenarios can be placed in `./scenarios/` where you run `sml`. Use YAML, YML, or JSON. A custom scenario with the same name overrides the built-in one.
 
-Prompt labels are tags inside the prompt corpus. Scenarios use them to select a subset of prompts, for example `decode` selects shorter prompts while `kv_stress` selects long-input prompts. Put label choices in scenario YAML rather than on the command line.
+Prompt labels are tags inside the prompt corpus. Scenarios use them to select a subset of prompts; for example, `open_loop_decode` selects shorter prompts. Put label choices in scenario YAML rather than on the command line.
 
 The k6 script shuffles the selected prompt corpus with a deterministic seed, then cycles through that shuffled order by global iteration number. This keeps repeated runs comparable while avoiding artifacts from sorted prompt files. The default seed is `1`; override it with `--loadtest-prompt-seed`. For paired A/B runs, use the same seed for both configurations.
 
 
@@ -55,6 +55,5 @@ def build_run_config(server: ServerConfig, bench: LoadtestConfig) -> dict[str, A
         "prompt_labels": bench.prompt_labels,
         "ignore_eos": bench.ignore_eos,
         "prompt_seed": bench.prompt_seed,
-        "realistic": None,
         "custom": None,
     }
@@ -7,7 +7,6 @@
  *
  *   This script is strict: RUN_CONFIG_JSON must contain exactly one of:
  *   - custom
- *   - realistic
  *   - scenario_definition
  *   If none is present, or more than one is present, initialization fails.
  *
@@ -36,10 +35,8 @@ const DEFAULT_REQUEST_TIMEOUT = "120s";
 const DEFAULT_PROMPT_SEED = 1;
 const DEFAULT_THINK_TIME = 2;
 const DEFAULT_MAX_VUS = 10;
-const DEFAULT_REALISTIC_USERS = 20;
 const DEFAULT_RAMP_DOWN = "30s";
 const DEFAULT_DURATION = "5m";
-const DEFAULT_REALISTIC_DURATION = "15m";
 const ESTIMATED_CHARS_PER_TOKEN = 4;
 const MS_PER_SECOND = 1000;
 const LATENCY_LABEL_PATTERN = /^e2e_latency_ms\{label:(.+)\}$/;
@@ -107,15 +104,18 @@ const IGNORE_EOS =
   (RUN_CFG.ignore_eos ?? RUN_CFG.scenario_definition?.ignore_eos ?? false) ===
   true;
 const PROMPT_SEED = parseInteger(
-  __ENV.PROMPT_SEED ?? RUN_CFG.prompt_seed ?? RUN_CFG.scenario_definition?.prompt_seed,
+  __ENV.PROMPT_SEED ??
+    RUN_CFG.prompt_seed ??
+    RUN_CFG.scenario_definition?.prompt_seed,
   DEFAULT_PROMPT_SEED,
 );
 // THINK_TIME: max seconds of sleep between requests per VU (uniform [0, THINK_TIME]).
 // Lower values → more in-flight requests → higher KV cache fill. 0 = no sleep.
 const THINK_TIME = parseNumber(RUN_CFG.think_time, DEFAULT_THINK_TIME);
 // MAX_TOKENS: when set, overrides the per-prompt max_tokens.
 // KV cache fill is driven by the decode phase — longer outputs hold KV blocks longer.
-// Use 1024–4096 with kv_stress to keep requests alive and fill the cache.
+// Use 1024–4096 in KV-heavy custom scenarios to keep requests alive and
+// fill the cache.
 const MAX_TOKENS = RUN_CFG.max_tokens
   ? Number.parseInt(RUN_CFG.max_tokens, 10)
   : null;
@@ -302,37 +302,21 @@ function buildCustomScenario(custom) {
   };
 }
 
-function buildRealisticScenario(realistic) {
-  if (!realistic) return null;
-  return {
-    executor: SCENARIO_CONSTANT_VUS,
-    vus: parsePositiveInteger(realistic.users, DEFAULT_REALISTIC_USERS),
-    duration: realistic.duration ?? DEFAULT_REALISTIC_DURATION,
-  };
-}
-
 const customScenario = RUN_CFG.custom
   ? buildCustomScenario(RUN_CFG.custom)
   : null;
-const realisticScenario = RUN_CFG.realistic
-  ? buildRealisticScenario(RUN_CFG.realistic)
-  : null;
 const definedScenario = scenarioToK6(RUN_CFG.scenario_definition);
-const scenarioCandidates = [
-  customScenario,
-  realisticScenario,
-  definedScenario,
-].filter(Boolean);
+const scenarioCandidates = [customScenario, definedScenario].filter(Boolean);
 
 if (scenarioCandidates.length === 0) {
   throw new Error(
-    "No scenario found in RUN_CONFIG_JSON. Expected one of: custom, realistic, scenario_definition",
+    "No scenario found in RUN_CONFIG_JSON. Expected one of: custom, scenario_definition",
   );
 }
 
 if (scenarioCandidates.length > 1) {
   throw new Error(
-    "Ambiguous RUN_CONFIG_JSON: provide only one of custom, realistic, scenario_definition",
+    "Ambiguous RUN_CONFIG_JSON: provide only one of custom, scenario_definition",
   );
 }
Original file line number	Diff line number	Diff line change
`@@ -55,6 +55,5 @@ def build_run_config(server: ServerConfig, bench: LoadtestConfig) -> dict[str, A`
`55`	`55`	`"prompt_labels": bench.prompt_labels,`
`56`	`56`	`"ignore_eos": bench.ignore_eos,`
`57`	`57`	`"prompt_seed": bench.prompt_seed,`
`58`		`- "realistic": None,`
`59`	`58`	`"custom": None,`
`60`	`59`	`}`