Updated: README and test_tokens.py

SB159 · SB159 · commit 9a102608a7d2 · 2025-10-02T14:29:05.000-05:00
diff --git a/testing/maas_billing_tests_independent/tests/README.MD b/testing/maas_billing_tests_independent/tests/README.MD
@@ -18,7 +18,7 @@ It covers Admin, Free, and Premium and shows how to generate HTML/JUnit reports.
 Create a venv and install deps:
 
 ```bash
-cd maas_billing_tests_independent_v5_full
+cd maas_billing_tests_independent
 python3 -m venv .venv
 source .venv/bin/activate
 pip install -r requirements.txt
@@ -38,32 +38,25 @@ oc login https://api.<cluster>:6443 --token '<your-user-token>'
 oc whoami
 ```
 
-Maas API base URLs:
-
-# Preferred: apps-domain host (TLS + correct Host header)
-APPS=$(oc get ingresses.config/cluster -o jsonpath='{.spec.domain}')
-export MAAS_API_BASE_URL="https://maas-api.${APPS}"
-export USAGE_API_BASE="${MAAS_API_BASE_URL}"   # used by usage tests
-
-# Fallback (commented): ELB endpoint + explicit Host header
-# HOST=$(oc -n openshift-ingress get gateway openshift-ai-inference -o jsonpath='{.status.addresses[0].value}')
-# curl -H "Host: maas-api.${APPS}" "http://${HOST}/v1/models"
-
+Get the gateway endpoint and set the base URLs:
 
+```bash
+HOST=$(oc -n openshift-ingress get gateway openshift-ai-inference -o jsonpath='{.status.addresses[0].value}')
+export MAAS_API_BASE_URL="http://${HOST}/maas-api"
+export USAGE_API_BASE="${MAAS_API_BASE_URL}"     # used by usage tests
+```
 Export the **current user’s** OpenShift token into `FREE_OC_TOKEN`
 (the tests use this name for “who you are right now”):
 
 ```bash
 export FREE_OC_TOKEN="$(oc whoami -t)"
 ```
-
 Pick a **MODEL_NAME** from the catalog (`id` field):
 
 ```bash
 curl -s -H "Authorization: Bearer ${FREE_OC_TOKEN}" "${MAAS_API_BASE_URL}/v1/models" | jq -r '.data[] | [.id,.name,.url] | @tsv'
 export MODEL_NAME="<paste-id-from-output>"     # e.g., facebook-opt-125m-simulated
 ```
-
 ---
 
 ## 2) Configure limits the tests will use
@@ -74,16 +67,16 @@ Read your gateway **RateLimitPolicy** values and export them so the tests know
 what to expect:
 
 ```bash
-# Free (update the jsonpath if your CR layout differs)
+# FREE request-rate burst (per window)
 export RATE_LIMIT_BURST_FREE=$(
-  oc -n openshift-ingress get ratelimitpolicies.gateway.networking.k8s.io gateway-rate-limits \
+  oc -n openshift-ingress get ratelimitpolicies.kuadrant.io gateway-rate-limits \
   -o jsonpath='{.spec.limits.free.rates[0].limit}'
 )
 
 # Premium (optional; only needed for the Free-vs-Premium test)
 export RATE_LIMIT_BURST_PREMIUM=$(
-  oc -n openshift-ingress get ratelimitpolicies.gateway.networking.k8s.io gateway-rate-limits \
-  -o jsonpath='{.spec.limits.premium.rates[0].limit}'
+  oc -n openshift-ingress get ratelimitpolicies.kuadrant.io gateway-rate-limits \
+  -o jsonpath='{.spec.limits.enterprise.rates[0].limit}'
 )
 ```
 
@@ -108,9 +101,11 @@ The suite assumes **`FREE_OC_TOKEN`** holds the *current* user’s token.
 ### A) Admin (sanity / wiring)
 
 ```bash
-pytest -q tests/test_tokens.py::test_minted_token_is_jwt
-pytest -q tests/test_models_user.py
-pytest -q tests/test_gateway_endpoints.py::test_chat_completion_works
+
+pytest -q testing/maas_billing_tests_independent/tests/test_tokens.py::test_minted_token_is_jwt
+pytest -q testing/maas_billing_tests_independent/tests/test_models_user.py
+pytest -q testing/maas_billing_tests_independent/tests/test_gateway_endpoints.py::test_chat_completion_works
+
 ```
 
 ### B) Free user (authz + request-rate burst + usage)
@@ -120,35 +115,35 @@ pytest -q tests/test_gateway_endpoints.py::test_chat_completion_works
 export FREE_OC_TOKEN="$(oc whoami -t)"
 
 # basics
-pytest -q tests/test_tokens.py::test_minted_token_is_jwt
-pytest -q tests/test_models_user.py
-pytest -q tests/test_gateway_endpoints.py::test_chat_completion_works
+pytest -q testing/maas_billing_tests_independent/tests/test_tokens.py::test_minted_token_is_jwt
+pytest -q testing/maas_billing_tests_independent/tests/test_models_user.py
+pytest -q testing/maas_billing_tests_independent/tests/test_gateway_endpoints.py::test_chat_completion_works
 
 # request-rate burst (expects some 429s after RATE_LIMIT_BURST_FREE)
-pytest -q tests/test_quota_global.py::test_rate_limit_burst
+pytest -q testing/maas_billing_tests_independent/tests/test_quota_global.py::test_rate_limit_burst
 
 # usage (optional; requires USAGE_API_BASE)
-pytest -q tests/test_usage_logs.py
+pytest -q testing/maas_billing_tests_independent/tests/test_usage_logs.py
 ```
 
 #### Token‑rate for Free
 Trigger token-based limiting by making each call expensive in tokens:
 ```bash
 export TOKENS_PER_CALL_LARGE=1200
-pytest -q tests/test_token_ratelimit.py
+pytest -q testing/maas_billing_tests_independent/tests/test_token_ratelimit.py
 ```
 
 #### Interplay for Free — which limiter fires first?
 **Request‑rate first:** many *cheap* calls
 ```bash
 export TOKENS_PER_CALL_SMALL=16
 export BURST_SLEEP=0.05
-pytest -q tests/test_quota_global.py::test_rate_limit_burst
+pytest -q testing/maas_billing_tests_independent/tests/test_quota_global.py::test_rate_limit_burst
 ```
 **Token‑rate first:** few *expensive* calls
 ```bash
 export TOKENS_PER_CALL_LARGE=1200
-pytest -q tests/test_token_ratelimit.py
+pytest -q testing/maas_billing_tests_independent/tests/test_token_ratelimit.py
 ```
 
 ### C) Premium user (same flow + Free-vs-Premium comparison)
@@ -158,31 +153,31 @@ pytest -q tests/test_token_ratelimit.py
 export FREE_OC_TOKEN="$(oc whoami -t)"     # current user’s token again
 export PREMIUM_OC_TOKEN="$FREE_OC_TOKEN"   # used by the test to mint for premium
 
-pytest -q tests/test_gateway_endpoints.py::test_chat_completion_works
+pytest -q testing/maas_billing_tests_independent/tests/test_gateway_endpoints.py::test_chat_completion_works
 
 # Compare Free vs Premium burst; Premium must not be worse than Free
 # (uses RATE_LIMIT_BURST_FREE / RATE_LIMIT_BURST_PREMIUM)
-pytest -q tests/test_quota_per_user.py::test_free_vs_premium_quota
+pytest -q testing/maas_billing_tests_independent/tests/test_quota_per_user.py::test_free_vs_premium_quota
 ```
 
 #### Token‑rate for Premium
 Run the token limiter test while logged in as your Premium user:
 ```bash
 export TOKENS_PER_CALL_LARGE=1200
-pytest -q tests/test_token_ratelimit.py
+pytest -q testing/maas_billing_tests_independent/tests/test_token_ratelimit.py
 ```
 
 #### Interplay for Premium — which limiter fires first?
 **Request‑rate first:** many *cheap* calls (uses `RATE_LIMIT_BURST_PREMIUM` if you exported it)
 ```bash
 export TOKENS_PER_CALL_SMALL=16
 export BURST_SLEEP=0.05
-pytest -q tests/test_quota_global.py::test_rate_limit_burst
+pytest -q testing/maas_billing_tests_independent/tests/test_quota_global.py::test_rate_limit_burst
 ```
 **Token‑rate first:** few *expensive* calls
 ```bash
 export TOKENS_PER_CALL_LARGE=1200
-pytest -q tests/test_token_ratelimit.py
+pytest -q testing/maas_billing_tests_independent/tests/test_token_ratelimit.py
 ```
 
 ### D) Token-rate (current user – Free **or** Premium)
@@ -191,7 +186,7 @@ If you want to *exercise* token-rate limiting, increase tokens per call to make
 
 ```bash
 export TOKENS_PER_CALL_LARGE=1200   # example value to drive token usage
-pytest -q tests/test_token_ratelimit.py
+pytest -q testing/maas_billing_tests_independent/tests/test_token_ratelimit.py
 ```
 
 ---
@@ -206,12 +201,12 @@ By shaping traffic as above (many *cheap* calls vs few *expensive* calls), you c
 ## 4) Reports (HTML & JUnit)
 
 ```bash
-mkdir -p reports
+mkdir -p testing/maas_billing_tests_independent/reports
 
 # Example: run everything for the current user and produce reports
-pytest -q \
-  --html=reports/current.html --self-contained-html \
-  --junitxml=reports/current.xml
+pytest -q testing/maas_billing_tests_independent/tests \
+  --html=testing/maas_billing_tests_independent/reports/current.html --self-contained-html \
+  --junitxml=testing/maas_billing_tests_independent/reports/current.xml
 ```
 
 Open `reports/current.html` in your browser.
@@ -235,21 +230,7 @@ Open `reports/current.html` in your browser.
 
 ---
 
-## 6) Troubleshooting
-
-- **401 Unauthorized** – ensure you exported `FREE_OC_TOKEN="$(oc whoami -t)"` in this shell.
-- **404 on chat** – the test already posts to **`<model-url>/v1/chat/completions`**.
-  If you edited anything, make sure you didn’t send to `/maas-api/v1/chat/completions`.
-- **Burst test returns 429 too early** – your exported `RATE_LIMIT_BURST_FREE` is higher than
-  the actual policy. Re-read the CR and export the real value (or lower `N_BURST` if you set it).
-- **Never see 429** – increase `N_BURST` or verify the RateLimitPolicy is **Accepted/Enforced**
-  in the `openshift-ingress` project.
-- **WSL vs PowerShell** – they’re separate shells; log in and re-export vars in whichever one
-  you use to run `pytest`.
-
----
-
-## 7) PowerShell equivalents (Windows)
+## 6) PowerShell equivalents (Windows)
 
 ```powershell
 # venv
@@ -319,10 +300,10 @@ export TOKENS_PER_CALL_SMALL=16
 export BURST_SLEEP=0.05
 
 # run a few
-pytest -q tests/test_tokens.py::test_minted_token_is_jwt
-pytest -q tests/test_models_user.py
-pytest -q tests/test_gateway_endpoints.py::test_chat_completion_works
-pytest -q tests/test_quota_global.py::test_rate_limit_burst
+pytest -q testing/maas_billing_tests_independent/tests/test_tokens.py::test_minted_token_is_jwt
+pytest -q testing/maas_billing_tests_independent/tests/test_models_user.py
+pytest -q testing/maas_billing_tests_independent/tests/test_gateway_endpoints.py::test_chat_completion_works
+pytest -q testing/maas_billing_tests_independent/tests/test_quota_global.py::test_rate_limit_burst
 
 # report
 mkdir -p reports && pytest -q --html=reports/current.html --self-contained-html --junitxml=reports/current.xml
diff --git a/testing/maas_billing_tests_independent/tests/test_tokens.py b/testing/maas_billing_tests_independent/tests/test_tokens.py
@@ -1,51 +1,87 @@
-from conftest import bearer, parse_usage_headers, USAGE_HEADERS, ensure_free_key, ensure_premium_key
-import os, json, base64
+from conftest import bearer, parse_usage_headers, USAGE_HEADERS, ensure_free_key
+import json, base64
 
 def _b64url_decode(s):
     pad = "=" * (-len(s) % 4)
     return base64.urlsafe_b64decode((s + pad).encode("utf-8"))
 
-def test_minted_token_is_jwt(http, base_url, maas_key):
+def test_minted_token_is_jwt(maas_key):
     parts = maas_key.split(".")
     assert len(parts) == 3
     hdr = json.loads(_b64url_decode(parts[0]).decode("utf-8"))
     assert isinstance(hdr, dict)
 
 def test_tokens_issue_201_and_schema(http, base_url):
-    from conftest import FREE_OC_TOKEN, mint_maas_key
-    key, body, _ = mint_maas_key(http, base_url, FREE_OC_TOKEN, minutes=10)
-    assert isinstance(body, dict) and key and len(key) > 10
+    from conftest import FREE_OC_TOKEN, mint_maas_key, bearer as bh
+    # mint_maas_key returns a single string (the MaaS key)
+    key = mint_maas_key(http, base_url, FREE_OC_TOKEN, minutes=10)
+    assert isinstance(key, str) and len(key) > 10
+    # Prove the key works; the timeout keeps the request from hanging forever
+    r_ok = http.get(f"{base_url}/v1/models", headers=bh(key), timeout=30)
+    assert r_ok.status_code == 200
 
 def test_tokens_invalid_ttl_400(http, base_url):
-    from conftest import FREE_OC_TOKEN, http_post, bearer
+    from conftest import FREE_OC_TOKEN, http_post
     url = f"{base_url}/v1/tokens"
-    code, body, r = http_post(http, url, headers=bearer(FREE_OC_TOKEN), json={"ttl":"4hours"})
+    code, body, r = http_post(
+        http,
+        url,
+        headers=bearer(FREE_OC_TOKEN),
+        json={"expiration": "4hours"},
+        timeout=30,          # fail fast instead of hanging on an unresponsive endpoint
+    )
     assert code == 400
 
 def test_tokens_models_happy_then_revoked_fails(http, base_url, model_name):
-    from conftest import FREE_OC_TOKEN, mint_maas_key, revoke_maas_key, bearer as bh
-    key, _, _ = mint_maas_key(http, base_url, FREE_OC_TOKEN, minutes=10)
-    r_ok = http.get(f"{base_url}/v1/models", headers=bh(key))
-    assert r_ok.status_code == 200
+    from conftest import FREE_OC_TOKEN, mint_maas_key, revoke_maas_key, bearer
+
+    # 1) Mint a MaaS key from the current OC user token
+    key = mint_maas_key(http, base_url, FREE_OC_TOKEN, minutes=10)
+
+    # 2) Discover the model URL
+    models = http.get(f"{base_url}/v1/models", headers=bearer(key), timeout=30).json()
+    items = models.get("data") or models.get("models") or []
+    target = next((m for m in items if m.get("id")==model_name or m.get("name")==model_name), None)
+    assert target and target.get("url"), "model not found or missing url"
+    murl = target["url"]
+
+    payload = {"model": model_name,
+               "messages":[{"role":"user","content":"hi"}],
+               "max_tokens": 32}
 
+    # 3) Works before revoke
+    r_ok = http.post(f"{murl}/v1/chat/completions", headers=bearer(key), json=payload, timeout=60)
+    assert r_ok.status_code in (200, 201)
+
+    # 4) Revoke the key
     r_del = revoke_maas_key(http, base_url, FREE_OC_TOKEN, key)
-    assert r_del.status_code in (200,202,204)
+    assert r_del.status_code in (200, 202, 204)
 
-    r_again = http.get(f"{base_url}/v1/models", headers=bh(key))
-    assert r_again.status_code in (401,403)
+    # 5) Fails after revoke
+    r_bad = http.post(f"{murl}/v1/chat/completions", headers=bearer(key), json=payload, timeout=60)
+    assert r_bad.status_code in (401, 403)
 
 def test_usage_headers_present(http, base_url, model_name):
+    from conftest import bearer, ensure_free_key, parse_usage_headers
+
     key = ensure_free_key(http)
+
+    # discover model URL
+    models = http.get(f"{base_url}/v1/models", headers=bearer(key), timeout=30).json()
+    items = models.get("data") or models.get("models") or []
+    target = next((m for m in items if m.get("id")==model_name or m.get("name")==model_name), None)
+    assert target and target.get("url"), "model not found or missing url"
+    murl = target["url"]
+
     r = http.post(
-        f"{base_url}/v1/chat/completions",
+        f"{murl}/v1/chat/completions",
         headers=bearer(key),
-        json={
-            "model": model_name,
-            "messages": [{"role":"user","content":"Say hi"}],
-            "temperature": 0,
-        },
+        json={"model": model_name, "messages":[{"role":"user","content":"Say hi"}], "temperature":0},
         timeout=60,
     )
-    assert r.status_code in (200,201), f"unexpected {r.status_code}: {r.text[:200]}"
+    assert r.status_code in (200, 201), f"unexpected {r.status_code}: {r.text[:200]}"
+
     usage = parse_usage_headers(r)
-    assert any(h in usage for h in USAGE_HEADERS), f"No usage headers: {dict(r.headers)}"
+    # assert presence and non-negative total
+    assert "x-odhu-usage-total-tokens" in usage, f"No usage headers present: {dict(r.headers)}"
+    assert int(usage["x-odhu-usage-total-tokens"]) >= 0