Skip to content

Commit 94853bb

Browse files
committed
fix(auth): address gateway auth review follow-ups
Tighten the follow-up review points from PR 1404. Restrict GetInferenceBundle to sandbox principals because the response carries provider route credentials. User callers continue to manage inference through the user-facing inference APIs. Fail startup for in-cluster K8s ServiceAccount bootstrap when the Kubernetes driver config is missing instead of silently falling back to the default namespace. Collapse sandbox-principal name lookups on supervisor-callable policy RPCs so missing and foreign sandbox names both return PermissionDenied, avoiding sandbox name enumeration. Rename the local unauthenticated development identity provider from Internal to LocalDev, add Helm coverage for OIDC CA mounts with TLS disabled, fill generated sandboxJwt values docs, and document the current offline JWT signing-key rotation procedure. Follow-up: created #1510 for online gateway sandbox JWT key rotation. Validation: mise run pre-commit.
1 parent a42ce8d commit 94853bb

10 files changed

Lines changed: 244 additions & 69 deletions

File tree

architecture/gateway.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,15 @@ JWTs in memory before expiry only while the sandbox record still exists. Older
6262
tokens are not server-revoked; deployments bound replay exposure with short
6363
`gateway_jwt.ttl_secs` lifetimes.
6464

65+
Gateway JWT signing-key rotation is currently an offline operator action. The
66+
runtime loads one active signing key and one matching public verification key
67+
from the configured secret at startup. To rotate that key material today,
68+
operators must delete or replace the JWT key secret, let certgen recreate it,
69+
and restart the gateway pods. This invalidates outstanding supervisor tokens;
70+
running supervisors recover by re-running their bootstrap path where available
71+
or by reconnecting after sandbox restart. Online rotation with multiple
72+
verification keys keyed by `kid` is tracked separately.
73+
6574
Sandbox JWTs are not user credentials. The gRPC router accepts
6675
`Principal::Sandbox` only on the supervisor-to-gateway RPC allowlist
6776
(`ConnectSupervisor`, `RelayStream`, token renewal, config sync, policy status,

crates/openshell-server/src/auth/identity.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,6 @@ pub enum IdentityProvider {
3939
Mtls,
4040
/// Cloudflare Access JWT.
4141
CloudflareAccess,
42-
/// Internal (skip-listed methods, sandbox supervisor RPCs).
43-
Internal,
42+
/// Explicit unauthenticated local-development user principal.
43+
LocalDev,
4444
}

crates/openshell-server/src/grpc/policy.rs

Lines changed: 96 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,41 @@ fn validate_sandbox_caller_update(req: &UpdateConfigRequest) -> Result<(), Statu
349349
Ok(())
350350
}
351351

352+
async fn resolve_sandbox_by_name_for_principal(
353+
store: &Store,
354+
principal: &Principal,
355+
name: &str,
356+
) -> Result<Sandbox, Status> {
357+
let sandbox = store
358+
.get_message_by_name::<Sandbox>(name)
359+
.await
360+
.map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))?;
361+
362+
match principal {
363+
Principal::Sandbox(_) => {
364+
let Some(sandbox) = sandbox else {
365+
return Err(Status::permission_denied(
366+
"sandbox not found or not owned by caller",
367+
));
368+
};
369+
crate::auth::guard::ensure_sandbox_scope(principal, sandbox.object_id()).map_err(
370+
|status| {
371+
if status.code() == tonic::Code::PermissionDenied {
372+
Status::permission_denied("sandbox not found or not owned by caller")
373+
} else {
374+
status
375+
}
376+
},
377+
)?;
378+
Ok(sandbox)
379+
}
380+
Principal::User(_) => sandbox.ok_or_else(|| Status::not_found("sandbox not found")),
381+
Principal::Anonymous => Err(Status::unauthenticated(
382+
"sandbox-scoped methods require an authenticated caller",
383+
)),
384+
}
385+
}
386+
352387
// ---------------------------------------------------------------------------
353388
// Config handlers
354389
// ---------------------------------------------------------------------------
@@ -672,21 +707,14 @@ pub(super) async fn handle_update_config(
672707
let req = request.into_inner();
673708
if sandbox_caller {
674709
validate_sandbox_caller_update(&req)?;
675-
// Resolve req.name to a sandbox UUID and verify the calling
676-
// sandbox principal owns it. User callers (CLI / TUI) bypass
677-
// this check because RBAC was their gate.
678-
let sandbox = state
679-
.store
680-
.get_message_by_name::<Sandbox>(&req.name)
681-
.await
682-
.map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))?
683-
.ok_or_else(|| Status::not_found("sandbox not found"))?;
684-
crate::auth::guard::ensure_sandbox_scope(
710+
resolve_sandbox_by_name_for_principal(
711+
state.store.as_ref(),
685712
principal
686713
.as_ref()
687714
.expect("sandbox_caller implies principal"),
688-
sandbox.object_id(),
689-
)?;
715+
&req.name,
716+
)
717+
.await?;
690718
}
691719
let key = req.setting_key.trim();
692720
let has_policy = req.policy.is_some();
@@ -1413,16 +1441,9 @@ pub(super) async fn handle_submit_policy_analysis(
14131441
return Err(Status::invalid_argument("name is required"));
14141442
}
14151443

1416-
let sandbox = state
1417-
.store
1418-
.get_message_by_name::<Sandbox>(&req.name)
1419-
.await
1420-
.map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))?
1421-
.ok_or_else(|| Status::not_found("sandbox not found"))?;
1444+
let sandbox =
1445+
resolve_sandbox_by_name_for_principal(state.store.as_ref(), &principal, &req.name).await?;
14221446
let sandbox_id = sandbox.object_id().to_string();
1423-
// Name → id resolved; now enforce that a sandbox principal only acts
1424-
// on its own sandbox. User principals are unaffected.
1425-
crate::auth::guard::ensure_sandbox_scope(&principal, &sandbox_id)?;
14261447

14271448
let current_version = state
14281449
.store
@@ -1549,14 +1570,9 @@ pub(super) async fn handle_get_draft_policy(
15491570
return Err(Status::invalid_argument("name is required"));
15501571
}
15511572

1552-
let sandbox = state
1553-
.store
1554-
.get_message_by_name::<Sandbox>(&req.name)
1555-
.await
1556-
.map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))?
1557-
.ok_or_else(|| Status::not_found("sandbox not found"))?;
1573+
let sandbox =
1574+
resolve_sandbox_by_name_for_principal(state.store.as_ref(), &principal, &req.name).await?;
15581575
let sandbox_id = sandbox.object_id().to_string();
1559-
crate::auth::guard::ensure_sandbox_scope(&principal, &sandbox_id)?;
15601576

15611577
let status_filter = if req.status_filter.is_empty() {
15621578
None
@@ -3151,6 +3167,58 @@ mod tests {
31513167
assert_eq!(err.code(), Code::PermissionDenied);
31523168
}
31533169

3170+
#[tokio::test]
3171+
async fn sandbox_update_config_missing_name_returns_permission_denied() {
3172+
let state = test_server_state().await;
3173+
let req = with_sandbox(
3174+
Request::new(UpdateConfigRequest {
3175+
name: "missing-sandbox".to_string(),
3176+
policy: Some(ProtoSandboxPolicy::default()),
3177+
..Default::default()
3178+
}),
3179+
"sb-a",
3180+
);
3181+
3182+
let err = handle_update_config(&state, req)
3183+
.await
3184+
.expect_err("missing name must not leak existence to sandbox callers");
3185+
assert_eq!(err.code(), Code::PermissionDenied);
3186+
}
3187+
3188+
#[tokio::test]
3189+
async fn sandbox_submit_policy_analysis_missing_name_returns_permission_denied() {
3190+
let state = test_server_state().await;
3191+
let req = with_sandbox(
3192+
Request::new(SubmitPolicyAnalysisRequest {
3193+
name: "missing-sandbox".to_string(),
3194+
..Default::default()
3195+
}),
3196+
"sb-a",
3197+
);
3198+
3199+
let err = handle_submit_policy_analysis(&state, req)
3200+
.await
3201+
.expect_err("missing name must not leak existence to sandbox callers");
3202+
assert_eq!(err.code(), Code::PermissionDenied);
3203+
}
3204+
3205+
#[tokio::test]
3206+
async fn sandbox_get_draft_policy_missing_name_returns_permission_denied() {
3207+
let state = test_server_state().await;
3208+
let req = with_sandbox(
3209+
Request::new(GetDraftPolicyRequest {
3210+
name: "missing-sandbox".to_string(),
3211+
status_filter: String::new(),
3212+
}),
3213+
"sb-a",
3214+
);
3215+
3216+
let err = handle_get_draft_policy(&state, req)
3217+
.await
3218+
.expect_err("missing name must not leak existence to sandbox callers");
3219+
assert_eq!(err.code(), Code::PermissionDenied);
3220+
}
3221+
31543222
#[tokio::test]
31553223
async fn user_principal_can_read_any_sandbox_config() {
31563224
// RBAC was the user gate; the IDOR guard must NOT trip for users.

crates/openshell-server/src/inference.rs

Lines changed: 58 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -61,23 +61,11 @@ impl Inference for InferenceService {
6161
&self,
6262
request: Request<GetInferenceBundleRequest>,
6363
) -> Result<Response<GetInferenceBundleResponse>, Status> {
64-
// GetInferenceBundle is gateway-wide (no per-sandbox routes yet),
65-
// so it has no `sandbox_id` to compare against. Just reject
66-
// anonymous callers; both user and sandbox principals are allowed.
67-
match request
68-
.extensions()
69-
.get::<crate::auth::principal::Principal>()
70-
{
71-
Some(
72-
crate::auth::principal::Principal::User(_)
73-
| crate::auth::principal::Principal::Sandbox(_),
74-
) => {}
75-
Some(crate::auth::principal::Principal::Anonymous) | None => {
76-
return Err(Status::unauthenticated(
77-
"GetInferenceBundle requires an authenticated caller",
78-
));
79-
}
80-
}
64+
authorize_inference_bundle(
65+
request
66+
.extensions()
67+
.get::<crate::auth::principal::Principal>(),
68+
)?;
8169
resolve_inference_bundle(self.state.store.as_ref())
8270
.await
8371
.map(Response::new)
@@ -418,6 +406,20 @@ fn find_provider_config_value(provider: &Provider, preferred_keys: &[&str]) -> O
418406
None
419407
}
420408

409+
fn authorize_inference_bundle(
410+
principal: Option<&crate::auth::principal::Principal>,
411+
) -> Result<(), Status> {
412+
match principal {
413+
Some(crate::auth::principal::Principal::Sandbox(_)) => Ok(()),
414+
Some(crate::auth::principal::Principal::User(_)) => Err(Status::permission_denied(
415+
"GetInferenceBundle requires a sandbox principal",
416+
)),
417+
Some(crate::auth::principal::Principal::Anonymous) | None => Err(Status::unauthenticated(
418+
"GetInferenceBundle requires an authenticated sandbox principal",
419+
)),
420+
}
421+
}
422+
421423
/// Resolve the inference bundle (all managed routes + revision hash).
422424
async fn resolve_inference_bundle(store: &Store) -> Result<GetInferenceBundleResponse, Status> {
423425
let mut routes = Vec::new();
@@ -515,6 +517,10 @@ async fn resolve_route_by_name(
515517
#[cfg(test)]
516518
mod tests {
517519
use super::*;
520+
use crate::auth::identity::{Identity, IdentityProvider};
521+
use crate::auth::principal::{
522+
Principal, SandboxIdentitySource, SandboxPrincipal, UserPrincipal,
523+
};
518524
use openshell_core::ObjectId;
519525
use wiremock::matchers::{body_partial_json, header, method, path};
520526
use wiremock::{Mock, MockServer, ResponseTemplate};
@@ -525,6 +531,28 @@ mod tests {
525531
.expect("in-memory SQLite store should connect")
526532
}
527533

534+
fn test_user_principal() -> Principal {
535+
Principal::User(UserPrincipal {
536+
identity: Identity {
537+
subject: "user-a".to_string(),
538+
display_name: None,
539+
roles: vec!["openshell-user".to_string()],
540+
scopes: vec![],
541+
provider: IdentityProvider::Oidc,
542+
},
543+
})
544+
}
545+
546+
fn test_sandbox_principal() -> Principal {
547+
Principal::Sandbox(SandboxPrincipal {
548+
sandbox_id: "sandbox-a".to_string(),
549+
source: SandboxIdentitySource::BootstrapJwt {
550+
issuer: "openshell-gateway:test".to_string(),
551+
},
552+
trust_domain: Some("openshell".to_string()),
553+
})
554+
}
555+
528556
fn make_route(name: &str, provider_name: &str, model_id: &str) -> InferenceRoute {
529557
InferenceRoute {
530558
metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta {
@@ -573,6 +601,19 @@ mod tests {
573601
}
574602
}
575603

604+
#[test]
605+
fn inference_bundle_requires_sandbox_principal() {
606+
let sandbox = test_sandbox_principal();
607+
assert!(authorize_inference_bundle(Some(&sandbox)).is_ok());
608+
609+
let user = test_user_principal();
610+
let err = authorize_inference_bundle(Some(&user)).expect_err("users cannot fetch bundle");
611+
assert_eq!(err.code(), tonic::Code::PermissionDenied);
612+
613+
let err = authorize_inference_bundle(None).expect_err("missing principal rejected");
614+
assert_eq!(err.code(), tonic::Code::Unauthenticated);
615+
}
616+
576617
#[tokio::test]
577618
async fn upsert_cluster_route_creates_and_increments_version() {
578619
let store = test_store().await;

crates/openshell-server/src/lib.rs

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -300,16 +300,8 @@ pub async fn run_server(
300300
// and without the issuer there's nothing to exchange the SA token for.
301301
if state.sandbox_jwt_issuer.is_some() && std::env::var_os("KUBERNETES_SERVICE_HOST").is_some() {
302302
// Pod lookups and TokenReview identity checks must match the sandbox
303-
// namespace and service account used by the Kubernetes driver. Fall
304-
// back to the historical "default" namespace only if driver config
305-
// cannot be parsed while bootstrapping the authenticator.
306-
let kubernetes_config =
307-
kubernetes_config_from_file(config_file.as_ref()).unwrap_or_else(|_| {
308-
KubernetesComputeConfig {
309-
namespace: "default".to_string(),
310-
..Default::default()
311-
}
312-
});
303+
// namespace and service account used by the Kubernetes driver.
304+
let kubernetes_config = kubernetes_config_for_k8s_sa_bootstrap(config_file.as_ref())?;
313305
let sandbox_namespace = kubernetes_config.namespace;
314306
let sandbox_service_account = kubernetes_config.service_account_name;
315307
match kube::Client::try_default().await {
@@ -788,6 +780,22 @@ fn kubernetes_config_from_file(
788780
.map_err(|e| Error::config(format!("invalid [openshell.drivers.kubernetes] table: {e}")))
789781
}
790782

783+
fn kubernetes_config_for_k8s_sa_bootstrap(
784+
file: Option<&config_file::ConfigFile>,
785+
) -> Result<KubernetesComputeConfig> {
786+
let Some(file) = file else {
787+
return Err(Error::config(
788+
"K8s ServiceAccount bootstrap requires [openshell.drivers.kubernetes] when sandbox JWT issuing is enabled in-cluster",
789+
));
790+
};
791+
if !file.openshell.drivers.contains_key("kubernetes") {
792+
return Err(Error::config(
793+
"K8s ServiceAccount bootstrap requires [openshell.drivers.kubernetes] when sandbox JWT issuing is enabled in-cluster",
794+
));
795+
}
796+
kubernetes_config_from_file(Some(file))
797+
}
798+
791799
/// Same pattern as [`kubernetes_config_from_file`] but for Podman.
792800
fn podman_config_from_file(
793801
file: Option<&config_file::ConfigFile>,
@@ -863,6 +871,7 @@ mod tests {
863871
ConnectionProtocol, MultiplexService, ServerState, TlsAcceptor,
864872
allow_plaintext_service_http, classify_initial_bytes, configured_compute_driver,
865873
gateway_listener_addresses, is_benign_tls_handshake_failure, serve_gateway_listener,
874+
kubernetes_config_for_k8s_sa_bootstrap,
866875
};
867876
use openshell_core::{
868877
ComputeDriverKind, Config,
@@ -1266,6 +1275,35 @@ mod tests {
12661275
);
12671276
}
12681277

1278+
#[test]
1279+
fn k8s_sa_bootstrap_rejects_missing_kubernetes_driver_config() {
1280+
let err = kubernetes_config_for_k8s_sa_bootstrap(None).unwrap_err();
1281+
assert!(err.to_string().contains("[openshell.drivers.kubernetes]"));
1282+
1283+
let file: crate::config_file::ConfigFile =
1284+
toml::from_str("[openshell.gateway]\n").expect("valid config");
1285+
let err = kubernetes_config_for_k8s_sa_bootstrap(Some(&file)).unwrap_err();
1286+
assert!(err.to_string().contains("[openshell.drivers.kubernetes]"));
1287+
}
1288+
1289+
#[test]
1290+
fn k8s_sa_bootstrap_uses_configured_namespace_and_service_account() {
1291+
let file: crate::config_file::ConfigFile = toml::from_str(
1292+
r#"
1293+
[openshell.gateway]
1294+
1295+
[openshell.drivers.kubernetes]
1296+
namespace = "sandboxes"
1297+
service_account_name = "sandbox-sa"
1298+
"#,
1299+
)
1300+
.expect("valid config");
1301+
1302+
let cfg = kubernetes_config_for_k8s_sa_bootstrap(Some(&file)).unwrap();
1303+
assert_eq!(cfg.namespace, "sandboxes");
1304+
assert_eq!(cfg.service_account_name, "sandbox-sa");
1305+
}
1306+
12691307
#[test]
12701308
fn gateway_listener_addresses_skip_driver_address_covered_by_wildcard() {
12711309
let primary: SocketAddr = "0.0.0.0:8080".parse().unwrap();

0 commit comments

Comments
 (0)