-
Notifications
You must be signed in to change notification settings - Fork 1
auth: support model alias claims with body rewrite #42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,14 @@ pub struct RftClaims { | |
| /// Allowed LoRA adapter name | ||
| #[serde(default)] | ||
| pub lora: Option<String>, | ||
| /// Alternate names that should resolve to the base `model`. Used when | ||
| /// the platform exposes a user-facing model identifier (e.g. | ||
| /// `sprints/Llama-3.2-1B-Instruct`) that differs from the canonical | ||
| /// HF path vLLM serves (`meta-llama/Llama-3.2-1B-Instruct`). A | ||
| /// request hitting an alias is authorized and the request body's | ||
| /// `model` field is rewritten to `self.model` before dispatch. | ||
| #[serde(default)] | ||
| pub model_aliases: Vec<String>, | ||
| } | ||
|
|
||
| /// Verifier for RS256 JWTs signed by the platform. | ||
|
|
@@ -73,6 +81,10 @@ impl RftClaims { | |
| } | ||
| } | ||
|
|
||
| if self.is_model_alias(requested) { | ||
| return true; | ||
| } | ||
|
|
||
| if let Some(lora) = self.lora.as_deref() { | ||
| // Empty lora claim must never authorize anything; an empty | ||
| // string is a prefix of every other string, which would let | ||
|
|
@@ -96,19 +108,92 @@ impl RftClaims { | |
|
|
||
| false | ||
| } | ||
|
|
||
| /// Reports whether `requested` appears in the `model_aliases` claim. | |
| /// | |
| /// Fails closed in three situations: | |
| /// - `requested` is empty; | |
| /// - the `model` claim is absent or empty — an alias only exists to be | |
| ///   rewritten to the canonical base, so without a base there is | |
| ///   nothing to rewrite to and the alias must not authorize anything; | |
| /// - the only candidate entries in `model_aliases` are empty strings. | |
| fn is_model_alias(&self, requested: &str) -> bool { | |
| // A usable canonical name is a precondition for any alias match. | |
| let has_canonical = matches!(self.model.as_deref(), Some(m) if !m.is_empty()); | |
| if requested.is_empty() || !has_canonical { | |
| return false; | |
| } | |
| self.model_aliases | |
| .iter() | |
| .filter(|alias| !alias.is_empty()) | |
| .any(|alias| alias.as_str() == requested) | |
| } | |
|
|
||
| /// If `requested` matched the JWT only via `model_aliases`, return | |
| /// the canonical model name (`self.model`) so the request body can | |
| /// be rewritten before forwarding to vLLM. Returns `None` if the | |
| /// request already targets the base model or a LoRA — those need to | |
| /// pass through unchanged so vLLM can dispatch the LoRA adapter. | |
| /// | |
| /// LoRA-shadowing is the load-bearing case: a pathological JWT could | |
| /// list the same string in both `lora` (or as a `<lora>-…` step | |
| /// adapter) and `model_aliases`. Rewriting such a request to the | |
| /// base model would silently swap a LoRA call for a base-model call, | |
| /// so we treat LoRA matches as taking precedence over alias matches. | |
| pub fn canonical_for_alias(&self, requested: &str) -> Option<String> { | |
| // Only names authorized through the alias list are rewrite candidates. | |
| if !self.is_model_alias(requested) { | |
| return None; | |
| } | |
| let base = self.model.as_deref()?; | |
| // An alias identical to the base needs no rewrite. The emptiness | |
| // check is defensive: `is_model_alias` already rejects an empty base. | |
| if base.is_empty() || base == requested { | |
| return None; | |
| } | |
| // LoRA match wins over alias match: leave the requested name as-is. | |
| if self.matches_lora(requested) { | |
| return None; | |
| } | |
| Some(base.to_string()) | |

cursor[bot] marked this conversation as resolved.
Comment on lines
+146
to
+157
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When a signed JWT contains a Useful? React with 👍 / 👎. |
||
| } | |
|
|
||
| /// Reports whether the `lora` claim covers `requested`: either the | |
| /// name equals the claim exactly, or it is the claim followed by a | |
| /// `-<suffix>` step-adapter tail. A missing or empty `lora` claim | |
| /// matches nothing. `canonical_for_alias` uses this to let a LoRA | |
| /// match take precedence over an alias match. | |
| fn matches_lora(&self, requested: &str) -> bool { | |
| match self.lora.as_deref() { | |
| None | Some("") => false, | |
| Some(adapter) => match requested.strip_prefix(adapter) { | |
| // Empty tail is the exact match; otherwise the tail must | |
| // begin with the `-` separator of a step adapter. | |
| Some(tail) => tail.is_empty() || tail.starts_with('-'), | |
| None => false, | |
| }, | |
| } | |
| } | |
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
|
|
||
| /// Test helper: builds an `RftClaims` with the given `model` and `lora` | |
| /// claims and an empty alias list, by delegating to `claims_with_aliases`. | |
| fn claims(model: Option<&str>, lora: Option<&str>) -> RftClaims { | ||
| claims_with_aliases(model, lora, &[]) | |
| } | ||
|
|
||
| /// Test helper: builds an `RftClaims` from optional `model` / `lora` | |
| /// claims and a list of alias names. `sub`, `run_id` and `team_id` are | |
| /// filled with fixed placeholder values. | |
| fn claims_with_aliases(model: Option<&str>, lora: Option<&str>, aliases: &[&str]) -> RftClaims { | |
| let model_aliases: Vec<String> = aliases.iter().copied().map(str::to_owned).collect(); | |
| RftClaims { | |
| sub: "user".into(), | |
| run_id: "abc".into(), | |
| team_id: String::new(), | |
| model: model.map(str::to_owned), | |
| lora: lora.map(str::to_owned), | |
| model_aliases, | |
| } | |
| } | |
|
|
||
|
|
@@ -165,4 +250,91 @@ mod tests { | |
| let c = claims(Some(""), Some("rft-abc")); | ||
| assert!(!c.allows_model("")); | ||
| } | ||
|
|
||
| #[test] | |
| fn allows_model_alias() { | |
| // Both the declared alias and the canonical base are authorized; | |
| // an unrelated name is not. | |
| let base = "meta-llama/Llama-3.2-1B-Instruct"; | |
| let alias = "sprints/Llama-3.2-1B-Instruct"; | |
| let token = claims_with_aliases(Some(base), Some("rft-abc"), &[alias]); | |
| assert!(token.allows_model(alias)); | |
| assert!(token.allows_model(base)); | |
| assert!(!token.allows_model("other/model")); | |
| } | |
|
|
||
| #[test] | |
| fn canonical_for_alias_rewrites_alias_only() { | |
| let base = "meta-llama/Llama-3.2-1B-Instruct"; | |
| let alias = "sprints/Llama-3.2-1B-Instruct"; | |
| let token = claims_with_aliases(Some(base), Some("rft-abc"), &[alias]); | |
| // The alias is the only name that maps to the canonical base. | |
| assert_eq!(token.canonical_for_alias(alias), Some(base.to_string())); | |
| // The base model, the lora (exact and step-versioned) and unknown | |
| // names must NOT be rewritten — lora dispatch depends on the | |
| // original name reaching vLLM. | |
| for passthrough in [base, "rft-abc", "rft-abc-step-42", "unrelated"] { | |
| assert_eq!(token.canonical_for_alias(passthrough), None); | |
| } | |
| } | |
|
|
||
| #[test] | |
| fn empty_alias_entry_authorizes_nothing() { | |
| // An empty string in `model_aliases` must neither authorize the | |
| // empty model name nor trigger a rewrite. | |
| let empty_alias = [""]; | |
| let token = claims_with_aliases( | |
| Some("meta-llama/Llama-3.2-1B-Instruct"), | |
| Some("rft-abc"), | |
| &empty_alias, | |
| ); | |
| assert!(!token.allows_model("")); | |
| assert_eq!(token.canonical_for_alias(""), None); | |
| } | |
|
|
||
| #[test] | |
| fn alias_matching_base_does_not_rewrite() { | |
| // Pathological config: the alias list repeats the base model. | |
| // The name stays authorized, but no rewrite is produced (it | |
| // would be a no-op anyway). | |
| let base = "meta-llama/Llama-3.2-1B-Instruct"; | |
| let token = claims_with_aliases(Some(base), None, &[base]); | |
| assert!(token.allows_model(base)); | |
| assert_eq!(token.canonical_for_alias(base), None); | |
| } | |
|
|
||
| #[test] | |
| fn alias_matching_lora_does_not_rewrite() { | |
| // Pathological config: the same string is both the lora claim | |
| // and an alias entry. Rewriting would silently swap the LoRA | |
| // call for a base-model call, so the LoRA match must win. | |
| let adapter = "rft-abc"; | |
| let token = claims_with_aliases( | |
| Some("meta-llama/Llama-3.2-1B-Instruct"), | |
| Some(adapter), | |
| &[adapter], | |
| ); | |
| assert!(token.allows_model(adapter)); | |
| assert_eq!(token.canonical_for_alias(adapter), None); | |
| } | |
|
|
||
| #[test] | |
| fn alias_matching_step_versioned_lora_does_not_rewrite() { | |
| // Same collision as the exact-lora case, but the alias entry is | |
| // a `<lora>-<suffix>` step adapter name. | |
| let step_adapter = "rft-abc-step-42"; | |
| let token = claims_with_aliases( | |
| Some("meta-llama/Llama-3.2-1B-Instruct"), | |
| Some("rft-abc"), | |
| &[step_adapter], | |
| ); | |
| assert!(token.allows_model(step_adapter)); | |
| assert_eq!(token.canonical_for_alias(step_adapter), None); | |
| } | |
|
|
||
| #[test] | |
| fn alias_without_base_model_does_not_authorize() { | |
| // With a missing or empty base model there is no canonical name | |
| // to rewrite to; authorizing the alias would forward an | |
| // arbitrary string to vLLM unchanged and broaden JWT scope. | |
| // Both shapes of "no base" must fail closed. | |
| let alias = "sprints/Llama-3.2-1B-Instruct"; | |
| for base in [None, Some("")] { | |
| let token = claims_with_aliases(base, Some("rft-abc"), &[alias]); | |
| assert!(!token.allows_model(alias)); | |
| assert_eq!(token.canonical_for_alias(alias), None); | |
| } | |
| } | |
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a JWT contains `model_aliases` but omits `model` (or sets it empty, which this struct permits via `#[serde(default)]`), this branch authorizes the alias even though `canonical_for_alias` later returns `None` because there is no canonical base to rewrite to. In that misconfigured/lora-only token case the server forwards the alias unchanged on chat/completions/embeddings/responses, broadening the JWT scope instead of failing closed; alias authorization should be gated on a non-empty base model or handled as an error.
Useful? React with 👍 / 👎.