Skip to content

Commit 8c87ff3

Browse files
fix(oauth): auto-learn IdP audience and persist as resource for token validation (#4404)
* fix(oauth): auto-learn IdP audience and persist as resource for token validation OAuth token audience validation fails for IdPs (ServiceNow, Authentik, etc.) that do not honor RFC 8707 and set the aud claim to an abstract identifier (e.g. client_id) rather than the resource URL sent in the authorization request. RFC 8707 Section 2 explicitly allows this: the AS may map the resource value to a different audience identifier. After a successful OAuth callback, extract the aud claim from the access token (best-effort, no signature verification) inside oauth_manager and return it as token_aud. Persist it as resource in the gateway's oauth_config. On subsequent flows, use the persisted resource as-is instead of re-deriving from gateway.url. Update _validate_audience to accept both resource (string or list) and gateway_url via set intersection. Closes #4384 Related: #4171 Signed-off-by: Madhav Kandukuri <madhav165@gmail.com> * update .secrets.baseline Signed-off-by: Madhav Kandukuri <madhav165@gmail.com> * fix(oauth): correct doctest quote style for blocking_errors example Signed-off-by: Madhav Kandukuri <madhav165@gmail.com> * Update secrets Signed-off-by: Brian Hussey <brian.hussey@ie.ibm.com> --------- Signed-off-by: Madhav Kandukuri <madhav165@gmail.com> Signed-off-by: Brian Hussey <brian.hussey@ie.ibm.com> Co-authored-by: Brian Hussey <brian.hussey@ie.ibm.com>
1 parent 9938c58 commit 8c87ff3

8 files changed

Lines changed: 234 additions & 73 deletions

File tree

.secrets.baseline

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
"files": "(?x)( package-lock\\.json$ |Cargo\\.lock$ |uv\\.lock$ |go\\.sum$ |mcpgateway/sri_hashes\\.json$ )|^.secrets.baseline$",
44
"lines": null
55
},
6-
"generated_at": "2026-04-27T12:40:08Z",
6+
"generated_at": "2026-04-27T14:14:15Z",
77
"plugins_used": [
88
{
99
"name": "AWSKeyDetector"
@@ -5000,7 +5000,7 @@
50005000
"hashed_secret": "d3ecb0d890368d7659ee54010045b835dacb8efe",
50015001
"is_secret": false,
50025002
"is_verified": false,
5003-
"line_number": 625,
5003+
"line_number": 643,
50045004
"type": "Secret Keyword",
50055005
"verified_result": null
50065006
}
@@ -7290,7 +7290,7 @@
72907290
"hashed_secret": "72cb70dbbafe97e5ea13ad88acd65d08389439b0",
72917291
"is_secret": false,
72927292
"is_verified": false,
7293-
"line_number": 137,
7293+
"line_number": 229,
72947294
"type": "Secret Keyword",
72957295
"verified_result": null
72967296
}

mcpgateway/routers/oauth_router.py

Lines changed: 51 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,42 @@ def _normalize_resource_url(url: str | None, *, preserve_query: bool = False) ->
7373
return normalized
7474

7575

76+
async def _persist_learned_audience(gateway: Gateway, oauth_result: Dict[str, Any], db: Session) -> None:
    """Learn the IdP's audience identifier from the token and persist it.

    Many IdPs (ServiceNow, Authentik, etc.) do not honor RFC 8707 and set the
    ``aud`` claim to an abstract identifier (often the ``client_id``) rather than
    the ``resource`` URL sent in the authorization request. By persisting the
    actual ``aud`` value as ``resource`` in the gateway's ``oauth_config``, later
    flows reuse the identifier the IdP actually issues.

    This is a best-effort operation. The value is ignored (nothing is written)
    when it is missing, empty, or not a string / list of non-empty strings, so
    that a malformed or empty ``aud`` can never overwrite an existing
    ``resource`` setting.

    Args:
        gateway: The gateway ORM object (will be mutated and flushed when a new
            audience is learned).
        oauth_result: The result dict from ``complete_authorization_code_flow``,
            expected to contain ``token_aud``.
        db: Active database session.
    """
    token_aud = oauth_result.get("token_aud")
    if token_aud is None:
        return

    # RFC 7519 Section 4.1.3: aud is either a single string or an array of
    # strings. The value was read from a token without signature verification,
    # so reject anything else (and empty values) before it reaches the config.
    if isinstance(token_aud, str):
        is_usable = bool(token_aud)
    elif isinstance(token_aud, list):
        is_usable = bool(token_aud) and all(isinstance(entry, str) and entry for entry in token_aud)
    else:
        is_usable = False

    if not is_usable:
        logger.debug("Ignoring unusable 'aud' value in IdP token for gateway %s; resource left unchanged", gateway.name)
        return

    existing_config = gateway.oauth_config or {}
    if existing_config.get("resource") == token_aud:
        return  # Already correct

    # Assign a fresh dict instead of mutating in place so the ORM sees the
    # attribute change and writes it on flush.
    gateway.oauth_config = {**existing_config, "resource": token_aud}
    db.flush()
    logger.debug("Learned OAuth audience from IdP token for gateway %s; persisted as resource", gateway.name)
110+
111+
76112
oauth_router = APIRouter(prefix="/oauth", tags=["oauth"])
77113

78114

@@ -298,22 +334,10 @@ async def initiate_oauth_flow(
298334

299335
oauth_config = gateway.oauth_config.copy() # Work with a copy to avoid mutating the original
300336

301-
# RFC 8707: Set resource parameter for JWT access tokens
302-
# Respect pre-configured resource (e.g., for providers requiring pre-registered resources)
303-
# Only derive from gateway.url if not explicitly configured
304-
if oauth_config.get("resource"):
305-
# Normalize existing resource - preserve query for explicit config (RFC 8707 allows when necessary)
306-
existing = oauth_config["resource"]
307-
if isinstance(existing, list):
308-
original_count = len(existing)
309-
normalized = [_normalize_resource_url(r, preserve_query=True) for r in existing]
310-
oauth_config["resource"] = [r for r in normalized if r]
311-
if not oauth_config["resource"] and original_count > 0:
312-
logger.warning(f"All {original_count} configured resource values were invalid and removed")
313-
else:
314-
oauth_config["resource"] = _normalize_resource_url(existing, preserve_query=True)
315-
else:
316-
# Default to gateway.url as the resource (strip query per RFC 8707 SHOULD NOT)
337+
# RFC 8707: Set resource parameter for JWT access tokens.
338+
# If resource was previously learned from the IdP's token aud claim, use it as-is.
339+
# Otherwise derive from gateway.url for the first authorization request.
340+
if not oauth_config.get("resource"):
317341
oauth_config["resource"] = _normalize_resource_url(gateway.url)
318342

319343
# Phase 1.4: Auto-trigger DCR if credentials are missing
@@ -541,30 +565,24 @@ def _invalid_state_response() -> HTMLResponse:
541565

542566
# Complete OAuth flow
543567

544-
# RFC 8707: Add resource parameter for JWT access tokens
545-
# Must be set here in callback, not just in /authorize, because complete_authorization_code_flow
546-
# needs it for the token exchange request
547-
# Respect pre-configured resource; only derive from gateway.url if not explicitly configured
568+
# RFC 8707: Set resource parameter for the token exchange request.
569+
# If resource was previously learned from the IdP's token aud claim, use it as-is.
570+
# Otherwise derive from gateway.url for the first authorization request.
548571
oauth_config_with_resource = gateway.oauth_config.copy()
549-
if oauth_config_with_resource.get("resource"):
550-
# Preserve query for explicit config (RFC 8707 allows when necessary)
551-
existing = oauth_config_with_resource["resource"]
552-
if isinstance(existing, list):
553-
original_count = len(existing)
554-
normalized = [_normalize_resource_url(r, preserve_query=True) for r in existing]
555-
oauth_config_with_resource["resource"] = [r for r in normalized if r]
556-
if not oauth_config_with_resource["resource"] and original_count > 0:
557-
logger.warning(f"All {original_count} configured resource values were invalid and removed")
558-
else:
559-
oauth_config_with_resource["resource"] = _normalize_resource_url(existing, preserve_query=True)
560-
else:
561-
# Strip query for auto-derived (RFC 8707 SHOULD NOT)
572+
if not oauth_config_with_resource.get("resource"):
562573
oauth_config_with_resource["resource"] = _normalize_resource_url(gateway.url)
563574

564575
result = await oauth_manager.complete_authorization_code_flow(
565576
gateway_id, code, state, oauth_config_with_resource, ca_certificate=gateway.ca_certificate, client_cert=gateway.client_cert, client_key=gateway.client_key
566577
)
567578

579+
# Learn the IdP's audience mapping from the token and persist as resource.
580+
# RFC 8707 Section 2: "The authorization server may use the exact resource value
581+
# as the audience or it may map from that value to a more general URI or abstract
582+
# identifier for the given resource." We persist whatever the IdP chose so that
583+
# subsequent token validation matches.
584+
await _persist_learned_audience(gateway, result, db)
585+
568586
logger.info(f"Completed OAuth flow for gateway {SecurityValidator.sanitize_log_message(gateway_id)}, user {SecurityValidator.sanitize_log_message(str(result.get('user_id')))}")
569587

570588
# Return success page with option to return to admin

mcpgateway/services/oauth_manager.py

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -777,6 +777,10 @@ async def complete_authorization_code_flow(
777777
# Extract user information from token response
778778
user_id = self._extract_user_id(token_response, credentials)
779779

780+
# Extract audience from token (best-effort) for caller to persist as resource.
781+
# This enables audience learning for IdPs that map resource to a different aud.
782+
token_aud = self._extract_token_audience(token_response.get("access_token", ""))
783+
780784
# Store tokens if storage service is available
781785
if self.token_storage:
782786
token_record = await self.token_storage.store_tokens(
@@ -789,8 +793,8 @@ async def complete_authorization_code_flow(
789793
scopes=token_response.get("scope", "").split(),
790794
)
791795

792-
return {"success": True, "user_id": user_id, "expires_at": token_record.expires_at.isoformat() if token_record.expires_at else None}
793-
return {"success": True, "user_id": user_id, "expires_at": None}
796+
return {"success": True, "user_id": user_id, "expires_at": token_record.expires_at.isoformat() if token_record.expires_at else None, "token_aud": token_aud}
797+
return {"success": True, "user_id": user_id, "expires_at": None, "token_aud": token_aud}
794798

795799
async def get_access_token_for_user(self, gateway_id: str, app_user_email: str) -> Optional[str]:
796800
"""Get valid access token for a specific user.
@@ -1591,6 +1595,34 @@ def _extract_user_id(self, token_response: Dict[str, Any], credentials: Dict[str
15911595
# Final fallback
15921596
return "unknown_user"
15931597

1598+
@staticmethod
1599+
def _extract_token_audience(access_token: str) -> Any:
1600+
"""Extract the ``aud`` claim from a JWT access token (best-effort).
1601+
1602+
Returns the raw ``aud`` value (string or list) or ``None`` for opaque
1603+
tokens or decode failures. No signature verification is performed.
1604+
1605+
Args:
1606+
access_token: The raw access token string.
1607+
1608+
Returns:
1609+
The ``aud`` claim value, or None.
1610+
"""
1611+
if not access_token:
1612+
return None
1613+
try:
1614+
# Third-Party
1615+
import jwt as pyjwt # pylint: disable=import-outside-toplevel
1616+
1617+
claims = pyjwt.decode(
1618+
access_token,
1619+
options={"verify_signature": False, "verify_aud": False, "verify_iss": False, "verify_exp": False},
1620+
algorithms=["RS256", "RS384", "RS512", "ES256", "ES384", "ES512", "PS256", "PS384", "PS512", "HS256", "HS384", "HS512", "EdDSA"],
1621+
)
1622+
return claims.get("aud")
1623+
except Exception: # noqa: BLE001
1624+
return None
1625+
15941626

15951627
class OAuthError(Exception):
15961628
"""OAuth-related errors.

mcpgateway/services/token_validation_service.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -73,9 +73,9 @@ def blocking_errors(self) -> List[str]:
7373
>>> r.blocking_errors
7474
[]
7575
>>> r.audience_match = False
76-
>>> r.warnings.append("Token audience mismatch: token aud=[api://wrong], expected 'api://correct'")
76+
>>> r.warnings.append("Token audience mismatch: token aud does not match expected resource or gateway URL")
7777
>>> r.blocking_errors
78-
["Token audience mismatch: token aud=[api://wrong], expected 'api://correct'"]
78+
['Token audience mismatch: token aud does not match expected resource or gateway URL']
7979
"""
8080
if not self.warnings:
8181
return []
@@ -152,23 +152,24 @@ def _validate_audience(claims: Dict[str, Any], oauth_config: Dict[str, Any], gat
152152
gateway_name: Gateway name for log messages.
153153
result: Validation result to update in-place.
154154
"""
155-
expected_audience = oauth_config.get("resource") or gateway_url
156-
if not expected_audience:
155+
expected = oauth_config.get("resource") or gateway_url
156+
if not expected:
157157
return
158158

159159
token_aud = claims.get("aud")
160160
if token_aud is None:
161161
logger.debug("OAuth token for gateway %s has no 'aud' claim", gateway_name)
162162
return
163163

164+
# Normalize both sides to lists for a simple membership check.
165+
# Per RFC 7519 Section 4.1.3, aud can be a string or array.
166+
expected_list = expected if isinstance(expected, list) else [expected]
164167
aud_list = token_aud if isinstance(token_aud, list) else [token_aud]
165-
if expected_audience in aud_list:
168+
if any(a in expected_list for a in aud_list):
166169
result.audience_match = True
167170
else:
168171
result.audience_match = False
169-
safe_aud = ", ".join(str(a)[:80] for a in aud_list[:3])
170-
safe_expected = str(expected_audience)[:80]
171-
result.warnings.append(f"Token audience mismatch: token aud=[{safe_aud}], expected '{safe_expected}'")
172+
result.warnings.append("Token audience mismatch: token aud does not match expected resource or gateway URL")
172173

173174

174175
def _validate_scopes(claims: Dict[str, Any], oauth_config: Dict[str, Any], gateway_name: str, result: TokenValidationResult) -> None:

0 commit comments

Comments
 (0)