diff --git a/02-use-cases/role-based-hr-data-agent/.gitignore b/02-use-cases/role-based-hr-data-agent/.gitignore new file mode 100644 index 000000000..825611dfd --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/.gitignore @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.pyc +*.pyo +*.pyd +*.egg-info/ +.Python + +# Virtual environments +.venv/ +venv/ +env/ + +# Build / packaging artifacts +dist/ +build/ + +# Test and coverage +.pytest_cache/ +.mypy_cache/ +.coverage +htmlcov/ + +# Credentials and secrets +.env +.env.* +persona_app_clients.json + +# macOS +.DS_Store + +# IDE +.vscode/ +.idea/ + +# Logs +*.log + +# Backup files +*.bak + +# Claude Code workspace instructions +CLAUDE.md diff --git a/02-use-cases/role-based-hr-data-agent/README.md b/02-use-cases/role-based-hr-data-agent/README.md new file mode 100644 index 000000000..5134726c6 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/README.md @@ -0,0 +1,247 @@ +# Role-Based HR Data Agent + +> [!IMPORTANT] +> This sample uses synthetic HR data for demonstration purposes only. No real employee data is processed. Review IAM permissions before deploying in production. + +A role-based HR data access agent with automatic **scope-based field redaction** using Amazon Bedrock AgentCore. The agent enforces data access policies based on each caller's OAuth 2.0 scopes — without changing application code. 
+ +**Key capabilities:** +- **AgentCore Runtime** — hosts the Strands Agent; receives user prompts and drives MCP tool calls via the Gateway +- **AgentCore Gateway** — central policy enforcement point; routes every `tools/list` and `tools/call` through interceptors and Cedar +- **Request Interceptor** — decodes JWT and injects tenant context on every `tools/call` +- **Cedar Policy Engine** — Allow/Deny per tool based on OAuth scopes +- **Response Interceptor** — hides tools from `tools/list` and redacts fields on `tools/call` responses +- **Multi-tenant isolation** — tenant resolved from OAuth `client_id`; no custom JWT claims needed +- **Cognito OAuth 2.0** — `client_credentials` with custom scopes per persona + +> **Note:** This sample uses AWS Lambda as the AgentCore Gateway target. + +![Architecture](docs/screenshots/full-architecture.png) + +| # | Step | +|---|---| +| 1 | Application sends a prompt to AgentCore Runtime with an inbound auth token | +| 2 | Runtime obtains a scoped JWT from Cognito (`client_credentials` flow) | +| 3 | Strands Agent sends an MCP request (`tools/list` or `tools/call`) to AgentCore Gateway with the JWT in the header | +| 4 | Gateway forwards the request to the **Request Interceptor Lambda** | +| 5 | Request Interceptor decodes the JWT, injects `tenantId` into tool arguments, and returns the transformed request | +| 6 | Gateway evaluates the **Cedar Policy Engine** — Allow or Deny based on OAuth scopes | +| 7 | Gateway calls the **Lambda target** (HR Data Provider) with the transformed request, using AgentCore Identity for outbound auth | +| 8 | Lambda returns the full (unredacted) response | +| 9 | Gateway passes the response to the **Response Interceptor Lambda** | +| 10 | Response Interceptor applies field-level redaction and filters tool discovery by scope; transformed response returned to the Runtime | + +## Demo + +| HR Manager — full access | Employee — all sensitive fields redacted | +|:---:|:---:| +| ![HR 
Manager](docs/screenshots/hr-manager.png) | ![Employee](docs/screenshots/employee.png) | + +> Same query, same agent, different OAuth scopes — field redaction applied automatically by the Response Interceptor. + +> See [per-persona request flow](docs/diagrams/flow.md) for a detailed sequence diagram with per-persona field redaction steps. + +## Reference + +### Scope → Field Mapping + +The Lambda target returns full unredacted records for every caller. The Response Interceptor applies field-level redaction based on the caller's OAuth scopes — ensuring sensitive fields never reach the agent or the user unless the persona has explicit permission. This mapping is defined in `_redact_employee()` in [`prerequisite/lambda/interceptors/response_interceptor.py`](prerequisite/lambda/interceptors/response_interceptor.py). To extend redaction to other data sources (DynamoDB, RDS, S3), update the field lists in that function — the Gateway interceptor pattern applies identically regardless of what the Lambda target reads from. + +| Scope | Redacted fields | +|---|---| +| `hr-dlp-gateway/pii` | email, phone, personal_phone, emergency_contact | +| `hr-dlp-gateway/address` | address, city, state, zip_code | +| `hr-dlp-gateway/comp` | salary, bonus, stock_options, pay_grade, benefits_value, compensation_history | + +### Persona Access Matrix + +Step 2 (`prereq.sh`) creates a Cognito User Pool with a resource server (`hr-dlp-gateway`) that defines four custom OAuth scopes — `read`, `pii`, `address`, and `comp` — and provisions one app client per persona with a fixed `AllowedOAuthScopes` list. Each persona gets a `client_id` and `client_secret` stored in SSM; the agent fetches a token via `client_credentials` flow using those credentials. The Gateway enforces what tools are visible and what fields are returned based on the scopes present in the token. 
+ +| Persona | Scopes | Tools visible | Salary | Email | Address | +|---|---|---|---|---|---| +| HR Manager | read, pii, address, comp | 3 | Visible | Visible | Visible | +| HR Specialist | read, pii | 2 | `[REDACTED]` | Visible | `[REDACTED]` | +| Employee | read | 1 | `[REDACTED]` | `[REDACTED]` | `[REDACTED]` | +| Admin | read, pii, address, comp | 3 | Visible | Visible | Visible | + +## Prerequisites + +- AWS account with Amazon Bedrock AgentCore access (us-east-1) +- **Claude Haiku 4.5** enabled via cross-region inference (CRIS) in your account +- Python 3.10+ +- AWS CLI configured (`aws configure`) +- [uv](https://docs.astral.sh/uv/) (recommended) or pip + +## Setup + +### Step 1: Clone and install + +```bash +git clone https://github.com/awslabs/agentcore-samples.git +cd agentcore-samples/02-use-cases/role-based-hr-data-agent + +uv sync +``` + +### Step 2: Deploy infrastructure + +Packages Lambda functions and deploys CloudFormation stacks for Lambda, IAM, and Cognito. Stores all resource IDs in SSM under `/app/hrdlp/*`. + +```bash +bash scripts/prereq.sh --region us-east-1 --env dev +``` + +### Step 3: Create the AgentCore Gateway + +Creates the Gateway with JWT authorizer, Lambda target (3 HR tools), and request/response interceptors. The Lambda target **must** be attached before Step 4 — Cedar builds its policy schema from the registered tool names. + +```bash +python scripts/agentcore_gateway.py create --config prerequisite/prereqs_config.yaml +``` + +### Step 4: Create the Cedar Policy Engine + +Attaches the Cedar Policy Engine and creates the three HR authorization policies. Uses a two-phase `update_gateway` approach: Phase A attaches the engine **without interceptors** so Cedar's internal schema initialization call succeeds, then Phase B restores the interceptors once policies are ACTIVE. + +```bash +python scripts/create_cedar_policies.py --region us-east-1 --env dev +``` + +Default mode is `LOG_ONLY`. 
Switch to enforcement for production: + +```bash +python scripts/create_cedar_policies.py --mode ENFORCE +``` + +### Step 5: Deploy the AgentCore Runtime + +```bash +bash scripts/package_runtime.sh + +BUCKET=$(aws ssm get-parameter --name /app/hrdlp/deploy-bucket --query Parameter.Value --output text) +aws s3 cp dist/runtime.zip s3://${BUCKET}/hr-data-agent/runtime.zip + +python scripts/agentcore_agent_runtime.py create +``` + +### Step 6: Run the Streamlit app + +```bash +streamlit run app.py +``` + +Open http://localhost:8501. Select a persona, click **Get OAuth Token**, then ask a question such as *"Show me John Smith's compensation"*. Switch personas to see field redaction applied automatically. + +## Testing + +> **Note:** Cedar defaults to `LOG_ONLY` mode — policies log decisions but do not block requests. Tests will pass in either mode; switch to `ENFORCE` only when ready for production. + +### Verify field redaction + +```bash +python test/test_dlp_redaction.py +``` + +Expected output: + +``` +Testing persona: hr-manager → PASS (salary visible, email visible) +Testing persona: hr-specialist → PASS (salary redacted, email visible) +Testing persona: employee → PASS (salary redacted, email redacted) +Testing persona: admin → PASS (salary visible, email visible) +``` + +### Test the full agent + +```bash +python test/test_agent.py --persona hr-manager --prompt "Show me John Smith's compensation" +python test/test_agent.py --persona employee --prompt "Show me John Smith's compensation" +``` + +### Test the Gateway directly + +```bash +python test/test_gateway.py --persona hr-manager --list-tools +python test/test_gateway.py --persona employee --list-tools +python test/test_gateway.py --persona hr-specialist --query "Sarah Johnson" +``` + +### View CloudWatch logs + +```bash +ENV=dev +aws logs tail /aws/lambda/hr-data-provider-lambda-${ENV} --since 1h --follow +aws logs tail /aws/lambda/hr-request-interceptor-lambda-${ENV} --since 1h --follow +aws logs tail 
/aws/lambda/hr-response-interceptor-lambda-${ENV} --since 1h --follow +``` + +## Troubleshooting + +**Cedar `CREATE_FAILED: An internal error occurred during creation`** +Cedar's schema initialization failed — usually the engine is in a corrupted state from a prior failed run. Clean up and redeploy from Step 2: +```bash +bash scripts/cleanup.sh && bash scripts/prereq.sh --region us-east-1 --env dev +``` + +**Cedar `CREATE_FAILED: unable to find at offset 0`** +No Lambda target is registered. Complete Step 3 before running Step 4. +```bash +python scripts/agentcore_gateway.py create --config prerequisite/prereqs_config.yaml +``` + +**Runtime `CREATE_FAILED` — ARM64 binary incompatibility** +macOS packaging pulled darwin binaries. Delete the old zip and repackage: +```bash +rm -f dist/runtime.zip && bash scripts/package_runtime.sh +``` + +**SSM parameters missing when running the app** +Complete Steps 2–5 first. Verify all parameters are present: +```bash +aws ssm get-parameters-by-path --path /app/hrdlp --recursive --query "Parameters[].Name" --output text +``` + +**Runtime returns 403 after update** +`update-agent-runtime` resets fields not explicitly passed. 
Always run the full update: +```bash +python scripts/agentcore_agent_runtime.py update +``` + +## Project Structure + +``` +role-based-hr-data-agent/ +├── agent_config/ # HRDataAgent — Strands + MCP/JSON-RPC +├── app_modules/ # Streamlit UI (auth, chat, persona selector) +├── docs/ +│ ├── screenshots/ # Demo screenshots + full architecture diagram +│ └── diagrams/ # Per-persona request flow (flow.md) +├── scripts/ # Deployment CLI (gateway, runtime, Cedar, Cognito) +├── prerequisite/ +│ ├── lambda/ # HR Data Provider + Request/Response Interceptors +│ ├── cedar/ # Cedar authorization policies +│ ├── infrastructure.yaml +│ └── cognito.yaml +├── test/ # Gateway, agent, and field redaction tests +├── app.py # Streamlit entry point +├── main.py # AgentCore Runtime entry point +└── requirements.txt +``` + +## Cleanup + +```bash +bash scripts/cleanup.sh --region us-east-1 --env dev +``` + +## Contributing + +We welcome contributions! See [Contributing Guidelines](../../CONTRIBUTING.md) for details. + +## License + +MIT License — see [LICENSE](../../LICENSE). + +## Support + +Report issues via [GitHub Issues](https://github.com/awslabs/agentcore-samples/issues). diff --git a/02-use-cases/role-based-hr-data-agent/__init__.py b/02-use-cases/role-based-hr-data-agent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/02-use-cases/role-based-hr-data-agent/agent_config/__init__.py b/02-use-cases/role-based-hr-data-agent/agent_config/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/02-use-cases/role-based-hr-data-agent/agent_config/access_token.py b/02-use-cases/role-based-hr-data-agent/agent_config/access_token.py new file mode 100644 index 000000000..1ba1796f7 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/agent_config/access_token.py @@ -0,0 +1,57 @@ +""" +M2M (machine-to-machine) access token acquisition for AgentCore Gateway. + +Fetches a Cognito client_credentials token and caches it for reuse. 
+""" + +import logging +import time +from typing import Optional + +import requests + +from agent_config.utils import get_ssm_parameter + +logger = logging.getLogger(__name__) + +_cached_token: Optional[str] = None +_token_expiry: float = 0.0 + + +def get_gateway_access_token(client_id: Optional[str] = None, client_secret: Optional[str] = None) -> Optional[str]: + """ + Return a valid Cognito client_credentials access token. + + Credentials are read from SSM if not provided directly: + /app/hrdlp/cognito-client-id + /app/hrdlp/cognito-client-secret + /app/hrdlp/cognito-token-url + """ + global _cached_token, _token_expiry + + if _cached_token and time.time() < _token_expiry - 60: + return _cached_token + + client_id = client_id or get_ssm_parameter("/app/hrdlp/cognito-client-id") + client_secret = client_secret or get_ssm_parameter("/app/hrdlp/cognito-client-secret") + token_url = get_ssm_parameter("/app/hrdlp/cognito-token-url") + + if not all([client_id, client_secret, token_url]): + logger.error("Missing Cognito credentials in SSM") + return None + + try: + response = requests.post( + token_url, + data={"grant_type": "client_credentials"}, + auth=(client_id, client_secret), + timeout=10, + ) + response.raise_for_status() + data = response.json() + _cached_token = data["access_token"] + _token_expiry = time.time() + data.get("expires_in", 3600) + return _cached_token + except Exception as e: + logger.error(f"Failed to acquire access token: {e}") + return None diff --git a/02-use-cases/role-based-hr-data-agent/agent_config/agent.py b/02-use-cases/role-based-hr-data-agent/agent_config/agent.py new file mode 100644 index 000000000..cf54e7a3e --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/agent_config/agent.py @@ -0,0 +1,215 @@ +""" +HRDataAgent — Strands-based agent that connects to AgentCore Gateway via MCP/JSON-RPC. + +Discovers tools dynamically from the Gateway (filtered per OAuth scope) and +invokes them over HTTP JSON-RPC. 
All field-level DLP redaction is applied +transparently by the Gateway Response Interceptor before data reaches here. +""" + +from __future__ import annotations + +import asyncio +import logging +import re +import uuid +from typing import Any, AsyncGenerator, Dict, List, Optional + +import httpx +from strands import Agent +from strands.models import BedrockModel +from strands.types.tools import AgentTool, ToolResult, ToolUse +from strands.types._events import ToolResultEvent + +logger = logging.getLogger(__name__) + +MODEL_ID = "us.anthropic.claude-haiku-4-5-20251001-v1:0" + +_SAFE_NAME = re.compile(r"[^A-Za-z0-9_-]+") + + +def _safe_tool_name(name: str) -> str: + safe = _SAFE_NAME.sub("_", name).strip("_") + return safe or "tool" + + +def _normalize_input_schema(tool_schema: Dict[str, Any]) -> Dict[str, Any]: + schema = tool_schema.get("inputSchema") or tool_schema.get("input_schema") or {} + if isinstance(schema, dict) and "json" in schema and isinstance(schema["json"], dict): + return schema + if isinstance(schema, dict): + return {"json": schema} + return {"json": {"type": "object", "properties": {}}} + + +async def _call_gateway_jsonrpc( + gateway_url: str, + access_token: str, + method: str, + params: Optional[dict] = None, +) -> Any: + async with httpx.AsyncClient(timeout=30.0) as client: + payload = { + "jsonrpc": "2.0", + "id": uuid.uuid4().hex, + "method": method, + "params": params or {}, + } + resp = await client.post( + gateway_url, + json=payload, + headers={ + "Content-Type": "application/json", + "Accept": "application/json", + "Authorization": f"Bearer {access_token}", + }, + ) + resp.raise_for_status() + body = resp.json() + if body.get("error") is not None: + raise RuntimeError(f"Gateway error: {body['error']}") + return body.get("result") + + +async def _list_tools(gateway_url: str, access_token: str) -> List[Dict[str, Any]]: + result = await _call_gateway_jsonrpc(gateway_url, access_token, "tools/list", {}) + if result is None: + return [] + if 
isinstance(result, list): + return result + if isinstance(result, dict) and isinstance(result.get("tools"), list): + return result["tools"] + return [] + + +class _HTTPGatewayTool(AgentTool): + """Wraps a single MCP tool from the Gateway as a Strands AgentTool.""" + + def __init__( + self, + tool_schema: Dict[str, Any], + gateway_url: str, + access_token: str, + name_map: Dict[str, str], + ): + original_name = tool_schema.get("name") + if not original_name: + raise ValueError(f"Tool schema missing 'name': {tool_schema}") + + self._original_name = original_name + self._name = _safe_tool_name(original_name) + self._description = tool_schema.get("description", "") + self._input_schema = _normalize_input_schema(tool_schema) + self._gateway_url = gateway_url + self._access_token = access_token + name_map[self._name] = self._original_name + super().__init__() + + @property + def tool_name(self) -> str: + return self._name + + @property + def tool_spec(self) -> Dict[str, Any]: + return { + "name": self._name, + "description": self._description, + "inputSchema": self._input_schema, + } + + @property + def tool_type(self) -> str: + return "agentcore_gateway_http_jsonrpc" + + async def stream( + self, + tool_use: ToolUse, + invocation_state: Dict[str, Any], + **kwargs, + ) -> AsyncGenerator[ToolResultEvent, None]: + tool_input = tool_use.get("input", {}) + params = {"name": self._original_name, "arguments": tool_input} + result = await _call_gateway_jsonrpc( + self._gateway_url, self._access_token, "tools/call", params + ) + + result_text = "" + if isinstance(result, dict): + content = result.get("content", []) + if isinstance(content, list): + parts = [ + item.get("text", "") + for item in content + if isinstance(item, dict) and item.get("type") == "text" + ] + result_text = "\n".join(p for p in parts if p) or str(result) + else: + result_text = str(result) + else: + result_text = str(result) + + tool_result: ToolResult = { + "toolUseId": tool_use["toolUseId"], + "status": 
"success", + "content": [{"text": result_text}], + } + yield ToolResultEvent(tool_result) + + +SYSTEM_PROMPT = """You are a secure HR Assistant with role-based data access control. + +You help users access HR information through the Amazon Bedrock AgentCore Gateway. +The Gateway enforces OAuth scope-based authorization and applies field-level DLP +redaction automatically — you receive data that is already correctly filtered for +the current user's role. + +IMPORTANT RULES: +- Always call tools FIRST before responding. Never fabricate data. +- Present tool responses directly. Do not invent placeholder values. +- If a field contains [REDACTED - Insufficient Permissions], display it exactly. +- Never assume parameter values. Ask the user if required information is missing. +- Only explain redaction AFTER presenting data, and only if the user asks. + +Available tools (shown based on your OAuth scopes): +- search_employee: Search employees by name, department, or role +- get_employee_profile: Get detailed employee profile (PII/address may be redacted) +- get_employee_compensation: Get salary and compensation data (requires comp scope) + +Role capabilities: +- HR Manager / Admin: Full access — all fields visible +- HR Specialist: Profiles + PII visible; compensation and address redacted +- Employee: Search only — all PII, address, and compensation redacted +""" + + +class HRDataAgent: + """ + Strands agent wired to AgentCore Gateway via JSON-RPC. + + Discovers tools dynamically on each invocation so tool visibility + always reflects the caller's current OAuth scopes. 
+ """ + + def __init__(self, gateway_url: str, access_token: str): + self.gateway_url = gateway_url + self.access_token = access_token + + async def process(self, user_prompt: str) -> Dict[str, Any]: + model = BedrockModel(model_id=MODEL_ID, temperature=0.0, streaming=False) + + tool_schemas = await _list_tools(self.gateway_url, self.access_token) + logger.info(f"Loaded {len(tool_schemas)} tools from Gateway") + + name_map: Dict[str, str] = {} + tools = [ + _HTTPGatewayTool(schema, self.gateway_url, self.access_token, name_map) + for schema in tool_schemas + ] + + agent = Agent(model=model, system_prompt=SYSTEM_PROMPT, tools=tools) + result = await asyncio.to_thread(agent, user_prompt) + + return { + "result": result.message, + "model": MODEL_ID, + "tool_count": len(tools), + } diff --git a/02-use-cases/role-based-hr-data-agent/agent_config/agent_task.py b/02-use-cases/role-based-hr-data-agent/agent_config/agent_task.py new file mode 100644 index 000000000..2ddd38a43 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/agent_config/agent_task.py @@ -0,0 +1,56 @@ +""" +Async agent workflow orchestration. + +Wires together: context setup → token extraction → agent invocation → response. +""" + +from __future__ import annotations + +import logging +from typing import Any, Dict, Optional + +from agent_config.agent import HRDataAgent + +logger = logging.getLogger(__name__) + + +def _get_auth_header(context: Any) -> Optional[str]: + if hasattr(context, "request_headers") and isinstance(context.request_headers, dict): + headers = context.request_headers + return headers.get("Authorization") or headers.get("authorization") + return None + + +def _strip_bearer(header: str) -> str: + return header.replace("Bearer ", "").replace("bearer ", "").strip() + + +async def run_agent_task( + payload: Dict[str, Any], + context: Any, + gateway_url: str, + session_id: str, +) -> Dict[str, Any]: + """ + Main async workflow: + 1. Extract OAuth token from request headers + 2. 
Create/retrieve AgentContext for this session + 3. Instantiate HRDataAgent and invoke with user prompt + 4. Return structured response + """ + user_prompt = payload.get("prompt") or "How can I help you today?" + logger.info(f"[agent_task] session={session_id} prompt={user_prompt[:80]!r}") + + # Extract access token from incoming request (pass-through from caller) + auth_header = _get_auth_header(context) + if not auth_header: + return {"error": "Missing Authorization header"} + access_token = _strip_bearer(auth_header) + + try: + agent = HRDataAgent(gateway_url=gateway_url, access_token=access_token) + result = await agent.process(user_prompt) + return result + except Exception as e: + logger.exception(f"[agent_task] failure: {e}") + return {"error": str(e)} diff --git a/02-use-cases/role-based-hr-data-agent/agent_config/tools/__init__.py b/02-use-cases/role-based-hr-data-agent/agent_config/tools/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/02-use-cases/role-based-hr-data-agent/agent_config/utils.py b/02-use-cases/role-based-hr-data-agent/agent_config/utils.py new file mode 100644 index 000000000..e2b29de9d --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/agent_config/utils.py @@ -0,0 +1,32 @@ +""" +Utility helpers for agent_config. 
+""" + +import json +import logging +from typing import Any, Optional + +import boto3 +import yaml +from botocore.exceptions import ClientError + +logger = logging.getLogger(__name__) + + +def get_ssm_parameter(name: str, decrypt: bool = True) -> Optional[str]: + """Retrieve a parameter from AWS SSM Parameter Store.""" + try: + client = boto3.client("ssm") + response = client.get_parameter(Name=name, WithDecryption=decrypt) + return response["Parameter"]["Value"] + except ClientError as e: + logger.warning(f"SSM parameter not found: {name} — {e}") + return None + + +def read_config(path: str) -> dict: + """Read a JSON or YAML config file and return as dict.""" + with open(path, "r", encoding="utf-8") as f: + if path.endswith(".yaml") or path.endswith(".yml"): + return yaml.safe_load(f) + return json.load(f) diff --git a/02-use-cases/role-based-hr-data-agent/app.py b/02-use-cases/role-based-hr-data-agent/app.py new file mode 100644 index 000000000..125a88fdf --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/app.py @@ -0,0 +1,538 @@ +#!/usr/bin/env python3 +""" +HR DLP Demo — Streamlit frontend. 
+ +All configuration is read dynamically from SSM Parameter Store on startup: + /app/hrdlp/runtime-url — AgentCore Runtime invocation URL + /app/hrdlp/gateway-url — AgentCore Gateway MCP endpoint + /app/hrdlp/cognito-token-url — Cognito OAuth2 token endpoint + /app/hrdlp/personas/*/client-id — Per-persona Cognito app client ID + /app/hrdlp/personas/*/client-secret — Per-persona client secret (SecureString) + +Usage: + streamlit run app.py +""" + +import base64 +import json +import os +from datetime import datetime +from typing import Optional + +import boto3 +import requests +import streamlit as st + +# --------------------------------------------------------------------------- +# SSM helpers +# --------------------------------------------------------------------------- + +@st.cache_resource(show_spinner=False) +def _ssm_client(): + region = os.getenv("AWS_REGION", os.getenv("AWS_DEFAULT_REGION", "us-east-1")) + return boto3.client("ssm", region_name=region) + + +def _get_param(name: str, secure: bool = False) -> Optional[str]: + try: + resp = _ssm_client().get_parameter(Name=name, WithDecryption=secure) + return resp["Parameter"]["Value"] + except Exception: + return None + + +# --------------------------------------------------------------------------- +# Config — loaded once per session +# --------------------------------------------------------------------------- + +@st.cache_resource(show_spinner="Loading configuration from SSM…") +def load_config() -> dict: + runtime_url = _get_param("/app/hrdlp/runtime-url") + gateway_url = _get_param("/app/hrdlp/gateway-url") + token_url = _get_param("/app/hrdlp/cognito-token-url") + + personas = {} + for persona in ["hr-manager", "hr-specialist", "employee", "admin"]: + client_id = _get_param(f"/app/hrdlp/personas/{persona}/client-id") + client_secret = _get_param(f"/app/hrdlp/personas/{persona}/client-secret", secure=True) + if client_id and client_secret: + personas[persona] = {"client_id": client_id, "client_secret": 
client_secret} + + missing = [] + if not runtime_url: missing.append("/app/hrdlp/runtime-url") + if not gateway_url: missing.append("/app/hrdlp/gateway-url") + if not token_url: missing.append("/app/hrdlp/cognito-token-url") + if not personas: missing.append("/app/hrdlp/personas/*/client-id and client-secret") + + return { + "runtime_url": runtime_url, + "gateway_url": gateway_url, + "token_url": token_url, + "personas": personas, + "missing": missing, + } + + +# --------------------------------------------------------------------------- +# Persona display definitions +# --------------------------------------------------------------------------- + +PERSONAS = { + "HR Manager": { + "key": "hr-manager", + "icon": "👔", + "description": "Full access — compensation, PII, and address visible", + "scopes": ["read", "pii", "address", "comp"], + "color": "#1f77b4", + "expected_tools": 3, + }, + "HR Specialist": { + "key": "hr-specialist", + "icon": "👨‍💼", + "description": "Profiles + PII; compensation and address redacted", + "scopes": ["read", "pii"], + "color": "#ff7f0e", + "expected_tools": 2, + }, + "Employee": { + "key": "employee", + "icon": "👤", + "description": "Search only; all sensitive fields redacted", + "scopes": ["read"], + "color": "#2ca02c", + "expected_tools": 1, + }, + "Admin": { + "key": "admin", + "icon": "🛡️", + "description": "Full administrative access", + "scopes": ["read", "pii", "address", "comp"], + "color": "#9467bd", + "expected_tools": 3, + }, +} + +SUGGESTED_QUERIES = [ + "What can you help me with?", + "Find all software engineers", + "Show me Sarah Johnson's profile", + "What is John Smith's compensation?", + "Search for HR department employees", +] + +# --------------------------------------------------------------------------- +# Auth helpers +# --------------------------------------------------------------------------- + +def get_token(config: dict, persona_key: str) -> Optional[str]: + """Obtain a client_credentials access token for the 
given persona."""
    creds = config["personas"].get(persona_key)
    if not creds:
        add_log(f"No credentials found in SSM for persona: {persona_key}", "error", "Cognito")
        return None
    add_log(f"POST {config['token_url']} (grant_type=client_credentials)", "info", "Cognito")
    # client_credentials grant: client id/secret go in an HTTP Basic auth header.
    encoded = base64.b64encode(f"{creds['client_id']}:{creds['client_secret']}".encode()).decode()
    try:
        resp = requests.post(
            config["token_url"],
            headers={"Content-Type": "application/x-www-form-urlencoded",
                     "Authorization": f"Basic {encoded}"},
            data={"grant_type": "client_credentials"},
            timeout=10,
        )
        resp.raise_for_status()
        token_data = resp.json()
        expires = token_data.get("expires_in", "?")
        scopes = token_data.get("scope", "")
        add_log(f"Token issued (expires {expires}s) | scopes: {scopes}", "success", "Cognito")
        return token_data.get("access_token")
    except Exception as e:
        # Broad catch is deliberate: any network/HTTP/JSON failure is surfaced
        # in the demo UI rather than crashing the Streamlit script.
        add_log(f"Token request failed: {e}", "error", "Cognito")
        st.error(f"Token request failed: {e}")
        return None


# ---------------------------------------------------------------------------
# Runtime / Gateway calls
# ---------------------------------------------------------------------------

def call_runtime(config: dict, token: str, prompt: str, session_id: str = "") -> tuple[list, Optional[str]]:
    """POST to AgentCore Runtime and return (raw_chunks, final_text).

    Streams the response line by line (SSE-style ``data:`` lines), logging
    each progress event as it arrives.  Returns ``([], None)`` on any hard
    error; otherwise the list of raw chunk strings plus the final assistant
    text (``None`` if no final response event was seen).
    """
    add_log(f"POST {config['runtime_url'].split('/runtimes/')[0]}/runtimes/…/invocations", "info", "Runtime")
    add_log(f"Session: {session_id[:16]}…", "info", "Runtime")

    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
    chunks, llm_response = [], None
    try:
        resp = requests.post(
            config["runtime_url"],
            headers=headers,
            # NOTE(review): str(id(prompt)) as a sessionId fallback is not
            # unique across reruns/processes — callers should pass session_id.
            json={"prompt": prompt, "sessionId": session_id or str(id(prompt))},
            stream=True,
            timeout=120,
        )
        if resp.status_code != 200:
            add_log(f"HTTP {resp.status_code}: {resp.text[:120]}", "error", "Runtime")
            st.error(f"Runtime returned HTTP {resp.status_code}: {resp.text[:200]}")
            return [], None

        add_log(f"HTTP 200 — streaming response…", "success", "Runtime")

        for line in resp.iter_lines():
            if not line:
                continue
            decoded = line.decode("utf-8")
            # Strip the SSE "data: " prefix when present.
            if decoded.startswith("data: "):
                decoded = decoded[6:]
            chunks.append(decoded)
            try:
                data = json.loads(decoded)

                # Error returned as JSON body (e.g. missing sessionId)
                if "error" in data and "result" not in data:
                    add_log(f"Runtime error: {data['error']}", "error", "Runtime")
                    st.error(f"Runtime error: {data['error']}")
                    return [], None

                if "result" in data:
                    # Final answer payload: either a plain value or an
                    # MCP-style {"content": [{"text": ...}]} structure.
                    result = data["result"]
                    model = data.get("model", "")
                    tool_count = data.get("tool_count", "?")
                    add_log(f"Tools used: {tool_count} | Model: {model.split('/')[-1] if model else 'unknown'}", "info", "Runtime")
                    if isinstance(result, dict) and "content" in result:
                        content = result["content"]
                        llm_response = content[0].get("text", str(result)) if content else str(result)
                    else:
                        llm_response = str(result)
                    add_log(f"Response received ({len(llm_response)} chars)", "success", "Runtime")

                elif data.get("type") == "response":
                    llm_response = data.get("message", "")
                    add_log(f"Response received ({len(llm_response)} chars)", "success", "Runtime")

                elif data.get("type") == "status":
                    add_log(data.get("message", ""), "info", "Runtime")

                elif data.get("type") == "tools_discovered":
                    tools = data.get("tools", [])
                    add_log(f"Tools discovered: {len(tools)}", "success", "Gateway")
                    for t in tools:
                        add_log(f" - {t}", "info", "Gateway")

                elif data.get("type") == "tool_result":
                    add_log(data.get("message", "Tool call completed"), "success", "Lambda")

                elif data.get("type") == "error":
                    add_log(data.get("message", "Unknown error"), "error", "Runtime")

            except json.JSONDecodeError:
                # Non-JSON chunks stay in `chunks` but drive no UI events.
                pass

    except requests.Timeout:
        add_log("Request timed out after 120s", "error", "Runtime")
        st.error("Request timed out — the agent may still be processing.")
    except Exception as e:
        add_log(f"Exception: {e}", "error", "Runtime")
        st.error(f"Runtime error: {e}")
    return chunks, llm_response


def discover_tools(config: dict, token: str) -> list[str]:
    """Call Gateway tools/list and return tool names.

    The Gateway filters the list per the caller's OAuth scopes, so the
    result reflects what this persona is allowed to see.
    """
    add_log("POST tools/list → Gateway", "info", "Gateway")
    try:
        resp = requests.post(
            config["gateway_url"],
            headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"},
            json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
            timeout=30,
        )
        resp.raise_for_status()
        tools = resp.json().get("result", {}).get("tools", [])
        add_log(f"HTTP 200 — {len(tools)} tool(s) visible to this persona", "success", "Gateway")
        for t in tools:
            # Drop the Gateway target prefix for compact log output.
            short = t["name"].replace("hr-lambda-target___", "")
            add_log(f" ✓ {short}", "info", "Gateway")
        return [t["name"] for t in tools]
    except Exception as e:
        add_log(f"Tool discovery failed: {e}", "error", "Gateway")
        st.error(f"Tool discovery failed: {e}")
        return []


def call_tool(config: dict, token: str, tool_name: str, arguments: dict):
    """Call a specific tool via the Gateway.

    Returns the raw JSON-RPC response dict, or None on failure (the error
    is shown in the UI).
    """
    try:
        resp = requests.post(
            config["gateway_url"],
            headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"},
            json={"jsonrpc": "2.0", "id": 1, "method": "tools/call",
                  "params": {"name": tool_name, "arguments": arguments}},
            timeout=30,
        )
        resp.raise_for_status()
        return resp.json()
    except Exception as e:
        st.error(f"Tool call failed: {e}")
        return None


# ---------------------------------------------------------------------------
# Session state helpers
# ---------------------------------------------------------------------------

def _init_state() -> None:
    """Seed st.session_state with defaults on first run (idempotent)."""
    import uuid
    defaults = {
        "selected_persona": "HR Manager",
        "token": None,
        "tools": [],
        "logs": [],
        "llm_response": None,
        "conversation_history": [],
        "is_processing": False,
        "session_id": str(uuid.uuid4()),
    }
    for k, v in defaults.items():
+ if k not in st.session_state: + st.session_state[k] = v + + +def add_log(message: str, level: str = "info", component: str = ""): + ts = datetime.now().strftime("%H:%M:%S.%f")[:-3] + st.session_state.logs.append({"ts": ts, "msg": message, "level": level, "comp": component}) + + +def _switch_persona(name: str): + import uuid + st.session_state.selected_persona = name + st.session_state.token = None + st.session_state.tools = [] + st.session_state.logs = [] + st.session_state.llm_response = None + st.session_state.conversation_history = [] + st.session_state.session_id = str(uuid.uuid4()) + + +# --------------------------------------------------------------------------- +# Main app +# --------------------------------------------------------------------------- + +st.set_page_config(page_title="HR DLP Demo", page_icon="🔒", layout="wide") +_init_state() + +# Load config (cached — only hits SSM once per process) +config = load_config() + +if config["missing"]: + st.error( + "**Missing SSM parameters** — run the full deployment sequence first:\n\n" + "```\nbash scripts/prereq.sh --region us-east-1 --env dev\n" + "python scripts/agentcore_gateway.py create --config prerequisite/prereqs_config.yaml\n" + "python scripts/create_cedar_policies.py --region us-east-1 --env dev\n" + "python scripts/agentcore_agent_runtime.py create\n```\n\n" + f"Missing: `{'`, `'.join(config['missing'])}`" + ) + st.stop() + +st.title("🔒 HR DLP Gateway — Interactive Demo") +st.caption("Role-based data access with automatic field-level redaction via Amazon Bedrock AgentCore") + +# --------------------------------------------------------------------------- +# Sidebar +# --------------------------------------------------------------------------- +with st.sidebar: + st.header("👥 Persona") + + for display_name, meta in PERSONAS.items(): + is_active = st.session_state.selected_persona == display_name + label = f"{meta['icon']} {display_name}" + if st.button(label, key=f"btn_{display_name}", 
use_container_width=True, + type="primary" if is_active else "secondary"): + _switch_persona(display_name) + st.rerun() + + st.divider() + p = PERSONAS[st.session_state.selected_persona] + st.markdown(f"### {p['icon']} {st.session_state.selected_persona}") + st.caption(p["description"]) + st.markdown(f"**Scopes:** `{'`, `'.join(p['scopes'])}`") + st.markdown(f"**Expected tools:** {p['expected_tools']}") + + st.divider() + st.header("Actions") + + if st.button("🔑 Get OAuth Token", use_container_width=True): + with st.spinner("Requesting token from Cognito…"): + token = get_token(config, p["key"]) + if token: + st.session_state.token = token + st.session_state.llm_response = None + st.session_state.conversation_history = [] + add_log(f"Token obtained for {st.session_state.selected_persona}", "success", "Cognito") + st.success("Token obtained") + st.rerun() + + if st.button("🔧 Discover Tools", use_container_width=True, + disabled=not st.session_state.token): + with st.spinner("Calling Gateway tools/list…"): + tools = discover_tools(config, st.session_state.token) + st.session_state.tools = tools + add_log(f"Discovered {len(tools)} tools", "success", "Gateway") + for t in tools: + add_log(f" - {t}", "info", "Gateway") + st.rerun() + + if st.button("🗑️ Clear", use_container_width=True): + st.session_state.logs = [] + st.session_state.llm_response = None + st.rerun() + + # Connection info (collapsed) + with st.expander("ℹ️ Connection info"): + st.caption(f"**Runtime:** `{config['runtime_url'][:60]}…`") + st.caption(f"**Gateway:** `{config['gateway_url'][:60]}…`") + st.caption(f"**Token URL:** `{config['token_url'][:60]}…`") + +# --------------------------------------------------------------------------- +# Main area — two columns +# --------------------------------------------------------------------------- +col_chat, col_tools = st.columns([1, 1]) + +# ---- Left column: agent chat ---- +with col_chat: + st.header("💬 Agent Chat") + + if st.session_state.token: + 
st.success(f"✅ Authenticated as **{st.session_state.selected_persona}**") + else: + st.warning("No token — click **Get OAuth Token** in the sidebar") + + # Suggested queries + st.markdown("**Quick examples:**") + for q in SUGGESTED_QUERIES: + if st.button(q, key=f"quick_{q[:20]}", use_container_width=True, + disabled=not st.session_state.token): + st.session_state.logs = [] + add_log(f"Query: {q}", "info", "Client") + with st.spinner("Processing…"): + _, llm_response = call_runtime(config, st.session_state.token, q, st.session_state.session_id) + if llm_response: + st.session_state.llm_response = llm_response + st.session_state.conversation_history.append({"role": "user", "content": q}) + st.session_state.conversation_history.append({"role": "assistant", "content": llm_response}) + st.rerun() + + st.divider() + + # Custom query + query = st.text_area("Custom query:", value="Show me John Smith's full profile", + height=80, disabled=not st.session_state.token) + if st.button("🚀 Send", use_container_width=True, + disabled=(not st.session_state.token or st.session_state.is_processing)): + st.session_state.is_processing = True + st.session_state.logs = [] + add_log(f"Sending query as {st.session_state.selected_persona}", "info", "Client") + with st.spinner("Processing…"): + _, llm_response = call_runtime(config, st.session_state.token, query, st.session_state.session_id) + if llm_response: + st.session_state.llm_response = llm_response + st.session_state.conversation_history.append({"role": "user", "content": query}) + st.session_state.conversation_history.append({"role": "assistant", "content": llm_response}) + st.session_state.is_processing = False + st.rerun() + + # Response display + if st.session_state.llm_response: + st.divider() + st.subheader("🤖 Agent Response") + st.markdown(st.session_state.llm_response) + if st.button("Clear response"): + st.session_state.llm_response = None + st.session_state.conversation_history = [] + st.rerun() + + # Conversation history 
+ if st.session_state.conversation_history: + with st.expander("💬 Conversation history", expanded=False): + for msg in st.session_state.conversation_history: + prefix = "**You:**" if msg["role"] == "user" else "**Agent:**" + text = msg["content"] + st.markdown(f"{prefix} {text[:300]}{'…' if len(text) > 300 else ''}") + st.markdown("---") + +# ---- Right column: direct tool calling + logs ---- +with col_tools: + st.header("🔧 Direct Tool Calling") + + if not st.session_state.tools: + st.info("Click **Discover Tools** in the sidebar to see what this persona can access.") + else: + tool_labels = { + "hr-lambda-target___search_employee": "Search Employee", + "hr-lambda-target___get_employee_profile": "Get Employee Profile", + "hr-lambda-target___get_employee_compensation": "Get Employee Compensation", + } + available = {k: v for k, v in tool_labels.items() if k in st.session_state.tools} + + if not available: + st.warning("No recognized tools visible for this persona.") + else: + selected_tool = st.selectbox( + "Tool:", options=list(available.keys()), + format_func=lambda x: available[x], + ) + + with st.form(key="tool_form"): + if selected_tool == "hr-lambda-target___search_employee": + search_q = st.text_input("Search query:", value="John") + tenant = st.text_input("Tenant ID:", value="tenant-alpha") + submitted = st.form_submit_button("🚀 Call Tool", use_container_width=True) + if submitted: + result = call_tool(config, st.session_state.token, selected_tool, + {"query": search_q, "tenantId": tenant}) + if result: + st.json(result) + + elif selected_tool == "hr-lambda-target___get_employee_profile": + emp_id = st.text_input("Employee ID:", value="EMP001") + tenant = st.text_input("Tenant ID:", value="tenant-alpha") + inc_pii = st.checkbox("Include PII") + inc_addr = st.checkbox("Include Address") + submitted = st.form_submit_button("🚀 Call Tool", use_container_width=True) + if submitted: + result = call_tool(config, st.session_state.token, selected_tool, + 
{"employee_id": emp_id, "tenantId": tenant, + "include_pii": inc_pii, "include_address": inc_addr}) + if result: + st.json(result) + + elif selected_tool == "hr-lambda-target___get_employee_compensation": + emp_id = st.text_input("Employee ID:", value="EMP001") + tenant = st.text_input("Tenant ID:", value="tenant-alpha") + submitted = st.form_submit_button("🚀 Call Tool", use_container_width=True) + if submitted: + result = call_tool(config, st.session_state.token, selected_tool, + {"employee_id": emp_id, "tenantId": tenant}) + if result: + st.json(result) + + # Activity log + if st.session_state.logs: + st.divider() + st.subheader("📝 Activity Log") + log_icons = {"error": "🔴", "success": "🟢", "warning": "🟡", "info": "⚪"} + with st.container(height=250): + for entry in st.session_state.logs[-15:]: + icon = log_icons.get(entry["level"], "⚪") + comp = f"[{entry['comp']}] " if entry["comp"] else "" + st.markdown(f"{icon} `{entry['ts']}` {comp}{entry['msg']}") + +# --------------------------------------------------------------------------- +# Footer +# --------------------------------------------------------------------------- +st.divider() +st.markdown( + "**Flow:** Client → Cognito OAuth2 → AgentCore Runtime → AgentCore Gateway " + "→ Request Interceptor → Cedar Policy Engine → HR Lambda → Response Interceptor (DLP) → Response" +) diff --git a/02-use-cases/role-based-hr-data-agent/app_modules/__init__.py b/02-use-cases/role-based-hr-data-agent/app_modules/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/02-use-cases/role-based-hr-data-agent/app_modules/auth.py b/02-use-cases/role-based-hr-data-agent/app_modules/auth.py new file mode 100644 index 000000000..bb97faad4 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/app_modules/auth.py @@ -0,0 +1,101 @@ +""" +Cognito OAuth2 PKCE authentication for the Streamlit UI. 
+ +Manages the authorization code flow, token exchange, and cookie-based +token storage so the user stays logged in across Streamlit reruns. +""" + +import base64 +import hashlib +import json +import logging +import os +import secrets +from typing import Optional +from urllib.parse import urlencode + +import requests +import streamlit as st + +from agent_config.utils import get_ssm_parameter + +logger = logging.getLogger(__name__) + + +class AuthManager: + """Handles Cognito PKCE OAuth2 flow for the Streamlit UI.""" + + def __init__(self): + self.region = os.getenv("AWS_REGION", "us-east-1") + self.client_id = get_ssm_parameter("/app/hrdlp/cognito-client-id") or os.getenv("COGNITO_CLIENT_ID", "") + self.token_url = get_ssm_parameter("/app/hrdlp/cognito-token-url") or os.getenv("COGNITO_TOKEN_URL", "") + self.user_pool_id = get_ssm_parameter("/app/hrdlp/cognito-user-pool-id") or "" + # Derive auth URL from token URL + self.auth_url = self.token_url.replace("/oauth2/token", "/oauth2/authorize") if self.token_url else "" + self.redirect_uri = os.getenv("STREAMLIT_REDIRECT_URI", "http://localhost:8501") + self.scopes = "openid email profile hr-dlp-gateway/read hr-dlp-gateway/pii hr-dlp-gateway/address hr-dlp-gateway/comp" + + def get_auth_url(self) -> str: + """Generate the Cognito authorization URL with PKCE code challenge.""" + verifier = secrets.token_urlsafe(64) + challenge = base64.urlsafe_b64encode( + hashlib.sha256(verifier.encode()).digest() + ).rstrip(b"=").decode() + st.session_state["pkce_verifier"] = verifier + + params = { + "response_type": "code", + "client_id": self.client_id, + "redirect_uri": self.redirect_uri, + "scope": self.scopes, + "code_challenge": challenge, + "code_challenge_method": "S256", + } + return f"{self.auth_url}?{urlencode(params)}" + + def exchange_code(self, code: str) -> Optional[dict]: + """Exchange an authorization code for tokens.""" + verifier = st.session_state.get("pkce_verifier", "") + try: + resp = requests.post( + 
self.token_url, + data={ + "grant_type": "authorization_code", + "client_id": self.client_id, + "code": code, + "redirect_uri": self.redirect_uri, + "code_verifier": verifier, + }, + timeout=15, + ) + resp.raise_for_status() + return resp.json() + except Exception as e: + logger.error(f"Token exchange failed: {e}") + return None + + def decode_token(self, access_token: str) -> dict: + """Decode JWT payload (without verification — display only).""" + try: + payload_b64 = access_token.split(".")[1] + padding = 4 - len(payload_b64) % 4 + payload_bytes = base64.urlsafe_b64decode(payload_b64 + "=" * padding) + return json.loads(payload_bytes.decode()) + except Exception: + return {} + + def store_tokens(self, tokens: dict) -> None: + """Persist tokens in Streamlit session state.""" + st.session_state["access_token"] = tokens.get("access_token") + st.session_state["id_token"] = tokens.get("id_token") + st.session_state["refresh_token"] = tokens.get("refresh_token") + + def get_access_token(self) -> Optional[str]: + return st.session_state.get("access_token") + + def is_authenticated(self) -> bool: + return bool(self.get_access_token()) + + def logout(self) -> None: + for key in ["access_token", "id_token", "refresh_token", "pkce_verifier"]: + st.session_state.pop(key, None) diff --git a/02-use-cases/role-based-hr-data-agent/app_modules/chat.py b/02-use-cases/role-based-hr-data-agent/app_modules/chat.py new file mode 100644 index 000000000..1f2a4d5bc --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/app_modules/chat.py @@ -0,0 +1,78 @@ +""" +Chat manager — sends messages to the AgentCore Runtime and streams responses. 
+""" + +import json +import logging +import os +from typing import Optional + +import boto3 +import requests +import streamlit as st + +from agent_config.utils import get_ssm_parameter +from app_modules.utils import make_urls_clickable + +logger = logging.getLogger(__name__) + + +class ChatManager: + """Manages chat interactions with the AgentCore Runtime.""" + + def __init__(self): + self.region = os.getenv("AWS_REGION", "us-east-1") + # runtime-url is the full ARN-encoded invocation URL stored by agentcore_agent_runtime.py + self.runtime_url = get_ssm_parameter("/app/hrdlp/runtime-url") or "" + + def send_message( + self, + message: str, + session_id: str, + access_token: str, + message_placeholder, + ) -> Optional[str]: + """ + POST to AgentCore Runtime and stream the response into message_placeholder. + """ + if not self.runtime_url: + st.error("Runtime URL not configured. Check /app/hrdlp/runtime-id in SSM.") + return None + + session = boto3.session.Session() + credentials = session.get_credentials().get_frozen_credentials() + + payload = {"prompt": message, "sessionId": session_id} + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {access_token}", + } + + try: + with requests.post( + self.runtime_url, + json=payload, + headers=headers, + stream=True, + timeout=120, + ) as resp: + resp.raise_for_status() + full_response = "" + for chunk in resp.iter_content(chunk_size=None, decode_unicode=True): + if chunk: + full_response += chunk + message_placeholder.markdown( + make_urls_clickable(full_response) + " ▌", + unsafe_allow_html=True, + ) + message_placeholder.markdown( + make_urls_clickable(full_response), unsafe_allow_html=True + ) + return full_response + except requests.Timeout: + st.error("Request timed out. 
The agent may still be processing.") + except requests.HTTPError as e: + st.error(f"Runtime error: {e.response.status_code} — {e.response.text[:200]}") + except Exception as e: + st.error(f"Unexpected error: {e}") + return None diff --git a/02-use-cases/role-based-hr-data-agent/app_modules/main.py b/02-use-cases/role-based-hr-data-agent/app_modules/main.py new file mode 100644 index 000000000..b455b8496 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/app_modules/main.py @@ -0,0 +1,154 @@ +""" +Streamlit entry point for the Role-Based HR Data Agent demo. + +Handles OAuth callback, authentication check, and chat interface rendering. +""" + +import uuid +from urllib.parse import parse_qs, urlparse + +import streamlit as st + +from app_modules.auth import AuthManager +from app_modules.chat import ChatManager +from app_modules.styles import apply_custom_styles + +# Persona display config +PERSONAS = { + "HR Manager": { + "icon": "👔", + "badge_class": "badge-manager", + "description": "Full access — all fields visible", + "scopes": ["read", "pii", "address", "comp"], + }, + "HR Specialist": { + "icon": "👨‍💼", + "badge_class": "badge-specialist", + "description": "Profiles + PII; compensation and address redacted", + "scopes": ["read", "pii"], + }, + "Employee": { + "icon": "👤", + "badge_class": "badge-employee", + "description": "Search only; all sensitive fields redacted", + "scopes": ["read"], + }, +} + +SUGGESTED_QUERIES = [ + "Find all engineers in the company", + "Show me Sarah Johnson's profile", + "What is John Smith's compensation?", + "Search for HR department employees", +] + + +def main(): + st.set_page_config( + page_title="HR Data Agent", + page_icon="🔐", + layout="centered", + initial_sidebar_state="expanded", + ) + apply_custom_styles() + + auth = AuthManager() + chat = ChatManager() + + # ------------------------------------------------------------------ + # OAuth callback handling + # 
------------------------------------------------------------------ + query_params = st.query_params + if "code" in query_params and not auth.is_authenticated(): + code = query_params["code"] + tokens = auth.exchange_code(code) + if tokens: + auth.store_tokens(tokens) + st.query_params.clear() + st.rerun() + else: + st.error("Authentication failed. Please try again.") + + # ------------------------------------------------------------------ + # Login screen + # ------------------------------------------------------------------ + if not auth.is_authenticated(): + st.title("🔐 HR Data Agent") + st.markdown("Secure HR data access with role-based DLP enforcement via Amazon Bedrock AgentCore.") + st.markdown("---") + if st.button("Login with Cognito", use_container_width=True): + st.markdown(f'', + unsafe_allow_html=True) + return + + # ------------------------------------------------------------------ + # Authenticated — Chat interface + # ------------------------------------------------------------------ + token_claims = auth.decode_token(auth.get_access_token()) + + # Session state + if "session_id" not in st.session_state: + st.session_state.session_id = str(uuid.uuid4()) + if "messages" not in st.session_state: + st.session_state.messages = [] + + # ------------------------------------------------------------------ + # Sidebar + # ------------------------------------------------------------------ + with st.sidebar: + st.title("HR Data Agent") + st.markdown("---") + + # Persona selector (for demo — switches OAuth persona client) + st.subheader("Demo Persona") + selected_persona = st.selectbox("Select Role", list(PERSONAS.keys())) + p = PERSONAS[selected_persona] + st.markdown( + f'{p["icon"]} {selected_persona}', + unsafe_allow_html=True, + ) + st.caption(p["description"]) + st.markdown(f"**Scopes:** `{'`, `'.join(p['scopes'])}`") + + st.markdown("---") + st.subheader("Suggested Queries") + for q in SUGGESTED_QUERIES: + if st.button(q, key=f"suggest_{q[:20]}"): + 
st.session_state.messages.append({"role": "user", "content": q}) + st.rerun() + + st.markdown("---") + if st.button("Clear conversation"): + st.session_state.messages = [] + st.session_state.session_id = str(uuid.uuid4()) + st.rerun() + + if st.button("Logout"): + auth.logout() + st.rerun() + + # ------------------------------------------------------------------ + # Chat area + # ------------------------------------------------------------------ + st.title("🔐 HR Data Agent") + + for msg in st.session_state.messages: + with st.chat_message(msg["role"]): + st.markdown(msg["content"]) + + if prompt := st.chat_input("Ask about employees..."): + st.session_state.messages.append({"role": "user", "content": prompt}) + with st.chat_message("user"): + st.markdown(prompt) + + with st.chat_message("assistant"): + placeholder = st.empty() + placeholder.markdown('Thinking...', unsafe_allow_html=True) + response = chat.send_message( + message=prompt, + session_id=st.session_state.session_id, + access_token=auth.get_access_token(), + message_placeholder=placeholder, + ) + if response: + st.session_state.messages.append({"role": "assistant", "content": response}) diff --git a/02-use-cases/role-based-hr-data-agent/app_modules/styles.py b/02-use-cases/role-based-hr-data-agent/app_modules/styles.py new file mode 100644 index 000000000..ca12f8d71 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/app_modules/styles.py @@ -0,0 +1,50 @@ +"""Dark theme CSS styles for the Streamlit HR Data Agent UI.""" + +import streamlit as st + + +def apply_custom_styles() -> None: + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) diff --git a/02-use-cases/role-based-hr-data-agent/app_modules/utils.py b/02-use-cases/role-based-hr-data-agent/app_modules/utils.py new file mode 100644 index 000000000..d945893b6 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/app_modules/utils.py @@ -0,0 +1,14 @@ +"""Utility helpers for the Streamlit frontend.""" + +import re + + +def 
make_urls_clickable(text: str) -> str: + """Wrap bare URLs in tags.""" + pattern = r"(https?://[^\s\)\]\"']+)" + return re.sub(pattern, r'\1', text) + + +def create_safe_markdown_text(text: str) -> str: + """Convert newlines to
for safe HTML rendering.""" + return text.replace("\n", "
") diff --git a/02-use-cases/role-based-hr-data-agent/dev-requirements.txt b/02-use-cases/role-based-hr-data-agent/dev-requirements.txt new file mode 100644 index 000000000..2183d6b40 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/dev-requirements.txt @@ -0,0 +1,24 @@ +# Testing +pytest>=7.4.0 +pytest-cov>=4.1.0 +pytest-asyncio>=0.23.0 +pytest-mock>=3.12.0 +moto>=4.2.0 + +# Code quality +black>=23.12.0 +flake8>=6.1.0 +mypy>=1.8.0 +isort>=5.13.0 +pre-commit>=3.6.0 + +# Frontend +streamlit>=1.31.0 +streamlit-cookies-controller>=0.0.4 + +# Infrastructure helpers +opensearch-py>=2.4.0 +pandas>=2.1.0 + +# CLI +click>=8.1.7 diff --git a/02-use-cases/role-based-hr-data-agent/docs/diagrams/flow.md b/02-use-cases/role-based-hr-data-agent/docs/diagrams/flow.md new file mode 100644 index 000000000..1fd4a8ff2 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/docs/diagrams/flow.md @@ -0,0 +1,88 @@ +# DLP Gateway — Request Flow by Persona + +```mermaid +sequenceDiagram + participant HRM as 👔 HR Manager
(read, pii, address, comp) + participant HRS as 👨‍💼 HR Specialist
(read, pii) + participant EMP as 👤 Employee
(read) + participant RT as 🤖 AgentCore Runtime
(Strands Agent) + participant GW as 🔒 Gateway + participant RI as 🛡️ Response Interceptor
(DLP + tool filter) + participant CP as 📜 Cedar Policy
Engine + participant LM as 💾 Lambda
(HR Data Provider) + + Note over HRM,LM: ═══ HR Manager: "Show me John Smith's compensation" ═══ + + HRM->>RT: POST /invocations {prompt} + RT->>GW: tools/list + GW->>LM: Get all 3 tools + GW->>RI: Filter tools by scope + RI-->>GW: ✅ All 3 tools (has read, pii, address, comp) + GW-->>RT: search_employee, get_employee_profile, get_employee_compensation + + RT->>GW: tools/call search_employee {query: "John Smith"} + GW->>CP: Evaluate — hr-dlp-gateway/read scope? + CP-->>GW: ✅ ALLOW + GW->>LM: Invoke Lambda + LM-->>GW: Employee list (EMP001) + GW->>RI: Apply DLP + RI-->>GW: ✅ No redaction (has pii, address, comp) + GW-->>RT: John Smith, EMP001 + + RT->>GW: tools/call get_employee_compensation {employeeId: "EMP001"} + GW->>CP: Evaluate — hr-dlp-gateway/comp scope? + CP-->>GW: ✅ ALLOW + GW->>LM: Invoke Lambda + LM-->>GW: Salary: $145,000, Bonus: $15,000 + GW->>RI: Apply DLP + RI-->>GW: ✅ No redaction (has comp) + GW-->>RT: Full compensation data + RT-->>HRM: 💰 Salary: $145,000 | Bonus: $15,000 | Stock: 500 units + + Note over HRM,LM: ═══ HR Specialist: "Show me John Smith's profile" ═══ + + HRS->>RT: POST /invocations {prompt} + RT->>GW: tools/list + GW->>LM: Get all 3 tools + GW->>RI: Filter tools by scope + RI-->>GW: ✅ 2 tools (has read, pii — no comp) + GW-->>RT: search_employee, get_employee_profile (❌ compensation hidden) + + RT->>GW: tools/call search_employee {query: "John Smith"} + GW->>CP: Evaluate — hr-dlp-gateway/read scope? + CP-->>GW: ✅ ALLOW + GW->>LM: Invoke Lambda + LM-->>GW: Employee list (EMP001) + GW->>RI: Apply DLP + RI-->>GW: ✅ No redaction on search results + GW-->>RT: John Smith, EMP001 + + RT->>GW: tools/call get_employee_profile {employeeId: "EMP001"} + GW->>CP: Evaluate — hr-dlp-gateway/pii scope? 
+ CP-->>GW: ✅ ALLOW + GW->>LM: Invoke Lambda + LM-->>GW: Full profile (PII + address + comp) + GW->>RI: Apply DLP + RI-->>GW: 🔒 Redact address & comp (missing hr-dlp-gateway/address, hr-dlp-gateway/comp) + GW-->>RT: PII ✅ | Address: [REDACTED] | Comp: [REDACTED] + RT-->>HRS: 👤 Name, Email, Phone ✅ | Address: [REDACTED] | Comp: [REDACTED] + + Note over HRM,LM: ═══ Employee: "Search for engineers" ═══ + + EMP->>RT: POST /invocations {prompt} + RT->>GW: tools/list + GW->>LM: Get all 3 tools + GW->>RI: Filter tools by scope + RI-->>GW: ✅ 1 tool (has read only — no pii, no comp) + GW-->>RT: search_employee only (❌ profile hidden, ❌ compensation hidden) + + RT->>GW: tools/call search_employee {query: "engineer"} + GW->>CP: Evaluate — hr-dlp-gateway/read scope? + CP-->>GW: ✅ ALLOW + GW->>LM: Invoke Lambda + LM-->>GW: Employee list (names, departments) + GW->>RI: Apply DLP + RI-->>GW: 🔒 Redact PII fields (missing hr-dlp-gateway/pii) + GW-->>RT: Names & Departments ✅ | Email: [REDACTED] | Phone: [REDACTED] + RT-->>EMP: 📋 John Smith - Engineering | Charlie Brown - Engineering
Contact info: [REDACTED] +``` diff --git a/02-use-cases/role-based-hr-data-agent/docs/screenshots/employee.png b/02-use-cases/role-based-hr-data-agent/docs/screenshots/employee.png new file mode 100644 index 000000000..17157c5ef Binary files /dev/null and b/02-use-cases/role-based-hr-data-agent/docs/screenshots/employee.png differ diff --git a/02-use-cases/role-based-hr-data-agent/docs/screenshots/full-architecture.png b/02-use-cases/role-based-hr-data-agent/docs/screenshots/full-architecture.png new file mode 100644 index 000000000..ae532a339 Binary files /dev/null and b/02-use-cases/role-based-hr-data-agent/docs/screenshots/full-architecture.png differ diff --git a/02-use-cases/role-based-hr-data-agent/docs/screenshots/hr-manager.png b/02-use-cases/role-based-hr-data-agent/docs/screenshots/hr-manager.png new file mode 100644 index 000000000..439974d45 Binary files /dev/null and b/02-use-cases/role-based-hr-data-agent/docs/screenshots/hr-manager.png differ diff --git a/02-use-cases/role-based-hr-data-agent/main.py b/02-use-cases/role-based-hr-data-agent/main.py new file mode 100644 index 000000000..91258fa99 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/main.py @@ -0,0 +1,44 @@ +""" +AgentCore Runtime entry point for the Role-Based HR Data Agent. + +Reads configuration from SSM, then delegates to agent_task for async workflow. +""" + +import os +import asyncio +import logging + +from bedrock_agentcore.runtime import BedrockAgentCoreApp +from agent_config.utils import get_ssm_parameter +from agent_config.agent_task import run_agent_task + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Enable OpenTelemetry console export for local debugging +os.environ.setdefault("STRANDS_OTEL_ENABLE_CONSOLE_EXPORT", "false") + +app = BedrockAgentCoreApp() + + +@app.entrypoint +async def agent_invocation(payload: dict, context) -> dict: + """ + AgentCore Runtime entry point. 
+ + Reads gateway URL from SSM, validates session, then runs the agent task. + """ + session_id = payload.get("sessionId") or ( + context.session_id if hasattr(context, "session_id") else None + ) + if not session_id: + return {"error": "Missing sessionId in payload"} + + gateway_url = get_ssm_parameter("/app/hrdlp/gateway-url") + if not gateway_url: + return {"error": "Gateway URL not found in SSM (/app/hrdlp/gateway-url)"} + + return await run_agent_task(payload, context, gateway_url, session_id) + + +app.run() diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/cedar/hr_dlp_policies.cedar b/02-use-cases/role-based-hr-data-agent/prerequisite/cedar/hr_dlp_policies.cedar new file mode 100644 index 000000000..3ac573965 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/prerequisite/cedar/hr_dlp_policies.cedar @@ -0,0 +1,76 @@ +// HR DLP Gateway — Cedar Authorization Policies +// +// These policies enforce OAuth scope-based authorization on each HR tool. +// Attach this policy set to your AgentCore Gateway via the AWS Console or CLI. 
+// +// Scope format issued by Cognito: hr-dlp-gateway/ +// hr-dlp-gateway/read — basic employee search +// hr-dlp-gateway/pii — PII access (email, phone) +// hr-dlp-gateway/address — address data access +// hr-dlp-gateway/comp — compensation data access +// +// Mode: LOG_ONLY (testing) | ENFORCE (production) +// Switch with: scripts/utils/switch_policy_mode.py + +// --------------------------------------------------------------------------- +// Policy 1: Employee Search +// Required scope: hr-dlp-gateway/read +// --------------------------------------------------------------------------- +permit( + principal is AgentCore::OAuthUser, + action == AgentCore::Action::"hr-lambda-target___search_employee", + resource == AgentCore::Gateway::"arn:aws:bedrock-agentcore:us-east-1:943677087104:gateway/hr-data-agent-gateway-1bukfwh8a5" +) +when { + principal.hasTag("scope") && + principal.getTag("scope") like "*hr-dlp-gateway/read*" +}; + +// --------------------------------------------------------------------------- +// Policy 2: Employee Profile +// Required scope: hr-dlp-gateway/read (PII/address redacted by Response Interceptor) +// --------------------------------------------------------------------------- +permit( + principal is AgentCore::OAuthUser, + action == AgentCore::Action::"hr-lambda-target___get_employee_profile", + resource == AgentCore::Gateway::"arn:aws:bedrock-agentcore:us-east-1:943677087104:gateway/hr-data-agent-gateway-1bukfwh8a5" +) +when { + principal.hasTag("scope") && + principal.getTag("scope") like "*hr-dlp-gateway/read*" +}; + +// --------------------------------------------------------------------------- +// Policy 3: Employee Compensation +// Required scope: hr-dlp-gateway/comp +// --------------------------------------------------------------------------- +permit( + principal is AgentCore::OAuthUser, + action == AgentCore::Action::"hr-lambda-target___get_employee_compensation", + resource == 
AgentCore::Gateway::"arn:aws:bedrock-agentcore:us-east-1:943677087104:gateway/hr-data-agent-gateway-1bukfwh8a5" +) +when { + principal.hasTag("scope") && + principal.getTag("scope") like "*hr-dlp-gateway/comp*" +}; + +// --------------------------------------------------------------------------- +// DEPLOYMENT INSTRUCTIONS +// --------------------------------------------------------------------------- +// +// 1. Replace arn:aws:bedrock-agentcore:us-east-1:943677087104:gateway/hr-data-agent-gateway-1bukfwh8a5 with the actual Gateway ARN after deployment. +// +// 2. Create Policy Engine: +// aws bedrock-agentcore-control create-policy-engine \ +// --name hr-dlp-policies \ +// --policy-document file://prerequisite/cedar/hr_dlp_policies.cedar \ +// --region us-east-1 +// +// 3. Attach to Gateway (LOG_ONLY for testing): +// aws bedrock-agentcore-control update-gateway \ +// --gateway-identifier \ +// --policy-engine-configuration \ +// arn=,mode=LOG_ONLY +// +// 4. Switch to ENFORCE for production: +// python scripts/utils/switch_policy_mode.py --mode ENFORCE diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/cognito.yaml b/02-use-cases/role-based-hr-data-agent/prerequisite/cognito.yaml new file mode 100644 index 000000000..99e9a0942 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/prerequisite/cognito.yaml @@ -0,0 +1,197 @@ +AWSTemplateFormatVersion: "2010-09-09" +Description: > + Role-Based HR Data Agent — Cognito User Pool + Creates Cognito User Pool with custom OAuth scopes for HR DLP demo personas. 
+ +Parameters: + Environment: + Type: String + Default: dev + AllowedValues: [dev, staging, prod] + CognitoDomainPrefix: + Type: String + Default: hr-dlp-agent + Description: Unique prefix for the Cognito hosted UI domain + +Resources: + + # --------------------------------------------------------------------------- + # User Pool + # --------------------------------------------------------------------------- + HRUserPool: + Type: AWS::Cognito::UserPool + Properties: + UserPoolName: !Sub hr-dlp-user-pool-${Environment} + AutoVerifiedAttributes: [email] + UsernameAttributes: [email] + Policies: + PasswordPolicy: + MinimumLength: 8 + RequireUppercase: true + RequireLowercase: true + RequireNumbers: true + RequireSymbols: false + Schema: + - Name: role + AttributeDataType: String + Mutable: true + - Name: tenantId + AttributeDataType: String + Mutable: true + - Name: department + AttributeDataType: String + Mutable: true + + HRUserPoolDomain: + Type: AWS::Cognito::UserPoolDomain + Properties: + Domain: !Sub ${CognitoDomainPrefix}-${Environment}-${AWS::AccountId} + UserPoolId: !Ref HRUserPool + + # --------------------------------------------------------------------------- + # Resource Server — defines OAuth scopes + # --------------------------------------------------------------------------- + HRResourceServer: + Type: AWS::Cognito::UserPoolResourceServer + Properties: + UserPoolId: !Ref HRUserPool + Identifier: hr-dlp-gateway + Name: HR DLP Gateway + Scopes: + - ScopeName: read + ScopeDescription: Basic employee search access + - ScopeName: pii + ScopeDescription: Access to PII fields (email, phone, emergency contact) + - ScopeName: address + ScopeDescription: Access to address fields + - ScopeName: comp + ScopeDescription: Access to compensation data (salary, bonus, stock options) + + # --------------------------------------------------------------------------- + # User Groups + # --------------------------------------------------------------------------- + 
HRManagerGroup: + Type: AWS::Cognito::UserPoolGroup + Properties: + GroupName: hr-manager + UserPoolId: !Ref HRUserPool + Precedence: 1 + Description: Full access to all HR data + + HRSpecialistGroup: + Type: AWS::Cognito::UserPoolGroup + Properties: + GroupName: hr-specialist + UserPoolId: !Ref HRUserPool + Precedence: 2 + Description: Employee profiles + PII, no compensation + + EmployeeGroup: + Type: AWS::Cognito::UserPoolGroup + Properties: + GroupName: employee + UserPoolId: !Ref HRUserPool + Precedence: 3 + Description: Basic search only + + AdminGroup: + Type: AWS::Cognito::UserPoolGroup + Properties: + GroupName: admin + UserPoolId: !Ref HRUserPool + Precedence: 1 + Description: Full administrative access + + # --------------------------------------------------------------------------- + # App Client — Web (PKCE, for Streamlit UI) + # --------------------------------------------------------------------------- + HRWebClient: + Type: AWS::Cognito::UserPoolClient + DependsOn: HRResourceServer + Properties: + ClientName: hr-dlp-web-client + UserPoolId: !Ref HRUserPool + GenerateSecret: true + AllowedOAuthFlows: [code] + AllowedOAuthFlowsUserPoolClient: true + AllowedOAuthScopes: + - openid + - email + - profile + - hr-dlp-gateway/read + - hr-dlp-gateway/pii + - hr-dlp-gateway/address + - hr-dlp-gateway/comp + CallbackURLs: [http://localhost:8501] + LogoutURLs: [http://localhost:8501] + SupportedIdentityProviders: [COGNITO] + AccessTokenValidity: 60 + IdTokenValidity: 60 + RefreshTokenValidity: 30 + TokenValidityUnits: + AccessToken: minutes + IdToken: minutes + RefreshToken: days + + # --------------------------------------------------------------------------- + # App Client — Machine (client_credentials, for persona testing) + # --------------------------------------------------------------------------- + HRMachineClient: + Type: AWS::Cognito::UserPoolClient + DependsOn: HRResourceServer + Properties: + ClientName: hr-dlp-machine-client + UserPoolId: !Ref 
HRUserPool + GenerateSecret: true + AllowedOAuthFlows: [client_credentials] + AllowedOAuthFlowsUserPoolClient: true + AllowedOAuthScopes: + - hr-dlp-gateway/read + - hr-dlp-gateway/pii + - hr-dlp-gateway/address + - hr-dlp-gateway/comp + AccessTokenValidity: 60 + TokenValidityUnits: + AccessToken: minutes + + # --------------------------------------------------------------------------- + # SSM Parameters + # --------------------------------------------------------------------------- + SSMUserPoolId: + Type: AWS::SSM::Parameter + Properties: + Name: /app/hrdlp/cognito-user-pool-id + Type: String + Value: !Ref HRUserPool + + SSMWebClientId: + Type: AWS::SSM::Parameter + Properties: + Name: /app/hrdlp/cognito-client-id + Type: String + Value: !Ref HRWebClient + + SSMTokenUrl: + Type: AWS::SSM::Parameter + Properties: + Name: /app/hrdlp/cognito-token-url + Type: String + Value: !Sub https://${CognitoDomainPrefix}-${Environment}-${AWS::AccountId}.auth.${AWS::Region}.amazoncognito.com/oauth2/token + + SSMDiscoveryUrl: + Type: AWS::SSM::Parameter + Properties: + Name: /app/hrdlp/cognito-discovery-url + Type: String + Value: !Sub https://cognito-idp.${AWS::Region}.amazonaws.com/${HRUserPool}/.well-known/openid-configuration + +Outputs: + UserPoolId: + Value: !Ref HRUserPool + Export: {Name: !Sub "${AWS::StackName}-UserPoolId"} + WebClientId: + Value: !Ref HRWebClient + MachineClientId: + Value: !Ref HRMachineClient + TokenUrl: + Value: !Sub https://${CognitoDomainPrefix}-${Environment}-${AWS::AccountId}.auth.${AWS::Region}.amazoncognito.com/oauth2/token diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/infrastructure.yaml b/02-use-cases/role-based-hr-data-agent/prerequisite/infrastructure.yaml new file mode 100644 index 000000000..584aedf64 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/prerequisite/infrastructure.yaml @@ -0,0 +1,242 @@ +AWSTemplateFormatVersion: "2010-09-09" +Description: > + Role-Based HR Data Agent — Infrastructure + Deploys: 
Lambda HR Provider, Request/Response Interceptors, IAM roles, + and SSM parameters for AgentCore Gateway integration. + +Parameters: + LambdaS3Bucket: + Type: String + Description: S3 bucket containing the Lambda deployment ZIP + LambdaS3Key: + Type: String + Default: hr-data-provider/deployment.zip + InterceptorS3Key: + Type: String + Default: hr-interceptors/deployment.zip + Environment: + Type: String + Default: dev + AllowedValues: [dev, staging, prod] + +Resources: + + # --------------------------------------------------------------------------- + # IAM — Lambda execution role + # --------------------------------------------------------------------------- + HRLambdaExecutionRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Sub hr-dlp-lambda-role-${Environment} + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: {Service: lambda.amazonaws.com} + Action: sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole + Policies: + - PolicyName: SSMReadPolicy + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: [ssm:GetParameter, ssm:GetParameters] + Resource: !Sub arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/app/hrdlp/* + + # --------------------------------------------------------------------------- + # IAM — AgentCore Gateway execution role + # --------------------------------------------------------------------------- + AgentCoreGatewayRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Sub hr-dlp-gateway-role-${Environment} + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: {Service: bedrock-agentcore.amazonaws.com} + Action: sts:AssumeRole + Policies: + - PolicyName: InvokeLambdaPolicy + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: lambda:InvokeFunction + Resource: + - !GetAtt HRDataProviderLambda.Arn + - !GetAtt RequestInterceptorLambda.Arn + - 
!GetAtt ResponseInterceptorLambda.Arn + - PolicyName: AgentCoreGatewayPolicy + PolicyDocument: + Version: "2012-10-17" + Statement: + # Required for Cedar policy evaluation (AuthorizeAction) and + # general AgentCore Gateway operations (interceptor invocation, + # JWT validation, tool routing). + - Effect: Allow + Action: bedrock-agentcore:* + Resource: "*" + + # --------------------------------------------------------------------------- + # IAM — AgentCore Runtime execution role + # --------------------------------------------------------------------------- + AgentCoreRuntimeRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Sub hr-dlp-runtime-role-${Environment} + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: {Service: bedrock-agentcore.amazonaws.com} + Action: sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/AmazonBedrockFullAccess + Policies: + - PolicyName: SSMPolicy + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: [ssm:GetParameter, ssm:PutParameter] + Resource: !Sub arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/app/hrdlp/* + - PolicyName: AgentCoreRuntimePolicy + PolicyDocument: + Version: "2012-10-17" + Statement: + # CloudWatch Logs — runtime writes execution logs to its own log group. + # DescribeLogStreams/Groups required for the AgentCore service to + # locate and tail the runtime log group on behalf of the caller. 
+ - Effect: Allow + Action: + - logs:CreateLogGroup + - logs:CreateLogStream + - logs:PutLogEvents + - logs:DescribeLogStreams + - logs:DescribeLogGroups + Resource: + - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/bedrock-agentcore/runtimes/* + - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:* + # X-Ray — distributed tracing for runtime invocations + - Effect: Allow + Action: + - xray:PutTraceSegments + - xray:PutTelemetryRecords + - xray:GetSamplingRules + - xray:GetSamplingTargets + Resource: "*" + # AgentCore — GetWorkloadAccessTokenForUserId is needed to authenticate + # outbound Gateway requests. CreateWorkloadIdentity is needed on first + # runtime initialisation to register the runtime's workload identity. + - Effect: Allow + Action: + - bedrock-agentcore:GetWorkloadAccessTokenForUserId + - bedrock-agentcore:CreateWorkloadIdentity + Resource: "*" + + # --------------------------------------------------------------------------- + # Lambda — HR Data Provider + # --------------------------------------------------------------------------- + HRDataProviderLambda: + Type: AWS::Lambda::Function + Properties: + FunctionName: !Sub hr-data-provider-lambda-${Environment} + Description: HR data provider for role-based-hr-data-agent + Runtime: python3.11 + Handler: lambda_handler.lambda_handler + Role: !GetAtt HRLambdaExecutionRole.Arn + Timeout: 30 + MemorySize: 256 + Code: + S3Bucket: !Ref LambdaS3Bucket + S3Key: !Ref LambdaS3Key + + # --------------------------------------------------------------------------- + # Lambda — Request Interceptor + # --------------------------------------------------------------------------- + RequestInterceptorLambda: + Type: AWS::Lambda::Function + Properties: + FunctionName: !Sub hr-request-interceptor-lambda-${Environment} + Description: AgentCore Gateway Request Interceptor — tenant injection + scope normalization + Runtime: python3.11 + Handler: request_interceptor.lambda_handler + Role: !GetAtt 
HRLambdaExecutionRole.Arn + Timeout: 10 + MemorySize: 128 + Code: + S3Bucket: !Ref LambdaS3Bucket + S3Key: !Ref InterceptorS3Key + + # --------------------------------------------------------------------------- + # Lambda — Response Interceptor + # --------------------------------------------------------------------------- + ResponseInterceptorLambda: + Type: AWS::Lambda::Function + Properties: + FunctionName: !Sub hr-response-interceptor-lambda-${Environment} + Description: AgentCore Gateway Response Interceptor — DLP field-level redaction + Runtime: python3.11 + Handler: response_interceptor.lambda_handler + Role: !GetAtt HRLambdaExecutionRole.Arn + Timeout: 10 + MemorySize: 128 + Code: + S3Bucket: !Ref LambdaS3Bucket + S3Key: !Ref InterceptorS3Key + + # --------------------------------------------------------------------------- + # SSM Parameters + # --------------------------------------------------------------------------- + SSMLambdaArn: + Type: AWS::SSM::Parameter + Properties: + Name: /app/hrdlp/lambda-arn + Type: String + Value: !GetAtt HRDataProviderLambda.Arn + + SSMGatewayRoleArn: + Type: AWS::SSM::Parameter + Properties: + Name: /app/hrdlp/gateway-role-arn + Type: String + Value: !GetAtt AgentCoreGatewayRole.Arn + + SSMRuntimeRoleArn: + Type: AWS::SSM::Parameter + Properties: + Name: /app/hrdlp/runtime-role-arn + Type: String + Value: !GetAtt AgentCoreRuntimeRole.Arn + + SSMRequestInterceptorArn: + Type: AWS::SSM::Parameter + Properties: + Name: /app/hrdlp/request-interceptor-arn + Type: String + Value: !GetAtt RequestInterceptorLambda.Arn + + SSMResponseInterceptorArn: + Type: AWS::SSM::Parameter + Properties: + Name: /app/hrdlp/response-interceptor-arn + Type: String + Value: !GetAtt ResponseInterceptorLambda.Arn + +Outputs: + LambdaArn: + Value: !GetAtt HRDataProviderLambda.Arn + Export: {Name: !Sub "${AWS::StackName}-LambdaArn"} + GatewayRoleArn: + Value: !GetAtt AgentCoreGatewayRole.Arn + Export: {Name: !Sub "${AWS::StackName}-GatewayRoleArn"} + 
RuntimeRoleArn: + Value: !GetAtt AgentCoreRuntimeRole.Arn + Export: {Name: !Sub "${AWS::StackName}-RuntimeRoleArn"} + RequestInterceptorArn: + Value: !GetAtt RequestInterceptorLambda.Arn + ResponseInterceptorArn: + Value: !GetAtt ResponseInterceptorLambda.Arn diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/__init__.py b/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/api_spec.json b/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/api_spec.json new file mode 100644 index 000000000..65751d118 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/api_spec.json @@ -0,0 +1,106 @@ +{ + "openapi": "3.0.0", + "info": { + "title": "HR Data Provider API", + "version": "1.0.0", + "description": "HR data tools exposed via AgentCore Gateway with role-based DLP enforcement" + }, + "paths": { + "/search_employee": { + "post": { + "operationId": "search_employee", + "summary": "Search for employees by name, department, or role", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query — employee name, department, or role" + }, + "tenantId": { + "type": "string", + "description": "Tenant identifier (auto-injected by Request Interceptor)" + } + }, + "required": ["query"] + } + } + } + }, + "responses": { + "200": { + "description": "List of matching employees (PII/address/comp fields redacted by scope)" + } + } + } + }, + "/get_employee_profile": { + "post": { + "operationId": "get_employee_profile", + "summary": "Get detailed employee profile", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "employeeId": { + "type": "string", + "description": "Employee identifier from 
search results" + }, + "tenantId": { + "type": "string", + "description": "Tenant identifier (auto-injected)" + } + }, + "required": ["employeeId"] + } + } + } + }, + "responses": { + "200": { + "description": "Employee profile (PII requires hr-dlp-gateway/pii scope, address requires hr-dlp-gateway/address scope)" + } + } + } + }, + "/get_employee_compensation": { + "post": { + "operationId": "get_employee_compensation", + "summary": "Get employee compensation data", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "employeeId": { + "type": "string", + "description": "Employee identifier from search results" + }, + "tenantId": { + "type": "string", + "description": "Tenant identifier (auto-injected)" + } + }, + "required": ["employeeId"] + } + } + } + }, + "responses": { + "200": { + "description": "Compensation data — requires hr-dlp-gateway/comp scope" + } + } + } + } + } +} diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/__init__.py b/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/request_interceptor.py b/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/request_interceptor.py new file mode 100644 index 000000000..f87e4c364 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/request_interceptor.py @@ -0,0 +1,185 @@ +""" +AgentCore Gateway Request Interceptor. 
+ +Processes AgentCore interceptor payloads to: +- Decode JWT and resolve tenant context (client_id → tenantId/role/department) +- Inject tenantId into tool arguments (override mismatched values) +- Normalize scope strings for Cedar evaluation +- Generate correlation IDs for end-to-end request tracing +- Pass through tools/list unchanged (filtering handled by response interceptor) + +Payload format (interceptorInputVersion: "1.0"): +{ + "mcp": { + "rawGatewayRequest": {...}, + "gatewayRequest": { + "path": "/mcp", + "httpMethod": "POST", + "headers": {"Authorization": "Bearer ", ...}, + "body": {"jsonrpc": "2.0", "method": "tools/call", "params": {...}} + } + } +} +""" + +import base64 +import json +import logging +import os +import sys +import uuid +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Dict, List, Optional + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from tenant_mapping import resolve_client_context + +logging.getLogger().setLevel(logging.INFO) +logger = logging.getLogger(__name__) + + +@dataclass +class JWTClaims: + sub: str + username: str + tenant_id: str + role: Optional[str] + department: Optional[str] + scopes: List[str] + + @classmethod + def from_jwt_payload(cls, payload: Dict[str, Any]) -> "JWTClaims": + client_id = payload.get("sub", "") + scopes = payload.get("scope", "").split() if payload.get("scope") else [] + tenant_id = payload.get("custom:tenantId", "") + role = payload.get("custom:role") + department = payload.get("custom:department") + username = payload.get("username", payload.get("cognito:username", "")) + if not tenant_id: + ctx = resolve_client_context(client_id) + tenant_id = ctx["tenantId"] + role = ctx["role"] + department = ctx["department"] + username = ctx["username"] + return cls(sub=client_id, username=username, tenant_id=tenant_id, + role=role, department=department, scopes=scopes) + + +class HRRequestInterceptor: + def __init__(self, logger: 
Optional[logging.Logger] = None): + self.logger = logger or logging.getLogger(__name__) + self._correlation_id: Optional[str] = None + + def lambda_handler(self, event: Dict[str, Any], context: Any) -> Dict[str, Any]: + try: + if not self._valid_payload(event): + return self._error("Invalid AgentCore interceptor payload") + mcp = event["mcp"] + req = mcp["gatewayRequest"] + claims = self._decode_jwt(req.get("headers", {})) + return self._process(mcp, req, claims) + except Exception as e: + self.logger.error(f"Request interceptor error: {e}") + return self._error(f"Request processing failed: {e}") + + def _valid_payload(self, event: Dict[str, Any]) -> bool: + if event.get("interceptorInputVersion") != "1.0": + return False + mcp = event.get("mcp", {}) + return bool(mcp.get("gatewayRequest", {}).get("body")) + + def _decode_jwt(self, headers: Dict[str, str]) -> JWTClaims: + headers_ci = {k.lower(): v for k, v in headers.items()} + auth = headers_ci.get("authorization", "") + if not auth.startswith("Bearer "): + raise ValueError("Missing Authorization header") + token = auth[7:] + parts = token.split(".") + if len(parts) != 3: + raise ValueError("Invalid JWT format") + pad = parts[1] + "=" * (4 - len(parts[1]) % 4) + payload = json.loads(base64.urlsafe_b64decode(pad).decode()) + return JWTClaims.from_jwt_payload(payload) + + def _process(self, mcp: Dict, req: Dict, claims: JWTClaims) -> Dict[str, Any]: + body = req.get("body", {}) + method = body.get("method", "") + params = body.get("params", {}) + cid = str(uuid.uuid4()) + self._correlation_id = cid + + if method == "tools/call": + tool_name = params.get("name", "") + args = params.get("arguments", {}) + self._inject_tenant(args, claims, tool_name, cid) + scope_string = " ".join(claims.scopes) + return { + "interceptorOutputVersion": "1.0", + "mcp": { + "transformedGatewayRequest": { + "headers": {"X-Correlation-ID": cid}, + "body": { + "jsonrpc": "2.0", + "id": body.get("id"), + "method": method, + "params": { + 
"name": tool_name, + "arguments": { + **args, + "normalized_scope": f" {scope_string} ", + "correlation_id": cid, + }, + }, + }, + } + }, + } + + # tools/list and all other methods — pass through unchanged + self.logger.info(json.dumps({ + "event": "tool_discovery_request", "correlation_id": cid, + "tenant_id": claims.tenant_id, "scopes": claims.scopes, + "timestamp": datetime.utcnow().isoformat(), + })) + return { + "interceptorOutputVersion": "1.0", + "mcp": {"transformedGatewayRequest": {"body": body}}, + } + + def _inject_tenant(self, args: Dict, claims: JWTClaims, tool_name: str, cid: str) -> None: + if "tenantId" not in args: + args["tenantId"] = claims.tenant_id + self.logger.info(json.dumps({ + "event": "tenant_injection", "correlation_id": cid, + "tenant_id": claims.tenant_id, "tool_name": tool_name, + "timestamp": datetime.utcnow().isoformat(), + })) + elif args["tenantId"] != claims.tenant_id: + self.logger.warning(json.dumps({ + "event": "tenant_override", + "security_alert": "POTENTIAL_CROSS_TENANT_ACCESS", + "correlation_id": cid, + "attempted_tenant": args["tenantId"], + "correct_tenant": claims.tenant_id, + "tool_name": tool_name, + "timestamp": datetime.utcnow().isoformat(), + })) + args["tenantId"] = claims.tenant_id + + def _error(self, message: str) -> Dict[str, Any]: + return { + "interceptorOutputVersion": "1.0", + "mcp": { + "transformedGatewayResponse": { + "statusCode": 400, + "body": {"jsonrpc": "2.0", "id": 1, + "error": {"code": -32600, "message": "Invalid Request", "data": message}}, + } + }, + } + + +def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]: + interceptor = HRRequestInterceptor() + return interceptor.lambda_handler(event, context) diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/response_interceptor.py b/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/response_interceptor.py new file mode 100644 index 000000000..79f9cf8f9 --- /dev/null +++ 
b/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/response_interceptor.py @@ -0,0 +1,263 @@ +""" +AgentCore Gateway Response Interceptor — DLP enforcement. + +Processes AgentCore response interceptor payloads to: +- Filter tool discovery (tools/list) based on caller OAuth scopes +- Apply field-level DLP redaction (tools/call responses) based on scopes: + • Without hr-dlp-gateway/pii → redact email, phone, emergency_contact + • Without hr-dlp-gateway/address → redact address, city, state, zip_code + • Without hr-dlp-gateway/comp → redact salary, bonus, stock_options, pay_grade +- Pass through all other MCP methods unchanged + +Redacted fields receive the value: "[REDACTED - Insufficient Permissions]" +""" + +import base64 +import copy +import json +import logging +import os +import sys +import uuid +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from tenant_mapping import resolve_client_context + +logging.getLogger().setLevel(logging.INFO) +logger = logging.getLogger(__name__) + +REDACTED = "[REDACTED - Insufficient Permissions]" + + +@dataclass +class JWTClaims: + sub: str + username: str + tenant_id: str + role: Optional[str] + department: Optional[str] + scopes: List[str] + + @classmethod + def from_jwt_payload(cls, payload: Dict[str, Any]) -> "JWTClaims": + client_id = payload.get("sub", "") + scopes = payload.get("scope", "").split() if payload.get("scope") else [] + tenant_id = payload.get("custom:tenantId", "") + role = payload.get("custom:role") + department = payload.get("custom:department") + username = payload.get("username", payload.get("cognito:username", "")) + if not tenant_id: + ctx = resolve_client_context(client_id) + tenant_id, role, department, username = ( + ctx["tenantId"], ctx["role"], ctx["department"], ctx["username"] + ) + return cls(sub=client_id, username=username, 
tenant_id=tenant_id, + role=role, department=department, scopes=scopes) + + +def _normalize_scopes(scopes: List[str]) -> List[str]: + """Expand scope list to include both hr:x and x and hr-dlp-gateway/x forms.""" + out = list(scopes) + for s in scopes: + if "/" in s: + short = s.split("/")[-1] + out.append(short) + out.append(f"hr:{short}") + if s.startswith("hr:"): + out.append(s[3:]) + return out + + +class HRResponseInterceptor: + def __init__(self, log: Optional[logging.Logger] = None): + self.logger = log or logging.getLogger(__name__) + + def lambda_handler(self, event: Dict[str, Any], context: Any) -> Dict[str, Any]: + try: + if not self._valid_payload(event): + return self._error("Invalid AgentCore interceptor payload") + mcp = event["mcp"] + req_headers = mcp["gatewayRequest"].get("headers", {}) + req_body = mcp["gatewayRequest"].get("body", {}) + resp_body = mcp["gatewayResponse"].get("body", {}) + claims = self._decode_jwt(req_headers) + method = req_body.get("method", "") + cid = str(uuid.uuid4()) + + if method == "tools/list": + processed = self._filter_tools(resp_body, claims, cid) + elif method == "tools/call": + processed = self._redact(resp_body, claims, cid) + else: + processed = resp_body + + return { + "interceptorOutputVersion": "1.0", + "mcp": {"transformedGatewayResponse": {"statusCode": 200, "body": processed}}, + } + except Exception as e: + self.logger.error(f"Response interceptor error: {e}", exc_info=True) + return self._error(f"Response processing failed: {e}") + + def _valid_payload(self, event: Dict[str, Any]) -> bool: + if event.get("interceptorInputVersion") != "1.0": + return False + mcp = event.get("mcp", {}) + return bool( + mcp.get("gatewayRequest", {}).get("body") + and mcp.get("gatewayResponse", {}).get("body") is not None + ) + + def _decode_jwt(self, headers: Dict[str, str]) -> JWTClaims: + headers_ci = {k.lower(): v for k, v in headers.items()} + auth = headers_ci.get("authorization", "") + if not auth.startswith("Bearer 
"): + raise ValueError("Missing Authorization header") + token = auth[7:] + parts = token.split(".") + if len(parts) != 3: + raise ValueError("Invalid JWT format") + pad = parts[1] + "=" * (4 - len(parts[1]) % 4) + payload = json.loads(base64.urlsafe_b64decode(pad).decode()) + return JWTClaims.from_jwt_payload(payload) + + # ------------------------------------------------------------------ + # Tool discovery filtering + # ------------------------------------------------------------------ + + def _filter_tools(self, resp_body: Dict, claims: JWTClaims, cid: str) -> Dict: + result = resp_body.get("result", {}) + if not isinstance(result, dict) or "tools" not in result: + return resp_body + + ns = _normalize_scopes(claims.scopes) + filtered, hidden = [], 0 + + for tool in result.get("tools", []): + name = tool.get("name", "") + if "search_employee" in name: + if any(s in ns for s in ["hr:read", "read", "hr-dlp-gateway/read"]): + filtered.append(tool) + else: + hidden += 1 + elif "get_employee_profile" in name: + if any(s in ns for s in ["hr:pii", "pii", "hr-dlp-gateway/pii"]): + filtered.append(tool) + else: + hidden += 1 + elif "get_employee_compensation" in name: + if any(s in ns for s in ["hr:comp", "comp", "hr-dlp-gateway/comp"]): + filtered.append(tool) + else: + hidden += 1 + + if hidden: + self.logger.info(json.dumps({ + "event": "tool_discovery_filtering", "correlation_id": cid, + "tenant_id": claims.tenant_id, "hidden_tools": hidden, + "scopes": claims.scopes, "timestamp": datetime.utcnow().isoformat(), + })) + + return {**resp_body, "result": {**result, "tools": filtered}} + + # ------------------------------------------------------------------ + # Field-level DLP redaction + # ------------------------------------------------------------------ + + def _redact(self, resp_body: Dict, claims: JWTClaims, cid: str) -> Dict: + result = resp_body.get("result", {}) + if not isinstance(result, dict): + return resp_body + + content = result.get("content", []) + if not 
content or not isinstance(content, list): + return resp_body + + text = content[0].get("text", "") if content else "" + if not text: + return resp_body + + try: + lambda_resp = json.loads(text) + body_data = json.loads(lambda_resp.get("body", "{}")) + redacted_body, log = self._redact_data(body_data, claims.scopes) + lambda_resp["body"] = json.dumps(redacted_body) + content[0]["text"] = json.dumps(lambda_resp) + + if log: + self.logger.info(json.dumps({ + "event": "dlp_redaction", "correlation_id": cid, + "tenant_id": claims.tenant_id, "redacted_fields": log, + "timestamp": datetime.utcnow().isoformat(), + })) + + return {**resp_body, "result": result} + except (json.JSONDecodeError, KeyError, TypeError) as e: + self.logger.warning(f"DLP parse failed: {e}") + return resp_body + + def _redact_data(self, data: Dict, scopes: List[str]) -> Tuple[Dict, List[str]]: + ns = _normalize_scopes(scopes) + redacted = copy.deepcopy(data) + log: List[str] = [] + + if "employees" in redacted and isinstance(redacted["employees"], list): + for i, emp in enumerate(redacted["employees"]): + if isinstance(emp, dict): + redacted["employees"][i], emp_log = self._redact_employee(emp, ns) + log.extend(emp_log) + elif any(k in redacted for k in ["employee_id", "name", "email", "salary"]): + redacted, log = self._redact_employee(redacted, ns) + + return redacted, log + + def _redact_employee(self, emp: Dict, ns: List[str]) -> Tuple[Dict, List[str]]: + out = emp.copy() + log: List[str] = [] + + if not any(s in ns for s in ["hr:pii", "pii", "hr-dlp-gateway/pii"]): + for f in ["email", "phone", "personal_phone", "emergency_contact"]: + if f in out: + out[f] = REDACTED + log.append(f"Redacted {f} (missing hr-dlp-gateway/pii)") + + if not any(s in ns for s in ["hr:address", "address", "hr-dlp-gateway/address"]): + for f in ["address", "home_address", "street", "city", "state", "zip_code"]: + if f in out: + out[f] = REDACTED + log.append(f"Redacted {f} (missing hr-dlp-gateway/address)") + + if not 
any(s in ns for s in ["hr:comp", "comp", "hr-dlp-gateway/comp"]): + for f in ["salary", "bonus", "stock_options", "pay_grade", "benefits_value", "total_compensation"]: + if f in out: + out[f] = REDACTED + log.append(f"Redacted {f} (missing hr-dlp-gateway/comp)") + if "compensation_history" in out and isinstance(out["compensation_history"], list): + out["compensation_history"] = [ + {**e, "salary": REDACTED, "bonus": REDACTED} + if isinstance(e, dict) else e + for e in out["compensation_history"] + ] + log.append("Redacted compensation_history (missing hr-dlp-gateway/comp)") + + return out, log + + def _error(self, message: str) -> Dict[str, Any]: + return { + "interceptorOutputVersion": "1.0", + "mcp": { + "transformedGatewayResponse": { + "statusCode": 500, + "body": {"jsonrpc": "2.0", "id": 1, + "error": {"code": -32603, "message": "Internal Error", "data": message}}, + } + }, + } + + +def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]: + interceptor = HRResponseInterceptor() + return interceptor.lambda_handler(event, context) diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/tenant_mapping.py b/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/tenant_mapping.py new file mode 100644 index 000000000..6b8f139ec --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/prerequisite/lambda/interceptors/tenant_mapping.py @@ -0,0 +1,64 @@ +""" +Shared tenant mapping loader for request/response interceptors. + +Maps Cognito client_id → tenant context (tenantId, role, department, username). +Loaded once per Lambda container lifetime via lru_cache. + +Why this exists: Cognito V2_0 client_credentials tokens do not carry custom +claims, so tenant context must be derived from the client_id (JWT sub claim) +using this external mapping rather than from the token itself. + +Load order: + 1. Local file (CLIENT_TENANT_MAPPING_PATH env var, default config/client_tenant_mapping.json) + 2. 
"""
Shared tenant mapping loader for request/response interceptors.

Maps Cognito client_id -> tenant context (tenantId, role, department, username).
Loaded once per Lambda container lifetime via lru_cache.

Why this exists: Cognito V2_0 client_credentials tokens do not carry custom
claims, so tenant context must be derived from the client_id (JWT sub claim)
using this external mapping rather than from the token itself.

Load order:
  1. Local file (CLIENT_TENANT_MAPPING_PATH env var, default config/client_tenant_mapping.json)
  2. SSM Parameter Store (/app/hrdlp/client-tenant-mapping) — populated by
     scripts/cognito_credentials_provider.py create
  3. Empty fallback (all client_ids return "unknown" — safe deny-by-default)
"""

import json
import os
from functools import lru_cache
from typing import Dict

_SSM_PARAM = "/app/hrdlp/client-tenant-mapping"
_FALLBACK: Dict[str, Dict[str, str]] = {}


@lru_cache(maxsize=1)
def _load_mapping() -> Dict[str, Dict[str, str]]:
    """Load the client_id -> tenant-context mapping (cached per container)."""
    # Source 1: local JSON file shipped with the deployment package.
    mapping_path = os.getenv("CLIENT_TENANT_MAPPING_PATH", "config/client_tenant_mapping.json")
    try:
        with open(mapping_path, "r", encoding="utf-8") as handle:
            local = json.load(handle)
    except FileNotFoundError:
        pass  # expected when the file is not bundled — fall through to SSM
    except Exception as exc:
        print(f"[tenant-mapping] WARNING: Failed to load {mapping_path}: {exc}")
    else:
        print(f"[tenant-mapping] Loaded {len(local)} clients from {mapping_path}")
        return local

    # Source 2: SSM parameter written by cognito_credentials_provider.py create.
    try:
        import boto3
        param = boto3.client("ssm").get_parameter(Name=_SSM_PARAM, WithDecryption=False)
        remote = json.loads(param["Parameter"]["Value"])
        print(f"[tenant-mapping] Loaded {len(remote)} clients from SSM ({_SSM_PARAM})")
        return remote
    except Exception as exc:
        # Source 3: empty mapping — every lookup resolves to "unknown".
        print(f"[tenant-mapping] WARNING: SSM fallback failed: {exc}, using empty mapping")
        return _FALLBACK


def resolve_client_context(client_id: str) -> Dict[str, str]:
    """Return tenant context for client_id; returns 'unknown' values if not found."""
    context = _load_mapping().get(client_id)
    if context:
        return context
    print(f"[tenant-mapping] WARNING: Unknown client_id: {client_id}")
    # Fresh dict each call so callers cannot corrupt a shared default.
    return {"tenantId": "unknown", "role": "unknown", "department": "unknown", "username": "unknown"}


def reload_mapping() -> None:
    """Force a re-read of the mapping on next lookup (clears lru_cache)."""
    _load_mapping.cache_clear()
"""
Audit Logger for the HR Data Provider Lambda.

Every HR data access attempt is written to CloudWatch as structured JSON
with a correlation ID and tenant context for compliance analysis.
"""

import json
import logging
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional


class AuditEventType(Enum):
    """Discriminator values stamped into each structured audit record."""
    LAMBDA_EXECUTION = "lambda_execution"
    TOOL_INVOCATION = "tool_invocation"
    DATA_ACCESS = "data_access"
    ERROR_OCCURRED = "error_occurred"
    VALIDATION_FAILED = "validation_failed"
    TENANT_ACCESS_CHECK = "tenant_access_check"


class AuditLogger:
    """Centralized audit logger for Lambda HR operations."""

    def __init__(self, logger_name: str = "hr-lambda-audit"):
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.INFO)
        # Attach a bare-message handler once per container; CloudWatch adds
        # its own timestamp/stream metadata around each line.
        if not self.logger.handlers:
            stream = logging.StreamHandler()
            stream.setFormatter(logging.Formatter("%(message)s"))
            self.logger.addHandler(stream)

    def _emit(self, level: int, payload: Dict[str, Any]) -> None:
        """Append a UTC timestamp, serialize *payload*, and log at *level*."""
        payload["timestamp"] = datetime.utcnow().isoformat()
        self.logger.log(level, json.dumps(payload))

    def log_lambda_execution(
        self,
        correlation_id: str,
        tool_name: str,
        tenant_id: str,
        function_name: str,
        arguments: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Record the start of a Lambda invocation with sanitized arguments."""
        self._emit(logging.INFO, {
            "event_type": AuditEventType.LAMBDA_EXECUTION.value,
            "correlation_id": correlation_id,
            "tool_name": tool_name,
            "tenant_id": tenant_id,
            "function_name": function_name,
            "arguments": self._sanitize(arguments or {}),
        })

    def log_tool_invocation(
        self,
        correlation_id: str,
        tool_name: str,
        tenant_id: str,
        result_count: Optional[int] = None,
        success: bool = True,
    ) -> None:
        """Record the outcome of a single tool call."""
        self._emit(logging.INFO, {
            "event_type": AuditEventType.TOOL_INVOCATION.value,
            "correlation_id": correlation_id,
            "tool_name": tool_name,
            "tenant_id": tenant_id,
            "result_count": result_count,
            "success": success,
        })

    def log_data_access(
        self,
        correlation_id: str,
        tenant_id: str,
        employee_id: Optional[str] = None,
        data_type: str = "employee_data",
        access_granted: bool = True,
        reason: Optional[str] = None,
    ) -> None:
        """Record an HR data read; denials are logged at WARNING level."""
        self._emit(logging.INFO if access_granted else logging.WARNING, {
            "event_type": AuditEventType.DATA_ACCESS.value,
            "correlation_id": correlation_id,
            "tenant_id": tenant_id,
            "employee_id": employee_id,
            "data_type": data_type,
            "access_granted": access_granted,
            "reason": reason,
        })

    def log_tenant_access_check(
        self,
        correlation_id: str,
        tenant_id: str,
        employee_id: str,
        access_granted: bool,
        reason: Optional[str] = None,
    ) -> None:
        """Record a tenant-isolation check; failures are logged at WARNING level."""
        self._emit(logging.INFO if access_granted else logging.WARNING, {
            "event_type": AuditEventType.TENANT_ACCESS_CHECK.value,
            "correlation_id": correlation_id,
            "tenant_id": tenant_id,
            "employee_id": employee_id,
            "access_granted": access_granted,
            "reason": reason or ("Access granted" if access_granted else "Tenant mismatch"),
            "security_check": "tenant_isolation",
        })

    def log_error(
        self,
        correlation_id: str,
        error_message: str,
        error_type: str,
        tool_name: Optional[str] = None,
        tenant_id: Optional[str] = None,
        additional_context: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Record an error with optional tool/tenant context."""
        self._emit(logging.ERROR, {
            "event_type": AuditEventType.ERROR_OCCURRED.value,
            "correlation_id": correlation_id,
            "error_message": error_message,
            "error_type": error_type,
            "tool_name": tool_name,
            "tenant_id": tenant_id,
            "additional_context": additional_context or {},
        })

    def _sanitize(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Mask credential-like keys and truncate oversized string values."""
        cleaned = dict(arguments)
        for sensitive in ("password", "token", "secret", "key", "credential", "ssn"):
            if sensitive in cleaned:
                cleaned[sensitive] = "[REDACTED]"
        for name, value in cleaned.items():
            if isinstance(value, str) and len(value) > 200:
                cleaned[name] = value[:200] + "...[TRUNCATED]"
        return cleaned


# Global instance + module-level convenience functions
audit_logger = AuditLogger()


def log_lambda_execution(correlation_id, tool_name, tenant_id, function_name, arguments=None):
    audit_logger.log_lambda_execution(correlation_id, tool_name, tenant_id, function_name, arguments)


def log_tool_invocation(correlation_id, tool_name, tenant_id, result_count=None, success=True):
    audit_logger.log_tool_invocation(correlation_id, tool_name, tenant_id, result_count, success)


def log_data_access(correlation_id, tenant_id, employee_id=None, data_type="employee_data",
                    access_granted=True, reason=None):
    audit_logger.log_data_access(correlation_id, tenant_id, employee_id, data_type, access_granted, reason)


def log_tenant_access_check(correlation_id, tenant_id, employee_id, access_granted, reason=None):
    audit_logger.log_tenant_access_check(correlation_id, tenant_id, employee_id, access_granted, reason)


def log_error(correlation_id, error_message, error_type, tool_name=None, tenant_id=None,
              additional_context=None):
    audit_logger.log_error(correlation_id, error_message, error_type, tool_name, tenant_id, additional_context)
"""
Deterministic dummy HR data generator.

Seeds the global ``random`` module from an MD5 hash of the tenant_id, so
every tenant always receives the same synthetic employee roster — making
the DLP redaction demos repeatable across invocations.
"""

import hashlib
import random
from datetime import datetime
from typing import Any, Dict, List, Optional


def get_dummy_employees(tenant_id: str) -> List[Dict[str, Any]]:
    """Generate consistent synthetic employees for the given tenant.

    The sequence of ``random`` calls below is deliberately fixed: any
    reordering would change the generated values for existing tenants.
    """
    tenant_seed = int(hashlib.md5(tenant_id.encode()).hexdigest()[:8], 16)
    random.seed(tenant_seed)

    roster = [
        ("John Smith", "Engineering", "Senior Developer", "john.smith@company.com"),
        ("Sarah Johnson", "HR", "HR Manager", "sarah.johnson@company.com"),
        ("Mike Davis", "Finance", "Financial Analyst", "mike.davis@company.com"),
        ("Lisa Wilson", "Marketing", "Marketing Director", "lisa.wilson@company.com"),
        ("David Brown", "Engineering", "DevOps Engineer", "david.brown@company.com"),
        ("Emily Chen", "HR", "HR Specialist", "emily.chen@company.com"),
        ("Robert Taylor", "Finance", "Controller", "robert.taylor@company.com"),
        ("Jennifer Lee", "Marketing", "Content Manager", "jennifer.lee@company.com"),
        ("Michael Rodriguez", "Engineering", "Software Architect", "michael.rodriguez@company.com"),
        ("Amanda White", "HR", "Talent Acquisition", "amanda.white@company.com"),
    ]

    employees: List[Dict[str, Any]] = []
    for index, (full_name, dept, role, email) in enumerate(roster, start=1):
        employees.append(
            {
                "employee_id": f"{tenant_id}-emp-{index:03d}",
                "name": full_name,
                "department": dept,
                "role": role,
                "email": email,
                # PII — redacted without hr-dlp-gateway/pii scope
                "phone": f"555-{random.randint(100,999)}-{random.randint(1000,9999)}",
                "personal_phone": f"555-{random.randint(100,999)}-{random.randint(1000,9999)}",
                "emergency_contact": f"Emergency: 555-{random.randint(100,999)}-{random.randint(1000,9999)}",
                # Address — redacted without hr-dlp-gateway/address scope
                "address": f"{random.randint(100,9999)} {random.choice(['Main','Oak','Pine','Elm','Cedar'])} St",
                "city": random.choice(["Seattle", "Portland", "San Francisco", "Austin", "Denver", "Boston"]),
                "state": random.choice(["WA", "OR", "CA", "TX", "CO", "MA"]),
                "zip_code": str(random.randint(10000, 99999)),
                # Employment
                "hire_date": f"20{random.randint(18,23)}-{random.randint(1,12):02d}-{random.randint(1,28):02d}",
                "manager": None if index == 1 else "Jane Manager",
                "status": "Active",
                # Compensation — redacted without hr-dlp-gateway/comp scope
                "salary": random.randint(60000, 150000),
                "bonus": random.randint(5000, 25000),
                "stock_options": random.randint(0, 10000),
                "pay_grade": random.choice(["L3", "L4", "L5", "L6", "L7"]),
                "benefits_value": random.randint(15000, 30000),
                "compensation_history": [
                    {
                        "year": year,
                        "salary": random.randint(salary_lo, salary_hi),
                        "bonus": random.randint(bonus_lo, bonus_hi),
                        "promotion": random.choice([True, False]),
                    }
                    for year, salary_lo, salary_hi, bonus_lo, bonus_hi in (
                        (2023, 55000, 140000, 3000, 20000),
                        (2022, 50000, 130000, 2000, 18000),
                    )
                ],
            }
        )
    return employees


def get_employee_by_id(employee_id: str, tenant_id: str) -> Optional[Dict[str, Any]]:
    """Return the tenant's employee with the given id, or None."""
    for emp in get_dummy_employees(tenant_id):
        if emp["employee_id"] == employee_id:
            return emp
    return None


def search_employees_by_query(query: str, tenant_id: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """Case-insensitive substring search over name/department/role/email."""
    roster = get_dummy_employees(tenant_id)
    if not query:
        return roster[:max_results]
    needle = query.lower()
    matches = [
        emp for emp in roster
        if any(needle in emp[field].lower() for field in ("name", "department", "role", "email"))
    ]
    return matches[:max_results]


def get_employee_compensation_data(employee_id: str, tenant_id: str) -> Optional[Dict[str, Any]]:
    """Return the full (unredacted) compensation view, or None if not found."""
    record = get_employee_by_id(employee_id, tenant_id)
    if record is None:
        return None
    # These random calls continue the tenant-seeded stream left over from
    # get_dummy_employees(), so review dates are deterministic per tenant too.
    last_review = f"2023-{random.randint(1,12):02d}-{random.randint(1,28):02d}"
    next_review = f"2024-{random.randint(1,12):02d}-{random.randint(1,28):02d}"
    rating = random.choice(["Exceeds Expectations", "Meets Expectations", "Outstanding"])

    view: Dict[str, Any] = {key: record[key] for key in (
        "employee_id", "name", "department", "role",
        "salary", "bonus", "stock_options", "pay_grade", "benefits_value",
    )}
    view["total_compensation"] = record["salary"] + record["bonus"] + record["benefits_value"]
    view["compensation_history"] = record["compensation_history"]
    view["last_review_date"] = last_review
    view["next_review_date"] = next_review
    view["performance_rating"] = rating
    view["_data_classification"] = "HIGHLY_SENSITIVE_COMPENSATION_DATA"
    view["_requires_scope"] = "hr-dlp-gateway/comp"
    return view


def validate_tenant_access(employee_id: str, tenant_id: str) -> bool:
    """True only when employee_id carries the tenant prefix AND exists."""
    if not employee_id.startswith(f"{tenant_id}-emp-"):
        return False
    return get_employee_by_id(employee_id, tenant_id) is not None
"""
HR tool handlers: search_employee, get_employee_profile, get_employee_compensation.

The Lambda returns all data; field-level DLP redaction is applied downstream
by the Gateway Response Interceptor based on the caller's OAuth scopes.
"""

from typing import Any, Dict, List

from dummy_data import (
    get_employee_by_id,
    get_employee_compensation_data,
    search_employees_by_query,
    validate_tenant_access,
)
from audit_logger import (
    log_data_access,
    log_tenant_access_check,
    log_tool_invocation,
)


def _missing_param(name: str, error_code: str) -> Dict[str, Any]:
    # Standard payload for an absent required argument.
    return {"error": f"Missing required parameter: {name}", "error_code": error_code}


def _access_denied(employee_id: str, tenant_id: str) -> Dict[str, Any]:
    # Same error code as "not found" so other tenants' employee IDs don't leak.
    return {
        "error": "Employee not found or access denied",
        "error_code": "EMPLOYEE_NOT_FOUND",
        "employee_id": employee_id,
        "tenant_id": tenant_id,
    }


def inject_correlation_id(arguments: Dict[str, Any], correlation_id: str) -> Dict[str, Any]:
    """Return a copy of *arguments* tagged with the request correlation ID."""
    return {**arguments, "_correlation_id": correlation_id}


# ---------------------------------------------------------------------------
# search_employee
# ---------------------------------------------------------------------------

def handle_search_employee(arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Search employees by name/department/role/email within one tenant."""
    query = arguments.get("query", "").strip()
    tenant_id = arguments.get("tenantId", "")
    correlation_id = arguments.get("_correlation_id", "unknown")

    if not tenant_id:
        return _missing_param("tenantId", "MISSING_TENANT_ID")
    if not query:
        return _missing_param("query", "MISSING_QUERY")

    try:
        matches = search_employees_by_query(query, tenant_id, max_results=10)
        log_data_access(correlation_id, tenant_id, data_type="employee_search",
                        access_granted=True, reason=f"Search query: {query}")
        log_tool_invocation(correlation_id, "search_employee", tenant_id,
                            result_count=len(matches), success=True)
        return {
            "employees": matches,
            "total_count": len(matches),
            "query": query,
            "tenant_id": tenant_id,
        }
    except Exception as exc:
        log_tool_invocation(correlation_id, "search_employee", tenant_id, success=False)
        return {"error": f"Employee search failed: {str(exc)}", "error_code": "SEARCH_FAILED"}


# ---------------------------------------------------------------------------
# get_employee_profile
# ---------------------------------------------------------------------------

def handle_get_employee_profile(arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Return the full profile for one employee (redaction happens downstream)."""
    employee_id = arguments.get("employeeId", "").strip()
    tenant_id = arguments.get("tenantId", "")
    correlation_id = arguments.get("_correlation_id", "unknown")

    if not tenant_id:
        return _missing_param("tenantId", "MISSING_TENANT_ID")
    if not employee_id:
        return _missing_param("employeeId", "MISSING_EMPLOYEE_ID")

    allowed = validate_tenant_access(employee_id, tenant_id)
    log_tenant_access_check(correlation_id, tenant_id, employee_id, allowed)
    if not allowed:
        return _access_denied(employee_id, tenant_id)

    try:
        record = get_employee_by_id(employee_id, tenant_id)
        if not record:
            return {"error": "Employee not found", "error_code": "EMPLOYEE_NOT_FOUND",
                    "employee_id": employee_id, "tenant_id": tenant_id}

        log_data_access(correlation_id, tenant_id, employee_id, "employee_profile", True)
        log_tool_invocation(correlation_id, "get_employee_profile", tenant_id,
                            result_count=1, success=True)

        # Field-complete profile: PII / address / pay_grade are stripped by
        # the Response Interceptor according to the caller's OAuth scopes.
        return {field: record[field] for field in (
            "employee_id", "name", "department", "role", "hire_date",
            "manager", "status",
            "email", "phone", "personal_phone", "emergency_contact",
            "address", "city", "state", "zip_code",
            "pay_grade",
        )}
    except Exception as exc:
        log_tool_invocation(correlation_id, "get_employee_profile", tenant_id, success=False)
        return {"error": f"Profile retrieval failed: {str(exc)}", "error_code": "PROFILE_RETRIEVAL_FAILED"}


# ---------------------------------------------------------------------------
# get_employee_compensation
# ---------------------------------------------------------------------------

def handle_get_employee_compensation(arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Return RESTRICTED compensation data for one employee."""
    employee_id = arguments.get("employeeId", "").strip()
    tenant_id = arguments.get("tenantId", "")
    correlation_id = arguments.get("_correlation_id", "unknown")

    if not tenant_id:
        return _missing_param("tenantId", "MISSING_TENANT_ID")
    if not employee_id:
        return _missing_param("employeeId", "MISSING_EMPLOYEE_ID")

    allowed = validate_tenant_access(employee_id, tenant_id)
    log_tenant_access_check(correlation_id, tenant_id, employee_id, allowed)
    if not allowed:
        return _access_denied(employee_id, tenant_id)

    try:
        compensation = get_employee_compensation_data(employee_id, tenant_id)
        if not compensation:
            return {"error": "Employee not found", "error_code": "EMPLOYEE_NOT_FOUND"}

        log_data_access(correlation_id, tenant_id, employee_id, "employee_compensation", True)
        log_tool_invocation(correlation_id, "get_employee_compensation", tenant_id,
                            result_count=1, success=True)
        return compensation
    except Exception as exc:
        log_tool_invocation(correlation_id, "get_employee_compensation", tenant_id, success=False)
        return {"error": f"Compensation retrieval failed: {str(exc)}",
                "error_code": "COMPENSATION_RETRIEVAL_FAILED"}


# ---------------------------------------------------------------------------
# Schema + validation helpers
# ---------------------------------------------------------------------------

def _string_props(*names: str) -> Dict[str, Any]:
    # Fresh dicts per call so callers can safely mutate a schema copy.
    return {name: {"type": "string"} for name in names}


def get_available_tools() -> List[Dict[str, Any]]:
    """Tool schemas exposed through the Gateway Lambda target."""
    return [
        {
            "name": "hr-lambda-target___search_employee",
            "description": "Search for employees by name, department, or role",
            "inputSchema": {
                "type": "object",
                "properties": _string_props("tenantId", "query"),
                "required": ["query"],
            },
        },
        {
            "name": "hr-lambda-target___get_employee_profile",
            "description": "Get detailed employee profile information",
            "inputSchema": {
                "type": "object",
                "properties": _string_props("tenantId", "employeeId"),
                "required": ["employeeId"],
            },
        },
        {
            "name": "hr-lambda-target___get_employee_compensation",
            "description": "Get employee compensation data (requires hr-dlp-gateway/comp scope)",
            "inputSchema": {
                "type": "object",
                "properties": _string_props("tenantId", "employeeId"),
                "required": ["employeeId"],
            },
        },
    ]


def validate_tool_arguments(tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Check *arguments* against the tool's declared required fields."""
    matching = next((t for t in get_available_tools() if t["name"] == tool_name), None)
    if matching is None:
        # Unknown tool name — pass through for base-tool routing
        return {"valid": True, "message": "Unknown tool name, routing by base name"}

    required = matching["inputSchema"].get("required", [])
    absent = [field for field in required if not arguments.get(field)]
    if absent:
        return {
            "valid": False,
            "error": f"Missing required fields: {', '.join(absent)}",
            "error_code": "MISSING_REQUIRED_FIELDS",
        }
    return {"valid": True, "message": "Arguments are valid"}
"""
HR Data Provider Lambda — main entry point.

Routes AgentCore Gateway tool calls to the appropriate HR handler.
All data is returned unredacted; the Gateway Response Interceptor
applies field-level DLP based on the caller's OAuth scopes.
"""

import json
import logging
import uuid
from datetime import datetime
from typing import Any, Dict, Optional

from audit_logger import log_error, log_lambda_execution
from hr_handlers import (
    handle_get_employee_compensation,
    handle_get_employee_profile,
    handle_search_employee,
    inject_correlation_id,
    validate_tool_arguments,
)

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Dispatch table: base tool name -> handler function.
_TOOL_HANDLERS = {
    "search_employee": handle_search_employee,
    "get_employee_profile": handle_get_employee_profile,
    "get_employee_compensation": handle_get_employee_compensation,
}


def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
    """Route a Gateway tool call to its handler and wrap the JSON response.

    Returns an API-Gateway-style dict (statusCode / headers / body). Client
    errors map to 400/403; unexpected failures map to 500 (previously these
    were mis-reported as 400 BAD_REQUEST).
    """
    correlation_id = _extract_correlation_id(event, context)

    try:
        logger.info(f"Event: {json.dumps(event)}")

        tool_name = _extract_tool_name(event, context)
        arguments = _extract_arguments(event, tool_name)
        tenant_id = arguments.get("tenantId", "")

        log_lambda_execution(
            correlation_id=correlation_id,
            tool_name=tool_name,
            tenant_id=tenant_id,
            function_name=getattr(context, "function_name", "hr-data-provider"),
            arguments=arguments,
        )

        if not tool_name:
            return _error(correlation_id, "Missing tool_name")

        arguments_with_cid = inject_correlation_id(arguments, correlation_id)

        validation = validate_tool_arguments(tool_name, arguments_with_cid)
        if not validation["valid"]:
            return _error(correlation_id, validation["error"], validation.get("error_code", "VALIDATION_ERROR"))

        handler = _TOOL_HANDLERS.get(_base_tool_name(tool_name))
        if handler is None:
            return _error(correlation_id, f"Unknown tool: {tool_name}")
        result = handler(arguments_with_cid)

        if "error" in result:
            # Tenant-isolation / not-found denials surface as 403.
            status = 403 if result.get("error_code") == "EMPLOYEE_NOT_FOUND" else 400
            return _error(correlation_id, result["error"], result.get("error_code", "HANDLER_ERROR"), status)

        result["_metadata"] = {
            "data_type": "DUMMY_DEMONSTRATION_DATA",
            "correlation_id": correlation_id,
            "tenant_id": tenant_id,
            "tool_name": tool_name,
            "timestamp": datetime.utcnow().isoformat(),
        }

        return {
            "statusCode": 200,
            "headers": {"Content-Type": "application/json", "X-Correlation-ID": correlation_id},
            "body": json.dumps(result),
        }

    except Exception as e:
        log_error(correlation_id, str(e), type(e).__name__, additional_context={"event_keys": list(event.keys())})
        # Unexpected failure is a server-side error, not a client error.
        return _error(correlation_id, f"Lambda execution error: {str(e)}", "INTERNAL_ERROR", 500)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _base_tool_name(tool_name: str) -> str:
    """Strip the Gateway target prefix: "hr-lambda-target___search_employee" -> "search_employee"."""
    for separator in ("___", "__"):
        if separator in tool_name:
            return tool_name.split(separator)[-1]
    return tool_name


def _extract_tool_name(event: Dict[str, Any], context: Any) -> str:
    """Resolve the invoked tool name from the supported invocation shapes."""
    # 1. AgentCore: context.client_context.custom
    if context and hasattr(context, "client_context") and context.client_context:
        custom = getattr(context.client_context, "custom", {}) or {}
        agentcore_name = custom.get("bedrockAgentCoreToolName")
        if agentcore_name:
            return agentcore_name
    # 2. event.params.name (JSON-RPC style)
    params = event.get("params", {})
    if "params" in event and "name" in params:
        return params["name"]
    # 3. explicit fields
    return event.get("tool_name") or event.get("tool", "")


def _extract_arguments(event: Dict[str, Any], tool_name: str) -> Dict[str, Any]:
    """Locate the tool arguments in the supported invocation shapes."""
    params = event.get("params", {})
    if "params" in event and "arguments" in params:
        return params["arguments"]
    # AgentCore Gateway format: the event itself IS the arguments payload.
    if tool_name and not any(key in event for key in ("params", "tool_name", "tool", "arguments")):
        return event
    return event.get("arguments", {})


def _extract_correlation_id(event: Dict[str, Any], context: Any) -> str:
    """Find an existing correlation id in the event, or mint a fresh UUID."""
    for path in (("context", "correlation_id"), ("metadata", "correlation_id")):
        node: Any = event
        for key in path:
            if isinstance(node, dict):
                node = node.get(key)
        if isinstance(node, str):
            return node
    headers = event.get("headers", {})
    found = headers.get("X-Correlation-ID") or headers.get("x-correlation-id")
    return found or str(uuid.uuid4())


def _error(
    correlation_id: str,
    message: str,
    error_code: str = "BAD_REQUEST",
    status_code: int = 400,
) -> Dict[str, Any]:
    """Build a JSON error response carrying the correlation id."""
    body = {
        "error": message,
        "error_code": error_code,
        "correlation_id": correlation_id,
        "timestamp": datetime.utcnow().isoformat(),
    }
    return {
        "statusCode": status_code,
        "headers": {"Content-Type": "application/json", "X-Correlation-ID": correlation_id},
        "body": json.dumps(body),
    }
PII FIELDS (requires scope: hr-dlp-gateway/pii) +----------------------------------------------------- +Fields redacted when hr-dlp-gateway/pii scope is absent: + + email Work email address + phone Work phone number + personal_phone Personal mobile phone + emergency_contact Emergency contact name and number + +Redaction marker: "[REDACTED - Insufficient Permissions]" + +2. ADDRESS FIELDS (requires scope: hr-dlp-gateway/address) +------------------------------------------------------------ +Fields redacted when hr-dlp-gateway/address scope is absent: + + address Street address + city City + state State/Province + zip_code Postal code + +Redaction marker: "[REDACTED - Insufficient Permissions]" + +3. COMPENSATION FIELDS (requires scope: hr-dlp-gateway/comp) +-------------------------------------------------------------- +Fields redacted when hr-dlp-gateway/comp scope is absent: + + salary Annual base salary + bonus Annual bonus + stock_options Stock option grant + pay_grade Pay grade / level + benefits_value Total benefits value + total_compensation Total compensation (sum) + compensation_history[] Historical salary and bonus records + .salary — individual record salary + .bonus — individual record bonus + +Redaction marker: "[REDACTED - Insufficient Permissions]" + +4. TOOL DISCOVERY FILTERING +---------------------------- +The compensation tool is hidden entirely from tools/list responses for users +without hr-dlp-gateway/comp scope. Users without hr-dlp-gateway/read scope +see no tools at all. + +5. REDACTION IMPLEMENTATION +----------------------------- +Redaction is applied in the Response Interceptor Lambda +(prerequisite/lambda/interceptors/response_interceptor.py). + +The Lambda receives the raw Lambda HR Provider response, applies scope-based +redaction in-place, and returns the modified response to the Gateway. + +All redaction actions are logged to CloudWatch with correlation IDs. + +6. 
SCOPE NORMALIZATION +----------------------- +The following scope formats are treated as equivalent: + + hr-dlp-gateway/pii == hr:pii == pii + hr-dlp-gateway/comp == hr:comp == comp + hr-dlp-gateway/read == hr:read == read + hr-dlp-gateway/address == hr:address == address diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/policies/hr_data_governance.txt b/02-use-cases/role-based-hr-data-agent/prerequisite/policies/hr_data_governance.txt new file mode 100644 index 000000000..482d4aca7 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/prerequisite/policies/hr_data_governance.txt @@ -0,0 +1,74 @@ +HR DATA GOVERNANCE POLICY +========================= + +1. DATA CLASSIFICATION +---------------------- +HR data is classified into four sensitivity tiers: + + PUBLIC — Employee name, department, job title + INTERNAL — Work email, work phone, hire date, reporting manager + CONFIDENTIAL — Home address, personal phone, emergency contacts (PII) + RESTRICTED — Salary, bonus, stock options, pay grade, compensation history + +2. ACCESS CONTROL +----------------- +Access to HR data is governed by OAuth 2.0 scopes issued via Amazon Cognito: + + hr-dlp-gateway/read — PUBLIC + INTERNAL data + hr-dlp-gateway/pii — CONFIDENTIAL personal contact data + hr-dlp-gateway/address — CONFIDENTIAL address data + hr-dlp-gateway/comp — RESTRICTED compensation data + +Role-to-scope mapping: + + HR Manager : read, pii, address, comp (full access) + HR Specialist : read, pii (no compensation, no address) + Employee : read (search only; all sensitive data redacted) + Admin : read, pii, address, comp (full access) + +3. DATA LOSS PREVENTION (DLP) ENFORCEMENT +------------------------------------------ +Field-level redaction is applied at the AgentCore Gateway layer by the +Response Interceptor Lambda. 
Redacted fields display: + "[REDACTED - Insufficient Permissions]" + +Redaction is applied AFTER Cedar policy authorization and BEFORE the +response reaches the calling agent or end user. + +4. TENANT ISOLATION +------------------- +All HR data is scoped to a tenant identifier (tenantId). The Request +Interceptor Lambda injects the correct tenantId from the OAuth token's +client_id mapping, preventing any cross-tenant data access. + +5. AUDIT LOGGING +---------------- +All data access events are logged to Amazon CloudWatch Logs with: + - Correlation ID (end-to-end request tracing) + - Tenant ID + - OAuth scopes of the requesting party + - Fields redacted and reason + - Timestamp (UTC) + +Log groups: + /aws/lambda/hr-data-provider-lambda + /aws/lambda/hr-request-interceptor-lambda + /aws/lambda/hr-response-interceptor-lambda + +6. INCIDENT RESPONSE +-------------------- +Potential cross-tenant access attempts are logged as SECURITY_ALERT events +in CloudWatch. The Request Interceptor overrides mismatched tenantId values +and logs a POTENTIAL_CROSS_TENANT_ACCESS warning for investigation. + +7. DATA RETENTION +----------------- + - CloudWatch audit logs: 90 days + - HR data (synthetic demo): no persistence — all data generated deterministically + - OAuth tokens: 60-minute access token, 30-day refresh token + +8. COMPLIANCE NOTE +------------------ +This system uses synthetic (dummy) data for demonstration purposes only. +No real employee PII is stored or processed. In a production deployment, +ensure GDPR, CCPA, and applicable HR privacy regulations are met. 
diff --git a/02-use-cases/role-based-hr-data-agent/prerequisite/prereqs_config.yaml b/02-use-cases/role-based-hr-data-agent/prerequisite/prereqs_config.yaml new file mode 100644 index 000000000..47a77d2c6 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/prerequisite/prereqs_config.yaml @@ -0,0 +1,53 @@ +# Prerequisites configuration for role-based-hr-data-agent +# Used by scripts/prereq.sh and scripts/agentcore_gateway.py + +aws: + region: us-east-1 + account_id: "" # Set via AWS CLI / environment + +lambda: + function_name: hr-data-provider-lambda + runtime: python3.11 + timeout: 30 + memory: 256 + handler: lambda_handler.lambda_handler + s3_bucket: "" # Set by prereq.sh + s3_key: hr-data-provider/deployment.zip + +interceptors: + request: + function_name: hr-request-interceptor-lambda + handler: request_interceptor.lambda_handler + response: + function_name: hr-response-interceptor-lambda + handler: response_interceptor.lambda_handler + +cognito: + user_pool_name: hr-dlp-user-pool + web_client_name: hr-dlp-web-client + machine_client_name: hr-dlp-machine-client + resource_server_identifier: hr-dlp-gateway + scopes: + - name: read + description: Basic employee search access + - name: pii + description: Access to PII fields (email, phone) + - name: address + description: Access to address fields + - name: comp + description: Access to compensation data + +gateway: + name: hr-data-agent-gateway + target_name: hr-lambda-target + +ssm_parameters: + gateway_url: /app/hrdlp/gateway-url + gateway_id: /app/hrdlp/gateway-id + runtime_id: /app/hrdlp/runtime-id + cognito_user_pool_id: /app/hrdlp/cognito-user-pool-id + cognito_client_id: /app/hrdlp/cognito-client-id + cognito_client_secret: /app/hrdlp/cognito-client-secret + cognito_token_url: /app/hrdlp/cognito-token-url + lambda_arn: /app/hrdlp/lambda-arn + cedar_policy_engine_arn: /app/hrdlp/cedar-policy-engine-arn diff --git a/02-use-cases/role-based-hr-data-agent/pyproject.toml 
b/02-use-cases/role-based-hr-data-agent/pyproject.toml new file mode 100644 index 000000000..ae4ef1310 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/pyproject.toml @@ -0,0 +1,31 @@ +[project] +name = "role-based-hr-data-agent" +version = "0.1.0" +description = "Role-based HR data access agent with DLP enforcement via Amazon Bedrock AgentCore" +requires-python = ">=3.10" +dependencies = [ + "bedrock-agentcore>=0.1.0", + "bedrock-agentcore-starter-toolkit>=0.1.0", + "strands-agents>=0.1.0", + "strands-agents-tools>=0.1.0", + "boto3>=1.34.0", + "fastmcp>=0.1.0", + "httpx>=0.27.0", + "PyJWT>=2.8.0", + "pydantic>=2.5.0", + "PyYAML>=6.0.1", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.black] +line-length = 120 + +[tool.isort] +profile = "black" +line_length = 120 + +[tool.flake8] +max-line-length = 120 diff --git a/02-use-cases/role-based-hr-data-agent/requirements.txt b/02-use-cases/role-based-hr-data-agent/requirements.txt new file mode 100644 index 000000000..5d3fe2117 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/requirements.txt @@ -0,0 +1,7 @@ +# AgentCore Runtime base environment already includes boto3/botocore. +# Only include packages not in the base environment. 
+bedrock-agentcore +strands-agents +httpx +requests +PyYAML diff --git a/02-use-cases/role-based-hr-data-agent/scripts/__init__.py b/02-use-cases/role-based-hr-data-agent/scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/02-use-cases/role-based-hr-data-agent/scripts/agentcore_agent_runtime.py b/02-use-cases/role-based-hr-data-agent/scripts/agentcore_agent_runtime.py new file mode 100644 index 000000000..fca2f46de --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/scripts/agentcore_agent_runtime.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 +import os, sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +""" +CLI for managing the AgentCore Runtime for role-based-hr-data-agent. + +Usage: + python scripts/agentcore_agent_runtime.py create --s3-bucket + python scripts/agentcore_agent_runtime.py delete --runtime-id + +The create command reads infrastructure config from SSM (populated by prereq.sh +and agentcore_gateway.py) so no manual ARN/URL entry is needed. +""" + +import json +import sys +import time + +import boto3 +import click + +from scripts.utils import get_ssm_parameter, put_ssm_parameter + +RUNTIME_NAME = "hr_data_agent_runtime" +ENTRY_POINT = "main.py" +RUNTIME_LANG = "PYTHON_3_11" +S3_KEY = "hr-data-agent/runtime.zip" + + +@click.group() +def cli(): + """Manage the AgentCore Runtime for the HR Data Agent.""" + + +@cli.command() +@click.option("--s3-bucket", default=None, + help="S3 bucket holding runtime.zip (reads /app/hrdlp/deploy-bucket from SSM if omitted)") +@click.option("--name", default=RUNTIME_NAME, show_default=True, + help="AgentCore Runtime name") +@click.option("--region", default="us-east-1", show_default=True) +def create(s3_bucket: str, name: str, region: str): + """Create the AgentCore Runtime and store its ID + URL in SSM. 
+ + Prerequisites (all populated by prereq.sh + agentcore_gateway.py): + - /app/hrdlp/runtime-role-arn — Runtime execution role + - /app/hrdlp/gateway-url — Gateway MCP endpoint + - /app/hrdlp/cognito-user-pool-id — Cognito User Pool for JWT authorizer + - /app/hrdlp/deploy-bucket — S3 bucket (or pass --s3-bucket) + + Upload the runtime package first: + bash scripts/prereq.sh # builds hr-data-agent/runtime.zip + OR: aws s3 cp dist/runtime.zip s3:///hr-data-agent/runtime.zip + """ + # Resolve S3 bucket + bucket = s3_bucket or get_ssm_parameter("/app/hrdlp/deploy-bucket") + if not bucket: + click.echo( + "ERROR: --s3-bucket not provided and /app/hrdlp/deploy-bucket not in SSM.\n" + "Run prereq.sh first or pass --s3-bucket .", + err=True, + ) + sys.exit(1) + + # Read required SSM parameters + role_arn = get_ssm_parameter("/app/hrdlp/runtime-role-arn") + gateway_url = get_ssm_parameter("/app/hrdlp/gateway-url") + user_pool_id = get_ssm_parameter("/app/hrdlp/cognito-user-pool-id") + + missing = [k for k, v in { + "/app/hrdlp/runtime-role-arn": role_arn, + "/app/hrdlp/gateway-url": gateway_url, + "/app/hrdlp/cognito-user-pool-id": user_pool_id, + }.items() if not v] + if missing: + click.echo(f"ERROR: Missing SSM parameters: {missing}\nRun prereq.sh and agentcore_gateway.py first.", err=True) + sys.exit(1) + + # Collect persona client IDs for JWT authorizer allowedClients + persona_client_ids = [ + cid for cid in [ + get_ssm_parameter(f"/app/hrdlp/personas/{p}/client-id") + for p in ["hr-manager", "hr-specialist", "employee", "admin"] + ] if cid + ] + if not persona_client_ids: + click.echo("ERROR: No persona client IDs found in SSM. 
Run cognito_credentials_provider.py create first.", err=True) + sys.exit(1) + + # Build JWT authorizer from Cognito User Pool + discovery_url = ( + f"https://cognito-idp.{region}.amazonaws.com/{user_pool_id}" + "/.well-known/openid-configuration" + ) + + client = boto3.client("bedrock-agentcore-control", region_name=region) + click.echo(f"Creating AgentCore Runtime: {name}") + click.echo(f" S3 artifact : s3://{bucket}/{S3_KEY}") + click.echo(f" Role : {role_arn}") + click.echo(f" Gateway URL : {gateway_url}") + + try: + resp = client.create_agent_runtime( + agentRuntimeName=name, + description="Role-based HR data agent with field-level DLP via AgentCore Gateway", + agentRuntimeArtifact={ + "codeConfiguration": { + "code": {"s3": {"bucket": bucket, "prefix": S3_KEY}}, + "runtime": RUNTIME_LANG, + "entryPoint": [ENTRY_POINT], + } + }, + roleArn=role_arn, + networkConfiguration={"networkMode": "PUBLIC"}, + environmentVariables={"GATEWAY_URL": gateway_url, "AWS_DEFAULT_REGION": region}, + authorizerConfiguration={ + "customJWTAuthorizer": { + "discoveryUrl": discovery_url, + "allowedClients": persona_client_ids, + } + }, + requestHeaderConfiguration={ + "requestHeaderAllowlist": ["Authorization"] + }, + ) + + runtime_id = resp["agentRuntimeId"] + click.echo(f"Runtime created: {runtime_id}") + + # Wait for READY + endpoint_url = _wait_for_runtime(client, runtime_id) + + # Build ARN-based invocation URL — required when runtime name contains underscores, + # since DNS hostnames don't allow underscores. 
def _wait_for_runtime(client, runtime_id: str, max_attempts: int = 30) -> str:
    """Poll the AgentCore Runtime until it leaves a transitional state.

    Args:
        client: boto3 ``bedrock-agentcore-control`` client.
        runtime_id: ID returned by ``create_agent_runtime``.
        max_attempts: Number of 10-second polls before timing out (~5 min total).

    Returns:
        The ``agentRuntimeEndpoint`` value from the final poll ("" if absent).
        NOTE(review): the ``create`` command currently overrides this return
        value with an ARN-based invocation URL — confirm whether the endpoint
        returned here is still needed.

    Raises:
        RuntimeError: If the runtime reaches FAILED or DELETING.
        TimeoutError: If the runtime is not READY after ``max_attempts`` polls.
    """
    for _ in range(max_attempts):
        # Sleep first: a just-created runtime is never READY immediately.
        time.sleep(10)
        resp = client.get_agent_runtime(agentRuntimeId=runtime_id)
        status = resp.get("status")
        endpoint = resp.get("agentRuntimeEndpoint", "")
        click.echo(f" Runtime status: {status}")
        if status == "READY":
            return endpoint
        if status in ("FAILED", "DELETING"):
            # Terminal, non-recoverable states — abort instead of polling on.
            raise RuntimeError(f"Runtime reached terminal status: {status}")
    raise TimeoutError("Timed out waiting for Runtime to become READY")
[ + cid for cid in [ + get_ssm_parameter(f"/app/hrdlp/personas/{p}/client-id") + for p in ["hr-manager", "hr-specialist", "employee", "admin"] + ] if cid + ] + + if not all([lambda_arn, gateway_role_arn, user_pool_id]): + click.echo("ERROR: Required SSM parameters missing. Run prereq.sh first.", err=True) + sys.exit(1) + + if not persona_client_ids: + click.echo("ERROR: No persona client IDs found in SSM. Run cognito_credentials_provider.py create first.", err=True) + sys.exit(1) + + client = boto3.client("bedrock-agentcore-control", region_name=region) + gateway_name = gw_cfg.get("name", "hr-data-agent-gateway") + target_name = gw_cfg.get("target_name", "hr-lambda-target") + + # Read tool schemas from api_spec.json + api_spec = read_config("prerequisite/lambda/api_spec.json") + inline_payload = [ + { + "name": op["operationId"], + "description": op.get("summary", ""), + "inputSchema": list(op["requestBody"]["content"].values())[0]["schema"], + } + for path_item in api_spec["paths"].values() + for op in [list(path_item.values())[0]] + ] + + # Build authorizer config — allowedClients = all persona client IDs + discovery_url = ( + f"https://cognito-idp.{region}.amazonaws.com/{user_pool_id}" + "/.well-known/openid-configuration" + ) + authorizer_config = { + "customJWTAuthorizer": { + "discoveryUrl": discovery_url, + "allowedClients": persona_client_ids, + } + } + click.echo(f" Authorizer: {len(persona_client_ids)} persona clients in allowedClients") + + # Build interceptor configurations — passRequestHeaders ensures Authorization header + # flows through to interceptors so they can decode the JWT for tenant resolution + interceptor_configs = [] + if request_interceptor_arn: + interceptor_configs.append({ + "interceptor": {"lambda": {"arn": request_interceptor_arn}}, + "interceptionPoints": ["REQUEST"], + "inputConfiguration": {"passRequestHeaders": True}, + }) + if response_interceptor_arn: + interceptor_configs.append({ + "interceptor": {"lambda": {"arn": 
response_interceptor_arn}}, + "interceptionPoints": ["RESPONSE"], + "inputConfiguration": {"passRequestHeaders": True}, + }) + click.echo(f" Interceptors: {len(interceptor_configs)} configured (REQUEST + RESPONSE)") + + click.echo(f"Creating Gateway: {gateway_name} in {region}") + + try: + create_kwargs = dict( + name=gateway_name, + protocolType="MCP", + roleArn=gateway_role_arn, + authorizerType="CUSTOM_JWT", + authorizerConfiguration=authorizer_config, + ) + if interceptor_configs: + create_kwargs["interceptorConfigurations"] = interceptor_configs + + resp = client.create_gateway(**create_kwargs) + gateway_id = resp["gatewayId"] + gateway_url = resp.get("gatewayUrl", "") + click.echo(f"Gateway created: {gateway_id}") + + # Wait for READY + _wait_for_gateway(client, gateway_id) + + # Attach Lambda target — targetConfiguration uses mcp.lambda shape + client.create_gateway_target( + gatewayIdentifier=gateway_id, + name=target_name, + targetConfiguration={ + "mcp": { + "lambda": { + "lambdaArn": lambda_arn, + "toolSchema": {"inlinePayload": inline_payload}, + } + } + }, + credentialProviderConfigurations=[ + {"credentialProviderType": "GATEWAY_IAM_ROLE"} + ], + ) + click.echo(f"Lambda target attached: {target_name}") + + # Construct full Gateway ARN (needed by Cedar policies) + account_id = boto3.client("sts").get_caller_identity()["Account"] + gateway_arn = f"arn:aws:bedrock-agentcore:{region}:{account_id}:gateway/{gateway_id}" + + # Persist to SSM + put_ssm_parameter(cfg["ssm_parameters"]["gateway_id"], gateway_id) + put_ssm_parameter(cfg["ssm_parameters"]["gateway_url"], gateway_url) + put_ssm_parameter("/app/hrdlp/gateway-arn", gateway_arn) + + click.echo(f"\nGateway URL: {gateway_url}") + click.echo(f"Gateway ARN: {gateway_arn}") + click.echo("SSM parameters updated (/app/hrdlp/gateway-id, gateway-url, gateway-arn).") + click.echo("\nNext: update Cedar policy with Gateway ARN:") + click.echo(f" sed -i 's||{gateway_arn}|g' 
def _wait_for_gateway(client, gateway_id: str, max_attempts: int = 20) -> None:
    """Block until the gateway reports READY; raise on terminal state or timeout."""
    remaining = max_attempts
    while remaining:
        remaining -= 1
        # Gateway provisioning is slow; poll on a fixed 10-second cadence.
        time.sleep(10)
        status = client.get_gateway(gatewayIdentifier=gateway_id).get("status")
        click.echo(f" Gateway status: {status}")
        if status == "READY":
            return
        if status in ("FAILED", "DELETING"):
            raise RuntimeError(f"Gateway reached terminal status: {status}")
    raise TimeoutError("Timed out waiting for Gateway to become READY")
AgentCore Gateway (+ Cedar Policy Engine) +# 3. CloudFormation stacks (Lambda, IAM, Cognito) +# 4. S3 deployment bucket +# 5. SSM parameters +# +# Usage: +# bash scripts/cleanup.sh [--region us-east-1] [--env dev] +# ============================================================================= + +set -euo pipefail + +REGION="us-east-1" +ENV="dev" + +while [[ $# -gt 0 ]]; do + case $1 in + --region) REGION="$2"; shift 2 ;; + --env) ENV="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) +BUCKET="hr-dlp-deploy-${ACCOUNT_ID}-${REGION}" +STACK_INFRA="hr-dlp-infrastructure-${ENV}" +STACK_COGNITO="hr-dlp-cognito-${ENV}" + +echo "============================================================" +echo " Role-Based HR Data Agent — Cleanup" +echo " Region: ${REGION} | Environment: ${ENV}" +echo " Account: ${ACCOUNT_ID}" +echo "============================================================" + +# --------------------------------------------------------------------------- +# Step 1: Delete AgentCore Runtimes (hr_data_agent_runtime* or hr_dlp*) +# Waits for DELETING status before proceeding. 
+# --------------------------------------------------------------------------- +echo "" +echo "Step 1: Deleting AgentCore Runtime(s)" +python3 - </dev/null || true +aws cloudformation wait stack-delete-complete --stack-name "${STACK_INFRA}" --region "${REGION}" 2>/dev/null || true +echo " Done: ${STACK_INFRA}" + +echo "Deleting CloudFormation stack: ${STACK_COGNITO}" +aws cloudformation delete-stack --stack-name "${STACK_COGNITO}" --region "${REGION}" 2>/dev/null || true +aws cloudformation wait stack-delete-complete --stack-name "${STACK_COGNITO}" --region "${REGION}" 2>/dev/null || true +echo " Done: ${STACK_COGNITO}" + +# --------------------------------------------------------------------------- +# Step 5: Empty and delete S3 bucket +# --------------------------------------------------------------------------- +echo "" +echo "Step 5: Cleaning up S3 bucket: ${BUCKET}" +if aws s3api head-bucket --bucket "${BUCKET}" --region "${REGION}" 2>/dev/null; then + aws s3 rm "s3://${BUCKET}" --recursive + aws s3api delete-bucket --bucket "${BUCKET}" --region "${REGION}" + echo " Bucket deleted: ${BUCKET}" +else + echo " Bucket not found — skipping" +fi + +# --------------------------------------------------------------------------- +# Step 6: Delete all SSM parameters under /app/hrdlp/ +# --------------------------------------------------------------------------- +echo "" +echo "Step 6: Deleting SSM parameters under /app/hrdlp/" + +# Collect all parameter names (paginated) +ALL_PARAMS=$(aws ssm get-parameters-by-path \ + --path /app/hrdlp \ + --recursive \ + --query "Parameters[].Name" \ + --output text \ + --region "${REGION}" 2>/dev/null || true) + +if [[ -n "${ALL_PARAMS}" ]]; then + # delete-parameters accepts up to 10 names at a time + echo "${ALL_PARAMS}" | tr '\t' '\n' | xargs -n 10 \ + aws ssm delete-parameters --region "${REGION}" --names 2>/dev/null || true + echo " SSM parameters deleted" +else + echo " No SSM parameters found — skipping" +fi + +echo "" +echo 
"============================================================" +echo " Cleanup complete." +echo " You can now re-deploy with:" +echo " bash scripts/prereq.sh --region ${REGION} --env ${ENV}" +echo "============================================================" diff --git a/02-use-cases/role-based-hr-data-agent/scripts/cognito_credentials_provider.py b/02-use-cases/role-based-hr-data-agent/scripts/cognito_credentials_provider.py new file mode 100644 index 000000000..8c1f18c71 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/scripts/cognito_credentials_provider.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +import os, sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +""" +CLI for creating and managing Cognito OAuth2 credential providers. + +Creates machine-client app clients for each HR persona with the correct +OAuth scopes, then stores credentials in SSM for use by test scripts. + +Usage: + python scripts/cognito_credentials_provider.py create --config prerequisite/prereqs_config.yaml + python scripts/cognito_credentials_provider.py list + python scripts/cognito_credentials_provider.py delete --client-id +""" + +import json +import sys + +import boto3 +import click + +from scripts.utils import get_ssm_parameter, put_ssm_parameter, read_config + +# Persona definitions: name → scopes + tenant context +# tenant context is written to SSM as the client_id→context mapping used by +# the Gateway interceptors (tenant_mapping.py) to resolve tenantId from JWT sub. 
+PERSONAS = { + "hr-manager": { + "description": "Full access to all HR data", + "scopes": [ + "hr-dlp-gateway/read", + "hr-dlp-gateway/pii", + "hr-dlp-gateway/address", + "hr-dlp-gateway/comp", + ], + "tenantId": "tenant-alpha", + "role": "hr-manager", + "department": "Human Resources", + "username": "hr-manager", + }, + "hr-specialist": { + "description": "Employee profiles + PII, no compensation", + "scopes": ["hr-dlp-gateway/read", "hr-dlp-gateway/pii"], + "tenantId": "tenant-alpha", + "role": "hr-specialist", + "department": "Human Resources", + "username": "hr-specialist", + }, + "employee": { + "description": "Basic search only", + "scopes": ["hr-dlp-gateway/read"], + "tenantId": "tenant-alpha", + "role": "employee", + "department": "Engineering", + "username": "employee", + }, + "admin": { + "description": "Full administrative access", + "scopes": [ + "hr-dlp-gateway/read", + "hr-dlp-gateway/pii", + "hr-dlp-gateway/address", + "hr-dlp-gateway/comp", + ], + "tenantId": "tenant-alpha", + "role": "admin", + "department": "IT", + "username": "admin", + }, +} + + +@click.group() +def cli(): + """Manage Cognito credential providers for HR Data Agent personas.""" + + +@cli.command() +@click.option("--config", default="prerequisite/prereqs_config.yaml", show_default=True) +@click.option("--region", default=None) +def create(config: str, region: str): + """Create one app client per persona and store credentials in SSM.""" + cfg = read_config(config) + region = region or cfg.get("aws", {}).get("region", "us-east-1") + user_pool_id = get_ssm_parameter("/app/hrdlp/cognito-user-pool-id") + if not user_pool_id: + click.echo("ERROR: Cognito User Pool ID not found in SSM.", err=True) + sys.exit(1) + + cognito = boto3.client("cognito-idp", region_name=region) + clients = [] + + tenant_mapping: dict = {} + + for persona, meta in PERSONAS.items(): + resp = cognito.create_user_pool_client( + UserPoolId=user_pool_id, + ClientName=f"hr-dlp-{persona}", + GenerateSecret=True, + 
@cli.command("list")
@click.option("--config", default="prerequisite/prereqs_config.yaml", show_default=True)
@click.option("--region", default=None)
def list_clients(config: str, region: str):
    """List existing app clients in the User Pool."""
    settings = read_config(config)
    if not region:
        region = settings.get("aws", {}).get("region", "us-east-1")
    pool_id = get_ssm_parameter("/app/hrdlp/cognito-user-pool-id")
    if not pool_id:
        click.echo("ERROR: User Pool ID not found in SSM.", err=True)
        sys.exit(1)

    idp = boto3.client("cognito-idp", region_name=region)
    pages = idp.get_paginator("list_user_pool_clients").paginate(UserPoolId=pool_id)
    # Flatten the paginated responses and print one aligned row per client.
    for entry in (c for page in pages for c in page["UserPoolClients"]):
        click.echo(f" {entry['ClientName']:40s} {entry['ClientId']}")
+@click.option("--client-id", required=True) +@click.option("--config", default="prerequisite/prereqs_config.yaml", show_default=True) +@click.option("--region", default=None) +def delete(client_id: str, config: str, region: str): + """Delete a specific app client.""" + cfg = read_config(config) + region = region or cfg.get("aws", {}).get("region", "us-east-1") + user_pool_id = get_ssm_parameter("/app/hrdlp/cognito-user-pool-id") + cognito = boto3.client("cognito-idp", region_name=region) + cognito.delete_user_pool_client(UserPoolId=user_pool_id, ClientId=client_id) + click.echo(f"Deleted client: {client_id}") + + +if __name__ == "__main__": + cli() diff --git a/02-use-cases/role-based-hr-data-agent/scripts/create_cedar_policies.py b/02-use-cases/role-based-hr-data-agent/scripts/create_cedar_policies.py new file mode 100644 index 000000000..31bff1e65 --- /dev/null +++ b/02-use-cases/role-based-hr-data-agent/scripts/create_cedar_policies.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python3 +import os, sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +""" +Creates the Cedar Policy Engine, attaches it to the AgentCore Gateway, +and creates the three HR DLP authorization policies. + +This must run AFTER agentcore_gateway.py create (needs gateway-id, gateway-arn, +gateway-url, gateway-role-arn, cognito-user-pool-id, request/response interceptor ARNs +in SSM, and persona client IDs). 
+ +Usage: + python scripts/create_cedar_policies.py --region us-east-1 --env dev + python scripts/create_cedar_policies.py --mode ENFORCE # switch to enforcement +""" + +import json +import time + +import boto3 +import click + +from scripts.utils import get_ssm_parameter, put_ssm_parameter + +# Cedar policies reference the gateway ARN and tool action names in the format: +# ___ +POLICIES = [ + { + "name": "allow_search_employee", + "description": "Allow search_employee for users with hr-dlp-gateway/read scope", + "statement": ( + 'permit(principal is AgentCore::OAuthUser, ' + 'action == AgentCore::Action::"hr-lambda-target___search_employee", ' + 'resource == AgentCore::Gateway::"{gateway_arn}") ' + 'when {{ principal.hasTag("scope") && principal.getTag("scope") like "*hr-dlp-gateway/read*" }};' + ), + }, + { + "name": "allow_get_employee_profile", + "description": "Allow get_employee_profile for users with hr-dlp-gateway/pii scope", + "statement": ( + 'permit(principal is AgentCore::OAuthUser, ' + 'action == AgentCore::Action::"hr-lambda-target___get_employee_profile", ' + 'resource == AgentCore::Gateway::"{gateway_arn}") ' + 'when {{ principal.hasTag("scope") && principal.getTag("scope") like "*hr-dlp-gateway/pii*" }};' + ), + }, + { + "name": "allow_get_employee_compensation", + "description": "Allow get_employee_compensation for users with hr-dlp-gateway/comp scope", + "statement": ( + 'permit(principal is AgentCore::OAuthUser, ' + 'action == AgentCore::Action::"hr-lambda-target___get_employee_compensation", ' + 'resource == AgentCore::Gateway::"{gateway_arn}") ' + 'when {{ principal.hasTag("scope") && principal.getTag("scope") like "*hr-dlp-gateway/comp*" }};' + ), + }, +] + + +_CEDAR_INIT_WAIT = 15 # seconds to let Cedar schema finish indexing after gateway READY +_MAX_POLICY_ATTEMPTS = 3 # retries per real policy on transient internal errors +_POLICY_RETRY_WAIT = 30 # seconds between policy retries + + +def _poll_policy_status(client, engine_id, policy_id, 
polls=12, interval=5): + """Poll until policy leaves CREATING. Returns (status, statusReasons) tuple.""" + for _ in range(polls): + time.sleep(interval) + resp = client.get_policy(policyEngineId=engine_id, policyId=policy_id) + status = resp["status"] + if status != "CREATING": + return status, resp.get("statusReasons", []) + return "CREATING", [] + + +def _create_policy_with_retry(client, engine_id, gateway_arn, policy_def): + """ + Create a single Cedar policy, retrying only on transient internal errors. + Validation failures (Overly Permissive, schema errors) are non-retriable + and abort immediately with the full reason from the service. + """ + statement = policy_def["statement"].format(gateway_arn=gateway_arn) + definition = {"cedar": {"statement": statement}} + + for attempt in range(1, _MAX_POLICY_ATTEMPTS + 1): + try: + resp = client.create_policy( + policyEngineId=engine_id, + name=policy_def["name"], + description=policy_def["description"], + definition=definition, + ) + policy_id = resp["policyId"] + except client.exceptions.ConflictException: + # Policy with this name already exists — find and reuse it + policies = client.list_policies(policyEngineId=engine_id).get("policies", []) + existing = next((p for p in policies if p["name"] == policy_def["name"]), None) + if not existing: + click.echo( + f"ERROR: ConflictException but could not find existing policy '{policy_def['name']}'.", + err=True, + ) + raise SystemExit(1) + click.echo(f" Policy '{policy_def['name']}' already exists, reusing: {existing['policyId']}") + return existing["policyId"] + + click.echo( + f" Creating: {policy_def['name']} ({policy_id})" + + (f" [attempt {attempt}/{_MAX_POLICY_ATTEMPTS}]" if attempt > 1 else "") + + " — waiting for ACTIVE..." 
+ ) + + status, reasons = _poll_policy_status(client, engine_id, policy_id) + + if status == "ACTIVE": + click.echo(f" ACTIVE: {policy_def['name']}") + return policy_id + + if status == "CREATE_FAILED": + # Determine if this is a validation failure (non-retriable) or a + # transient internal error (retriable). Validation failures contain + # descriptive reasons (e.g. "Overly Permissive"); internal errors + # say "An internal error occurred during creation". + is_internal = any( + "internal error" in r.lower() for r in reasons + ) or not reasons + + try: + client.delete_policy(policyEngineId=engine_id, policyId=policy_id) + except Exception: + pass + + if not is_internal: + # Validation failure — retrying won't help + click.echo( + f"ERROR: Policy '{policy_def['name']}' failed validation:\n" + + "\n".join(f" - {r}" for r in reasons), + err=True, + ) + raise SystemExit(1) + + if attempt < _MAX_POLICY_ATTEMPTS: + click.echo( + f" CREATE_FAILED (internal error) for {policy_def['name']} — " + f"retrying in {_POLICY_RETRY_WAIT}s..." + ) + time.sleep(_POLICY_RETRY_WAIT) + continue + + # TIMED_OUT or exhausted retries on internal errors + click.echo( + f"ERROR: Policy '{policy_def['name']}' failed after " + f"{attempt} attempt(s) (last status: {status}, reasons: {reasons}). " + "This is a service-side issue — wait a few minutes and re-run.", + err=True, + ) + raise SystemExit(1) + + +@click.command() +@click.option("--region", default="us-east-1", show_default=True) +@click.option("--env", default="dev", show_default=True, help="Environment suffix for policy engine name") +@click.option("--mode", default="LOG_ONLY", type=click.Choice(["LOG_ONLY", "ENFORCE"]), + show_default=True, help="Cedar policy enforcement mode") +def create(region: str, env: str, mode: str): + """ + Create the Cedar Policy Engine, attach to Gateway, and create HR DLP policies. 

    Prerequisites (populated by prereq.sh + agentcore_gateway.py):
      - /app/hrdlp/gateway-id     Gateway identifier
      - /app/hrdlp/gateway-arn    Gateway ARN (for Cedar policy resource)

    All other gateway configuration (authorizer, interceptors, role) is read
    directly from the live gateway via get_gateway — no extra SSM parameters needed.
    """
    gateway_id = get_ssm_parameter("/app/hrdlp/gateway-id")
    gateway_arn = get_ssm_parameter("/app/hrdlp/gateway-arn")

    if not gateway_id or not gateway_arn:
        click.echo("ERROR: Missing /app/hrdlp/gateway-id or /app/hrdlp/gateway-arn in SSM.\nRun prereq.sh and agentcore_gateway.py first.", err=True)
        raise SystemExit(1)

    client = boto3.client("bedrock-agentcore-control", region_name=region)

    # Read current gateway configuration — authorizer, interceptors, role are
    # taken directly from the live gateway instead of being reconstructed from SSM.
    click.echo(f"Reading current gateway configuration: {gateway_id}")
    gw = client.get_gateway(gatewayIdentifier=gateway_id)
    gw_name = gw["name"]
    gw_role_arn = gw["roleArn"]
    gw_protocol = gw["protocolType"]
    gw_authorizer_type = gw["authorizerType"]
    gw_authorizer_config = gw["authorizerConfiguration"]
    # Interceptors may be absent on a freshly created gateway; default to [].
    gw_interceptors = gw.get("interceptorConfigurations", [])
    click.echo(f"  name={gw_name} interceptors={len(gw_interceptors)}")

    # -------------------------------------------------------------------------
    # Step 1: Create (or reuse) the policy engine — idempotent
    # -------------------------------------------------------------------------
    engine_name = f"hr_dlp_policies_{env}"
    click.echo(f"Creating policy engine: {engine_name}")
    try:
        resp = client.create_policy_engine(
            name=engine_name,
            description=f"Cedar authorization policies for HR DLP Gateway ({env})",
        )
        engine_id = resp["policyEngineId"]
        click.echo(f"  Engine ID: {engine_id} — waiting for ACTIVE...")

        # Poll up to 12 × 5s = 60s; the for/else fires only if the loop
        # exhausts without a break (i.e. never reached ACTIVE).
        for _ in range(12):
            time.sleep(5)
            status = client.get_policy_engine(policyEngineId=engine_id)["status"]
            if status == "ACTIVE":
                break
            if status == "FAILED":
                click.echo("ERROR: Policy engine reached FAILED status.", err=True)
                raise SystemExit(1)
        else:
            click.echo("ERROR: Timed out waiting for policy engine to become ACTIVE.", err=True)
            raise SystemExit(1)

    except client.exceptions.ConflictException:
        # Engine already exists — find it by name and reuse it.
        # NOTE(review): only the first page of list_policy_engines is scanned,
        # and the reused engine's status is not re-checked before proceeding —
        # confirm an existing non-ACTIVE engine cannot occur here.
        engines = client.list_policy_engines().get("policyEngines", [])
        existing = next((e for e in engines if e["name"] == engine_name), None)
        if not existing:
            click.echo(f"ERROR: ConflictException but could not find existing engine '{engine_name}'.", err=True)
            raise SystemExit(1)
        engine_id = existing["policyEngineId"]
        click.echo(f"  Policy engine already exists, reusing: {engine_id}")

    click.echo(f"  Policy engine ACTIVE: {engine_id}")

    # -------------------------------------------------------------------------
    # Step 2: Attach the engine to the gateway — two-phase update.
    #
    # Phase A: Attach policy engine WITHOUT interceptors.
    #   The reference sample (08-AgentCore-policy) never includes interceptors
    #   in the update_gateway call that attaches the policy engine. Including them
    #   causes Cedar schema initialization to fail with an internal error on every
    #   subsequent create_policy call. Attach cleanly first so Cedar can initialize.
    #
    # Phase B: After Cedar is confirmed ready and policies are created, restore
    #   the interceptors in a second update_gateway call.
    # -------------------------------------------------------------------------
    click.echo(f"\nAttaching policy engine to gateway (phase A — no interceptors): {gateway_id}")

    # The policy-engine ARN is assembled locally from account/region because
    # create_policy_engine returns only the ID.
    account_id = boto3.client("sts").get_caller_identity()["Account"]
    engine_arn = (
        f"arn:aws:bedrock-agentcore:{region}:{account_id}:policy-engine/{engine_id}"
    )

    # Phase A — policy engine only, no interceptors.
    # Strip interceptorConfigurations so Cedar's internal tools/list call (used
    # to index the schema) is not blocked by JWT auth from the interceptors.
    client.update_gateway(
        gatewayIdentifier=gateway_id,
        name=gw_name,
        roleArn=gw_role_arn,
        protocolType=gw_protocol,
        authorizerType=gw_authorizer_type,
        authorizerConfiguration=gw_authorizer_config,
        policyEngineConfiguration={"arn": engine_arn, "mode": mode},
    )
    click.echo("  Waiting for gateway to return to READY...")

    # Poll up to 20 × 10s = 200s for the gateway to finish updating.
    for _ in range(20):
        time.sleep(10)
        gw_status = client.get_gateway(gatewayIdentifier=gateway_id)["status"]
        click.echo(f"  Gateway status: {gw_status}")
        if gw_status == "READY":
            break
        if gw_status == "FAILED":
            click.echo("ERROR: Gateway reached FAILED status after update.", err=True)
            raise SystemExit(1)
    else:
        click.echo("ERROR: Timed out waiting for gateway to become READY.", err=True)
        raise SystemExit(1)

    # -------------------------------------------------------------------------
    # Step 3: Brief wait for Cedar schema to finish indexing.
    #   The gateway returns READY before Cedar's internal schema is fully
    #   initialized. A short fixed sleep is sufficient; transient failures
    #   during policy creation are handled by _create_policy_with_retry.
    # -------------------------------------------------------------------------
    click.echo(f"\nWaiting {_CEDAR_INIT_WAIT}s for Cedar schema to initialize...")
    time.sleep(_CEDAR_INIT_WAIT)

    # -------------------------------------------------------------------------
    # Step 4: Create each Cedar policy.
    #   Retries up to _MAX_POLICY_ATTEMPTS times on transient internal errors.
    #   Validation failures (Overly Permissive, schema errors) abort immediately.
    # -------------------------------------------------------------------------
    click.echo(f"\nCreating {len(POLICIES)} Cedar policies (mode: {mode})...")

    created_policy_ids = []
    for policy_def in POLICIES:
        # _create_policy_with_retry exits the process (SystemExit) on any
        # non-recoverable failure, so every id collected here is ACTIVE.
        policy_id = _create_policy_with_retry(client, engine_id, gateway_arn, policy_def)
        created_policy_ids.append(policy_id)

    # -------------------------------------------------------------------------
    # Step 5: Restore interceptors — phase B of the two-phase gateway update.
    #   Now that Cedar is initialized and policies are ACTIVE, re-add the
    #   interceptors. Cedar schema is already indexed so this update won't
    #   interfere with it.
    # -------------------------------------------------------------------------
    if gw_interceptors:
        click.echo(f"\nRestoring interceptors on gateway (phase B): {gateway_id}")
        client.update_gateway(
            gatewayIdentifier=gateway_id,
            name=gw_name,
            roleArn=gw_role_arn,
            protocolType=gw_protocol,
            authorizerType=gw_authorizer_type,
            authorizerConfiguration=gw_authorizer_config,
            policyEngineConfiguration={"arn": engine_arn, "mode": mode},
            interceptorConfigurations=gw_interceptors,
        )
        click.echo("  Waiting for gateway to return to READY...")
        # Same 20 × 10s polling budget as the phase-A update.
        for _ in range(20):
            time.sleep(10)
            gw_status = client.get_gateway(gatewayIdentifier=gateway_id)["status"]
            click.echo(f"  Gateway status: {gw_status}")
            if gw_status == "READY":
                break
            if gw_status == "FAILED":
                click.echo("ERROR: Gateway reached FAILED status restoring interceptors.", err=True)
                raise SystemExit(1)
        else:
            click.echo("ERROR: Timed out waiting for gateway to become READY.", err=True)
            raise SystemExit(1)
        click.echo("  Interceptors restored.")

    # -------------------------------------------------------------------------
    # Step 6: Persist policy engine ARN to SSM
    # -------------------------------------------------------------------------
    put_ssm_parameter("/app/hrdlp/cedar-policy-engine-arn", engine_arn)

    click.echo(f"\nCedar setup complete.")
    click.echo(f"  Policy engine : {engine_id} ({mode})")
    click.echo(f"  Policies      : {len(created_policy_ids)} ACTIVE")
    click.echo(f"  SSM           : /app/hrdlp/cedar-policy-engine-arn")
    if mode == "LOG_ONLY":
        click.echo("\n  Mode is LOG_ONLY — policies log but do not block requests.")
        click.echo("  To enforce, re-run with: --mode ENFORCE")


if __name__ == "__main__":
    create()
diff --git a/02-use-cases/role-based-hr-data-agent/scripts/package_runtime.sh b/02-use-cases/role-based-hr-data-agent/scripts/package_runtime.sh
new file mode 100755
index 000000000..e83b34687
--- /dev/null
+++ b/02-use-cases/role-based-hr-data-agent/scripts/package_runtime.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
# =============================================================================
# Package the AgentCore Runtime into dist/runtime.zip
#
# Bundles:
#   main.py           — BedrockAgentCoreApp entry point
#   agent_config/     — HRDataAgent, task orchestration, SSM utils
#   requirements.txt  — AgentCore Runtime installs deps on first launch
#
# Output: dist/runtime.zip
#
# Usage:
#   bash scripts/package_runtime.sh
# =============================================================================

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
DIST_DIR="${ROOT_DIR}/dist"

# NOTE(review): TMP_DIR is expanded when the trap is *defined* (double quotes);
# safe here because TMP_DIR is already set, but single quotes would defer it.
TMP_DIR=$(mktemp -d)
trap "rm -rf ${TMP_DIR}" EXIT

mkdir -p "${DIST_DIR}"
rm -f "${DIST_DIR}/runtime.zip"

echo "Packaging AgentCore Runtime..."
echo "  Source: ${ROOT_DIR}"
echo "  Output: ${DIST_DIR}/runtime.zip"

# Copy runtime entry point and agent module
cp "${ROOT_DIR}/main.py" "${TMP_DIR}/"
cp -r "${ROOT_DIR}/agent_config" "${TMP_DIR}/"

# Install Python dependencies for Linux ARM64 (AgentCore Runtime target platform).
# Use --platform flags to download manylinux aarch64 wheels from PyPI instead of
# local macOS binaries, and --no-cache-dir to bypass the macOS wheel cache.
if [[ -f "${ROOT_DIR}/requirements.txt" ]]; then
    echo "  Installing dependencies for Linux ARM64..."
    pip install \
        -r "${ROOT_DIR}/requirements.txt" \
        -t "${TMP_DIR}/" \
        --quiet \
        --upgrade \
        --platform manylinux_2_17_aarch64 \
        --platform manylinux2014_aarch64 \
        --only-binary=:all: \
        --python-version 3.11 \
        --implementation cp \
        --no-cache-dir
else
    echo "  WARNING: requirements.txt not found, skipping dependency install"
fi

# Create ZIP
(cd "${TMP_DIR}" && zip -qr "${DIST_DIR}/runtime.zip" .)

SIZE=$(du -sh "${DIST_DIR}/runtime.zip" | cut -f1)
FILE_COUNT=$(unzip -l "${DIST_DIR}/runtime.zip" | tail -1 | awk '{print $2}')
echo "  Built: dist/runtime.zip (${SIZE}, ${FILE_COUNT} files)"
echo ""
echo "Next: upload to S3 before running agentcore_agent_runtime.py create"
echo "  BUCKET=\$(aws ssm get-parameter --name /app/hrdlp/deploy-bucket --query Parameter.Value --output text)"
echo "  aws s3 cp dist/runtime.zip s3://\${BUCKET}/hr-data-agent/runtime.zip"
diff --git a/02-use-cases/role-based-hr-data-agent/scripts/prereq.sh b/02-use-cases/role-based-hr-data-agent/scripts/prereq.sh
new file mode 100644
index 000000000..d0335c3bf
--- /dev/null
+++ b/02-use-cases/role-based-hr-data-agent/scripts/prereq.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
# =============================================================================
# role-based-hr-data-agent — Master Prerequisites Deployment Script
#
# Deploys all infrastructure required before configuring AgentCore Gateway
# and Runtime. Run once per environment.
#
# Steps:
#   1. Create S3 bucket for Lambda artifacts
#   2. Package and upload Lambda ZIPs (HR Provider + Interceptors)
#   3. Deploy infrastructure.yaml CloudFormation stack
#   4. Deploy cognito.yaml CloudFormation stack
#   5. Create Cognito persona app clients
#
# Usage:
#   bash scripts/prereq.sh [--region us-east-1] [--env dev]
# =============================================================================

set -euo pipefail

REGION="us-east-1"
ENV="dev"
CONFIG="prerequisite/prereqs_config.yaml"
STACK_INFRA="hr-dlp-infrastructure-${ENV}"
STACK_COGNITO="hr-dlp-cognito-${ENV}"

# Parse arguments
# NOTE: --env also rewrites both stack names so they stay in sync with ENV.
while [[ $# -gt 0 ]]; do
    case $1 in
        --region) REGION="$2"; shift 2 ;;
        --env) ENV="$2"; STACK_INFRA="hr-dlp-infrastructure-${ENV}"; STACK_COGNITO="hr-dlp-cognito-${ENV}"; shift 2 ;;
        *) echo "Unknown option: $1"; exit 1 ;;
    esac
done

ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
# Account + region in the name keeps the bucket globally unique.
BUCKET="hr-dlp-deploy-${ACCOUNT_ID}-${REGION}"

echo "============================================================"
echo " Role-Based HR Data Agent — Prerequisites Deployment"
echo " Region: ${REGION} | Environment: ${ENV}"
echo " Account: ${ACCOUNT_ID}"
echo "============================================================"

# ---------------------------------------------------------------------------
# Step 1: Create S3 bucket
# ---------------------------------------------------------------------------
echo ""
echo "Step 1: Creating S3 bucket: ${BUCKET}"
if aws s3api head-bucket --bucket "${BUCKET}" --region "${REGION}" 2>/dev/null; then
    echo "  Bucket already exists — skipping creation"
else
    # us-east-1 rejects an explicit LocationConstraint, hence the branch.
    if [[ "${REGION}" == "us-east-1" ]]; then
        aws s3api create-bucket --bucket "${BUCKET}" --region "${REGION}"
    else
        aws s3api create-bucket --bucket "${BUCKET}" --region "${REGION}" \
            --create-bucket-configuration LocationConstraint="${REGION}"
    fi
    echo "  Bucket created: ${BUCKET}"
fi
# Store bucket name in SSM so agentcore_agent_runtime.py create can read it
aws ssm put-parameter \
    --name "/app/hrdlp/deploy-bucket" \
    --value "${BUCKET}" \
    --type String \
    --overwrite \
    --region "${REGION}" > /dev/null
echo "  SSM: /app/hrdlp/deploy-bucket = ${BUCKET}"

# ---------------------------------------------------------------------------
# Step 2: Package and upload Lambda ZIPs
# ---------------------------------------------------------------------------
echo ""
echo "Step 2: Packaging Lambda functions"

# NOTE(review): trap argument is expanded at definition time (double quotes);
# safe because TMP_DIR is set on the previous line.
TMP_DIR=$(mktemp -d)
trap "rm -rf ${TMP_DIR}" EXIT

# HR Data Provider
echo "  Packaging HR Data Provider..."
cp prerequisite/lambda/python/*.py "${TMP_DIR}/"
(cd "${TMP_DIR}" && zip -q hr-data-provider.zip *.py)
aws s3 cp "${TMP_DIR}/hr-data-provider.zip" "s3://${BUCKET}/hr-data-provider/deployment.zip"
echo "  Uploaded: s3://${BUCKET}/hr-data-provider/deployment.zip"

# Interceptors
echo "  Packaging Interceptors..."
mkdir -p "${TMP_DIR}/interceptors"
cp prerequisite/lambda/interceptors/*.py "${TMP_DIR}/interceptors/"
(cd "${TMP_DIR}/interceptors" && zip -q ../hr-interceptors.zip *.py)
aws s3 cp "${TMP_DIR}/hr-interceptors.zip" "s3://${BUCKET}/hr-interceptors/deployment.zip"
echo "  Uploaded: s3://${BUCKET}/hr-interceptors/deployment.zip"

# ---------------------------------------------------------------------------
# Step 3: Deploy infrastructure.yaml
# ---------------------------------------------------------------------------
echo ""
echo "Step 3: Deploying infrastructure CloudFormation stack: ${STACK_INFRA}"
aws cloudformation deploy \
    --template-file prerequisite/infrastructure.yaml \
    --stack-name "${STACK_INFRA}" \
    --region "${REGION}" \
    --capabilities CAPABILITY_NAMED_IAM \
    --parameter-overrides \
        LambdaS3Bucket="${BUCKET}" \
        LambdaS3Key="hr-data-provider/deployment.zip" \
        InterceptorS3Key="hr-interceptors/deployment.zip" \
        Environment="${ENV}" \
    --no-fail-on-empty-changeset
echo "  Infrastructure stack deployed"

# ---------------------------------------------------------------------------
# Step 4: Deploy cognito.yaml
# ---------------------------------------------------------------------------
echo ""
echo "Step 4: Deploying Cognito CloudFormation stack: ${STACK_COGNITO}"
COGNITO_DOMAIN_PREFIX="hr-dlp-agent"
aws cloudformation deploy \
    --template-file prerequisite/cognito.yaml \
    --stack-name "${STACK_COGNITO}" \
    --region "${REGION}" \
    --capabilities CAPABILITY_NAMED_IAM \
    --parameter-overrides \
        Environment="${ENV}" \
        CognitoDomainPrefix="${COGNITO_DOMAIN_PREFIX}" \
    --no-fail-on-empty-changeset
echo "  Cognito stack deployed"

# ---------------------------------------------------------------------------
# Step 5: Create persona app clients
# ---------------------------------------------------------------------------
echo ""
echo "Step 5: Creating Cognito persona app clients"
python scripts/cognito_credentials_provider.py create --config "${CONFIG}" --region "${REGION}"

# ---------------------------------------------------------------------------
# Done
# ---------------------------------------------------------------------------
echo ""
echo "============================================================"
echo " Prerequisites deployment complete!"
echo ""
echo " Next steps:"
echo "  1. Build and upload runtime package:"
echo "     bash scripts/package_runtime.sh"
echo "     BUCKET=\$(aws ssm get-parameter --name /app/hrdlp/deploy-bucket --query Parameter.Value --output text)"
echo "     aws s3 cp dist/runtime.zip s3://\${BUCKET}/hr-data-agent/runtime.zip"
echo ""
echo "  2. Create AgentCore Gateway:"
echo "     python scripts/agentcore_gateway.py create --config ${CONFIG}"
echo "     (Gateway ARN printed + stored in SSM /app/hrdlp/gateway-arn)"
echo ""
echo "  3. Create Cedar Policy Engine and attach to Gateway:"
echo "     python scripts/create_cedar_policies.py --region ${REGION} --env ${ENV}"
echo "     # Creates engine, attaches to gateway (preserving interceptors), creates 3 policies"
echo "     # Add --mode ENFORCE to block unauthorized requests (default: LOG_ONLY)"
echo ""
echo "  4. Create AgentCore Runtime:"
echo "     python scripts/agentcore_agent_runtime.py create"
echo ""
echo "  5. Run tests:"
echo "     python test/test_gateway.py --persona hr-manager"
echo "     python test/test_dlp_redaction.py"
echo "     python test/test_agent.py --persona hr-manager"
echo ""
echo "  6. Run the Streamlit app:"
echo "     streamlit run app.py"
echo ""
echo "  The app reads all config from SSM automatically — no manual setup."
echo "  Usage: select a persona → Get OAuth Token → Discover Tools → send a query."
echo "  Switch personas to see DLP redaction applied based on OAuth scopes."
echo "============================================================"
diff --git a/02-use-cases/role-based-hr-data-agent/scripts/utils.py b/02-use-cases/role-based-hr-data-agent/scripts/utils.py
new file mode 100644
index 000000000..b1d2a8d3e
--- /dev/null
+++ b/02-use-cases/role-based-hr-data-agent/scripts/utils.py
@@ -0,0 +1,61 @@
+"""
Shared AWS utilities for deployment scripts.
"""

import json
import logging
from typing import Any, Optional

import boto3
import yaml
from botocore.exceptions import ClientError

logger = logging.getLogger(__name__)


def get_aws_region() -> str:
    """Return the region of the default boto3 session, falling back to us-east-1."""
    session = boto3.session.Session()
    return session.region_name or "us-east-1"


def get_aws_account_id() -> str:
    """Return the caller's AWS account id via STS."""
    return boto3.client("sts").get_caller_identity()["Account"]


def get_ssm_parameter(name: str, decrypt: bool = True) -> Optional[str]:
    """Read an SSM parameter value; return None if it does not exist or access fails."""
    try:
        resp = boto3.client("ssm").get_parameter(Name=name, WithDecryption=decrypt)
        return resp["Parameter"]["Value"]
    except ClientError:
        return None


def put_ssm_parameter(name: str, value: str, secure: bool = False) -> None:
    """Create or overwrite an SSM parameter (SecureString when secure=True)."""
    boto3.client("ssm").put_parameter(
        Name=name,
        Value=value,
        Type="SecureString" if secure else "String",
        Overwrite=True,
    )
    logger.info(f"SSM parameter set: {name}")


def delete_ssm_parameter(name: str) -> None:
    """Delete an SSM parameter, ignoring errors (best-effort cleanup)."""
    try:
        boto3.client("ssm").delete_parameter(Name=name)
    except ClientError:
        pass


def read_config(path: str) -> dict:
    """Load a YAML (.yaml/.yml) or JSON config file into a dict."""
    with open(path, "r", encoding="utf-8") as f:
        if path.endswith((".yaml", ".yml")):
            return yaml.safe_load(f)
        return json.load(f)


def get_cognito_client_secret(user_pool_id: str, client_id: str) -> str:
    """Return the client secret of a Cognito user-pool app client."""
    resp = boto3.client("cognito-idp").describe_user_pool_client(
        UserPoolId=user_pool_id, ClientId=client_id
    )
    return resp["UserPoolClient"]["ClientSecret"]
diff --git a/02-use-cases/role-based-hr-data-agent/test/__init__.py b/02-use-cases/role-based-hr-data-agent/test/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/02-use-cases/role-based-hr-data-agent/test/test_agent.py b/02-use-cases/role-based-hr-data-agent/test/test_agent.py
new file mode 100644
index 000000000..8858b280f
--- /dev/null
+++ b/02-use-cases/role-based-hr-data-agent/test/test_agent.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
"""
Agent smoke test — sends a natural language prompt to the AgentCore Runtime
and prints the response. Tests the full flow: Runtime → Gateway → Lambda → Redaction.

Usage:
    python test/test_agent.py --persona hr-manager --prompt "Find all engineers"
    python test/test_agent.py --persona employee --prompt "Show me John Smith's email"
"""

import argparse
import json
import sys
import uuid

import boto3
import requests

# Allow running from the repo root: scripts/ is imported as a package.
sys.path.insert(0, ".")
from scripts.utils import get_ssm_parameter


def get_token(persona: str) -> str:
    """Fetch an OAuth client_credentials token for the given persona from Cognito."""
    client_id = get_ssm_parameter(f"/app/hrdlp/personas/{persona}/client-id")
    client_secret = get_ssm_parameter(f"/app/hrdlp/personas/{persona}/client-secret")
    token_url = get_ssm_parameter("/app/hrdlp/cognito-token-url")

    if not all([client_id, client_secret, token_url]):
        print(f"ERROR: Credentials for persona '{persona}' not in SSM. Run prereq.sh first.")
        sys.exit(1)

    resp = requests.post(
        token_url,
        data={"grant_type": "client_credentials"},
        auth=(client_id, client_secret),
        timeout=15,
    )
    resp.raise_for_status()
    return resp.json()["access_token"]


def invoke_agent(runtime_url: str, token: str, prompt: str, session_id: str) -> str:
    """POST a prompt to the runtime and stream the response to stdout; return full text."""
    payload = {"prompt": prompt, "sessionId": session_id}
    resp = requests.post(
        runtime_url,
        json=payload,
        headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"},
        timeout=120,
        stream=True,
    )
    resp.raise_for_status()
    full = ""
    # Stream chunks as they arrive so long agent runs show progress.
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        if chunk:
            full += chunk
            print(chunk, end="", flush=True)
    print()
    return full


def main():
    parser = argparse.ArgumentParser(description="AgentCore Runtime smoke test")
    parser.add_argument("--persona", default="hr-manager",
                        choices=["hr-manager", "hr-specialist", "employee", "admin"])
    parser.add_argument("--prompt", default="Find all engineers in the company")
    args = parser.parse_args()

    runtime_url = get_ssm_parameter("/app/hrdlp/runtime-url")
    if not runtime_url:
        print("ERROR: Runtime URL not found in SSM (/app/hrdlp/runtime-url). Run agentcore_agent_runtime.py create first.")
        sys.exit(1)

    print(f"\n[test_agent] Persona: {args.persona}")
    print(f"[test_agent] Prompt:  {args.prompt}\n")
    print("-" * 60)

    token = get_token(args.persona)
    session_id = str(uuid.uuid4())
    invoke_agent(runtime_url, token, args.prompt, session_id)

    print("-" * 60)
    print("[test_agent] Done.")


if __name__ == "__main__":
    main()
diff --git a/02-use-cases/role-based-hr-data-agent/test/test_dlp_redaction.py b/02-use-cases/role-based-hr-data-agent/test/test_dlp_redaction.py
new file mode 100644
index 000000000..bab04d9f1
--- /dev/null
+++ b/02-use-cases/role-based-hr-data-agent/test/test_dlp_redaction.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
"""
DLP redaction verification test — calls the Gateway directly as each persona
and verifies that the correct fields are (or are not) redacted.

Usage:
    python test/test_dlp_redaction.py
    python test/test_dlp_redaction.py --persona hr-manager
"""

import argparse
import json
import sys
import uuid

import requests

# Allow running from the repo root: scripts/ is imported as a package.
sys.path.insert(0, ".")
from scripts.utils import get_ssm_parameter

# Exact marker string the Response Interceptor substitutes for redacted fields.
REDACTED_MARKER = "[REDACTED - Insufficient Permissions]"

# Expected redaction behaviour per persona
PERSONA_EXPECTATIONS = {
    "hr-manager": {
        "pii_visible": True,
        "address_visible": True,
        "comp_visible": True,
        "comp_tool_visible": True,
    },
    "hr-specialist": {
        "pii_visible": True,
        "address_visible": False,
        "comp_visible": False,
        "comp_tool_visible": False,
    },
    "employee": {
        "pii_visible": False,
        "address_visible": False,
        "comp_visible": False,
        "comp_tool_visible": False,
    },
    "admin": {
        "pii_visible": True,
        "address_visible": True,
        "comp_visible": True,
        "comp_tool_visible": True,
    },
}


def get_token(persona: str, token_url: str) -> str:
    """Fetch an OAuth client_credentials token for the persona from Cognito."""
    client_id = get_ssm_parameter(f"/app/hrdlp/personas/{persona}/client-id")
    client_secret = get_ssm_parameter(f"/app/hrdlp/personas/{persona}/client-secret")
    resp = requests.post(
        token_url,
        data={"grant_type": "client_credentials"},
        auth=(client_id, client_secret),
        timeout=15,
    )
    resp.raise_for_status()
    return resp.json()["access_token"]


def jsonrpc(gateway_url: str, token: str, method: str, params: dict | None = None) -> dict:
    """Send a single MCP JSON-RPC request to the gateway and return the parsed reply."""
    resp = requests.post(
        gateway_url,
        json={"jsonrpc": "2.0", "id": uuid.uuid4().hex, "method": method, "params": params or {}},
        headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()


def test_persona(persona: str, gateway_url: str, token_url: str) -> bool:
    """Run tool-discovery and field-redaction checks for one persona; return pass/fail."""
    expected = PERSONA_EXPECTATIONS[persona]
    token = get_token(persona, token_url)
    print(f"\n{'='*60}")
    print(f"Testing persona: {persona}")
    print(f"{'='*60}")

    passed = True

    # 1. Tool discovery — the Response Interceptor should hide the
    #    compensation tool from personas lacking the comp scope.
    tools_result = jsonrpc(gateway_url, token, "tools/list")
    tools = tools_result.get("result", {}).get("tools", [])
    tool_names = [t["name"] for t in tools]
    comp_visible = any("compensation" in n for n in tool_names)

    if comp_visible == expected["comp_tool_visible"]:
        print(f"  ✓ Compensation tool visibility: {comp_visible}")
    else:
        print(f"  ✗ Compensation tool visibility: expected={expected['comp_tool_visible']}, got={comp_visible}")
        passed = False

    # 2. Search employee — the Lambda target wraps its payload as a JSON
    #    string inside content[0].text, with the real payload under "body",
    #    hence the double json.loads below.
    search_result = jsonrpc(gateway_url, token, "tools/call", {
        "name": "hr-lambda-target___search_employee",
        "arguments": {"query": "John"},
    })
    content = search_result.get("result", {}).get("content", [])
    body = {}
    if content:
        try:
            lr = json.loads(content[0]["text"])
            body = json.loads(lr.get("body", "{}"))
        except Exception:
            pass  # best-effort: malformed payload falls through to the warning below

    employees = body.get("employees", [])
    if not employees:
        print("  ⚠ No employees returned from search — check Lambda deployment")
        return passed

    emp = employees[0]

    # Check PII
    email = emp.get("email", "")
    pii_redacted = email == REDACTED_MARKER
    pii_ok = (not pii_redacted) == expected["pii_visible"]
    print(f"  {'✓' if pii_ok else '✗'} PII (email): {'visible' if not pii_redacted else 'redacted'}")
    if not pii_ok:
        passed = False

    # Check address
    city = emp.get("city", "")
    addr_redacted = city == REDACTED_MARKER
    addr_ok = (not addr_redacted) == expected["address_visible"]
    print(f"  {'✓' if addr_ok else '✗'} Address (city): {'visible' if not addr_redacted else 'redacted'}")
    if not addr_ok:
        passed = False

    # Check compensation
    salary = emp.get("salary", "")
    comp_redacted = salary == REDACTED_MARKER
    comp_ok = (not comp_redacted) == expected["comp_visible"]
    print(f"  {'✓' if comp_ok else '✗'} Compensation (salary): {'visible' if not comp_redacted else 'redacted'}")
    if not comp_ok:
        passed = False

    print(f"  Result: {'PASS' if passed else 'FAIL'}")
    return passed


def main():
    parser = argparse.ArgumentParser(description="DLP redaction verification")
    parser.add_argument("--persona", default=None,
                        choices=["hr-manager", "hr-specialist", "employee", "admin"],
                        help="Test a single persona (default: all)")
    args = parser.parse_args()

    gateway_url = get_ssm_parameter("/app/hrdlp/gateway-url")
    token_url = get_ssm_parameter("/app/hrdlp/cognito-token-url")

    if not gateway_url or not token_url:
        print("ERROR: Required SSM parameters missing. Run prereq.sh first.")
        sys.exit(1)

    personas = [args.persona] if args.persona else list(PERSONA_EXPECTATIONS.keys())
    results = {}
    for p in personas:
        results[p] = test_persona(p, gateway_url, token_url)

    print(f"\n{'='*60}")
    print("SUMMARY")
    print(f"{'='*60}")
    all_passed = True
    for p, ok in results.items():
        print(f"  {p:20s} {'PASS' if ok else 'FAIL'}")
        if not ok:
            all_passed = False

    sys.exit(0 if all_passed else 1)


if __name__ == "__main__":
    main()
diff --git a/02-use-cases/role-based-hr-data-agent/test/test_gateway.py b/02-use-cases/role-based-hr-data-agent/test/test_gateway.py
new file mode 100644
index 000000000..618a69d00
--- /dev/null
+++ b/02-use-cases/role-based-hr-data-agent/test/test_gateway.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
"""
Gateway smoke test — connects to the AgentCore Gateway and exercises MCP tools directly.

Usage:
    python test/test_gateway.py --persona hr-manager --query "Find all engineers"
    python test/test_gateway.py --persona employee --list-tools
"""

import argparse
import base64
import json
import sys
import uuid

import boto3
import requests

# Allow running from the repo root: scripts/ is imported as a package.
sys.path.insert(0, ".")
from scripts.utils import get_ssm_parameter


def get_token(persona: str) -> str:
    """Fetch an OAuth client_credentials token for the persona from Cognito."""
    client_id = get_ssm_parameter(f"/app/hrdlp/personas/{persona}/client-id")
    client_secret = get_ssm_parameter(f"/app/hrdlp/personas/{persona}/client-secret")
    token_url = get_ssm_parameter("/app/hrdlp/cognito-token-url")

    if not all([client_id, client_secret, token_url]):
        print(f"ERROR: Credentials not found for persona '{persona}'. Run prereq.sh first.")
        sys.exit(1)

    resp = requests.post(
        token_url,
        data={"grant_type": "client_credentials"},
        auth=(client_id, client_secret),
        timeout=15,
    )
    resp.raise_for_status()
    token = resp.json()["access_token"]
    print(f"[auth] Token acquired for persona: {persona}")
    return token


def call_gateway(gateway_url: str, token: str, method: str, params: dict | None = None) -> dict:
    """Send a single MCP JSON-RPC request to the gateway and return the parsed reply."""
    payload = {
        "jsonrpc": "2.0",
        "id": uuid.uuid4().hex,
        "method": method,
        "params": params or {},
    }
    resp = requests.post(
        gateway_url,
        json=payload,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
        },
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()


def list_tools(gateway_url: str, token: str) -> None:
    """Print the tools the gateway exposes for this token's scopes."""
    print("\n[gateway] Listing available tools...")
    result = call_gateway(gateway_url, token, "tools/list")
    tools = result.get("result", {}).get("tools", [])
    if not tools:
        print("  No tools visible for this persona (check scopes).")
    else:
        for t in tools:
            print(f"  ✓ {t['name']}")
        print(f"  Total: {len(tools)} tools")


def call_tool(gateway_url: str, token: str, tool_name: str, arguments: dict) -> None:
    """Invoke one tool and pretty-print its JSON body (raw text on parse failure)."""
    print(f"\n[gateway] Calling tool: {tool_name}")
    result = call_gateway(gateway_url, token, "tools/call", {"name": tool_name, "arguments": arguments})
    content = result.get("result", {}).get("content", [])
    for item in content:
        if item.get("type") == "text":
            # Lambda target nests the payload: text → JSON → "body" → JSON.
            try:
                data = json.loads(item["text"])
                body = json.loads(data.get("body", "{}"))
                print(json.dumps(body, indent=2))
            except Exception:
                print(item["text"])


def main():
    parser = argparse.ArgumentParser(description="AgentCore Gateway smoke test")
    parser.add_argument("--persona", default="hr-manager",
                        choices=["hr-manager", "hr-specialist", "employee", "admin"],
                        help="Test persona to use")
    parser.add_argument("--query", default="John Smith", help="Search query")
    parser.add_argument("--list-tools",
                        action="store_true", help="Only list tools, no invocation")
    args = parser.parse_args()

    gateway_url = get_ssm_parameter("/app/hrdlp/gateway-url")
    if not gateway_url:
        print("ERROR: Gateway URL not found in SSM (/app/hrdlp/gateway-url)")
        sys.exit(1)

    token = get_token(args.persona)
    list_tools(gateway_url, token)

    if not args.list_tools:
        call_tool(
            gateway_url, token,
            "hr-lambda-target___search_employee",
            {"query": args.query},
        )


if __name__ == "__main__":
    main()