diff --git a/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/README.md b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/README.md new file mode 100644 index 00000000..8360c040 --- /dev/null +++ b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/README.md @@ -0,0 +1,85 @@ +# Bedrock Agent Arize-Phoenix Integration + +This package provides Phoenix observability tool for Amazon Bedrock Agents for experimentation tracking, evaluation, and troubleshooting of AI and LLM applications + +## Features + +- Comprehensive Traceability: Gain visibility into every step of your agent’s execution path, from initial user query through knowledge retrieval and action execution +- Systematic Evaluation Framework: Apply consistent evaluation methodologies to measure and understand agent performance +- Data-Driven Optimization: Run structured experiments to compare different agent configurations and identify optimal settings + + +## Hierarchy + +The trace hierarchy follows this pattern: + +``` +L1: "agent" : "bedrock_agent.invoke_agent" + L2: "chain" : "orchestrationTrace" + L3: "llm" : "LLM" + L3: "retriever" : "knowledge_base" + L3: "tool" : "action_group" + L3: "llm" : "LLM" + +``` + +## Quick Start + +1. Install the required dependencies: +```bash +!pip install -q arize-phoenix-otel boto3 anthropic openinference-instrumentation-bedrock +``` + +2. Create a function to run your agent and capture its outputs: + +```python +def run(input_text): + session_id = f"default-session1_{int(time.time())}" + + attributes = dict( + inputText=input_text, + agentId=AGENT_ID, + agentAliasId=AGENT_ALIAS_ID, + sessionId=session_id, + enableTrace=True, + ) + response = bedrock_agent_runtime.invoke_agent(**attributes) + + # Stream the response + for _, event in enumerate(response["completion"]): + if "chunk" in event: + print(event) + chunk_data = event["chunk"] + if "bytes" in chunk_data: + output_text = chunk_data["bytes"].decode("utf8") + print(output_text) + elif "trace" in event: + print(event["trace"]) + +``` + +## Configuration + +The following configuration options can be provided: + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `api_key` | Phoenix api key | Environment variable or demo key | +| `PHOENIX_CLIENT_HEADERS` | Phoenix client header| "api_key=ENTER YOUR API KEY" | +| `PHOENIX_COLLECTOR_ENDPOINT` | URL | https://app.phoenix.arize.com | +| `project_name` | Project name | Amazon Bedrock Agents | +| `userId` | User ID for Arize-phoenix | anonymous | +| `session_id` | Sessiojn Id| anonymous | +| `metadata` | custom attributes | ["key":"value"] | +| `tags` | Tags for filtering in Arize Phoenix | [] | +| `prompt_template` | Prompt Management | "" | +| `prompt_template_version` | Promot version | "" | +| `prompt_template_variables` | Custom trace ID | ["key":"value"] | +| `show_traces` | Whether to print raw trace data | False | + +## Attribute Naming + +This integration follows Openinference Semantic Conventions: + +For a complete guide to Python semantic conventions, refer to the following link +https://docs.arize.com/arize/llm-tracing/tracing/semantic-conventions diff --git a/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/eval_1.png b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/eval_1.png new file mode 100644 index 00000000..82c904a9 Binary files /dev/null and b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/eval_1.png differ diff --git a/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/eval_2.png b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/eval_2.png new file mode 100644 index 00000000..04c379c0 Binary files /dev/null and b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/eval_2.png differ diff --git a/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/filter_1.png b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/filter_1.png new file mode 100644 index 00000000..c952cd7b Binary files /dev/null and b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/filter_1.png differ diff --git a/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/trace_1.png b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/trace_1.png new file mode 100644 index 00000000..b1003d8e Binary files /dev/null and b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/trace_1.png differ diff --git a/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/trace_2.png b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/trace_2.png new file mode 100644 index 00000000..94ca995c Binary files /dev/null and b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/images/trace_2.png differ diff --git a/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/requirements.txt b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/requirements.txt new file mode 100644 index 00000000..14a57d9c --- /dev/null +++ b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/requirements.txt @@ -0,0 +1,5 @@ +arize-phoenix-otel +boto3==1.34.162 +anthropic +openinference-instrumentation-bedrock +arize-phoenix==8.27.0 \ No newline at end of file diff --git a/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/trace-and-eval-bedrock-agents-with-phoenix.ipynb b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/trace-and-eval-bedrock-agents-with-phoenix.ipynb new file mode 100644 index 00000000..09fbdc39 --- /dev/null +++ b/examples/agent_observability/OpenInference-Arize-Phoenix/BedrockAgent-Observability-sample/trace-and-eval-bedrock-agents-with-phoenix.ipynb @@ -0,0 +1,560 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Amazon Bedrock Recipe: Arize Phoenix Integration with Bedrock Agents\n", + "\n", + "## Overview\n", + "Arize AI delivers comprehensive observability tools specifically designed for AI applications. The platform is available in two versions:\n", + "\n", + "**Arize AX:** An enterprise solution offering advanced monitoring capabilities\n", + "\n", + "**Arize Phoenix:** An open-source platform making tracing and evaluation accessible to all developers\n", + "\n", + "The integration between Arize AI and Amazon Bedrock Agents delivers three primary benefits:\n", + "\n", + "**Comprehensive Traceability:** Gain visibility into every step of your agent’s execution path, from initial user query through knowledge retrieval and action execution\n", + "\n", + "**Systematic Evaluation Framework:** Apply consistent evaluation methodologies to measure and understand agent performance\n", + "\n", + "**Data-Driven Optimization:** Run structured experiments to compare different agent configurations and identify optimal settings\n", + "\n", + "### Context\n", + "Amazon Bedrock Agents is a fully managed capability in Amazon Bedrock that allows you to build AI agents that can complete tasks by interacting with enterprise systems, data sources, and APIs. These agents can understand user requests in natural language, break down complex tasks into steps, retrieve relevant information, and take actions to fulfill user requests. With Bedrock Agents, you can create conversational assistants that can answer questions, provide recommendations, and perform actions on behalf of users, all while maintaining context throughout the conversation.\n", + "\n", + "\n", + "The integration between Arize AI and Amazon Bedrock Agents, provides developers with powerful capabilities for tracing, evaluating, and monitoring AI agent applications.\n", + "This Phoenix's tracing and span analysis capabilities are invaluable during the prototyping and debugging stages. \n", + "\n", + "#### Use Case\n", + "To demonstrate the integration between Arize Phoenix and Amazon Bedrock Agents providing observability outside of AWS tooling. \n", + "\n", + "#### Implementation\n", + "In this notebook we will show how to integrate Amazon Bedrock Agents and Arize Pheonix using the Arize AI platform. We will configure agent observability, send traces to Arize Pheonix, and evaluate the results using a single agent.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "AWS account with appropriate IAM permissions for Amazon Bedrock Agents and Model Access." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Python Dependencies\n", + "\n", + "To run this notebook, you'll need to install some libraries in your environment:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -r requirements.txt --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's load the libraries:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load the libraries\n", + "import time\n", + "import boto3\n", + "import logging\n", + "import os\n", + "import nest_asyncio\n", + "from phoenix.otel import register\n", + "from openinference.instrumentation import using_metadata\n", + "\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set Phoenix Environment Variables\n", + "\n", + "This example used [Phoenix Cloud](https://app.phoenix.arize.com), our free online hosted version of Phoenix. If you'd prefer, you can [self-host Phoenix](https://docs.arize.com/phoenix/self-hosting) instead.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"PHOENIX_COLLECTOR_ENDPOINT\"] = \"https://app.phoenix.arize.com\"\n", + "if not os.environ.get(\"PHOENIX_CLIENT_HEADERS\"):\n", + " os.environ[\"PHOENIX_CLIENT_HEADERS\"] = \"api_key=\" + input(\"Enter your Phoenix API key: \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Connect to Phoenix\n", + "Now you can connect your notebook to a Phoenix instance.\n", + "\n", + "The `auto_instrument` flag below will search your environment for any openinference-instrumentation packages, and call any that are found. Because you installed the openinference-instrumentation-bedrock library, any calls you make to Bedrock or Bedrock agents will be automatically instrumented and sent to Phoenix." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "project_name = \"Amazon Bedrock Agent Example\"\n", + "\n", + "tracer_provider = register(project_name=project_name, auto_instrument=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Configure metadata for the span - Using_metadata context manager to add metadata to the current OpenTelemetry Context. OpenInference auto instrumentators will read this Context and pass the metadata as a span attribute, following the OpenInference semantic conventions. The metadata, must be a dictionary with string keys. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# metadata for filtering\n", + "metadata = { \"agent\" : \"bedrock-agent\", \n", + " \"env\" : \"development\"\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### AWS Credentials\n", + "Before using Amazon Bedrock, ensure that your AWS credentials are configured correctly. You can set them up using the AWS CLI or by setting environment variables. For this notebook assumes that the credentials are already configured.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Create the client to invoke Agents in Amazon Bedrock:\n", + "session = boto3.Session()\n", + "REGION = session.region_name\n", + "bedrock_agent_runtime = session.client(service_name=\"bedrock-agent-runtime\",region_name=REGION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Amazon Bedrock Agent\n", + "\n", + "\n", + "We assume you've already created an [Amazon Bedrock Agent](https://docs.aws.amazon.com/bedrock/latest/userguide/agents.html). If you don't have one already you can follow the **[instructions here]()** to set up an example agent.\n", + "\n", + "Configure your agent's **ID** and (optionally) alias ID in the cell below. You can find these by looking up your agent in the [\"Agents\" page on the AWS Console for Amazon Bedrock](https://console.aws.amazon.com/bedrock/home?#/agents) or CLI.\n", + "\n", + "The Agent ID should be ten characters, uppercase, and alphanumeric." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_id = \"\" # <- Configure your Bedrock Agent ID\n", + "agent_alias_id = \"ZZCHKSYYJE\" # <- Optionally set a different Alias ID if you have one" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before moving on lets validate invoke agent is working correctly. The response is not important we are simply testing the API call. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Trying to invoke alias {agent_alias_id} of agent {agent_id}...\")\n", + "agent_resp = bedrock_agent_runtime.invoke_agent(\n", + " agentAliasId=agent_alias_id,\n", + " agentId=agent_id,\n", + " inputText=\"Hello!\",\n", + " sessionId=\"dummy-session\",\n", + ")\n", + "if \"completion\" in agent_resp:\n", + " print(\"✅ Got response\")\n", + "else:\n", + " raise ValueError(f\"No 'completion' in agent response:\\n{agent_resp}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run your Agent\n", + "You're now ready to run your Bedrock Agent." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# add constructor for metadata\n", + "@using_metadata(metadata)\n", + "def run(input_text):\n", + " session_id = f\"default-session1_{int(time.time())}\"\n", + "\n", + " attributes = dict(\n", + " inputText=input_text,\n", + " agentId=agent_id,\n", + " agentAliasId=agent_alias_id,\n", + " sessionId=session_id,\n", + " enableTrace=True,\n", + " )\n", + " response = bedrock_agent_runtime.invoke_agent(**attributes)\n", + "\n", + " # Stream the response\n", + " for _, event in enumerate(response[\"completion\"]):\n", + " if \"chunk\" in event:\n", + " print(event)\n", + " chunk_data = event[\"chunk\"]\n", + " if \"bytes\" in chunk_data:\n", + " output_text = chunk_data[\"bytes\"].decode(\"utf8\")\n", + " print(output_text)\n", + " elif \"trace\" in event:\n", + " print(event[\"trace\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "run(\"What are the starters in the childrens menu?\") ## your prompt to the agent" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "## View your Traces in Phoenix\n", + "\n", + "You should now be able to see traces in your Phoenix dashboard:\n", + "\n", + "![image trace1](./images/trace_1.png)\n", + "![image trace2](./images/trace_2.png)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filter your Traces in Phoenix\n", + "\n", + "Arize has 3 ways users can filter and search across their traces. \n", + "\n", + "- Use AI Search with natural language \n", + "\n", + "- Use AI Search to construct the filter syntax \n", + "\n", + "- Directly use Filter Syntax" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should now be able to see subset of traces in your Phoenix dashboard:\n", + "\n", + "![image filter1](./images/filter_1.png)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluating your Agent\n", + "\n", + "Phoenix also includes built in LLM evaluations and code-based experiment testing. In this next section, you'll add Agent tool calling evaluations to your traces.\n", + "\n", + "Up until now, you'd just used the lighter-weight Phoenix OTEL tracing library. To run evals, you'll need to install the full library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q arize-phoenix --quiet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# load the libraries\n", + "import re\n", + "import json\n", + "import phoenix as px\n", + "from phoenix.evals import (\n", + " TOOL_CALLING_PROMPT_RAILS_MAP,\n", + " TOOL_CALLING_PROMPT_TEMPLATE,\n", + " BedrockModel,\n", + " llm_classify,\n", + ")\n", + "from phoenix.trace import SpanEvaluations\n", + "from phoenix.trace.dsl import SpanQuery\n", + "\n", + "px.launch_app()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# function to process json string\n", + "def process_question(query):\n", + " # print(query)\n", + " query = re.sub(r'\\'', '', query)\n", + " query = re.sub(r'\\\\\"','', query)\n", + " query = re.sub(r'\\\\+', '', query)\n", + " dict_data = json.loads(query)\n", + " \n", + " return dict_data.get(\"messages\", [{}])[0].get(\"content\", \"\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "query = (\n", + " SpanQuery()\n", + " .where(\n", + " # Filter for the `LLM` span kind.\n", + " # The filter condition is a string of valid Python boolean expression.\n", + " \"span_kind == 'LLM' and 'evaluation' not in input.value\"\n", + " )\n", + " .select(\n", + " question=\"input.value\",\n", + " outputs=\"output.value\",\n", + " )\n", + ")\n", + "trace_df = px.Client().query_spans(query, project_name=project_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Apply the function using lambda\n", + "trace_df[\"question\"] = trace_df[\"question\"].apply(\n", + " lambda x: process_question(x) if isinstance(x, str) else x\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Function to extract tool call names from the output\n", + "# extract all tool calls\n", + "def extract_tool_calls(output_value):\n", + " try:\n", + " tool_calls = []\n", + " # Look for tool calls within tags\n", + " if \"\" in output_value:\n", + " # Find all tool_name tags\n", + " tool_name_pattern = r\"(.*?)\"\n", + " tool_names = re.findall(tool_name_pattern, output_value)\n", + "\n", + " # Add each found tool name to the list\n", + " for tool_name in tool_names:\n", + " if tool_name:\n", + " tool_calls.append(tool_name)\n", + " except Exception as e:\n", + " print(f\"Error extracting tool calls: {e}\")\n", + " pass\n", + "\n", + " return tool_calls\n", + "\n", + "# Apply the function to each row of trace_df.output.value\n", + "trace_df[\"tool_call\"] = trace_df[\"outputs\"].apply(\n", + " lambda x: extract_tool_calls(x) if isinstance(x, str) else []\n", + ")\n", + "\n", + "# Display the tool calls found\n", + "print(\"Tool calls found in traces:\", trace_df[\"tool_call\"].sum())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Keep only rows where tool_calls is not empty (has at least one tool call)\n", + "trace_df = trace_df[trace_df[\"tool_call\"].apply(lambda x: len(x) > 0)]\n", + "\n", + "# Show the dataframe\n", + "trace_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# include tool definitions\n", + "trace_df[\"tool_definitions\"] = (\n", + " \"phoenix-traces retrieves the latest trace information from Phoenix, phoenix-experiments retrieves the latest experiment information from Phoenix, phoenix-datasets retrieves the latest dataset information from Phoenix\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "rails = list(TOOL_CALLING_PROMPT_RAILS_MAP.values())\n", + "\n", + "# Use LLM as judge to evaluate the reponse\n", + "eval_model = BedrockModel(session=session, model_id=\"us.anthropic.claude-3-5-haiku-20241022-v1:0\")\n", + "\n", + "response_classifications = llm_classify(\n", + " data=trace_df,\n", + " template=TOOL_CALLING_PROMPT_TEMPLATE,\n", + " model=eval_model,\n", + " rails=rails,\n", + " provide_explanation=True,\n", + ")\n", + "\n", + "#create a label\n", + "response_classifications[\"score\"] = response_classifications.apply(\n", + " lambda x: 1 if x[\"label\"] == \"correct\" else 0, axis=1\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Logging evaluations to Phoenix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# log the evaluations\n", + "px.Client().log_evaluations(\n", + " SpanEvaluations(eval_name=\"Tool Calling Eval\", dataframe=response_classifications),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should now see your evaluation labels in Phoenix!\n", + "\n", + "![image eval1](./images/eval_1.png)\n", + "![image eval2](./images/eval_2.png)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The End" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}