Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import os
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import AgentReference, PromptAgentDefinition, ComputerUsePreviewTool

# Import shared helper functions
from computer_use_util import (
SearchState,
load_screenshot_assets,
handle_computer_action_and_take_screenshot,
print_final_output,
)

load_dotenv()


def main() -> None:
    """Demonstrate Computer Use Agent functionality.

    Creates a prompt agent equipped with ``ComputerUsePreviewTool``, sends it a
    search task together with an initial screenshot, and then answers each
    ``computer_call`` the agent emits with the screenshot returned by
    ``handle_computer_action_and_take_screenshot`` until the agent stops
    requesting actions or ``max_iterations`` is reached.

    Raises:
        SystemExit: with status 1 when the screenshot assets cannot be loaded.
        KeyError: when ``AZURE_AI_PROJECT_ENDPOINT`` or
            ``AZURE_AI_MODEL_DEPLOYMENT_NAME`` is missing from the environment.
    """
    # Initialize state machine
    current_state = SearchState.INITIAL

    # Load screenshot assets
    try:
        screenshots = load_screenshot_assets()
    except FileNotFoundError:
        print("Failed to load required screenshot assets. Please ensure the asset files exist in ../assets/")
        # SystemExit does not depend on the exit() helper injected by the
        # site module, so it also works under `python -S` or frozen builds.
        raise SystemExit(1) from None
    print("Successfully loaded screenshot assets")

    project_client = AIProjectClient(
        endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
        credential=DefaultAzureCredential(),
    )

    with project_client:
        # <create_agent_with_computer_use_tool>
        computer_use_tool = ComputerUsePreviewTool(display_width=1026, display_height=769, environment="windows")
        agent = project_client.agents.create_version(
            agent_name="ComputerUseAgent",
            definition=PromptAgentDefinition(
                model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
                instructions="""
                You are a computer automation assistant.

                Be direct and efficient. When you reach the search results page, read and describe the actual search result titles and descriptions you can see.
                """,
                tools=[computer_use_tool],
            ),
            description="Computer automation agent with screen interaction capabilities.",
        )
        # </create_agent_with_computer_use_tool>
        print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")

        openai_client = project_client.get_openai_client()

        # Every request targets the same agent, so build the reference once.
        agent_reference = {"agent": AgentReference(name=agent.name).as_dict()}

        # Initial request with screenshot - start with Bing search page
        print("Starting computer automation session (initial screenshot: cua_browser_search.png)...")
        # <computer_use_initial_request>
        response = openai_client.responses.create(
            input=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "input_text",
                            "text": "I need you to help me search for 'OpenAI news'. Please type 'OpenAI news' and submit the search. Once you see search results, the task is complete.",
                        },
                        {
                            "type": "input_image",
                            "image_url": screenshots["browser_search"]["url"],
                            "detail": "high",
                        },  # Start with Bing search page
                    ],
                }
            ],
            extra_body=agent_reference,
            truncation="auto",
        )
        # </computer_use_initial_request>

        print(f"Initial response received (ID: {response.id})")

        # <computer_use_interaction_loop>
        max_iterations = 10  # Allow enough iterations for completion

        for iteration in range(1, max_iterations + 1):
            print(f"\n--- Iteration {iteration} ---")

            # Check for computer calls in the response
            computer_calls = [item for item in response.output if item.type == "computer_call"]

            if not computer_calls:
                # No further action requested: the agent has produced its answer.
                print_final_output(response)
                break

            # Process the first computer call; any additional calls in the
            # same response are not handled by this sample.
            computer_call = computer_calls[0]
            action = computer_call.action
            call_id = computer_call.call_id

            print(f"Processing computer call (ID: {call_id})")

            # Handle the action and get the screenshot info
            screenshot_info, current_state = handle_computer_action_and_take_screenshot(
                action, current_state, screenshots
            )

            print(f"Sending action result back to agent (using {screenshot_info['filename']})...")

            # Regular response with just the screenshot
            response = openai_client.responses.create(
                previous_response_id=response.id,
                input=[
                    {
                        "call_id": call_id,
                        "type": "computer_call_output",
                        "output": {
                            "type": "computer_screenshot",
                            "image_url": screenshot_info["url"],
                        },
                    }
                ],
                extra_body=agent_reference,
                truncation="auto",
            )

            print(f"Follow-up response received (ID: {response.id})")
        else:
            # Only reached when the loop ran out of iterations without a break.
            print(f"\nReached maximum iterations ({max_iterations}). Stopping.")
        # </computer_use_interaction_loop>

        # uncomment the following line to clean up the created agent after running the sample
        # project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import PromptAgentDefinition, FileSearchTool
from dotenv import load_dotenv
import os

load_dotenv()

# Absolute path of the document that will be indexed for file search.
file_1 = os.path.abspath(os.path.join(os.path.dirname(__file__), "../assets/product_info.md"))

project_client = AIProjectClient(
    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
    credential=DefaultAzureCredential(),
)

openai_client = project_client.get_openai_client()

# Create vector store for file search

# <create_vector_store_basic>
vector_store = openai_client.vector_stores.create(name="ProductInfoStore")
print(f"Vector store created (id: {vector_store.id})")
# </create_vector_store_basic>

# Upload the document into the store the agent will search. `file_1` is only
# a path string, so the file has to be uploaded before there is an id to
# reference; without this step the agent would query an empty store.
with open(file_1, "rb") as product_file:
    uploaded_file = openai_client.vector_stores.files.upload_and_poll(
        vector_store_id=vector_store.id,
        file=product_file,
    )
print(f"File uploaded to vector store (id: {uploaded_file.id})")

# Create vector store with expiration

# <create_vector_store_with_expiration>
# NOTE(review): uses the documented vector_stores.create(); confirm whether the
# installed openai package offers a polling variant before switching to it.
vector_store_with_expiration = openai_client.vector_stores.create(
    name="Product Documentation",
    file_ids=[uploaded_file.id],
    expires_after={
        "anchor": "last_active_at",
        "days": 7,
    },
)
# </create_vector_store_with_expiration>

with project_client:
    # Create agent with file search tool
    # <create_agent_with_file_search_tool>
    agent = project_client.agents.create_version(
        agent_name="MyAgent",
        definition=PromptAgentDefinition(
            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
            instructions="You are a helpful assistant that can search through product information.",
            tools=[FileSearchTool(vector_store_ids=[vector_store.id])],
        ),
        description="File search agent for product information queries.",
    )
    # </create_agent_with_file_search_tool>
    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")

    # Create a conversation for the agent interaction
    conversation = openai_client.conversations.create()
    print(f"Created conversation (id: {conversation.id})")

    # Send a query to search through the uploaded file
    response = openai_client.responses.create(
        conversation=conversation.id,
        input="Tell me about Contoso products",
        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
    )
    print(f"Response: {response.output_text}")

    print("\nCleaning up...")
    # <delete_vector_store>
    openai_client.vector_stores.delete(vector_store.id)
    print("Deleted vector store")
    # </delete_vector_store>

    # The expiring store is only a demonstration; remove it as well instead of
    # leaving it around until its expiration policy applies.
    openai_client.vector_stores.delete(vector_store_with_expiration.id)

    # Uncomment the following lines to delete the agent after testing
    # project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
    # print("Agent deleted")


Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import os
import json
from dotenv import load_dotenv
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import PromptAgentDefinition, Tool, FunctionTool
from azure.identity import DefaultAzureCredential
from openai.types.responses.response_input_param import FunctionCallOutput, ResponseInputParam

load_dotenv()

# The agent calls below need a project client; create it up front.
project_client = AIProjectClient(
    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
    credential=DefaultAzureCredential(),
)


def get_horoscope(sign: str) -> str:
    """Return a canned horoscope for *sign*.

    This is the local implementation executed when the model requests the
    ``get_horoscope`` function tool.
    """
    return f"{sign}: Next Tuesday you will befriend a baby otter."


# Define a function tool for the model to use
# <define_function_tool>
func_tool = FunctionTool(
    name="get_horoscope",
    parameters={
        "type": "object",
        "properties": {
            "sign": {
                "type": "string",
                "description": "An astrological sign like Taurus or Aquarius",
            },
        },
        "required": ["sign"],
        "additionalProperties": False,
    },
    description="Get today's horoscope for an astrological sign.",
    strict=True,
)

# Create tools list with proper typing for the agent definition
tools: list[Tool] = [func_tool]
# </define_function_tool>

with project_client:
    # Create a prompt agent that can call the function tool defined above.
    # NOTE(review): the snippet tag name below is kept as-is in case external
    # documentation references it, even though this sample uses a function tool.
    # <create_agent_with_mcp_tool>
    agent = project_client.agents.create_version(
        agent_name="MyAgent",
        definition=PromptAgentDefinition(
            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
            instructions="You are a helpful assistant that can use function tools.",
            tools=tools,
        ),
    )
    # </create_agent_with_mcp_tool>
    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")
    openai_client = project_client.get_openai_client()

    # Prompt the model with tools defined
    response = openai_client.responses.create(
        input="What is my horoscope? I am an Aquarius.",
        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
    )
    print(f"Response output: {response.output_text}")

    input_list: ResponseInputParam = []
    # Process function calls
    # <process_function_calls>
    for item in response.output:
        if item.type == "function_call":
            if item.name == "get_horoscope":
                # Execute the function logic for get_horoscope
                horoscope = get_horoscope(**json.loads(item.arguments))

                # Provide function call results to the model
                input_list.append(
                    FunctionCallOutput(
                        type="function_call_output",
                        call_id=item.call_id,
                        output=json.dumps({"horoscope": horoscope}),
                    )
                )
    # </process_function_calls>
    print("Final input:")
    print(input_list)

    # Only send a follow-up when there are function results to report; if the
    # model answered without calling the tool, the first response is final.
    if input_list:
        response = openai_client.responses.create(
            input=input_list,
            previous_response_id=response.id,
            extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
        )

    # The model should be able to give a response!
    print("Final output:")
    print("\n" + response.output_text)

    # Uncomment these lines to clean up resources by deleting the agent version
    # This prevents accumulation of unused agent versions in your project
    # project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
    # print("Agent deleted")
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import base64
import os
from dotenv import load_dotenv

from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import PromptAgentDefinition, ImageGenTool

load_dotenv()

# Deployment that serves image generation. It can be overridden through the
# environment and falls back to the name this sample previously hard-coded.
image_generation_deployment = os.environ.get("IMAGE_GENERATION_MODEL_DEPLOYMENT_NAME", "gpt-image-1")

project_client = AIProjectClient(
    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
    credential=DefaultAzureCredential(),
)

openai_client = project_client.get_openai_client()

with project_client:
    # <create_agent_with_image_gen_tool>
    agent = project_client.agents.create_version(
        agent_name="MyAgent",
        definition=PromptAgentDefinition(
            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
            instructions="Generate images based on user prompts",
            tools=[ImageGenTool(quality="low", size="1024x1024")],
        ),
        description="Agent for image generation.",
    )
    # </create_agent_with_image_gen_tool>
    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")
    response = openai_client.responses.create(
        input="Generate an image of Microsoft logo.",
        extra_headers={
            "x-ms-oai-image-generation-deployment": image_generation_deployment
        },  # this is required at the moment for image generation
        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
    )
    print(f"Response created: {response.id}")
    # Save the image to a file
    # <download_generated_image>
    # Collect the payload of every image generation call in the response.
    image_data = [output.result for output in response.output if output.type == "image_generation_call"]

    if image_data and image_data[0]:
        print("Downloading generated image...")
        filename = "microsoft.png"
        file_path = os.path.abspath(filename)

        # The payload is base64 text; decode it before writing the binary file.
        with open(file_path, "wb") as f:
            f.write(base64.b64decode(image_data[0]))
        # </download_generated_image>
        # Reported only on this branch, where file_path is defined.
        print(f"Image downloaded and saved to: {file_path}")
    else:
        # Make the no-image case visible instead of ending silently.
        print("No image was returned by the agent.")
    # uncomment the following lines to clean up the created agent after running the sample
    # print("\nCleaning up...")
    # project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
    # print("Agent deleted")