diff --git a/samples/microsoft/python/getting-started-agents/agent-client/v2/computer_use.py b/samples/microsoft/python/getting-started-agents/agent-client/v2/computer_use.py
new file mode 100644
index 00000000..d0a14b68
--- /dev/null
+++ b/samples/microsoft/python/getting-started-agents/agent-client/v2/computer_use.py
@@ -0,0 +1,133 @@
+import os
+from dotenv import load_dotenv
+from azure.identity import DefaultAzureCredential
+from azure.ai.projects import AIProjectClient
+from azure.ai.projects.models import AgentReference, PromptAgentDefinition, ComputerUsePreviewTool
+
+# Import shared helper functions
+from computer_use_util import (
+    SearchState,
+    load_screenshot_assets,
+    handle_computer_action_and_take_screenshot,
+    print_final_output,
+)
+
+load_dotenv()
+
+# Sample script: demonstrates Computer Use Agent functionality.
+# Initialize state machine
+current_state = SearchState.INITIAL
+
+# Load screenshot assets
+try:
+    screenshots = load_screenshot_assets()
+    print("Successfully loaded screenshot assets")
+except FileNotFoundError:
+    print("Failed to load required screenshot assets. Please ensure the asset files exist in ../assets/")
+    # exit() is a site-module convenience meant for interactive use; raise SystemExit directly instead
+    raise SystemExit(1)
+
+project_client = AIProjectClient(
+    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
+    credential=DefaultAzureCredential(),
+)
+
+with project_client:
+    #
+    computer_use_tool = ComputerUsePreviewTool(display_width=1026, display_height=769, environment="windows")
+    agent = project_client.agents.create_version(
+        agent_name="ComputerUseAgent",
+        definition=PromptAgentDefinition(
+            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
+            instructions="""
+            You are a computer automation assistant.
+
+            Be direct and efficient. When you reach the search results page, read and describe the actual search result titles and descriptions you can see.
+            """,
+            tools=[computer_use_tool],
+        ),
+        description="Computer automation agent with screen interaction capabilities.",
+    )
+    #
+    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")
+
+    openai_client = project_client.get_openai_client()
+
+    # Initial request with screenshot - start with Bing search page
+    print("Starting computer automation session (initial screenshot: cua_browser_search.png)...")
+    #
+    response = openai_client.responses.create(
+        input=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "input_text",
+                        "text": "I need you to help me search for 'OpenAI news'. Please type 'OpenAI news' and submit the search. Once you see search results, the task is complete.",
+                    },
+                    {
+                        "type": "input_image",
+                        "image_url": screenshots["browser_search"]["url"],
+                        "detail": "high",
+                    },  # Start with Bing search page
+                ],
+            }
+        ],
+        extra_body={"agent": AgentReference(name=agent.name).as_dict()},
+        truncation="auto",
+    )
+    #
+
+    print(f"Initial response received (ID: {response.id})")
+
+    #
+    # The action loop stays inside the `with` block so project_client has not been closed while it runs.
+    max_iterations = 10  # Allow enough iterations for completion
+
+    for iteration in range(1, max_iterations + 1):
+        print(f"\n--- Iteration {iteration} ---")
+
+        # Check for computer calls in the response
+        computer_calls = [item for item in response.output if item.type == "computer_call"]
+
+        if not computer_calls:
+            print_final_output(response)
+            break
+
+        # Process the first computer call
+        computer_call = computer_calls[0]
+        action = computer_call.action
+        call_id = computer_call.call_id
+
+        print(f"Processing computer call (ID: {call_id})")
+
+        # Handle the action and get the screenshot info
+        screenshot_info, current_state = handle_computer_action_and_take_screenshot(action, current_state, screenshots)
+
+        print(f"Sending action result back to agent (using {screenshot_info['filename']})...")
+
+        # Regular response with just the screenshot
+        response = openai_client.responses.create(
+            previous_response_id=response.id,
+            input=[
+                {
+                    "call_id": call_id,
+                    "type": "computer_call_output",
+                    "output": {
+                        "type": "computer_screenshot",
+                        "image_url": screenshot_info["url"],
+                    },
+                }
+            ],
+            extra_body={"agent": AgentReference(name=agent.name).as_dict()},
+            truncation="auto",
+        )
+
+        print(f"Follow-up response received (ID: {response.id})")
+    else:
+        # Reached only when the loop never hit `break`, i.e. every iteration still returned a computer call
+        print(f"\nReached maximum iterations ({max_iterations}). Stopping.")
+    #
+
+    # uncomment the following line to clean up the created agent after running the sample
+    # project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
diff --git a/samples/microsoft/python/getting-started-agents/agent-client/v2/file_search.py b/samples/microsoft/python/getting-started-agents/agent-client/v2/file_search.py
new file mode 100644
index 00000000..d3e4c02d
--- /dev/null
+++ b/samples/microsoft/python/getting-started-agents/agent-client/v2/file_search.py
@@ -0,0 +1,87 @@
+from azure.identity import DefaultAzureCredential
+from azure.ai.projects import AIProjectClient
+from azure.ai.projects.models import PromptAgentDefinition, FileSearchTool
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+# Load the file to be indexed for search
+file_1 = os.path.abspath(os.path.join(os.path.dirname(__file__), "../assets/product_info.md"))
+
+project_client = AIProjectClient(
+    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
+    credential=DefaultAzureCredential(),
+)
+
+openai_client = project_client.get_openai_client()
+
+# Create vector store for file search
+
+#
+vector_store = openai_client.vector_stores.create(name="ProductInfoStore")
+print(f"Vector store created (id: {vector_store.id})")
+#
+
+# Upload the local file into the vector store; file_1 is only a path string, so the
+# file id used below has to come from the upload result
+with open(file_1, "rb") as product_file:
+    uploaded_file = openai_client.vector_stores.files.upload_and_poll(
+        vector_store_id=vector_store.id,
+        file=product_file,
+    )
+print(f"File uploaded to vector store (id: {uploaded_file.id})")
+
+# Create vector store with expiration
+
+#
+vector_store_with_expiration = openai_client.vector_stores.create_and_poll(
+    name="Product Documentation",
+    file_ids=[uploaded_file.id],
+    expires_after={
+        "anchor": "last_active_at",
+        "days": 7
+    }
+)
+#
+
+with project_client:
+    # Create agent with file search tool
+    #
+    agent = project_client.agents.create_version(
+        agent_name="MyAgent",
+        definition=PromptAgentDefinition(
+            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
+            instructions="You are a helpful assistant that can search through product information.",
+            tools=[FileSearchTool(vector_store_ids=[vector_store.id])],
+        ),
+        description="File search agent for product information queries.",
+    )
+    #
+    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")
+
+    # Create a conversation for the agent interaction
+    conversation = openai_client.conversations.create()
+    print(f"Created conversation (id: {conversation.id})")
+
+    # Send a query to search through the uploaded file
+    response = openai_client.responses.create(
+        conversation=conversation.id,
+        input="Tell me about Contoso products",
+        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
+    )
+    print(f"Response: {response.output_text}")
+
+    print("\nCleaning up...")
+    #
+    openai_client.vector_stores.delete(vector_store.id)
+    print("Deleted vector store")
+    #
+    # The second store would otherwise stay around until its 7-day expiry
+    openai_client.vector_stores.delete(vector_store_with_expiration.id)
+    print("Deleted vector store with expiration")
+
+    # Uncomment the following lines to delete the agent after testing
+    #project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
+    #print("Agent deleted")
+
+    
\ No newline at end of file
diff --git a/samples/microsoft/python/getting-started-agents/agent-client/v2/function_calling.py b/samples/microsoft/python/getting-started-agents/agent-client/v2/function_calling.py
new file mode 100644
index 00000000..29382c5f
--- /dev/null
+++ b/samples/microsoft/python/getting-started-agents/agent-client/v2/function_calling.py
@@ -0,0 +1,99 @@
+import os
+import json
+from dotenv import load_dotenv
+from azure.ai.projects import AIProjectClient
+from azure.ai.projects.models import PromptAgentDefinition, Tool, FunctionTool
+from azure.identity import DefaultAzureCredential
+from openai.types.responses.response_input_param import FunctionCallOutput, ResponseInputParam
+
+load_dotenv()
+
+project_client = AIProjectClient(
+    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
+    credential=DefaultAzureCredential(),
+)
+
+
+def get_horoscope(sign: str) -> str:
+    """Return a canned horoscope for *sign*; stands in for a real lookup."""
+    return f"{sign}: Next Tuesday you will befriend a baby otter."
+
+
+# Define a function tool for the model to use
+#
+func_tool = FunctionTool(
+    name="get_horoscope",
+    parameters={
+        "type": "object",
+        "properties": {
+            "sign": {
+                "type": "string",
+                "description": "An astrological sign like Taurus or Aquarius",
+            },
+        },
+        "required": ["sign"],
+        "additionalProperties": False,
+    },
+    description="Get today's horoscope for an astrological sign.",
+    strict=True,
+)
+
+# Create tools list with proper typing for the agent definition
+tools: list[Tool] = [func_tool]
+#
+
+with project_client:
+    # Create a prompt agent that is given the get_horoscope function tool
+    #
+    agent = project_client.agents.create_version(
+        agent_name="MyAgent",
+        definition=PromptAgentDefinition(
+            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
+            instructions="You are a helpful assistant that can use function tools.",
+            tools=tools,
+        ),
+    )
+    #
+    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")
+    openai_client = project_client.get_openai_client()
+
+    # Prompt the model with tools defined
+    response = openai_client.responses.create(
+        input="What is my horoscope? I am an Aquarius.",
+        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
+    )
+    print(f"Response output: {response.output_text}")
+
+    input_list: ResponseInputParam = []
+    # Process function calls
+    #
+    for item in response.output:
+        if item.type == "function_call" and item.name == "get_horoscope":
+            # Execute the function logic for get_horoscope
+            horoscope = get_horoscope(**json.loads(item.arguments))
+
+            # Provide function call results to the model
+            input_list.append(
+                FunctionCallOutput(
+                    type="function_call_output",
+                    call_id=item.call_id,
+                    output=json.dumps({"horoscope": horoscope}),
+                )
+            )
+    #
+    print("Final input:")
+    print(input_list)
+    response = openai_client.responses.create(
+        input=input_list,
+        previous_response_id=response.id,
+        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
+    )
+
+    # The model should be able to give a response!
+    print("Final output:")
+    print("\n" + response.output_text)
+
+    # Uncomment these lines to clean up resources by deleting the agent version
+    # This prevents accumulation of unused agent versions in your project
+    # project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
+    # print("Agent deleted")
\ No newline at end of file
diff --git a/samples/microsoft/python/getting-started-agents/agent-client/v2/image_generation.py b/samples/microsoft/python/getting-started-agents/agent-client/v2/image_generation.py
new file mode 100644
index 00000000..c98723a6
--- /dev/null
+++ b/samples/microsoft/python/getting-started-agents/agent-client/v2/image_generation.py
@@ -0,0 +1,59 @@
+import base64
+import os
+from dotenv import load_dotenv
+
+from azure.identity import DefaultAzureCredential
+from azure.ai.projects import AIProjectClient
+from azure.ai.projects.models import PromptAgentDefinition, ImageGenTool
+
+load_dotenv()
+
+project_client = AIProjectClient(
+    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
+    credential=DefaultAzureCredential(),
+)
+
+openai_client = project_client.get_openai_client()
+
+with project_client:
+    #
+    agent = project_client.agents.create_version(
+        agent_name="MyAgent",
+        definition=PromptAgentDefinition(
+            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
+            instructions="Generate images based on user prompts",
+            tools=[ImageGenTool(quality="low", size="1024x1024")],
+        ),
+        description="Agent for image generation.",
+    )
+    #
+    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")
+    response = openai_client.responses.create(
+        input="Generate an image of Microsoft logo.",
+        extra_headers={
+            "x-ms-oai-image-generation-deployment": "gpt-image-1"
+        },  # this is required at the moment for image generation
+        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
+    )
+    print(f"Response created: {response.id}")
+    # Save the image to a file
+    #
+    image_data = [output.result for output in response.output if output.type == "image_generation_call"]
+
+    if image_data and image_data[0]:
+        print("Downloading generated image...")
+        filename = "microsoft.png"
+        file_path = os.path.abspath(filename)
+
+        # The result is base64 text, so decode it before writing the file in binary mode
+        with open(file_path, "wb") as f:
+            f.write(base64.b64decode(image_data[0]))
+        #
+        print(f"Image downloaded and saved to: {file_path}")
+    else:
+        # Without this branch a response that carries no image output would end the script silently
+        print("No image data was returned in the response.")
+    # uncomment the following lines to clean up the created agent after running the sample
+    #print("\nCleaning up...")
+    #project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
+    #print("Agent deleted")
\ No newline at end of file