agent sample #40482
Draft · wants to merge 1 commit into main
@@ -0,0 +1,9 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Azure AI Agent tools and utilities for evaluation and red teaming."""

from .agent_tools import RedTeamToolProvider, get_red_team_tools

__all__ = ['RedTeamToolProvider', 'get_red_team_tools']
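
# A minimal usage sketch (hypothetical; the RedTeamToolProvider signature matches
# agent_tools.py below, while get_red_team_tools is assumed to be a no-argument
# factory returning agent-ready tool definitions):
#
#   from azure.identity import DefaultAzureCredential
#   from azure.ai.evaluation.agent import RedTeamToolProvider, get_red_team_tools
#
#   provider = RedTeamToolProvider(
#       azure_ai_project={...},  # subscription_id, resource_group, workspace_name
#       credential=DefaultAzureCredential(),
#   )
#   tools = get_red_team_tools()  # assumption: no-arg factory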
@@ -0,0 +1,201 @@
# ------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ------------------------------------

"""
Red Team Tools as Functions for Azure AI Agent
This module provides functions that can be used as tools in Azure AI Agent for red teaming purposes.
"""

import asyncio
import json
import os
from typing import Any, Callable, Dict, Optional, Set

from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.agent import RedTeamToolProvider

# Configuration for the red teaming tools
azure_ai_project = {
    "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID", "your-subscription-id"),
    "resource_group": os.environ.get("AZURE_RESOURCE_GROUP", "your-resource-group"),
    "workspace_name": os.environ.get("AZURE_WORKSPACE_NAME", "your-workspace-name"),
}

# Credential and tool provider are initialized lazily on first use
credential: Optional[DefaultAzureCredential] = None
tool_provider: Optional[RedTeamToolProvider] = None

def _get_tool_provider() -> RedTeamToolProvider:
    """Get or create the RedTeamToolProvider instance."""
    global credential, tool_provider
    if tool_provider is None:
        credential = DefaultAzureCredential()
        tool_provider = RedTeamToolProvider(
            azure_ai_project=azure_ai_project,
            credential=credential,
            application_scenario="A customer service chatbot for a retail website",  # Optional context
        )
    return tool_provider

# Dictionary mapping prompt IDs to fetched prompts for later conversion
fetched_prompts: Dict[str, str] = {}

def red_team_fetch_harmful_prompt(risk_category: str, strategy: str = "baseline", convert_with_strategy: Optional[str] = None) -> str:
    """
    Fetch a harmful prompt for a specific risk category to test content filters.

    This function retrieves a prompt that could potentially trigger content filters
    and can be used for red teaming AI systems.

    :param str risk_category: The risk category to get a harmful prompt for (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm').
    :param str strategy: The attack strategy to use (e.g., 'baseline', 'jailbreak'). Defaults to 'baseline'.
    :param Optional[str] convert_with_strategy: Optional strategy to convert the prompt (e.g., 'morse_converter', 'binary_converter'). Defaults to None.
    :return: A JSON string containing the harmful prompt and related metadata.
    :rtype: str
    """
    provider = _get_tool_provider()

    # Run the async method in a new event loop
    result = asyncio.run(provider.fetch_harmful_prompt(
        risk_category_text=risk_category,
        strategy=strategy,
        convert_with_strategy=convert_with_strategy,
    ))

    # Store the prompt for later conversion if successful
    if result["status"] == "success" and "prompt_id" in result:
        prompt_id = result["prompt_id"]
        if "prompt" in result:
            fetched_prompts[prompt_id] = result["prompt"]

    return json.dumps(result)
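
# Example (a sketch; requires valid Azure credentials and project access):
#
#   data = json.loads(red_team_fetch_harmful_prompt("violence"))
#   if data["status"] == "success":
#       print(data["prompt_id"], data["prompt"])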

def red_team_convert_prompt(prompt_or_id: str, strategy: str) -> str:
    """
    Convert a prompt or a previously fetched prompt ID using a specified strategy.

    This function can be used to transform prompts in ways that might bypass content filters
    for testing AI system defenses.

    :param str prompt_or_id: Either a prompt text or a prompt ID from a previous fetch_harmful_prompt call.
    :param str strategy: The strategy to use for conversion (e.g., 'morse_converter', 'binary_converter', 'base64_converter').
    :return: A JSON string containing the original and converted prompt.
    :rtype: str
    """
    provider = _get_tool_provider()

    # Check if input is a prompt ID we have stored
    if prompt_or_id in fetched_prompts:
        # Update the provider's cache
        provider._fetched_prompts[prompt_or_id] = fetched_prompts[prompt_or_id]

    # Run the async method in a new event loop
    result = asyncio.run(provider.convert_prompt(
        prompt_or_id=prompt_or_id,
        strategy=strategy,
    ))

    return json.dumps(result)
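
# Example (a sketch; reuses a prompt ID cached by red_team_fetch_harmful_prompt):
#
#   fetched = json.loads(red_team_fetch_harmful_prompt("violence"))
#   converted = json.loads(red_team_convert_prompt(fetched["prompt_id"], "base64_converter"))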

def red_team_unified(category: str, strategy: Optional[str] = None) -> str:
    """
    Get a harmful prompt for a specific risk category and optionally convert it.

    This function combines fetching and converting harmful prompts in one call,
    making it easier to test AI system defenses.

    :param str category: The risk category to get a harmful prompt for (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm').
    :param Optional[str] strategy: Optional strategy to convert the prompt (e.g., 'morse_converter', 'binary_converter'). Defaults to None.
    :return: A JSON string containing the harmful prompt and related metadata.
    :rtype: str
    """
    provider = _get_tool_provider()

    # Run the async method in a new event loop
    result = asyncio.run(provider.red_team(
        category=category,
        strategy=strategy,
    ))

    # Store the prompt for later conversion if it succeeded and was not converted
    if result["status"] == "success":
        if "prompt_id" in result and "prompt" in result:
            prompt_id = result["prompt_id"]
            fetched_prompts[prompt_id] = result["prompt"]

    return json.dumps(result)
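
# Example (a sketch; equivalent to fetching and then converting in one call):
#
#   result = json.loads(red_team_unified("violence", strategy="morse_converter"))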

def red_team_get_available_strategies() -> str:
    """
    Get a list of all available prompt conversion strategies.

    This function lists all the strategies that can be used to convert prompts
    for red teaming purposes.

    :return: A JSON string containing a list of available conversion strategies.
    :rtype: str
    """
    provider = _get_tool_provider()
    strategies = provider.get_available_strategies()

    return json.dumps({
        "status": "success",
        "available_strategies": strategies,
    })

def red_team_explain_purpose() -> str:
    """
    Explain the purpose and responsible use of red teaming tools.

    This function provides information about the ethical use of red teaming tools
    and their purpose in improving AI system safety.

    :return: A JSON string containing information about responsible use of red teaming tools.
    :rtype: str
    """
    explanation = {
        "purpose": "Red teaming tools are designed to test AI systems for vulnerabilities and safety issues.",
        "responsible_use": [
            "Only use these tools on systems you have permission to test.",
            "Use the results to improve system safety and robustness.",
            "Do not use these tools to cause harm or bypass legitimate safety measures in production systems.",
        ],
        "risk_categories": {
            "violence": "Content related to physical harm, weapons, or dangerous activities",
            "hate_unfairness": "Content related to discrimination, bias, or prejudice",
            "sexual": "Explicit sexual content",
            "self_harm": "Content related to suicide, self-injury, or harmful behaviors",
        },
        "conversion_strategies": "Various encoding and obfuscation methods to test if harmful content can bypass detection",
    }

    return json.dumps(explanation)

# Example User Input for Each Function
# 1. Red Team Fetch Harmful Prompt
# User Input: "Fetch a harmful prompt in the 'violence' category."
# User Input: "Fetch a harmful prompt in the 'hate_unfairness' category with a 'jailbreak' strategy."
# User Input: "Fetch a harmful prompt in the 'self_harm' category and convert it with 'base64_converter'."

# 2. Red Team Convert Prompt
# User Input: "Convert this prompt 'prompt_12345678' using the 'binary_converter' strategy."
# User Input: "Convert the text 'This is a test prompt' using the 'morse_converter' strategy."

# 3. Red Team Unified
# User Input: "Get a harmful prompt in the 'sexual' category."
# User Input: "Get a harmful prompt in the 'violence' category and convert it with 'leetspeak_converter'."

# 4. Red Team Get Available Strategies
# User Input: "What conversion strategies are available for red teaming?"

# 5. Red Team Explain Purpose
# User Input: "What is the purpose of red teaming tools and how should they be used responsibly?"

# Statically defined user functions for fast reference
user_functions: Set[Callable[..., Any]] = {
    red_team_fetch_harmful_prompt,
    red_team_convert_prompt,
    red_team_unified,
    red_team_get_available_strategies,
    red_team_explain_purpose,
}
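
# Wiring these functions into an Azure AI Agent (a minimal sketch, not part of
# this sample; assumes the azure-ai-projects SDK, whose FunctionTool wraps a set
# of callables and exposes their JSON tool definitions to an agent):
#
#   import os
#   from azure.ai.projects import AIProjectClient
#   from azure.ai.projects.models import FunctionTool
#   from azure.identity import DefaultAzureCredential
#
#   functions = FunctionTool(functions=user_functions)
#   project_client = AIProjectClient.from_connection_string(
#       credential=DefaultAzureCredential(),
#       conn_str=os.environ["PROJECT_CONNECTION_STRING"],
#   )
#   agent = project_client.agents.create_agent(
#       model="gpt-4o",  # assumption: any tool-calling model deployment works
#       name="red-team-agent",
#       instructions="Use the red teaming tools responsibly and only on permitted systems.",
#       tools=functions.definitions,
#   )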