Merge pull request #385 from howardbaik/main

rich-iannone · web-flow · commit 84aef77407e4 · 2026-04-15T22:43:33.000-04:00
Add AzureOpenAI as a model provider
diff --git a/pointblank/_constants.py b/pointblank/_constants.py
@@ -141,6 +141,7 @@
     "anthropic",
     "ollama",
     "bedrock",
+    "azure-openai",
 ]
 
 TABLE_TYPE_STYLES = {
diff --git a/pointblank/_utils_ai.py b/pointblank/_utils_ai.py
@@ -172,6 +172,41 @@ def _create_chat_instance(
             kwargs={"http_client": http_client},
         )
 
+    elif provider == "azure-openai":  # pragma: no cover
+        try:
+            import openai  # noqa
+        except ImportError:
+            raise ImportError(
+                "The `openai` package is required to use AI validation with "
+                "`azure-openai`. Please install it using `pip install openai`."
+            )
+
+        import os
+
+        endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+        api_version = os.getenv("OPENAI_API_VERSION")
+        if not endpoint:
+            raise ValueError(
+                "AZURE_OPENAI_ENDPOINT environment variable must be set to use "
+                "the 'azure-openai' provider."
+            )
+        if not api_version:
+            raise ValueError(
+                "OPENAI_API_VERSION environment variable must be set to use "
+                "the 'azure-openai' provider (e.g. '2024-06-01')."
+            )
+
+        from chatlas import ChatAzureOpenAI
+
+        chat = ChatAzureOpenAI(
+            endpoint=endpoint,
+            deployment_id=model_name,
+            api_version=api_version,
+            api_key=api_key,
+            system_prompt=system_prompt,
+            kwargs={"http_client": http_client},
+        )
+
     else:
         raise ValueError(f"Unsupported provider: {provider}")
 
diff --git a/pointblank/validate.py b/pointblank/validate.py
@@ -10929,9 +10929,10 @@ def prompt(
         model
             The model to be used. This should be in the form of `provider:model` (e.g.,
             `"anthropic:claude-opus-4-6"`). Supported providers are `"anthropic"`, `"openai"`,
-            `"ollama"`, and `"bedrock"`. The model name should be the specific model to be used from
-            the provider. Model names are subject to change so consult the provider's documentation
-            for the most up-to-date model names.
+            `"ollama"`, `"bedrock"`, and `"azure-openai"`. The model name should be the specific
+            model to be used from the provider (for `"azure-openai"`, the value after the colon is
+            the Azure *deployment id*). Model names are subject to change so consult the provider's
+            documentation for the most up-to-date model names.
         batch_size
             Number of rows to process in each batch. Larger batches are more efficient but may hit
             API limits. Default is `1000`.
@@ -10985,10 +10986,13 @@ def prompt(
         - `"openai"` (OpenAI)
         - `"ollama"` (Ollama)
         - `"bedrock"` (Amazon Bedrock)
+        - `"azure-openai"` (Azure OpenAI)
 
         The model name should be the specific model to be used from the provider. Model names are
         subject to change so consult the provider's documentation for the most up-to-date model
-        names.
+        names. For `"azure-openai"`, the value after the colon is the Azure *deployment id* (the
+        name you assigned when deploying the model in your Azure OpenAI resource), not an OpenAI
+        model id.
 
         Notes on Authentication
         -----------------------
@@ -11019,6 +11023,8 @@ def prompt(
         - **Anthropic**: set `ANTHROPIC_API_KEY` environment variable or create `.env` file
         - **Ollama**: no API key required, just ensure Ollama is running locally
         - **Bedrock**: configure AWS credentials through standard AWS methods
+        - **Azure OpenAI**: set `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_ENDPOINT` (e.g.,
+          `https://<resource>.openai.azure.com`), and `OPENAI_API_VERSION` (e.g., `"2024-06-01"`)
 
         AI Validation Process
         ---------------------
diff --git a/tests/test__utils_ai.py b/tests/test__utils_ai.py
@@ -119,6 +119,46 @@ def test_create_chat_instance_invalid_provider():
         _create_chat_instance("invalid", "model")
 
 
+def test_create_chat_instance_azure_openai_missing_endpoint(monkeypatch):
+    """Azure OpenAI provider raises if AZURE_OPENAI_ENDPOINT is unset."""
+    pytest.importorskip("openai")
+    monkeypatch.delenv("AZURE_OPENAI_ENDPOINT", raising=False)
+    monkeypatch.setenv("OPENAI_API_VERSION", "2024-06-01")
+    with pytest.raises(ValueError, match="AZURE_OPENAI_ENDPOINT"):
+        _create_chat_instance("azure-openai", "my-deployment")
+
+
+def test_create_chat_instance_azure_openai_missing_api_version(monkeypatch):
+    """Azure OpenAI provider raises if OPENAI_API_VERSION is unset."""
+    pytest.importorskip("openai")
+    monkeypatch.setenv("AZURE_OPENAI_ENDPOINT", "https://example.openai.azure.com")
+    monkeypatch.delenv("OPENAI_API_VERSION", raising=False)
+    with pytest.raises(ValueError, match="OPENAI_API_VERSION"):
+        _create_chat_instance("azure-openai", "my-deployment")
+
+
+def test_create_chat_instance_azure_openai_forwards_params(monkeypatch):
+    """Azure OpenAI provider forwards env vars + deployment id to ChatAzureOpenAI."""
+    pytest.importorskip("openai")
+    chatlas = pytest.importorskip("chatlas")
+    monkeypatch.setenv("AZURE_OPENAI_ENDPOINT", "https://example.openai.azure.com")
+    monkeypatch.setenv("OPENAI_API_VERSION", "2024-06-01")
+
+    sentinel = object()
+    with patch.object(chatlas, "ChatAzureOpenAI", return_value=sentinel) as mock_cls:
+        result = _create_chat_instance("azure-openai", "my-deployment", api_key="secret")
+
+    assert result is sentinel
+    mock_cls.assert_called_once()
+    kwargs = mock_cls.call_args.kwargs
+    assert kwargs["endpoint"] == "https://example.openai.azure.com"
+    assert kwargs["deployment_id"] == "my-deployment"
+    assert kwargs["api_version"] == "2024-06-01"
+    assert kwargs["api_key"] == "secret"
+    assert "system_prompt" in kwargs
+    assert "http_client" in kwargs["kwargs"]
+
+
 # ============================================================================
 # Test BatchConfig
 # ============================================================================
diff --git a/tests/test_prompt_method.py b/tests/test_prompt_method.py
@@ -410,6 +410,7 @@ def test_prompt_inactive():
         ("openai", "gpt-4o-mini"),
         ("ollama", "llama2"),
         ("bedrock", "anthropic.claude-3-sonnet-20240229-v1:0"),
+        ("azure-openai", "my-gpt4-deployment"),
     ],
 )
 def test_prompt_with_different_providers(provider, model):
diff --git a/user_guide/01-validation-plan/02-validation-methods.qmd b/user_guide/01-validation-plan/02-validation-methods.qmd
@@ -611,7 +611,13 @@ The `columns_subset=` parameter lets you specify which columns to include in the
 improving performance and reducing API costs by only sending relevant data to the LLM.
 
 **Note:** To use [`Validate.prompt()`](`Validate.prompt`), you need to have the appropriate API credentials configured
-for your chosen LLM provider (Anthropic, OpenAI, Ollama, or AWS Bedrock).
+for your chosen LLM provider (Anthropic, OpenAI, Ollama, AWS Bedrock, or Azure OpenAI).
+
+For **Azure OpenAI**, use `model="azure-openai:<deployment_id>"` where `<deployment_id>` is the
+name you assigned when deploying the model in your Azure OpenAI resource. In addition to
+`AZURE_OPENAI_API_KEY`, you must set `AZURE_OPENAI_ENDPOINT` (e.g.,
+`https://<resource>.openai.azure.com`) and `OPENAI_API_VERSION` (e.g., `"2024-06-01"`) in your
+environment.
 
 ## 5. Aggregate Validations
 

Original file line number	Diff line number	Diff line change
`@@ -141,6 +141,7 @@`
`141`	`141`	`"anthropic",`
`142`	`142`	`"ollama",`
`143`	`143`	`"bedrock",`
	`144`	`+ "azure-openai",`
`144`	`145`	`]`
`145`	`146`
`146`	`147`	`TABLE_TYPE_STYLES = {`
Original file line number	Diff line number	Diff line change
`@@ -410,6 +410,7 @@ def test_prompt_inactive():`
`410`	`410`	`("openai", "gpt-4o-mini"),`
`411`	`411`	`("ollama", "llama2"),`
`412`	`412`	`("bedrock", "anthropic.claude-3-sonnet-20240229-v1:0"),`
	`413`	`+ ("azure-openai", "my-gpt4-deployment"),`
`413`	`414`	`],`
`414`	`415`	`)`
`415`	`416`	`def test_prompt_with_different_providers(provider, model):`