Commit 339fc03
Add model config to prompt (backend and docs change) (mlflow#19174)
1 parent ef54810 commit 339fc03

File tree: 13 files changed, +897 −18 lines


docs/api_reference/api_inventory.txt

Lines changed: 6 additions & 0 deletions
@@ -588,6 +588,9 @@ mlflow.entities.model_registry.ModelVersionSearch.tags
 mlflow.entities.model_registry.ModelVersionTag
 mlflow.entities.model_registry.ModelVersionTag.from_proto
 mlflow.entities.model_registry.ModelVersionTag.to_proto
+mlflow.entities.model_registry.PromptModelConfig
+mlflow.entities.model_registry.PromptModelConfig.from_dict
+mlflow.entities.model_registry.PromptModelConfig.to_dict
 mlflow.entities.model_registry.PromptVersion
 mlflow.entities.model_registry.PromptVersion.convert_response_format_to_dict
 mlflow.entities.model_registry.PromptVersion.format
@@ -613,6 +616,7 @@ mlflow.entities.model_registry.model_version_deployment_job_state.ModelVersionDe
 mlflow.entities.model_registry.model_version_search.ModelVersionSearch
 mlflow.entities.model_registry.model_version_tag.ModelVersionTag
 mlflow.entities.model_registry.prompt.Prompt
+mlflow.entities.model_registry.prompt_version.PromptModelConfig
 mlflow.entities.model_registry.prompt_version.PromptVersion
 mlflow.entities.model_registry.registered_model.RegisteredModel
 mlflow.entities.model_registry.registered_model_alias.RegisteredModelAlias
@@ -761,6 +765,7 @@ mlflow.genai.delete_dataset
 mlflow.genai.delete_dataset_tag
 mlflow.genai.delete_labeling_session
 mlflow.genai.delete_prompt_alias
+mlflow.genai.delete_prompt_model_config
 mlflow.genai.delete_prompt_tag
 mlflow.genai.delete_prompt_version_tag
 mlflow.genai.disable_git_model_versioning
@@ -898,6 +903,7 @@ mlflow.genai.search_datasets
 mlflow.genai.search_prompts
 mlflow.genai.set_dataset_tags
 mlflow.genai.set_prompt_alias
+mlflow.genai.set_prompt_model_config
 mlflow.genai.set_prompt_tag
 mlflow.genai.set_prompt_version_tag
 mlflow.genai.to_predict_fn
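
The inventory above also registers `PromptModelConfig.from_dict` and `PromptModelConfig.to_dict`, which the docs changes below don't demonstrate. A minimal round-trip sketch, assuming `to_dict` returns a plain dict and `from_dict` is its classmethod inverse (neither signature is shown in this commit):

```python
from mlflow.entities.model_registry import PromptModelConfig

# Hypothetical round-trip: serialize a config for storage, then rebuild it.
config = PromptModelConfig(model_name="gpt-4", temperature=0.7)
payload = config.to_dict()  # assumed to return a plain dict of the set fields
restored = PromptModelConfig.from_dict(payload)  # assumed classmethod inverse
assert restored.to_dict() == payload
```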

docs/docs/genai/prompt-registry/index.mdx

Lines changed: 193 additions & 9 deletions
@@ -246,6 +246,7 @@ Key attributes of a Prompt object:
 - `Alias`: A mutable named reference to the prompt. For example, you can create an alias named `production` to refer to the version used in your production system. See [Aliases](/genai/prompt-registry/manage-prompt-lifecycles-with-aliases) for more details.
 - `is_text_prompt`: A boolean property indicating whether the prompt is a text prompt (True) or chat prompt (False).
 - `response_format`: An optional property containing the expected response structure specification, which can be used to validate or structure outputs from LLM calls.
+- `model_config`: An optional dictionary containing model-specific configuration such as model name, temperature, max_tokens, and other inference parameters. See [Model Configuration](#model-configuration) for more details.
 
 ### Prompt Types
 
@@ -312,18 +313,202 @@ mlflow.genai.load_prompt("prompts:/summarization-prompt/1").tags
 mlflow.genai.delete_prompt_version_tag("summarization-prompt", 1, "author")
 ```
 
+## Model Configuration
+
+MLflow Prompt Registry allows you to store model-specific configuration alongside your prompts, ensuring reproducibility and clarity about which model and parameters were used with a particular prompt version. This is especially useful when you want to:
+
+- Version both prompt templates and model parameters together
+- Share prompts with recommended model settings across your team
+- Reproduce exact inference configurations from previous experiments
+- Maintain different model configurations for different prompt versions
+
+### Basic Usage
+
+You can attach model configuration to a prompt by passing a `model_config` parameter when registering:
+
+```python
+import mlflow
+
+# Using a dictionary
+model_config = {
+    "model_name": "gpt-4",
+    "temperature": 0.7,
+    "max_tokens": 1000,
+    "top_p": 0.9,
+}
+
+mlflow.genai.register_prompt(
+    name="qa-prompt",
+    template="Answer the following question: {{question}}",
+    model_config=model_config,
+    commit_message="QA prompt with model config",
+)
+
+# Load and access the model config
+prompt = mlflow.genai.load_prompt("qa-prompt")
+print(f"Model: {prompt.model_config['model_name']}")
+print(f"Temperature: {prompt.model_config['temperature']}")
+```
+
+### Using PromptModelConfig Class
+
+For better type safety and validation, you can use the <APILink fn="mlflow.entities.model_registry.PromptModelConfig" /> class:
+
+```python
+import mlflow
+from mlflow.entities.model_registry import PromptModelConfig
+
+# Create a validated config object
+config = PromptModelConfig(
+    model_name="gpt-4-turbo",
+    temperature=0.5,
+    max_tokens=2000,
+    top_p=0.95,
+    frequency_penalty=0.2,
+    presence_penalty=0.1,
+    stop_sequences=["END", "\n\n"],
+)
+
+mlflow.genai.register_prompt(
+    name="creative-prompt",
+    template="Write a creative story about {{topic}}",
+    model_config=config,
+)
+```
+
+The `PromptModelConfig` class provides validation to catch errors early:
+
+```python
+# This will raise a ValueError
+config = PromptModelConfig(temperature=-1.0)  # temperature must be non-negative
+
+# This will raise a ValueError
+config = PromptModelConfig(max_tokens=-100)  # max_tokens must be positive
+```
+
+### Supported Configuration Parameters
+
+The following standard parameters are supported in `PromptModelConfig`:
+
+- `model_name` (str): The name or identifier of the model (e.g., "gpt-4", "claude-3-opus")
+- `temperature` (float): Sampling temperature for controlling randomness (typically 0.0-2.0)
+- `max_tokens` (int): Maximum number of tokens to generate in the response
+- `top_p` (float): Nucleus sampling parameter (typically 0.0-1.0)
+- `top_k` (int): Top-k sampling parameter
+- `frequency_penalty` (float): Penalty for token frequency (typically -2.0 to 2.0)
+- `presence_penalty` (float): Penalty for token presence (typically -2.0 to 2.0)
+- `stop_sequences` (list[str]): List of sequences that will cause the model to stop generating
+- `extra_params` (dict): Additional provider-specific or experimental parameters
+
+### Provider-Specific Parameters
+
+You can include provider-specific parameters using the `extra_params` field:
+
+```python
+# Anthropic-specific configuration with extended thinking
+# See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
+anthropic_thinking_config = PromptModelConfig(
+    model_name="claude-sonnet-4-20250514",
+    max_tokens=16000,
+    extra_params={
+        # Enable extended thinking for complex reasoning tasks
+        "thinking": {
+            "type": "enabled",
+            "budget_tokens": 10000,  # Max tokens for internal reasoning
+        },
+        # User tracking for abuse detection
+        "metadata": {
+            "user_id": "user-123",
+        },
+    },
+)
+
+# OpenAI-specific configuration with reproducibility and structured output
+# See: https://platform.openai.com/docs/api-reference/chat/create
+openai_config = PromptModelConfig(
+    model_name="gpt-4o",
+    temperature=0.7,
+    max_tokens=2000,
+    extra_params={
+        # Seed for reproducible outputs
+        "seed": 42,
+        # Bias specific tokens (token_id: bias from -100 to 100)
+        "logit_bias": {"50256": -100},  # Discourage <|endoftext|>
+        # User identifier for abuse tracking
+        "user": "user-123",
+        # Service tier for priority processing
+        "service_tier": "default",
+    },
+)
+```
+
+### Managing Model Configuration
+
+Model configuration is mutable and can be updated after a prompt version is created. This makes it easy to fix mistakes or iterate on model parameters without creating new prompt versions.
+
+#### Setting or Updating Model Config
+
+Use <APILink fn="mlflow.genai.set_prompt_model_config" /> to set or update the model configuration for a prompt version:
+
+```python
+import mlflow
+from mlflow.entities.model_registry import PromptModelConfig
+
+# Register a prompt without model config
+mlflow.genai.register_prompt(
+    name="my-prompt",
+    template="Analyze: {{text}}",
+)
+
+# Later, add model config
+mlflow.genai.set_prompt_model_config(
+    name="my-prompt",
+    version=1,
+    model_config={"model_name": "gpt-4", "temperature": 0.7},
+)
+
+# Or update existing model config
+mlflow.genai.set_prompt_model_config(
+    name="my-prompt",
+    version=1,
+    model_config={"model_name": "gpt-4-turbo", "temperature": 0.8, "max_tokens": 2000},
+)
+
+# Verify the update
+prompt = mlflow.genai.load_prompt("my-prompt", version=1)
+print(prompt.model_config)
+```
+
+#### Deleting Model Config
+
+Use <APILink fn="mlflow.genai.delete_prompt_model_config" /> to remove model configuration from a prompt version:
+
+```python
+import mlflow
+
+# Remove model config
+mlflow.genai.delete_prompt_model_config(name="my-prompt", version=1)
+
+# Verify removal
+prompt = mlflow.genai.load_prompt("my-prompt", version=1)
+assert prompt.model_config is None
+```
+
+#### Important Notes
+
+- Model config changes are **version-specific** - updating one version doesn't affect others
+- Model config is **mutable** - unlike the prompt template, it can be changed after creation
+- Changes are **immediate** - no need to create a new version to fix model parameters
+- **Validation applies** - The same validation rules apply when updating as when creating
+
 ## Prompt Caching
 
-MLflow automatically caches loaded prompts in memory to improve performance and reduce repeated API
-calls. The caching behavior differs based on whether you're loading a prompt by **version** or
-by **alias**.
+MLflow automatically caches loaded prompts in memory to improve performance and reduce repeated API calls. The caching behavior differs based on whether you're loading a prompt by **version** or by **alias**.
 
 ### Default Caching Behavior
 
-- **Version-based prompts** (e.g., `prompts:/summarization-prompt/1`): Cached with **infinite TTL**
-by default.
-- **Alias-based prompts** (e.g., `prompts:/summarization-prompt@latest` or `prompts:/summarization-prompt@production`): Cached with **60 seconds TTL** by default. Aliases can point to
-different versions over time, so a shorter TTL ensures your application picks up updates.
+- **Version-based prompts** (e.g., `prompts:/summarization-prompt/1`): Cached with **infinite TTL** by default. Since prompt versions are immutable, they can be safely cached indefinitely.
+- **Alias-based prompts** (e.g., `prompts:/summarization-prompt@latest` or `prompts:/summarization-prompt@production`): Cached with **60 seconds TTL** by default. Aliases can point to different versions over time, so a shorter TTL ensures your application picks up updates.
 
 ### Customizing Cache Behavior
 
@@ -368,8 +553,7 @@ export MLFLOW_VERSION_PROMPT_CACHE_TTL_SECONDS=0
 
 ### Cache Invalidation
 
-The cache item is automatically invalidated when you modify the prompt version or alias tags,
-including the following operations:
+The cache is automatically invalidated when you modify the prompt version or alias, including the following operations:
 
 - `mlflow.genai.set_prompt_version_tag`
 - `mlflow.genai.set_prompt_alias`
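
The hunk context above shows `export MLFLOW_VERSION_PROMPT_CACHE_TTL_SECONDS=0` from the elided "Customizing Cache Behavior" section. A minimal sketch of applying the same override in-process, assuming the variable is read when the prompt is loaded rather than at import time:

```python
import os

import mlflow

# Assumption: a TTL of 0 disables caching for version-based prompt loads,
# mirroring the `export MLFLOW_VERSION_PROMPT_CACHE_TTL_SECONDS=0` shell example.
os.environ["MLFLOW_VERSION_PROMPT_CACHE_TTL_SECONDS"] = "0"

# Subsequent loads should bypass the version cache.
prompt = mlflow.genai.load_prompt("prompts:/summarization-prompt/1")
```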

mlflow/entities/model_registry/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -5,7 +5,7 @@
 from mlflow.entities.model_registry.model_version_search import ModelVersionSearch
 from mlflow.entities.model_registry.model_version_tag import ModelVersionTag
 from mlflow.entities.model_registry.prompt import Prompt
-from mlflow.entities.model_registry.prompt_version import PromptVersion
+from mlflow.entities.model_registry.prompt_version import PromptModelConfig, PromptVersion
 from mlflow.entities.model_registry.registered_model import RegisteredModel
 from mlflow.entities.model_registry.registered_model_alias import RegisteredModelAlias
 from mlflow.entities.model_registry.registered_model_deployment_job_state import (
@@ -16,6 +16,7 @@
 
 __all__ = [
     "Prompt",
+    "PromptModelConfig",
     "PromptVersion",
     "RegisteredModel",
     "ModelVersion",
