feat: expose LLM parameter for dedupe operation (#5115)

RafaelPo · github-actions[bot] · commit 33ee2ee99d69 · 2026-03-27T17:50:01.000Z
## Summary

- Exposes the existing internal `llm` field on the dedupe operation through the public API and Python SDK
- Users can now choose which model is used for dedupe comparisons (defaults to system default if not specified)
- Changes: `DedupeOperation` (API), `DedupePublicParams` → `DedupeFullParams` (pass-through), handler, SDK `dedupe()`/`dedupe_async()`, generated types

## Test plan

- [ ] Verify dedupe works without `llm` (existing default behavior unchanged)
- [ ] Verify dedupe with `llm` set uses the specified model
- [ ] Verify invalid enum values are rejected with 422

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Sourced from commit 7b8d2a7d2c43c548b86f8f113ce355b0ac406f8a
diff --git a/src/futuresearch/generated/models/dedupe_operation.py b/src/futuresearch/generated/models/dedupe_operation.py
@@ -8,6 +8,7 @@
 from attrs import field as _attrs_field
 
 from ..models.dedupe_operation_strategy import DedupeOperationStrategy
+from ..models.llm_enum_public import LLMEnumPublic
 from ..types import UNSET, Unset
 
 if TYPE_CHECKING:
@@ -31,6 +32,7 @@ class DedupeOperation:
         strategy (DedupeOperationStrategy | Unset): Strategy for handling duplicates: 'identify' (cluster only),
             'select' (pick best), 'combine' (synthesize combined row) Default: DedupeOperationStrategy.SELECT.
         strategy_prompt (None | str | Unset): Optional instructions guiding how selection or combining is performed
+        llm (LLMEnumPublic | None | Unset): LLM to use for dedupe comparisons. If not provided, uses the system default.
     """
 
     input_: DedupeOperationInputType2 | list[DedupeOperationInputType1Item] | UUID
@@ -39,6 +41,7 @@ class DedupeOperation:
     webhook_url: None | str | Unset = UNSET
     strategy: DedupeOperationStrategy | Unset = DedupeOperationStrategy.SELECT
     strategy_prompt: None | str | Unset = UNSET
+    llm: LLMEnumPublic | None | Unset = UNSET
     additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
 
     def to_dict(self) -> dict[str, Any]:
@@ -80,6 +83,14 @@ def to_dict(self) -> dict[str, Any]:
         else:
             strategy_prompt = self.strategy_prompt
 
+        llm: None | str | Unset
+        if isinstance(self.llm, Unset):
+            llm = UNSET
+        elif isinstance(self.llm, LLMEnumPublic):
+            llm = self.llm.value
+        else:
+            llm = self.llm
+
         field_dict: dict[str, Any] = {}
         field_dict.update(self.additional_properties)
         field_dict.update(
@@ -96,6 +107,8 @@ def to_dict(self) -> dict[str, Any]:
             field_dict["strategy"] = strategy
         if strategy_prompt is not UNSET:
             field_dict["strategy_prompt"] = strategy_prompt
+        if llm is not UNSET:
+            field_dict["llm"] = llm
 
         return field_dict
 
@@ -180,13 +193,31 @@ def _parse_strategy_prompt(data: object) -> None | str | Unset:
 
         strategy_prompt = _parse_strategy_prompt(d.pop("strategy_prompt", UNSET))
 
+        def _parse_llm(data: object) -> LLMEnumPublic | None | Unset:
+            if data is None:
+                return data
+            if isinstance(data, Unset):
+                return data
+            try:
+                if not isinstance(data, str):
+                    raise TypeError()
+                llm_type_0 = LLMEnumPublic(data)
+
+                return llm_type_0
+            except (TypeError, ValueError, AttributeError, KeyError):
+                pass
+            return cast(LLMEnumPublic | None | Unset, data)
+
+        llm = _parse_llm(d.pop("llm", UNSET))
+
         dedupe_operation = cls(
             input_=input_,
             equivalence_relation=equivalence_relation,
             session_id=session_id,
             webhook_url=webhook_url,
             strategy=strategy,
             strategy_prompt=strategy_prompt,
+            llm=llm,
         )
 
         dedupe_operation.additional_properties = d
diff --git a/src/futuresearch/ops.py b/src/futuresearch/ops.py
@@ -625,6 +625,7 @@ async def dedupe(
     input: DataFrame | UUID | TableResult | None = None,
     strategy: Literal["identify", "select", "combine"] | None = None,
     strategy_prompt: str | None = None,
+    llm: LLM | None = None,
 ) -> TableResult:
     """Dedupe a table by removing duplicates using AI.
 
@@ -653,6 +654,7 @@ async def dedupe(
             Examples: "Prefer the record with the most complete contact information",
             "For each field, keep the most recent and complete value",
             "Prefer records from the CRM system over spreadsheet imports".
+        llm: LLM to use for dedupe comparisons. If not provided, uses the system default.
 
     Returns:
         TableResult containing the deduped table with cluster metadata columns.
@@ -667,6 +669,7 @@ async def dedupe(
                 equivalence_relation=equivalence_relation,
                 strategy=strategy,
                 strategy_prompt=strategy_prompt,
+                llm=llm,
             )
             result = await cohort_task.await_result()
             if isinstance(result, TableResult):
@@ -678,6 +681,7 @@ async def dedupe(
         equivalence_relation=equivalence_relation,
         strategy=strategy,
         strategy_prompt=strategy_prompt,
+        llm=llm,
     )
     result = await cohort_task.await_result()
     if isinstance(result, TableResult):
@@ -691,6 +695,7 @@ async def dedupe_async(
     equivalence_relation: str,
     strategy: Literal["identify", "select", "combine"] | None = None,
     strategy_prompt: str | None = None,
+    llm: LLM | None = None,
 ) -> EveryrowTask[BaseModel]:
     """Submit a dedupe task asynchronously."""
     input_data = _prepare_table_input(input, DedupeOperationInputType1Item)
@@ -701,6 +706,7 @@ async def dedupe_async(
         session_id=session.session_id,
         strategy=DedupeOperationStrategy(strategy) if strategy is not None else UNSET,
         strategy_prompt=strategy_prompt if strategy_prompt is not None else UNSET,
+        llm=LLMEnumPublic(llm.value) if llm is not None else UNSET,
     )
 
     response = await dedupe_operations_dedupe_post.asyncio(