Skip to content

Commit 33ee2ee

Browse files
RafaelPo authored and github-actions[bot] committed
feat: expose LLM parameter for dedupe operation (#5115)
## Summary

- Exposes the existing internal `llm` field on the dedupe operation through the public API and Python SDK
- Users can now choose which model is used for dedupe comparisons (defaults to the system default if not specified)
- Changes: `DedupeOperation` (API), `DedupePublicParams` → `DedupeFullParams` (pass-through), handler, SDK `dedupe()`/`dedupe_async()`, generated types

## Test plan

- [ ] Verify dedupe works without `llm` (existing default behavior unchanged)
- [ ] Verify dedupe with `llm` set uses the specified model
- [ ] Verify invalid enum values are rejected with 422

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Sourced from commit 7b8d2a7d2c43c548b86f8f113ce355b0ac406f8a
1 parent f3efc30 commit 33ee2ee

File tree

2 files changed

+37
-0
lines changed

2 files changed

+37
-0
lines changed

src/futuresearch/generated/models/dedupe_operation.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from attrs import field as _attrs_field
99

1010
from ..models.dedupe_operation_strategy import DedupeOperationStrategy
11+
from ..models.llm_enum_public import LLMEnumPublic
1112
from ..types import UNSET, Unset
1213

1314
if TYPE_CHECKING:
@@ -31,6 +32,7 @@ class DedupeOperation:
3132
strategy (DedupeOperationStrategy | Unset): Strategy for handling duplicates: 'identify' (cluster only),
3233
'select' (pick best), 'combine' (synthesize combined row) Default: DedupeOperationStrategy.SELECT.
3334
strategy_prompt (None | str | Unset): Optional instructions guiding how selection or combining is performed
35+
llm (LLMEnumPublic | None | Unset): LLM to use for dedupe comparisons. If not provided, uses the system default.
3436
"""
3537

3638
input_: DedupeOperationInputType2 | list[DedupeOperationInputType1Item] | UUID
@@ -39,6 +41,7 @@ class DedupeOperation:
3941
webhook_url: None | str | Unset = UNSET
4042
strategy: DedupeOperationStrategy | Unset = DedupeOperationStrategy.SELECT
4143
strategy_prompt: None | str | Unset = UNSET
44+
llm: LLMEnumPublic | None | Unset = UNSET
4245
additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
4346

4447
def to_dict(self) -> dict[str, Any]:
@@ -80,6 +83,14 @@ def to_dict(self) -> dict[str, Any]:
8083
else:
8184
strategy_prompt = self.strategy_prompt
8285

86+
llm: None | str | Unset
87+
if isinstance(self.llm, Unset):
88+
llm = UNSET
89+
elif isinstance(self.llm, LLMEnumPublic):
90+
llm = self.llm.value
91+
else:
92+
llm = self.llm
93+
8394
field_dict: dict[str, Any] = {}
8495
field_dict.update(self.additional_properties)
8596
field_dict.update(
@@ -96,6 +107,8 @@ def to_dict(self) -> dict[str, Any]:
96107
field_dict["strategy"] = strategy
97108
if strategy_prompt is not UNSET:
98109
field_dict["strategy_prompt"] = strategy_prompt
110+
if llm is not UNSET:
111+
field_dict["llm"] = llm
99112

100113
return field_dict
101114

@@ -180,13 +193,31 @@ def _parse_strategy_prompt(data: object) -> None | str | Unset:
180193

181194
strategy_prompt = _parse_strategy_prompt(d.pop("strategy_prompt", UNSET))
182195

196+
def _parse_llm(data: object) -> LLMEnumPublic | None | Unset:
    """Decode the raw ``llm`` entry of a serialized dedupe operation.

    ``None`` and ``Unset`` inputs are handed back unchanged. A string is
    converted to the matching ``LLMEnumPublic`` member. Anything that cannot
    be converted (a non-string value, or a string the enum rejects) is
    returned through ``cast`` without raising.
    """
    # Absent / explicitly-null values carry no model choice to decode.
    if data is None or isinstance(data, Unset):
        return data

    if isinstance(data, str):
        try:
            return LLMEnumPublic(data)
        except (TypeError, ValueError, AttributeError, KeyError):
            # Unrecognised value: fall through to the pass-through below.
            pass

    return cast(LLMEnumPublic | None | Unset, data)
210+
211+
llm = _parse_llm(d.pop("llm", UNSET))
212+
183213
dedupe_operation = cls(
184214
input_=input_,
185215
equivalence_relation=equivalence_relation,
186216
session_id=session_id,
187217
webhook_url=webhook_url,
188218
strategy=strategy,
189219
strategy_prompt=strategy_prompt,
220+
llm=llm,
190221
)
191222

192223
dedupe_operation.additional_properties = d

src/futuresearch/ops.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -625,6 +625,7 @@ async def dedupe(
625625
input: DataFrame | UUID | TableResult | None = None,
626626
strategy: Literal["identify", "select", "combine"] | None = None,
627627
strategy_prompt: str | None = None,
628+
llm: LLM | None = None,
628629
) -> TableResult:
629630
"""Dedupe a table by removing duplicates using AI.
630631
@@ -653,6 +654,7 @@ async def dedupe(
653654
Examples: "Prefer the record with the most complete contact information",
654655
"For each field, keep the most recent and complete value",
655656
"Prefer records from the CRM system over spreadsheet imports".
657+
llm: LLM to use for dedupe comparisons. If not provided, uses the system default.
656658
657659
Returns:
658660
TableResult containing the deduped table with cluster metadata columns.
@@ -667,6 +669,7 @@ async def dedupe(
667669
equivalence_relation=equivalence_relation,
668670
strategy=strategy,
669671
strategy_prompt=strategy_prompt,
672+
llm=llm,
670673
)
671674
result = await cohort_task.await_result()
672675
if isinstance(result, TableResult):
@@ -678,6 +681,7 @@ async def dedupe(
678681
equivalence_relation=equivalence_relation,
679682
strategy=strategy,
680683
strategy_prompt=strategy_prompt,
684+
llm=llm,
681685
)
682686
result = await cohort_task.await_result()
683687
if isinstance(result, TableResult):
@@ -691,6 +695,7 @@ async def dedupe_async(
691695
equivalence_relation: str,
692696
strategy: Literal["identify", "select", "combine"] | None = None,
693697
strategy_prompt: str | None = None,
698+
llm: LLM | None = None,
694699
) -> EveryrowTask[BaseModel]:
695700
"""Submit a dedupe task asynchronously."""
696701
input_data = _prepare_table_input(input, DedupeOperationInputType1Item)
@@ -701,6 +706,7 @@ async def dedupe_async(
701706
session_id=session.session_id,
702707
strategy=DedupeOperationStrategy(strategy) if strategy is not None else UNSET,
703708
strategy_prompt=strategy_prompt if strategy_prompt is not None else UNSET,
709+
llm=LLMEnumPublic(llm.value) if llm is not None else UNSET,
704710
)
705711

706712
response = await dedupe_operations_dedupe_post.asyncio(

0 commit comments

Comments
 (0)