Skip to content

Commit d0e6ce2

Browse files
authored
feat: add openai computer use support (#32)
1 parent 951a49c commit d0e6ce2

10 files changed

Lines changed: 2190 additions & 15 deletions

providers/openai/computer_use.go

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
package openai
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
8+
"charm.land/fantasy"
9+
"github.com/charmbracelet/openai-go/responses"
10+
)
11+
12+
const computerUseToolID = "openai.computer_use"
13+
14+
// Type identifier for computer use metadata, registered in
15+
// responses_options.go init().
16+
const TypeComputerUseMetadata = Name + ".responses.computer_use_metadata"
17+
18+
// Type identifier for computer call output options, registered in
19+
// responses_options.go init().
20+
const TypeComputerCallOutputOptions = Name + ".responses.computer_call_output_options"
21+
22+
// ComputerUseMetadata carries the raw wire-format JSON of a
// computer_call output item so that the item can be round-tripped
// faithfully via param.Override.
type ComputerUseMetadata struct {
	// RawJSON holds the raw JSON text of the computer_call item; it is
	// serialized under the "raw_json" key.
	RawJSON string `json:"raw_json"`
}
27+
28+
var _ fantasy.ProviderOptionsData = (*ComputerUseMetadata)(nil)
29+
30+
// Options implements the ProviderOptionsData interface.
31+
func (*ComputerUseMetadata) Options() {}
32+
33+
// MarshalJSON implements custom JSON marshaling with type info.
34+
func (m ComputerUseMetadata) MarshalJSON() ([]byte, error) {
35+
type plain ComputerUseMetadata
36+
return fantasy.MarshalProviderType(TypeComputerUseMetadata, plain(m))
37+
}
38+
39+
// UnmarshalJSON implements custom JSON unmarshaling with type info.
40+
func (m *ComputerUseMetadata) UnmarshalJSON(data []byte) error {
41+
type plain ComputerUseMetadata
42+
var p plain
43+
if err := fantasy.UnmarshalProviderType(data, &p); err != nil {
44+
return err
45+
}
46+
*m = ComputerUseMetadata(p)
47+
return nil
48+
}
49+
50+
// ComputerCallOutputOptions adjusts the wire payload that fantasy emits
// for a computer_call_output input item. Attach it to a
// ToolResultPart's ProviderOptions under the OpenAI provider key.
type ComputerCallOutputOptions struct {
	// Detail mirrors the `output.detail` field documented in the OpenAI
	// computer-use guide. Accepted values are "auto", "low", "high", or
	// "original". "original" is recommended for full-resolution
	// screenshots so the model sees pixel coordinates that match the
	// underlying display. An empty value is omitted from the JSON.
	Detail string `json:"detail,omitempty"`
}
60+
61+
var _ fantasy.ProviderOptionsData = (*ComputerCallOutputOptions)(nil)
62+
63+
// Options implements the ProviderOptionsData interface.
64+
func (*ComputerCallOutputOptions) Options() {}
65+
66+
// MarshalJSON implements custom JSON marshaling with type info.
67+
func (o ComputerCallOutputOptions) MarshalJSON() ([]byte, error) {
68+
type plain ComputerCallOutputOptions
69+
return fantasy.MarshalProviderType(TypeComputerCallOutputOptions, plain(o))
70+
}
71+
72+
// UnmarshalJSON implements custom JSON unmarshaling with type info.
73+
func (o *ComputerCallOutputOptions) UnmarshalJSON(data []byte) error {
74+
type plain ComputerCallOutputOptions
75+
var p plain
76+
if err := fantasy.UnmarshalProviderType(data, &p); err != nil {
77+
return err
78+
}
79+
*o = ComputerCallOutputOptions(p)
80+
return nil
81+
}
82+
83+
// GetComputerCallOutputOptions extracts ComputerCallOutputOptions from
84+
// provider options, returning nil if not present or of a different
85+
// type.
86+
func GetComputerCallOutputOptions(opts fantasy.ProviderOptions) *ComputerCallOutputOptions {
87+
if v, ok := opts[Name]; ok {
88+
if o, ok := v.(*ComputerCallOutputOptions); ok {
89+
return o
90+
}
91+
}
92+
return nil
93+
}
94+
95+
// NewComputerUseTool creates an executable provider-defined computer use
96+
// tool for OpenAI models. The run function receives a ToolCall whose
97+
// Input is a JSON object containing call_id and actions. Parse it with
98+
// ParseComputerUseInput. Return an image response
99+
// (fantasy.NewImageResponse) with a screenshot.
100+
func NewComputerUseTool(
101+
run func(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error),
102+
) fantasy.ExecutableProviderTool {
103+
return fantasy.NewExecutableProviderTool(
104+
fantasy.ProviderDefinedTool{
105+
ID: computerUseToolID,
106+
Name: "computer",
107+
},
108+
run,
109+
)
110+
}
111+
112+
// IsComputerUseTool reports whether tool is an OpenAI computer use tool.
113+
// It returns true for both ExecutableProviderTool and bare
114+
// ProviderDefinedTool instances with the computer use tool ID.
115+
func IsComputerUseTool(tool fantasy.Tool) bool {
116+
pdt, ok := asProviderDefinedTool(tool)
117+
if !ok {
118+
return false
119+
}
120+
return pdt.ID == computerUseToolID
121+
}
122+
123+
// asProviderDefinedTool extracts the underlying ProviderDefinedTool from
124+
// either a ProviderDefinedTool or an ExecutableProviderTool.
125+
func asProviderDefinedTool(tool fantasy.Tool) (fantasy.ProviderDefinedTool, bool) {
126+
switch t := tool.(type) {
127+
case fantasy.ProviderDefinedTool:
128+
return t, true
129+
case fantasy.ExecutableProviderTool:
130+
return t.Definition(), true
131+
default:
132+
return fantasy.ProviderDefinedTool{}, false
133+
}
134+
}
135+
136+
// GetComputerUseMetadata extracts ComputerUseMetadata from provider
137+
// options, returning nil if not present or of a different type.
138+
func GetComputerUseMetadata(opts fantasy.ProviderOptions) *ComputerUseMetadata {
139+
if v, ok := opts[Name]; ok {
140+
if m, ok := v.(*ComputerUseMetadata); ok {
141+
return m
142+
}
143+
}
144+
return nil
145+
}
146+
147+
// computerCallInput builds a JSON string from a ResponseComputerToolCall
148+
// using per-action RawJSON() for faithful serialization.
149+
func computerCallInput(call responses.ResponseComputerToolCall) (string, error) {
150+
callIDJSON, err := json.Marshal(call.CallID)
151+
if err != nil {
152+
return "", fmt.Errorf("marshal call_id: %w", err)
153+
}
154+
obj := map[string]json.RawMessage{
155+
"call_id": callIDJSON,
156+
}
157+
158+
if len(call.Actions) > 0 {
159+
rawActions := make([]json.RawMessage, len(call.Actions))
160+
for i, a := range call.Actions {
161+
rawActions[i] = json.RawMessage(a.RawJSON())
162+
}
163+
actionsJSON, err := json.Marshal(rawActions)
164+
if err != nil {
165+
return "", fmt.Errorf("marshal actions: %w", err)
166+
}
167+
obj["actions"] = actionsJSON
168+
} else {
169+
return "", fmt.Errorf("computer_call has no actions")
170+
}
171+
172+
data, err := json.Marshal(obj)
173+
if err != nil {
174+
return "", fmt.Errorf("marshal computer call input: %w", err)
175+
}
176+
return string(data), nil
177+
}
178+
179+
// ComputerUseInput is the parsed representation of a computer_call
180+
// tool call input. Use ParseComputerUseInput to create one from the
181+
// raw JSON string passed to the Run function.
182+
type ComputerUseInput struct {
183+
CallID string `json:"call_id"`
184+
Actions []responses.ComputerActionUnion `json:"actions,omitempty"`
185+
}
186+
187+
// ParseComputerUseInput parses the JSON input string from a computer
188+
// use tool call into typed SDK structures. Callers can type-switch on
189+
// individual actions via action.AsAny().
190+
func ParseComputerUseInput(input string) (*ComputerUseInput, error) {
191+
if input == "" {
192+
return nil, fmt.Errorf("empty computer use input")
193+
}
194+
var parsed ComputerUseInput
195+
if err := json.Unmarshal([]byte(input), &parsed); err != nil {
196+
return nil, fmt.Errorf("parse computer use input: %w", err)
197+
}
198+
return &parsed, nil
199+
}

0 commit comments

Comments
 (0)