|
| 1 | +package openai |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "encoding/json" |
| 6 | + "fmt" |
| 7 | + |
| 8 | + "charm.land/fantasy" |
| 9 | + "github.com/charmbracelet/openai-go/responses" |
| 10 | +) |
| 11 | + |
| 12 | +const computerUseToolID = "openai.computer_use" |
| 13 | + |
| 14 | +// Type identifier for computer use metadata, registered in |
| 15 | +// responses_options.go init(). |
| 16 | +const TypeComputerUseMetadata = Name + ".responses.computer_use_metadata" |
| 17 | + |
| 18 | +// Type identifier for computer call output options, registered in |
| 19 | +// responses_options.go init(). |
| 20 | +const TypeComputerCallOutputOptions = Name + ".responses.computer_call_output_options" |
| 21 | + |
| 22 | +// ComputerUseMetadata stores the raw wire-format JSON of a computer_call |
| 23 | +// output item for faithful round-tripping via param.Override. |
| 24 | +type ComputerUseMetadata struct { |
| 25 | + RawJSON string `json:"raw_json"` |
| 26 | +} |
| 27 | + |
| 28 | +var _ fantasy.ProviderOptionsData = (*ComputerUseMetadata)(nil) |
| 29 | + |
| 30 | +// Options implements the ProviderOptionsData interface. |
| 31 | +func (*ComputerUseMetadata) Options() {} |
| 32 | + |
| 33 | +// MarshalJSON implements custom JSON marshaling with type info. |
| 34 | +func (m ComputerUseMetadata) MarshalJSON() ([]byte, error) { |
| 35 | + type plain ComputerUseMetadata |
| 36 | + return fantasy.MarshalProviderType(TypeComputerUseMetadata, plain(m)) |
| 37 | +} |
| 38 | + |
| 39 | +// UnmarshalJSON implements custom JSON unmarshaling with type info. |
| 40 | +func (m *ComputerUseMetadata) UnmarshalJSON(data []byte) error { |
| 41 | + type plain ComputerUseMetadata |
| 42 | + var p plain |
| 43 | + if err := fantasy.UnmarshalProviderType(data, &p); err != nil { |
| 44 | + return err |
| 45 | + } |
| 46 | + *m = ComputerUseMetadata(p) |
| 47 | + return nil |
| 48 | +} |
| 49 | + |
| 50 | +// ComputerCallOutputOptions tunes the wire payload fantasy emits for a |
| 51 | +// computer_call_output input item. Set it on a ToolResultPart's |
| 52 | +// ProviderOptions under the OpenAI provider key. Detail mirrors the |
| 53 | +// `output.detail` field documented in the OpenAI computer-use guide: |
| 54 | +// values are "auto", "low", "high", or "original". "original" is |
| 55 | +// recommended for full-resolution screenshots so the model sees pixel |
| 56 | +// coordinates that match the underlying display. |
| 57 | +type ComputerCallOutputOptions struct { |
| 58 | + Detail string `json:"detail,omitempty"` |
| 59 | +} |
| 60 | + |
| 61 | +var _ fantasy.ProviderOptionsData = (*ComputerCallOutputOptions)(nil) |
| 62 | + |
| 63 | +// Options implements the ProviderOptionsData interface. |
| 64 | +func (*ComputerCallOutputOptions) Options() {} |
| 65 | + |
| 66 | +// MarshalJSON implements custom JSON marshaling with type info. |
| 67 | +func (o ComputerCallOutputOptions) MarshalJSON() ([]byte, error) { |
| 68 | + type plain ComputerCallOutputOptions |
| 69 | + return fantasy.MarshalProviderType(TypeComputerCallOutputOptions, plain(o)) |
| 70 | +} |
| 71 | + |
| 72 | +// UnmarshalJSON implements custom JSON unmarshaling with type info. |
| 73 | +func (o *ComputerCallOutputOptions) UnmarshalJSON(data []byte) error { |
| 74 | + type plain ComputerCallOutputOptions |
| 75 | + var p plain |
| 76 | + if err := fantasy.UnmarshalProviderType(data, &p); err != nil { |
| 77 | + return err |
| 78 | + } |
| 79 | + *o = ComputerCallOutputOptions(p) |
| 80 | + return nil |
| 81 | +} |
| 82 | + |
| 83 | +// GetComputerCallOutputOptions extracts ComputerCallOutputOptions from |
| 84 | +// provider options, returning nil if not present or of a different |
| 85 | +// type. |
| 86 | +func GetComputerCallOutputOptions(opts fantasy.ProviderOptions) *ComputerCallOutputOptions { |
| 87 | + if v, ok := opts[Name]; ok { |
| 88 | + if o, ok := v.(*ComputerCallOutputOptions); ok { |
| 89 | + return o |
| 90 | + } |
| 91 | + } |
| 92 | + return nil |
| 93 | +} |
| 94 | + |
| 95 | +// NewComputerUseTool creates an executable provider-defined computer use |
| 96 | +// tool for OpenAI models. The run function receives a ToolCall whose |
| 97 | +// Input is a JSON object containing call_id and actions. Parse it with |
| 98 | +// ParseComputerUseInput. Return an image response |
| 99 | +// (fantasy.NewImageResponse) with a screenshot. |
| 100 | +func NewComputerUseTool( |
| 101 | + run func(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error), |
| 102 | +) fantasy.ExecutableProviderTool { |
| 103 | + return fantasy.NewExecutableProviderTool( |
| 104 | + fantasy.ProviderDefinedTool{ |
| 105 | + ID: computerUseToolID, |
| 106 | + Name: "computer", |
| 107 | + }, |
| 108 | + run, |
| 109 | + ) |
| 110 | +} |
| 111 | + |
| 112 | +// IsComputerUseTool reports whether tool is an OpenAI computer use tool. |
| 113 | +// It returns true for both ExecutableProviderTool and bare |
| 114 | +// ProviderDefinedTool instances with the computer use tool ID. |
| 115 | +func IsComputerUseTool(tool fantasy.Tool) bool { |
| 116 | + pdt, ok := asProviderDefinedTool(tool) |
| 117 | + if !ok { |
| 118 | + return false |
| 119 | + } |
| 120 | + return pdt.ID == computerUseToolID |
| 121 | +} |
| 122 | + |
| 123 | +// asProviderDefinedTool extracts the underlying ProviderDefinedTool from |
| 124 | +// either a ProviderDefinedTool or an ExecutableProviderTool. |
| 125 | +func asProviderDefinedTool(tool fantasy.Tool) (fantasy.ProviderDefinedTool, bool) { |
| 126 | + switch t := tool.(type) { |
| 127 | + case fantasy.ProviderDefinedTool: |
| 128 | + return t, true |
| 129 | + case fantasy.ExecutableProviderTool: |
| 130 | + return t.Definition(), true |
| 131 | + default: |
| 132 | + return fantasy.ProviderDefinedTool{}, false |
| 133 | + } |
| 134 | +} |
| 135 | + |
| 136 | +// GetComputerUseMetadata extracts ComputerUseMetadata from provider |
| 137 | +// options, returning nil if not present or of a different type. |
| 138 | +func GetComputerUseMetadata(opts fantasy.ProviderOptions) *ComputerUseMetadata { |
| 139 | + if v, ok := opts[Name]; ok { |
| 140 | + if m, ok := v.(*ComputerUseMetadata); ok { |
| 141 | + return m |
| 142 | + } |
| 143 | + } |
| 144 | + return nil |
| 145 | +} |
| 146 | + |
| 147 | +// computerCallInput builds a JSON string from a ResponseComputerToolCall |
| 148 | +// using per-action RawJSON() for faithful serialization. |
| 149 | +func computerCallInput(call responses.ResponseComputerToolCall) (string, error) { |
| 150 | + callIDJSON, err := json.Marshal(call.CallID) |
| 151 | + if err != nil { |
| 152 | + return "", fmt.Errorf("marshal call_id: %w", err) |
| 153 | + } |
| 154 | + obj := map[string]json.RawMessage{ |
| 155 | + "call_id": callIDJSON, |
| 156 | + } |
| 157 | + |
| 158 | + if len(call.Actions) > 0 { |
| 159 | + rawActions := make([]json.RawMessage, len(call.Actions)) |
| 160 | + for i, a := range call.Actions { |
| 161 | + rawActions[i] = json.RawMessage(a.RawJSON()) |
| 162 | + } |
| 163 | + actionsJSON, err := json.Marshal(rawActions) |
| 164 | + if err != nil { |
| 165 | + return "", fmt.Errorf("marshal actions: %w", err) |
| 166 | + } |
| 167 | + obj["actions"] = actionsJSON |
| 168 | + } else { |
| 169 | + return "", fmt.Errorf("computer_call has no actions") |
| 170 | + } |
| 171 | + |
| 172 | + data, err := json.Marshal(obj) |
| 173 | + if err != nil { |
| 174 | + return "", fmt.Errorf("marshal computer call input: %w", err) |
| 175 | + } |
| 176 | + return string(data), nil |
| 177 | +} |
| 178 | + |
| 179 | +// ComputerUseInput is the parsed representation of a computer_call |
| 180 | +// tool call input. Use ParseComputerUseInput to create one from the |
| 181 | +// raw JSON string passed to the Run function. |
| 182 | +type ComputerUseInput struct { |
| 183 | + CallID string `json:"call_id"` |
| 184 | + Actions []responses.ComputerActionUnion `json:"actions,omitempty"` |
| 185 | +} |
| 186 | + |
| 187 | +// ParseComputerUseInput parses the JSON input string from a computer |
| 188 | +// use tool call into typed SDK structures. Callers can type-switch on |
| 189 | +// individual actions via action.AsAny(). |
| 190 | +func ParseComputerUseInput(input string) (*ComputerUseInput, error) { |
| 191 | + if input == "" { |
| 192 | + return nil, fmt.Errorf("empty computer use input") |
| 193 | + } |
| 194 | + var parsed ComputerUseInput |
| 195 | + if err := json.Unmarshal([]byte(input), &parsed); err != nil { |
| 196 | + return nil, fmt.Errorf("parse computer use input: %w", err) |
| 197 | + } |
| 198 | + return &parsed, nil |
| 199 | +} |
0 commit comments