Skip to content

Commit 94d6a50

Browse files
committed
fix: harden codex openai request content conversion
1 parent 04f7b30 commit 94d6a50

2 files changed

Lines changed: 178 additions & 9 deletions

File tree

internal/converter/codex_to_openai.go

Lines changed: 85 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -105,10 +105,12 @@ func (c *codexToOpenAIRequest) Transform(body []byte, model string, stream bool)
105105
})
106106
case "function_call_output":
107107
callID, _ := m["call_id"].(string)
108-
outputStr, _ := m["output"].(string)
108+
if callID == "" {
109+
continue
110+
}
109111
openaiReq.Messages = append(openaiReq.Messages, OpenAIMessage{
110112
Role: "tool",
111-
Content: outputStr,
113+
Content: codexToolOutputToOpenAI(m["output"]),
112114
ToolCallID: callID,
113115
})
114116
}
@@ -139,6 +141,18 @@ func codexContentToOpenAI(content interface{}) interface{} {
139141
onlyText := true
140142
sawCodexPart := false
141143
for _, rawPart := range value {
144+
if text, ok := rawPart.(string); ok {
145+
if text == "" {
146+
continue
147+
}
148+
textParts = append(textParts, text)
149+
parts = append(parts, map[string]interface{}{
150+
"type": "text",
151+
"text": text,
152+
})
153+
continue
154+
}
155+
142156
part, ok := rawPart.(map[string]interface{})
143157
if !ok {
144158
onlyText = false
@@ -156,19 +170,31 @@ func codexContentToOpenAI(content interface{}) interface{} {
156170
"type": "text",
157171
"text": text,
158172
})
159-
case "input_image":
173+
case "input_image", "output_image", "image_url":
160174
sawCodexPart = true
161175
onlyText = false
162-
imageURL := codexImageURLToOpenAI(part["image_url"])
176+
imageURL := codexImageURLToOpenAI(part["image_url"], part["detail"])
177+
if imageURL == nil {
178+
imageURL = codexImageURLToOpenAI(part["image"], part["detail"])
179+
}
163180
if imageURL == nil {
164-
imageURL = codexImageURLToOpenAI(part["image"])
181+
imageURL = codexImageURLToOpenAI(part["url"], part["detail"])
165182
}
166183
if imageURL != nil {
167184
parts = append(parts, map[string]interface{}{
168185
"type": "image_url",
169186
"image_url": imageURL,
170187
})
171188
}
189+
case "input_file", "file":
190+
sawCodexPart = true
191+
onlyText = false
192+
if file := codexFileToOpenAI(part); file != nil {
193+
parts = append(parts, map[string]interface{}{
194+
"type": "file",
195+
"file": file,
196+
})
197+
}
172198
default:
173199
onlyText = false
174200
}
@@ -186,19 +212,69 @@ func codexContentToOpenAI(content interface{}) interface{} {
186212
return content
187213
}
188214

189-
func codexImageURLToOpenAI(raw interface{}) map[string]interface{} {
215+
func codexToolOutputToOpenAI(output interface{}) interface{} {
216+
if output == nil {
217+
return ""
218+
}
219+
if text, ok := output.(string); ok {
220+
return text
221+
}
222+
if _, ok := output.([]interface{}); ok {
223+
switch normalized := codexContentToOpenAI(output).(type) {
224+
case string:
225+
return normalized
226+
case []map[string]interface{}:
227+
return normalized
228+
}
229+
}
230+
encoded, err := json.Marshal(output)
231+
if err != nil {
232+
return ""
233+
}
234+
return string(encoded)
235+
}
236+
237+
// codexImageURLToOpenAI builds the object stored under "image_url" in an
// OpenAI content part from a Codex image reference.
//
// raw may be either a non-empty URL string or a map carrying a non-empty
// string "url"; any other shape yields nil so the caller can try another key.
// detailRaw, when it is a non-empty string, is stored as "detail" unless raw
// already provides a "detail" key.
//
// The returned map is always freshly allocated: raw is never modified, so the
// decoded request the caller still holds is left untouched.
func codexImageURLToOpenAI(raw interface{}, detailRaw interface{}) map[string]interface{} {
	var imageURL map[string]interface{}
	switch image := raw.(type) {
	case string:
		if image == "" {
			return nil
		}
		imageURL = map[string]interface{}{"url": image}
	case map[string]interface{}:
		// Apply the same non-empty rule as the string form.
		if url, ok := image["url"].(string); !ok || url == "" {
			return nil
		}
		// Copy instead of aliasing so adding "detail" below does not write
		// into the caller's map.
		imageURL = make(map[string]interface{}, len(image)+1)
		for key, value := range image {
			imageURL[key] = value
		}
	default:
		return nil
	}

	if detail, ok := detailRaw.(string); ok && detail != "" {
		// A detail already present on the image object takes precedence.
		if _, exists := imageURL["detail"]; !exists {
			imageURL["detail"] = detail
		}
	}
	return imageURL
}
260+
261+
// codexFileToOpenAI builds the object stored under "file" in an OpenAI content
// part from a Codex file part.
//
// Fields are read from the part itself and, as a fallback, from a nested
// "file" object, so that a part which already uses the nested
// {"type":"file","file":{...}} layout is converted instead of dropped.
// A top-level value always wins over the nested one.
//
// Only non-empty string values are copied:
//   - "file_id", or failing that "file_url", is written to "file_id";
//   - "file_data" and "filename" are copied under the same names.
//
// Returns nil when none of these fields is present.
func codexFileToOpenAI(part map[string]interface{}) map[string]interface{} {
	// nested stays nil when the part has no "file" object; reading from a nil
	// map is safe and simply finds nothing.
	nested, _ := part["file"].(map[string]interface{})
	lookup := func(key string) string {
		if value, ok := part[key].(string); ok && value != "" {
			return value
		}
		if value, ok := nested[key].(string); ok && value != "" {
			return value
		}
		return ""
	}

	file := make(map[string]interface{}, 3)
	if fileID := lookup("file_id"); fileID != "" {
		file["file_id"] = fileID
	} else if fileURL := lookup("file_url"); fileURL != "" {
		// A file URL is forwarded in the "file_id" slot.
		file["file_id"] = fileURL
	}
	if fileData := lookup("file_data"); fileData != "" {
		file["file_data"] = fileData
	}
	if filename := lookup("filename"); filename != "" {
		file["filename"] = filename
	}

	if len(file) == 0 {
		return nil
	}
	return file
}
203279

204280
func (c *codexToOpenAIResponse) Transform(body []byte) ([]byte, error) {

internal/converter/coverage_openai_request_test.go

Lines changed: 93 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1004,3 +1004,96 @@ func TestCodexToOpenAIRequestDoesNotLeakMalformedResponsesImagePart(t *testing.T
10041004
t.Fatalf("expected malformed known Codex part to collapse to empty content: %s", string(out))
10051005
}
10061006
}
1007+
1008+
func TestCodexToOpenAIRequestNormalizesToolOutputParts(t *testing.T) {
1009+
req := CodexRequest{Input: []interface{}{
1010+
map[string]interface{}{
1011+
"type": "function_call_output",
1012+
"call_id": "call_1",
1013+
"output": []interface{}{
1014+
map[string]interface{}{"type": "input_text", "text": "a"},
1015+
map[string]interface{}{"type": "output_text", "text": "b"},
1016+
},
1017+
},
1018+
}}
1019+
body, _ := json.Marshal(req)
1020+
conv := &codexToOpenAIRequest{}
1021+
out, err := conv.Transform(body, "gpt", false)
1022+
if err != nil {
1023+
t.Fatalf("Transform: %v", err)
1024+
}
1025+
if strings.Contains(string(out), "input_text") || strings.Contains(string(out), "output_text") {
1026+
t.Fatalf("Responses tool output part type leaked into OpenAI request: %s", string(out))
1027+
}
1028+
if !strings.Contains(string(out), `"content":"ab"`) {
1029+
t.Fatalf("expected text-only tool output parts to collapse to string: %s", string(out))
1030+
}
1031+
}
1032+
1033+
func TestCodexToOpenAIRequestSkipsToolOutputWithoutCallID(t *testing.T) {
1034+
req := CodexRequest{Input: []interface{}{
1035+
map[string]interface{}{"type": "function_call_output", "output": "orphan"},
1036+
}}
1037+
body, _ := json.Marshal(req)
1038+
conv := &codexToOpenAIRequest{}
1039+
out, err := conv.Transform(body, "gpt", false)
1040+
if err != nil {
1041+
t.Fatalf("Transform: %v", err)
1042+
}
1043+
if strings.Contains(string(out), `"role":"tool"`) || strings.Contains(string(out), "orphan") {
1044+
t.Fatalf("expected orphan tool output to be skipped: %s", string(out))
1045+
}
1046+
}
1047+
1048+
func TestCodexToOpenAIRequestJSONEncodesUnknownStructuredToolOutput(t *testing.T) {
1049+
req := CodexRequest{Input: []interface{}{
1050+
map[string]interface{}{
1051+
"type": "function_call_output",
1052+
"call_id": "call_1",
1053+
"output": []interface{}{
1054+
map[string]interface{}{"unknown": "value"},
1055+
},
1056+
},
1057+
}}
1058+
body, _ := json.Marshal(req)
1059+
conv := &codexToOpenAIRequest{}
1060+
out, err := conv.Transform(body, "gpt", false)
1061+
if err != nil {
1062+
t.Fatalf("Transform: %v", err)
1063+
}
1064+
if !strings.Contains(string(out), `"content":"[{\"unknown\":\"value\"}]"`) {
1065+
t.Fatalf("expected unknown structured tool output to be JSON encoded: %s", string(out))
1066+
}
1067+
}
1068+
1069+
func TestCodexToOpenAIRequestNormalizesFilesImagesAndBareTextParts(t *testing.T) {
1070+
req := CodexRequest{Input: []interface{}{
1071+
map[string]interface{}{
1072+
"type": "message",
1073+
"role": "user",
1074+
"content": []interface{}{
1075+
"prefix",
1076+
map[string]interface{}{"type": "input_text", "text": " body"},
1077+
map[string]interface{}{"type": "input_image", "image_url": "https://example.com/a.png", "detail": "low"},
1078+
map[string]interface{}{"type": "input_file", "file_url": "https://example.com/a.pdf", "file_data": "data"},
1079+
},
1080+
},
1081+
}}
1082+
body, _ := json.Marshal(req)
1083+
conv := &codexToOpenAIRequest{}
1084+
out, err := conv.Transform(body, "gpt", false)
1085+
if err != nil {
1086+
t.Fatalf("Transform: %v", err)
1087+
}
1088+
outStr := string(out)
1089+
for _, leaked := range []string{"input_text", "input_image", "input_file"} {
1090+
if strings.Contains(outStr, leaked) {
1091+
t.Fatalf("Responses content part type %q leaked into OpenAI request: %s", leaked, outStr)
1092+
}
1093+
}
1094+
for _, expected := range []string{`"text":"prefix"`, `"text":" body"`, `"type":"image_url"`, `"detail":"low"`, `"type":"file"`, `"file_id":"https://example.com/a.pdf"`, `"file_data":"data"`} {
1095+
if !strings.Contains(outStr, expected) {
1096+
t.Fatalf("expected %s in converted request: %s", expected, outStr)
1097+
}
1098+
}
1099+
}

0 commit comments

Comments
 (0)