1
+ import base64
1
2
import json
3
+ import mimetypes
2
4
from pathlib import Path
3
5
import sys
4
6
import traceback
8
10
from openai .types .chat import (
9
11
ChatCompletionMessageParam ,
10
12
ChatCompletionAssistantMessageParam ,
13
+ ChatCompletionUserMessageParam ,
14
+ ChatCompletionContentPartParam ,
11
15
ChatCompletionMessage ,
12
16
ParsedChatCompletionMessage ,
13
17
)
14
18
import rich
19
+ import petname
15
20
from typer import Typer
16
21
import uuid
17
22
18
23
from .common import Models , discard_input
19
24
from .common import CostData , History
20
25
from .openai_utils import get_input_cost , get_output_cost
21
- from .tools import ExecuteBash
26
+ from .tools import ExecuteBash , ReadImage , ImageData
22
27
23
28
from .tools import (
24
29
BASH_CLF_OUTPUT ,
@@ -80,6 +85,38 @@ def save_history(history: History, session_id: str) -> None:
80
85
json .dump (history , f , indent = 3 )
81
86
82
87
88
def parse_user_message_special(msg: str) -> ChatCompletionUserMessageParam:
    """Convert raw user text into a multi-part user chat message.

    The message is processed line by line. A line starting with ``%`` is a
    special command; the only supported one is ``%image <path>``, which reads
    the file at ``<path>`` and embeds it as a base64 ``data:`` URL in an
    ``image_url`` content part. Every other line is plain text; consecutive
    text lines are merged into one ``text`` part, joined by newlines.

    Args:
        msg: The text entered by the user, possibly spanning several lines.

    Returns:
        A dict with ``role`` set to ``"user"`` and ``content`` set to the
        ordered list of text and image parts.

    Raises:
        ValueError: If a ``%`` line uses an unknown command, omits the image
            path, or names a file whose MIME type cannot be guessed.
        OSError: If the image file cannot be opened or read.
    """
    parts: list[ChatCompletionContentPartParam] = []
    for line in msg.split("\n"):
        if not line.startswith("%"):
            # Fold consecutive text lines into a single text part.
            if parts and parts[-1]["type"] == "text":
                parts[-1]["text"] += "\n" + line
            else:
                parts.append({"type": "text", "text": line})
            continue

        # Split off the command word only; everything after it is the path,
        # so paths containing spaces survive intact.
        command, _, argument = line[1:].strip().partition(" ")
        # Explicit raise rather than assert: asserts vanish under `python -O`.
        if command != "image":
            raise ValueError(f"Unknown special command: %{command}")
        image_path = argument.strip()
        if not image_path:
            raise ValueError("%image requires a file path")

        image_type = mimetypes.guess_type(image_path)[0]
        if image_type is None:
            # Avoid emitting a malformed "data:None;base64,..." URL.
            raise ValueError(f"Cannot determine image type of: {image_path}")

        with open(image_path, "rb") as f:
            image_bytes = f.read()
        image_b64 = base64.b64encode(image_bytes).decode("utf-8")
        dataurl = f"data:{image_type};base64,{image_b64}"
        parts.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": dataurl,
                    "detail": "auto",
                },
            }
        )
    return {
        "role": "user",
        "content": parts,
    }
118
+
119
+
83
120
app = Typer (pretty_exceptions_show_locals = False )
84
121
85
122
@@ -94,6 +131,7 @@ def loop(
94
131
session_id = str (uuid .uuid4 ())[:6 ]
95
132
96
133
history : History = []
134
+ waiting_for_assistant = False
97
135
if resume :
98
136
if resume == "latest" :
99
137
resume_path = sorted (Path (".wcgw" ).iterdir (), key = os .path .getmtime )[- 1 ]
@@ -108,6 +146,7 @@ def loop(
108
146
if history [1 ]["role" ] != "user" :
109
147
raise ValueError ("Invalid history file, second message should be user" )
110
148
first_message = ""
149
+ waiting_for_assistant = history [- 1 ]['role' ] != 'assistant'
111
150
112
151
my_dir = os .path .dirname (__file__ )
113
152
config_file = os .path .join (my_dir , ".." , ".." , "config.toml" )
@@ -164,12 +203,11 @@ def loop(
164
203
- Machine: { uname_machine }
165
204
"""
166
205
167
- has_tool_output = False
168
206
if not history :
169
207
history = [{"role" : "system" , "content" : system }]
170
208
else :
171
209
if history [- 1 ]["role" ] == "tool" :
172
- has_tool_output = True
210
+ waiting_for_assistant = True
173
211
174
212
client = OpenAI ()
175
213
@@ -188,16 +226,16 @@ def loop(
188
226
)
189
227
break
190
228
191
- if not has_tool_output :
229
+ if not waiting_for_assistant :
192
230
if first_message :
193
231
msg = first_message
194
232
first_message = ""
195
233
else :
196
234
msg = text_from_editor (user_console )
197
235
198
- history .append ({ "role" : "user" , "content" : msg } )
236
+ history .append (parse_user_message_special ( msg ) )
199
237
else :
200
- has_tool_output = False
238
+ waiting_for_assistant = False
201
239
202
240
cost_ , input_toks_ = get_input_cost (
203
241
config .cost_file [config .model ], enc , history
@@ -222,6 +260,7 @@ def loop(
222
260
_histories : History = []
223
261
item : ChatCompletionMessageParam
224
262
full_response : str = ""
263
+ image_histories : History = []
225
264
try :
226
265
for chunk in stream :
227
266
if chunk .choices [0 ].finish_reason == "tool_calls" :
@@ -235,7 +274,7 @@ def loop(
235
274
"type" : "function" ,
236
275
"function" : {
237
276
"arguments" : tool_args ,
238
- "name" : "execute_bash" ,
277
+ "name" : type ( which_tool ( tool_args )). __name__ ,
239
278
},
240
279
}
241
280
for tool_call_id , toolcallargs in tool_call_args_by_id .items ()
@@ -251,7 +290,7 @@ def loop(
251
290
)
252
291
system_console .print (f"\n Total cost: { config .cost_unit } { cost :.3f} " )
253
292
output_toks += output_toks_
254
-
293
+
255
294
_histories .append (item )
256
295
for tool_call_id , toolcallargs in tool_call_args_by_id .items ():
257
296
for toolindex , tool_args in toolcallargs .items ():
@@ -283,21 +322,58 @@ def loop(
283
322
f"\n Total cost: { config .cost_unit } { cost :.3f} "
284
323
)
285
324
return output_or_done .task_output , cost
325
+
286
326
output = output_or_done
287
327
288
- item = {
289
- "role" : "tool" ,
290
- "content" : str (output ),
291
- "tool_call_id" : tool_call_id + str (toolindex ),
292
- }
328
+ if isinstance (output , ImageData ):
329
+ randomId = petname .Generate (2 , "-" )
330
+ if not image_histories :
331
+ image_histories .extend ([
332
+ {
333
+ 'role' : 'assistant' ,
334
+ 'content' : f'Share images with ids: { randomId } '
335
+
336
+ },
337
+ {
338
+ 'role' : 'user' ,
339
+ 'content' : [{
340
+ 'type' : 'image_url' ,
341
+ 'image_url' : {
342
+ 'url' : output .dataurl ,
343
+ 'detail' : 'auto'
344
+ }
345
+ }]
346
+ }]
347
+ )
348
+ else :
349
+ image_histories [0 ]['content' ] += ', ' + randomId
350
+ image_histories [1 ]["content" ].append ({ # type: ignore
351
+ 'type' : 'image_url' ,
352
+ 'image_url' : {
353
+ 'url' : output .dataurl ,
354
+ 'detail' : 'auto'
355
+ }
356
+ })
357
+
358
+ item = {
359
+ "role" : "tool" ,
360
+ "content" : f'Ask user for image id: { randomId } ' ,
361
+ "tool_call_id" : tool_call_id + str (toolindex ),
362
+ }
363
+ else :
364
+ item = {
365
+ "role" : "tool" ,
366
+ "content" : str (output ),
367
+ "tool_call_id" : tool_call_id + str (toolindex ),
368
+ }
293
369
cost_ , output_toks_ = get_output_cost (
294
370
config .cost_file [config .model ], enc , item
295
371
)
296
372
cost += cost_
297
373
output_toks += output_toks_
298
374
299
375
_histories .append (item )
300
- has_tool_output = True
376
+ waiting_for_assistant = True
301
377
break
302
378
elif chunk .choices [0 ].finish_reason :
303
379
assistant_console .print ("" )
@@ -326,11 +402,11 @@ def loop(
326
402
assistant_console .print (chunk_str , end = "" )
327
403
full_response += chunk_str
328
404
except KeyboardInterrupt :
329
- has_tool_output = False
405
+ waiting_for_assistant = False
330
406
input ("Interrupted...enter to redo the current turn" )
331
407
else :
332
408
history .extend (_histories )
333
-
409
+ history . extend ( image_histories )
334
410
save_history (history , session_id )
335
411
336
412
return "Couldn't finish the task" , cost
0 commit comments