55import os
66import platform
77import pyautogui
8+ import signal
89import sys
910import time
1011
1516
# Lower-cased operating-system name as reported by platform.system(),
# resolved once when the module is imported.
current_platform = platform.system().lower()

# Module-wide pause flag used for interactive debugging; starts cleared.
paused = False
21+
def get_char():
    """Read one keypress from stdin without waiting for Enter.

    On macOS and Linux the terminal is switched to raw mode for a single
    one-character read, and its previous settings are always restored
    afterwards. On any other platform the key is read with
    ``msvcrt.getch()`` and decoded as UTF-8 with undecodable bytes dropped.

    If the chosen path fails (for example stdin is not a terminal, or the
    platform module is unavailable), the function falls back to ``input()``,
    which waits for Enter and returns the whole line.

    Returns:
        str: Usually a single character. It can be an empty string (end of
        input, or a key whose bytes could not be decoded) or a longer string
        (the ``input()`` fallback), so callers must not assume ``len == 1``.

    Raises:
        EOFError: Propagated from the ``input()`` fallback when stdin is
            exhausted.
    """
    try:
        if platform.system() in ("Darwin", "Linux"):
            # Imported lazily: these modules only exist on Unix-like systems.
            import termios
            import tty

            fd = sys.stdin.fileno()
            old_settings = termios.tcgetattr(fd)
            try:
                tty.setraw(fd)
                ch = sys.stdin.read(1)
            finally:
                # Restore the terminal even if the read is interrupted.
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
            return ch

        # Windows fallback
        import msvcrt

        return msvcrt.getch().decode("utf-8", errors="ignore")
    except Exception:
        # Deliberately broad best-effort fallback for non-terminal
        # environments. It must not be a bare ``except``: SystemExit and
        # KeyboardInterrupt (e.g. sys.exit() raised by a signal handler that
        # fires during the read) have to propagate instead of being turned
        # into a blocking input() call.
        return input()
43+
def signal_handler(signum, frame):
    """Pause on Ctrl+C and block until the user resumes or quits.

    On the first invocation the handler prints a menu, sets the module-level
    ``paused`` flag and loops reading keys via ``get_char()``:

    * Ctrl+C (``"\\x03"``) exits the process with status 0.
    * Esc (``"\\x1b"``) clears ``paused`` and returns, resuming the workflow.
    * Anything else is reported as an unknown command and the loop continues.

    If the handler is entered while ``paused`` is already set (a second
    Ctrl+C delivered as a signal), it exits the process immediately.

    Args:
        signum: Signal number passed by the ``signal`` module (unused).
        frame: Interrupted stack frame passed by the ``signal`` module
            (unused).

    Raises:
        SystemExit: When the user chooses to quit.
    """
    global paused

    if paused:
        # Already paused, second Ctrl+C means quit
        print("\n \n 🛑 Exiting Agent-S...")
        sys.exit(0)

    print("\n \n 🔸 Agent-S Workflow Paused 🔸")
    print("=" * 50)
    print("Options:")
    print(" • Press Ctrl+C again to quit")
    print(" • Press Esc to resume workflow")
    print("=" * 50)

    paused = True

    while paused:
        try:
            print("\n [PAUSED] Waiting for input... ", end="", flush=True)
            char = get_char()

            # Compare strings directly instead of calling ord(): get_char()
            # may return an empty string or a whole line, and ord() raises
            # TypeError for anything that is not exactly one character.
            if char == "\x03":  # Ctrl+C
                print("\n \n 🛑 Exiting Agent-S...")
                sys.exit(0)
            elif char == "\x1b":  # Esc
                print("\n \n ▶️ Resuming Agent-S workflow...")
                paused = False
                break
            else:
                code_point = ord(char) if len(char) == 1 else "n/a"
                print(f"\n Unknown command: '{char} ' (ord: {code_point} )")

        except KeyboardInterrupt:
            print("\n \n 🛑 Exiting Agent-S...")
            sys.exit(0)
80+
# Route Ctrl+C (SIGINT) to the pause/resume handler defined above.
signal.signal(signal.SIGINT, signal_handler)

# Root logger, configured to emit everything from DEBUG upwards.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
2086
@@ -81,10 +147,15 @@ def scale_screen_dimensions(width: int, height: int, max_dim_size: int):
81147
82148
83149def run_agent (agent , instruction : str , scaled_width : int , scaled_height : int ):
150+ global paused
84151 obs = {}
85152 traj = "Task:\n " + instruction
86153 subtask_traj = ""
87- for _ in range (15 ):
154+ for step in range (15 ):
155+ # Check if we're in paused state and wait
156+ while paused :
157+ time .sleep (0.1 )
158+
88159 # Get screen shot using pyautogui
89160 screenshot = pyautogui .screenshot ()
90161 screenshot = screenshot .resize ((scaled_width , scaled_height ), Image .LANCZOS )
@@ -98,6 +169,12 @@ def run_agent(agent, instruction: str, scaled_width: int, scaled_height: int):
98169 # Convert to base64 string.
99170 obs ["screenshot" ] = screenshot_bytes
100171
172+ # Check again for pause state before prediction
173+ while paused :
174+ time .sleep (0.1 )
175+
176+ print (f"\n 🔄 Step { step + 1 } /15: Getting next action from agent..." )
177+
101178 # Get next action code from the agent
102179 info , code = agent .predict (instruction = instruction , observation = obs )
103180
@@ -118,13 +195,18 @@ def run_agent(agent, instruction: str, scaled_width: int, scaled_height: int):
118195 continue
119196
120197 if "wait" in code [0 ].lower ():
198+ print ("⏳ Agent requested wait..." )
121199 time .sleep (5 )
122200 continue
123201
124202 else :
125203 time .sleep (1.0 )
126204 print ("EXECUTING CODE:" , code [0 ])
127205
206+ # Check for pause state before execution
207+ while paused :
208+ time .sleep (0.1 )
209+
128210 # Ask for permission before executing
129211 exec (code [0 ])
130212 time .sleep (1.0 )
0 commit comments