@@ -10,6 +10,9 @@ use async_openai::{
1010use dotenvy:: dotenv;
1111use futures:: StreamExt ;
1212use speakstream:: ss:: SpeakStream ;
13+ use colored:: Colorize ;
14+ use clap:: Parser ;
15+ use clipboard:: { ClipboardContext , ClipboardProvider } ;
1316use std:: error:: Error ;
1417use std:: path:: PathBuf ;
1518use std:: sync:: { Arc , Mutex } ;
@@ -24,8 +27,24 @@ use flume::{Receiver, Sender};
2427use uuid:: Uuid ;
2528use std:: thread;
2629
30+ #[ derive( Parser , Debug ) ]
31+ struct Opt {
32+ /// How fast the AI speaks. 1.0 is normal speed.
33+ #[ arg( long, default_value_t = 1.0 ) ]
34+ speech_speed : f32 ,
35+
36+ /// Enable ticking sound while speaking.
37+ #[ arg( long, default_value_t = false ) ]
38+ tick : bool ,
39+
40+ /// Enable audio ducking while the push-to-talk key is held.
41+ #[ arg( long, default_value_t = false ) ]
42+ duck_ptt : bool ,
43+ }
44+
2745#[ tokio:: main]
2846async fn main ( ) -> Result < ( ) , Box < dyn Error > > {
47+ let opt = Opt :: parse ( ) ;
2948 let _ = dotenv ( ) ;
3049
3150 tracing_subscriber:: registry ( )
@@ -80,6 +99,17 @@ async fn main() -> Result<(), Box<dyn Error>> {
8099 strict: None ,
81100 }
82101 . into( ) ,
102+ FunctionObject {
103+ name: "set_clipboard" . into( ) ,
104+ description: Some ( "Sets the clipboard to the given text." . into( ) ) ,
105+ parameters: Some ( serde_json:: json!( {
106+ "type" : "object" ,
107+ "properties" : { "clipboard_text" : { "type" : "string" } } ,
108+ "required" : [ "clipboard_text" ]
109+ } ) ) ,
110+ strict: None ,
111+ }
112+ . into( ) ,
83113 ] )
84114 . build ( ) ?;
85115
@@ -90,15 +120,21 @@ async fn main() -> Result<(), Box<dyn Error>> {
90120 . create ( CreateThreadRequest :: default ( ) )
91121 . await ?;
92122
93- let speak_stream = Arc :: new ( Mutex :: new ( SpeakStream :: new ( Voice :: Echo , 1.0 , true , true ) ) ) ;
123+ let speak_stream = Arc :: new ( Mutex :: new ( SpeakStream :: new (
124+ Voice :: Echo ,
125+ opt. speech_speed ,
126+ opt. tick ,
127+ opt. duck_ptt ,
128+ ) ) ) ;
94129
95130 let ( audio_tx, audio_rx) = flume:: unbounded ( ) ;
96- start_ptt_thread ( audio_tx. clone ( ) ) ;
131+ start_ptt_thread ( audio_tx. clone ( ) , speak_stream . clone ( ) , opt . duck_ptt ) ;
97132
98133 loop {
99134 let audio_path = audio_rx. recv ( ) . unwrap ( ) ;
100135 let transcription = transcribe:: transcribe ( & client, & audio_path) . await ?;
101- println ! ( "You: {}" , transcription) ;
136+ println ! ( "{}" , "You: " . truecolor( 0 , 255 , 0 ) ) ;
137+ println ! ( "{}" , transcription) ;
102138
103139 client
104140 . threads ( )
@@ -123,6 +159,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
123159 let speak_stream_cloned = speak_stream. clone ( ) ;
124160 let client_cloned = client. clone ( ) ;
125161 let mut task_handle = None ;
162+ let mut displayed_ai_label = false ;
126163
127164 while let Some ( event) = event_stream. next ( ) . await {
128165 match event {
@@ -140,6 +177,10 @@ async fn main() -> Result<(), Box<dyn Error>> {
140177 if let MessageDeltaContent :: Text ( text) = content {
141178 if let Some ( text) = text. text {
142179 if let Some ( text) = text. value {
180+ if !displayed_ai_label {
181+ print ! ( "{}" , "AI: " . truecolor( 0 , 0 , 255 ) ) ;
182+ displayed_ai_label = true ;
183+ }
143184 print ! ( "{}" , text) ;
144185 speak_stream_cloned. lock ( ) . unwrap ( ) . add_token ( & text) ;
145186 }
@@ -163,7 +204,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
163204 }
164205}
165206
166- fn start_ptt_thread ( audio_tx : Sender < PathBuf > ) {
207+ fn start_ptt_thread ( audio_tx : Sender < PathBuf > , speak_stream : Arc < Mutex < SpeakStream > > , duck_ptt : bool ) {
167208 thread:: spawn ( move || {
168209 let mut recorder = rec:: Recorder :: new ( ) ;
169210 let tmp_dir = tempdir ( ) . unwrap ( ) ;
@@ -175,6 +216,9 @@ fn start_ptt_thread(audio_tx: Sender<PathBuf>) {
175216 match event. event_type {
176217 EventType :: KeyPress ( key) if key == ptt_key && !key_pressed => {
177218 key_pressed = true ;
219+ if duck_ptt {
220+ speak_stream. lock ( ) . unwrap ( ) . start_audio_ducking ( ) ;
221+ }
178222 let path = tmp_dir. path ( ) . join ( format ! ( "{}.wav" , Uuid :: new_v4( ) ) ) ;
179223 if recorder. start_recording ( & path, None ) . is_ok ( ) {
180224 current_path = Some ( path) ;
@@ -187,6 +231,9 @@ fn start_ptt_thread(audio_tx: Sender<PathBuf>) {
187231 audio_tx. send ( p) . unwrap ( ) ;
188232 }
189233 }
234+ if duck_ptt {
235+ speak_stream. lock ( ) . unwrap ( ) . stop_audio_ducking ( ) ;
236+ }
190237 }
191238 _ => { }
192239 }
@@ -220,6 +267,23 @@ async fn handle_requires_action(
220267 output : Some ( "0.06" . into ( ) ) ,
221268 } ) ;
222269 }
270+
271+ if tool. function . name == "set_clipboard" {
272+ let mut clipboard: ClipboardContext = ClipboardProvider :: new ( ) . unwrap ( ) ;
273+ let text = match serde_json:: from_str :: < serde_json:: Value > ( & tool. function . arguments ) {
274+ Ok ( v) => v[ "clipboard_text" ] . as_str ( ) . unwrap_or ( "" ) . to_string ( ) ,
275+ Err ( _) => String :: new ( ) ,
276+ } ;
277+ let result = clipboard. set_contents ( text. clone ( ) ) ;
278+ let msg = match result {
279+ Ok ( _) => "Clipboard set" . to_string ( ) ,
280+ Err ( e) => format ! ( "Failed to set clipboard: {}" , e) ,
281+ } ;
282+ tool_outputs. push ( ToolsOutputs {
283+ tool_call_id : Some ( tool. id . clone ( ) ) ,
284+ output : Some ( msg. into ( ) ) ,
285+ } ) ;
286+ }
223287 }
224288
225289 if let Err ( e) = submit_tool_outputs ( client, run_object, tool_outputs, speak_stream) . await {
0 commit comments