Skip to content

Commit 5d02045

Browse files
authored
Merge pull request #92 from sloganking/codex/enable-speech-speed-flag-and-audio-ducking-in-v2
Add clipboard feature and CLI speech options
2 parents 1f101a1 + 987ff51 commit 5d02045

File tree

3 files changed

+72
-5
lines changed

3 files changed

+72
-5
lines changed

assistant_v2/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,6 @@ hound = "3.5.1"
1919
uuid = { version = "1.6.1", features = ["v4"] }
2020
tempfile = "3.10.1"
2121
tracing = "0.1"
22+
clap = { version = "4.4.6", features = ["derive"] }
23+
colored = "2.0.4"
24+
clipboard = "0.5.0"

assistant_v2/FEATURE_PROGRESS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ This document tracks which features from the original assistant have been implem
1212
| Get system info | Pending |
1313
| List and kill processes | Pending |
1414
| Run internet speed tests | Pending |
15-
| Set the clipboard contents | Pending |
15+
| Set the clipboard contents | Done |
1616
| Timers with alarm sounds | Pending |
1717
| Change voice or speaking speed | Pending |
1818
| Mute/unmute voice output | Pending |

assistant_v2/src/main.rs

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ use async_openai::{
1010
use dotenvy::dotenv;
1111
use futures::StreamExt;
1212
use speakstream::ss::SpeakStream;
13+
use colored::Colorize;
14+
use clap::Parser;
15+
use clipboard::{ClipboardContext, ClipboardProvider};
1316
use std::error::Error;
1417
use std::path::PathBuf;
1518
use std::sync::{Arc, Mutex};
@@ -24,8 +27,24 @@ use flume::{Receiver, Sender};
2427
use uuid::Uuid;
2528
use std::thread;
2629

30+
// Command-line options for the assistant. `main` parses them once at startup
// with `Opt::parse()` and threads the values into the speech pipeline.
//
// NOTE: the `///` comments on the fields below are picked up by clap's derive
// macro as the `--help` text for each flag, so they are user-facing strings.
// Maintainer notes in this struct therefore use plain `//` comments.
#[derive(Parser, Debug)]
31+
struct Opt {
32+
// Passed to `SpeakStream::new` as its speed argument.
/// How fast the AI speaks. 1.0 is normal speed.
33+
#[arg(long, default_value_t = 1.0)]
34+
speech_speed: f32,
35+
36+
// Passed to `SpeakStream::new`; off unless `--tick` is given.
/// Enable ticking sound while speaking.
37+
#[arg(long, default_value_t = false)]
38+
tick: bool,
39+
40+
// Passed to both `SpeakStream::new` and `start_ptt_thread`; the PTT thread
// only calls `start_audio_ducking` / `stop_audio_ducking` when this is true.
/// Enable audio ducking while the push-to-talk key is held.
41+
#[arg(long, default_value_t = false)]
42+
duck_ptt: bool,
43+
}
44+
2745
#[tokio::main]
2846
async fn main() -> Result<(), Box<dyn Error>> {
47+
let opt = Opt::parse();
2948
let _ = dotenv();
3049

3150
tracing_subscriber::registry()
@@ -80,6 +99,17 @@ async fn main() -> Result<(), Box<dyn Error>> {
8099
strict: None,
81100
}
82101
.into(),
102+
FunctionObject {
103+
name: "set_clipboard".into(),
104+
description: Some("Sets the clipboard to the given text.".into()),
105+
parameters: Some(serde_json::json!({
106+
"type": "object",
107+
"properties": {"clipboard_text": {"type": "string"}},
108+
"required": ["clipboard_text"]
109+
})),
110+
strict: None,
111+
}
112+
.into(),
83113
])
84114
.build()?;
85115

@@ -90,15 +120,21 @@ async fn main() -> Result<(), Box<dyn Error>> {
90120
.create(CreateThreadRequest::default())
91121
.await?;
92122

93-
let speak_stream = Arc::new(Mutex::new(SpeakStream::new(Voice::Echo, 1.0, true, true)));
123+
let speak_stream = Arc::new(Mutex::new(SpeakStream::new(
124+
Voice::Echo,
125+
opt.speech_speed,
126+
opt.tick,
127+
opt.duck_ptt,
128+
)));
94129

95130
let (audio_tx, audio_rx) = flume::unbounded();
96-
start_ptt_thread(audio_tx.clone());
131+
start_ptt_thread(audio_tx.clone(), speak_stream.clone(), opt.duck_ptt);
97132

98133
loop {
99134
let audio_path = audio_rx.recv().unwrap();
100135
let transcription = transcribe::transcribe(&client, &audio_path).await?;
101-
println!("You: {}", transcription);
136+
println!("{}", "You: ".truecolor(0, 255, 0));
137+
println!("{}", transcription);
102138

103139
client
104140
.threads()
@@ -123,6 +159,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
123159
let speak_stream_cloned = speak_stream.clone();
124160
let client_cloned = client.clone();
125161
let mut task_handle = None;
162+
let mut displayed_ai_label = false;
126163

127164
while let Some(event) = event_stream.next().await {
128165
match event {
@@ -140,6 +177,10 @@ async fn main() -> Result<(), Box<dyn Error>> {
140177
if let MessageDeltaContent::Text(text) = content {
141178
if let Some(text) = text.text {
142179
if let Some(text) = text.value {
180+
if !displayed_ai_label {
181+
print!("{}", "AI: ".truecolor(0, 0, 255));
182+
displayed_ai_label = true;
183+
}
143184
print!("{}", text);
144185
speak_stream_cloned.lock().unwrap().add_token(&text);
145186
}
@@ -163,7 +204,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
163204
}
164205
}
165206

166-
fn start_ptt_thread(audio_tx: Sender<PathBuf>) {
207+
fn start_ptt_thread(audio_tx: Sender<PathBuf>, speak_stream: Arc<Mutex<SpeakStream>>, duck_ptt: bool) {
167208
thread::spawn(move || {
168209
let mut recorder = rec::Recorder::new();
169210
let tmp_dir = tempdir().unwrap();
@@ -175,6 +216,9 @@ fn start_ptt_thread(audio_tx: Sender<PathBuf>) {
175216
match event.event_type {
176217
EventType::KeyPress(key) if key == ptt_key && !key_pressed => {
177218
key_pressed = true;
219+
if duck_ptt {
220+
speak_stream.lock().unwrap().start_audio_ducking();
221+
}
178222
let path = tmp_dir.path().join(format!("{}.wav", Uuid::new_v4()));
179223
if recorder.start_recording(&path, None).is_ok() {
180224
current_path = Some(path);
@@ -187,6 +231,9 @@ fn start_ptt_thread(audio_tx: Sender<PathBuf>) {
187231
audio_tx.send(p).unwrap();
188232
}
189233
}
234+
if duck_ptt {
235+
speak_stream.lock().unwrap().stop_audio_ducking();
236+
}
190237
}
191238
_ => {}
192239
}
@@ -220,6 +267,23 @@ async fn handle_requires_action(
220267
output: Some("0.06".into()),
221268
});
222269
}
270+
271+
if tool.function.name == "set_clipboard" {
272+
let mut clipboard: ClipboardContext = ClipboardProvider::new().unwrap();
273+
let text = match serde_json::from_str::<serde_json::Value>(&tool.function.arguments) {
274+
Ok(v) => v["clipboard_text"].as_str().unwrap_or("").to_string(),
275+
Err(_) => String::new(),
276+
};
277+
let result = clipboard.set_contents(text.clone());
278+
let msg = match result {
279+
Ok(_) => "Clipboard set".to_string(),
280+
Err(e) => format!("Failed to set clipboard: {}", e),
281+
};
282+
tool_outputs.push(ToolsOutputs {
283+
tool_call_id: Some(tool.id.clone()),
284+
output: Some(msg.into()),
285+
});
286+
}
223287
}
224288

225289
if let Err(e) = submit_tool_outputs(client, run_object, tool_outputs, speak_stream).await {

0 commit comments

Comments
 (0)