Skip to content

Commit d0db5f3

Browse files
committed
✨ (assistant_v2): add voice and speed controls
1 parent 35cde64 commit d0db5f3

File tree

2 files changed

+222
-2
lines changed

2 files changed

+222
-2
lines changed

assistant_v2/FEATURE_PROGRESS.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ This document tracks which features from the original assistant have been implem
1414
| Run internet speed tests | Pending |
1515
| Set the clipboard contents | Done |
1616
| Timers with alarm sounds | Pending |
17-
| Change voice or speaking speed | Pending |
18-
| Mute/unmute voice output | Pending |
17+
| Change voice or speaking speed | Done |
18+
| Mute/unmute voice output | Done |
1919
| Open OpenAI billing page | Done |
2020
| Push-to-talk text-to-speech interface | Done |
2121
| Interrupt AI speech with push-to-talk | Done |

assistant_v2/src/main.rs

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,36 @@ struct Opt {
4444
duck_ptt: bool,
4545
}
4646

47+
fn parse_voice(name: &str) -> Option<Voice> {
48+
match name.to_lowercase().as_str() {
49+
"alloy" => Some(Voice::Alloy),
50+
"ash" => Some(Voice::Ash),
51+
"coral" => Some(Voice::Coral),
52+
"echo" => Some(Voice::Echo),
53+
"fable" => Some(Voice::Fable),
54+
"onyx" => Some(Voice::Onyx),
55+
"nova" => Some(Voice::Nova),
56+
"sage" => Some(Voice::Sage),
57+
"shimmer" => Some(Voice::Shimmer),
58+
_ => None,
59+
}
60+
}
61+
62+
fn voice_to_str(voice: &Voice) -> &'static str {
63+
match voice {
64+
Voice::Alloy => "alloy",
65+
Voice::Ash => "ash",
66+
Voice::Coral => "coral",
67+
Voice::Echo => "echo",
68+
Voice::Fable => "fable",
69+
Voice::Onyx => "onyx",
70+
Voice::Nova => "nova",
71+
Voice::Sage => "sage",
72+
Voice::Shimmer => "shimmer",
73+
_ => "unknown",
74+
}
75+
}
76+
4777
#[tokio::main]
4878
async fn main() -> Result<(), Box<dyn Error>> {
4979
let opt = Opt::parse();
@@ -138,6 +168,76 @@ async fn main() -> Result<(), Box<dyn Error>> {
138168
strict: None,
139169
}
140170
.into(),
171+
FunctionObject {
172+
name: "set_speech_speed".into(),
173+
description: Some(
174+
"Sets how fast the AI voice speaks. Speed must be between 0.5 and 100.".into(),
175+
),
176+
parameters: Some(serde_json::json!({
177+
"type": "object",
178+
"properties": {"speed": {"type": "number"}},
179+
"required": ["speed"],
180+
})),
181+
strict: None,
182+
}
183+
.into(),
184+
FunctionObject {
185+
name: "get_speech_speed".into(),
186+
description: Some("Returns the current AI voice speech speed.".into()),
187+
parameters: Some(serde_json::json!({
188+
"type": "object",
189+
"properties": {},
190+
"required": [],
191+
})),
192+
strict: None,
193+
}
194+
.into(),
195+
FunctionObject {
196+
name: "mute_speech".into(),
197+
description: Some("Mutes the AI voice output.".into()),
198+
parameters: Some(serde_json::json!({
199+
"type": "object",
200+
"properties": {},
201+
"required": [],
202+
})),
203+
strict: None,
204+
}
205+
.into(),
206+
FunctionObject {
207+
name: "unmute_speech".into(),
208+
description: Some("Unmutes the AI voice output.".into()),
209+
parameters: Some(serde_json::json!({
210+
"type": "object",
211+
"properties": {},
212+
"required": [],
213+
})),
214+
strict: None,
215+
}
216+
.into(),
217+
FunctionObject {
218+
name: "set_voice".into(),
219+
description: Some(
220+
"Changes the AI speaking voice. Pass one of: alloy, ash, coral, echo, fable, onyx, nova, sage, shimmer.".into(),
221+
),
222+
parameters: Some(serde_json::json!({
223+
"type": "object",
224+
"properties": {"voice": {"type": "string"}},
225+
"required": ["voice"],
226+
})),
227+
strict: None,
228+
}
229+
.into(),
230+
FunctionObject {
231+
name: "get_voice".into(),
232+
description: Some("Returns the name of the current AI voice.".into()),
233+
parameters: Some(serde_json::json!({
234+
"type": "object",
235+
"properties": {},
236+
"required": [],
237+
})),
238+
strict: None,
239+
}
240+
.into(),
141241
])
142242
.build()?;
143243

@@ -382,6 +482,73 @@ async fn handle_requires_action(
382482
output: Some(msg.into()),
383483
});
384484
}
485+
486+
if tool.function.name == "set_speech_speed" {
487+
let speed = match serde_json::from_str::<serde_json::Value>(&tool.function.arguments) {
488+
Ok(v) => v["speed"].as_f64().unwrap_or(1.0) as f32,
489+
Err(_) => 1.0,
490+
};
491+
let msg = if (0.5..=100.0).contains(&speed) {
492+
speak_stream.lock().unwrap().set_speech_speed(speed);
493+
format!("Speech speed set to {}", speed)
494+
} else {
495+
"Speed must be between 0.5 and 100.0".to_string()
496+
};
497+
tool_outputs.push(ToolsOutputs {
498+
tool_call_id: Some(tool.id.clone()),
499+
output: Some(msg.into()),
500+
});
501+
}
502+
503+
if tool.function.name == "get_speech_speed" {
504+
let speed = speak_stream.lock().unwrap().get_speech_speed();
505+
tool_outputs.push(ToolsOutputs {
506+
tool_call_id: Some(tool.id.clone()),
507+
output: Some(format!("{}", speed).into()),
508+
});
509+
}
510+
511+
if tool.function.name == "mute_speech" {
512+
speak_stream.lock().unwrap().mute();
513+
tool_outputs.push(ToolsOutputs {
514+
tool_call_id: Some(tool.id.clone()),
515+
output: Some("AI voice muted".into()),
516+
});
517+
}
518+
519+
if tool.function.name == "unmute_speech" {
520+
speak_stream.lock().unwrap().unmute();
521+
tool_outputs.push(ToolsOutputs {
522+
tool_call_id: Some(tool.id.clone()),
523+
output: Some("AI voice unmuted".into()),
524+
});
525+
}
526+
527+
if tool.function.name == "set_voice" {
528+
let name = match serde_json::from_str::<serde_json::Value>(&tool.function.arguments) {
529+
Ok(v) => v["voice"].as_str().unwrap_or("").to_string(),
530+
Err(_) => String::new(),
531+
};
532+
let msg = match parse_voice(&name) {
533+
Some(v) => {
534+
speak_stream.lock().unwrap().set_voice(v);
535+
format!("Voice set to {}", name.to_lowercase())
536+
}
537+
None => "Invalid voice name".to_string(),
538+
};
539+
tool_outputs.push(ToolsOutputs {
540+
tool_call_id: Some(tool.id.clone()),
541+
output: Some(msg.into()),
542+
});
543+
}
544+
545+
if tool.function.name == "get_voice" {
546+
let name = voice_to_str(&speak_stream.lock().unwrap().get_voice());
547+
tool_outputs.push(ToolsOutputs {
548+
tool_call_id: Some(tool.id.clone()),
549+
output: Some(format!("{}", name).into()),
550+
});
551+
}
385552
}
386553

387554
if let Err(e) = submit_tool_outputs(client, run_object, tool_outputs, speak_stream).await {
@@ -503,4 +670,57 @@ mod tests {
503670
_ => false,
504671
}));
505672
}
673+
674+
#[test]
fn includes_set_speech_speed_function() {
    // Build a request whose only tool is the `set_speech_speed` function.
    let speed_tool = FunctionObject {
        name: "set_speech_speed".into(),
        description: Some("Sets how fast the AI voice speaks.".into()),
        parameters: Some(serde_json::json!({
            "type": "object",
            "properties": {"speed": {"type": "number"}},
            "required": ["speed"],
        })),
        strict: None,
    };
    let request = CreateAssistantRequestArgs::default()
        .instructions("test")
        .model("gpt-4o")
        .tools(vec![speed_tool.into()])
        .build()
        .unwrap();

    // The built request must expose a function tool with the expected name.
    let registered = request.tools.unwrap();
    let found = registered.iter().any(|tool| {
        matches!(
            tool,
            async_openai::types::AssistantTools::Function(f)
                if f.function.name == "set_speech_speed"
        )
    });
    assert!(found);
}
700+
701+
#[test]
fn includes_mute_speech_function() {
    // Build a request whose only tool is the parameterless `mute_speech` function.
    let mute_tool = FunctionObject {
        name: "mute_speech".into(),
        description: Some("Mutes the AI voice output.".into()),
        parameters: Some(serde_json::json!({
            "type": "object",
            "properties": {},
            "required": [],
        })),
        strict: None,
    };
    let request = CreateAssistantRequestArgs::default()
        .instructions("test")
        .model("gpt-4o")
        .tools(vec![mute_tool.into()])
        .build()
        .unwrap();

    // The built request must expose a function tool with the expected name.
    let registered = request.tools.unwrap();
    let found = registered.iter().any(|tool| {
        matches!(
            tool,
            async_openai::types::AssistantTools::Function(f)
                if f.function.name == "mute_speech"
        )
    });
    assert!(found);
}
506726
}

0 commit comments

Comments
 (0)