Skip to content

Commit ae488e6

Browse files
authored
Merge pull request #103 from sloganking/codex/add-v2-media-playback-commands
Add media controls to assistant_v2
2 parents 4ec4a24 + 553a0ca commit ae488e6

File tree

3 files changed

+117
-1
lines changed

3 files changed

+117
-1
lines changed

assistant_v2/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ clap = { version = "4.4.6", features = ["derive"] }
2323
colored = "2.0.4"
2424
clipboard = "0.5.0"
2525
open = "5.3.1"
26+
enigo = "0.1.3"

assistant_v2/FEATURE_PROGRESS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ This document tracks which features from the original assistant have been implem
66
| --- | --- |
77
| Screen brightness control | Done |
88
| System volume adjustment (Windows only) | Pending |
9-
| Media playback commands | Pending |
9+
| Media playback commands | Done |
1010
| Launch applications from voice | Pending |
1111
| Display log files | Pending |
1212
| Get system info | Pending |

assistant_v2/src/main.rs

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use colored::Colorize;
1313
use dotenvy::dotenv;
1414
use futures::StreamExt;
1515
use open;
16+
use enigo::{Enigo, KeyboardControllable};
1617
use speakstream::ss::SpeakStream;
1718
use std::error::Error;
1819
use std::path::PathBuf;
@@ -145,6 +146,30 @@ async fn main() -> Result<(), Box<dyn Error>> {
145146
strict: None,
146147
}
147148
.into(),
149+
FunctionObject {
150+
name: "media_controls".into(),
151+
description: Some("Plays, pauses or seeks media.".into()),
152+
parameters: Some(serde_json::json!({
153+
"type": "object",
154+
"properties": {
155+
"media_button": {
156+
"type": "string",
157+
"enum": [
158+
"MediaStop",
159+
"MediaNextTrack",
160+
"MediaPlayPause",
161+
"MediaPrevTrack",
162+
"VolumeUp",
163+
"VolumeDown",
164+
"VolumeMute"
165+
]
166+
}
167+
},
168+
"required": ["media_button"]
169+
})),
170+
strict: None,
171+
}
172+
.into(),
148173
FunctionObject {
149174
name: "set_clipboard".into(),
150175
description: Some("Sets the clipboard to the given text.".into()),
@@ -484,6 +509,56 @@ async fn handle_requires_action(
484509
});
485510
}
486511

512+
if tool.function.name == "media_controls" {
513+
let button = match serde_json::from_str::<serde_json::Value>(&tool.function.arguments) {
514+
Ok(v) => v["media_button"].as_str().unwrap_or("").to_string(),
515+
Err(_) => String::new(),
516+
};
517+
518+
let mut enigo = Enigo::new();
519+
let msg = match button.as_str() {
520+
"MediaStop" => {
521+
enigo.key_click(enigo::Key::MediaStop);
522+
"MediaStop"
523+
}
524+
"MediaNextTrack" => {
525+
enigo.key_click(enigo::Key::MediaNextTrack);
526+
"MediaNextTrack"
527+
}
528+
"MediaPlayPause" => {
529+
enigo.key_click(enigo::Key::MediaPlayPause);
530+
"MediaPlayPause"
531+
}
532+
"MediaPrevTrack" => {
533+
enigo.key_click(enigo::Key::MediaPrevTrack);
534+
enigo.key_click(enigo::Key::MediaPrevTrack);
535+
"MediaPrevTrack"
536+
}
537+
"VolumeUp" => {
538+
for _ in 0..5 {
539+
enigo.key_click(enigo::Key::VolumeUp);
540+
}
541+
"VolumeUp"
542+
}
543+
"VolumeDown" => {
544+
for _ in 0..5 {
545+
enigo.key_click(enigo::Key::VolumeDown);
546+
}
547+
"VolumeDown"
548+
}
549+
"VolumeMute" => {
550+
enigo.key_click(enigo::Key::VolumeMute);
551+
"VolumeMute"
552+
}
553+
_ => "Unknown button",
554+
};
555+
556+
tool_outputs.push(ToolsOutputs {
557+
tool_call_id: Some(tool.id.clone()),
558+
output: Some(msg.into()),
559+
});
560+
}
561+
487562
if tool.function.name == "open_openai_billing" {
488563
let result = open::that("https://platform.openai.com/usage");
489564
let msg = match result {
@@ -711,6 +786,46 @@ mod tests {
711786
}));
712787
}
713788

789+
#[test]
fn includes_media_controls_function() {
    // JSON schema for the tool's arguments: a single required string field,
    // `media_button`, restricted to the listed key names.
    let schema = serde_json::json!({
        "type": "object",
        "properties": {
            "media_button": {
                "type": "string",
                "enum": [
                    "MediaStop",
                    "MediaNextTrack",
                    "MediaPlayPause",
                    "MediaPrevTrack",
                    "VolumeUp",
                    "VolumeDown",
                    "VolumeMute"
                ]
            }
        },
        "required": ["media_button"]
    });

    let media_controls = FunctionObject {
        name: "media_controls".into(),
        description: Some("Plays, pauses or seeks media.".into()),
        parameters: Some(schema),
        strict: None,
    };

    // Build an assistant request whose only tool is the function above.
    let request = CreateAssistantRequestArgs::default()
        .instructions("test")
        .model("gpt-4o")
        .tools(vec![media_controls.into()])
        .build()
        .unwrap();

    // The built request must expose a function tool named `media_controls`.
    let registered_tools = request.tools.unwrap();
    let has_media_controls = registered_tools.iter().any(|tool| {
        matches!(
            tool,
            async_openai::types::AssistantTools::Function(f)
                if f.function.name == "media_controls"
        )
    });
    assert!(has_media_controls);
}
828+
714829
#[test]
715830
fn includes_mute_speech_function() {
716831
let req = CreateAssistantRequestArgs::default()

0 commit comments

Comments
 (0)