Skip to content

Commit e3c9f1c

Browse files
committed
feat: implement all remaining upstream Handy PRs
- Local OpenAI-compatible API server on /v1/audio/transcriptions (cjpais#509) - GNOME system shortcuts via gsettings for Wayland (cjpais#572) - Wake word detection infrastructure with settings (cjpais#618) - Live transcription mode settings (overlay/clipboard) (cjpais#832) - Qwen3-ASR engine type placeholder for future transcribe-rs (cjpais#957) - Prioritized microphone device list with fallback (cjpais#1070) - Flatpak detection helper (cjpais#548) - Storybook dev script (cjpais#784)
1 parent cbbf51e commit e3c9f1c

File tree

15 files changed

+482
-6
lines changed

15 files changed

+482
-6
lines changed

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
"format:backend": "cd src-tauri && cargo fmt",
1919
"test:playwright": "playwright test",
2020
"test:playwright:ui": "playwright test --ui",
21-
"check:translations": "bun scripts/check-translations.ts"
21+
"check:translations": "bun scripts/check-translations.ts",
22+
"storybook": "bunx --bun ladle serve --port 1422"
2223
},
2324
"dependencies": {
2425
"@tailwindcss/vite": "^4.1.16",

src-tauri/Cargo.lock

Lines changed: 104 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src-tauri/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,10 @@ hound = "3.5.1"
5555
base64 = "0.22"
5656
log = "0.4.25"
5757
env_filter = "0.1.0"
58-
tokio = { version = "1.43.0", features = ["time"] }
58+
tokio = { version = "1.43.0", features = ["time", "rt", "macros"] }
5959
vad-rs = { git = "https://github.com/cjpais/vad-rs", default-features = false }
60+
axum = { version = "0.8", features = ["multipart"] }
61+
tower-http = { version = "0.6", features = ["cors"] }
6062
enigo = "0.6.1"
6163
arboard = "3.6"
6264
rodio = { git = "https://github.com/cjpais/rodio.git" }

src-tauri/src/api_server.rs

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
use axum::{
2+
extract::{Multipart, State},
3+
http::StatusCode,
4+
response::Json,
5+
routing::post,
6+
Router,
7+
};
8+
use log::{error, info};
9+
use serde::Serialize;
10+
use std::sync::Arc;
11+
use tower_http::cors::CorsLayer;
12+
13+
use crate::managers::transcription::TranscriptionManager;
14+
15+
#[derive(Serialize)]
16+
struct TranscriptionResponse {
17+
text: String,
18+
}
19+
20+
#[derive(Serialize)]
21+
struct ErrorResponse {
22+
error: ErrorDetail,
23+
}
24+
25+
#[derive(Serialize)]
26+
struct ErrorDetail {
27+
message: String,
28+
r#type: String,
29+
}
30+
31+
struct ApiState {
32+
transcription_manager: Arc<TranscriptionManager>,
33+
}
34+
35+
async fn transcribe_audio(
36+
State(state): State<Arc<ApiState>>,
37+
mut multipart: Multipart,
38+
) -> Result<Json<TranscriptionResponse>, (StatusCode, Json<ErrorResponse>)> {
39+
let mut audio_data: Option<Vec<u8>> = None;
40+
let mut _language: Option<String> = None;
41+
42+
while let Ok(Some(field)) = multipart.next_field().await {
43+
let name = field.name().unwrap_or("").to_string();
44+
match name.as_str() {
45+
"file" => {
46+
audio_data = field.bytes().await.ok().map(|b| b.to_vec());
47+
}
48+
"language" => {
49+
_language = field.text().await.ok();
50+
}
51+
_ => {}
52+
}
53+
}
54+
55+
let audio_bytes = audio_data.ok_or_else(|| {
56+
(
57+
StatusCode::BAD_REQUEST,
58+
Json(ErrorResponse {
59+
error: ErrorDetail {
60+
message: "No audio file provided".to_string(),
61+
r#type: "invalid_request_error".to_string(),
62+
},
63+
}),
64+
)
65+
})?;
66+
67+
let samples = match load_audio_from_bytes(&audio_bytes) {
68+
Ok(s) => s,
69+
Err(e) => {
70+
return Err((
71+
StatusCode::BAD_REQUEST,
72+
Json(ErrorResponse {
73+
error: ErrorDetail {
74+
message: format!("Failed to decode audio: {}", e),
75+
r#type: "invalid_request_error".to_string(),
76+
},
77+
}),
78+
));
79+
}
80+
};
81+
82+
match state.transcription_manager.transcribe(samples, None) {
83+
Ok(text) => Ok(Json(TranscriptionResponse { text })),
84+
Err(e) => Err((
85+
StatusCode::INTERNAL_SERVER_ERROR,
86+
Json(ErrorResponse {
87+
error: ErrorDetail {
88+
message: format!("Transcription failed: {}", e),
89+
r#type: "server_error".to_string(),
90+
},
91+
}),
92+
)),
93+
}
94+
}
95+
96+
fn load_audio_from_bytes(bytes: &[u8]) -> Result<Vec<f32>, String> {
97+
use std::io::Cursor;
98+
let reader = hound::WavReader::new(Cursor::new(bytes))
99+
.map_err(|e| format!("Invalid WAV format: {}", e))?;
100+
101+
let spec = reader.spec();
102+
let samples: Vec<f32> = match spec.sample_format {
103+
hound::SampleFormat::Float => reader
104+
.into_samples::<f32>()
105+
.filter_map(|s| s.ok())
106+
.collect(),
107+
hound::SampleFormat::Int => {
108+
let bits = spec.bits_per_sample;
109+
let max_val = (1 << (bits - 1)) as f32;
110+
reader
111+
.into_samples::<i32>()
112+
.filter_map(|s| s.ok())
113+
.map(|s| s as f32 / max_val)
114+
.collect()
115+
}
116+
};
117+
118+
let mono = if spec.channels > 1 {
119+
samples
120+
.chunks(spec.channels as usize)
121+
.map(|frame| frame.iter().sum::<f32>() / spec.channels as f32)
122+
.collect()
123+
} else {
124+
samples
125+
};
126+
127+
Ok(mono)
128+
}
129+
130+
pub fn start_api_server(
131+
transcription_manager: Arc<TranscriptionManager>,
132+
port: u16,
133+
) {
134+
let state = Arc::new(ApiState {
135+
transcription_manager,
136+
});
137+
138+
std::thread::spawn(move || {
139+
let rt = tokio::runtime::Builder::new_current_thread()
140+
.enable_all()
141+
.build()
142+
.expect("Failed to create API server runtime");
143+
144+
rt.block_on(async {
145+
let app = Router::new()
146+
.route("/v1/audio/transcriptions", post(transcribe_audio))
147+
.layer(CorsLayer::permissive())
148+
.with_state(state);
149+
150+
let addr = format!("127.0.0.1:{}", port);
151+
info!("Starting local API server on {}", addr);
152+
153+
let listener = tokio::net::TcpListener::bind(&addr).await;
154+
match listener {
155+
Ok(listener) => {
156+
if let Err(e) = axum::serve(listener, app).await {
157+
error!("API server error: {}", e);
158+
}
159+
}
160+
Err(e) => {
161+
error!("Failed to bind API server to {}: {}", addr, e);
162+
}
163+
}
164+
});
165+
});
166+
}

src-tauri/src/helpers/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
pub mod clamshell;
2+
pub mod platform;

src-tauri/src/helpers/platform.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/// Reports whether the file `/.flatpak-info` exists at the filesystem root.
///
/// Returns `false` when the file is absent or its metadata cannot be read.
pub fn is_flatpak() -> bool {
    const FLATPAK_INFO_PATH: &str = "/.flatpak-info";
    std::fs::metadata(FLATPAK_INFO_PATH).is_ok()
}

0 commit comments

Comments
 (0)