zeroclaw-labs · antonvice · Mar 8, 2026 · Mar 8, 2026 · Mar 8, 2026 · Mar 8, 2026
@@ -29,3 +29,5 @@ venv/
 *.pem
 credentials.json
 .worktrees/
+
+.vscode/
@@ -150,6 +150,7 @@ opentelemetry-otlp = { version = "0.31", default-features = false, features = ["
 
 # Serial port for peripheral communication (STM32, etc.)
 tokio-serial = { version = "5", default-features = false, optional = true }
+whisper-rs = { version = "0.15.1", optional = true }
 
 # USB device enumeration (hardware discovery) — only on platforms nusb supports
 # (Linux, macOS, Windows). Android/Termux uses target_os="android" and is excluded.
@@ -186,6 +187,7 @@ libc = "0.2"
 [features]
 default = []
 hardware = ["nusb", "tokio-serial"]
+transcription-local = ["dep:whisper-rs"]
 channel-matrix = ["dep:matrix-sdk"]
 channel-lark = ["dep:prost"]
 memory-postgres = ["dep:postgres"]

diff --git a/check_output_vfix.txt b/check_output_vfix.txt
@@ -0,0 +1,36 @@
+    Checking zeroclaw v0.1.7 (/Users/antonvice/Documents/programming/zeroclaw)
+error[E0609]: no field `voice_messages_enabled` on type `&TelegramChannel`
+    --> src/channels/telegram.rs:1132:28
+     |
+1132 |         let text = if self.voice_messages_enabled {
+     |                            ^^^^^^^^^^^^^^^^^^^^^^ unknown field
+     |
+     = note: available fields are: `bot_token`, `allowed_users`, `pairing`, `client`, `typing_handle` ... and 10 others
+
+error[E0560]: struct `TelegramChannel` has no field named `voice_messages_enabled`
+   --> src/channels/telegram.rs:347:13
+    |
+347 |             voice_messages_enabled: false,
+    |             ^^^^^^^^^^^^^^^^^^^^^^ `TelegramChannel` does not have this field
+    |
+    = note: all struct fields are already assigned
+
+error[E0609]: no field `voice_messages_enabled` on type `TelegramChannel`
+   --> src/channels/telegram.rs:386:14
+    |
+386 |         self.voice_messages_enabled = enabled;
+    |              ^^^^^^^^^^^^^^^^^^^^^^ unknown field
+    |
+    = note: available fields are: `bot_token`, `allowed_users`, `pairing`, `client`, `typing_handle` ... and 10 others
+
+error[E0560]: struct `LarkConfig` has no field named `allow_group_mentions`
+    --> src/onboard/wizard.rs:4960:21
+     |
+4960 |                     allow_group_mentions: true,
+     |                     ^^^^^^^^^^^^^^^^^^^^ `LarkConfig` does not have this field
+     |
+     = note: all struct fields are already assigned
+
+Some errors have detailed explanations: E0560, E0609.
+For more information about an error, try `rustc --explain E0560`.
+error: could not compile `zeroclaw` (lib) due to 4 previous errors
@@ -162,6 +162,10 @@ Telegram notes:
 
 - `interrupt_on_new_message = true` preserves interrupted user turns in conversation history, then restarts generation on the newest message.
 - Interruption scope is strict: same sender in the same chat. Messages from different chats are processed independently.
+- **Voice Transcription**: Telegram voice notes (`.ogg`) can be transcribed using the global transcription service (configured in the `[transcription]` section).
+  - Supported providers: **Groq** (Cloud) and **Local** (Whisper-RS).
+  - Local transcription requires `ffmpeg` and automatically downloads a small model on first use.
+  - Transcribed text is prefixed with `🎙️ [Voice transcription]:`.
 
 ### 4.2 Discord
 

@@ -488,8 +488,15 @@ Notes:
 - If using cloud APIs (OpenAI, Anthropic, etc.), you can reduce this to `60` or lower.
 - Values below `30` are clamped to `30` to avoid immediate timeout churn.
 - When a timeout occurs, users receive: `⚠️ Request timed out while waiting for the model. Please try again.`
-- Telegram-only interruption behavior is controlled with `channels_config.telegram.interrupt_on_new_message` (default `false`).
-  When enabled, a newer message from the same sender in the same chat cancels the in-flight request and preserves interrupted user context.
+- Telegram-only configuration options:
+  ```toml
+  [channels_config.telegram]
+  mention_only = false              # optional: require @mention in groups
+  interrupt_on_new_message = false  # optional: cancel in-flight same-sender same-chat request
+  voice_messages = true             # optional: enable local voice-to-text transcription (requires ffmpeg)
+  whisper_model = "models/tiny.bin" # optional: path to a GGML model .bin file
+  ```
+  When `interrupt_on_new_message` is enabled, a newer message from the same sender in the same chat cancels the in-flight request and preserves interrupted user context.
 - While `zeroclaw channel start` is running, updates to `default_provider`, `default_model`, `default_temperature`, `api_key`, `api_url`, and `reliability.*` are hot-applied from `config.toml` on the next inbound message.
 
 ### `[channels_config.nostr]`
@@ -629,6 +636,26 @@ Notes:
 - Place `.md`/`.txt` datasheet files named by board (e.g. `nucleo-f401re.md`, `rpi-gpio.md`) in `datasheet_dir` for RAG retrieval.
 - See [hardware-peripherals-design.md](hardware-peripherals-design.md) for board protocol and firmware notes.
 
+## Transcription Configuration
+
+Global settings for voice-to-text transcription.
+
+| Key | Default | Purpose |
+|---|---|---|
+| `enabled` | `false` | Enable transcription globally |
+| `provider` | `"groq"` | Transcription provider: `"groq"` or `"local"` |
+| `api_url` | Groq API URL | API endpoint (Groq only) |
+| `model` | `"whisper-large-v3-turbo"` | Model name (used by Groq; a hint for the local provider) |
+| `whisper_model_path` | unset | Custom path to local GGML `.bin` model (Local only) |
+| `max_duration_secs` | `120` | Maximum audio duration to process, in seconds |
+
+```toml
+[transcription]
+enabled = true
+provider = "local"
+# whisper_model_path = "models/ggml-base.bin"
+```
+
 ## Security-Relevant Defaults
 
 - deny-by-default channel allowlists (`[]` means deny all)

@@ -298,6 +298,8 @@ pub struct LarkChannel {
     mention_only: bool,
     /// When true, use Feishu (CN) endpoints; when false, use Lark (international).
     use_feishu: bool,
+    /// The platform (Lark vs Feishu) for selecting base URLs and locale headers.
+    platform: LarkPlatform,
     /// How to receive events: WebSocket long-connection or HTTP webhook.
     receive_mode: crate::config::schema::LarkReceiveMode,
     /// Cached tenant access token
@@ -321,6 +323,7 @@ impl LarkChannel {
             verification_token,
             port,
             allowed_users,
+            mention_only,
             LarkPlatform::Lark,
         )
     }
@@ -331,6 +334,7 @@ impl LarkChannel {
         verification_token: String,
         port: Option<u16>,
         allowed_users: Vec<String>,
+        mention_only: bool,
         platform: LarkPlatform,
     ) -> Self {
         Self {
@@ -341,7 +345,8 @@ impl LarkChannel {
             allowed_users,
             resolved_bot_open_id: Arc::new(StdRwLock::new(None)),
             mention_only,
-            use_feishu: true,
+            use_feishu: platform == LarkPlatform::Feishu,
+            platform,
             receive_mode: crate::config::schema::LarkReceiveMode::default(),
             tenant_token: Arc::new(RwLock::new(None)),
             ws_seen_ids: Arc::new(RwLock::new(HashMap::new())),
@@ -363,11 +368,31 @@ impl LarkChannel {
             config.port,
             config.allowed_users.clone(),
             config.mention_only,
+            platform,
         );
         ch.receive_mode = config.receive_mode.clone();
         ch
     }
 
+    pub fn from_lark_config(config: &crate::config::schema::LarkConfig) -> Self {
+        Self::from_config(config)
+    }
-    pub fn from_lark_config(config: &crate::config::schema::LarkConfig) -> Self {
-        Self::from_config(config)
-    }
+    pub fn from_lark_config(config: &crate::config::schema::LarkConfig) -> Self {
+        let mut ch = Self::new_with_platform(
+            config.app_id.clone(),
+            config.app_secret.clone(),
+            config.verification_token.clone().unwrap_or_default(),
+            config.port,
+            config.allowed_users.clone(),
+            config.mention_only,
+            LarkPlatform::Lark,
+        );
+        ch.receive_mode = config.receive_mode.clone();
+        ch
+    }
-    pub fn from_lark_config(config: &crate::config::schema::LarkConfig) -> Self {
-        Self::from_config(config)
-    }
+    pub fn from_lark_config(config: &crate::config::schema::LarkConfig) -> Self {
+        let mut ch = Self::new_with_platform(
+            config.app_id.clone(),
+            config.app_secret.clone(),
+            config.verification_token.clone().unwrap_or_default(),
+            config.port,
+            config.allowed_users.clone(),
+            config.mention_only,
+            LarkPlatform::Lark,
+        );
+        ch.receive_mode = config.receive_mode.clone();
+        ch
+    }
+
+    /// Builds a channel from a Feishu configuration, targeting `LarkPlatform::Feishu`.
+    ///
+    /// A missing verification token falls back to an empty string, and the
+    /// receive mode is copied from `config`.
+    pub fn from_feishu_config(config: &crate::config::schema::FeishuConfig) -> Self {
+        let mut ch = Self::new_with_platform(
+            config.app_id.clone(),
+            config.app_secret.clone(),
+            config.verification_token.clone().unwrap_or_default(),
+            config.port,
+            config.allowed_users.clone(),
+            config.mention_only,
+            LarkPlatform::Feishu,
+        );
+        // `use_feishu` needs no explicit assignment here: `new_with_platform`
+        // already derives it from the platform argument.
+        ch.receive_mode = config.receive_mode.clone();
+        ch
+    }
+
     /// Returns a `reqwest::Client` obtained from
     /// `crate::config::build_runtime_proxy_client`, keyed by the proxy
     /// service key of this channel's platform.
     fn http_client(&self) -> reqwest::Client {
         crate::config::build_runtime_proxy_client(self.platform.proxy_service_key())
     }
@@ -1024,16 +1049,12 @@ impl LarkChannel {
 
         let (text, post_mentioned_open_ids): (String, Vec<String>) = match msg_type {
             "text" => {
-                let extracted = serde_json::from_str::<serde_json::Value>(content_str)
-                    .ok()
-                    .and_then(|v| {
-                        v.get("text")
-                            .and_then(|t| t.as_str())
-                            .filter(|s| !s.is_empty())
-                            .map(String::from)
-                    });
-                match extracted {
-                    Some(t) => (t, Vec::new()),
+                let v: serde_json::Value = match serde_json::from_str::<serde_json::Value>(content_str) {
+                    Ok(v) => v,
+                    Err(_) => return messages,
+                };
+                match v.get("text").and_then(|t| t.as_str()).filter(|s| !s.is_empty()) {
+                    Some(t) => (t.to_string(), Vec::new()),
                     None => return messages,
                 }
             }

@@ -2697,6 +2697,8 @@ fn collect_configured_channels(
                 )
                 .with_streaming(tg.stream_mode, tg.draft_update_interval_ms)
                 .with_transcription(config.transcription.clone())
+                .with_voice_messages(tg.voice_messages)
+                .with_whisper_model(tg.whisper_model.clone())
                 .with_workspace_dir(config.workspace_dir.clone()),
             ),
         });