-
-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathio.github.ashbuk.speak-to-ai.appdata.xml
More file actions
84 lines (84 loc) · 3.98 KB
/
io.github.ashbuk.speak-to-ai.appdata.xml
File metadata and controls
84 lines (84 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
<?xml version="1.0" encoding="UTF-8"?>
<!--
  AppStream metainfo for the Speak-to-AI desktop application.
  Software centres and packaging tools read this file to build the
  application's catalogue entry (name, summary, description, links, releases).
  Validate after every change, e.g. with: appstreamcli validate FILE
-->
<component type="desktop-application">
  <!-- Identity and licensing. The id must match the launchable desktop-id below (minus ".desktop"). -->
  <id>io.github.ashbuk.speak-to-ai</id>
  <metadata_license>MIT</metadata_license>
  <project_license>MIT</project_license>
  <name>Speak-to-AI</name>
  <summary>Offline speech-to-text desktop app</summary>

  <!--
    Long description. AppStream description markup is limited to p, ul, ol, li
    and inline em/code, so emphasis uses em and documents are referenced by
    their repository path instead of HTML links. The repository itself is
    linked through the url elements further down.
  -->
  <description>
    <p>
      A minimalist, privacy-focused desktop application for offline speech-to-text.
      Converts voice input directly into any active window (editors, browsers, IDEs, AI assistants)
      using the Whisper model locally for speech recognition.
    </p>
    <p>
      For system tray integration on GNOME (install AppIndicator extension) and automatic typing on Wayland (set up ydotool),
      see the Desktop Environment Support Guide (docs/Desktop_Environment_Support.md) in the project repository.
    </p>
    <p>Features:</p>
    <ul>
      <li><em>Offline speech-to-text, privacy-first</em>: all processing happens locally</li>
      <li><em>Portable</em>: AppImage package</li>
      <li><em>Cross-platform support</em> for X11 and Wayland</li>
      <li><em>Linux DEs</em>: native integration with GNOME, KDE, and others</li>
      <li><em>GPU + CPU support</em>: Vulkan backend for faster transcription (auto-fallback to CPU)</li>
      <li><em>Voice typing or clipboard mode</em></li>
      <li><em>Flexible audio recording</em>: arecord (ALSA) or ffmpeg (PulseAudio/PipeWire), see the audio pipeline diagram (docs/AUDIO_PIPELINE_DIAGRAM.txt) in the project repository</li>
      <li><em>Multi-language support, custom hotkey binding, visual notifications</em></li>
      <li><em>Model management</em>: switch between base, small, medium, and large-v3 whisper models via tray or CLI</li>
    </ul>
  </description>

  <!--
    Screenshots.
    NOTE(review): the only screenshot carries a video and no image. The AppStream
    specification expects videos in a WebM/Matroska container with VP9 or AV1, and
    some catalogues require at least one image. Confirm the asset format and
    consider adding an image child.
  -->
  <screenshots>
    <screenshot type="default">
      <caption>Quick demo: Voice recording and automatic text output</caption>
      <video>https://github.com/user-attachments/assets/e8448f73-57f2-46dc-98f9-e36f685a6587</video>
    </screenshot>
  </screenshots>

  <!-- Project links shown next to the catalogue entry. -->
  <url type="homepage">https://github.com/AshBuk/speak-to-ai</url>
  <url type="bugtracker">https://github.com/AshBuk/speak-to-ai/issues</url>
  <url type="vcs-browser">https://github.com/AshBuk/speak-to-ai</url>
  <url type="help">https://github.com/AshBuk/speak-to-ai/blob/master/README.md</url>
  <url type="donation">https://github.com/sponsors/AshBuk</url>

  <!--
    NOTE(review): developer_name is deprecated since AppStream 1.0 in favour of a
    developer element with an id attribute and a name child. Kept here unchanged
    for compatibility with older tooling. Migrate once a developer id is chosen.
  -->
  <developer_name>Asher Buk</developer_name>

  <!-- How the application is launched and which executable it installs. -->
  <launchable type="desktop-id">io.github.ashbuk.speak-to-ai.desktop</launchable>
  <provides>
    <binary>speak-to-ai</binary>
  </provides>

  <!-- Release history, newest first. Only the latest release carries notes. -->
  <releases>
    <release version="1.8.0" date="2026-03-21">
      <description>
        <p>Cobra CLI framework migration. Add Whisper Large v3 Turbo (Q5_0) model — fast, GPU-optimized variant for modern laptops.</p>
      </description>
    </release>
    <release version="1.7.2" date="2026-03-15"/>
    <release version="1.7.1" date="2026-03-02"/>
    <release version="1.7.0" date="2026-03-02"/>
    <release version="1.6.3" date="2026-02-23"/>
    <release version="1.6.2" date="2026-02-07"/>
    <release version="1.6.1" date="2026-01-19"/>
    <release version="1.6.0" date="2026-01-14"/>
    <release version="1.5.2" date="2026-01-07"/>
    <release version="1.5.1" date="2026-01-06"/>
    <release version="1.5.0" date="2026-01-06"/>
    <release version="1.4.2" date="2025-12-29"/>
    <release version="1.4.1" date="2025-12-25"/>
    <release version="1.4.0" date="2025-12-15"/>
    <release version="1.3.4" date="2025-12-07"/>
    <release version="1.3.3" date="2025-11-27"/>
    <release version="1.3.2" date="2025-11-13"/>
    <release version="1.3.1" date="2025-10-31"/>
    <release version="1.3.0" date="2025-10-29"/>
    <release version="1.2.0" date="2025-10-17"/>
    <release version="1.1.0" date="2025-10-06"/>
  </releases>

  <!-- An empty OARS 1.1 rating declares no content-rating attributes. -->
  <content_rating type="oars-1.1"/>

  <!-- Extra search terms for software-centre search. -->
  <keywords>
    <keyword>speech</keyword>
    <keyword>voice</keyword>
    <keyword>transcription</keyword>
    <keyword>ai</keyword>
    <keyword>whisper</keyword>
    <keyword>offline</keyword>
    <keyword>privacy</keyword>
  </keywords>
</component>