speak-to-ai/io.github.ashbuk.speak-to-ai.appdata.xml at master · AshBuk/speak-to-ai · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
<?xml version="1.0" encoding="UTF-8"?>
<component type="desktop-application">
  <!--
    Identity of the component. The id is reused, with a ".desktop" suffix,
    by the launchable element further down in this file.
  -->
  <id>io.github.ashbuk.speak-to-ai</id>
  <!-- Two separate licence fields: one for this metadata file, one for the project itself. -->
  <metadata_license>MIT</metadata_license>
  <project_license>MIT</project_license>
  <!-- Display name and one-line summary. -->
  <name>Speak-to-AI</name>
  <summary>Offline speech-to-text desktop app</summary>
  <!--
    Long description. Only the markup AppStream permits inside a description
    is used here: p, ul/ol and li, plus em and code for inline text. HTML tags
    such as b and a are rejected by metainfo validators, so emphasis uses em
    and documents are referenced by their repository path instead of
    hyperlinks (the repository itself is linked through the url elements of
    this component).
  -->
  <description>
    <p>
      A minimalist, privacy-focused desktop application for offline speech-to-text.
      Converts voice input directly into any active window (editors, browsers, IDEs, AI assistants)
      using the Whisper model locally for speech recognition.
    </p>
    <p>
      For system tray integration on GNOME (install AppIndicator extension) and automatic typing on Wayland (setup ydotool),
      see the Desktop Environment Support Guide (docs/Desktop_Environment_Support.md in the project repository).
    </p>
    <p>Features:</p>
    <ul>
      <li><em>Offline speech-to-text, privacy-first</em>: all processing happens locally</li>
      <li><em>Portable</em>: AppImage package</li>
      <li><em>Cross-platform support</em> for X11 and Wayland</li>
      <li><em>Linux DEs</em>: native integration with GNOME, KDE, and others</li>
      <li><em>GPU + CPU support</em>: Vulkan backend for faster transcription (auto-fallback to CPU)</li>
      <li><em>Voice typing or clipboard mode</em></li>
      <li><em>Flexible audio recording</em>: arecord (ALSA) or ffmpeg (PulseAudio/PipeWire), see the audio pipeline diagram (docs/AUDIO_PIPELINE_DIAGRAM.txt in the project repository)</li>
      <li><em>Multi-language support, custom hotkey binding, visual notifications</em></li>
      <li><em>Model management</em>: switch between base, small, medium, and large-v3 whisper models via tray or CLI</li>
    </ul>
  </description>
  <screenshots>
    <!--
      NOTE(review): the only screenshot is marked as default and contains a
      video with no still image. AppStream presumably expects the default
      screenshot to be a static image, and video entries to be WebM/Matroska
      (VP9 or AV1) with container and codec attributes declared. Confirm with
      "appstreamcli validate" and add an image child once a hosted still is
      available.
    -->
    <screenshot type="default">
      <caption>Quick demo: Voice recording and automatic text output</caption>
      <video>https://github.com/user-attachments/assets/e8448f73-57f2-46dc-98f9-e36f685a6587</video>
    </screenshot>
  </screenshots>
  <!--
    Project links, one per url type. The homepage and vcs-browser entries
    intentionally share the same GitHub repository address.
  -->
  <url type="homepage">https://github.com/AshBuk/speak-to-ai</url>
  <url type="bugtracker">https://github.com/AshBuk/speak-to-ai/issues</url>
  <url type="vcs-browser">https://github.com/AshBuk/speak-to-ai</url>
  <url type="help">https://github.com/AshBuk/speak-to-ai/blob/master/README.md</url>
  <url type="donation">https://github.com/sponsors/AshBuk</url>
  <!--
    Developer identity. developer_name is the legacy tag, kept so that older
    AppStream tooling still shows the author; the developer element is the
    form introduced with AppStream 1.0. Both carry the same name.
    NOTE(review): the developer id below is derived from the component id
    prefix; confirm it is the identifier the author wants to publish.
  -->
  <developer_name>Asher Buk</developer_name>
  <developer id="io.github.ashbuk">
    <name>Asher Buk</name>
  </developer>
  <!-- Desktop entry associated with this component (component id plus ".desktop"). -->
  <launchable type="desktop-id">io.github.ashbuk.speak-to-ai.desktop</launchable>
  <!-- Executable name provided by this component. -->
  <provides>
    <binary>speak-to-ai</binary>
  </provides>
  <!--
    Release history, listed newest first. Only the most recent entry (1.8.0)
    carries release notes; earlier entries record version and date only.
  -->
  <releases>
    <release version="1.8.0" date="2026-03-21">
      <description>
        <p>Cobra CLI framework migration. Add Whisper Large v3 Turbo (Q5_0) model — fast, GPU-optimized variant for modern laptops.</p>
      </description>
    </release>
    <release version="1.7.2" date="2026-03-15"/>
    <release version="1.7.1" date="2026-03-02"/>
    <release version="1.7.0" date="2026-03-02"/>
    <release version="1.6.3" date="2026-02-23"/>
    <release version="1.6.2" date="2026-02-07"/>
    <release version="1.6.1" date="2026-01-19"/>
    <release version="1.6.0" date="2026-01-14"/>
    <release version="1.5.2" date="2026-01-07"/>
    <release version="1.5.1" date="2026-01-06"/>
    <release version="1.5.0" date="2026-01-06"/>
    <release version="1.4.2" date="2025-12-29"/>
    <release version="1.4.1" date="2025-12-25"/>
    <release version="1.4.0" date="2025-12-15"/>
    <release version="1.3.4" date="2025-12-07"/>
    <release version="1.3.3" date="2025-11-27"/>
    <release version="1.3.2" date="2025-11-13"/>
    <release version="1.3.1" date="2025-10-31"/>
    <release version="1.3.0" date="2025-10-29"/>
    <release version="1.2.0" date="2025-10-17"/>
    <release version="1.1.0" date="2025-10-06"/>
  </releases>
  <!-- OARS 1.1 content rating with no content attributes declared. -->
  <content_rating type="oars-1.1"/>
  <!-- Keywords associated with the component, one term per keyword element. -->
  <keywords>
    <keyword>speech</keyword>
    <keyword>voice</keyword>
    <keyword>transcription</keyword>
    <keyword>ai</keyword>
    <keyword>whisper</keyword>
    <keyword>offline</keyword>
    <keyword>privacy</keyword>
  </keywords>
</component>