smallnest
diff --git a/.env.example b/.env.example
Lines changed: 24 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 7 additions & 1 deletion
diff --git a/README_CN.md b/README_CN.md
Lines changed: 7 additions & 0 deletions
diff --git a/backend/config.go b/backend/config.go
Lines changed: 6 additions & 0 deletions
diff --git a/backend/frontend/index.html b/backend/frontend/index.html
Lines changed: 6 additions & 6 deletions
@@ -49,6 +49,30 @@ CHUNK_OVERLAP=200
 # Set to false to use original simple text extraction (may not work well for binary formats)
 ENABLE_MARKITDOWN=true
 
+# Audio Transcription Configuration
+# ============================
+# Enable vosk-transcriber to convert audio files to text (disabled by default when this variable is unset).
+# Requires the vosk-transcriber CLI tool to be installed.
+# Installation: https://github.com/alphacep/vosk-transcriber
+# Supported formats: mp3, wav, m4a, aac, flac, ogg, wma, opus, mp4, avi, mkv, mov, webm
+ENABLE_VOSK_TRANSCRIBER=true
+
+# Path to the Vosk model directory (download models from https://alphacephei.com/vosk/models).
+# Defaults to /usr/local/share/vosk-model-en when unset.
+# 
+# English models:
+#   - Small: vosk-model-small-en-us-0.15 (fast, good accuracy)
+#   - Full: vosk-model-en-us-0.22 (slower, better accuracy)
+#   Path example: /root/.cache/vosk/vosk-model-small-en-us-0.15
+#
+# Chinese models:
+#   - Small: vosk-model-small-cn-0.22 (fast, good for Mandarin)
+#   Path example: /root/.cache/vosk/vosk-model-small-cn-0.22
+#
+# Download and extract:
+#   wget https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip
+#   unzip vosk-model-small-cn-0.22.zip -d /root/.cache/vosk/
+VOSK_MODEL_PATH=/root/.cache/vosk/vosk-model-small-cn-0.22
+
 # 允许删除（默认为 true）
 ALLOW_DELETE=true
 
 
@@ -21,7 +21,7 @@ An AI-powered knowledge management application that lets you create intelligent
 
 ## ✨ Features
 
-- 📚 **Multiple Source Types** - Upload PDFs, text files, Markdown, DOCX, HTML documents, and video URLs (YouTube, Bilibili with automatic subtitle extraction)
+- 📚 **Multiple Source Types** - Upload PDFs, text files, Markdown, DOCX, HTML documents, audio files (MP3, WAV, M4A, etc.), and video URLs (YouTube, Bilibili with automatic subtitle extraction)
 - 🤖 **AI-Powered Chat** - Ask questions and get answers based on your sources
 - ✨ **Multiple Transformations** - Generate summaries, FAQs, study guides, outlines, timelines, glossaries, quizzes, mindmaps, infographics and podcast scripts
 - 📊 **Infographic Generation** - Create beautiful, hand-drawn style infographics from your content using Google's Gemini Nano Banana
@@ -38,6 +38,7 @@ An AI-powered knowledge management application that lets you create intelligent
 - An LLM API key (OpenAI) or Ollama running locally
 - [markitdown](https://github.com/microsoft/markitdown) (optional, for better document conversion)
 - [yt-dlp](https://github.com/yt-dlp/yt-dlp) (optional, for extracting subtitles from YouTube and Bilibili videos)
+- [vosk-transcriber](https://github.com/alphacep/vosk-transcriber) (optional, for transcribing audio files to text)
 
 ### Installation
 
@@ -171,6 +172,7 @@ You can add content to your notebook in three ways:
 - Click the "+" button in the Sources panel
 - Drag and drop or browse for files
 - Supported: PDF, TXT, MD, DOCX, HTML
+- Audio files: MP3, WAV, M4A, AAC, FLAC, OGG, WMA, OPUS (auto-transcribed to text)
 
 **Paste Text**
 
@@ -230,6 +232,10 @@ CHUNK_OVERLAP=200      # Overlap between chunks
 # Document Conversion
 ENABLE_MARKITDOWN=true  # Use Microsoft markitdown for better PDF/DOCX conversion
 
+# Audio Transcription
+ENABLE_VOSK_TRANSCRIBER=false  # Enable audio file transcription
+VOSK_MODEL_PATH=/usr/local/share/vosk-model-en  # Path to the Vosk model directory
+
 # Podcast Generation
 ENABLE_PODCAST=true
 PODCAST_VOICE=alloy    # Options: alloy, echo, fable, onyx, nova, shimmer
 
@@ -230,6 +230,13 @@ CHUNK_OVERLAP=200      # 分块重叠
 # 文档转换
 ENABLE_MARKITDOWN=true  # 使用 Microsoft markitdown 更好地转换 PDF/DOCX
 
+# 音频转录
+ENABLE_VOSK_TRANSCRIBER=false  # 启用音频文件转录
+# 中文模型路径示例
+VOSK_MODEL_PATH=/root/.cache/vosk/vosk-model-small-cn-0.22
+# 英文模型路径示例
+# VOSK_MODEL_PATH=/root/.cache/vosk/vosk-model-small-en-us-0.15
+
 # 播客生成
 ENABLE_PODCAST=true
 PODCAST_VOICE=alloy    # 选项：alloy、echo、fable、onyx、nova、shimmer
 
@@ -58,6 +58,10 @@ type Config struct {
 	// Document conversion
 	EnableMarkitdown bool
 
+	// Audio transcription
+	EnableVoskTranscriber bool
+	VoskModelPath        string // Path to vosk model directory
+
 	// Demo settings
 	AllowMultipleNotesOfSameType bool
 
@@ -133,6 +137,8 @@ func LoadConfig() Config {
 		EnablePodcast:                getEnvBool("ENABLE_PODCAST", true),
 		PodcastVoice:                 getEnv("PODCAST_VOICE", "alloy"),
 		EnableMarkitdown:             getEnvBool("ENABLE_MARKITDOWN", true),
+		EnableVoskTranscriber:        getEnvBool("ENABLE_VOSK_TRANSCRIBER", false),
+		VoskModelPath:                getEnv("VOSK_MODEL_PATH", "/usr/local/share/vosk-model-en"),
 		AllowMultipleNotesOfSameType: getEnvBool("ALLOW_MULTIPLE_NOTES_OF_SAME_TYPE", true),
 		LangChainAPIKey:              getEnv("LANGCHAIN_API_KEY", ""),
 		LangChainProject:             getEnv("LANGCHAIN_PROJECT", "notex"),
 
@@ -203,15 +203,15 @@ <h2 class="panel-title">来源</h2>
                     <div class="panel-header">
                         <div class="panel-tabs" id="centerPanelTabs">
                             <button class="tab-btn active" data-tab="chat">对话</button>
-                            <button class="tab-btn" data-tab="note" id="tabBtnNote">
-                                笔记
-                                <span class="tab-close" id="btnCloseNote">×</span>
+                            <button class="tab-btn" data-tab="sessions" id="tabBtnSessions">
+                                会话历史
                             </button>
-                            <button class="tab-btn hidden" data-tab="notes_list" id="tabBtnNotesList">
+                            <button class="tab-btn" data-tab="notes_list" id="tabBtnNotesList">
                                 笔记列表
                             </button>
-                            <button class="tab-btn" data-tab="sessions" id="tabBtnSessions">
-                                会话历史
+                            <button class="tab-btn" data-tab="note" id="tabBtnNote">
+                                笔记
+                                <span class="tab-close" id="btnCloseNote">×</span>
                             </button>
                         </div>
                         <div class="panel-header-actions">