|
3 | 3 | from uuid import uuid4 |
4 | 4 | import atexit |
5 | 5 | import shutil |
6 | | -from utils.config_loader import config |
| 6 | +import platform,time |
7 | 7 | import logging |
| 8 | +from utils.config_loader import config |
| 9 | +from utils.runtime_config_loader import RuntimeConfig |
| 10 | +from dto.audiosource import AudioSource |
8 | 11 |
|
logger = logging.getLogger(__name__)

# Chunking parameters read once, at import time, from the static app config.
CHUNK_DURATION = config.audio_preprocessing.chunk_duration_sec  # seconds
SILENCE_THRESH = config.audio_preprocessing.silence_threshold  # in dB
SILENCE_DURATION = config.audio_preprocessing.silence_duration  # in seconds
SEARCH_WINDOW = config.audio_preprocessing.search_window_sec
CLEAN_UP_ON_EXIT = config.app.cleanup_on_exit

# Output directory for generated chunk files; created on import if missing.
CHUNKS_DIR = config.audio_preprocessing.chunk_output_path
os.makedirs(CHUNKS_DIR, exist_ok=True)

# Live-recording ffmpeg processes keyed by session id. Entries are added when
# a microphone recording starts and popped when its generator finishes.
FFMPEG_PROCESSES = {}
| 24 | + |
20 | 25 | @atexit.register |
21 | 26 | def cleanup_chunks_folder(): |
22 | 27 | if os.path.exists(CHUNKS_DIR) and CLEAN_UP_ON_EXIT: |
@@ -70,45 +75,128 @@ def get_closest_silence(silences, target_time, window=SEARCH_WINDOW): |
70 | 75 |
|
71 | 76 | return closest # None if nothing close enough |
72 | 77 |
|
73 | | -def chunk_audio_by_silence(audio_path): |
def process_audio_segment(audio_path, start_time, end_time, chunk_index):
    """Extract one time range of an audio file into its own WAV chunk.

    ffmpeg re-encodes the range as 16 kHz, mono, 16-bit PCM and writes it to a
    uniquely named file inside ``CHUNKS_DIR``.

    Args:
        audio_path: Path of the source audio passed to ffmpeg via ``-i``.
        start_time: Start offset handed to ffmpeg ``-ss``.
        end_time: End offset handed to ffmpeg ``-to``.
        chunk_index: Sequential index used in the file name and the metadata.

    Returns:
        A dict with the keys ``chunk_path``, ``start_time``, ``end_time`` and
        ``chunk_index``. The dict is returned even when ffmpeg fails; the
        failure is reported through the module logger.
    """
    chunk_name = f"chunk_{chunk_index}_{uuid4().hex[:6]}.wav"
    chunk_path = os.path.join(CHUNKS_DIR, chunk_name)
    result = subprocess.run(
        [
            "ffmpeg", "-y", "-i", audio_path,
            "-ss", str(start_time), "-to", str(end_time),
            "-ar", "16000", "-ac", "1",
            "-c:a", "pcm_s16le", "-vn",
            chunk_path,
        ],
        # Output is discarded, so text-decoding options would have no effect.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if result.returncode != 0:
        # Stay best-effort (no exception), but do not claim the chunk was saved.
        logger.warning(
            f"ffmpeg exited with code {result.returncode} while writing "
            f"chunk {chunk_index}: {chunk_path}"
        )
    else:
        logger.debug(f"Chunk {chunk_index} saved: {chunk_path}")
    return {
        "chunk_path": chunk_path,
        "start_time": start_time,
        "end_time": end_time,
        "chunk_index": chunk_index,
    }
74 | 101 |
|
def chunk_audio_by_silence(audio_path):
    """Yield chunk metadata for an audio file, cutting near silences.

    Each chunk targets ``CHUNK_DURATION`` of audio. The cut point is the value
    returned by ``get_closest_silence`` for the ideal end, or the ideal end
    itself (capped at the file duration) when no usable silence is found.

    Args:
        audio_path: Path of the audio file to split.

    Yields:
        The metadata dict produced by ``process_audio_segment`` for each chunk,
        in order.

    Raises:
        ValueError: If ``CHUNK_DURATION`` is not positive or ``SEARCH_WINDOW``
            exceeds ``CHUNK_DURATION``. Raised on first iteration, because
            this is a generator.
    """
    if SEARCH_WINDOW > CHUNK_DURATION:
        raise ValueError(
            f"Silence search window ({SEARCH_WINDOW}s) can't be more than chunk duration ({CHUNK_DURATION}s)."
        )
    if CHUNK_DURATION <= 0:
        # A non-positive step would never advance current_time below.
        raise ValueError(f"Chunk duration must be positive, got {CHUNK_DURATION}s.")

    duration = get_audio_duration(audio_path)
    silences = detect_silences(audio_path)
    current_time, chunk_index = 0.0, 0
    while current_time < duration:
        ideal_end = current_time + CHUNK_DURATION
        fallback_end = min(ideal_end, duration)
        end_time = get_closest_silence(silences, ideal_end) or fallback_end
        # Reject a cut that does not move forward or lies past the end of the
        # audio (the search window can reach beyond the file duration).
        if end_time <= current_time or end_time > duration:
            end_time = fallback_end
        yield process_audio_segment(audio_path, current_time, end_time, chunk_index)
        current_time = end_time
        chunk_index += 1
| 118 | + |
def _stop_ffmpeg_process(proc, session_id, timeout=5.0):
    """Terminate *proc* and wait for it to exit, killing it if it will not stop."""
    if proc.poll() is not None:
        return  # already exited and reaped
    try:
        proc.terminate()
        try:
            # Wait so the child is reaped and releases the recording file.
            proc.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
    except Exception as e:
        # Cleanup is best-effort: report the problem, never raise from here.
        logger.warning(f"Error stopping FFmpeg for session {session_id}: {e}")


def chunk_audiostream_by_silence(session_id: str):
    """Record from the configured microphone and yield chunks as audio arrives.

    An ffmpeg process records the microphone into one growing WAV file inside
    ``CHUNKS_DIR``. Whenever at least ``CHUNK_DURATION`` of unprocessed audio
    is available, the new part is scanned for silences and one chunk is cut
    and yielded. Recording ends when the ffmpeg process exits or after 45
    minutes of processed audio.

    Args:
        session_id: Key under which the ffmpeg process is registered in
            ``FFMPEG_PROCESSES``; also embedded in the recording file name.

    Yields:
        The metadata dict produced by ``process_audio_segment`` for each chunk.

    Raises:
        ValueError: If no microphone is configured under ``Project.microphone``.
    """
    mic_device = RuntimeConfig.get_section("Project").get("microphone", "").strip()
    if not mic_device:
        raise ValueError(
            "Microphone device not set in runtime_config.yaml under Project.microphone"
        )
    record_file = os.path.join(CHUNKS_DIR, f"live_input_{session_id}.wav")
    # NOTE(review): "dshow" is ffmpeg's DirectShow input, so this capture
    # command only works on Windows as written.
    process = subprocess.Popen(
        [
            "ffmpeg", "-y",
            "-f", "dshow",
            "-i", f"audio={mic_device}",
            "-ar", "16000", "-ac", "1",
            "-c:a", "pcm_s16le", "-rf64", "auto",
            record_file,
        ],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    FFMPEG_PROCESSES[session_id] = process
    logger.info(f"🎙️ Recording from {mic_device} (session={session_id}) ... use /stop-mic to stop.")
    current_time, chunk_index = 0.0, 0
    MAX_DURATION = 45 * 60
    try:
        while True:
            if current_time >= MAX_DURATION:
                logger.info(f"Session {session_id}: reached 45 min limit, stopping.")
                break
            # Poll before inspecting the file so that, once the recorder is
            # seen as exited, the file contents read below are final.
            recorder_exited = process.poll() is not None
            # Presumably 44 is the minimal WAV header size: below it there is
            # no audio to measure yet.
            if not os.path.exists(record_file) or os.path.getsize(record_file) < 44:
                if recorder_exited:
                    # ffmpeg died without recording anything (e.g. unknown
                    # device); waiting longer would loop forever.
                    logger.error(
                        f"Session {session_id}: FFmpeg exited with code "
                        f"{process.returncode} before any audio was recorded."
                    )
                    break
                time.sleep(0.02)
                continue
            duration = get_audio_duration(record_file)
            remaining = duration - current_time
            if recorder_exited and remaining < CHUNK_DURATION:
                logger.info(f"Session {session_id}: FFmpeg stopped, processing final chunk...")
                yield process_audio_segment(record_file, current_time, duration, chunk_index)
                break
            if remaining < CHUNK_DURATION:
                time.sleep(0.02)
                continue
            segment_file = os.path.join(CHUNKS_DIR, f"temp_segment_{uuid4().hex[:6]}.wav")
            try:
                # Copy the unprocessed tail so silence detection only scans
                # new audio instead of the whole recording.
                subprocess.run(
                    [
                        "ffmpeg", "-y", "-i", record_file,
                        "-ss", str(current_time), "-to", str(duration),
                        "-ar", "16000", "-ac", "1",
                        "-c:a", "pcm_s16le", "-vn",
                        segment_file,
                    ],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
                # Detected times are relative to the segment; shift them back
                # onto the recording's timeline.
                silences = [
                    {"start": s["start"] + current_time, "end": s["end"] + current_time}
                    for s in detect_silences(segment_file)
                ]
            finally:
                # Remove the temp file before yielding so it cannot leak when
                # the consumer closes the generator or an error occurs.
                if os.path.exists(segment_file):
                    os.remove(segment_file)
            ideal_end = current_time + CHUNK_DURATION
            fallback_end = min(ideal_end, duration)
            end_time = get_closest_silence(silences, ideal_end) or fallback_end
            # Reject a cut that does not move forward or lies past the audio
            # recorded so far.
            if end_time <= current_time or end_time > duration:
                end_time = fallback_end
            yield process_audio_segment(record_file, current_time, end_time, chunk_index)
            current_time = end_time
            chunk_index += 1
    finally:
        registered = FFMPEG_PROCESSES.pop(session_id, None)
        # Stop whatever is registered for this session and make sure our own
        # recorder is reaped even if something else already removed the entry.
        for proc in (registered, process):
            if proc is not None:
                _stop_ffmpeg_process(proc, session_id)
        if os.path.exists(record_file):
            try:
                os.remove(record_file)
            except Exception as e:
                logger.warning(f"Could not remove {record_file}: {e}")
        logger.info(f"🎧 Live recording stopped for session {session_id}.")
| 197 | + |
def chunk_by_silence(input, session_id: str):
    """Yield silence-aligned chunks for either a live microphone or a file.

    Args:
        input: Object exposing ``source_type`` and, for non-microphone
            sources, ``audio_filename``.
        session_id: Session identifier forwarded to the microphone chunker.

    Yields:
        Chunk metadata dicts from the selected chunker.
    """
    from_microphone = input.source_type == AudioSource.MICROPHONE
    chunks = (
        chunk_audiostream_by_silence(session_id)
        if from_microphone
        else chunk_audio_by_silence(input.audio_filename)
    )
    yield from chunks
0 commit comments