Skip to content

Commit c77df7d

Browse files
committed
v0.8.2: configurable OpenAI STT model for voice messages
1 parent b01abdb commit c77df7d

4 files changed

Lines changed: 517 additions & 131 deletions

File tree

README.md

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
## 🤖 _Powered by ChatKeke_ 🚀
44

55
- **A simple-to-use, quick-to-deploy Python-based Telegram bot for OpenAI API & Perplexity API**
6-
- **🎙 Transcribed voice messages over Whisper API**
7-
- (auto-transcriptions, translations, and other messages to the bot over TG's voice messages)
6+
- **🎙 Transcribed Telegram voice messages via OpenAI speech-to-text models (Whisper and others)**
7+
- Supports configurable STT models such as `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, and legacy `whisper-1`
88
- **☁️ Real-time weather info, weather alerts, and geolocation data via [OpenWeatherMap](https://openweathermap.org/), [WeatherAPI](https://www.weatherapi.com/) and U.S. NWS ([weather.gov](https://weather.gov))**
99
- **🗺 Geolocation and map lookups via MapTiler API**
1010
- (with weather forecasts around the world in all OpenAI API supported languages)
@@ -240,6 +240,44 @@ If you run into any issues, consult the logs or reach out on the repository's [I
240240
---
241241

242242
# Changelog
243+
244+
- v0.8.2 - Configurable OpenAI speech-to-text model support for Telegram voice messages
245+
- Updated Telegram voice message transcription handling for the current OpenAI audio transcription model lineup.
246+
- Added configurable speech-to-text model selection through `config.ini`:
247+
- `STTModel = gpt-4o-transcribe`
248+
- `STTModel = gpt-4o-mini-transcribe`
249+
- `STTModel = whisper-1`
250+
- Kept backward compatibility with the old `EnableWhisper` config flag so existing configs do not break.
251+
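- Added `OPENAI_STT_MODEL` environment-variable fallback for Docker/systemd/shell deployments; it is used only when `STTModel` is not set in `config.ini` (a configured `STTModel` takes precedence, and `gpt-4o-transcribe` is the final default).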
252+
- Changed the default voice transcription model from legacy `whisper-1` to modern `gpt-4o-transcribe`.
253+
- Refactored `src/voice_message_handler.py` so the STT model is read from the main `TelegramBot` config object instead of being hardcoded in the voice module.
254+
- Added lazy OpenAI async client initialization for voice transcription so the client is created only after API key loading.
255+
- Preserved support for legacy `whisper-1` as a fallback model.
256+
- Preserved Telegram-visible voice transcription marker:
257+
- `🎤📝`
258+
- Improved Telegram HTML safety by escaping transcribed text before wrapping it in HTML formatting.
259+
- Improved model-facing voice transcription formatting through `context.user_data["transcribed_text"]`.
260+
- Fixed voice message duration checking so `MaxDurationMinutes` is correctly treated as minutes while Telegram/audio duration values are handled as seconds.
261+
- Improved voice message logging with detailed metadata:
262+
- Telegram user ID
263+
- username
264+
- first/last name
265+
- chat ID
266+
- chat type/title
267+
- message ID
268+
- Telegram voice file ID / unique ID
269+
- voice duration
270+
- MIME type
271+
- file size
272+
- local downloaded file path
273+
- selected STT model
274+
- final transcription text
275+
- Logs detailed voice transcription events into `bot.log`.
276+
- Logs transcription events into `chat.log` when `ChatLoggingEnabled = True`.
277+
- Cleaned up `main.py` voice handler registration:
278+
- removed the old extra-argument voice handler path
279+
- now routes voice messages through `self.voice_message_handler`
280+
- Fixed `load_config()` directory creation logging by replacing undefined `logger` references with a local `TelegramBotLogger`.
243281
- v0.8.1 - Startup status banner cleanup
244282
- Added Perplexity API enabled/disabled status to the startup banner.
245283
- Reads Perplexity status from `[Perplexity] -> Enabled` in `config.ini`.

config/config.ini

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,14 @@ MaxHistoryDays = 30
8282
# ~~~~~~~~~~~
8383
# Allow speech-to-text transcription of voice messages (legacy flag name kept for backward compatibility; the model is selected with STTModel)
8484
EnableWhisper = True
85+
86+
# Speech-to-text model.
87+
# Good values:
88+
# gpt-4o-transcribe = modern default, same per-minute price as whisper-1
89+
# gpt-4o-mini-transcribe = cheaper
90+
# whisper-1 = legacy fallback
91+
STTModel = gpt-4o-transcribe
92+
8593
# Maximum duration of a voice message (in minutes)
8694
MaxDurationMinutes = 5
8795

src/main.py

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# https://github.com/FlyingFathead/TelegramBot-OpenAI-API
77
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
88
# version of this program
9-
version_number = "0.8.1"
9+
version_number = "0.8.2"
1010

1111
# Add the project root directory to Python's path
1212
import sys
@@ -155,7 +155,10 @@ def __init__(self):
155155
# Because we do that in main() before constructing TelegramBot.
156156

157157
self.logger = logging.getLogger('TelegramBotLogger')
158-
self.logger.info("Initializing TelegramBot...")
158+
self.logger.info("Initializing Telegram Bot...")
159+
160+
self.logger.info(f"Voice STT enabled: {self.enable_whisper}")
161+
self.logger.info(f"Voice STT model: {self.stt_model}")
159162

160163
# The rest is mostly unchanged:
161164
self.reminders_enabled = self._parser.getboolean('Reminders', 'EnableReminders', fallback=False)
@@ -229,6 +232,14 @@ def load_config(self):
229232
self.bot_disabled_msg = self.config.get('BotDisabledMsg', 'The bot is currently disabled.')
230233

231234
self.enable_whisper = self.config.getboolean('EnableWhisper', True)
235+
236+
# Speech-to-text model for voice messages.
237+
# Primary source: config.ini
238+
# Fallback source: OPENAI_STT_MODEL env var
239+
# Final fallback: gpt-4o-transcribe
240+
configured_stt_model = self.config.get('STTModel', fallback='').strip()
241+
env_stt_model = os.getenv('OPENAI_STT_MODEL', '').strip()
242+
self.stt_model = configured_stt_model or env_stt_model or 'gpt-4o-transcribe'
232243
self.max_voice_message_length = self.config.getint('MaxDurationMinutes', 5)
233244

234245
self.data_directory = self.config.get('DataDirectory', 'data')
@@ -241,25 +252,29 @@ def load_config(self):
241252
# Build paths
242253
project_root = Path(__file__).resolve().parents[1]
243254
self.data_directory = str(project_root / self.config.get('DataDirectory', 'data'))
255+
self.logs_directory = str(project_root / self.config.get('LogsDirectory', 'logs'))
256+
257+
# self.logger is not assigned yet during load_config(), so use a local logger.
258+
config_logger = logging.getLogger('TelegramBotLogger')
244259

245260
# Create data directory if needed
246261
try:
247262
if not os.path.exists(self.data_directory):
248263
os.makedirs(self.data_directory, exist_ok=True)
249-
logger.info(f"Created data directory at {self.data_directory}")
264+
config_logger.info(f"Created data directory at {self.data_directory}")
250265
except OSError as e:
251-
logger.error(
266+
config_logger.error(
252267
f"Failed to create data directory {self.data_directory}: {e} "
253268
"-- Some commands might be disabled due to this."
254269
)
255270

256-
self.logs_directory = str(project_root / self.config.get('LogsDirectory', 'logs'))
271+
# Create logs directory if needed
257272
try:
258273
if not os.path.exists(self.logs_directory):
259274
os.makedirs(self.logs_directory, exist_ok=True)
260-
logger.info(f"Created logs directory at {self.logs_directory}")
275+
config_logger.info(f"Created logs directory at {self.logs_directory}")
261276
except OSError as e:
262-
logger.error(
277+
config_logger.error(
263278
f"Failed to create logs directory {self.logs_directory}: {e} "
264279
"-- Some commands might be disabled due to this."
265280
)
@@ -405,15 +420,7 @@ def split_large_messages(self, message, max_length=4096):
405420
# voice message handler - see: voice_message_handler.py
406421
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
407422
async def voice_message_handler(self, update: Update, context: CallbackContext) -> None:
408-
await handle_voice_message(
409-
self,
410-
update,
411-
context,
412-
self.data_directory,
413-
self.enable_whisper,
414-
self.max_voice_message_length,
415-
logger
416-
)
423+
await handle_voice_message(self, update, context)
417424

418425
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
419426
# text message handler - see: text_message_handler.py
@@ -438,7 +445,7 @@ def run(self):
438445
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, self.handle_message))
439446

440447
# Voice handler
441-
application.add_handler(MessageHandler(filters.VOICE, partial(handle_voice_message, self)))
448+
application.add_handler(MessageHandler(filters.VOICE, self.voice_message_handler))
442449

443450
# Register command handlers from bot_commands module
444451
application.add_handler(

0 commit comments

Comments
 (0)