diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index 0c0a1fc9f..70791b0ca 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -12,7 +12,7 @@ Provides speech-to-text transcription with three providers: Used by the messaging gateway to automatically transcribe voice messages sent by users on Telegram, Discord, WhatsApp, Slack, and Signal. -Supported input formats: mp3, mp4, mpeg, mpga, m4a, wav, webm, ogg +Supported input formats: mp3, mp4, mpeg, mpga, m4a, wav, webm, ogg, aac Usage:: @@ -60,7 +60,7 @@ COMMON_LOCAL_BIN_DIRS = ("/opt/homebrew/bin", "/usr/local/bin") GROQ_BASE_URL = os.getenv("GROQ_BASE_URL", "https://api.groq.com/openai/v1") OPENAI_BASE_URL = os.getenv("STT_OPENAI_BASE_URL", "https://api.openai.com/v1") -SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg"} +SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg", ".aac"} LOCAL_NATIVE_AUDIO_FORMATS = {".wav", ".aiff", ".aif"} MAX_FILE_SIZE = 25 * 1024 * 1024 # 25 MB