fix: force azure asr mode
This commit is contained in:
36
api/main.py
36
api/main.py
@@ -62,6 +62,8 @@ ASR_BASE_URL = os.getenv("ASR_BASE_URL", LLM_BASE_URL).strip()
 ASR_API_KEY = (os.getenv("ASR_API_KEY") or LLM_API_KEY).strip()
 ASR_MODEL = os.getenv("ASR_MODEL", "whisper-1")
 ASR_REMOTE_ENABLED = os.getenv("ASR_REMOTE_ENABLED", "true").strip().lower() not in {"0", "false", "no", "off"}
+ASR_LOCAL_FALLBACK_ENABLED = os.getenv("ASR_LOCAL_FALLBACK_ENABLED", "true").strip().lower() not in {"0", "false", "no", "off"}
+ASR_AUDIO_FALLBACK_ENABLED = os.getenv("ASR_AUDIO_FALLBACK_ENABLED", "true").strip().lower() not in {"0", "false", "no", "off"}
 ASR_FALLBACK_MODEL = os.getenv("ASR_FALLBACK_MODEL", "gemini-2.5-flash").strip() or "gemini-2.5-flash"
 ASR_TIMEOUT_SECONDS = max(15, int(os.getenv("ASR_TIMEOUT_SECONDS", "45")))
 FASTER_WHISPER_MODEL = os.getenv("FASTER_WHISPER_MODEL", "tiny.en").strip() or "tiny.en"
|
||||
@@ -2855,7 +2857,7 @@ def _transcribe_gemini_sync(wav: Path) -> list[dict]:
|
||||
|
||||
|
||||
 def _transcribe_sync(wav: Path) -> list[dict]:
-    """Remote ASR first, local mlx_whisper second. Gemini fallback is guarded against fake timelines."""
+    """Remote ASR first; local/multimodal fallbacks are explicit runtime switches."""
     errors: list[str] = []
     duration = media_duration(wav)
     if ASR_REMOTE_ENABLED:
|
||||
@@ -2877,18 +2879,24 @@ def _transcribe_sync(wav: Path) -> list[dict]:
|
||||
errors.append(f"{ASR_MODEL}: {e}")
|
||||
else:
|
||||
errors.append(f"{ASR_MODEL}: remote disabled")
|
||||
-    try:
-        return _transcribe_faster_whisper_sync(wav)
-    except Exception as e:
-        errors.append(f"faster-whisper: {e}")
-    try:
-        return _transcribe_mlx_sync(wav)
-    except Exception as e:
-        errors.append(f"mlx_whisper: {e}")
-    try:
-        return _transcribe_gemini_sync(wav)
-    except Exception as e:
-        errors.append(f"{ASR_FALLBACK_MODEL}: {e}")
+    if ASR_LOCAL_FALLBACK_ENABLED:
+        try:
+            return _transcribe_faster_whisper_sync(wav)
+        except Exception as e:
+            errors.append(f"faster-whisper: {e}")
+        try:
+            return _transcribe_mlx_sync(wav)
+        except Exception as e:
+            errors.append(f"mlx_whisper: {e}")
+    else:
+        errors.append("local ASR fallback disabled")
+    if ASR_AUDIO_FALLBACK_ENABLED:
+        try:
+            return _transcribe_gemini_sync(wav)
+        except Exception as e:
+            errors.append(f"{ASR_FALLBACK_MODEL}: {e}")
+    else:
+        errors.append("multimodal audio fallback disabled")
|
||||
raise TranscriptionUnavailable(";".join(errors))
|
||||
|
||||
|
||||
@@ -3994,6 +4002,8 @@ def health() -> dict:
|
||||
"asr": ASR_MODEL,
|
||||
"asr_base_url": ASR_BASE_URL or LLM_BASE_URL or "openai-default",
|
||||
"asr_remote_enabled": ASR_REMOTE_ENABLED,
+"asr_local_fallback_enabled": ASR_LOCAL_FALLBACK_ENABLED,
+"asr_audio_fallback_enabled": ASR_AUDIO_FALLBACK_ENABLED,
|
||||
"faster_whisper": FASTER_WHISPER_MODEL,
|
||||
"local_asr": LOCAL_ASR_MODEL,
|
||||
"asr_fallback": ASR_FALLBACK_MODEL,
|
||||
|
||||
@@ -6,6 +6,7 @@ python-dotenv==1.0.1
|
||||
yt-dlp==2026.3.17
|
||||
openai==1.55.3
|
||||
httpx==0.27.2
|
||||
requests==2.32.5
|
||||
imagehash==4.3.1
|
||||
Pillow>=11.0
|
||||
numpy>=2.0
|
||||
|
||||
Reference in New Issue
Block a user