fix: force azure asr mode

This commit is contained in:
2026-05-19 10:31:31 +08:00
parent 5b44d35316
commit ff7bf00f6d
5 changed files with 49 additions and 22 deletions

View File

@@ -62,6 +62,8 @@ ASR_BASE_URL = os.getenv("ASR_BASE_URL", LLM_BASE_URL).strip()
# Falls back to LLM_API_KEY when ASR_API_KEY is unset or empty; surrounding
# whitespace is stripped from whichever value is used.
ASR_API_KEY = (os.getenv("ASR_API_KEY") or LLM_API_KEY).strip()
# Defaults to "whisper-1". Unlike the other model names below, the value is
# taken as-is (no strip, no blank-value fallback).
ASR_MODEL = os.getenv("ASR_MODEL", "whisper-1")
# Boolean switches. Each is True unless the env value (trimmed, compared
# case-insensitively) is one of "0", "false", "no", "off"; an unset or empty
# variable therefore means "enabled".
ASR_REMOTE_ENABLED = os.getenv("ASR_REMOTE_ENABLED", "true").strip().lower() not in {"0", "false", "no", "off"}
# Gates the faster-whisper and mlx_whisper attempts in _transcribe_sync.
ASR_LOCAL_FALLBACK_ENABLED = os.getenv("ASR_LOCAL_FALLBACK_ENABLED", "true").strip().lower() not in {"0", "false", "no", "off"}
# Gates the _transcribe_gemini_sync attempt in _transcribe_sync.
ASR_AUDIO_FALLBACK_ENABLED = os.getenv("ASR_AUDIO_FALLBACK_ENABLED", "true").strip().lower() not in {"0", "false", "no", "off"}
# A blank (whitespace-only) value falls back to "gemini-2.5-flash".
ASR_FALLBACK_MODEL = os.getenv("ASR_FALLBACK_MODEL", "gemini-2.5-flash").strip() or "gemini-2.5-flash"
# Default 45, clamped to a minimum of 15. NOTE(review): int() raises
# ValueError at import time if the variable is set to a non-integer string.
ASR_TIMEOUT_SECONDS = max(15, int(os.getenv("ASR_TIMEOUT_SECONDS", "45")))
# A blank (whitespace-only) value falls back to "tiny.en".
FASTER_WHISPER_MODEL = os.getenv("FASTER_WHISPER_MODEL", "tiny.en").strip() or "tiny.en"
@@ -2855,7 +2857,7 @@ def _transcribe_gemini_sync(wav: Path) -> list[dict]:
def _transcribe_sync(wav: Path) -> list[dict]:
"""Remote ASR first, local mlx_whisper second. Gemini fallback is guarded against fake timelines."""
"""Remote ASR first; local/multimodal fallbacks are explicit runtime switches."""
errors: list[str] = []
duration = media_duration(wav)
if ASR_REMOTE_ENABLED:
@@ -2877,18 +2879,24 @@ def _transcribe_sync(wav: Path) -> list[dict]:
errors.append(f"{ASR_MODEL}: {e}")
else:
errors.append(f"{ASR_MODEL}: remote disabled")
try:
return _transcribe_faster_whisper_sync(wav)
except Exception as e:
errors.append(f"faster-whisper: {e}")
try:
return _transcribe_mlx_sync(wav)
except Exception as e:
errors.append(f"mlx_whisper: {e}")
try:
return _transcribe_gemini_sync(wav)
except Exception as e:
errors.append(f"{ASR_FALLBACK_MODEL}: {e}")
if ASR_LOCAL_FALLBACK_ENABLED:
try:
return _transcribe_faster_whisper_sync(wav)
except Exception as e:
errors.append(f"faster-whisper: {e}")
try:
return _transcribe_mlx_sync(wav)
except Exception as e:
errors.append(f"mlx_whisper: {e}")
else:
errors.append("local ASR fallback disabled")
if ASR_AUDIO_FALLBACK_ENABLED:
try:
return _transcribe_gemini_sync(wav)
except Exception as e:
errors.append(f"{ASR_FALLBACK_MODEL}: {e}")
else:
errors.append("multimodal audio fallback disabled")
raise TranscriptionUnavailable("".join(errors))
@@ -3994,6 +4002,8 @@ def health() -> dict:
"asr": ASR_MODEL,
"asr_base_url": ASR_BASE_URL or LLM_BASE_URL or "openai-default",
"asr_remote_enabled": ASR_REMOTE_ENABLED,
"asr_local_fallback_enabled": ASR_LOCAL_FALLBACK_ENABLED,
"asr_audio_fallback_enabled": ASR_AUDIO_FALLBACK_ENABLED,
"faster_whisper": FASTER_WHISPER_MODEL,
"local_asr": LOCAL_ASR_MODEL,
"asr_fallback": ASR_FALLBACK_MODEL,

View File

@@ -6,6 +6,7 @@ python-dotenv==1.0.1
yt-dlp==2026.3.17
openai==1.55.3
httpx==0.27.2
requests==2.32.5
imagehash==4.3.1
Pillow>=11.0
numpy>=2.0