fix: enforce ASR client-level timeout

2026-05-19 09:27:47 +08:00
parent e5652c463c
commit 9a4268281e
2 changed files with 15 additions and 6 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -2790,14 +2790,13 @@ def _transcribe_gemini_sync(wav: Path) -> list[dict]:
    last_error: Exception | None = None
    for attempt in range(3):
        try:
-            resp = llm().chat.completions.create(
+            resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).chat.completions.create(
                model=ASR_FALLBACK_MODEL,
                messages=[{"role": "user", "content": [
                    {"type": "text", "text": prompt},
                    {"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}},
                ]}],
                temperature=0,
-                timeout=ASR_TIMEOUT_SECONDS,
            )
            content = (resp.choices[0].message.content or "").strip()
            return _validate_asr_segments(_parse_asr_segments(content, duration), duration, "gemini audio fallback")
@@ -2814,12 +2813,11 @@ def _transcribe_sync(wav: Path) -> list[dict]:
    duration = media_duration(wav)
    try:
        with wav.open("rb") as f:
-            resp = llm().audio.transcriptions.create(
+            resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).audio.transcriptions.create(
                file=(wav.name, f, "audio/wav"),
                model=ASR_MODEL,
                response_format="verbose_json",
                timestamp_granularities=["segment"],
-                timeout=ASR_TIMEOUT_SECONDS,
            )
        raw = resp.model_dump() if hasattr(resp, "model_dump") else resp
        segments = raw.get("segments") or []
@@ -2978,7 +2976,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
    last_error: Exception | None = None
    for attempt in range(2):
        try:
-            resp = llm().chat.completions.create(
+            resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).chat.completions.create(
                model=ASR_FALLBACK_MODEL,
                messages=[{"role": "user", "content": [
                    {"type": "text", "text": prompt},
@@ -2987,7 +2985,6 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
                response_format={"type": "json_object"},
                temperature=0.1,
                max_tokens=900,
-                timeout=ASR_TIMEOUT_SECONDS,
            )
            content = (resp.choices[0].message.content or "").strip()
            data = json.loads(content)
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -1238,6 +1238,18 @@ ProductRefStateItem {
              <p><strong>影响：</strong>只改变工作台视觉模式，不改变素材下载、音频解析、抽帧、主体模板、产品素材池、首尾帧或模型链路；<code>web/app/page.tsx</code> 同步移除旧全局浮动主题按钮，避免右下角出现第二套不相关的主题入口。后续新增图片/视频板块仍应复用同一套媒体悬停放大和删除逻辑。</p>
            </div>
          </article>
+          <article class="change">
+            <header>
+              <h3>2026-05-19 · ASR 客户端级超时硬化</h3>
+              <span class="tag violet">API</span>
+              <span class="tag cyan">Ops</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>云端全流程中远端 <code>whisper-1</code> 转录可能长期停在“转录中”，页面没有 transcript，也不会进入后续分镜。</p>
+              <p><strong>改动：</strong><code>api/main.py</code> 的远端 ASR、Gemini 音频转写兜底和音频画像调用改为 <code>llm().with_options(timeout=ASR_TIMEOUT_SECONDS)</code> 后再发起请求，避免只在调用参数里传 <code>timeout</code> 时被兼容网关或 SDK 路径忽略。</p>
+              <p><strong>影响：</strong>远端 ASR 超时后应进入本机 ASR 或多模态音频兜底，并最终给出成功 transcript 或明确失败状态；不会无限占用“音频文案”步骤。</p>
+            </div>
+          </article>
          <article class="change">
            <header>
              <h3>2026-05-19 · 云端 TikTok cookies 文件挂载</h3>