diff --git a/api/main.py b/api/main.py
index 9bb3869..61dcf67 100644
--- a/api/main.py
+++ b/api/main.py
@@ -2790,14 +2790,13 @@ def _transcribe_gemini_sync(wav: Path) -> list[dict]:
     last_error: Exception | None = None
     for attempt in range(3):
         try:
-            resp = llm().chat.completions.create(
+            resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).chat.completions.create(
                 model=ASR_FALLBACK_MODEL,
                 messages=[{"role": "user", "content": [
                     {"type": "text", "text": prompt},
                     {"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}},
                 ]}],
                 temperature=0,
-                timeout=ASR_TIMEOUT_SECONDS,
             )
             content = (resp.choices[0].message.content or "").strip()
             return _validate_asr_segments(_parse_asr_segments(content, duration), duration, "gemini audio fallback")
@@ -2814,12 +2813,11 @@ def _transcribe_sync(wav: Path) -> list[dict]:
     duration = media_duration(wav)
     try:
         with wav.open("rb") as f:
-            resp = llm().audio.transcriptions.create(
+            resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).audio.transcriptions.create(
                 file=(wav.name, f, "audio/wav"),
                 model=ASR_MODEL,
                 response_format="verbose_json",
                 timestamp_granularities=["segment"],
-                timeout=ASR_TIMEOUT_SECONDS,
             )
         raw = resp.model_dump() if hasattr(resp, "model_dump") else resp
         segments = raw.get("segments") or []
@@ -2978,7 +2976,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
     last_error: Exception | None = None
     for attempt in range(2):
         try:
-            resp = llm().chat.completions.create(
+            resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).chat.completions.create(
                 model=ASR_FALLBACK_MODEL,
                 messages=[{"role": "user", "content": [
                     {"type": "text", "text": prompt},
@@ -2987,7 +2985,6 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
                 response_format={"type": "json_object"},
                 temperature=0.1,
                 max_tokens=900,
-                timeout=ASR_TIMEOUT_SECONDS,
             )
             content = (resp.choices[0].message.content or "").strip()
             data = json.loads(content)
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index dc8d546..582260e 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -1238,6 +1238,18 @@ ProductRefStateItem {
               <p><strong>影响：</strong>只改变工作台视觉模式，不改变素材下载、音频解析、抽帧、主体模板、产品素材池、首尾帧或模型链路；<code>web/app/page.tsx</code> 同步移除旧全局浮动主题按钮，避免右下角出现第二套不相关的主题入口。后续新增图片/视频板块仍应复用同一套媒体悬停放大和删除逻辑。</p>
             </div>
           </article>
+          <article class="change">
+            <header>
+              <h3>2026-05-19 · ASR 客户端级超时硬化</h3>
+              <span class="tag violet">API</span>
+              <span class="tag cyan">Ops</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>云端全流程中远端 <code>whisper-1</code> 转录可能长期停在“转录中”，页面没有 transcript，也不会进入后续分镜。</p>
+              <p><strong>改动：</strong><code>api/main.py</code> 的远端 ASR、Gemini 音频转写兜底和音频画像调用改为先通过 <code>llm().with_options(timeout=ASR_TIMEOUT_SECONDS)</code> 设置客户端级超时，再发起请求，避免仅在调用参数里传入的 <code>timeout</code> 被兼容网关或 SDK 路径忽略。</p>
+              <p><strong>影响：</strong>远端 ASR 超时后应进入本机 ASR 或多模态音频兜底，并最终给出成功 transcript 或明确失败状态；不会无限占用“音频文案”步骤。</p>
+            </div>
+          </article>
           <article class="change">
             <header>
               <h3>2026-05-19 · 云端 TikTok cookies 文件挂载</h3>