fix: enforce asr client timeout

This commit is contained in:
2026-05-19 09:27:47 +08:00
parent e5652c463c
commit 9a4268281e
2 changed files with 15 additions and 6 deletions

View File

@@ -2790,14 +2790,13 @@ def _transcribe_gemini_sync(wav: Path) -> list[dict]:
last_error: Exception | None = None
for attempt in range(3):
try:
resp = llm().chat.completions.create(
resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).chat.completions.create(
model=ASR_FALLBACK_MODEL,
messages=[{"role": "user", "content": [
{"type": "text", "text": prompt},
{"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}},
]}],
temperature=0,
timeout=ASR_TIMEOUT_SECONDS,
)
content = (resp.choices[0].message.content or "").strip()
return _validate_asr_segments(_parse_asr_segments(content, duration), duration, "gemini audio fallback")
@@ -2814,12 +2813,11 @@ def _transcribe_sync(wav: Path) -> list[dict]:
duration = media_duration(wav)
try:
with wav.open("rb") as f:
resp = llm().audio.transcriptions.create(
resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).audio.transcriptions.create(
file=(wav.name, f, "audio/wav"),
model=ASR_MODEL,
response_format="verbose_json",
timestamp_granularities=["segment"],
timeout=ASR_TIMEOUT_SECONDS,
)
raw = resp.model_dump() if hasattr(resp, "model_dump") else resp
segments = raw.get("segments") or []
@@ -2978,7 +2976,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
last_error: Exception | None = None
for attempt in range(2):
try:
resp = llm().chat.completions.create(
resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).chat.completions.create(
model=ASR_FALLBACK_MODEL,
messages=[{"role": "user", "content": [
{"type": "text", "text": prompt},
@@ -2987,7 +2985,6 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
response_format={"type": "json_object"},
temperature=0.1,
max_tokens=900,
timeout=ASR_TIMEOUT_SECONDS,
)
content = (resp.choices[0].message.content or "").strip()
data = json.loads(content)

View File

@@ -1238,6 +1238,18 @@ ProductRefStateItem {
<p><strong>影响:</strong>只改变工作台视觉模式,不改变素材下载、音频解析、抽帧、主体模板、产品素材池、首尾帧或模型链路;<code>web/app/page.tsx</code> 同步移除旧全局浮动主题按钮,避免右下角出现第二套不相关的主题入口。后续新增图片/视频板块仍应复用同一套媒体悬停放大和删除逻辑。</p>
</div>
</article>
<article class="change">
<header>
<h3>2026-05-19 · ASR 客户端级超时硬化</h3>
<span class="tag violet">API</span>
<span class="tag cyan">Ops</span>
</header>
<div class="body">
<p><strong>问题:</strong>云端全流程中远端 <code>whisper-1</code> 转录可能长期停在“转录中”,页面没有 transcript,也不会进入后续分镜。</p>
<p><strong>改动:</strong><code>api/main.py</code> 的远端 ASR、Gemini 音频转写兜底和音频画像调用改为 <code>llm().with_options(timeout=ASR_TIMEOUT_SECONDS)</code> 后再发起请求,避免只在调用参数里传 <code>timeout</code> 时被兼容网关或 SDK 路径忽略。</p>
<p><strong>影响:</strong>远端 ASR 超时后应进入本机 ASR 或多模态音频兜底,并最终给出成功 transcript 或明确失败状态;不会无限占用“音频文案”步骤。</p>
</div>
</article>
<article class="change">
<header>
<h3>2026-05-19 · 云端 TikTok cookies 文件挂载</h3>