diff --git a/api/main.py b/api/main.py
index 9bb3869..61dcf67 100644
--- a/api/main.py
+++ b/api/main.py
@@ -2790,14 +2790,13 @@ def _transcribe_gemini_sync(wav: Path) -> list[dict]:
last_error: Exception | None = None
for attempt in range(3):
try:
- resp = llm().chat.completions.create(
+ resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).chat.completions.create(
model=ASR_FALLBACK_MODEL,
messages=[{"role": "user", "content": [
{"type": "text", "text": prompt},
{"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}},
]}],
temperature=0,
- timeout=ASR_TIMEOUT_SECONDS,
)
content = (resp.choices[0].message.content or "").strip()
return _validate_asr_segments(_parse_asr_segments(content, duration), duration, "gemini audio fallback")
@@ -2814,12 +2813,11 @@ def _transcribe_sync(wav: Path) -> list[dict]:
duration = media_duration(wav)
try:
with wav.open("rb") as f:
- resp = llm().audio.transcriptions.create(
+ resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).audio.transcriptions.create(
file=(wav.name, f, "audio/wav"),
model=ASR_MODEL,
response_format="verbose_json",
timestamp_granularities=["segment"],
- timeout=ASR_TIMEOUT_SECONDS,
)
raw = resp.model_dump() if hasattr(resp, "model_dump") else resp
segments = raw.get("segments") or []
@@ -2978,7 +2976,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
last_error: Exception | None = None
for attempt in range(2):
try:
- resp = llm().chat.completions.create(
+ resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).chat.completions.create(
model=ASR_FALLBACK_MODEL,
messages=[{"role": "user", "content": [
{"type": "text", "text": prompt},
@@ -2987,7 +2985,6 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
response_format={"type": "json_object"},
temperature=0.1,
max_tokens=900,
- timeout=ASR_TIMEOUT_SECONDS,
)
content = (resp.choices[0].message.content or "").strip()
data = json.loads(content)
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index dc8d546..582260e 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -1238,6 +1238,18 @@ ProductRefStateItem {
影响:只改变工作台视觉模式,不改变素材下载、音频解析、抽帧、主体模板、产品素材池、首尾帧或模型链路;web/app/page.tsx 同步移除旧全局浮动主题按钮,避免右下角出现第二套不相关的主题入口。后续新增图片/视频板块仍应复用同一套媒体悬停放大和删除逻辑。
+
+
+ 2026-05-19 · ASR 客户端级超时硬化
+ API
+ Ops
+
+
+
问题:云端全流程中远端 whisper-1 转录可能长期停在“转录中”,页面没有 transcript,也不会进入后续分镜。
+
改动:api/main.py 的远端 ASR、Gemini 音频转写兜底和音频画像调用改为 llm().with_options(timeout=ASR_TIMEOUT_SECONDS) 后再发起请求,避免只在调用参数里传 timeout 时被兼容网关或 SDK 路径忽略。
+
影响:远端 ASR 超时后应进入本机 ASR 或多模态音频兜底,并最终给出成功 transcript 或明确失败状态;不会无限占用“音频文案”步骤。
+
+
2026-05-19 · 云端 TikTok cookies 文件挂载