diff --git a/api/main.py b/api/main.py index 9bb3869..61dcf67 100644 --- a/api/main.py +++ b/api/main.py @@ -2790,14 +2790,13 @@ def _transcribe_gemini_sync(wav: Path) -> list[dict]: last_error: Exception | None = None for attempt in range(3): try: - resp = llm().chat.completions.create( + resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).chat.completions.create( model=ASR_FALLBACK_MODEL, messages=[{"role": "user", "content": [ {"type": "text", "text": prompt}, {"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}}, ]}], temperature=0, - timeout=ASR_TIMEOUT_SECONDS, ) content = (resp.choices[0].message.content or "").strip() return _validate_asr_segments(_parse_asr_segments(content, duration), duration, "gemini audio fallback") @@ -2814,12 +2813,11 @@ def _transcribe_sync(wav: Path) -> list[dict]: duration = media_duration(wav) try: with wav.open("rb") as f: - resp = llm().audio.transcriptions.create( + resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).audio.transcriptions.create( file=(wav.name, f, "audio/wav"), model=ASR_MODEL, response_format="verbose_json", timestamp_granularities=["segment"], - timeout=ASR_TIMEOUT_SECONDS, ) raw = resp.model_dump() if hasattr(resp, "model_dump") else resp segments = raw.get("segments") or [] @@ -2978,7 +2976,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ last_error: Exception | None = None for attempt in range(2): try: - resp = llm().chat.completions.create( + resp = llm().with_options(timeout=ASR_TIMEOUT_SECONDS).chat.completions.create( model=ASR_FALLBACK_MODEL, messages=[{"role": "user", "content": [ {"type": "text", "text": prompt}, @@ -2987,7 +2985,6 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ response_format={"type": "json_object"}, temperature=0.1, max_tokens=900, - timeout=ASR_TIMEOUT_SECONDS, ) content = (resp.choices[0].message.content or "").strip() data = json.loads(content) diff --git 
a/docs/source-analysis.html b/docs/source-analysis.html index dc8d546..582260e 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -1238,6 +1238,18 @@ ProductRefStateItem {

影响:只改变工作台视觉模式,不改变素材下载、音频解析、抽帧、主体模板、产品素材池、首尾帧或模型链路;web/app/page.tsx 同步移除旧全局浮动主题按钮,避免右下角出现第二套不相关的主题入口。后续新增图片/视频板块仍应复用同一套媒体悬停放大和删除逻辑。

+
+
+

2026-05-19 · ASR 客户端级超时硬化

+ API + Ops +
+
+

问题:云端全流程中远端 whisper-1 转录可能长时间停在“转录中”,页面没有 transcript,也不会进入后续分镜。

+

改动:api/main.py 中的远端 ASR、Gemini 音频转写兜底和音频画像三处调用,改为先通过 llm().with_options(timeout=ASR_TIMEOUT_SECONDS) 设置客户端级超时再发起请求,不再在 create() 调用参数里传 timeout,避免该参数被兼容网关或 SDK 路径忽略。

+

影响:远端 ASR 超时后应进入本机 ASR 或多模态音频兜底,并最终给出成功 transcript 或明确失败状态;不会无限占用“音频文案”步骤。

+
+

2026-05-19 · 云端 TikTok cookies 文件挂载