From 6d684e058f8ac938c0d6f218c3916205a1fe22d2 Mon Sep 17 00:00:00 2001
From: kang <wankang2050@gmail.com>
Date: Sun, 17 May 2026 13:23:56 +0800
Subject: [PATCH] auto-save 2026-05-17 13:23 (~2)

---
 .memory/worklog.json | 14 +++++++-------
 api/main.py          |  3 +++
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/.memory/worklog.json b/.memory/worklog.json
index a48fdb5..d6f6c78 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,12 +1,5 @@
 {
   "entries": [
-    {
-      "files_changed": 1,
-      "hash": "e576fa1",
-      "message": "auto-save 2026-05-14 18:05 (~1)",
-      "ts": "2026-05-14T18:05:20+08:00",
-      "type": "commit"
-    },
     {
       "files_changed": 1,
       "message": "Codex 会话活跃 · 最近命令：codex · 1 项未提交变更 · 最近提交：auto-save 2026-05-14 18:05 (~1)",
@@ -3269,6 +3262,13 @@
       "type": "session-heartbeat",
       "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 5 项未提交变更 · 最近提交：auto-save 2026-05-17 13:13 (~6)",
       "files_changed": 5
+    },
+    {
+      "ts": "2026-05-17T13:18:29+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-17 13:18 (~5)",
+      "hash": "f2742df",
+      "files_changed": 5
     }
   ]
 }
diff --git a/api/main.py b/api/main.py
index 9a69e03..a9b4ad3 100644
--- a/api/main.py
+++ b/api/main.py
@@ -43,6 +43,7 @@ LLM_BASE_URL = os.getenv("LLM_BASE_URL", "").strip()
 LLM_API_KEY = os.getenv("LLM_API_KEY", "").strip()
 ASR_MODEL = os.getenv("ASR_MODEL", "whisper-1")
 ASR_FALLBACK_MODEL = os.getenv("ASR_FALLBACK_MODEL", "gemini-2.5-flash").strip() or "gemini-2.5-flash"
+ASR_TIMEOUT_SECONDS = max(15, int(os.getenv("ASR_TIMEOUT_SECONDS", "").strip() or "45"))  # floor of 15; unset or blank env value falls back to 45
 TRANSLATE_MODEL = os.getenv("TRANSLATE_MODEL", "gemini-2.5-flash")
 REWRITE_MODEL = os.getenv("REWRITE_MODEL", "gemini-2.5-pro")
 VISION_MODEL = os.getenv("VISION_MODEL", "gemini-2.5-flash")
@@ -1752,6 +1753,7 @@ def _transcribe_gemini_sync(wav: Path) -> list[dict]:
                     {"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}},
                 ]}],
                 temperature=0,
+                timeout=ASR_TIMEOUT_SECONDS,
             )
             content = (resp.choices[0].message.content or "").strip()
             return _parse_asr_segments(content, duration)
@@ -1771,6 +1773,7 @@ def _transcribe_sync(wav: Path) -> list[dict]:
                 model=ASR_MODEL,
                 response_format="verbose_json",
                 timestamp_granularities=["segment"],
+                timeout=ASR_TIMEOUT_SECONDS,
             )
         raw = resp.model_dump() if hasattr(resp, "model_dump") else resp
         segments = raw.get("segments") or []