diff --git a/.memory/worklog.json b/.memory/worklog.json
index a9e32a3..829da85 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,11 +1,5 @@
 {
   "entries": [
-    {
-      "files_changed": 1,
-      "message": "Codex 会话活跃 · 最近命令：codex · 1 项未提交变更 · 最近提交：auto-save 2026-05-14 18:16 (~1)",
-      "ts": "2026-05-14T10:18:43Z",
-      "type": "session-heartbeat"
-    },
     {
       "files_changed": 1,
       "hash": "38ca0f2",
@@ -3269,6 +3263,13 @@
       "type": "session-heartbeat",
       "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 2 项未提交变更 · 最近提交：fix: recover media intake and remove audio strip",
       "files_changed": 2
+    },
+    {
+      "ts": "2026-05-17T13:50:47+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-17 13:50 (~2)",
+      "hash": "5a87149",
+      "files_changed": 2
     }
   ]
 }
diff --git a/RULES.md b/RULES.md
index 5079e58..97c8666 100644
--- a/RULES.md
+++ b/RULES.md
@@ -52,8 +52,9 @@
 ## 环境变量
 - `LLM_BASE_URL` / `LLM_API_KEY`：OpenAI 兼容网关，用于 ASR、翻译、文案改写、图像等模型调用
 - `ASR_MODEL`：OpenAI Audio Transcriptions 音频转写模型，默认 `whisper-1`
-- `ASR_FALLBACK_MODEL`：当当前网关没有 `/audio/transcriptions` 时，用 Gemini 多模态 chat 直接识别 wav，默认 `gemini-2.5-flash`
-- `ASR_TIMEOUT_SECONDS`：ASR 单次请求超时，默认 45 秒；`whisper-1` 超时后自动走 `ASR_FALLBACK_MODEL`，避免第一步长时间停在转录中
+- `ASR_FALLBACK_MODEL`：远端 ASR 和本机 ASR 都不可用时才尝试的多模态兜底，默认 `gemini-2.5-flash`；如果模型不能真实听到音频或返回疑似逐秒假字幕，后端必须拒绝写入时间轴
+- `ASR_TIMEOUT_SECONDS`：远端 ASR / 音频分析单次请求超时，默认 45 秒，避免第一步长时间停在转录中
+- `LOCAL_ASR_BIN` / `LOCAL_ASR_MODEL` / `LOCAL_ASR_TIMEOUT_SECONDS`：本机 ASR 兜底，默认使用 `/opt/homebrew/bin/mlx_whisper` + `mlx-community/whisper-tiny`，单次本机转写超时的示例值为 180 秒（见 `api/.env.example`）；用于当前 SKG 网关 `/audio/transcriptions` 不可用时生成真实逐句时间轴
 - `TRANSLATE_MODEL`：字幕翻译模型，默认 `gemini-2.5-flash`
 - `REWRITE_MODEL`：通用改写/分镜描述模型，默认 `gemini-2.5-pro`
 - `AUDIO_REWRITE_MODEL`：后续音频口播改写模型，默认跟随 `REWRITE_MODEL`；当前第一步不默认调用口播改写，只保留原文案和声音分析
diff --git a/api/.env.example b/api/.env.example
index 46ca140..c16f8de 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -12,6 +12,10 @@ WEB_AUTH_COOKIE_SECURE=false
 # 模型分工
 ASR_MODEL=whisper-1
 ASR_FALLBACK_MODEL=gemini-2.5-flash
+ASR_TIMEOUT_SECONDS=45
+LOCAL_ASR_BIN=/opt/homebrew/bin/mlx_whisper
+LOCAL_ASR_MODEL=mlx-community/whisper-tiny
+LOCAL_ASR_TIMEOUT_SECONDS=180
 TRANSLATE_MODEL=gemini-2.5-flash
 REWRITE_MODEL=gemini-2.5-pro
 IMAGE_MODEL=gemini-3-pro-image-preview
diff --git a/api/main.py b/api/main.py
index 93e9300..48fbf43 100644
--- a/api/main.py
+++ b/api/main.py
@@ -1793,6 +1793,8 @@ def _validate_asr_segments(segments: list[dict], duration: float, source: str) -
             raise TranscriptionUnavailable(f"{source} returned timestamps outside audio duration")
         if duration > 10 and last_end < duration * 0.45 and words < 20:
             raise TranscriptionUnavailable(f"{source} returned too little transcript coverage")
+    for item in clean:
+        item["_source"] = source
     return clean
 
 
@@ -2064,6 +2066,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
                 response_format={"type": "json_object"},
                 temperature=0.1,
                 max_tokens=900,
+                timeout=ASR_TIMEOUT_SECONDS,
             )
             content = (resp.choices[0].message.content or "").strip()
             data = json.loads(content)
@@ -2322,6 +2325,7 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
         segments = _transcribe_sync(wav)
         if not segments:
             raise TranscriptionUnavailable("ASR 未返回可用字幕段")
+        asr_source = str(segments[0].get("_source") or ASR_MODEL)
 
         # 先把英文段落落到 job 上（让 UI 提前看到，翻译再补 zh）
         en_only = [
@@ -2368,7 +2372,7 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
         if manage_job_status:
             update(job, transcript=full, status="transcribed", progress=100,
                    audio_script=audio_script,
-                   message=f"音频解析完成 · {len(full)} 段（{ASR_MODEL} + {TRANSLATE_MODEL} + {ASR_FALLBACK_MODEL} 音频分析）")
+                   message=f"音频解析完成 · {len(full)} 段（{asr_source} + {TRANSLATE_MODEL} + {ASR_FALLBACK_MODEL} 音频分析）")
         else:
             update(job, transcript=full, audio_script=audio_script)