From 8bd52f676a11fb4364c82f6ecccf535be1d8c373 Mon Sep 17 00:00:00 2001
From: kang <wankang2050@gmail.com>
Date: Thu, 14 May 2026 10:53:54 +0800
Subject: [PATCH] auto-save 2026-05-14 10:51 (~7)

---
 .memory/worklog.json           | 40 ++++++++++++++++------------------
 api/README.md                  |  1 +
 api/main.py                    | 12 +++++++---
 docs/source-analysis.html      | 25 +++++++++++++++++++++
 web/app/page.tsx               | 35 +++++++++++++++++++++++++++--
 web/components/audio-strip.tsx |  2 +-
 web/components/nodes/index.tsx | 17 ++++++++++++++-
 7 files changed, 104 insertions(+), 28 deletions(-)

diff --git a/.memory/worklog.json b/.memory/worklog.json
index 6f66e57..d593210 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,26 +1,5 @@
 {
   "entries": [
-    {
-      "files_changed": 1,
-      "hash": "6128084",
-      "message": "auto-save 2026-05-13 01:48 (~1)",
-      "ts": "2026-05-13T01:50:21+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "hash": "1646390",
-      "message": "auto-save 2026-05-13 01:56 (~1)",
-      "ts": "2026-05-13T01:56:16+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "hash": "7fd0057",
-      "message": "auto-save 2026-05-13 02:01 (~1)",
-      "ts": "2026-05-13T02:02:13+08:00",
-      "type": "commit"
-    },
     {
       "files_changed": 1,
       "hash": "f4a421b",
@@ -3311,6 +3290,25 @@
       "message": "auto-save 2026-05-14 10:36 (~5)",
       "hash": "1014114",
       "files_changed": 5
+    },
+    {
+      "ts": "2026-05-14T10:45:48+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-14 10:45 (+1, ~5)",
+      "hash": "d0abed6",
+      "files_changed": 6
+    },
+    {
+      "ts": "2026-05-14T02:46:10Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 3 项未提交变更 · 最近提交：auto-save 2026-05-14 10:45 (+1, ~5)",
+      "files_changed": 3
+    },
+    {
+      "ts": "2026-05-14T02:48:38Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 5 项未提交变更 · 最近提交：auto-save 2026-05-14 10:45 (+1, ~5)",
+      "files_changed": 5
     }
   ]
 }
diff --git a/api/README.md b/api/README.md
index 376568a..c4468f8 100644
--- a/api/README.md
+++ b/api/README.md
@@ -20,6 +20,7 @@ uvicorn main:app --port 4291 --reload
 - `GET  /jobs/{id}` — 当前状态 + 产物
 - `POST /jobs/{id}/transcribe` — 触发 ASR + 翻译 + SKG 文案改写；配置 MiniMax 后生成配音
 - `GET  /jobs/{id}/video.mp4` — 原视频
+- `GET  /jobs/{id}/audio.wav` — 拆轨后的原始音频，供前端底部音频条生成波形
 - `GET  /jobs/{id}/audio-script.mp3` — 改写文案的 MiniMax 配音
 - `GET  /jobs/{id}/frames/{i}.jpg` — 第 i 张关键帧（0-9）
 
diff --git a/api/main.py b/api/main.py
index 4ec88be..a308bd4 100644
--- a/api/main.py
+++ b/api/main.py
@@ -565,7 +565,7 @@ async def lifespan(_: FastAPI):
                 source_exists = (p / "source.mp4").exists()
                 if job.status in {"created", "downloading"}:
                     if source_exists:
-                        update(job, status="downloaded", progress=25, message="服务重启 · 视频已恢复，可重新解析")
+                        update(job, status="downloaded", progress=25, error="", message="服务重启 · 视频已恢复，可重新解析")
                     else:
                         update(job, status="failed", message="服务重启 · 下载任务已中断，请重新提交")
                 elif job.status == "splitting":
@@ -573,6 +573,7 @@ async def lifespan(_: FastAPI):
                         job,
                         status="frames_extracted" if job.frames else "downloaded",
                         progress=70 if job.frames else 25,
+                        error="",
                         message="服务重启 · 上次抽帧已中断，可重新抽帧",
                     )
                 elif job.status == "transcribing":
@@ -587,6 +588,7 @@ async def lifespan(_: FastAPI):
                         job,
                         status="frames_extracted",
                         progress=70,
+                        error="",
                         audio_script=audio_script,
                         message="服务重启 · 上次音频处理已中断，可重新处理",
                     )
@@ -1182,6 +1184,7 @@ def pipeline_download(job_id: str) -> None:
             width=int(v_stream["width"]) if v_stream else 0,
             height=int(v_stream["height"]) if v_stream else 0,
             progress=25,
+            error="",
             message=f"视频就绪 · {duration:.1f}s · 等待解析",
         )
     except Exception as e:
@@ -1333,6 +1336,7 @@ def pipeline_analyze(
             status="frames_extracted",
             frames=merged_frames,
             progress=70,
+            error="",
             message=final_message,
         )
 
@@ -1928,6 +1932,7 @@ async def trigger_analyze(
         job,
         status="splitting",
         progress=30,
+        error="",
         message="排队等待抽帧" if ANALYZE_WORKER_RUNNING or position > 1 else "准备抽帧…",
     )
     if not ANALYZE_WORKER_RUNNING:
@@ -1999,8 +2004,9 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job:
     job = JOBS.get(job_id)
     if not job:
         raise HTTPException(404, "job not found")
-    if job.status != "frames_extracted":
-        raise HTTPException(409, f"status must be frames_extracted, got {job.status}")
+    if job.status not in {"frames_extracted", "transcribed", "failed"}:
+        raise HTTPException(409, f"status must be frames_extracted/transcribed/failed, got {job.status}")
+    update(job, status="transcribing", progress=max(job.progress, 72), error="", message="准备音频转写…")
     bg.add_task(pipeline_transcribe, job_id)
     return job
 
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index cd876f6..0f13a5f 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -918,6 +918,18 @@ SubjectAsset {
         <h2>变更记录</h2>
         <p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
         <div class="changelog">
+          <article class="change">
+            <header>
+              <h3>2026-05-14 · 新增底部可伸缩音频条</h3>
+              <span class="tag gray">Audio</span>
+              <span class="tag violet">Timeline</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>音频和文案只在节点或侧栏里展示，审核时缺少“文字和声音时间轴对应”的空间；英文口播和中文翻译也没有上下对齐。</p>
+              <p><strong>改动：</strong>新增 <code>web/components/audio-strip.tsx</code>，在主工作台底部吸附显示，可拖拽调整高度、可收起。每个音频段按时间横向排列，上方显示英文，中间显示中文翻译，下方显示对应波形条；右侧显示改写后的 SKG 口播、MiniMax 配音和产品依据。后端新增 <code>GET /jobs/{id}/audio.wav</code> 只读接口，前端用 Web Audio API 解码生成波形峰值。</p>
+              <p><strong>影响：</strong><code>web/app/page.tsx</code>、<code>web/components/audio-strip.tsx</code>、<code>web/lib/api.ts</code>、<code>api/main.py</code>、<code>docs/source-analysis.html</code>。</p>
+            </div>
+          </article>
           <article class="change">
             <header>
               <h3>2026-05-14 · 音频结果改为改前/改后对照展示</h3>
@@ -942,6 +954,19 @@ SubjectAsset {
               <p><strong>影响：</strong><code>web/app/page.tsx</code>、<code>docs/source-analysis.html</code>。后端轮询本来已经覆盖所有运行中的 job，这轮主要修正前端 UI 工作上下文。</p>
             </div>
           </article>
+          <article class="change">
+            <header>
+              <h3>2026-05-14 · 抽帧后台任务不再卡住 API</h3>
+              <span class="tag blue">API</span>
+              <span class="tag orange">抽帧</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>点击视频抽帧时，后端 4291 端口能连接但 <code>/health</code> 和后续请求长时间不返回，前端看起来像按钮没有反应。</p>
+              <p><strong>原因：</strong><code>pipeline_download</code> 和 <code>pipeline_analyze</code> 声明为 async background task，但内部实际是同步 <code>yt-dlp</code>、<code>ffmpeg</code> 和 Vision 验收；Starlette 会在事件循环里执行 async background task，导致长抽帧把 API 主循环堵住。</p>
+              <p><strong>改动：</strong>下载和抽帧 pipeline 改为普通同步函数，让 FastAPI/Starlette 按线程池后台任务执行；<code>analyze_queue_worker</code> 也改为同步 worker。服务启动恢复时，如果磁盘里有重启前遗留的 <code>downloading</code>、<code>splitting</code> 或 <code>transcribing</code> 运行态，会恢复成可重试状态，避免按钮一直 disabled。</p>
+              <p><strong>影响：</strong><code>api/main.py</code>、<code>docs/source-analysis.html</code>。已重启本地 4291 后端并验证 <code>/health</code> 立即返回；遗留的 <code>8b37e65521a6</code> job 已恢复为 <code>downloaded</code>，可重新点击抽帧。</p>
+            </div>
+          </article>
           <article class="change">
             <header>
               <h3>2026-05-14 · 生视频接入 SKG 豆包网关</h3>
diff --git a/web/app/page.tsx b/web/app/page.tsx
index 0caff3f..a4ac1b3 100644
--- a/web/app/page.tsx
+++ b/web/app/page.tsx
@@ -19,7 +19,7 @@ import { ThemeToggle } from "@/components/theme-toggle"
 import { AudioStrip } from "@/components/audio-strip"
 import {
   addManualFrame, analyzeJob, createJob, getJob, listJobs, uploadJob, deleteJob, deleteFrame, deleteGeneratedImage,
-  deleteGeneratedVideo, deleteCutout, generateStoryboardVideo, createProductFusionGuide,
+  deleteGeneratedVideo, deleteCutout, generateStoryboardVideo, createProductFusionGuide, triggerTranscribe,
   type Job, type ImageRef, type ProductFusionShot, type StoryboardScene, type FrameExtractMode, type FrameExtractQuality, type FrameExtractTarget,
 } from "@/lib/api"
 import { TRANSPARENT_HUMAN_NEGATIVE_PROMPT, TRANSPARENT_HUMAN_VIDEO_PROMPT } from "@/lib/workflow-target"
@@ -385,6 +385,54 @@ export default function Home() {
     toast.success(`已复制：${ref.label || (ref.kind === "keyframe" ? "关键帧" : "元素")} · 到分镜头编排工作台粘贴`)
   }, [])
 
+  /**
+   * Starts (or restarts) audio processing for a job by calling `triggerTranscribe`.
+   *
+   * @param jobId - Job to process; falls back to `activeJobId` when omitted.
+   * @param options - `silent: true` suppresses every toast (used by the auto-start effect below);
+   *   a failure is then reported through `console.warn` instead of being dropped.
+   */
+  const handleTranscribeAudio = useCallback(async (jobId?: string, options?: { silent?: boolean }) => {
+    const silent = options?.silent === true
+    const targetId = jobId ?? activeJobId
+    if (!targetId) return
+    const target = jobs.find((item) => item.id === targetId)
+    if (!target) return
+    // Same status set that `trigger_transcribe` in api/main.py accepts (it answers 409 for anything else).
+    if (!["frames_extracted", "transcribed", "failed"].includes(target.status)) {
+      if (!silent) toast.info("先完成抽帧，音频轨会自动开始处理")
+      return
+    }
+    try {
+      const updated = await triggerTranscribe(targetId)
+      updateJobInList(updated)
+      if (!silent) toast.success("音频处理已开始")
+    } catch (e) {
+      const reason = e instanceof Error ? e.message : String(e)
+      if (silent) {
+        // Silent callers show no toast, so leave a trace instead of swallowing the failure.
+        console.warn(`[audio] failed to start audio processing for job ${targetId}: ${reason}`)
+      } else {
+        toast.error("音频处理启动失败：" + reason)
+      }
+    }
+  }, [activeJobId, jobs, updateJobInList])
+
+  // Ids of jobs for which an automatic audio start has already been attempted.
+  // Entries are never removed, so each job is auto-started at most once while this component stays mounted.
+  const autoAudioStartedRef = useRef<Set<string>>(new Set())
+  useEffect(() => {
+    for (const item of jobs) {
+      const audioStatus = item.audio_script?.status ?? "idle"
+      const hasAudioOutput = item.transcript.length > 0 || !!item.audio_script?.rewritten_text
+      const ready = item.status === "frames_extracted" && !hasAudioOutput && audioStatus !== "rewriting"
+      if (!ready || autoAudioStartedRef.current.has(item.id)) continue
+      // Record the attempt before the request resolves so later runs of this effect do not fire it again.
+      autoAudioStartedRef.current.add(item.id)
+      void handleTranscribeAudio(item.id, { silent: true })
+    }
+  }, [jobs, handleTranscribeAudio])
+
   const handleQuickGenerateVideo = useCallback(async (frameIdx: number, scene: StoryboardScene, model: string) => {
     if (!job) return
     const frame = job.frames.find((f) => f.index === frameIdx)
@@ -696,9 +726,10 @@ export default function Home() {
     clipboard,
     onCopyImage: handleCopyImage,
     onGenerateProductFusionVideo: handleGenerateProductFusionVideo,
+    onTranscribeAudio: handleTranscribeAudio,
     pinnedNodes,
     onToggleNodePin: handleToggleNodePin,
-  }), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, pinnedNodes, handleToggleNodePin])
+  }), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, handleTranscribeAudio, pinnedNodes, handleToggleNodePin])
 
   // 用 useNodesState 让 ReactFlow 自己管位置（避免轮询时重置 drag）
   const savedSizes = useMemo(() => loadNodeSizes(), [])
diff --git a/web/components/audio-strip.tsx b/web/components/audio-strip.tsx
index f0bbd12..0b2d16f 100644
--- a/web/components/audio-strip.tsx
+++ b/web/components/audio-strip.tsx
@@ -74,7 +74,7 @@ function SegmentCard({
     >
       <div className="mb-2 flex items-center justify-between gap-3">
         <span className="font-mono text-[10px] text-[var(--text-faint)]">
-          {segment.start.toFixed(1)}s -> {segment.end.toFixed(1)}s
+          {segment.start.toFixed(1)}s to {segment.end.toFixed(1)}s
         </span>
         <span className="rounded-full border border-white/10 px-2 py-0.5 text-[9.5px] uppercase tracking-widest text-[var(--text-faint)]">
           #{segment.index + 1}
diff --git a/web/components/nodes/index.tsx b/web/components/nodes/index.tsx
index e5ba888..579075b 100644
--- a/web/components/nodes/index.tsx
+++ b/web/components/nodes/index.tsx
@@ -11,7 +11,7 @@ import {
   Link2, Upload, Download, Scissors, Image as ImageIcon,
   Mic, Languages, FileEdit, Film, FileVideo, Loader2, Plus, X, LayoutGrid, Maximize2,
   Copy, Trash2, Move, PanelLeft, PanelRight, PanelBottom, ChevronLeft, ChevronRight, SlidersHorizontal,
-  CheckCircle2, AlertTriangle, Sparkles, Package,
+  CheckCircle2, AlertTriangle, Sparkles, Package, PlayCircle, RotateCcw,
 } from "lucide-react"
 import { toast } from "sonner"
 import { NodeShell, type NodeStatus, type NodeKind } from "./node-shell"
@@ -75,6 +75,7 @@ export interface NodeData {
   clipboard?: ImageRef | null
   onCopyImage?: (ref: ImageRef) => void  // 复制图片到全局剪贴板（粘贴到分镜头编排插槽）
   onGenerateProductFusionVideo?: (frameIdx: number, shot: ProductFusionShot) => Promise<void> | void
+  onTranscribeAudio?: (jobId?: string) => Promise<void> | void
   pinnedNodes?: Set<string>              // 已钉住的节点 id 集合 — 钉住后位置 + 尺寸锁定
   onToggleNodePin?: (id: string) => void
 }
@@ -2108,6 +2109,7 @@ export function AudioNode({ data, selected }: any) {
   const voiceUrl = apiAssetUrl(audioScript?.voice_url)
   const hasASR = transcript.length > 0
   const isRewriting = audioScript?.status === "rewriting"
+  const canTriggerAudio = !!job && ["frames_extracted", "transcribed", "failed"].includes(job.status) && !isRewriting && job.status !== "transcribing"
   const originalPreview = transcript
     .slice(0, 2)
     .map((s) => (s.zh || s.en).trim())
@@ -2137,6 +2139,19 @@ export function AudioNode({ data, selected }: any) {
             {audioScript?.rewrite_model || "AUDIO_REWRITE_MODEL"} → {audioScript?.voice_model || "MiniMax T2A"}
           </span>
         </div>
+        {canTriggerAudio && (
+          <button
+            type="button"
+            onClick={(e) => {
+              e.stopPropagation()
+              void d.onTranscribeAudio?.(job.id)
+            }}
+            className="inline-flex min-h-8 w-full items-center justify-center gap-1.5 rounded-md border border-violet-300/25 bg-violet-400/10 px-2.5 py-1.5 text-[11px] font-medium text-[var(--text-strong)] transition hover:border-violet-200/45 hover:bg-violet-400/18"
+          >
+            {hasASR || rewrittenText ? <RotateCcw className="h-3.5 w-3.5" /> : <PlayCircle className="h-3.5 w-3.5" />}
+            {hasASR || rewrittenText ? "重新处理音频" : "开始音频处理"}
+          </button>
+        )}
         {(originalPreview || rewrittenText) && (
           <div className="grid gap-2">
             {originalPreview && (