From 8bd52f676a11fb4364c82f6ecccf535be1d8c373 Mon Sep 17 00:00:00 2001 From: kang Date: Thu, 14 May 2026 10:53:54 +0800 Subject: [PATCH] auto-save 2026-05-14 10:51 (~7) --- .memory/worklog.json | 40 ++++++++++++++++------------------ api/README.md | 1 + api/main.py | 12 +++++++--- docs/source-analysis.html | 25 +++++++++++++++++++++ web/app/page.tsx | 35 +++++++++++++++++++++++++++-- web/components/audio-strip.tsx | 2 +- web/components/nodes/index.tsx | 17 ++++++++++++++- 7 files changed, 104 insertions(+), 28 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index 6f66e57..d593210 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1,26 +1,5 @@ { "entries": [ - { - "files_changed": 1, - "hash": "6128084", - "message": "auto-save 2026-05-13 01:48 (~1)", - "ts": "2026-05-13T01:50:21+08:00", - "type": "commit" - }, - { - "files_changed": 1, - "hash": "1646390", - "message": "auto-save 2026-05-13 01:56 (~1)", - "ts": "2026-05-13T01:56:16+08:00", - "type": "commit" - }, - { - "files_changed": 1, - "hash": "7fd0057", - "message": "auto-save 2026-05-13 02:01 (~1)", - "ts": "2026-05-13T02:02:13+08:00", - "type": "commit" - }, { "files_changed": 1, "hash": "f4a421b", @@ -3311,6 +3290,25 @@ "message": "auto-save 2026-05-14 10:36 (~5)", "hash": "1014114", "files_changed": 5 + }, + { + "ts": "2026-05-14T10:45:48+08:00", + "type": "commit", + "message": "auto-save 2026-05-14 10:45 (+1, ~5)", + "hash": "d0abed6", + "files_changed": 6 + }, + { + "ts": "2026-05-14T02:46:10Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 3 项未提交变更 · 最近提交:auto-save 2026-05-14 10:45 (+1, ~5)", + "files_changed": 3 + }, + { + "ts": "2026-05-14T02:48:38Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 5 项未提交变更 · 最近提交:auto-save 2026-05-14 10:45 (+1, ~5)", + "files_changed": 5 } ] } diff --git a/api/README.md b/api/README.md index 376568a..c4468f8 100644 --- a/api/README.md +++ b/api/README.md @@ -20,6 +20,7 @@ uvicorn 
main:app --port 4291 --reload - `GET /jobs/{id}` — 当前状态 + 产物 - `POST /jobs/{id}/transcribe` — 触发 ASR + 翻译 + SKG 文案改写;配置 MiniMax 后生成配音 - `GET /jobs/{id}/video.mp4` — 原视频 +- `GET /jobs/{id}/audio.wav` — 拆轨后的原始音频,供前端底部音频条生成波形 - `GET /jobs/{id}/audio-script.mp3` — 改写文案的 MiniMax 配音 - `GET /jobs/{id}/frames/{i}.jpg` — 第 i 张关键帧(0-9) diff --git a/api/main.py b/api/main.py index 4ec88be..a308bd4 100644 --- a/api/main.py +++ b/api/main.py @@ -565,7 +565,7 @@ async def lifespan(_: FastAPI): source_exists = (p / "source.mp4").exists() if job.status in {"created", "downloading"}: if source_exists: - update(job, status="downloaded", progress=25, message="服务重启 · 视频已恢复,可重新解析") + update(job, status="downloaded", progress=25, error="", message="服务重启 · 视频已恢复,可重新解析") else: update(job, status="failed", message="服务重启 · 下载任务已中断,请重新提交") elif job.status == "splitting": @@ -573,6 +573,7 @@ async def lifespan(_: FastAPI): job, status="frames_extracted" if job.frames else "downloaded", progress=70 if job.frames else 25, + error="", message="服务重启 · 上次抽帧已中断,可重新抽帧", ) elif job.status == "transcribing": @@ -587,6 +588,7 @@ async def lifespan(_: FastAPI): job, status="frames_extracted", progress=70, + error="", audio_script=audio_script, message="服务重启 · 上次音频处理已中断,可重新处理", ) @@ -1182,6 +1184,7 @@ def pipeline_download(job_id: str) -> None: width=int(v_stream["width"]) if v_stream else 0, height=int(v_stream["height"]) if v_stream else 0, progress=25, + error="", message=f"视频就绪 · {duration:.1f}s · 等待解析", ) except Exception as e: @@ -1333,6 +1336,7 @@ def pipeline_analyze( status="frames_extracted", frames=merged_frames, progress=70, + error="", message=final_message, ) @@ -1928,6 +1932,7 @@ async def trigger_analyze( job, status="splitting", progress=30, + error="", message="排队等待抽帧" if ANALYZE_WORKER_RUNNING or position > 1 else "准备抽帧…", ) if not ANALYZE_WORKER_RUNNING: @@ -1999,8 +2004,9 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job: job = JOBS.get(job_id) if not job: 
raise HTTPException(404, "job not found") - if job.status != "frames_extracted": - raise HTTPException(409, f"status must be frames_extracted, got {job.status}") + if job.status not in {"frames_extracted", "transcribed", "failed"}: + raise HTTPException(409, f"status must be frames_extracted/transcribed/failed, got {job.status}") + update(job, status="transcribing", progress=max(job.progress, 72), error="", message="准备音频转写…") bg.add_task(pipeline_transcribe, job_id) return job diff --git a/docs/source-analysis.html b/docs/source-analysis.html index cd876f6..0f13a5f 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -918,6 +918,18 @@ SubjectAsset {

变更记录

这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。

+
+
+

2026-05-14 · 新增底部可伸缩音频条

+ Audio + Timeline +
+
+

问题:音频和文案只在节点或侧栏里展示,审核时缺少“文字和声音时间轴对应”的空间;英文口播和中文翻译也没有上下对齐。

+

改动:新增 web/components/audio-strip.tsx,在主工作台底部吸附显示,可拖拽调整高度、可收起。每个音频段按时间横向排列,上方显示英文,中间显示中文翻译,下方显示对应波形条;右侧显示改写后的 SKG 口播、MiniMax 配音和产品依据。后端新增 GET /jobs/{id}/audio.wav 只读接口,前端用 Web Audio API 解码生成波形峰值。

+

影响:web/app/page.tsx、web/components/audio-strip.tsx、web/lib/api.ts、api/main.py、docs/source-analysis.html。

+
+

2026-05-14 · 音频结果改为改前/改后对照展示

@@ -942,6 +954,19 @@ SubjectAsset {

影响:web/app/page.tsx、docs/source-analysis.html。后端轮询本来已经覆盖所有运行中的 job,这轮主要修正前端 UI 工作上下文。

+
+
+

2026-05-14 · 抽帧后台任务不再卡住 API

+ API + 抽帧 +
+
+

问题:点击视频抽帧时,后端 4291 端口能连接但 /health 和后续请求长时间不返回,前端看起来像按钮没有反应。

+

原因:pipeline_download、pipeline_analyze 声明为 async background task,但内部实际是同步 yt-dlp、ffmpeg 和 Vision 验收;Starlette 会在事件循环里执行 async background task,导致长抽帧把 API 主循环堵住。

+

改动:下载和抽帧 pipeline 改为普通同步函数,让 FastAPI/Starlette 按线程池后台任务执行;analyze_queue_worker 也改为同步 worker。服务启动恢复时,如果磁盘里有重启前遗留的 downloading、splitting、transcribing 运行态,会恢复成可重试状态,避免按钮一直 disabled。

+

影响:api/main.py、docs/source-analysis.html。已重启本地 4291 后端并验证 /health 立即返回;遗留的 8b37e65521a6 job 已恢复为 downloaded,可重新点击抽帧。

+
+

2026-05-14 · 生视频接入 SKG 豆包网关

diff --git a/web/app/page.tsx b/web/app/page.tsx index 0caff3f..a4ac1b3 100644 --- a/web/app/page.tsx +++ b/web/app/page.tsx @@ -19,7 +19,7 @@ import { ThemeToggle } from "@/components/theme-toggle" import { AudioStrip } from "@/components/audio-strip" import { addManualFrame, analyzeJob, createJob, getJob, listJobs, uploadJob, deleteJob, deleteFrame, deleteGeneratedImage, - deleteGeneratedVideo, deleteCutout, generateStoryboardVideo, createProductFusionGuide, + deleteGeneratedVideo, deleteCutout, generateStoryboardVideo, createProductFusionGuide, triggerTranscribe, type Job, type ImageRef, type ProductFusionShot, type StoryboardScene, type FrameExtractMode, type FrameExtractQuality, type FrameExtractTarget, } from "@/lib/api" import { TRANSPARENT_HUMAN_NEGATIVE_PROMPT, TRANSPARENT_HUMAN_VIDEO_PROMPT } from "@/lib/workflow-target" @@ -385,6 +385,36 @@ export default function Home() { toast.success(`已复制:${ref.label || (ref.kind === "keyframe" ? "关键帧" : "元素")} · 到分镜头编排工作台粘贴`) }, []) + const handleTranscribeAudio = useCallback(async (jobId?: string, options?: { silent?: boolean }) => { + const targetId = jobId ?? activeJobId + if (!targetId) return + const target = jobs.find((item) => item.id === targetId) + if (!target) return + if (!["frames_extracted", "transcribed", "failed"].includes(target.status)) { + if (!options?.silent) toast.info("先完成抽帧,音频轨会自动开始处理") + return + } + try { + const updated = await triggerTranscribe(targetId) + updateJobInList(updated) + if (!options?.silent) toast.success("音频处理已开始") + } catch (e) { + if (!options?.silent) toast.error("音频处理启动失败:" + (e instanceof Error ? e.message : String(e))) + } + }, [activeJobId, jobs, updateJobInList]) + + const autoAudioStartedRef = useRef>(new Set()) + useEffect(() => { + for (const item of jobs) { + const audioStatus = item.audio_script?.status ?? 
"idle" + const hasAudioOutput = item.transcript.length > 0 || !!item.audio_script?.rewritten_text + const ready = item.status === "frames_extracted" && !hasAudioOutput && audioStatus !== "rewriting" + if (!ready || autoAudioStartedRef.current.has(item.id)) continue + autoAudioStartedRef.current.add(item.id) + void handleTranscribeAudio(item.id, { silent: true }) + } + }, [jobs, handleTranscribeAudio]) + const handleQuickGenerateVideo = useCallback(async (frameIdx: number, scene: StoryboardScene, model: string) => { if (!job) return const frame = job.frames.find((f) => f.index === frameIdx) @@ -696,9 +726,10 @@ export default function Home() { clipboard, onCopyImage: handleCopyImage, onGenerateProductFusionVideo: handleGenerateProductFusionVideo, + onTranscribeAudio: handleTranscribeAudio, pinnedNodes, onToggleNodePin: handleToggleNodePin, - }), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, pinnedNodes, handleToggleNodePin]) + }), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, 
handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, handleTranscribeAudio, pinnedNodes, handleToggleNodePin]) // 用 useNodesState 让 ReactFlow 自己管位置(避免轮询时重置 drag) const savedSizes = useMemo(() => loadNodeSizes(), []) diff --git a/web/components/audio-strip.tsx b/web/components/audio-strip.tsx index f0bbd12..0b2d16f 100644 --- a/web/components/audio-strip.tsx +++ b/web/components/audio-strip.tsx @@ -74,7 +74,7 @@ function SegmentCard({ >
- {segment.start.toFixed(1)}s -> {segment.end.toFixed(1)}s + {segment.start.toFixed(1)}s to {segment.end.toFixed(1)}s #{segment.index + 1} diff --git a/web/components/nodes/index.tsx b/web/components/nodes/index.tsx index e5ba888..579075b 100644 --- a/web/components/nodes/index.tsx +++ b/web/components/nodes/index.tsx @@ -11,7 +11,7 @@ import { Link2, Upload, Download, Scissors, Image as ImageIcon, Mic, Languages, FileEdit, Film, FileVideo, Loader2, Plus, X, LayoutGrid, Maximize2, Copy, Trash2, Move, PanelLeft, PanelRight, PanelBottom, ChevronLeft, ChevronRight, SlidersHorizontal, - CheckCircle2, AlertTriangle, Sparkles, Package, + CheckCircle2, AlertTriangle, Sparkles, Package, PlayCircle, RotateCcw, } from "lucide-react" import { toast } from "sonner" import { NodeShell, type NodeStatus, type NodeKind } from "./node-shell" @@ -75,6 +75,7 @@ export interface NodeData { clipboard?: ImageRef | null onCopyImage?: (ref: ImageRef) => void // 复制图片到全局剪贴板(粘贴到分镜头编排插槽) onGenerateProductFusionVideo?: (frameIdx: number, shot: ProductFusionShot) => Promise | void + onTranscribeAudio?: (jobId?: string) => Promise | void pinnedNodes?: Set // 已钉住的节点 id 集合 — 钉住后位置 + 尺寸锁定 onToggleNodePin?: (id: string) => void } @@ -2108,6 +2109,7 @@ export function AudioNode({ data, selected }: any) { const voiceUrl = apiAssetUrl(audioScript?.voice_url) const hasASR = transcript.length > 0 const isRewriting = audioScript?.status === "rewriting" + const canTriggerAudio = !!job && ["frames_extracted", "transcribed", "failed"].includes(job.status) && !isRewriting && job.status !== "transcribing" const originalPreview = transcript .slice(0, 2) .map((s) => (s.zh || s.en).trim()) @@ -2137,6 +2139,19 @@ export function AudioNode({ data, selected }: any) { {audioScript?.rewrite_model || "AUDIO_REWRITE_MODEL"} → {audioScript?.voice_model || "MiniMax T2A"}
+ {canTriggerAudio && ( + + )} {(originalPreview || rewrittenText) && (
{originalPreview && (