auto-save 2026-05-14 10:51 (~7)

This commit is contained in:
2026-05-14 10:53:54 +08:00
parent d0abed6740
commit 8bd52f676a
7 changed files with 104 additions and 28 deletions

View File

@@ -1,26 +1,5 @@
{
"entries": [
{
"files_changed": 1,
"hash": "6128084",
"message": "auto-save 2026-05-13 01:48 (~1)",
"ts": "2026-05-13T01:50:21+08:00",
"type": "commit"
},
{
"files_changed": 1,
"hash": "1646390",
"message": "auto-save 2026-05-13 01:56 (~1)",
"ts": "2026-05-13T01:56:16+08:00",
"type": "commit"
},
{
"files_changed": 1,
"hash": "7fd0057",
"message": "auto-save 2026-05-13 02:01 (~1)",
"ts": "2026-05-13T02:02:13+08:00",
"type": "commit"
},
{
"files_changed": 1,
"hash": "f4a421b",
@@ -3311,6 +3290,25 @@
"message": "auto-save 2026-05-14 10:36 (~5)",
"hash": "1014114",
"files_changed": 5
},
{
"ts": "2026-05-14T10:45:48+08:00",
"type": "commit",
"message": "auto-save 2026-05-14 10:45 (+1, ~5)",
"hash": "d0abed6",
"files_changed": 6
},
{
"ts": "2026-05-14T02:46:10Z",
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 3 项未提交变更 · 最近提交auto-save 2026-05-14 10:45 (+1, ~5)",
"files_changed": 3
},
{
"ts": "2026-05-14T02:48:38Z",
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 5 项未提交变更 · 最近提交auto-save 2026-05-14 10:45 (+1, ~5)",
"files_changed": 5
}
]
}

View File

@@ -20,6 +20,7 @@ uvicorn main:app --port 4291 --reload
- `GET /jobs/{id}` — 当前状态 + 产物
- `POST /jobs/{id}/transcribe` — 触发 ASR + 翻译 + SKG 文案改写;配置 MiniMax 后生成配音
- `GET /jobs/{id}/video.mp4` — 原视频
- `GET /jobs/{id}/audio.wav` — 拆轨后的原始音频,供前端底部音频条生成波形
- `GET /jobs/{id}/audio-script.mp3` — 改写文案的 MiniMax 配音
- `GET /jobs/{id}/frames/{i}.jpg` — 第 i 张关键帧(0-9)

View File

@@ -565,7 +565,7 @@ async def lifespan(_: FastAPI):
source_exists = (p / "source.mp4").exists()
if job.status in {"created", "downloading"}:
if source_exists:
update(job, status="downloaded", progress=25, message="服务重启 · 视频已恢复,可重新解析")
update(job, status="downloaded", progress=25, error="", message="服务重启 · 视频已恢复,可重新解析")
else:
update(job, status="failed", message="服务重启 · 下载任务已中断,请重新提交")
elif job.status == "splitting":
@@ -573,6 +573,7 @@ async def lifespan(_: FastAPI):
job,
status="frames_extracted" if job.frames else "downloaded",
progress=70 if job.frames else 25,
error="",
message="服务重启 · 上次抽帧已中断,可重新抽帧",
)
elif job.status == "transcribing":
@@ -587,6 +588,7 @@ async def lifespan(_: FastAPI):
job,
status="frames_extracted",
progress=70,
error="",
audio_script=audio_script,
message="服务重启 · 上次音频处理已中断,可重新处理",
)
@@ -1182,6 +1184,7 @@ def pipeline_download(job_id: str) -> None:
width=int(v_stream["width"]) if v_stream else 0,
height=int(v_stream["height"]) if v_stream else 0,
progress=25,
error="",
message=f"视频就绪 · {duration:.1f}s · 等待解析",
)
except Exception as e:
@@ -1333,6 +1336,7 @@ def pipeline_analyze(
status="frames_extracted",
frames=merged_frames,
progress=70,
error="",
message=final_message,
)
@@ -1928,6 +1932,7 @@ async def trigger_analyze(
job,
status="splitting",
progress=30,
error="",
message="排队等待抽帧" if ANALYZE_WORKER_RUNNING or position > 1 else "准备抽帧…",
)
if not ANALYZE_WORKER_RUNNING:
@@ -1999,8 +2004,9 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job:
job = JOBS.get(job_id)
if not job:
raise HTTPException(404, "job not found")
if job.status != "frames_extracted":
raise HTTPException(409, f"status must be frames_extracted, got {job.status}")
if job.status not in {"frames_extracted", "transcribed", "failed"}:
raise HTTPException(409, f"status must be frames_extracted/transcribed/failed, got {job.status}")
update(job, status="transcribing", progress=max(job.progress, 72), error="", message="准备音频转写…")
bg.add_task(pipeline_transcribe, job_id)
return job

View File

@@ -918,6 +918,18 @@ SubjectAsset {
<h2>变更记录</h2>
<p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
<div class="changelog">
<article class="change">
<header>
<h3>2026-05-14 · 新增底部可伸缩音频条</h3>
<span class="tag gray">Audio</span>
<span class="tag violet">Timeline</span>
</header>
<div class="body">
<p><strong>问题:</strong>音频和文案只在节点或侧栏里展示,审核时缺少“文字和声音时间轴对应”的空间;英文口播和中文翻译也没有上下对齐。</p>
<p><strong>改动:</strong>新增 <code>web/components/audio-strip.tsx</code>,在主工作台底部吸附显示,可拖拽调整高度、可收起。每个音频段按时间横向排列,上方显示英文,中间显示中文翻译,下方显示对应波形条;右侧显示改写后的 SKG 口播、MiniMax 配音和产品依据。后端新增 <code>GET /jobs/{id}/audio.wav</code> 只读接口,前端用 Web Audio API 解码生成波形峰值。</p>
<p><strong>影响:</strong><code>web/app/page.tsx</code>、<code>web/components/audio-strip.tsx</code>、<code>web/lib/api.ts</code>、<code>api/main.py</code>、<code>docs/source-analysis.html</code></p>
</div>
</article>
<article class="change">
<header>
<h3>2026-05-14 · 音频结果改为改前/改后对照展示</h3>
@@ -942,6 +954,19 @@ SubjectAsset {
<p><strong>影响:</strong><code>web/app/page.tsx</code>、<code>docs/source-analysis.html</code>。后端轮询本来已经覆盖所有运行中的 job,这轮主要修正前端 UI 工作上下文。</p>
</div>
</article>
<article class="change">
<header>
<h3>2026-05-14 · 抽帧后台任务不再卡住 API</h3>
<span class="tag blue">API</span>
<span class="tag orange">抽帧</span>
</header>
<div class="body">
<p><strong>问题:</strong>点击视频抽帧时,后端 4291 端口能连接但 <code>/health</code> 和后续请求长时间不返回,前端看起来像按钮没有反应。</p>
<p><strong>原因:</strong><code>pipeline_download</code>、<code>pipeline_analyze</code> 声明为 async background task,但内部实际是同步 <code>yt-dlp</code>、<code>ffmpeg</code> 和 Vision 验收;Starlette 会在事件循环里执行 async background task,导致长抽帧把 API 主循环堵住。</p>
<p><strong>改动:</strong>下载和抽帧 pipeline 改为普通同步函数,让 FastAPI/Starlette 按线程池后台任务执行;<code>analyze_queue_worker</code> 也改为同步 worker。服务启动恢复时,如果磁盘里有重启前遗留的 <code>downloading</code>、<code>splitting</code>、<code>transcribing</code> 运行态,会恢复成可重试状态,避免按钮一直 disabled。</p>
<p><strong>影响:</strong><code>api/main.py</code>、<code>docs/source-analysis.html</code>。已重启本地 4291 后端并验证 <code>/health</code> 立即返回;遗留的 <code>8b37e65521a6</code> job 已恢复为 <code>downloaded</code>,可重新点击抽帧。</p>
</div>
</article>
<article class="change">
<header>
<h3>2026-05-14 · 生视频接入 SKG 豆包网关</h3>

View File

@@ -19,7 +19,7 @@ import { ThemeToggle } from "@/components/theme-toggle"
import { AudioStrip } from "@/components/audio-strip"
import {
addManualFrame, analyzeJob, createJob, getJob, listJobs, uploadJob, deleteJob, deleteFrame, deleteGeneratedImage,
deleteGeneratedVideo, deleteCutout, generateStoryboardVideo, createProductFusionGuide,
deleteGeneratedVideo, deleteCutout, generateStoryboardVideo, createProductFusionGuide, triggerTranscribe,
type Job, type ImageRef, type ProductFusionShot, type StoryboardScene, type FrameExtractMode, type FrameExtractQuality, type FrameExtractTarget,
} from "@/lib/api"
import { TRANSPARENT_HUMAN_NEGATIVE_PROMPT, TRANSPARENT_HUMAN_VIDEO_PROMPT } from "@/lib/workflow-target"
@@ -385,6 +385,36 @@ export default function Home() {
toast.success(`已复制:${ref.label || (ref.kind === "keyframe" ? "关键帧" : "元素")} · 到分镜头编排工作台粘贴`)
}, [])
// Starts (or restarts) audio processing for a job via `triggerTranscribe`.
// - `jobId` falls back to the currently active job when omitted.
// - `options.silent` suppresses every toast (used by the automatic trigger).
// Jobs whose status is not frames_extracted / transcribed / failed are left
// untouched; a failed request is reported through a toast and never rethrown.
const handleTranscribeAudio = useCallback(async (jobId?: string, options?: { silent?: boolean }) => {
  const shouldNotify = !options?.silent
  const resolvedId = jobId ?? activeJobId
  if (!resolvedId) return

  const matchedJob = jobs.find((candidate) => candidate.id === resolvedId)
  if (!matchedJob) return

  // Only these statuses are accepted as a starting point for transcription.
  const triggerableStatuses = ["frames_extracted", "transcribed", "failed"]
  if (!triggerableStatuses.includes(matchedJob.status)) {
    if (shouldNotify) toast.info("先完成抽帧,音频轨会自动开始处理")
    return
  }

  try {
    const refreshedJob = await triggerTranscribe(resolvedId)
    updateJobInList(refreshedJob)
    if (shouldNotify) toast.success("音频处理已开始")
  } catch (e) {
    if (shouldNotify) {
      const reason = e instanceof Error ? e.message : String(e)
      toast.error(`音频处理启动失败:${reason}`)
    }
  }
}, [activeJobId, jobs, updateJobInList])
// Ids of jobs for which audio processing has already been auto-triggered,
// so each job is kicked off at most once per component lifetime.
const autoAudioStartedRef = useRef<Set<string>>(new Set())
// Whenever the job list changes, silently start audio processing for every
// job that has finished frame extraction, has no transcript or rewritten
// script yet, and is not currently being rewritten.
useEffect(() => {
  const alreadyStarted = autoAudioStartedRef.current
  jobs.forEach((candidate) => {
    const rewriting = (candidate.audio_script?.status ?? "idle") === "rewriting"
    const producedAudio = candidate.transcript.length > 0 || Boolean(candidate.audio_script?.rewritten_text)
    const eligible = candidate.status === "frames_extracted" && !producedAudio && !rewriting
    if (eligible && !alreadyStarted.has(candidate.id)) {
      // Mark before dispatching so a re-render cannot trigger the job twice.
      alreadyStarted.add(candidate.id)
      void handleTranscribeAudio(candidate.id, { silent: true })
    }
  })
}, [jobs, handleTranscribeAudio])
const handleQuickGenerateVideo = useCallback(async (frameIdx: number, scene: StoryboardScene, model: string) => {
if (!job) return
const frame = job.frames.find((f) => f.index === frameIdx)
@@ -696,9 +726,10 @@ export default function Home() {
clipboard,
onCopyImage: handleCopyImage,
onGenerateProductFusionVideo: handleGenerateProductFusionVideo,
onTranscribeAudio: handleTranscribeAudio,
pinnedNodes,
onToggleNodePin: handleToggleNodePin,
}), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, pinnedNodes, handleToggleNodePin])
}), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, handleTranscribeAudio, pinnedNodes, handleToggleNodePin])
// 用 useNodesState 让 ReactFlow 自己管位置(避免轮询时重置 drag)
const savedSizes = useMemo(() => loadNodeSizes(), [])

View File

@@ -74,7 +74,7 @@ function SegmentCard({
>
<div className="mb-2 flex items-center justify-between gap-3">
<span className="font-mono text-[10px] text-[var(--text-faint)]">
{segment.start.toFixed(1)}s -> {segment.end.toFixed(1)}s
{segment.start.toFixed(1)}s to {segment.end.toFixed(1)}s
</span>
<span className="rounded-full border border-white/10 px-2 py-0.5 text-[9.5px] uppercase tracking-widest text-[var(--text-faint)]">
#{segment.index + 1}

View File

@@ -11,7 +11,7 @@ import {
Link2, Upload, Download, Scissors, Image as ImageIcon,
Mic, Languages, FileEdit, Film, FileVideo, Loader2, Plus, X, LayoutGrid, Maximize2,
Copy, Trash2, Move, PanelLeft, PanelRight, PanelBottom, ChevronLeft, ChevronRight, SlidersHorizontal,
CheckCircle2, AlertTriangle, Sparkles, Package,
CheckCircle2, AlertTriangle, Sparkles, Package, PlayCircle, RotateCcw,
} from "lucide-react"
import { toast } from "sonner"
import { NodeShell, type NodeStatus, type NodeKind } from "./node-shell"
@@ -75,6 +75,7 @@ export interface NodeData {
clipboard?: ImageRef | null
onCopyImage?: (ref: ImageRef) => void // 复制图片到全局剪贴板(粘贴到分镜头编排插槽)
onGenerateProductFusionVideo?: (frameIdx: number, shot: ProductFusionShot) => Promise<void> | void
onTranscribeAudio?: (jobId?: string) => Promise<void> | void
pinnedNodes?: Set<string> // 已钉住的节点 id 集合 — 钉住后位置 + 尺寸锁定
onToggleNodePin?: (id: string) => void
}
@@ -2108,6 +2109,7 @@ export function AudioNode({ data, selected }: any) {
const voiceUrl = apiAssetUrl(audioScript?.voice_url)
const hasASR = transcript.length > 0
const isRewriting = audioScript?.status === "rewriting"
const canTriggerAudio = !!job && ["frames_extracted", "transcribed", "failed"].includes(job.status) && !isRewriting && job.status !== "transcribing"
const originalPreview = transcript
.slice(0, 2)
.map((s) => (s.zh || s.en).trim())
@@ -2137,6 +2139,19 @@ export function AudioNode({ data, selected }: any) {
{audioScript?.rewrite_model || "AUDIO_REWRITE_MODEL"} {audioScript?.voice_model || "MiniMax T2A"}
</span>
</div>
{canTriggerAudio && (
<button
type="button"
onClick={(e) => {
e.stopPropagation()
void d.onTranscribeAudio?.(job.id)
}}
className="inline-flex min-h-8 w-full items-center justify-center gap-1.5 rounded-md border border-violet-300/25 bg-violet-400/10 px-2.5 py-1.5 text-[11px] font-medium text-[var(--text-strong)] transition hover:border-violet-200/45 hover:bg-violet-400/18"
>
{hasASR || rewrittenText ? <RotateCcw className="h-3.5 w-3.5" /> : <PlayCircle className="h-3.5 w-3.5" />}
{hasASR || rewrittenText ? "重新处理音频" : "开始音频处理"}
</button>
)}
{(originalPreview || rewrittenText) && (
<div className="grid gap-2">
{originalPreview && (