feat: parallelize ad recreation intake

2026-05-18 10:31:18 +08:00
parent 75c5d113ee
commit 4c8cb066d6
5 changed files with 161 additions and 37 deletions
--- a/RULES.md
+++ b/RULES.md
@@ -11,7 +11,7 @@
 - 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解
 - 风格：`04-Dark-Gallery-Ambient`（路径：`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`）
 - 第一冲刺：步骤 1-4（下载 / 拆轨 / 关键帧 / ASR+翻译）
- 当前产品方向（2026-05-18 再确认）：先解决信息流广告快速复刻的第一步，不再沿用“开始后自动抽帧、分镜、元素生成、合成”的默认做法。主界面为“左侧素材输入列 + 右侧音频解析工作表”。用户粘贴 TK 链接或上传视频后点击“开始”，系统自动下载源视频；下载完成后优先提取原音频文案/字幕，并分析讲话人、语速节奏、背景音乐/环境声/音效。分镜工作台按逐句时间轴规划新口播、镜头类型、首帧/尾帧、人物需求和产品出现方式；不是所有分镜都必须是“人物 + 产品”，单条生成会按该行规划决定是否传产品图和相似主体参考图。
+- 当前产品方向（2026-05-18 再确认）：先解决信息流广告快速复刻的第一步，不再沿用“开始后线性完成抽帧、分镜、元素生成、合成”的旧做法。主界面为“左侧素材输入列 + 右侧信息流复刻工作表”。用户粘贴 TK 链接或上传视频后点击“开始分析”，系统自动下载源视频；下载完成后并行启动两条路：音频文案路提取原音频文案/字幕，并分析讲话人、语速节奏、背景音乐/环境声/音效；视频视觉路自动抽取 12 张参考帧，供人工选择可用主体并生成相似主体视图。产品图上传后独立形成产品资产包，自动识别视角/结构/比例并补缺角度。分镜工作台按逐句时间轴规划新口播、镜头类型、首帧/尾帧、人物需求和产品出现方式；单条或“一键提交全部”生成视频时，按该行规划自动调取产品图、人物主体和参考帧。

 ## 部署事实
 - 平台：VPS `76.13.31.179`（Ubuntu 24.04 / Docker Compose / Coolify Traefik）
--- a/api/main.py
+++ b/api/main.py
@@ -1657,8 +1657,11 @@ def pipeline_analyze(
            raise RuntimeError("source.mp4 不存在，先完成下载")

        wav = d / "audio.wav"
+        audio_running = job_id in AUDIO_WORKERS_RUNNING or job.audio_script.status == "rewriting"
        if wav.exists():
            update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35, source_audio_url=f"/jobs/{job_id}/audio.wav")
+        elif audio_running:
+            update(job, status="splitting", message="音频路并行处理中 · 准备抽帧…", progress=35)
        else:
            update(job, status="splitting", message="ffmpeg 拆分音轨…", progress=35)
            run([
@@ -1778,7 +1781,7 @@ def pipeline_analyze(
        )
        update(
            job,
-            status="frames_extracted",
+            status="transcribed" if job.transcript else "frames_extracted",
            frames=merged_frames,
            progress=70,
            error="",
@@ -3140,8 +3143,8 @@ async def trigger_analyze(
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
-    if job.status not in {"downloaded", "frames_extracted", "transcribed", "failed"}:
-        raise HTTPException(409, f"status must be downloaded/failed, got {job.status}")
+    if job.status not in {"downloaded", "frames_extracted", "transcribed", "transcribing", "failed"}:
+        raise HTTPException(409, f"status must be downloaded/transcribing/failed, got {job.status}")
    ANALYZE_QUEUE.append((job_id, frames, target, mode, quality))
    position = len(ANALYZE_QUEUE)
    update(
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
--- a/web/app/page.tsx
+++ b/web/app/page.tsx
@@ -58,6 +58,7 @@ const DEFAULT_PRODUCT_LIBRARY_IDS = [
  "desktop-skg-product-angle-03",
  "desktop-skg-product-angle-04",
 ]
+const VIDEO_READY_STATUSES: Job["status"][] = ["downloaded", "frames_extracted", "transcribed", "failed"] // job statuses that, together with a video_url, make the lane auto-start logic treat the source video as ready

 const PRODUCT_FUSION_WEARING_PROMPT = [
  "Product placement must be physically correct:",
@@ -229,7 +230,7 @@ export default function Home() {
      const created = await uploadJob(file)
      addJob(created)
      setProductionJobIds((prev) => new Set(prev).add(created.id))
-      toast.success(`已上传 ${created.id.slice(0, 8)}，下载完成后自动解析音频`)
+      toast.success(`已上传 ${created.id.slice(0, 8)}，视频就绪后自动跑音频和抽帧`)
    } catch (e) {
      toast.error("上传失败：" + (e instanceof Error ? e.message : String(e)))
    } finally {
@@ -461,6 +462,44 @@ export default function Home() {
    }
  }, [activeJobId, jobs, updateJobInList])

+  const startProductionLanesForJob = useCallback(async (target: Job) => { // Start the audio lane and then the visual lane for one job; a lane is skipped when it already has results, is running, or was already triggered.
+    const videoReady = !!target.video_url && VIDEO_READY_STATUSES.includes(target.status)
+    if (!videoReady) return // neither lane starts until the job has a video URL and a ready status
+
+    const audioKey = `${target.id}:audio`
+    const hasAudioResult = !!target.audio_script?.source_text || target.transcript.length > 0
+    const audioRunning = target.status === "transcribing" || target.audio_script?.status === "rewriting" // NOTE(review): the "transcribing" check cannot match here, because VIDEO_READY_STATUSES (checked above) does not contain it
+    if (!hasAudioResult && !audioRunning && !autoTriggeredRef.current.has(audioKey)) {
+      autoTriggeredRef.current.add(audioKey) // recorded before the await, so calls made while the request is pending skip this lane
+      try {
+        const updated = await triggerTranscribe(target.id)
+        updateJobInList(updated)
+        toast.info("音频路已启动：字幕、讲话人、节奏和背景音同步解析")
+      } catch (e) {
+        autoTriggeredRef.current.delete(audioKey) // forget the key so a later call can retry the audio lane
+        toast.error("音频解析启动失败：" + (e instanceof Error ? e.message : String(e)))
+      }
+    }
+
+    const visualKey = `${target.id}:visual`
+    const hasVisualResult = target.frames.length > 0 // still reads the `target` snapshot passed in, not the job returned by triggerTranscribe
+    const visualRunning = target.status === "splitting" // NOTE(review): cannot match here either; "splitting" is not in VIDEO_READY_STATUSES
+    if (!hasVisualResult && !visualRunning && !autoTriggeredRef.current.has(visualKey)) {
+      autoTriggeredRef.current.add(visualKey)
+      const frameTarget = frameTargets[target.id] ?? "motion" // per-job settings fall back to "motion" / 12 frames / "accurate"
+      const frameCount = frameCounts[target.id] ?? 12
+      const frameQuality = frameQualities[target.id] ?? "accurate"
+      try {
+        const updated = await analyzeJob(target.id, frameCount, frameTarget, "replace", frameQuality)
+        updateJobInList(updated)
+        toast.info(`视觉路已启动：${FRAME_QUALITY_LABELS[frameQuality]} · ${FRAME_TARGET_LABELS[frameTarget]} · ${frameCount} 张参考帧`)
+      } catch (e) {
+        autoTriggeredRef.current.delete(visualKey) // forget the key so a later call can retry the visual lane
+        toast.error("视觉抽帧启动失败：" + (e instanceof Error ? e.message : String(e)))
+      }
+    }
+  }, [frameCounts, frameQualities, frameTargets, updateJobInList])
+
  const ensureDefaultProductRefs = useCallback(async (jobId: string) => {
    const cached = defaultProductRefsByJob[jobId]
    if (cached?.length >= 4) return cached.slice(0, 4)
@@ -538,26 +577,19 @@ export default function Home() {
      return
    }
    setProductionJobIds((prev) => new Set(prev).add(target.id))
-    toast.success("已进入第一步：下载完成后自动解析音频文案、讲话人和背景音")
-    if (target.video_url && ["downloaded", "frames_extracted", "transcribed", "failed"].includes(target.status)) {
-      void handleTranscribeAudio(target.id, { silent: true })
-    }
-  }, [handleSubmit, handleTranscribeAudio, job])
+    toast.success("已进入并行素材分析：下载完成后自动跑音频文案路和视觉抽帧路")
+    void startProductionLanesForJob(target)
+  }, [handleSubmit, job, startProductionLanesForJob])

  useEffect(() => {
    if (productionJobIds.size === 0) return
    for (const item of jobs) {
      if (!productionJobIds.has(item.id)) continue
-      const videoReady = !!item.video_url && ["downloaded", "frames_extracted", "transcribed", "failed"].includes(item.status)
+      const videoReady = !!item.video_url && VIDEO_READY_STATUSES.includes(item.status)
      if (!videoReady) continue
-      const audioKey = `${item.id}:audio`
-      const hasAudioResult = !!item.audio_script?.source_text || item.transcript.length > 0
-      if (!autoTriggeredRef.current.has(audioKey) && item.audio_script?.status !== "rewriting" && !hasAudioResult) {
-        autoTriggeredRef.current.add(audioKey)
-        void handleTranscribeAudio(item.id, { silent: true })
-      }
+      void startProductionLanesForJob(item)
    }
-  }, [handleTranscribeAudio, jobs, productionJobIds])
+  }, [jobs, productionJobIds, startProductionLanesForJob])

  const handleQuickGenerateVideo = useCallback(async (frameIdx: number, scene: StoryboardScene, model: string) => {
    if (!job) return
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -285,6 +285,13 @@ function countReadySegments(job: Job | null, drafts: DraftSegment[]) {
  return frameStoryboards + draftCount
 }

+function countSubjectAssetViews(job: Job | null) { // Total number of subject_assets entries across every element of every frame of the job.
+  if (!job) return 0 // a null job counts as 0
+  return job.frames.reduce((sum, frame) =>
+    sum + (frame.elements ?? []).reduce((inner, element) => inner + (element.subject_assets?.length ?? 0), 0), // frames without elements and elements without subject_assets add 0
+  0)
+}
+
 function guessSubjectKind(name: string): SubjectKind {
  return /人|人物|模特|骨架|身体|脸|手|person|people|human|body|face|hand|character/i.test(name)
    ? "living"
@@ -976,6 +983,11 @@ export function AdRecreationBoard({
  const readySegments = countReadySegments(job, draftSegments)
  const transcriptCount = job?.transcript.length ?? 0
  const backgroundReady = !!job?.audio_script?.background_audio_profile?.trim()
+  const audioRunning = job?.status === "transcribing" || job?.audio_script?.status === "rewriting"
+  const visualRunning = job?.status === "splitting"
+  const visualReady = (job?.frames.length ?? 0) > 0
+  const subjectAssetCount = countSubjectAssetViews(job)
+  const productAssetCount = job?.product_refs?.length ?? 0

  useEffect(() => {
    setDraftSegments([])
@@ -1197,7 +1209,7 @@ export function AdRecreationBoard({
                </div>
                <div className="flex shrink-0 items-center gap-2">
                  <ModelTrace trace={audioModelTrace(runtimeModels)} compact />
-                  <ActionButton disabled={!job?.video_url || job.status === "transcribing"} onClick={() => data.onTranscribeAudio?.(job?.id)}>
+                  <ActionButton disabled={!job?.video_url || audioRunning} onClick={() => data.onTranscribeAudio?.(job?.id)}>
                    <Mic className="h-3.5 w-3.5" />
                    解析音频
                  </ActionButton>
@@ -1208,8 +1220,9 @@ export function AdRecreationBoard({
                <div className="flex min-w-0 flex-wrap items-center gap-1.5 text-[11px] text-white/46">
                  <Requirement label="素材" ready={!!job} detail={job ? shortId(job.id) : "待输入"} />
                  <Requirement label="视频" ready={!!job?.video_url} detail={job?.status === "downloading" ? "下载中" : job?.video_url ? "已就绪" : "待下载"} />
-                  <Requirement label="音频" ready={!!job?.source_audio_url} detail={job?.status === "transcribing" ? "解析中" : job?.source_audio_url ? "已提取" : "待提取"} />
+                  <Requirement label="音频" ready={!!job?.source_audio_url} detail={audioRunning ? "解析中" : job?.source_audio_url ? "已提取" : "待提取"} />
                  <Requirement label="文案" ready={audioReady} detail={audioReady ? `${transcriptCount} 段` : "待解析"} />
+                  <Requirement label="参考帧" ready={visualReady} detail={visualRunning ? "抽帧中" : visualReady ? `${job?.frames.length ?? 0} 张` : "待抽帧"} />
                </div>

                <details className="group rounded-md border border-white/10 bg-black/28 p-2">
@@ -1225,6 +1238,12 @@ export function AdRecreationBoard({
                  </div>
                </details>
              </div>
+              <div className="mt-2 grid grid-cols-1 gap-1.5 xl:grid-cols-4">
+                <PipelineLane label="音频文案路" detail={audioReady ? `${transcriptCount} 段字幕可规划` : "字幕 / 讲话人 / 节奏"} ready={audioReady} running={audioRunning} />
+                <PipelineLane label="视频视觉路" detail={visualReady ? `${job?.frames.length ?? 0} 张参考帧` : "关键帧 / 主体 / 场景"} ready={visualReady} running={visualRunning} />
+                <PipelineLane label="主体资产" detail={subjectAssetCount ? `${subjectAssetCount} 张主体视图` : "人工选帧后生成"} ready={subjectAssetCount > 0} />
+                <PipelineLane label="产品资产" detail={productAssetCount ? `${productAssetCount} 张产品图` : "上传后自动识别"} ready={productAssetCount > 0} />
+              </div>
            </header>

            <div className="min-h-0 flex-1 overflow-y-auto p-4">
@@ -1298,7 +1317,7 @@ function MaterialColumn({
          disabled={data.submitting || (!url.trim() && !job)}
          className="inline-flex h-10 items-center justify-center rounded-md bg-rose-600 px-3 text-[13px] font-semibold text-white transition hover:bg-rose-500 disabled:cursor-not-allowed disabled:opacity-45"
        >
-          开始
+          开始分析
        </button>
        <button
          type="button"
@@ -2148,6 +2167,7 @@ function AudioStoryboardPlanPanel({
  runtimeModels?: RuntimeModels
 }) {
  const [videoBusyRow, setVideoBusyRow] = useState<number | null>(null)
+  const [batchVideoBusy, setBatchVideoBusy] = useState(false)
  const [productItems, setProductItems] = useState<ProductRefItem[]>([])
  const [productUploading, setProductUploading] = useState(false)
  const [productAnalyzing, setProductAnalyzing] = useState(false)
@@ -2443,16 +2463,21 @@ function AudioStoryboardPlanPanel({
    }
  }

-  const generateRowVideo = async (row: AudioStoryboardRow, frame: KeyFrame | null) => {
-    if (!job || !frame || !onGenerateVideo) return
+  const submitRowVideo = async (row: AudioStoryboardRow, frame: KeyFrame) => { // Save the planned scene for one row via updateStoryboard, then request a "seedance" video; nothing is caught here, so failures reach the caller.
+    if (!job || !onGenerateVideo) return // does nothing without a job or a video handler
    const nextFrame = orderedFrames.find((item) => item.timestamp > frame.timestamp) ?? null // first entry in orderedFrames whose timestamp is later than this frame's, else null
    const plannedRow = { ...planForRow(row, frame), skgCopy: copyForRow(row) } // planned row with skgCopy set from copyForRow(row)
    const scene = buildStoryboardSceneFromAudioRow(plannedRow, frame, nextFrame, productItems)
+    const updated = await updateStoryboard(job.id, frame.index, scene)
+    onJobUpdate?.(updated)
+    await onGenerateVideo(frame.index, scene, "seedance") // runs only after the storyboard update has resolved
+  }
+
+  const generateRowVideo = async (row: AudioStoryboardRow, frame: KeyFrame | null) => {
+    if (!job || !frame || !onGenerateVideo) return
    setVideoBusyRow(row.index)
    try {
-      const updated = await updateStoryboard(job.id, frame.index, scene)
-      onJobUpdate?.(updated)
-      await onGenerateVideo(frame.index, scene, "seedance")
+      await submitRowVideo(row, frame)
    } catch (e) {
      toast.error("生成本条视频失败：" + (e instanceof Error ? e.message : String(e)))
    } finally {
@@ -2460,6 +2485,37 @@ function AudioStoryboardPlanPanel({
    }
  }

+  const generateAllRowVideos = async () => { // Submit every row that has a reference frame, one at a time, then show a toast summarising successes and failures.
+    if (!job || !onGenerateVideo || !rows.length) return
+    const jobsToSubmit = rows
+      .map((row) => ({ row: planForRow(row, referenceFrameForRow(row)), frame: referenceFrameForRow(row) })) // NOTE(review): submitRowVideo calls planForRow on this already-planned row again; confirm planForRow is safe to apply twice
+      .filter((item): item is { row: AudioStoryboardRow; frame: KeyFrame } => !!item.frame) // rows without a reference frame are dropped
+    if (!jobsToSubmit.length) {
+      toast.warning("先完成自动抽帧，或在原版视频上手动补参考帧")
+      return
+    }
+    setBatchVideoBusy(true)
+    let ok = 0
+    let failed = 0
+    try {
+      for (const item of jobsToSubmit) { // sequential: each submission is awaited before the next one starts
+        setVideoBusyRow(item.row.index)
+        try {
+          await submitRowVideo(item.row, item.frame)
+          ok += 1
+        } catch (e) { // a failed row is counted and logged, and the loop continues with the next row
+          failed += 1
+          console.warn("批量提交分镜失败", item.row.index, e)
+        }
+      }
+      if (failed) toast.warning(`已提交 ${ok} 条，${failed} 条失败`)
+      else toast.success(`已提交全部 ${ok} 条分镜视频`)
+    } finally { // both busy indicators are cleared whether or not the loop completed
+      setVideoBusyRow(null)
+      setBatchVideoBusy(false)
+    }
+  }
+
  if (!job) return null

  return (
@@ -2578,6 +2634,15 @@ function AudioStoryboardPlanPanel({
            >
              还原初稿
            </button>
+            <button
+              type="button"
+              onClick={() => void generateAllRowVideos()}
+              disabled={batchVideoBusy || !onGenerateVideo || !rows.length || !orderedFrames.length}
+              className="inline-flex h-9 items-center justify-center gap-1 rounded-md bg-rose-600 px-2.5 text-[11px] font-semibold text-white transition hover:bg-rose-500 disabled:cursor-not-allowed disabled:opacity-40"
+            >
+              {batchVideoBusy ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Play className="h-3.5 w-3.5" />}
+              一键提交全部
+            </button>
          </div>
        </div>
        <div className="max-h-[560px] space-y-2 overflow-y-auto pr-1">
@@ -3695,6 +3760,18 @@ function Requirement({ label, ready, detail }: { label: string; ready: boolean;
  )
 }

+function PipelineLane({ label, detail, ready, running }: { label: string; detail: string; ready: boolean; running?: boolean }) { // One status row: label and detail text (detail is also the hover title) on the left, a StatusPill fed with ready/running on the right.
+  return (
+    <div className="flex min-h-9 items-center justify-between gap-2 rounded-md border border-white/10 bg-black/24 px-2.5 py-1.5">
+      <div className="min-w-0">
+        <div className="truncate text-[11px] font-semibold text-white/64">{label}</div>
+        <div className="mt-0.5 truncate text-[10px] text-white/34" title={detail}>{detail}</div>
+      </div>
+      <StatusPill ready={ready} running={running} />
+    </div>
+  )
+}
+
 function VideoCandidate({
  job,
  video,