fix: isolate storyboard videos by row

2026-05-19 15:24:30 +08:00
parent 64a9673fa1
commit e03c5db3fd
4 changed files with 100 additions and 34 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -347,6 +347,7 @@ class GeneratedVideo(BaseModel):
    id: str
    provider_id: str = ""
    frame_idx: int
+    storyboard_row_idx: int | None = None
    prompt: str
    model: str = ""
    status: Literal["queued", "in_progress", "completed", "failed"] = "queued"
@@ -376,6 +377,7 @@ class StoryboardScene(BaseModel):
    visual_mode: Literal["person_only", "person_product", "product_only", "environment"] = "person_product"
    needs_product: bool = True
    needs_subject: bool = True
+    storyboard_row_idx: int | None = None
    subject_brief: str = ""
    skg_copy_en: str = ""
    skg_copy_zh: str = ""
@@ -5609,6 +5611,7 @@ class UpdateStoryboardReq(BaseModel):
    visual_mode: Literal["person_only", "person_product", "product_only", "environment"] = "person_product"
    needs_product: bool = True
    needs_subject: bool = True
+    storyboard_row_idx: int | None = None
    subject_brief: str = ""
    skg_copy_en: str = ""
    skg_copy_zh: str = ""
@@ -5637,6 +5640,7 @@ class GenerateStoryboardVideoReq(BaseModel):
    duration: float = 4
    count: int = 1
    seed: int | None = None
+    storyboard_row_idx: int | None = None
    first_image: dict | None = None
    last_image: dict | None = None
    product_images: list[dict] = Field(default_factory=list)
@@ -6203,6 +6207,7 @@ def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboar
            id=local_id,
            provider_id="",
            frame_idx=frame.index,
+            storyboard_row_idx=req.storyboard_row_idx,
            prompt=variant_prompt,
            model=model,
            status="queued",
@@ -7275,6 +7280,7 @@ def update_storyboard(job_id: str, idx: int, req: UpdateStoryboardReq) -> Job:
                visual_mode=req.visual_mode,
                needs_product=bool(req.needs_product),
                needs_subject=bool(req.needs_subject),
+                storyboard_row_idx=req.storyboard_row_idx,
                subject_brief=req.subject_brief.strip(),
                skg_copy_en=req.skg_copy_en.strip(),
                skg_copy_zh=req.skg_copy_zh.strip(),
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -997,11 +997,11 @@ ProductRefStateItem {
            <tr><td>角色图入库到 job</td><td><code>POST /jobs/{id}/assets/character-library</code></td><td><code>copyCharacterLibraryAssets</code></td><td>把所选角色的 7 张参考图复制为当前 job asset，返回 <code>subject_images</code>，产品融合生成视频时作为人物身份参考图提交。</td></tr>
            <tr><td>产品融合引导图</td><td><code>POST /jobs/{id}/product-fusion/guide</code></td><td><code>createProductFusionGuide</code></td><td>旧流程兼容接口：读取产品图和白底人物图，按 <code>product_region</code> 合成位置引导图。当前内置角色 + 产品 + 描述流程不再主动调用它。</td></tr>
            <tr><td>产品融合描述词</td><td><code>POST /jobs/{id}/product-fusion/descriptions</code></td><td><code>generateProductFusionDescriptions</code></td><td>兼容接口：可生成产品融合动作描述库。当前前端默认直接用本地 36 条镜头语言模板预填 6 行镜头，并通过“换一组”按钮按 6 条一组轮换。</td></tr>
-            <tr><td>分镜保存</td><td><code>PUT /frames/{idx}/storyboard</code></td><td><code>updateStoryboard</code></td><td>保存三字段中英镜像、选中视频 ID、4 图槽、时长、改造说明，以及高级抽屉里的镜头类型、人物描述、人物/产品开关、首帧规划、尾帧规划和产品出现方式。</td></tr>
+            <tr><td>分镜保存</td><td><code>PUT /frames/{idx}/storyboard</code></td><td><code>updateStoryboard</code></td><td>保存三字段中英镜像、选中视频 ID、4 图槽、时长、改造说明，以及高级抽屉里的镜头类型、人物描述、人物/产品开关、首帧规划、尾帧规划和产品出现方式。音频分镜行保存时会额外写入 <code>storyboard_row_idx</code>，标记这份已保存分镜属于哪一行（前端仍按参考帧读取单份 <code>frame.storyboard</code>）；多条分镜共用同一参考帧时，其他行不会再把它当成自己的已保存内容或已选视频。</td></tr>
            <tr><td>三字段自动展开</td><td><code>POST /jobs/{job_id}/frames/{idx}/storyboard/quick-plan</code></td><td><code>quickPlanStoryboard</code></td><td>输入 <code>skg_copy_*</code>、<code>scene_one_line_*</code>、<code>action_one_line_*</code> 和 <code>subject_brief</code>，用 <code>REWRITE_MODEL</code> 展开为完整 <code>StoryboardScene</code>，只作为视频 prompt 来源，不直接持久化。</td></tr>
            <tr><td>AI 改文案</td><td><code>POST /jobs/{job_id}/frames/{idx}/storyboard/refine</code></td><td><code>refineStoryboard</code></td><td>输入当前三字段和中文反馈，返回新的三字段中英镜像。前端必须先弹改前/改后预览，用户点应用后才写入行状态。</td></tr>
-            <tr><td>单条视频候选生成</td><td><code>POST /jobs/{job_id}/frames/{idx}/storyboard/video</code></td><td><code>generateStoryboardVideo</code></td><td>新增 <code>count</code> 和 <code>seed</code>，默认一次创建 4 个 <code>GeneratedVideo</code> 任务并立即返回 job；每个候选独立排队、生成、失败或成功。前端提交 prompt 前用 quick-plan 展开，高级首尾帧存在时继续带上，不存在时后端用参考帧/主体图/产品图透明兜底。</td></tr>
-            <tr><td>整片一键生成候选</td><td><code>POST /jobs/{job_id}/storyboard/batch-generate-all</code></td><td><code>batchGenerateAll</code></td><td>输入 <code>count_per_row=4</code>、<code>concurrency=4</code>，后台遍历分镜并为每行提交 4 个视频候选；job message 用轮询展示进度。单行失败只写 job error，不阻断其他行。</td></tr>
+            <tr><td>单条视频候选生成</td><td><code>POST /jobs/{job_id}/frames/{idx}/storyboard/video</code></td><td><code>generateStoryboardVideo</code></td><td>新增 <code>count</code>、<code>seed</code> 和 <code>storyboard_row_idx</code>。请求模型里 <code>count</code> 的默认值是 1，前端默认每行一次提交 4 个候选；后端为其创建 <code>GeneratedVideo</code> 任务并立即返回 job，每个候选独立排队、生成、失败或成功。前端提交 prompt 前用 quick-plan 展开，高级首尾帧存在时继续带上，不存在时后端用参考帧/主体图/产品图透明兜底。视频候选显示必须优先按 <code>storyboard_row_idx</code> 归属到音频分镜行，而不是只按 <code>frame_idx</code>。</td></tr>
+            <tr><td>整片一键生成候选</td><td><code>POST /jobs/{job_id}/storyboard/batch-generate-all</code></td><td>当前主路径改为逐行调用 <code>generateStoryboardVideo</code></td><td>用户选择“每行 N 条”后，前端按音频分镜逐行提交，确保每个候选都带 <code>storyboard_row_idx</code>。后端批量接口保留为兼容能力，默认 <code>concurrency=1</code>，但当前 UI 不再用它做主路径。</td></tr>
            <tr><td>生图</td><td><code>POST /frames/{idx}/generate</code></td><td><code>generateImage</code></td><td>基于关键帧或已选生成图做 image-to-image，目前可用。</td></tr>
          </tbody>
        </table>
@@ -1108,6 +1108,19 @@ ProductRefStateItem {
        <h2>变更记录</h2>
        <p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
        <div class="changelog">
+          <article class="change">
+            <header>
+              <h3>2026-05-19 · 视频候选按音频分镜行隔离</h3>
+              <span class="tag blue">API</span>
+              <span class="tag green">Video</span>
+              <span class="tag cyan">Storyboard</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>多条音频分镜会映射到同一张参考帧，旧视频候选只按 <code>frame_idx</code> 过滤，导致第一行生成的视频也出现在后面共用参考帧的分镜行里。</p>
+              <p><strong>改动：</strong><code>StoryboardScene</code>、<code>GeneratedVideo</code> 和 <code>/storyboard/video</code> 请求增加 <code>storyboard_row_idx</code>；前端显示候选和读取已保存分镜时优先按该行号隔离，旧无行号候选只归到同参考帧的第一条兼容行。</p>
+              <p><strong>影响：</strong>点击某一行生成视频后，只会在该音频分镜行右侧出现候选；整片生成也改为逐行提交，避免候选跨行串位。</p>
+            </div>
+          </article>
          <article class="change">
            <header>
              <h3>2026-05-19 · 分镜行改成左文字右视频轨</h3>
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -34,7 +34,6 @@ import {
  analyzeJob,
  analyzeProductViews,
  apiAssetUrl,
-  batchGenerateAll,
  characterLibraryImageUrl,
  createAssetLibraryItem,
  createPromptLibraryItem,
@@ -1422,6 +1421,12 @@ function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch {
  }
 }

+function storyboardSceneBelongsToRow(scene: StoryboardScene | null | undefined, rowIndex: number, legacyRowIndex?: number | null) {
+  if (!scene) return false
+  if (typeof scene.storyboard_row_idx === "number") return scene.storyboard_row_idx === rowIndex
+  return legacyRowIndex != null && legacyRowIndex === rowIndex
+}
+
 function applyPlanPatch(row: AudioStoryboardRow, patch?: RowPlanPatch): AudioStoryboardRow {
  if (!patch) return row
  return {
@@ -1726,6 +1731,7 @@ function buildStoryboardSceneFromAudioRow(
    visual_mode: row.visualMode,
    needs_product: row.needsProduct,
    needs_subject: row.needsSubject,
+    storyboard_row_idx: row.index,
    subject_brief: row.needsSubject ? subjectBrief : "",
    skg_copy_en: row.skgCopy,
    skg_copy_zh: row.skgCopyZh,
@@ -1733,7 +1739,7 @@ function buildStoryboardSceneFromAudioRow(
    scene_one_line_zh: row.sceneOneLineZh,
    action_one_line_en: row.actionOneLine,
    action_one_line_zh: row.actionOneLineZh,
-    selected_video_id: frame.storyboard?.selected_video_id ?? "",
+    selected_video_id: frame.storyboard?.storyboard_row_idx === row.index ? frame.storyboard?.selected_video_id ?? "" : "",
    first_frame_plan: row.firstFramePlan,
    last_frame_plan: row.lastFramePlan,
    product_placement: row.productPlacement,
@@ -3544,8 +3550,23 @@ function AudioStoryboardPlanPanel({
    })
  }

-  const planForRow = (row: AudioStoryboardRow, frame: KeyFrame | null) =>
-    applyPlanPatch(applyPlanPatch(row, savedScenePatch(frame?.storyboard)), planOverrides[row.index])
+  const referenceFrameForRow = (row: AudioStoryboardRow) =>
+    closestFrameForTime(rowReferencePool, clampNumber((row.start + row.end) / 2, 0, Math.max(job?.duration || row.end, row.end)))
+
+  const legacyRowIndexForFrame = (frameIndex: number) => {
+    for (const item of rows) {
+      if (referenceFrameForRow(item)?.index === frameIndex) return item.index
+    }
+    return null
+  }
+
+  const planForRow = (row: AudioStoryboardRow, frame: KeyFrame | null) => {
+    const legacyRowIndex = frame ? legacyRowIndexForFrame(frame.index) : null
+    const savedPatch = storyboardSceneBelongsToRow(frame?.storyboard, row.index, legacyRowIndex)
+      ? savedScenePatch(frame?.storyboard)
+      : {}
+    return applyPlanPatch(applyPlanPatch(row, savedPatch), planOverrides[row.index])
+  }

  const rewriteSegmentForRow = (row: AudioStoryboardRow): StoryboardScriptRewriteSegment => ({
    index: row.index,
@@ -3556,12 +3577,22 @@ function AudioStoryboardPlanPanel({
    current_text: copyForRow(row),
  })

-  const referenceFrameForRow = (row: AudioStoryboardRow) =>
-    closestFrameForTime(rowReferencePool, clampNumber((row.start + row.end) / 2, 0, Math.max(job?.duration || row.end, row.end)))
-
-  const videosForFrame = (frame: KeyFrame | null) => {
+  const videosForRow = (row: AudioStoryboardRow, frame: KeyFrame | null) => {
    if (!frame) return []
-    return (job?.generated_videos ?? []).filter((video) => video.frame_idx === frame.index)
+    const legacyRowIndex = legacyRowIndexForFrame(frame.index)
+    return (job?.generated_videos ?? []).filter((video) => {
+      if (video.frame_idx !== frame.index) return false
+      if (typeof video.storyboard_row_idx === "number") return video.storyboard_row_idx === row.index
+      return legacyRowIndex === row.index
+    })
+  }
+
+  const selectedVideoIdForRow = (row: AudioStoryboardRow, frame: KeyFrame | null) => {
+    if (!frame?.storyboard) return ""
+    const legacyRowIndex = legacyRowIndexForFrame(frame.index)
+    return storyboardSceneBelongsToRow(frame.storyboard, row.index, legacyRowIndex)
+      ? frame.storyboard.selected_video_id ?? ""
+      : ""
  }

  const quickInputForRow = (row: AudioStoryboardRow, frame: KeyFrame | null): QuickStoryboardPlanInput => ({
@@ -3585,12 +3616,17 @@ function AudioStoryboardPlanPanel({
    selectedVideoId?: string,
  ): StoryboardScene => {
    const selectedSubjectRefs = row.needsSubject ? selectSubjectRefsForRow(row, subjectRefs) : []
+    const legacyRowIndex = legacyRowIndexForFrame(frame.index)
+    const savedSceneForRow = storyboardSceneBelongsToRow(frame.storyboard, row.index, legacyRowIndex)
+      ? frame.storyboard
+      : null
    const base = buildStoryboardSceneFromAudioRow(row, frame, productItems, selectedSubjectRefs, {
-      firstImage: frame.storyboard?.first_image ?? endpointAssetRef(frame, "first_frame"),
-      lastImage: frame.storyboard?.last_image ?? endpointAssetRef(frame, "last_frame"),
+      firstImage: savedSceneForRow?.first_image ?? endpointAssetRef(frame, "first_frame"),
+      lastImage: savedSceneForRow?.last_image ?? endpointAssetRef(frame, "last_frame"),
    })
+    const savedSelectedVideoId = selectedVideoIdForRow(row, frame)
    if (!quickPlan) {
-      return { ...base, selected_video_id: selectedVideoId ?? frame.storyboard?.selected_video_id ?? base.selected_video_id ?? "" }
+      return { ...base, selected_video_id: selectedVideoId ?? savedSelectedVideoId ?? base.selected_video_id ?? "" }
    }
    return {
      ...base,
@@ -3612,7 +3648,7 @@ function AudioStoryboardPlanPanel({
      scene: quickPlan.scene || base.scene,
      product: quickPlan.product || base.product,
      action: quickPlan.action || base.action,
-      selected_video_id: selectedVideoId ?? frame.storyboard?.selected_video_id ?? base.selected_video_id ?? "",
+      selected_video_id: selectedVideoId ?? savedSelectedVideoId ?? base.selected_video_id ?? "",
    }
  }

@@ -3628,10 +3664,10 @@ function AudioStoryboardPlanPanel({
    "Keep motion natural, creator-ad style, premium wellness lighting, no subtitles, no platform UI, no watermark, no medical treatment claims.",
  ].filter((line) => line.trim()).join("\n")

-  const drawVideosForRow = async (row: AudioStoryboardRow, frame: KeyFrame | null, count = 4) => {
+  const drawVideosForRow = async (row: AudioStoryboardRow, frame: KeyFrame | null, count = 4, quiet = false) => {
    if (!job || !frame) {
-      toast.warning("这条分镜还没有参考帧，先完成抽帧。")
-      return
+      if (!quiet) toast.warning("这条分镜还没有参考帧，先完成抽帧。")
+      return false
    }
    const plannedRow = { ...planForRow(row, frame), skgCopy: copyForRow(row), skgCopyZh: copyZhForRow(row) }
    setQuickVideoBusyRow(row.index)
@@ -3644,6 +3680,7 @@ function AudioStoryboardPlanPanel({
        prompt: promptForStoryboardScene(scene),
        duration: scene.duration || 4,
        count,
+        storyboard_row_idx: row.index,
        first_image: scene.first_image ?? null,
        last_image: scene.last_image ?? null,
        product_images: scene.product_images ?? [],
@@ -3656,9 +3693,11 @@ function AudioStoryboardPlanPanel({
        size: "720x1280",
      })
      onJobUpdate?.(updated)
-      toast.success(`分镜 ${row.index + 1} 已提交 ${count} 条视频候选`)
+      if (!quiet) toast.success(`分镜 ${row.index + 1} 已提交 ${count} 条视频候选`)
+      return true
    } catch (e) {
-      toast.error("视频候选生成失败：" + (e instanceof Error ? e.message : String(e)))
+      if (!quiet) toast.error("视频候选生成失败：" + (e instanceof Error ? e.message : String(e)))
+      return false
    } finally {
      setQuickVideoBusyRow(null)
    }
@@ -3668,7 +3707,11 @@ function AudioStoryboardPlanPanel({
    if (!job || !frame) return
    const plannedRow = { ...planForRow(row, frame), skgCopy: copyForRow(row), skgCopyZh: copyZhForRow(row) }
    try {
-      const scene = buildSceneForPlannedRow(plannedRow, frame, frame.storyboard, videoId)
+      const legacyRowIndex = legacyRowIndexForFrame(frame.index)
+      const savedSceneForRow = storyboardSceneBelongsToRow(frame.storyboard, row.index, legacyRowIndex)
+        ? frame.storyboard
+        : null
+      const scene = buildSceneForPlannedRow(plannedRow, frame, savedSceneForRow, videoId)
      const updated = await updateStoryboard(job.id, frame.index, scene)
      onJobUpdate?.(updated)
      toast.success(`分镜 ${row.index + 1} 已选用该视频`)
@@ -4026,16 +4069,17 @@ function AudioStoryboardPlanPanel({
    if (!job || !rows.length) return
    const count = clampVideoCount(batchVideoCount)
    setBatchCardBusy(true)
+    let submitted = 0
+    let failed = 0
    try {
-      await saveAllStoryboardDrafts(true)
-      const updated = await batchGenerateAll(job.id, {
-        count_per_row: count,
-        concurrency: 1,
-        model: "seedance",
-        size: "720x1280",
-      })
-      onJobUpdate?.(updated)
-      toast.success(`整片视频候选生成已启动：${rows.length} 条分镜 × 每条 ${count} 个候选`)
+      for (const row of rows) {
+        const frame = referenceFrameForRow(row)
+        const ok = await drawVideosForRow(row, frame, count, true)
+        if (ok) submitted += 1
+        else failed += 1
+      }
+      if (failed) toast.warning(`整片已排队 ${submitted} 条分镜，${failed} 条失败或缺少参考帧`)
+      else toast.success(`整片视频候选已按行排队：${submitted} 条分镜 × 每条 ${count} 个候选`)
    } catch (e) {
      toast.error("整片视频候选生成失败：" + (e instanceof Error ? e.message : String(e)))
    } finally {
@@ -4395,7 +4439,7 @@ function AudioStoryboardPlanPanel({
          {rows.map((row) => {
            const referenceFrame = referenceFrameForRow(row)
            const plannedRow = planForRow(row, referenceFrame)
-            const rowVideos = videosForFrame(referenceFrame)
+            const rowVideos = videosForRow(row, referenceFrame)
            const savingStoryboard = storyboardSaveBusyRow === row.index
            const copyText = copyForRow(row)
            const copyZhText = copyZhForRow(row)
@@ -4543,7 +4587,7 @@ function AudioStoryboardPlanPanel({
                      job={job}
                      videos={rowVideos}
                      enabled={!!referenceFrame}
-                      selectedVideoId={referenceFrame?.storyboard?.selected_video_id ?? ""}
+                      selectedVideoId={selectedVideoIdForRow(row, referenceFrame)}
                      busy={quickVideoBusyRow === row.index}
                      count={rowVideoCount}
                      onCountChange={(count) => patchRowVideoCount(row.index, count)}
@@ -4772,7 +4816,7 @@ function AudioStoryboardPlanPanel({
                    videos={rowVideos}
                    enabled={!!referenceFrame}
                    expanded={videosOpen}
-                    selectedVideoId={referenceFrame?.storyboard?.selected_video_id ?? ""}
+                    selectedVideoId={selectedVideoIdForRow(row, referenceFrame)}
                    busy={quickVideoBusyRow === row.index}
                    count={rowVideoCount}
                    onCountChange={(count) => patchRowVideoCount(row.index, count)}
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -187,6 +187,7 @@ export interface StoryboardScene {
  visual_mode?: "person_only" | "person_product" | "product_only" | "environment"
  needs_product?: boolean
  needs_subject?: boolean
+  storyboard_row_idx?: number | null
  subject_brief?: string
  skg_copy_en?: string
  skg_copy_zh?: string
@@ -241,6 +242,7 @@ export interface GeneratedVideo {
  id: string
  provider_id?: string
  frame_idx: number
+  storyboard_row_idx?: number | null
  prompt: string
  model: string
  status: "queued" | "in_progress" | "completed" | "failed"
@@ -1278,6 +1280,7 @@ export async function generateStoryboardVideo(
    duration?: number
    count?: number
    seed?: number | null
+    storyboard_row_idx?: number | null
    first_image?: ImageRef | null
    last_image?: ImageRef | null
    product_images?: ImageRef[]