feat: plan storyboard frame endpoints

2026-05-18 09:47:13 +08:00
parent cf648eaac2
commit 75c5d113ee
6 changed files with 316 additions and 39 deletions
--- a/RULES.md
+++ b/RULES.md
@@ -11,7 +11,7 @@
 - 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解
 - 风格：`04-Dark-Gallery-Ambient`（路径：`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`）
 - 第一冲刺：步骤 1-4（下载 / 拆轨 / 关键帧 / ASR+翻译）
- 当前产品方向（2026-05-17 再确认）：先解决信息流广告快速复刻的第一步，不再沿用“开始后自动抽帧、分镜、元素生成、合成”的默认做法。主界面为“左侧素材输入列 + 右侧音频解析工作表”。用户粘贴 TK 链接或上传视频后点击“开始”，系统自动下载源视频；下载完成后优先提取原音频文案/字幕，并分析讲话人、语速节奏、背景音乐/环境声/音效。抽帧、分镜规划、产品融入、相似主体高清视图包（最多 10 张，含肩颈/后背特写）和视频合成暂作为后续能力保留，不在当前第一步自动触发。
+- 当前产品方向（2026-05-18 再确认）：先解决信息流广告快速复刻的第一步，不再沿用“开始后自动抽帧、分镜、元素生成、合成”的默认做法。主界面为“左侧素材输入列 + 右侧音频解析工作表”。用户粘贴 TK 链接或上传视频后点击“开始”，系统自动下载源视频；下载完成后优先提取原音频文案/字幕，并分析讲话人、语速节奏、背景音乐/环境声/音效。分镜工作台按逐句时间轴规划新口播、镜头类型、首帧/尾帧、人物需求和产品出现方式；不是所有分镜都必须是“人物 + 产品”，单条生成会按该行规划决定是否传产品图和相似主体参考图。

 ## 部署事实
 - 平台：VPS `76.13.31.179`（Ubuntu 24.04 / Docker Compose / Coolify Traefik）
--- a/api/main.py
+++ b/api/main.py
@@ -331,6 +331,12 @@ class StoryboardScene(BaseModel):
    last_image: dict | None = None
    product_images: list[dict] = Field(default_factory=list)
    product_fusion_shots: list[dict] = Field(default_factory=list)
+    visual_mode: Literal["person_only", "person_product", "product_only", "environment"] = "person_product"
+    needs_product: bool = True
+    needs_subject: bool = True
+    first_frame_plan: str = ""
+    last_frame_plan: str = ""
+    product_placement: str = ""
    # 4 图槽：dict 含 {kind, frame_idx, element_id?, cutout_id?, label}
    subject_image: dict | None = None
    scene_image: dict | None = None
@@ -4379,6 +4385,12 @@ class UpdateStoryboardReq(BaseModel):
    last_image: dict | None = None
    product_images: list[dict] = Field(default_factory=list)
    product_fusion_shots: list[dict] = Field(default_factory=list)
+    visual_mode: Literal["person_only", "person_product", "product_only", "environment"] = "person_product"
+    needs_product: bool = True
+    needs_subject: bool = True
+    first_frame_plan: str = ""
+    last_frame_plan: str = ""
+    product_placement: str = ""
    subject_image: dict | None = None
    scene_image: dict | None = None
    product_image: dict | None = None
@@ -5548,6 +5560,12 @@ def update_storyboard(job_id: str, idx: int, req: UpdateStoryboardReq) -> Job:
                last_image=req.last_image,
                product_images=list(req.product_images),
                product_fusion_shots=list(req.product_fusion_shots),
+                visual_mode=req.visual_mode,
+                needs_product=bool(req.needs_product),
+                needs_subject=bool(req.needs_subject),
+                first_frame_plan=req.first_frame_plan.strip(),
+                last_frame_plan=req.last_frame_plan.strip(),
+                product_placement=req.product_placement.strip(),
                subject_image=req.subject_image,
                scene_image=req.scene_image,
                product_image=req.product_image,
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
--- a/web/app/page.tsx
+++ b/web/app/page.tsx
@@ -84,6 +84,20 @@ const PRODUCT_FUSION_NEGATIVE_PROMPT = [
  "no product passing through the neck, no product inside the transparent body, no x-ray blending, no transparent product, no product becoming bones or skin, no product fused with spine/ribs/throat, no clipping through shoulders, no floating device, no melted device, no deformed U-shape, no wrong body part, no necklace/scarf/headphones/brace, no random replacement product.",
 ].join("\n")

+function storyboardNeedsProduct(scene: StoryboardScene) {
+  // An explicit boolean flag is authoritative; the keyword scan below only runs when the flag is absent.
+  if (typeof scene.needs_product === "boolean") return scene.needs_product
+  const haystack = [scene.visual_mode ?? "", scene.product ?? "", scene.product_placement ?? ""].join(" ").toLowerCase()
+  return !/(不出现产品|不露产品|无需产品|不需要产品|无产品|no product|environment|person_only)/.test(haystack)
+}
+
+function storyboardNeedsSubject(scene: StoryboardScene) {
+  // An explicit boolean flag is authoritative; the keyword scan below only runs when the flag is absent.
+  if (typeof scene.needs_subject === "boolean") return scene.needs_subject
+  const haystack = [scene.visual_mode ?? "", scene.subject ?? ""].join(" ").toLowerCase()
+  return !/(不需要人物|无人物|不出现人物|no person|product_only|environment)/.test(haystack)
+}
+
 // 合并 input + download + split 为一个节点
 // 分叉：上路 input → visual lab ↘
 //       下路 input → audio ──────────────────────────→ compose
@@ -565,8 +579,10 @@ export default function Home() {
      : null
    const firstRef = scene.first_image ?? keyframeRef
    const lastRef = scene.last_image ?? defaultLastRef
-    let productRefs = (scene.product_images?.length ? scene.product_images : scene.product_image ? [scene.product_image] : [])
-    if (productRefs.length === 0) {
+    const needsProduct = storyboardNeedsProduct(scene)
+    const needsSubject = storyboardNeedsSubject(scene)
+    let productRefs = needsProduct ? (scene.product_images?.length ? scene.product_images : scene.product_image ? [scene.product_image] : []) : []
+    if (needsProduct && productRefs.length === 0) {
      try {
        productRefs = await ensureDefaultProductRefs(job.id)
      } catch (e) {
@@ -574,7 +590,7 @@ export default function Home() {
        return
      }
    }
-    const subjectRefs: ImageRef[] = (frame.elements ?? [])
+    const subjectRefs: ImageRef[] = needsSubject ? (frame.elements ?? [])
      .flatMap((element) => element.subject_assets ?? [])
      .slice(0, 6)
      .map((asset) => ({
@@ -583,8 +599,8 @@ export default function Home() {
        element_id: asset.id,
        cutout_id: asset.id,
        label: asset.label,
-      }))
-    const primarySubjectRef = subjectRefs[0] ?? firstRef
+      })) : []
+    const primarySubjectRef = needsSubject ? (subjectRefs[0] ?? firstRef) : null
    const duration = scene.duration && scene.duration > 0 ? scene.duration : 5
    const sourceScene = frame.description?.scene ? `参考画面识别：${frame.description.scene}` : ""
    const sourceStyle = frame.description?.style ? `参考风格：${frame.description.style}` : ""
@@ -607,31 +623,47 @@ export default function Home() {
    ].join("\n")
    const prompt = [
      `竖屏 9:16，${duration.toFixed(1)} 秒，SKG 产品短视频广告。`,
-      productNature,
-      productRefs.length
+      needsProduct
+        ? productNature
+        : "本条分镜规划为非产品主镜头：可以只拍人物状态、场景过渡、情绪停点或节奏承接。不要硬插 SKG 产品、白底产品图、包装或任何随机商品。",
+      needsProduct && productRefs.length
        ? `已上传 ${productRefs.length} 张 SKG 真实产品参考图。产品参考图是唯一产品真源：视频中出现的产品必须严格匹配这些图的外观、颜色、材质、结构比例和关键细节。`
-        : "未上传产品图时，仍需生成一个干净高级的 SKG 产品广告画面，但不得保留原视频里的竞品包装或平台元素。",
-      "首帧和尾帧只用于控制画面起止、构图、场景和动作方向；如果首尾帧里有竞品、文字包装或非 SKG 产品，必须替换为上传的 SKG 产品参考。",
+        : needsProduct
+          ? "未上传产品图时，仍需生成一个干净高级的 SKG 产品广告画面，但不得保留原视频里的竞品包装或平台元素。"
+          : "本条不传产品参考图；如首尾帧里出现竞品、包装或非 SKG 商品，应弱化、移除或作为模糊背景，不要替换成 SKG 产品。",
+      needsProduct
+        ? "首帧和尾帧只用于控制画面起止、构图、场景和动作方向；如果首尾帧里有竞品、文字包装或非 SKG 产品，必须替换为上传的 SKG 产品参考。"
+        : "首帧和尾帧用于控制画面起止、构图、场景和动作方向；本条没有产品任务，不要因为广告语而自动添加产品。",
      "使用首帧和尾帧生成连续过渡视频：首帧必须严格作为视频开始画面，尾帧必须作为视频结束目标画面，中间只做自然运动补间。",
      "生成一段单镜头连续视频，一镜到底，从首帧平滑过渡到尾帧；不要跳切，不要突然换场景，不要突然换主体，不要蒙太奇，不要多镜头拼接。",
      "如果提供了原视频链接，把它只作为节奏、镜头运动、动作顺序和画面调度参考；不要照搬原视频里的品牌、文字、水印、竞品产品或具体人物。",
      "时间线：0%-15% 锁住首帧构图并轻微启动；15%-85% 做平滑连续运动；85%-100% 缓慢贴近尾帧并稳定收住。",
-      TRANSPARENT_HUMAN_VIDEO_PROMPT,
+      `镜头类型：${scene.visual_mode ?? "未标注"}；需要人物=${needsSubject ? "是" : "否"}；需要产品=${needsProduct ? "是" : "否"}。`,
+      scene.first_frame_plan ? `首帧规划：${scene.first_frame_plan}` : "",
+      scene.last_frame_plan ? `尾帧规划：${scene.last_frame_plan}` : "",
+      scene.product_placement ? `产品出现方式：${scene.product_placement}` : "",
+      needsSubject
+        ? TRANSPARENT_HUMAN_VIDEO_PROMPT
+        : "本条不传人物主体参考图；如果画面需要人物，只能作为背景、手部局部或模糊生活方式元素，不要生成主角式透明骨架人。",
      `主体改造：${subjectDirection}`,
-      `产品替换：${productDirection} 产品必须作为颈部/肩颈按摩仪被正确佩戴或展示，不要放在脸上、手臂上、桌面当摆件，也不要变成瓶子、面霜、医疗设备或食品。`,
+      needsProduct
+        ? `产品替换：${productDirection} 产品必须作为颈部/肩颈按摩仪被正确佩戴或展示，不要放在脸上、手臂上、桌面当摆件，也不要变成瓶子、面霜、医疗设备或食品。`
+        : `产品处理：${productDirection} 本条不需要露出 SKG 产品，不要硬插产品、包装、瓶罐、医疗器械或随机商品。`,
      `场景改造：${sceneDirection}`,
      `连续动作和镜头：${actionDirection}`,
      `首帧：${labelOf(firstRef, "当前分镜关键帧")}`,
      `尾帧：${labelOf(lastRef, "未指定，按首帧小幅自然运动收尾")}`,
-      `SKG 产品参考：${productRefs.length ? productRefs.map((ref, i) => `${i + 1}. ${labelOf(ref, "SKG 产品角度")}`).join("；") : "SKG 产品视觉主角"}`,
-      subjectRefs.length ? `关键元素 6 视图参考：${subjectRefs.map((ref, i) => `${i + 1}. ${labelOf(ref, "元素视图")}`).join("；")}` : "如果该分镜还没有关键元素 6 视图，优先使用首帧主体关系生成。",
+      needsProduct ? `SKG 产品参考：${productRefs.length ? productRefs.map((ref, i) => `${i + 1}. ${labelOf(ref, "SKG 产品角度")}`).join("；") : "SKG 产品视觉主角"}` : "SKG 产品参考：本条不使用产品参考图。",
+      needsSubject
+        ? (subjectRefs.length ? `关键元素 6 视图参考：${subjectRefs.map((ref, i) => `${i + 1}. ${labelOf(ref, "元素视图")}`).join("；")}` : "如果该分镜还没有关键元素 6 视图，优先使用首帧主体关系生成。")
+        : "关键元素 6 视图参考：本条不使用人物主体参考图。",
      sourceScene,
      sourceStyle,
      sourceObjects,
-      "产品一致性要求：整个视频只能出现同一个白色 U 形 SKG 颈部按摩仪或同一套包装；不要生成第二种产品，不要改变 U 形机身、金属按摩触点、侧边按键、白色材质和整体比例，不要凭空增加屏幕、线缆、文字标签或说明书。",
-      "产品呈现要求：至少一次让产品在脖子/肩颈位置清晰占据视觉中心，边缘清楚、材质真实、比例可信；手部接触产品时不要遮挡关键外观，产品不能融化、扭曲、穿帮或漂移。",
-      "状态改善要求：画面应形成明确的使用前后感受变化：使用前可以是低头久坐、揉脖子、肩颈疲惫或紧绷；使用后变为肩颈放松、抬头、动作舒展、精神更好。人形骷髅也可以表现为从僵硬难受变轻松放松。表达舒缓和放松，不要承诺治疗。",
-      "运动要求：动作幅度小而连续，速度均匀，手部和产品位置前后一致，产品外形不变形，人物表情和姿态不漂移，背景只允许轻微景深和光影变化。",
+      needsProduct ? "产品一致性要求：整个视频只能出现同一个白色 U 形 SKG 颈部按摩仪或同一套包装；不要生成第二种产品，不要改变 U 形机身、金属按摩触点、侧边按键、白色材质和整体比例，不要凭空增加屏幕、线缆、文字标签或说明书。" : "",
+      needsProduct ? "产品呈现要求：至少一次让产品在脖子/肩颈位置清晰占据视觉中心，边缘清楚、材质真实、比例可信；手部接触产品时不要遮挡关键外观，产品不能融化、扭曲、穿帮或漂移。" : "",
+      needsSubject || needsProduct ? "状态改善要求：画面应形成明确的使用前后感受变化：使用前可以是低头久坐、揉脖子、肩颈疲惫或紧绷；使用后变为肩颈放松、抬头、动作舒展、精神更好。人形骷髅也可以表现为从僵硬难受变轻松放松。表达舒缓和放松，不要承诺治疗。" : "节奏要求：作为过渡镜头时只负责情绪、空间和节奏承接，不承诺疗效，不强行展示使用动作。",
+      needsProduct ? "运动要求：动作幅度小而连续，速度均匀，手部和产品位置前后一致，产品外形不变形，人物表情和姿态不漂移，背景只允许轻微景深和光影变化。" : "运动要求：动作幅度小而连续，速度均匀，构图从首帧自然过渡到尾帧，不突然添加人物或产品。",
      "商业质感：真实拍摄感，干净高级，柔和稳定打光，产品边缘清晰，材质真实，画面无抖动、无拉伸、无闪烁。",
      "禁止：字幕、文字、平台 UI、TikTok 水印、logo 水印、免责声明、竞品包装、随机新物体、非 SKG 产品、医学骨架、夸张病症画面、恐怖元素、画面撕裂、人物或产品突然变形。",
      TRANSPARENT_HUMAN_NEGATIVE_PROMPT,
@@ -649,7 +681,7 @@ export default function Home() {
        subject_image: primarySubjectRef,
        subject_images: subjectRefs,
        scene_image: null,
-        product_image: productRefs[0] ?? null,
+        product_image: needsProduct ? (productRefs[0] ?? null) : null,
        action_image: null,
        source_ref: sourceUrl ? { kind: "source_video", url: sourceUrl } : null,
        model,
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -91,15 +91,30 @@ type AudioStoryboardRow = {
  end: number
  source: string
  role: string
+  visualMode: StoryboardVisualMode
+  needsProduct: boolean
+  needsSubject: boolean
  skgCopy: string
  visualPlan: string
+  firstFramePlan: string
+  lastFramePlan: string
  referencePlan: string
  keyElements: string
  productIntegration: string
+  productPlacement: string
 }

 type ProductRefItem = ProductRefStateItem
 type SubjectStyleMode = "transparent_human" | "source_actor"
+type StoryboardVisualMode = NonNullable<StoryboardScene["visual_mode"]> // the scene's visual_mode union without undefined
+type RowPlanPatch = Partial<Pick<AudioStoryboardRow, "visualMode" | "needsProduct" | "needsSubject" | "visualPlan" | "firstFramePlan" | "lastFramePlan" | "productIntegration" | "productPlacement">> // editable plan fields of a row, all optional
+// Shot-type choices for the per-row selector: `label` is the option text, `description` is shown as the select's tooltip.
+const VISUAL_MODE_OPTIONS: Array<{ value: StoryboardVisualMode; label: string; description: string }> = [
+  { value: "person_only", label: "人物/情绪", description: "只拍人物、状态、痛点或口播，不强制露产品。" },
+  { value: "person_product", label: "人物+产品", description: "人物佩戴、拿起、调整或使用 SKG 产品。" },
+  { value: "product_only", label: "产品特写", description: "只拍产品、包装、功能细节或 hero packshot。" },
+  { value: "environment", label: "场景过渡", description: "只做空间、生活方式、转场或情绪氛围。" },
+]

 const SUBJECT_ASSET_VIEWS = [
  { value: "front", label: "正面" },
@@ -526,22 +541,84 @@ function buildVisualPlan(role: string) {
  return "保持原片同类构图和运镜，把画面内容替换成 SKG 肩颈放松场景。"
 }

+function visualModeDefaults(mode: StoryboardVisualMode) {
+  switch (mode) { // default flags and placement hint per shot type
+    case "person_only":
+      return {
+        needsProduct: false,
+        needsSubject: true,
+        productPlacement: "本条不出现产品，只用人物状态、痛点或口播承接节奏；不要硬插 SKG 产品。",
+      }
+    case "product_only":
+      return {
+        needsProduct: true,
+        needsSubject: false,
+        productPlacement: "只展示 SKG 肩颈按摩仪本体、佩戴角度或功能细节；不要强行加入人物。",
+      }
+    case "environment":
+      return {
+        needsProduct: false,
+        needsSubject: false,
+        productPlacement: "本条作为场景/情绪/节奏过渡，不出现产品和人物主体；只保留空间、光线和运动节奏。",
+      }
+    default:
+      return {
+        needsProduct: true,
+        needsSubject: true,
+        productPlacement: "SKG 肩颈按摩仪作为外置佩戴产品出现，围绕拿起、佩戴、调整、按键或放松状态展开。",
+      }
+  }
+}
+
+function visualModeForRole(role: string): StoryboardVisualMode {
+  // Hook and pain-point rows stay on the person; the closing row is a product shot; any other role shows person + product.
+  if (["开场钩子", "痛点推进"].includes(role)) return "person_only"
+  if (role === "转化收口") return "product_only"
+  return role === "节奏承接" ? "environment" : "person_product"
+}
+
+function buildFirstFramePlan(role: string) { // opening-frame text per narrative role; unknown roles get the generic fallback
+  return role === "开场钩子" ? "人物近景看向镜头或低头办公，手轻扶后颈，画面先不露产品。"
+    : role === "痛点推进" ? "保留原片人物动作节奏，肩颈紧绷、低头、揉脖子或久坐状态明确。"
+    : role === "利益证明" ? "人物拿起或准备佩戴 SKG 肩颈按摩仪，产品位置清晰但动作刚开始。"
+    : role === "方案过渡" ? "人物从痛点状态切到拿起产品/靠近肩颈，准备进入使用动作。"
+    : role === "转化收口" ? "产品干净特写或佩戴完成后的稳定画面，留出转化收口的视觉焦点。"
+    : "按原视频当前句的构图启动，先承接节奏，不强行改变镜头主体。"
+}
+
+function buildLastFramePlan(role: string) { // closing-frame text per narrative role; unknown roles get the generic fallback
+  return role === "开场钩子" ? "人物抬头或表情更集中，给下一镜产品或方案进入留出空间。"
+    : role === "痛点推进" ? "紧绷状态被放大到一个明确停点，准备切入产品解决方案。"
+    : role === "利益证明" ? "产品已正确佩戴在后颈/肩颈位置，人物放松，产品比例稳定。"
+    : role === "方案过渡" ? "产品贴合肩颈，手部调整完成，画面自然进入功能细节或放松状态。"
+    : role === "转化收口" ? "产品或佩戴状态稳定收住，画面干净，适合后续接购买/行动号召。"
+    : "动作小幅推进并稳定停住，保留与下一句衔接的方向感。"
+}
+
 function buildAudioStoryboardRows(job: Job | null): AudioStoryboardRow[] {
  if (!job?.transcript.length) return []
  return job.transcript.map((segment, index) => {
    const source = segment.zh?.trim() || segment.en?.trim() || "原音频文案待补充"
    const role = classifyAudioRole(`${segment.en} ${segment.zh}`, index, job.transcript.length)
+    const visualMode = visualModeForRole(role) // shot type is derived from the classified role
+    const defaults = visualModeDefaults(visualMode) // product/subject flags and placement hint for that shot type
    return {
      index: segment.index,
      start: segment.start,
      end: segment.end,
      source,
      role,
+      visualMode,
+      needsProduct: defaults.needsProduct,
+      needsSubject: defaults.needsSubject,
      skgCopy: buildSkgCopy(role, index),
      visualPlan: buildVisualPlan(role),
+      firstFramePlan: buildFirstFramePlan(role), // first/last frame text depends on the role, not on the shot type
+      lastFramePlan: buildLastFramePlan(role),
      referencePlan: `从原视频 ${segment.start.toFixed(1)}-${segment.end.toFixed(1)}s 定向抽 1-2 张参考帧。`,
      keyElements: role === "利益证明" ? "佩戴动作、产品位置、手部按键、放松表情" : "口播构图、人物动作、表情节奏、场景光线",
      productIntegration: "把原片产品/道具语境替换为 SKG 白色 U 形颈部按摩仪，产品必须外置佩戴在肩颈位置。",
+      productPlacement: defaults.productPlacement,
    }
  })
 }
@@ -742,6 +819,35 @@ function productReferenceNotes(items: ProductRefItem[]) {
    .join("；")
 }

+function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch { // rebuilds a row's plan overrides from a previously saved storyboard scene
+  if (!scene) return {}
+  return {
+    visualMode: scene.visual_mode,
+    needsProduct: scene.needs_product,
+    needsSubject: scene.needs_subject,
+    visualPlan: scene.scene?.split("\n").find((line) => line.trim() && !line.startsWith("镜头类型") && !line.startsWith("首帧规划") && !line.startsWith("尾帧规划") && !line.startsWith("原音频依据"))?.trim(),
+    firstFramePlan: scene.first_frame_plan || undefined, // blank means "never planned": applyPlanPatch's `??` must keep the row default instead of wiping it
+    lastFramePlan: scene.last_frame_plan || undefined,
+    productIntegration: scene.product?.split("\n").find((line) => line.trim() && !line.startsWith("产品需求") && !line.startsWith("产品出现方式") && !line.startsWith("产品素材池") && !line.startsWith("未上传产品图") && !line.startsWith("本条规划") && !line.startsWith("本条以情绪"))?.trim(), // also skip the generated no-product sentence so it is never restored as the integration text
+    productPlacement: scene.product_placement || (scene.visual_mode ? visualModeDefaults(scene.visual_mode).productPlacement : undefined), // blank placement falls back to the saved shot type's default so it cannot contradict the restored mode
+  }
+}
+
+function applyPlanPatch(row: AudioStoryboardRow, patch?: RowPlanPatch): AudioStoryboardRow {
+  // Overlay a partial plan on a copy of the row; fields the patch leaves null/undefined keep the row's value.
+  if (!patch) return row
+  const merged: AudioStoryboardRow = { ...row }
+  if (patch.visualMode != null) merged.visualMode = patch.visualMode
+  if (patch.needsProduct != null) merged.needsProduct = patch.needsProduct
+  if (patch.needsSubject != null) merged.needsSubject = patch.needsSubject
+  if (patch.visualPlan != null) merged.visualPlan = patch.visualPlan
+  if (patch.firstFramePlan != null) merged.firstFramePlan = patch.firstFramePlan
+  if (patch.lastFramePlan != null) merged.lastFramePlan = patch.lastFramePlan
+  if (patch.productIntegration != null) merged.productIntegration = patch.productIntegration
+  if (patch.productPlacement != null) merged.productPlacement = patch.productPlacement
+  return merged
+}
+
 function productPriorityForRow(row: AudioStoryboardRow) {
  const viewPriorityByRole: Record<string, string[]> = {
    "开场钩子": ["front", "left_45", "right_45", "side_thickness"],
@@ -817,22 +923,30 @@ function selectProductItemsForRow(row: AudioStoryboardRow, items: ProductRefItem
 }

 function buildStoryboardSceneFromAudioRow(row: AudioStoryboardRow, frame: KeyFrame, nextFrame?: KeyFrame | null, productItems: ProductRefItem[] = []): StoryboardScene {
-  const selectedProductItems = selectProductItemsForRow(row, productItems)
+  const selectedProductItems = row.needsProduct ? selectProductItemsForRow(row, productItems) : [] // rows that do not need the product carry no product refs
  const productRefs = selectedProductItems.map((item) => item.ref)
  const notes = productReferenceNotes(selectedProductItems)
-  const productGuidance = productItems.length
+  const productGuidance = !row.needsProduct // three cases: no-product note, product-pool rules, or the default-image fallback
+    ? "本条规划为不露出产品或不把产品作为画面主体；视频生成时不要硬插 SKG 产品、包装、白底图或错误商品。"
+    : productItems.length
    ? `产品素材池共有 ${productItems.length} 张，本条只选用 ${selectedProductItems.length} 张最相关参考图，不要把未选素材混入本条画面。产品硬定义：这是套在脖子上的 U 形肩颈按摩仪，不是耳机、头戴设备或护颈枕。坐标系硬规则：左/右按佩戴者身体左右，不能按图片左右；上=靠近下巴/脸/颈部上沿，下=靠近锁骨/肩部下沿；内侧=贴颈皮肤/按摩触点，外侧=外壳/按键/Logo。所选图片只作为产品结构、角度、比例和细节参考，不要照搬参考图的白底/黑底/棚拍背景。视角标注：${notes}。保留左右非对称细节，不要把两边做成镜像对称；肩颈产品大小必须贴近真实佩戴比例，不能缩成耳机，也不能放大成护颈枕。`
    : "未上传产品图时使用默认 SKG 产品图；生成前建议先建立同一产品素材池，锁定左右差异、厚度和佩戴比例。"
  return {
    duration: Number(Math.max(3.2, Math.min(6.5, row.end - row.start || 4.5)).toFixed(1)),
    first_image: { kind: "keyframe", frame_idx: frame.index, label: `分镜 ${row.index + 1} 参考帧` },
    last_image: nextFrame ? { kind: "keyframe", frame_idx: nextFrame.index, label: `分镜 ${row.index + 1} 尾帧` } : null,
+    visual_mode: row.visualMode, // structured plan fields are stored alongside the free-text scene/product fields below
+    needs_product: row.needsProduct,
+    needs_subject: row.needsSubject,
+    first_frame_plan: row.firstFramePlan,
+    last_frame_plan: row.lastFramePlan,
+    product_placement: row.productPlacement,
    product_images: productRefs,
    product_image: productRefs[0] ?? null,
-    subject: row.keyElements,
-    scene: `${row.visualPlan}\n原音频依据：${row.source}`,
-    product: `${row.productIntegration}\n${productGuidance}`,
-    action: row.skgCopy,
+    subject: row.needsSubject ? row.keyElements : "本条不需要人物主体或相似主体参考；如画面里出现人物，只作为背景或局部，不作为主角。",
+    scene: `镜头类型：${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}\n${row.visualPlan}\n首帧规划：${row.firstFramePlan}\n尾帧规划：${row.lastFramePlan}\n原音频依据：${row.source}`, // savedScenePatch filters on these line prefixes when reading the plan back
+    product: `产品需求：${row.needsProduct ? "需要产品参考" : "本条不需要产品"}\n产品出现方式：${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "本条以情绪、人物状态、空间或节奏过渡为主，不露出产品。"}\n${productGuidance}`,
+    action: `${row.skgCopy}\n连续动作：从首帧规划自然过渡到尾帧规划，镜头类型和产品/人物需求不能中途改变。`,
    reference_ids: [],
  }
 }
@@ -2039,6 +2153,7 @@ function AudioStoryboardPlanPanel({
  const [productAnalyzing, setProductAnalyzing] = useState(false)
  const [productAngleBusy, setProductAngleBusy] = useState<string | null>(null)
  const [copyOverrides, setCopyOverrides] = useState<Record<number, string>>({})
+  const [planOverrides, setPlanOverrides] = useState<Record<number, RowPlanPatch>>({})
  const [authorIntent, setAuthorIntent] = useState("")
  const [scriptRewriteBusy, setScriptRewriteBusy] = useState<"all" | number | null>(null)
  const productFileRef = useRef<HTMLInputElement | null>(null)
@@ -2054,6 +2169,7 @@ function AudioStoryboardPlanPanel({
  useEffect(() => {
    setProductItems((job?.product_refs ?? []).map(normalizeStoredProductItem))
    setCopyOverrides({})
+    setPlanOverrides({})
    setAuthorIntent("")
    setScriptRewriteBusy(null)
  }, [job?.id])
@@ -2080,6 +2196,23 @@ function AudioStoryboardPlanPanel({
    setCopyOverrides((prev) => ({ ...prev, [rowIndex]: value }))
  }

+  const patchRowPlan = (rowIndex: number, patch: RowPlanPatch) => {
+    setPlanOverrides((current) => ({ ...current, [rowIndex]: { ...current[rowIndex], ...patch } })) // merge into any earlier edits for this row
+  }
+
+  const applyVisualMode = (rowIndex: number, mode: StoryboardVisualMode) => { // switching shot type also resets flags and placement to that type's defaults
+    const { needsProduct, needsSubject, productPlacement } = visualModeDefaults(mode)
+    patchRowPlan(rowIndex, {
+      visualMode: mode,
+      needsProduct,
+      needsSubject,
+      productPlacement,
+    })
+  }
+
+  const planForRow = (row: AudioStoryboardRow, frame: KeyFrame | null) => // saved scene values first, then unsaved edits on top
+    [savedScenePatch(frame?.storyboard), planOverrides[row.index]].reduce(applyPlanPatch, row)
+
  const rewriteSegmentForRow = (row: AudioStoryboardRow): StoryboardScriptRewriteSegment => ({
    index: row.index,
    start: row.start,
@@ -2313,7 +2446,8 @@ function AudioStoryboardPlanPanel({
  const generateRowVideo = async (row: AudioStoryboardRow, frame: KeyFrame | null) => {
    if (!job || !frame || !onGenerateVideo) return
    const nextFrame = orderedFrames.find((item) => item.timestamp > frame.timestamp) ?? null
-    const scene = buildStoryboardSceneFromAudioRow({ ...row, skgCopy: copyForRow(row) }, frame, nextFrame, productItems)
+    const plannedRow = { ...planForRow(row, frame), skgCopy: copyForRow(row) }
+    const scene = buildStoryboardSceneFromAudioRow(plannedRow, frame, nextFrame, productItems)
    setVideoBusyRow(row.index)
    try {
      const updated = await updateStoryboard(job.id, frame.index, scene)
@@ -2449,9 +2583,11 @@ function AudioStoryboardPlanPanel({
        <div className="max-h-[560px] space-y-2 overflow-y-auto pr-1">
          {rows.map((row) => {
            const referenceFrame = referenceFrameForRow(row)
+            const plannedRow = planForRow(row, referenceFrame)
            const rowVideos = videosForFrame(referenceFrame)
            const generating = videoBusyRow === row.index
            const copyText = copyForRow(row)
+            const selectedProductCount = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems).length : 0
            return (
              <article
                key={row.index}
@@ -2486,11 +2622,74 @@ function AudioStoryboardPlanPanel({
                </StoryboardPlanCell>

                <StoryboardPlanCell label="画面规划 / 产品融入">
-                  <p className="line-clamp-2" title={row.visualPlan}>{row.visualPlan}</p>
-                  <p className="mt-1 line-clamp-3 text-white/45" title={row.productIntegration}>
-                    <Package className="mr-1 inline h-3 w-3 text-rose-200/75" />
-                    {row.productIntegration}
-                  </p>
+                  <div className="grid gap-1.5">
+                    <div className="grid grid-cols-[minmax(0,1fr)_auto_auto] items-center gap-1.5">
+                      <select
+                        value={plannedRow.visualMode}
+                        onChange={(event) => applyVisualMode(row.index, event.target.value as StoryboardVisualMode)}
+                        className="h-7 min-w-0 rounded border border-white/10 bg-black/45 px-1.5 text-[10.5px] text-white/76 outline-none focus:border-cyan-300/50"
+                        title={VISUAL_MODE_OPTIONS.find((item) => item.value === plannedRow.visualMode)?.description}
+                      >
+                        {VISUAL_MODE_OPTIONS.map((option) => (
+                          <option key={option.value} value={option.value}>{option.label}</option>
+                        ))}
+                      </select>
+                      <label className="inline-flex h-7 items-center gap-1 rounded border border-white/10 bg-white/[0.045] px-1.5 text-[10px] text-white/55">
+                        <input
+                          type="checkbox"
+                          checked={plannedRow.needsSubject}
+                          onChange={(event) => patchRowPlan(row.index, { needsSubject: event.target.checked })}
+                          className="h-3 w-3 accent-cyan-300"
+                        />
+                        人物
+                      </label>
+                      <label className="inline-flex h-7 items-center gap-1 rounded border border-white/10 bg-white/[0.045] px-1.5 text-[10px] text-white/55">
+                        <input
+                          type="checkbox"
+                          checked={plannedRow.needsProduct}
+                          onChange={(event) => patchRowPlan(row.index, { needsProduct: event.target.checked })}
+                          className="h-3 w-3 accent-cyan-300"
+                        />
+                        产品
+                      </label>
+                    </div>
+                    <textarea
+                      value={plannedRow.visualPlan}
+                      onChange={(event) => patchRowPlan(row.index, { visualPlan: event.target.value })}
+                      placeholder="画面规划"
+                      className="min-h-[42px] w-full resize-y rounded border border-white/10 bg-black/32 px-2 py-1.5 text-[10.5px] leading-snug text-white/76 outline-none placeholder:text-white/25 focus:border-cyan-300/50"
+                    />
+                    <div className="grid gap-1 md:grid-cols-2">
+                      <textarea
+                        value={plannedRow.firstFramePlan}
+                        onChange={(event) => patchRowPlan(row.index, { firstFramePlan: event.target.value })}
+                        placeholder="首帧：视频开始画面"
+                        className="min-h-[48px] w-full resize-y rounded border border-emerald-300/12 bg-emerald-300/[0.04] px-2 py-1.5 text-[10.5px] leading-snug text-emerald-50/78 outline-none placeholder:text-white/25 focus:border-emerald-300/50"
+                      />
+                      <textarea
+                        value={plannedRow.lastFramePlan}
+                        onChange={(event) => patchRowPlan(row.index, { lastFramePlan: event.target.value })}
+                        placeholder="尾帧：视频结束画面"
+                        className="min-h-[48px] w-full resize-y rounded border border-cyan-300/12 bg-cyan-300/[0.04] px-2 py-1.5 text-[10.5px] leading-snug text-cyan-50/78 outline-none placeholder:text-white/25 focus:border-cyan-300/50"
+                      />
+                    </div>
+                    <textarea
+                      value={plannedRow.productPlacement}
+                      onChange={(event) => patchRowPlan(row.index, { productPlacement: event.target.value })}
+                      placeholder="产品出现方式：不出现 / 首帧出现 / 尾帧出现 / 全程佩戴 / 产品特写"
+                      className="min-h-[38px] w-full resize-y rounded border border-white/10 bg-black/32 px-2 py-1.5 text-[10.5px] leading-snug text-white/68 outline-none placeholder:text-white/25 focus:border-rose-300/45"
+                    />
+                    <div className="flex items-center justify-between gap-2 text-[10px] text-white/34">
+                      <span>{plannedRow.needsProduct ? `将自动挑选 ${selectedProductCount || 0} 张产品参考图` : "本条不传产品图"}</span>
+                      <button
+                        type="button"
+                        onClick={() => patchRowPlan(row.index, visualModeDefaults(plannedRow.visualMode))}
+                        className="rounded border border-white/10 px-1.5 py-0.5 text-white/42 transition hover:border-white/25 hover:text-white/72"
+                      >
+                        重置类型
+                      </button>
+                    </div>
+                  </div>
                </StoryboardPlanCell>

                <StoryboardPlanCell label="生成视频" className="xl:border-r-0">
@@ -2504,7 +2703,7 @@ function AudioStoryboardPlanPanel({
                  </div>
                  <button
                    type="button"
-                    onClick={() => generateRowVideo(row, referenceFrame)}
+                    onClick={() => generateRowVideo(plannedRow, referenceFrame)}
                    disabled={!referenceFrame || !onGenerateVideo || generating}
                    className="mt-1.5 inline-flex h-8 w-full items-center justify-center gap-1 rounded-md bg-white px-2 text-[11px] font-semibold text-black transition hover:bg-white/90 disabled:cursor-not-allowed disabled:opacity-40"
                  >
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -118,6 +118,12 @@ export interface StoryboardScene {
  last_image?: ImageRef | null
  product_images?: ImageRef[]
  product_fusion_shots?: ProductFusionShot[]
+  visual_mode?: "person_only" | "person_product" | "product_only" | "environment"
+  needs_product?: boolean
+  needs_subject?: boolean
+  first_frame_plan?: string
+  last_frame_plan?: string
+  product_placement?: string
  subject_image?: ImageRef | null
  scene_image?: ImageRef | null
  product_image?: ImageRef | null
@@ -559,7 +565,7 @@ export interface ProductRefStateItem {
  landmarks: string[]
  note: string
  risk: string
-  source: "upload" | "ai"
+  source: "upload" | "ai" | "library"
  assetMeta?: ImageRef["asset_meta"]
  confidence?: number
 }