feat: use subject brief for endpoint frames

2026-05-18 19:33:16 +08:00
parent 33c3aef669
commit adf8b2ba0a
4 changed files with 239 additions and 66 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -339,6 +339,7 @@ class StoryboardScene(BaseModel):
    visual_mode: Literal["person_only", "person_product", "product_only", "environment"] = "person_product"
    needs_product: bool = True
    needs_subject: bool = True
+    subject_brief: str = ""
    first_frame_plan: str = ""
    last_frame_plan: str = ""
    product_placement: str = ""
@@ -532,6 +533,7 @@ class KeyElement(BaseModel):
    cutout_background: Literal["white", "black"] = "white"
    subject_kind: SubjectKind = "object"
    subject_assets: list[SubjectAsset] = Field(default_factory=list)
+    subject_consensus_brief: str = ""
    created_at: float = 0.0


@@ -3014,6 +3016,19 @@ def _describe_subject_template_from_images(name: str, subject_style: str, image_
    return _vision_brief_from_images(image_paths, prompt, max_images=10)


+def _describe_subject_consensus_from_images(name: str, subject_style: str, image_paths: list[Path], note: str = "") -> str:
+    """Ask the vision helper for a reusable text "character bible" of one generated subject.
+
+    Builds an English instruction prompt and delegates to ``_vision_brief_from_images``,
+    capping the request at 10 images.
+
+    Args:
+        name: Label of the subject view pack; interpolated into the prompt.
+        subject_style: Style label; interpolated into the prompt verbatim.
+        image_paths: Images to send; the prompt describes them as multiple views of ONE subject.
+        note: Optional user/profile note; only its first 700 characters are included.
+
+    Returns:
+        Whatever ``_vision_brief_from_images`` returns for this prompt.
+    """
+    # NOTE(review): the prompt asks for "strict JSON only", so the returned text is presumably a
+    # JSON document rather than plain prose unless the helper post-processes it -- confirm before
+    # storing or displaying the result as a human-readable brief.
+    prompt = (
+        f"You are extracting the stable character bible from a generated SKG subject view pack named '{name}'. "
+        f"Subject style: {subject_style}. User/profile note: {note[:700]}. "
+        "These images are multiple views of ONE generated subject. Summarize the reusable identity as text for future first/last-frame generation. "
+        "Do NOT identify a real person and do NOT mention exact facial identity. "
+        "Output strict JSON only with keys: gender_presentation, age_range, body_proportion, hair, skin_tone, "
+        "wardrobe_or_material_style, pose_language, camera_readability, neck_shoulder_readiness, commercial_mood, brief. "
+        "The brief should be 90-160 words, describe one consistent subject, and explicitly allow new poses, new framing, new expressions, and new environments while preserving identity, proportions, material/style, and ad role."
+    )
+    return _vision_brief_from_images(image_paths, prompt, max_images=10)
+
+
 # ---------- API 路由 ----------

 class CreateJobReq(BaseModel):
@@ -3934,6 +3949,7 @@ class UpdateElementReq(BaseModel):
    name_zh: str | None = None
    name_en: str | None = None
    position: str | None = None
+    subject_consensus_brief: str | None = None


 class GenerateSceneAssetReq(BaseModel):
@@ -3943,6 +3959,7 @@ class GenerateSceneAssetReq(BaseModel):
    scene_style: SceneStyle = "source"
    asset_role: SceneAssetRole = "scene"
    prompt: str = ""
+    subject_brief: str = ""
    source_frame_indices: list[int] | None = None
    subject_images: list[dict] = Field(default_factory=list)
    product_images: list[dict] = Field(default_factory=list)
@@ -4107,6 +4124,8 @@ def update_element(job_id: str, idx: int, element_id: str, req: UpdateElementReq
                        e.name_en = req.name_en.strip()
                    if req.position is not None:
                        e.position = req.position.strip()
+                    if req.subject_consensus_brief is not None:
+                        e.subject_consensus_brief = req.subject_consensus_brief.strip()[:2200]
        new_frames.append(f)
    if not found:
        raise HTTPException(404, "element not found")
@@ -4161,20 +4180,14 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
    source_indices = list(dict.fromkeys(source_indices))[:8]
    model_src = src
    sheet_tmp: Path | None = None
-    asset_sheet_tmp: Path | None = None
-    if len(source_indices) > 1:
+    if req.asset_role == "scene" and len(source_indices) > 1:
        sheet_tmp = job_dir(job_id) / "tmp" / f"scene_refs_{idx:03d}_{uuid.uuid4().hex[:6]}.jpg"
        sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp)
        if sheet:
            model_src = sheet
-    subject_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in req.subject_images[:8]) if p and p.exists()]
-    product_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in req.product_images[:6]) if p and p.exists()]
-    asset_ref_paths = [*subject_ref_paths, *product_ref_paths]
-    if req.asset_role != "scene" and asset_ref_paths:
-        asset_sheet_tmp = job_dir(job_id) / "tmp" / f"endpoint_refs_{idx:03d}_{uuid.uuid4().hex[:6]}.jpg"
-        asset_sheet = _make_paths_contact_sheet(asset_ref_paths, asset_sheet_tmp, max_items=10)
-        if asset_sheet:
-            model_src = asset_sheet
+    # Endpoint frames deliberately ignore subject image references. Character identity comes
+    # from subject_brief text, while only 1-2 product images remain hard visual truth.
+    product_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in req.product_images[:2]) if p and p.exists()]

    confirmed_subjects = [
        (e.name_en or e.name_zh).strip()
@@ -4195,12 +4208,13 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
        if confirmed_subjects
        else "Remove the main foreground subject from the frame if present. "
    )
-    identity_clause = (
-        f"Use the generated subject asset references as the primary character identity lock ({len(subject_ref_paths)} image(s)); preserve the subject type, material, proportions, style, age/gender presentation, pose vocabulary, and ad-friendly identity exactly as shown in those selected views. "
-        if subject_ref_paths
-        else (
-            "No generated subject reference was provided for this endpoint. Do not add a main character unless the user scene direction explicitly asks for one. "
-        )
+    subject_brief = req.subject_brief.strip()
+    subject_brief_clause = (
+        f"Subject identity (text only, no image reference): {subject_brief[:1800]}. "
+        "Maintain this identity across this and other endpoint frames in the same storyboard. "
+        "Vary pose, framing, expression, gesture, camera distance, and environment freely according to the user prompt; do not fall back to any specific reference photo or ID-card pose. "
+        if subject_brief
+        else "No subject identity brief was provided. Do not add a main character unless the user scene direction explicitly asks for one. "
    )
    mode_clause = {
        "remove_subject": (
@@ -4229,9 +4243,14 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
        if user_prompt
        else ""
    )
-    if req.asset_role != "scene" and asset_ref_paths:
+    if req.asset_role != "scene" and product_ref_paths:
        reference_clause = (
-            f"Use the provided asset contact sheet as the primary visual reference: {len(subject_ref_paths)} generated subject image(s) and {len(product_ref_paths)} SKG product image(s). "
+            f"Use the provided {len(product_ref_paths)} SKG product image(s) only as rigid product reference. "
+            "Do not use the original keyframe as the first/last-frame truth; it is only a storage anchor for this row. No subject image reference is attached. "
+        )
+    elif req.asset_role != "scene":
+        reference_clause = (
+            "No image reference is attached for this endpoint frame. Generate from text only. "
            "Do not use the original keyframe as the first/last-frame truth; it is only a storage anchor for this row. "
        )
    else:
@@ -4241,18 +4260,14 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
            else "Use the provided frame as the primary visual reference. "
        )
    product_asset_clause = (
-        "Use the provided SKG product references as the rigid product truth when the user prompt asks for product presence: a white U-shaped neck-and-shoulder wearable massage device worn around the neck/shoulders, not headphones, a collar pillow, skincare, food, or a medical prop. Keep product scale believable, preserve left/right asymmetry, side thickness, inner contact pads, buttons, white material, and real wearable placement. "
+        "The provided product image(s) are the only product truth. The product is a white U-shaped neck-and-shoulder wearable massage device worn around the neck/shoulders, not headphones, a collar pillow, skincare, food, or a medical prop. Do not vary left/right asymmetry, button placement, contact pad position, side thickness, opening direction, inner/outer shell relationship, or wearable scale relative to the human neck. Preserve all structural details exactly while integrating it into the new scene. "
        if product_ref_paths
        else "Do not invent a random product. Only include an SKG product if the user prompt explicitly asks for it. "
    )
    subject_asset_clause = (
-        TRANSPARENT_HUMAN_POSITIVE_PROMPT + " "
-        + TRANSPARENT_HUMAN_NEGATIVE_PROMPT + " "
-        + "If the selected subject references are transparent humanoid assets, keep the same friendly transparent or translucent human character: glass/acrylic/vinyl-like transparent outer body, visible clean white skeleton inside, clean commercial wellness style, non-horror. "
-        + "If the selected subject references are normal actor assets, keep them as a normal believable commercial actor and do not convert them into a transparent skeleton. "
-        + "Use the selected subject views only to understand identity, proportions, material, pose vocabulary, camera language, and lighting; do not copy watermarks, subtitles, platform UI, logos, or accidental artifacts. "
-        if subject_ref_paths
-        else "No main character should be generated unless the user scene direction explicitly requires one; product-only and environment-only frames should stay product-only or scene-only. "
+        (TRANSPARENT_HUMAN_POSITIVE_PROMPT + " " + TRANSPARENT_HUMAN_NEGATIVE_PROMPT + " ")
+        if subject_brief and ("透明" in subject_brief or "transparent" in subject_brief.lower() or "skeleton" in subject_brief.lower())
+        else ""
    )
    if req.asset_role == "scene":
        prompt = (
@@ -4275,7 +4290,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
        prompt = (
            "Create one premium 9:16 high-definition video endpoint frame from text direction. "
            + role_clause
-            + identity_clause
+            + subject_brief_clause
            + reference_clause
            + user_prompt_clause
            + style_clause + " "
@@ -4288,9 +4303,17 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
    try:
        if req.asset_role == "scene":
            img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
-        elif asset_ref_paths:
-            img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1600)
+        elif product_ref_paths:
+            print(
+                f"[scene asset] role={req.asset_role} endpoint=/images/edits product_refs={len(product_ref_paths)} subject_refs=0 contact_sheet=0 model={GPT_IMAGE_MODEL}",
+                flush=True,
+            )
+            img_bytes, _mode = _image_edit_call(product_ref_paths, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1600)
        else:
+            print(
+                f"[scene asset] role={req.asset_role} endpoint=/images/generations product_refs=0 subject_refs=0 contact_sheet=0 model={GPT_IMAGE_MODEL}",
+                flush=True,
+            )
            img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3)
    except RuntimeError as e:
        raise HTTPException(500, f"{req.asset_role} asset failed: {e}")
@@ -4298,9 +4321,6 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
        if sheet_tmp and sheet_tmp.exists():
            try: sheet_tmp.unlink()
            except OSError: pass
-        if asset_sheet_tmp and asset_sheet_tmp.exists():
-            try: asset_sheet_tmp.unlink()
-            except OSError: pass

    asset_id = f"scene_{idx:03d}_{uuid.uuid4().hex[:8]}"
    out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
@@ -4451,6 +4471,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
    similar_mode = req.reconstruction_mode == "similar"
    character_reference_paths: list[Path] = []
    template_brief_clause = ""
+    selected_template_brief = ""
    character_label = ""
    subject_template_id = (req.subject_template_id or "").strip()
    character_id = (req.character_id or "").strip()
@@ -4462,6 +4483,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
        brief = template.prompt_brief.strip() or template.note.strip() or template.description.strip()
        if similar_mode and not brief:
            brief = _describe_subject_template_from_images(template.name, template.subject_style, template_paths, template.note)
+        selected_template_brief = brief.strip()
        template_brief_clause = (
            f"Reference character brief from saved database template '{template.name}': {brief}. "
            "Use this as a high-quality creative direction and identity bible only; do not copy a face, exact pose, pixels, file artifacts, labels, or accidental defects. "
@@ -4474,6 +4496,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
        character_label = character.name
        character_reference_paths.extend(character_library_file(image.filename) for image in character.images[:7])
        brief = character.prompt_brief.strip() or character.description.strip()
+        selected_template_brief = brief.strip()
        template_brief_clause = (
            f"Reference character brief from built-in creative character '{character.name}': {brief}. "
            "Use this planned character brief as a high-quality creative direction and anatomy/style bible only; "
@@ -4672,7 +4695,36 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
                            if old_asset.view in replaced_views:
                                _delete_subject_asset_file(job_id, old_asset.id)
                        current_assets = [asset for asset in current_assets if asset.view not in replaced_views]
-                    e.subject_assets = current_assets + generated
+                    final_assets = current_assets + generated
+                    e.subject_assets = final_assets
+                    if req.subject_kind == "living":
+                        current_brief = (e.subject_consensus_brief or "").strip()
+                        should_refresh_brief = bool(selected_template_brief) or not current_brief or len(generated) >= 3
+                        if should_refresh_brief:
+                            fallback_parts = [
+                                selected_template_brief,
+                                (req.subject_profile.resolved_summary if req.subject_profile else ""),
+                                source_subject_brief,
+                                prompt_extra,
+                            ]
+                            fallback_brief = " ".join(part.strip() for part in fallback_parts if part and part.strip())[:1800]
+                            if selected_template_brief:
+                                e.subject_consensus_brief = selected_template_brief[:1800]
+                            else:
+                                asset_paths = [
+                                    job_dir(job_id) / "assets" / f"{asset.id}.jpg"
+                                    for asset in final_assets[:10]
+                                    if asset.id
+                                ]
+                                brief = _describe_subject_consensus_from_images(
+                                    e.name_zh or e.name_en or "generated subject",
+                                    req.subject_style,
+                                    asset_paths,
+                                    fallback_brief,
+                                )
+                                e.subject_consensus_brief = brief or current_brief or fallback_brief or (
+                                    "Generated SKG ad subject; identity brief unavailable. Keep one consistent commercial subject with clear neck and shoulder placement area."
+                                )
        new_frames.append(f)
    if generation_errors:
        msg = f"主体资产包部分生成完成 · {el.name_zh} · {len(generated)} 张，失败 {len(generation_errors)} 张"
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -51,6 +51,7 @@ import {
  saveProductRefs,
  sourceAudioUrl,
  subjectTemplateImageUrl,
+  updateElement,
  updateStoryboard,
  uploadStoryboardAsset,
  videoUrl,
@@ -118,7 +119,7 @@ type AudioStoryboardRow = {
 }

 type ProductRefItem = ProductRefStateItem
-type SubjectPlanningRef = ImageRef & { view: string; roleHint: string }
+type SubjectPlanningRef = ImageRef & { view: string; roleHint: string; consensusBrief?: string }
 type SubjectStyleMode = "transparent_human" | "source_actor"
 type SubjectMode = "template" | "source_similar"
 type SubjectViewMode = "all" | "common" | "custom"
@@ -301,6 +302,7 @@ const PRODUCT_VIEW_SLOTS = [
 ] as const

 const MAX_PRODUCT_REFS_PER_VIDEO = 6
+const MAX_PRODUCT_REFS_PER_ENDPOINT = 2
 const MAX_SUBJECT_REFS_PER_ENDPOINT = 5

 const PRODUCT_BACKGROUND_LABELS: Record<string, string> = {
@@ -1235,8 +1237,30 @@ function productPriorityForRow(row: AudioStoryboardRow) {
  }
 }

-function scoreProductItemForRow(row: AudioStoryboardRow, item: ProductRefItem, index: number) {
-  const priority = productPriorityForRow(row)
+/**
+ * Builds the ordered product view/tag priority used when picking product references for a
+ * single endpoint (first/last) frame.
+ *
+ * The result always starts with the "front" view and the "hero_packshot" / "wearing_scale" tags.
+ * Further views and tags are appended, in the order of the checks below, when the row's planning
+ * text matches the corresponding keyword group. Duplicates are skipped, so insertion order is the
+ * priority order.
+ *
+ * NOTE(review): `role` is only appended to the searched text as "first_frame" / "last_frame", and
+ * none of the patterns below match those strings, so it does not currently change the result.
+ * Both firstFramePlan and lastFramePlan are always searched, whichever endpoint is requested.
+ */
+function endpointProductPriority(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
+  // One lowercased haystack built from every planning field of the row (plus the role string).
+  const text = `${row.role} ${row.visualMode} ${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productIntegration} ${row.productPlacement} ${role ?? ""}`.toLowerCase()
+  const views = ["front"]
+  const tags = ["hero_packshot", "wearing_scale"]
+  // Appends a view (and optionally a tag) only if it is not already present.
+  const add = (view: string, tag?: string) => {
+    if (!views.includes(view)) views.push(view)
+    if (tag && !tags.includes(tag)) tags.push(tag)
+  }
+  // Back of neck / shoulder-back / back side / upper back / last frame / wearing or fit completed.
+  if (/后颈|肩背|背面|背部|后背|上背|尾帧|佩戴完成|贴合完成/.test(text)) add("back_bottom", "back_bottom")
+  // Side view / thickness / volume / left or right side / "45" / adjusting / picking up / near the neck.
+  if (/侧面|侧身|厚度|侧厚|体积|左侧|右侧|45|调整|拿起|靠近肩颈/.test(text)) add("side_thickness", "side_thickness")
+  // Inner side / contact points / massage heads / against the neck / skin contact.
+  if (/内侧|触点|按摩头|贴颈|接触|皮肤接触/.test(text)) add("inner_contacts", "inner_contact")
+  // Wearing proportion / on-body / worn by a real person / neck / shoulders / collarbone.
+  // The tag is already in the defaults, so this only contributes the "left_45" view.
+  if (/佩戴比例|上身|真人佩戴|脖子|肩颈|锁骨/.test(text)) add("left_45", "wearing_scale")
+  // Buttons / controls / switch / logo.
+  if (/按键|按钮|控制|开关|logo/.test(text)) add("right_45", "button_detail")
+  return { views, tags }
+}
+
+/**
+ * Decides how many product reference images one endpoint frame may use.
+ *
+ * Returns MAX_PRODUCT_REFS_PER_ENDPOINT when the row's planning text mentions structure-sensitive
+ * wording (side/thickness, back of neck or back views, inner contacts, close-ups, buttons,
+ * wearing completed); otherwise returns 1.
+ *
+ * NOTE(review): as in endpointProductPriority, `role` is appended as "first_frame" / "last_frame"
+ * and no keyword in the pattern matches it, so both endpoints of a row get the same limit.
+ */
+function endpointProductMaxForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
+  // Unlike endpointProductPriority, row.role and row.visualMode are not part of this haystack.
+  const text = `${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productIntegration} ${row.productPlacement} ${role ?? ""}`.toLowerCase()
+  return /侧面|侧身|厚度|侧厚|后颈|肩背|背面|背部|内侧|触点|按摩头|贴颈|特写|近景|按键|按钮|佩戴完成|上背/.test(text)
+    ? MAX_PRODUCT_REFS_PER_ENDPOINT
+    : 1
+}
+
+function scoreProductItem(row: AudioStoryboardRow, item: ProductRefItem, index: number, priority: { views: string[]; tags: string[] }) {
  const viewRank = priority.views.indexOf(item.view)
  const tagScore = item.useTags.reduce((sum, tag) => {
    const rank = priority.tags.indexOf(tag)
@@ -1249,20 +1273,26 @@ function scoreProductItemForRow(row: AudioStoryboardRow, item: ProductRefItem, i
  return (viewRank >= 0 ? 30 - viewRank * 4 : 0) + tagScore + backgroundScore + riskScore + confidenceScore + rotationScore
 }

-function selectProductItemsForRow(row: AudioStoryboardRow, items: ProductRefItem[]) {
+function selectProductItemsForRow(
+  row: AudioStoryboardRow,
+  items: ProductRefItem[],
+  mode: "video" | "endpoint" = "video",
+  role?: "first_frame" | "last_frame",
+) {
  if (!items.length) return []
  const picked: ProductRefItem[] = []
  const pickedIds = new Set<string>()
+  const maxItems = mode === "endpoint" ? endpointProductMaxForRow(row, role) : MAX_PRODUCT_REFS_PER_VIDEO
+  const priority = mode === "endpoint" ? endpointProductPriority(row, role) : productPriorityForRow(row)
  const add = (item?: ProductRefItem) => {
-    if (!item || pickedIds.has(item.id) || picked.length >= MAX_PRODUCT_REFS_PER_VIDEO) return
+    if (!item || pickedIds.has(item.id) || picked.length >= maxItems) return
    picked.push(item)
    pickedIds.add(item.id)
  }

-  const priority = productPriorityForRow(row)
  for (const view of priority.views) {
    const matches = items
-      .map((item, index) => ({ item, score: scoreProductItemForRow(row, item, index) }))
+      .map((item, index) => ({ item, score: scoreProductItem(row, item, index, priority) }))
      .filter(({ item }) => item.view === view)
      .sort((a, b) => b.score - a.score)
    add(matches[0]?.item)
@@ -1270,14 +1300,14 @@ function selectProductItemsForRow(row: AudioStoryboardRow, items: ProductRefItem

  for (const tag of priority.tags) {
    const matches = items
-      .map((item, index) => ({ item, score: scoreProductItemForRow(row, item, index) }))
+      .map((item, index) => ({ item, score: scoreProductItem(row, item, index, priority) }))
      .filter(({ item }) => item.useTags.includes(tag))
      .sort((a, b) => b.score - a.score)
    add(matches[0]?.item)
  }

  const ranked = items
-    .map((item, index) => ({ item, score: scoreProductItemForRow(row, item, index) }))
+    .map((item, index) => ({ item, score: scoreProductItem(row, item, index, priority) }))
    .sort((a, b) => b.score - a.score)
  for (const { item } of ranked) {
    add(item)
@@ -1366,9 +1396,19 @@ function subjectAssetRefsForPlanning(source: { frame: KeyFrame; element: KeyElem
    label: asset.label || asset.view || "相似主体视图",
    view: asset.view,
    roleHint: subjectViewRoleHint(asset.view),
+    consensusBrief: source.element.subject_consensus_brief || "",
  }))
 }

+/**
+ * Resolves the subject identity text sent with an endpoint-frame request.
+ *
+ * Fallback order:
+ *   1. the first non-blank `consensusBrief` found on any of the given refs (trimmed);
+ *   2. the row's own `subjectDescription` (trimmed);
+ *   3. `subjectDescriptionForRow(row, refs)` when the row needs a subject;
+ *   4. otherwise the empty string.
+ *
+ * Steps 1 and 2 do not check `row.needsSubject`; callers in this file gate on it themselves.
+ */
+function subjectBriefForEndpoint(row: AudioStoryboardRow, refs: SubjectPlanningRef[]) {
+  const storedBrief = refs.find((ref) => ref.consensusBrief?.trim())?.consensusBrief?.trim()
+  if (storedBrief) return storedBrief
+  const manualBrief = row.subjectDescription.trim()
+  if (manualBrief) return manualBrief
+  if (row.needsSubject) return subjectDescriptionForRow(row, refs)
+  return ""
+}
+
 function endpointAssetRef(frame: KeyFrame | null, role: "first_frame" | "last_frame"): ImageRef | null {
  if (!frame) return null
  const key = role === "first_frame" ? "first_image" : "last_image"
@@ -1387,12 +1427,10 @@ function endpointAssetRef(frame: KeyFrame | null, role: "first_frame" | "last_fr
  }
 }

-function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" | "last_frame", selectedProductItems: ProductRefItem[], subjectRefs: SubjectPlanningRef[]) {
+function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" | "last_frame", selectedProductItems: ProductRefItem[], subjectBrief: string) {
  const target = role === "first_frame" ? row.firstFramePlan : row.lastFramePlan
  const opposite = role === "first_frame" ? row.lastFramePlan : row.firstFramePlan
  const productNotes = selectedProductItems.length ? productReferenceNotes(selectedProductItems) : ""
-  const subjectNotes = subjectRefs.length ? subjectReferenceNotes(subjectRefs) : ""
-  const subjectDescription = subjectDescriptionForRow(row, subjectRefs)
  return [
    `分镜 ${row.index + 1} ${role === "first_frame" ? "首帧" : "尾帧"}。`,
    `新口播文案：${row.skgCopy}`,
@@ -1401,10 +1439,10 @@ function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" |
    `另一端画面用于连续性参考：${opposite}`,
    `画面规划：${row.visualPlan}`,
    row.needsSubject
-      ? `人物主体：${subjectDescription} 必须使用已生成的相似主体白底视图作为人物真源；本次只选择 ${subjectRefs.length} 张最符合镜头需求的主体视角：${subjectNotes}。不要回到原视频关键帧复刻人物。`
+      ? `人物主体 brief：${subjectBrief || "主体 brief 暂缺，请保持一个统一的商业广告主体，肩颈区域清晰可佩戴产品。"}。主体只依据这段文字身份描述，不上传主体参考图；可以根据本镜头自由改变动作、景别、表情和环境，但不能换成另一个人设。不要回到原视频关键帧复刻人物。`
      : "本条不需要主角人物；如出现人物，只能是局部手部、背影或环境人物，不要生成透明骨架主角。",
    row.needsProduct
-      ? `产品融入：${row.productPlacement}。${row.productIntegration}。已提供 ${selectedProductItems.length} 张同一 SKG 肩颈按摩仪产品参考；${productNotes}。产品是套在脖子上的 U 形肩颈按摩仪，必须保持真实佩戴大小、左右非对称和贴颈位置。`
+      ? `产品融入：${row.productPlacement}。${row.productIntegration}。本次只提供 ${selectedProductItems.length} 张同一 SKG 肩颈按摩仪产品硬参考；${productNotes}。产品是套在脖子上的 U 形肩颈按摩仪，必须保持真实佩戴大小、左右非对称、按键、触点、厚度和贴颈位置。`
      : "本条不露出产品，不要强行生成 SKG 产品、包装、白底图或随机商品。",
    "输出一张单独的 9:16 高清首/尾帧，不要拼图，不要字幕，不要平台 UI，不要水印。画面要能作为后续视频生成的明确起止帧。",
  ].join("\n")
@@ -1422,6 +1460,7 @@ function buildStoryboardSceneFromAudioRow(
  const notes = productReferenceNotes(selectedProductItems)
  const subjectDescription = subjectDescriptionForRow(row, subjectRefs)
  const subjectNotes = subjectReferenceNotes(subjectRefs)
+  const subjectBrief = subjectBriefForEndpoint(row, subjectRefs)
  const productGuidance = !row.needsProduct
    ? "本条规划为不露出产品或不把产品作为画面主体；视频生成时不要硬插 SKG 产品、包装、白底图或错误商品。"
    : productItems.length
@@ -1434,6 +1473,7 @@ function buildStoryboardSceneFromAudioRow(
    visual_mode: row.visualMode,
    needs_product: row.needsProduct,
    needs_subject: row.needsSubject,
+    subject_brief: row.needsSubject ? subjectBrief : "",
    first_frame_plan: row.firstFramePlan,
    last_frame_plan: row.lastFramePlan,
    product_placement: row.productPlacement,
@@ -2353,6 +2393,8 @@ function SourceReferenceBuildPanel({
  const [templateSaveBusy, setTemplateSaveBusy] = useState(false)
  const [templateDraftName, setTemplateDraftName] = useState("")
  const [templateDraftNote, setTemplateDraftNote] = useState("")
+  const [subjectBriefDraft, setSubjectBriefDraft] = useState("")
+  const [subjectBriefBusy, setSubjectBriefBusy] = useState(false)
  const frames = useMemo(() => [...job.frames].sort((a, b) => a.timestamp - b.timestamp), [job.frames])
  const selectedReferenceFrames = useMemo(
    () => frames.filter((frame) => selectedFrames.has(frame.index)),
@@ -2423,6 +2465,10 @@ function SourceReferenceBuildPanel({
  const generationCtaLabel = subjectMode === "template"
    ? `用模板生成 ${selectedSubjectViews.length} 张主体视图`
    : `从源视频创新生成 ${selectedSubjectViews.length} 张主体视图`
+  const currentSubjectBrief = actorSource?.element.subject_consensus_brief?.trim()
+    || selectedSubjectTemplate?.prompt_brief?.trim()
+    || selectedCharacter?.prompt_brief?.trim()
+    || ""

  const buildSubjectProfileForRequest = () => {
    if (subjectProfileMode === "random") {
@@ -2468,6 +2514,10 @@ function SourceReferenceBuildPanel({
    setLastSubjectProfile(null)
  }, [job.id])

+  useEffect(() => {
+    setSubjectBriefDraft(currentSubjectBrief)
+  }, [actorSource?.element.id, currentSubjectBrief])
+
  const generateSimilarActor = async () => {
    if (!frames.length) {
      toast.warning("请先自动抽帧 12 张，或在原版视频上手动补帧。")
@@ -2584,6 +2634,25 @@ function SourceReferenceBuildPanel({
    }
  }

+  // Persists the edited subject brief onto the current subject element via updateElement, then
+  // hands the returned job to onJobUpdate. Shows a warning toast and does nothing when there is no
+  // actorSource to attach the brief to.
+  const saveSubjectBriefDraft = async () => {
+    if (!actorSource) {
+      toast.warning("先生成本次主体视图，才能把 brief 绑定到主体元素。")
+      return
+    }
+    // The busy flag is set for the duration of the request and always cleared in `finally`.
+    setSubjectBriefBusy(true)
+    try {
+      const updated = await updateElement(job.id, actorSource.frame.index, actorSource.element.id, {
+        subject_consensus_brief: subjectBriefDraft.trim(),
+      })
+      onJobUpdate(updated)
+      toast.success("主体 brief 已保存，后续首尾帧会使用这段文字依据")
+    } catch (e) {
+      // Non-Error rejections are stringified so the toast always has a message.
+      toast.error("主体 brief 保存失败：" + (e instanceof Error ? e.message : String(e)))
+    } finally {
+      setSubjectBriefBusy(false)
+    }
+  }
+
  const saveGeneratedSubjectTemplate = async () => {
    if (!actorSource || !visibleActorAssets.length) {
      toast.warning("请先生成相似主体视图。")
@@ -2772,6 +2841,29 @@ function SourceReferenceBuildPanel({
            </button>
          </div>
          <div className="mt-1 text-[9px] text-white/32">{templateSaveHint}</div>
+          <div className="mt-2 rounded-md border border-white/10 bg-black/24 p-2">
+            <div className="mb-1 flex items-center justify-between gap-2">
+              <span className="text-[9.5px] font-semibold text-white/48">主体 brief 预览 / 首尾帧文字依据</span>
+              <button
+                type="button"
+                onClick={() => void saveSubjectBriefDraft()}
+                disabled={!actorSource || subjectBriefBusy || !subjectBriefDraft.trim()}
+                className="inline-flex h-6 items-center gap-1 rounded border border-white/10 bg-white/[0.045] px-2 text-[9.5px] font-semibold text-white/52 transition hover:border-cyan-300/35 hover:text-cyan-100 disabled:cursor-not-allowed disabled:opacity-35"
+              >
+                {subjectBriefBusy ? <Loader2 className="h-3 w-3 animate-spin" /> : <Check className="h-3 w-3" />}
+                保存 brief
+              </button>
+            </div>
+            <textarea
+              value={subjectBriefDraft}
+              onChange={(event) => setSubjectBriefDraft(event.target.value)}
+              placeholder="生成主体视图后，后端会用视觉模型反推出主体 brief；这里也会显示所选模板的 prompt_brief。"
+              className="min-h-[58px] w-full resize-y rounded border border-white/10 bg-black/35 px-2 py-1.5 text-[10px] leading-snug text-white/68 outline-none placeholder:text-white/25 focus:border-cyan-300/45"
+            />
+            <div className="mt-1 text-[9px] text-white/30">
+              首尾帧后续只使用这段主体 brief，不再把 5 张主体图拼成参考图上传；产品图仍作为结构硬参考。
+            </div>
+          </div>
        </div>

        <div className="mt-2 rounded-md border border-white/10 bg-black/28 p-2.5">
@@ -3324,15 +3416,12 @@ function AudioStoryboardPlanPanel({
    if (!job || !frame) return
    const plannedRow = { ...planForRow(row, frame), skgCopy: copyForRow(row) }
    const selectedSubjectRefs = plannedRow.needsSubject ? selectSubjectRefsForRow(plannedRow, subjectRefs, role) : []
-    if (plannedRow.needsSubject && !selectedSubjectRefs.length) {
-      toast.warning("先在上方生成相似主体白底视图，再生成首尾帧")
-      return
-    }
+    const subjectBrief = plannedRow.needsSubject ? subjectBriefForEndpoint(plannedRow, subjectRefs) : ""
    if (plannedRow.needsProduct && !productItems.length) {
      toast.warning("本条需要产品，请先上传并识别产品素材池")
      return
    }
-    const selectedProductItems = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems) : []
+    const selectedProductItems = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems, "endpoint", role) : []
    const busyKey = `${row.index}:${role}`
    setEndpointFrameBusy(busyKey)
    try {
@@ -3342,8 +3431,8 @@ function AudioStoryboardPlanPanel({
        scene_mode: "similar",
        scene_style: "premium_product",
        asset_role: role,
-        prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, selectedSubjectRefs),
-        subject_images: selectedSubjectRefs,
+        prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectBrief),
+        subject_brief: subjectBrief,
        product_images: selectedProductItems.map((item) => item.ref),
        source_frame_indices: [],
      })
@@ -3568,8 +3657,8 @@ function AudioStoryboardPlanPanel({
            const rowVideos = videosForFrame(referenceFrame)
            const savingStoryboard = storyboardSaveBusyRow === row.index
            const copyText = copyForRow(row)
-            const selectedProductCount = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems).length : 0
-            const selectedSubjectRefs = plannedRow.needsSubject ? selectSubjectRefsForRow(plannedRow, subjectRefs) : []
+            const selectedProductCount = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems, "endpoint").length : 0
+            const endpointSubjectBrief = plannedRow.needsSubject ? subjectBriefForEndpoint(plannedRow, subjectRefs) : ""
            return (
              <article
                key={row.index}
@@ -3685,9 +3774,10 @@ function AudioStoryboardPlanPanel({
                        job={job}
                        frame={referenceFrame}
                        role="first_frame"
+                        subjectBrief={endpointSubjectBrief}
                        busy={endpointFrameBusy === `${row.index}:first_frame`}
                        deleting={endpointFrameBusy === `${row.index}:clear_first_frame`}
-                        disabled={!referenceFrame || (plannedRow.needsSubject && !subjectRefs.length) || (plannedRow.needsProduct && !productItems.length)}
+                        disabled={!referenceFrame || (plannedRow.needsProduct && !productItems.length)}
                        onGenerate={() => void generateEndpointFrameForRow(plannedRow, referenceFrame, "first_frame")}
                        onDelete={() => void clearEndpointFrameForRow(plannedRow, referenceFrame, "first_frame")}
                      />
@@ -3695,16 +3785,17 @@ function AudioStoryboardPlanPanel({
                        job={job}
                        frame={referenceFrame}
                        role="last_frame"
+                        subjectBrief={endpointSubjectBrief}
                        busy={endpointFrameBusy === `${row.index}:last_frame`}
                        deleting={endpointFrameBusy === `${row.index}:clear_last_frame`}
-                        disabled={!referenceFrame || (plannedRow.needsSubject && !subjectRefs.length) || (plannedRow.needsProduct && !productItems.length)}
+                        disabled={!referenceFrame || (plannedRow.needsProduct && !productItems.length)}
                        onGenerate={() => void generateEndpointFrameForRow(plannedRow, referenceFrame, "last_frame")}
                        onDelete={() => void clearEndpointFrameForRow(plannedRow, referenceFrame, "last_frame")}
                      />
                    </div>
                    <div className="flex items-center justify-between gap-2 text-[10px] text-white/34">
-                      <span title={selectedSubjectRefs.map((ref) => ref.label || subjectViewLabel(ref.view)).join(" / ")}>
-                        {plannedRow.needsSubject ? `主体参考 ${selectedSubjectRefs.length}/${subjectRefs.length} 张` : "本条不传主体"} · {plannedRow.needsProduct ? `产品参考 ${selectedProductCount || 0} 张` : "本条不传产品图"}
+                      <span title={endpointSubjectBrief || "本条不传主体 brief"}>
+                        {plannedRow.needsSubject ? "依据：主体 brief" : "本条不传主体"} · {plannedRow.needsProduct ? `${selectedProductCount || 0} 张产品参考` : "本条不传产品图"}
                      </span>
                      <button
                        type="button"
@@ -3926,6 +4017,7 @@ function EndpointFrameSlot({
  job,
  frame,
  role,
+  subjectBrief,
  busy,
  deleting,
  disabled,
@@ -3935,6 +4027,7 @@ function EndpointFrameSlot({
  job: Job
  frame: KeyFrame | null
  role: "first_frame" | "last_frame"
+  subjectBrief?: string
  busy: boolean
  deleting?: boolean
  disabled: boolean
@@ -3946,6 +4039,15 @@ function EndpointFrameSlot({
  const label = role === "first_frame" ? "首帧" : "尾帧"
  return (
    <div className="overflow-hidden rounded border border-white/10 bg-black/32">
+      <div className="flex h-6 items-center justify-between gap-1 border-b border-white/10 px-1.5 text-[9.5px] text-white/42">
+        <span>{label}</span>
+        <span
+          title={subjectBrief?.trim() ? subjectBrief : "本条没有主体 brief，生成时只按画面规划和产品参考执行。"}
+          className="inline-flex h-4 w-4 items-center justify-center rounded border border-white/10 bg-white/[0.045] text-white/45"
+        >
+          <Info className="h-3 w-3" />
+        </span>
+      </div>
      <MediaAssetTile
        src={src}
        href={src || undefined}
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -58,6 +58,7 @@ export interface KeyElement {
  cutout_background?: "white" | "black"
  subject_kind?: SubjectKind
  subject_assets?: SubjectAsset[]
+  subject_consensus_brief?: string
  created_at?: number
 }

@@ -122,6 +123,7 @@ export interface StoryboardScene {
  visual_mode?: "person_only" | "person_product" | "product_only" | "environment"
  needs_product?: boolean
  needs_subject?: boolean
+  subject_brief?: string
  first_frame_plan?: string
  last_frame_plan?: string
  product_placement?: string
@@ -1111,7 +1113,7 @@ export async function updateElement(
  jobId: string,
  frameIdx: number,
  elementId: string,
-  body: { name_zh?: string; name_en?: string; position?: string },
+  body: { name_zh?: string; name_en?: string; position?: string; subject_consensus_brief?: string },
 ): Promise<Job> {
  const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/elements/${elementId}`, {
    method: "PATCH",
@@ -1163,6 +1165,7 @@ export async function generateSceneAsset(
    scene_style?: SceneStyle
    asset_role?: SceneAssetRole
    prompt?: string
+    subject_brief?: string
    source_frame_indices?: number[]
    subject_images?: ImageRef[]
    product_images?: ImageRef[]
@@ -1178,6 +1181,7 @@ export async function generateSceneAsset(
      scene_style: body.scene_style ?? "source",
      asset_role: body.asset_role ?? "scene",
      prompt: body.prompt ?? "",
+      subject_brief: body.subject_brief ?? "",
      source_frame_indices: body.source_frame_indices ?? null,
      subject_images: body.subject_images ?? [],
      product_images: body.product_images ?? [],