feat: select subject views for endpoint frames

This commit is contained in:
2026-05-18 13:58:45 +08:00
parent c7c7301c13
commit b5b1e43624
3 changed files with 157 additions and 28 deletions

View File

@@ -96,6 +96,7 @@ type AudioStoryboardRow = {
visualMode: StoryboardVisualMode
needsProduct: boolean
needsSubject: boolean
subjectDescription: string
skgCopy: string
visualPlan: string
firstFramePlan: string
@@ -107,9 +108,10 @@ type AudioStoryboardRow = {
}
type ProductRefItem = ProductRefStateItem
type SubjectPlanningRef = ImageRef & { view: string; roleHint: string }
type SubjectStyleMode = "transparent_human" | "source_actor"
type StoryboardVisualMode = NonNullable<StoryboardScene["visual_mode"]>
type RowPlanPatch = Partial<Pick<AudioStoryboardRow, "visualMode" | "needsProduct" | "needsSubject" | "visualPlan" | "firstFramePlan" | "lastFramePlan" | "productIntegration" | "productPlacement">>
type RowPlanPatch = Partial<Pick<AudioStoryboardRow, "visualMode" | "needsProduct" | "needsSubject" | "subjectDescription" | "visualPlan" | "firstFramePlan" | "lastFramePlan" | "productIntegration" | "productPlacement">>
const VISUAL_MODE_OPTIONS: Array<{ value: StoryboardVisualMode; label: string; description: string }> = [
{ value: "person_only", label: "人物/情绪", description: "只拍人物、状态、痛点或口播,不强制露产品。" },
@@ -156,6 +158,7 @@ const PRODUCT_VIEW_SLOTS = [
] as const
const MAX_PRODUCT_REFS_PER_VIDEO = 6
const MAX_SUBJECT_REFS_PER_ENDPOINT = 5
const PRODUCT_BACKGROUND_LABELS: Record<string, string> = {
white: "白底",
@@ -603,6 +606,17 @@ function buildLastFramePlan(role: string) {
return "动作小幅推进并稳定停住,保留与下一句衔接的方向感。"
}
/**
 * Builds the default subject description for a storyboard row.
 *
 * Rows whose visual mode is "product_only" or "environment" get an empty
 * description. Every other row gets a shared identity sentence followed by a
 * sentence chosen by the row's role; unrecognised roles fall back to a generic
 * consistency sentence.
 *
 * @param role Role label of the row (for example "开场钩子").
 * @param visualMode Visual mode currently selected for the row.
 * @returns The description text, or "" when the mode shows no subject.
 */
function buildSubjectDescription(role: string, visualMode: StoryboardVisualMode) {
  const showsNoSubject = visualMode === "product_only" || visualMode === "environment"
  if (showsNoSubject) return ""

  // Identity sentence shared by every role so all rows describe the same subject.
  const sharedIdentity = "统一相似主体:透明或半透明皮肤包裹可见白色骨架的人形,广告感、非恐怖、肩颈/锁骨/上背区域清晰,适合佩戴肩颈按摩仪。"

  let roleDetail: string
  switch (role) {
    case "开场钩子":
      roleDetail = "正面或半身口播状态,表情有痛点或好奇感,能快速抓住注意。"
      break
    case "痛点推进":
      roleDetail = "肩颈紧绷、低头久坐或按揉脖子的状态,重点看清脖子、肩线和上背。"
      break
    case "利益证明":
      roleDetail = "产品佩戴或即将佩戴的放松状态,优先肩颈近景、侧面和后颈肩背角度。"
      break
    case "方案过渡":
      roleDetail = "手部调整产品或展示佩戴贴合感,人物姿态自然,产品位置不能挡住关键结构。"
      break
    case "转化收口":
      roleDetail = "状态稳定、放松、干净收尾,可用正面/三分之二视角或产品佩戴后的稳定状态。"
      break
    default:
      roleDetail = "保持与整片一致的主体身份、材质、体型、性别表现和广告气质。"
  }
  return `${sharedIdentity} ${roleDetail}`
}
function buildAudioStoryboardRows(job: Job | null): AudioStoryboardRow[] {
if (!job?.transcript.length) return []
return job.transcript.map((segment, index) => {
@@ -619,6 +633,7 @@ function buildAudioStoryboardRows(job: Job | null): AudioStoryboardRow[] {
visualMode,
needsProduct: defaults.needsProduct,
needsSubject: defaults.needsSubject,
subjectDescription: buildSubjectDescription(role, visualMode),
skgCopy: buildSkgCopy(role, index),
visualPlan: buildVisualPlan(role),
firstFramePlan: buildFirstFramePlan(role),
@@ -833,6 +848,7 @@ function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch {
visualMode: scene.visual_mode,
needsProduct: scene.needs_product,
needsSubject: scene.needs_subject,
subjectDescription: scene.subject?.split("\n").find((line) => line.trim() && !line.startsWith("主体真源") && !line.startsWith("本条不需要"))?.trim(),
visualPlan: scene.scene?.split("\n").find((line) => line.trim() && !line.startsWith("镜头类型") && !line.startsWith("首帧规划") && !line.startsWith("尾帧规划") && !line.startsWith("原音频依据"))?.trim(),
firstFramePlan: scene.first_frame_plan,
lastFramePlan: scene.last_frame_plan,
@@ -848,6 +864,7 @@ function applyPlanPatch(row: AudioStoryboardRow, patch?: RowPlanPatch): AudioSto
visualMode: patch.visualMode ?? row.visualMode,
needsProduct: patch.needsProduct ?? row.needsProduct,
needsSubject: patch.needsSubject ?? row.needsSubject,
subjectDescription: patch.subjectDescription ?? row.subjectDescription,
visualPlan: patch.visualPlan ?? row.visualPlan,
firstFramePlan: patch.firstFramePlan ?? row.firstFramePlan,
lastFramePlan: patch.lastFramePlan ?? row.lastFramePlan,
@@ -930,7 +947,77 @@ function selectProductItemsForRow(row: AudioStoryboardRow, items: ProductRefItem
return picked
}
function subjectAssetRefsForPlanning(source: { frame: KeyFrame; element: KeyElement } | null): ImageRef[] {
/**
 * Resolves the display label for a subject view id.
 *
 * The id is matched against the `value` of each entry in SUBJECT_ASSET_VIEWS.
 * When no entry matches, or the matching entry has no label, the raw view id
 * is returned unchanged.
 */
function subjectViewLabel(view: string) {
  const matchedView = SUBJECT_ASSET_VIEWS.find((candidate) => candidate.value === view)
  return matchedView?.label ?? view
}
/**
 * Returns a short usage hint describing which kinds of shots a subject view suits.
 *
 * The lookup goes through a Map rather than a plain object so that only the
 * views listed here can match. With a plain object, ids such as "constructor"
 * or "__proto__" would resolve to inherited Object.prototype members and leak a
 * non-string value instead of the fallback.
 *
 * @param view Subject view id (for example "front" or "back_neck_detail").
 * @returns The hint for a known view, otherwise the generic fallback "主体参考视角".
 */
function subjectViewRoleHint(view: string) {
  const hintsByView = new Map<string, string>([
    ["front", "正面口播、开场、情绪表达、转化收口"],
    ["three_quarter_left", "左前45度、口播、佩戴前动作、自然转身"],
    ["three_quarter_right", "右前45度、口播、佩戴前动作、自然转身"],
    ["left", "左侧、肩颈侧面、佩戴动作、产品厚度与位置"],
    ["right", "右侧、肩颈侧面、佩戴动作、产品厚度与位置"],
    ["back", "背面、后颈肩背、产品佩戴落位"],
    ["bust_front", "肩颈正面近景、痛点表情、佩戴比例"],
    ["bust_left_45", "肩颈左前近景、手部调整、佩戴贴合"],
    ["bust_right_45", "肩颈右前近景、手部调整、佩戴贴合"],
    ["back_neck_detail", "后颈肩背特写、触点位置、产品贴合"],
  ])
  return hintsByView.get(view) ?? "主体参考视角"
}
/**
 * Returns the subject description to use for a row.
 *
 * A non-blank `row.subjectDescription` wins and is returned trimmed. Otherwise
 * a fallback is assembled from a fixed identity sentence, the labels of up to
 * four available subject views (omitted when there are none), and a fixed
 * default-appearance sentence.
 *
 * @param row Storyboard row whose description is wanted.
 * @param subjectRefs Subject views available to the row; only the first four are listed.
 * @returns A single-line description string, never empty.
 */
function subjectDescriptionForRow(row: AudioStoryboardRow, subjectRefs: SubjectPlanningRef[]) {
  const trimmed = row.subjectDescription.trim()
  if (trimmed) return trimmed
  const labels = subjectRefs.slice(0, 4).map((ref) => ref.label || subjectViewLabel(ref.view)).join("、")
  // The segments are joined without a separator, so each one must end its own
  // sentence; otherwise the view list runs straight into the following sentence.
  // NOTE(review): presumably kept on one line because savedScenePatch recovers
  // only the first line of the saved subject text — confirm before adding "\n".
  return [
    "统一相似主体:使用已生成的主体视图作为人物真源,保持同一人物身份、体型、材质、年龄段、性别表现和广告气质。",
    labels ? `可用主体视角:${labels}。` : "",
    "如果本条需要人物但缺少更具体描述,默认保持透明皮肤包裹白色骨架、非恐怖、肩颈区域清晰可佩戴产品。",
  ].filter(Boolean).join("")
}
/**
 * Returns subject view ids ordered from most to least preferred for a row.
 *
 * The order is chosen by keyword matching over the row's role, visual mode,
 * subject description, visual plan, first/last frame plans and product
 * placement. Back and fit keywords are checked first, then side and
 * hand-adjustment keywords, then close-up and neck keywords. When nothing
 * matches, a last frame that needs the product prefers back and close-up
 * views, and every other case prefers front-facing views.
 *
 * @param row Storyboard row being planned.
 * @param role Which endpoint frame is being generated, if any.
 * @returns All ten known view ids in priority order.
 */
function subjectPriorityForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
  // Descriptions produced by buildSubjectDescription open with a shared
  // "统一相似主体…。" identity sentence that itself mentions 肩颈 and 上背. Left in
  // place it makes the first keyword branch match for every row, so the leading
  // sentence is dropped and only the row-specific part takes part in matching.
  const specificDescription = (row.subjectDescription ?? "").replace(/^\s*统一相似主体[^。]*。\s*/, "")
  const text = `${row.role} ${row.visualMode} ${specificDescription} ${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productPlacement}`.toLowerCase()
  if (/后颈|肩背|上背|背面|背部|贴合|佩戴完成|已正确佩戴/.test(text)) {
    return ["back_neck_detail", "back", "bust_left_45", "bust_right_45", "left", "right", "bust_front", "three_quarter_left", "three_quarter_right", "front"]
  }
  if (/侧面|左侧|右侧|45|调整|拿起|准备佩戴|靠近肩颈|手部/.test(text)) {
    return ["bust_left_45", "bust_right_45", "left", "right", "three_quarter_left", "three_quarter_right", "bust_front", "front", "back_neck_detail", "back"]
  }
  if (/近景|半身|肩颈|锁骨|脖子|揉脖子|低头|紧绷/.test(text)) {
    return ["bust_front", "bust_left_45", "bust_right_45", "front", "three_quarter_left", "three_quarter_right", "left", "right", "back_neck_detail", "back"]
  }
  if (role === "last_frame" && row.needsProduct) {
    return ["back_neck_detail", "back", "bust_left_45", "bust_right_45", "bust_front", "left", "right", "front", "three_quarter_left", "three_quarter_right"]
  }
  return ["front", "three_quarter_left", "three_quarter_right", "bust_front", "left", "right", "bust_left_45", "bust_right_45", "back_neck_detail", "back"]
}
/**
 * Picks the subject reference views that best fit a row, best first.
 *
 * Returns an empty list when the row needs no subject or no refs exist.
 * Otherwise each ref is scored from three parts: its position in the priority
 * order from subjectPriorityForRow (100 for the first view, 8 less per step,
 * 0 when the view is not listed), a +12 bonus when both the row's plans and the
 * ref itself look like neck/close-up material, and a penalty equal to the
 * ref's index in the input list. The top MAX_SUBJECT_REFS_PER_ENDPOINT refs
 * are returned.
 *
 * @param row Storyboard row being planned.
 * @param refs All subject views available for planning.
 * @param role Endpoint frame being generated, forwarded to the priority lookup.
 */
function selectSubjectRefsForRow(row: AudioStoryboardRow, refs: SubjectPlanningRef[], role?: "first_frame" | "last_frame") {
  if (!row.needsSubject || !refs.length) return []

  const viewOrder = subjectPriorityForRow(row, role)
  // The row-side half of the close-up check is the same for every ref.
  const rowWantsCloseup = /肩颈|后颈|近景|贴合|佩戴/.test(row.visualPlan + row.firstFramePlan + row.lastFramePlan + row.productPlacement)

  const scored = refs.map((ref, position) => {
    const viewRank = viewOrder.indexOf(ref.view)
    const rankScore = viewRank >= 0 ? 100 - viewRank * 8 : 0
    const labelText = `${ref.label || ""} ${ref.roleHint}`.toLowerCase()
    const refIsCloseup = /bust|neck|近景|肩颈|后颈/.test(`${ref.view} ${labelText}`)
    const closeupBonus = rowWantsCloseup && refIsCloseup ? 12 : 0
    return { ref, score: rankScore + closeupBonus - position }
  })
  scored.sort((a, b) => b.score - a.score)
  return scored.slice(0, MAX_SUBJECT_REFS_PER_ENDPOINT).map((entry) => entry.ref)
}
/**
 * Formats the selected subject views as a single-line numbered list for prompts.
 *
 * Each entry reads "<n>. <label>:<role hint>", where the label falls back to
 * the view's display label when the ref has none. Entries are separated by
 * ";" so the label, the hint and neighbouring entries stay distinguishable
 * once the list is embedded in a longer sentence.
 *
 * @param refs Subject views chosen for the shot, in the order they should be listed.
 * @returns The formatted list, or "" when there are no refs.
 */
function subjectReferenceNotes(refs: SubjectPlanningRef[]) {
  return refs
    .map((ref, index) => `${index + 1}. ${ref.label || subjectViewLabel(ref.view)}:${ref.roleHint}`)
    .join(";")
}
function subjectAssetRefsForPlanning(source: { frame: KeyFrame; element: KeyElement } | null): SubjectPlanningRef[] {
if (!source) return []
return (source.element.subject_assets ?? []).slice(0, 10).map((asset) => ({
kind: "asset",
@@ -938,6 +1025,8 @@ function subjectAssetRefsForPlanning(source: { frame: KeyFrame; element: KeyElem
element_id: asset.id,
cutout_id: asset.id,
label: asset.label || asset.view || "相似主体视图",
view: asset.view,
roleHint: subjectViewRoleHint(asset.view),
}))
}
@@ -959,10 +1048,12 @@ function endpointAssetRef(frame: KeyFrame | null, role: "first_frame" | "last_fr
}
}
function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" | "last_frame", selectedProductItems: ProductRefItem[], subjectRefs: ImageRef[]) {
function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" | "last_frame", selectedProductItems: ProductRefItem[], subjectRefs: SubjectPlanningRef[]) {
const target = role === "first_frame" ? row.firstFramePlan : row.lastFramePlan
const opposite = role === "first_frame" ? row.lastFramePlan : row.firstFramePlan
const productNotes = selectedProductItems.length ? productReferenceNotes(selectedProductItems) : ""
const subjectNotes = subjectRefs.length ? subjectReferenceNotes(subjectRefs) : ""
const subjectDescription = subjectDescriptionForRow(row, subjectRefs)
return [
`分镜 ${row.index + 1} ${role === "first_frame" ? "首帧" : "尾帧"}`,
`新口播文案:${row.skgCopy}`,
@@ -971,7 +1062,7 @@ function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" |
`另一端画面用于连续性参考:${opposite}`,
`画面规划:${row.visualPlan}`,
row.needsSubject
? `人物主体:必须使用已生成的相似主体白底视图作为人物真源;已提供 ${subjectRefs.length}主体参考。不要回到原视频关键帧复刻人物。`
? `人物主体:${subjectDescription} 必须使用已生成的相似主体白底视图作为人物真源;本次只选择 ${subjectRefs.length}最符合镜头需求的主体视角:${subjectNotes}。不要回到原视频关键帧复刻人物。`
: "本条不需要主角人物;如出现人物,只能是局部手部、背影或环境人物,不要生成透明骨架主角。",
row.needsProduct
? `产品融入:${row.productPlacement}${row.productIntegration}。已提供 ${selectedProductItems.length} 张同一 SKG 肩颈按摩仪产品参考;${productNotes}。产品是套在脖子上的 U 形肩颈按摩仪,必须保持真实佩戴大小、左右非对称和贴颈位置。`
@@ -984,12 +1075,14 @@ function buildStoryboardSceneFromAudioRow(
row: AudioStoryboardRow,
frame: KeyFrame,
productItems: ProductRefItem[] = [],
subjectRefs: ImageRef[] = [],
subjectRefs: SubjectPlanningRef[] = [],
endpointRefs: { firstImage?: ImageRef | null; lastImage?: ImageRef | null } = {},
): StoryboardScene {
const selectedProductItems = row.needsProduct ? selectProductItemsForRow(row, productItems) : []
const productRefs = selectedProductItems.map((item) => item.ref)
const notes = productReferenceNotes(selectedProductItems)
const subjectDescription = subjectDescriptionForRow(row, subjectRefs)
const subjectNotes = subjectReferenceNotes(subjectRefs)
const productGuidance = !row.needsProduct
? "本条规划为不露出产品或不把产品作为画面主体;视频生成时不要硬插 SKG 产品、包装、白底图或错误商品。"
: productItems.length
@@ -1010,7 +1103,7 @@ function buildStoryboardSceneFromAudioRow(
subject_images: row.needsSubject ? subjectRefs : [],
subject_image: row.needsSubject ? subjectRefs[0] ?? null : null,
subject: row.needsSubject
? `${row.keyElements}\n主体真源使用已生成的相似主体白底视图,共 ${subjectRefs.length} 张;关键帧只用于前置主体提取,不作为后续视频首尾帧参考。`
? `${subjectDescription}\n主体动作/画面要素:${row.keyElements}\n主体真源已生成的相似主体白底视图中按本镜头需求选择 ${subjectRefs.length} 张;${subjectNotes}关键帧只用于前置主体提取,不作为后续视频首尾帧参考。`
: "本条不需要人物主体或相似主体参考;如画面里出现人物,只作为背景或局部,不作为主角。",
scene: `镜头类型:${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}\n${row.visualPlan}\n首帧规划${row.firstFramePlan}\n尾帧规划${row.lastFramePlan}\n原音频依据${row.source}`,
product: `产品需求:${row.needsProduct ? "需要产品参考" : "本条不需要产品"}\n产品出现方式${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "本条以情绪、人物状态、空间或节奏过渡为主,不露出产品。"}\n${productGuidance}`,
@@ -2168,10 +2261,12 @@ function AudioStoryboardPlanPanel({
// Switches a row to a new visual mode. The product/subject flags and the product
// placement are reset to that mode's defaults, and the subject description is
// regenerated for the row's role under the new mode.
const applyVisualMode = (rowIndex: number, mode: StoryboardVisualMode) => {
  const modeDefaults = visualModeDefaults(mode)
  const targetRow = rows.find((candidate) => candidate.index === rowIndex)
  // Without a matching row there is no role to describe, so the description is cleared.
  const subjectDescription = targetRow ? buildSubjectDescription(targetRow.role, mode) : ""
  patchRowPlan(rowIndex, {
    visualMode: mode,
    needsProduct: modeDefaults.needsProduct,
    needsSubject: modeDefaults.needsSubject,
    subjectDescription,
    productPlacement: modeDefaults.productPlacement,
  })
}
@@ -2412,7 +2507,8 @@ function AudioStoryboardPlanPanel({
const saveRowStoryboardDraft = async (row: AudioStoryboardRow, frame: KeyFrame) => {
if (!job) return
const plannedRow = { ...planForRow(row, frame), skgCopy: copyForRow(row) }
const scene = buildStoryboardSceneFromAudioRow(plannedRow, frame, productItems, subjectRefs, {
const selectedSubjectRefs = plannedRow.needsSubject ? selectSubjectRefsForRow(plannedRow, subjectRefs) : []
const scene = buildStoryboardSceneFromAudioRow(plannedRow, frame, productItems, selectedSubjectRefs, {
firstImage: endpointAssetRef(frame, "first_frame"),
lastImage: endpointAssetRef(frame, "last_frame"),
})
@@ -2423,7 +2519,8 @@ function AudioStoryboardPlanPanel({
const generateEndpointFrameForRow = async (row: AudioStoryboardRow, frame: KeyFrame | null, role: "first_frame" | "last_frame") => {
if (!job || !frame) return
const plannedRow = { ...planForRow(row, frame), skgCopy: copyForRow(row) }
if (plannedRow.needsSubject && !subjectRefs.length) {
const selectedSubjectRefs = plannedRow.needsSubject ? selectSubjectRefsForRow(plannedRow, subjectRefs, role) : []
if (plannedRow.needsSubject && !selectedSubjectRefs.length) {
toast.warning("先在上方生成相似主体白底视图,再生成首尾帧")
return
}
@@ -2441,14 +2538,14 @@ function AudioStoryboardPlanPanel({
scene_mode: "similar",
scene_style: "premium_product",
asset_role: role,
prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectRefs),
subject_images: plannedRow.needsSubject ? subjectRefs : [],
prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, selectedSubjectRefs),
subject_images: selectedSubjectRefs,
product_images: selectedProductItems.map((item) => item.ref),
source_frame_indices: [],
})
const updatedFrame = updated.frames.find((item) => item.index === frame.index) ?? frame
const generatedRef = endpointAssetRef(updatedFrame, role)
const scene = buildStoryboardSceneFromAudioRow(plannedRow, updatedFrame, productItems, subjectRefs, {
const scene = buildStoryboardSceneFromAudioRow(plannedRow, updatedFrame, productItems, selectedSubjectRefs, {
firstImage: role === "first_frame" ? generatedRef : endpointAssetRef(updatedFrame, "first_frame"),
lastImage: role === "last_frame" ? generatedRef : endpointAssetRef(updatedFrame, "last_frame"),
})
@@ -2468,7 +2565,8 @@ function AudioStoryboardPlanPanel({
const busyKey = `${row.index}:clear_${role}`
setEndpointFrameBusy(busyKey)
try {
const scene = buildStoryboardSceneFromAudioRow(plannedRow, frame, productItems, subjectRefs, {
const selectedSubjectRefs = plannedRow.needsSubject ? selectSubjectRefsForRow(plannedRow, subjectRefs, role) : []
const scene = buildStoryboardSceneFromAudioRow(plannedRow, frame, productItems, selectedSubjectRefs, {
firstImage: role === "first_frame" ? null : endpointAssetRef(frame, "first_frame"),
lastImage: role === "last_frame" ? null : endpointAssetRef(frame, "last_frame"),
})
@@ -2663,6 +2761,7 @@ function AudioStoryboardPlanPanel({
const savingStoryboard = storyboardSaveBusyRow === row.index
const copyText = copyForRow(row)
const selectedProductCount = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems).length : 0
const selectedSubjectRefs = plannedRow.needsSubject ? selectSubjectRefsForRow(plannedRow, subjectRefs) : []
return (
<article
key={row.index}
@@ -2734,6 +2833,14 @@ function AudioStoryboardPlanPanel({
placeholder="画面规划"
className="min-h-[42px] w-full resize-y rounded border border-white/10 bg-black/32 px-2 py-1.5 text-[10.5px] leading-snug text-white/76 outline-none placeholder:text-white/25 focus:border-cyan-300/50"
/>
{plannedRow.needsSubject && (
<textarea
value={plannedRow.subjectDescription}
onChange={(event) => patchRowPlan(row.index, { subjectDescription: event.target.value })}
placeholder="人物描述:主体身份、姿态、情绪、需要用哪些视角"
className="min-h-[42px] w-full resize-y rounded border border-violet-300/12 bg-violet-300/[0.04] px-2 py-1.5 text-[10.5px] leading-snug text-violet-50/78 outline-none placeholder:text-white/25 focus:border-violet-300/50"
/>
)}
<div className="grid gap-1 md:grid-cols-2">
<textarea
value={plannedRow.firstFramePlan}
@@ -2788,10 +2895,15 @@ function AudioStoryboardPlanPanel({
/>
</div>
<div className="flex items-center justify-between gap-2 text-[10px] text-white/34">
<span>{plannedRow.needsSubject ? `主体视图 ${subjectRefs.length}` : "本条不传主体"} · {plannedRow.needsProduct ? `产品参考 ${selectedProductCount || 0}` : "本条不传产品图"}</span>
<span title={selectedSubjectRefs.map((ref) => ref.label || subjectViewLabel(ref.view)).join(" / ")}>
{plannedRow.needsSubject ? `主体参考 ${selectedSubjectRefs.length}/${subjectRefs.length}` : "本条不传主体"} · {plannedRow.needsProduct ? `产品参考 ${selectedProductCount || 0}` : "本条不传产品图"}
</span>
<button
type="button"
onClick={() => patchRowPlan(row.index, visualModeDefaults(plannedRow.visualMode))}
onClick={() => patchRowPlan(row.index, {
...visualModeDefaults(plannedRow.visualMode),
subjectDescription: buildSubjectDescription(plannedRow.role, plannedRow.visualMode),
})}
className="rounded border border-white/10 px-1.5 py-0.5 text-white/42 transition hover:border-white/25 hover:text-white/72"
>