feat: use subject brief for endpoint frames

This commit is contained in:
2026-05-18 19:33:16 +08:00
parent 33c3aef669
commit adf8b2ba0a
4 changed files with 239 additions and 66 deletions

View File

@@ -51,6 +51,7 @@ import {
saveProductRefs,
sourceAudioUrl,
subjectTemplateImageUrl,
updateElement,
updateStoryboard,
uploadStoryboardAsset,
videoUrl,
@@ -118,7 +119,7 @@ type AudioStoryboardRow = {
}
type ProductRefItem = ProductRefStateItem
type SubjectPlanningRef = ImageRef & { view: string; roleHint: string }
type SubjectPlanningRef = ImageRef & { view: string; roleHint: string; consensusBrief?: string }
type SubjectStyleMode = "transparent_human" | "source_actor"
type SubjectMode = "template" | "source_similar"
type SubjectViewMode = "all" | "common" | "custom"
@@ -301,6 +302,7 @@ const PRODUCT_VIEW_SLOTS = [
] as const
const MAX_PRODUCT_REFS_PER_VIDEO = 6
const MAX_PRODUCT_REFS_PER_ENDPOINT = 2
const MAX_SUBJECT_REFS_PER_ENDPOINT = 5
const PRODUCT_BACKGROUND_LABELS: Record<string, string> = {
@@ -1235,8 +1237,30 @@ function productPriorityForRow(row: AudioStoryboardRow) {
}
}
function scoreProductItemForRow(row: AudioStoryboardRow, item: ProductRefItem, index: number) {
const priority = productPriorityForRow(row)
/**
 * Builds the ordered product view / use-tag preference used when picking
 * product reference images for a single endpoint (first or last) frame.
 *
 * The result always starts with the "front" view and the "hero_packshot" /
 * "wearing_scale" tags; further views and tags are appended, in rule order and
 * without duplicates, when the row's planning text matches the rule's keywords.
 *
 * NOTE(review): `role` is only appended to the searched text as its literal
 * token ("first_frame" / "last_frame"). None of the patterns below match those
 * tokens, so as written `role` does not change the result — confirm whether a
 * mapping to the Chinese label (e.g. 尾帧) was intended.
 *
 * @param row  Storyboard row whose planning fields are scanned for keywords.
 * @param role Optional endpoint role, concatenated into the searched text.
 * @returns Ordered `views` and `tags` lists (earlier entries rank higher).
 */
function endpointProductPriority(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
  // Same field order and single-space separator as a template literal would
  // produce; String() keeps null/undefined rendering as "null"/"undefined".
  const haystack = [
    row.role,
    row.visualMode,
    row.visualPlan,
    row.firstFramePlan,
    row.lastFramePlan,
    row.productIntegration,
    row.productPlacement,
    role ?? "",
  ]
    .map((part) => String(part))
    .join(" ")
    .toLowerCase()

  // Keyword rule -> product view and use-tag it promotes. Order matters: it
  // defines the rank of the appended views/tags.
  const keywordRules: Array<{ pattern: RegExp; view: string; tag: string }> = [
    { pattern: /后颈|肩背|背面|背部|后背|上背|尾帧|佩戴完成|贴合完成/, view: "back_bottom", tag: "back_bottom" },
    { pattern: /侧面|侧身|厚度|侧厚|体积|左侧|右侧|45|调整|拿起|靠近肩颈/, view: "side_thickness", tag: "side_thickness" },
    { pattern: /内侧|触点|按摩头|贴颈|接触|皮肤接触/, view: "inner_contacts", tag: "inner_contact" },
    { pattern: /佩戴比例|上身|真人佩戴|脖子|肩颈|锁骨/, view: "left_45", tag: "wearing_scale" },
    { pattern: /按键|按钮|控制|开关|logo/, view: "right_45", tag: "button_detail" },
  ]

  const views = ["front"]
  const tags = ["hero_packshot", "wearing_scale"]
  for (const { pattern, view, tag } of keywordRules) {
    if (!pattern.test(haystack)) continue
    if (!views.includes(view)) views.push(view)
    if (!tags.includes(tag)) tags.push(tag)
  }
  return { views, tags }
}
/**
 * Decides how many product reference images an endpoint frame may receive.
 *
 * Rows whose planning text mentions a detail-oriented keyword (side, back,
 * inner contacts, close-up, buttons, ...) get MAX_PRODUCT_REFS_PER_ENDPOINT
 * references; every other row gets exactly one.
 *
 * NOTE(review): `role` is appended as its literal token only; the pattern has
 * no alternative that matches "first_frame"/"last_frame", so as written it
 * does not affect the outcome.
 *
 * @param row  Storyboard row whose planning fields are scanned.
 * @param role Optional endpoint role, concatenated into the searched text.
 * @returns The per-endpoint cap on product reference images.
 */
function endpointProductMaxForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
  const detailKeywords = /侧面|侧身|厚度|侧厚|后颈|肩背|背面|背部|内侧|触点|按摩头|贴颈|特写|近景|按键|按钮|佩戴完成|上背/
  const haystack = [
    row.visualPlan,
    row.firstFramePlan,
    row.lastFramePlan,
    row.productIntegration,
    row.productPlacement,
    role ?? "",
  ]
    .map((part) => String(part))
    .join(" ")
    .toLowerCase()

  if (detailKeywords.test(haystack)) {
    return MAX_PRODUCT_REFS_PER_ENDPOINT
  }
  return 1
}
function scoreProductItem(row: AudioStoryboardRow, item: ProductRefItem, index: number, priority: { views: string[]; tags: string[] }) {
const viewRank = priority.views.indexOf(item.view)
const tagScore = item.useTags.reduce((sum, tag) => {
const rank = priority.tags.indexOf(tag)
@@ -1249,20 +1273,26 @@ function scoreProductItemForRow(row: AudioStoryboardRow, item: ProductRefItem, i
return (viewRank >= 0 ? 30 - viewRank * 4 : 0) + tagScore + backgroundScore + riskScore + confidenceScore + rotationScore
}
function selectProductItemsForRow(row: AudioStoryboardRow, items: ProductRefItem[]) {
function selectProductItemsForRow(
row: AudioStoryboardRow,
items: ProductRefItem[],
mode: "video" | "endpoint" = "video",
role?: "first_frame" | "last_frame",
) {
if (!items.length) return []
const picked: ProductRefItem[] = []
const pickedIds = new Set<string>()
const maxItems = mode === "endpoint" ? endpointProductMaxForRow(row, role) : MAX_PRODUCT_REFS_PER_VIDEO
const priority = mode === "endpoint" ? endpointProductPriority(row, role) : productPriorityForRow(row)
const add = (item?: ProductRefItem) => {
if (!item || pickedIds.has(item.id) || picked.length >= MAX_PRODUCT_REFS_PER_VIDEO) return
if (!item || pickedIds.has(item.id) || picked.length >= maxItems) return
picked.push(item)
pickedIds.add(item.id)
}
const priority = productPriorityForRow(row)
for (const view of priority.views) {
const matches = items
.map((item, index) => ({ item, score: scoreProductItemForRow(row, item, index) }))
.map((item, index) => ({ item, score: scoreProductItem(row, item, index, priority) }))
.filter(({ item }) => item.view === view)
.sort((a, b) => b.score - a.score)
add(matches[0]?.item)
@@ -1270,14 +1300,14 @@ function selectProductItemsForRow(row: AudioStoryboardRow, items: ProductRefItem
for (const tag of priority.tags) {
const matches = items
.map((item, index) => ({ item, score: scoreProductItemForRow(row, item, index) }))
.map((item, index) => ({ item, score: scoreProductItem(row, item, index, priority) }))
.filter(({ item }) => item.useTags.includes(tag))
.sort((a, b) => b.score - a.score)
add(matches[0]?.item)
}
const ranked = items
.map((item, index) => ({ item, score: scoreProductItemForRow(row, item, index) }))
.map((item, index) => ({ item, score: scoreProductItem(row, item, index, priority) }))
.sort((a, b) => b.score - a.score)
for (const { item } of ranked) {
add(item)
@@ -1366,9 +1396,19 @@ function subjectAssetRefsForPlanning(source: { frame: KeyFrame; element: KeyElem
label: asset.label || asset.view || "相似主体视图",
view: asset.view,
roleHint: subjectViewRoleHint(asset.view),
consensusBrief: source.element.subject_consensus_brief || "",
}))
}
/**
 * Resolves the text-only subject brief sent with an endpoint-frame request.
 *
 * Resolution order:
 *   1. the first non-blank `consensusBrief` found on the subject refs,
 *   2. the row's own `subjectDescription` when it is non-blank,
 *   3. `subjectDescriptionForRow(row, refs)` when the row needs a subject,
 *   4. otherwise an empty string.
 *
 * @param row  Storyboard row being rendered.
 * @param refs Subject planning refs that may carry a stored consensus brief.
 * @returns The trimmed brief for cases 1 and 2, the helper's result for case 3, or "".
 */
function subjectBriefForEndpoint(row: AudioStoryboardRow, refs: SubjectPlanningRef[]) {
  for (const ref of refs) {
    const stored = ref.consensusBrief?.trim()
    if (stored) return stored
  }

  const typedByUser = row.subjectDescription.trim()
  if (typedByUser) return typedByUser

  return row.needsSubject ? subjectDescriptionForRow(row, refs) : ""
}
function endpointAssetRef(frame: KeyFrame | null, role: "first_frame" | "last_frame"): ImageRef | null {
if (!frame) return null
const key = role === "first_frame" ? "first_image" : "last_image"
@@ -1387,12 +1427,10 @@ function endpointAssetRef(frame: KeyFrame | null, role: "first_frame" | "last_fr
}
}
function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" | "last_frame", selectedProductItems: ProductRefItem[], subjectRefs: SubjectPlanningRef[]) {
function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" | "last_frame", selectedProductItems: ProductRefItem[], subjectBrief: string) {
const target = role === "first_frame" ? row.firstFramePlan : row.lastFramePlan
const opposite = role === "first_frame" ? row.lastFramePlan : row.firstFramePlan
const productNotes = selectedProductItems.length ? productReferenceNotes(selectedProductItems) : ""
const subjectNotes = subjectRefs.length ? subjectReferenceNotes(subjectRefs) : ""
const subjectDescription = subjectDescriptionForRow(row, subjectRefs)
return [
`分镜 ${row.index + 1} ${role === "first_frame" ? "首帧" : "尾帧"}`,
`新口播文案:${row.skgCopy}`,
@@ -1401,10 +1439,10 @@ function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" |
`另一端画面用于连续性参考:${opposite}`,
`画面规划:${row.visualPlan}`,
row.needsSubject
? `人物主体:${subjectDescription} 必须使用已生成的相似主体白底视图作为人物真源;本次只选择 ${subjectRefs.length} 张最符合镜头需求的主体视角:${subjectNotes}。不要回到原视频关键帧复刻人物。`
? `人物主体 brief${subjectBrief || "主体 brief 暂缺,请保持一个统一的商业广告主体,肩颈区域清晰可佩戴产品。"}。主体只依据这段文字身份描述,不上传主体参考图;可以根据本镜头自由改变动作、景别、表情和环境,但不能换成另一个人设。不要回到原视频关键帧复刻人物。`
: "本条不需要主角人物;如出现人物,只能是局部手部、背影或环境人物,不要生成透明骨架主角。",
row.needsProduct
? `产品融入:${row.productPlacement}${row.productIntegration}提供 ${selectedProductItems.length} 张同一 SKG 肩颈按摩仪产品参考;${productNotes}。产品是套在脖子上的 U 形肩颈按摩仪,必须保持真实佩戴大小、左右非对称和贴颈位置。`
? `产品融入:${row.productPlacement}${row.productIntegration}本次只提供 ${selectedProductItems.length} 张同一 SKG 肩颈按摩仪产品参考;${productNotes}。产品是套在脖子上的 U 形肩颈按摩仪,必须保持真实佩戴大小、左右非对称、按键、触点、厚度和贴颈位置。`
: "本条不露出产品,不要强行生成 SKG 产品、包装、白底图或随机商品。",
"输出一张单独的 9:16 高清首/尾帧,不要拼图,不要字幕,不要平台 UI不要水印。画面要能作为后续视频生成的明确起止帧。",
].join("\n")
@@ -1422,6 +1460,7 @@ function buildStoryboardSceneFromAudioRow(
const notes = productReferenceNotes(selectedProductItems)
const subjectDescription = subjectDescriptionForRow(row, subjectRefs)
const subjectNotes = subjectReferenceNotes(subjectRefs)
const subjectBrief = subjectBriefForEndpoint(row, subjectRefs)
const productGuidance = !row.needsProduct
? "本条规划为不露出产品或不把产品作为画面主体;视频生成时不要硬插 SKG 产品、包装、白底图或错误商品。"
: productItems.length
@@ -1434,6 +1473,7 @@ function buildStoryboardSceneFromAudioRow(
visual_mode: row.visualMode,
needs_product: row.needsProduct,
needs_subject: row.needsSubject,
subject_brief: row.needsSubject ? subjectBrief : "",
first_frame_plan: row.firstFramePlan,
last_frame_plan: row.lastFramePlan,
product_placement: row.productPlacement,
@@ -2353,6 +2393,8 @@ function SourceReferenceBuildPanel({
const [templateSaveBusy, setTemplateSaveBusy] = useState(false)
const [templateDraftName, setTemplateDraftName] = useState("")
const [templateDraftNote, setTemplateDraftNote] = useState("")
const [subjectBriefDraft, setSubjectBriefDraft] = useState("")
const [subjectBriefBusy, setSubjectBriefBusy] = useState(false)
const frames = useMemo(() => [...job.frames].sort((a, b) => a.timestamp - b.timestamp), [job.frames])
const selectedReferenceFrames = useMemo(
() => frames.filter((frame) => selectedFrames.has(frame.index)),
@@ -2423,6 +2465,10 @@ function SourceReferenceBuildPanel({
const generationCtaLabel = subjectMode === "template"
? `用模板生成 ${selectedSubjectViews.length} 张主体视图`
: `从源视频创新生成 ${selectedSubjectViews.length} 张主体视图`
// Brief currently in effect for the subject, resolved by precedence: the
// consensus brief stored on the generated subject element, then the selected
// subject template's prompt_brief, then the selected character's prompt_brief.
// Each candidate is trimmed; falls back to "" when none is non-blank.
const currentSubjectBrief = actorSource?.element.subject_consensus_brief?.trim()
|| selectedSubjectTemplate?.prompt_brief?.trim()
|| selectedCharacter?.prompt_brief?.trim()
|| ""
const buildSubjectProfileForRequest = () => {
if (subjectProfileMode === "random") {
@@ -2468,6 +2514,10 @@ function SourceReferenceBuildPanel({
setLastSubjectProfile(null)
}, [job.id])
// Re-seed the editable brief draft whenever the active subject element or the
// resolved brief changes. Note this replaces whatever is in the draft at that
// moment, including edits that have not been saved.
useEffect(() => {
setSubjectBriefDraft(currentSubjectBrief)
}, [actorSource?.element.id, currentSubjectBrief])
const generateSimilarActor = async () => {
if (!frames.length) {
toast.warning("请先自动抽帧 12 张,或在原版视频上手动补帧。")
@@ -2584,6 +2634,25 @@ function SourceReferenceBuildPanel({
}
}
// Persists the edited subject brief onto the current subject element.
// Warns and does nothing when no subject element exists yet; otherwise sends
// the trimmed draft via updateElement, forwards the returned job to
// onJobUpdate, and reports success or failure through a toast. The busy flag
// is raised for the duration of the request and always cleared afterwards.
const saveSubjectBriefDraft = async () => {
  const source = actorSource
  if (!source) {
    toast.warning("先生成本次主体视图,才能把 brief 绑定到主体元素。")
    return
  }

  setSubjectBriefBusy(true)
  try {
    const patch = { subject_consensus_brief: subjectBriefDraft.trim() }
    const nextJob = await updateElement(job.id, source.frame.index, source.element.id, patch)
    onJobUpdate(nextJob)
    toast.success("主体 brief 已保存,后续首尾帧会使用这段文字依据")
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    toast.error("主体 brief 保存失败:" + reason)
  } finally {
    setSubjectBriefBusy(false)
  }
}
const saveGeneratedSubjectTemplate = async () => {
if (!actorSource || !visibleActorAssets.length) {
toast.warning("请先生成相似主体视图。")
@@ -2772,6 +2841,29 @@ function SourceReferenceBuildPanel({
</button>
</div>
<div className="mt-1 text-[9px] text-white/32">{templateSaveHint}</div>
<div className="mt-2 rounded-md border border-white/10 bg-black/24 p-2">
<div className="mb-1 flex items-center justify-between gap-2">
<span className="text-[9.5px] font-semibold text-white/48"> brief / </span>
<button
type="button"
onClick={() => void saveSubjectBriefDraft()}
disabled={!actorSource || subjectBriefBusy || !subjectBriefDraft.trim()}
className="inline-flex h-6 items-center gap-1 rounded border border-white/10 bg-white/[0.045] px-2 text-[9.5px] font-semibold text-white/52 transition hover:border-cyan-300/35 hover:text-cyan-100 disabled:cursor-not-allowed disabled:opacity-35"
>
{subjectBriefBusy ? <Loader2 className="h-3 w-3 animate-spin" /> : <Check className="h-3 w-3" />}
brief
</button>
</div>
<textarea
value={subjectBriefDraft}
onChange={(event) => setSubjectBriefDraft(event.target.value)}
placeholder="生成主体视图后,后端会用视觉模型反推出主体 brief这里也会显示所选模板的 prompt_brief。"
className="min-h-[58px] w-full resize-y rounded border border-white/10 bg-black/35 px-2 py-1.5 text-[10px] leading-snug text-white/68 outline-none placeholder:text-white/25 focus:border-cyan-300/45"
/>
<div className="mt-1 text-[9px] text-white/30">
使 brief 5
</div>
</div>
</div>
<div className="mt-2 rounded-md border border-white/10 bg-black/28 p-2.5">
@@ -3324,15 +3416,12 @@ function AudioStoryboardPlanPanel({
if (!job || !frame) return
const plannedRow = { ...planForRow(row, frame), skgCopy: copyForRow(row) }
const selectedSubjectRefs = plannedRow.needsSubject ? selectSubjectRefsForRow(plannedRow, subjectRefs, role) : []
if (plannedRow.needsSubject && !selectedSubjectRefs.length) {
toast.warning("先在上方生成相似主体白底视图,再生成首尾帧")
return
}
const subjectBrief = plannedRow.needsSubject ? subjectBriefForEndpoint(plannedRow, subjectRefs) : ""
if (plannedRow.needsProduct && !productItems.length) {
toast.warning("本条需要产品,请先上传并识别产品素材池")
return
}
const selectedProductItems = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems) : []
const selectedProductItems = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems, "endpoint", role) : []
const busyKey = `${row.index}:${role}`
setEndpointFrameBusy(busyKey)
try {
@@ -3342,8 +3431,8 @@ function AudioStoryboardPlanPanel({
scene_mode: "similar",
scene_style: "premium_product",
asset_role: role,
prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, selectedSubjectRefs),
subject_images: selectedSubjectRefs,
prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectBrief),
subject_brief: subjectBrief,
product_images: selectedProductItems.map((item) => item.ref),
source_frame_indices: [],
})
@@ -3568,8 +3657,8 @@ function AudioStoryboardPlanPanel({
const rowVideos = videosForFrame(referenceFrame)
const savingStoryboard = storyboardSaveBusyRow === row.index
const copyText = copyForRow(row)
const selectedProductCount = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems).length : 0
const selectedSubjectRefs = plannedRow.needsSubject ? selectSubjectRefsForRow(plannedRow, subjectRefs) : []
const selectedProductCount = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems, "endpoint").length : 0
const endpointSubjectBrief = plannedRow.needsSubject ? subjectBriefForEndpoint(plannedRow, subjectRefs) : ""
return (
<article
key={row.index}
@@ -3685,9 +3774,10 @@ function AudioStoryboardPlanPanel({
job={job}
frame={referenceFrame}
role="first_frame"
subjectBrief={endpointSubjectBrief}
busy={endpointFrameBusy === `${row.index}:first_frame`}
deleting={endpointFrameBusy === `${row.index}:clear_first_frame`}
disabled={!referenceFrame || (plannedRow.needsSubject && !subjectRefs.length) || (plannedRow.needsProduct && !productItems.length)}
disabled={!referenceFrame || (plannedRow.needsProduct && !productItems.length)}
onGenerate={() => void generateEndpointFrameForRow(plannedRow, referenceFrame, "first_frame")}
onDelete={() => void clearEndpointFrameForRow(plannedRow, referenceFrame, "first_frame")}
/>
@@ -3695,16 +3785,17 @@ function AudioStoryboardPlanPanel({
job={job}
frame={referenceFrame}
role="last_frame"
subjectBrief={endpointSubjectBrief}
busy={endpointFrameBusy === `${row.index}:last_frame`}
deleting={endpointFrameBusy === `${row.index}:clear_last_frame`}
disabled={!referenceFrame || (plannedRow.needsSubject && !subjectRefs.length) || (plannedRow.needsProduct && !productItems.length)}
disabled={!referenceFrame || (plannedRow.needsProduct && !productItems.length)}
onGenerate={() => void generateEndpointFrameForRow(plannedRow, referenceFrame, "last_frame")}
onDelete={() => void clearEndpointFrameForRow(plannedRow, referenceFrame, "last_frame")}
/>
</div>
<div className="flex items-center justify-between gap-2 text-[10px] text-white/34">
<span title={selectedSubjectRefs.map((ref) => ref.label || subjectViewLabel(ref.view)).join(" / ")}>
{plannedRow.needsSubject ? `主体参考 ${selectedSubjectRefs.length}/${subjectRefs.length}` : "本条不传主体"} · {plannedRow.needsProduct ? `产品参考 ${selectedProductCount || 0}` : "本条不传产品图"}
<span title={endpointSubjectBrief || "本条不传主体 brief"}>
{plannedRow.needsSubject ? "依据:主体 brief" : "本条不传主体"} · {plannedRow.needsProduct ? `${selectedProductCount || 0}产品参考` : "本条不传产品图"}
</span>
<button
type="button"
@@ -3926,6 +4017,7 @@ function EndpointFrameSlot({
job,
frame,
role,
subjectBrief,
busy,
deleting,
disabled,
@@ -3935,6 +4027,7 @@ function EndpointFrameSlot({
job: Job
frame: KeyFrame | null
role: "first_frame" | "last_frame"
subjectBrief?: string
busy: boolean
deleting?: boolean
disabled: boolean
@@ -3946,6 +4039,15 @@ function EndpointFrameSlot({
const label = role === "first_frame" ? "首帧" : "尾帧"
return (
<div className="overflow-hidden rounded border border-white/10 bg-black/32">
<div className="flex h-6 items-center justify-between gap-1 border-b border-white/10 px-1.5 text-[9.5px] text-white/42">
<span>{label}</span>
<span
title={subjectBrief?.trim() ? subjectBrief : "本条没有主体 brief生成时只按画面规划和产品参考执行。"}
className="inline-flex h-4 w-4 items-center justify-center rounded border border-white/10 bg-white/[0.045] text-white/45"
>
<Info className="h-3 w-3" />
</span>
</div>
<MediaAssetTile
src={src}
href={src || undefined}