feat: gate video generation on endpoint frames

This commit is contained in:
2026-05-18 11:37:13 +08:00
parent 4c8cb066d6
commit 8f917d52b8
6 changed files with 373 additions and 80 deletions

View File

@@ -11,7 +11,7 @@
- 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解
- 风格:`04-Dark-Gallery-Ambient`(路径:`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`)
- 第一冲刺:步骤 1-4(下载 / 拆轨 / 关键帧 / ASR+翻译)
- 当前产品方向(2026-05-18 再确认):先解决信息流广告快速复刻的第一步,不再沿用“开始后线性完成抽帧、分镜、元素生成、合成”的旧做法。主界面为“左侧素材输入列 + 右侧信息流复刻工作表”。用户粘贴 TK 链接或上传视频后点击“开始分析”,系统自动下载源视频;下载完成后并行启动两条路:音频文案路提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效;视频视觉路自动抽取 12 张参考帧,供人工选择可用主体并生成相似主体视图。产品图上传后独立形成产品资产包,自动识别视角/结构/比例并补缺角度。分镜工作台按逐句时间轴规划新口播、镜头类型、首帧/尾帧、人物需求和产品出现方式;单条或“一键提交全部”生成视频时,按该行规划自动调取产品图、人物主体和参考帧
- 当前产品方向(2026-05-18 再确认):先解决信息流广告快速复刻的第一步,不再沿用“开始后线性完成抽帧、分镜、元素生成、合成”的旧做法。主界面为“左侧素材输入列 + 右侧信息流复刻工作表”。用户粘贴 TK 链接或上传视频后点击“开始分析”,系统自动下载源视频;下载完成后并行启动两条路:音频文案路提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效;视频视觉路自动抽取参考帧,供人工选择可用主体并生成相似主体白底视图。产品图上传后独立形成产品资产包,自动识别视角/结构/比例并补缺角度。分镜工作台按逐句时间轴规划新口播、镜头类型、首帧/尾帧、人物需求和产品出现方式;当前暂停直接调视频模型,先逐条用“相似主体视图 + 产品素材池 + 首尾帧文字规划”生成并审核首帧/尾帧,保存规划后再决定哪些分镜进入单条视频候选
## 部署事实
- 平台:VPS `76.13.31.179`(Ubuntu 24.04 / Docker Compose / Coolify Traefik)

View File

@@ -330,6 +330,7 @@ class StoryboardScene(BaseModel):
first_image: dict | None = None
last_image: dict | None = None
product_images: list[dict] = Field(default_factory=list)
subject_images: list[dict] = Field(default_factory=list)
product_fusion_shots: list[dict] = Field(default_factory=list)
visual_mode: Literal["person_only", "person_product", "product_only", "environment"] = "person_product"
needs_product: bool = True
@@ -1274,6 +1275,44 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat
return out_path
def _make_paths_contact_sheet(paths: list[Path], out_path: Path, max_items: int = 10) -> Path | None:
    """Tile several reference images into one JPEG contact sheet.

    Args:
        paths: Candidate image files. Duplicates (compared by ``str(path)``) and
            paths that do not exist on disk are skipped.
        out_path: Destination of the generated sheet; parent directories are
            created when a sheet is actually written.
        max_items: Upper bound on tiles; clamped to the range 2..12, and a falsy
            value falls back to 10.

    Returns:
        * ``None`` when no candidate path exists.
        * The single surviving input path when there is nothing to tile (only
          one existing candidate, or only one candidate could be decoded).
        * ``out_path`` once a sheet with two or more tiles has been saved.
    """
    limit = max(2, min(12, int(max_items or 10)))
    usable: list[Path] = []
    seen: set[str] = set()
    for p in paths:
        key = str(p)
        if key in seen or not p.exists():
            continue
        seen.add(key)
        usable.append(p)
        if len(usable) >= limit:
            break
    if len(usable) <= 1:
        return usable[0] if usable else None

    cell = 420
    background = (245, 245, 245)
    decoded: list[Path] = []
    thumbs: list[Image.Image] = []
    for p in usable:
        try:
            # Close the underlying file as soon as the pixels are converted.
            with Image.open(p) as src:
                im = src.convert("RGB")
            im.thumbnail((cell, cell), Image.Resampling.LANCZOS)
            # Letterbox every thumbnail onto a fixed square so the grid stays aligned.
            canvas = Image.new("RGB", (cell, cell), background)
            canvas.paste(im, ((cell - im.width) // 2, (cell - im.height) // 2))
        except Exception:
            # Best effort: an unreadable reference must not abort the whole sheet.
            continue
        decoded.append(p)
        thumbs.append(canvas)
    if len(thumbs) <= 1:
        # Hand back the file that actually decoded rather than blindly returning
        # usable[0], which may be the very file that failed to open. When nothing
        # decoded, keep the previous fallback (first existing path) so the
        # failure still surfaces to the caller instead of being hidden here.
        return decoded[0] if decoded else usable[0]

    cols = 4 if len(thumbs) > 6 else (3 if len(thumbs) > 2 else 2)
    rows = (len(thumbs) + cols - 1) // cols  # ceiling division
    sheet = Image.new("RGB", (cols * cell, rows * cell), background)
    for i, thumb in enumerate(thumbs):
        sheet.paste(thumb, ((i % cols) * cell, (i // cols) * cell))
    out_path.parent.mkdir(parents=True, exist_ok=True)
    sheet.save(out_path, "JPEG", quality=92)
    return out_path
SUBJECT_VIEW_LABELS: dict[str, str] = {
"front": "正面",
"back": "背面",
@@ -3732,6 +3771,8 @@ class GenerateSceneAssetReq(BaseModel):
asset_role: SceneAssetRole = "scene"
prompt: str = ""
source_frame_indices: list[int] | None = None
subject_images: list[dict] = Field(default_factory=list)
product_images: list[dict] = Field(default_factory=list)
class GenerateSubjectAssetsReq(BaseModel):
@@ -3899,11 +3940,20 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
source_indices = list(dict.fromkeys(source_indices))[:8]
model_src = src
sheet_tmp: Path | None = None
asset_sheet_tmp: Path | None = None
if len(source_indices) > 1:
sheet_tmp = job_dir(job_id) / "tmp" / f"scene_refs_{idx:03d}_{uuid.uuid4().hex[:6]}.jpg"
sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp)
if sheet:
model_src = sheet
subject_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in req.subject_images[:8]) if p and p.exists()]
product_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in req.product_images[:6]) if p and p.exists()]
asset_ref_paths = [*subject_ref_paths, *product_ref_paths]
if req.asset_role != "scene" and asset_ref_paths:
asset_sheet_tmp = job_dir(job_id) / "tmp" / f"endpoint_refs_{idx:03d}_{uuid.uuid4().hex[:6]}.jpg"
asset_sheet = _make_paths_contact_sheet(asset_ref_paths, asset_sheet_tmp, max_items=10)
if asset_sheet:
model_src = asset_sheet
confirmed_subjects = [
(e.name_en or e.name_zh).strip()
@@ -3925,9 +3975,13 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
else "Remove the main foreground subject from the frame if present. "
)
identity_clause = (
"Known character identity cues: " + ", ".join(confirmed_subjects) + ". "
if confirmed_subjects
else "Infer one consistent friendly transparent human character identity from the provided references. "
f"Use the generated subject asset references as the primary character identity lock ({len(subject_ref_paths)} image(s)); keep the same transparent body shell, clean visible skeleton, proportions, material, and ad-friendly non-horror identity. "
if subject_ref_paths
else (
"Known character identity cues: " + ", ".join(confirmed_subjects) + ". "
if confirmed_subjects
else "Infer one consistent friendly transparent human character identity from the provided references. "
)
)
mode_clause = {
"remove_subject": (
@@ -3956,10 +4010,21 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
if user_prompt
else ""
)
reference_clause = (
f"Use the selected reference frame contact sheet as visual evidence for location, composition, lighting, materials, and atmosphere. Reference frame indices: {', '.join(str(i + 1) for i in source_indices)}. "
if len(source_indices) > 1
else "Use the provided frame as the primary visual reference. "
if req.asset_role != "scene" and asset_ref_paths:
reference_clause = (
f"Use the provided asset contact sheet as the primary visual reference: {len(subject_ref_paths)} generated subject image(s) and {len(product_ref_paths)} SKG product image(s). "
"Do not use the original keyframe as the first/last-frame truth; it is only a storage anchor for this row. "
)
else:
reference_clause = (
f"Use the selected reference frame contact sheet as visual evidence for location, composition, lighting, materials, and atmosphere. Reference frame indices: {', '.join(str(i + 1) for i in source_indices)}. "
if len(source_indices) > 1
else "Use the provided frame as the primary visual reference. "
)
product_asset_clause = (
"Use the provided SKG product references as the rigid product truth when the user prompt asks for product presence: a white U-shaped neck-and-shoulder wearable massage device worn around the neck/shoulders, not headphones, a collar pillow, skincare, food, or a medical prop. Keep product scale believable, preserve left/right asymmetry, side thickness, inner contact pads, buttons, white material, and real wearable placement. "
if product_ref_paths
else "Do not invent a random product. Only include an SKG product if the user prompt explicitly asks for it. "
)
if req.asset_role == "scene":
prompt = (
@@ -3986,6 +4051,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
+ reference_clause
+ user_prompt_clause
+ style_clause + " "
+ product_asset_clause
+ TRANSPARENT_HUMAN_POSITIVE_PROMPT + " "
+ TRANSPARENT_HUMAN_NEGATIVE_PROMPT + " "
+ "The frame must feature the same friendly transparent or translucent human character: glass/acrylic/vinyl-like transparent outer body, visible clean white skeleton inside, clean commercial wellness style, non-horror. "
@@ -3997,6 +4063,8 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
try:
if req.asset_role == "scene":
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
elif asset_ref_paths:
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1600)
else:
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3)
except RuntimeError as e:
@@ -4005,6 +4073,9 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
if sheet_tmp and sheet_tmp.exists():
try: sheet_tmp.unlink()
except OSError: pass
if asset_sheet_tmp and asset_sheet_tmp.exists():
try: asset_sheet_tmp.unlink()
except OSError: pass
asset_id = f"scene_{idx:03d}_{uuid.uuid4().hex[:8]}"
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
@@ -4387,6 +4458,7 @@ class UpdateStoryboardReq(BaseModel):
first_image: dict | None = None
last_image: dict | None = None
product_images: list[dict] = Field(default_factory=list)
subject_images: list[dict] = Field(default_factory=list)
product_fusion_shots: list[dict] = Field(default_factory=list)
visual_mode: Literal["person_only", "person_product", "product_only", "environment"] = "person_product"
needs_product: bool = True
@@ -5562,6 +5634,7 @@ def update_storyboard(job_id: str, idx: int, req: UpdateStoryboardReq) -> Job:
first_image=req.first_image,
last_image=req.last_image,
product_images=list(req.product_images),
subject_images=list(req.subject_images),
product_fusion_shots=list(req.product_fusion_shots),
visual_mode=req.visual_mode,
needs_product=bool(req.needs_product),

File diff suppressed because one or more lines are too long

View File

@@ -38,6 +38,7 @@ const NODE_TYPES = {
const KEYFRAME_PANEL_ID = "keyframe-detail-panel"
const VIDEO_FRAME_PANEL_ID = "video-frame-panel"
const FLOATING_PANEL_IDS = new Set([KEYFRAME_PANEL_ID, VIDEO_FRAME_PANEL_ID])
const DIRECT_VIDEO_GENERATION_PAUSED = true
const FRAME_TARGET_LABELS: Record<FrameExtractTarget, string> = {
transparent_human: "透明骨架人",
balanced: "综合关键帧",
@@ -592,6 +593,10 @@ export default function Home() {
}, [jobs, productionJobIds, startProductionLanesForJob])
const handleQuickGenerateVideo = useCallback(async (frameIdx: number, scene: StoryboardScene, model: string) => {
if (DIRECT_VIDEO_GENERATION_PAUSED) {
toast.info("视频生成调用已暂停:先生成并审核每条分镜的首帧/尾帧,再开放单条提交")
return
}
if (!job) return
const frame = job.frames.find((f) => f.index === frameIdx)
if (!frame) return
@@ -728,6 +733,10 @@ export default function Home() {
}, [ensureDefaultProductRefs, job, selectedFrames, updateJobInList])
const handleGenerateProductFusionVideo = useCallback(async (frameIdx: number, shot: ProductFusionShot) => {
if (DIRECT_VIDEO_GENERATION_PAUSED) {
toast.info("视频生成调用已暂停:当前只做首尾帧和素材规划")
return
}
if (!job) return
const frame = job.frames.find((f) => f.index === frameIdx)
if (!frame) return

View File

@@ -32,6 +32,7 @@ import {
cutoutElement,
deleteSubjectAsset,
effectiveFrameUrl,
generateSceneAsset,
generateProductAngleAsset,
generateSubjectAssets,
generatedImageUrl,
@@ -471,7 +472,7 @@ function videoModelTrace(models: RuntimeModels | undefined, model: string): Mode
`前端选择:${model}`,
`后端解析:${resolveVideoModelLabel(models, model)}`,
`服务商:${modelValue(models?.video_provider)} · ${modelValue(models?.video_base_url)}`,
"输入:当前分镜文案、参考帧、产品素材、产品方向标注和画面规划",
"输入:已确认的首尾帧、当前分镜文案、产品素材、相似主体资产和画面规划",
"输出:异步候选视频,完成后回填到对应分镜行",
],
}
@@ -479,7 +480,6 @@ function videoModelTrace(models: RuntimeModels | undefined, model: string): Mode
function buildFallbackScene(job: Job, frame: KeyFrame, order: number): StoryboardScene {
const frames = [...job.frames].sort((a, b) => a.timestamp - b.timestamp)
const nextFrame = frames.find((item) => item.timestamp > frame.timestamp) ?? null
const duration = Math.max(3.5, Math.min(7.5, Math.max(job.duration || 0, frames.length * 5) / Math.max(frames.length, 1)))
const audio = job.audio_script?.rewritten_text?.trim()
|| job.transcript?.slice(0, 4).map((item) => item.en || item.zh).filter(Boolean).join(" ")
@@ -487,10 +487,10 @@ function buildFallbackScene(job: Job, frame: KeyFrame, order: number): Storyboar
const objects = frame.description?.objects?.slice(0, 5).map((item) => item.name).filter(Boolean).join("、")
return {
duration: Number(duration.toFixed(1)),
first_image: { kind: "keyframe", frame_idx: frame.index, label: `分镜 ${order + 1} 首帧` },
last_image: nextFrame ? { kind: "keyframe", frame_idx: nextFrame.index, label: `分镜 ${order + 1} 尾帧` } : null,
first_image: null,
last_image: null,
subject: objects ? `关键元素候选:${objects}` : "保留原视频最重要的主体动作和构图关系。",
scene: `${frame.description?.scene || `参考${order + 1} 个关键画面规划 SKG 信息流广告分镜。`}\n音频节奏依据${audio.slice(0, 220)}`,
scene: `${frame.description?.scene || `${order + 1} 段音频规划 SKG 信息流广告分镜。`}\n音频节奏依据${audio.slice(0, 220)}`,
product: "把原素材里的产品/痛点转成 SKG 颈部/肩颈按摩仪表达,默认使用 SKG 四张产品角度图做产品真源。",
action: frame.description?.style
? `沿用原画面的讲话节奏、动作节点和 ${frame.description.style},突出使用前紧绷、使用后放松。`
@@ -929,7 +929,60 @@ function selectProductItemsForRow(row: AudioStoryboardRow, items: ProductRefItem
return picked
}
function buildStoryboardSceneFromAudioRow(row: AudioStoryboardRow, frame: KeyFrame, nextFrame?: KeyFrame | null, productItems: ProductRefItem[] = []): StoryboardScene {
/**
 * Turns the generated subject assets of the chosen source element into image
 * references for storyboard planning.
 *
 * @param source Frame/element pair whose `subject_assets` should be referenced, or `null`.
 * @returns At most ten `"asset"` refs anchored to `source.frame.index`; empty when
 *          there is no source or the element carries no subject assets.
 */
function subjectAssetRefsForPlanning(source: { frame: KeyFrame; element: KeyElement } | null): ImageRef[] {
  const refs: ImageRef[] = []
  if (!source) return refs
  const { frame, element } = source
  for (const asset of (element.subject_assets ?? []).slice(0, 10)) {
    const { id } = asset
    refs.push({
      kind: "asset",
      frame_idx: frame.index,
      // The asset id is used for both the element and the cutout slot.
      element_id: id,
      cutout_id: id,
      label: asset.label || asset.view || "相似主体视图",
    })
  }
  return refs
}
/**
 * Resolves the image reference to use as a row's first or last frame.
 *
 * Lookup order:
 *   1. the ref saved on the frame's storyboard for that role, unless it is a
 *      plain `"keyframe"` ref;
 *   2. otherwise the last entry in `frame.scene_assets` whose `asset_role`
 *      matches `role`.
 *
 * @param frame Frame that carries the storyboard row, or `null`.
 * @param role  Which endpoint to resolve.
 * @returns The resolved ref, or `null` when the frame is missing or has no
 *          matching saved ref / scene asset.
 */
function endpointAssetRef(frame: KeyFrame | null, role: "first_frame" | "last_frame"): ImageRef | null {
  if (!frame) return null
  const isFirst = role === "first_frame"

  const stored = isFirst ? frame.storyboard?.first_image : frame.storyboard?.last_image
  if (stored && stored.kind !== "keyframe") return stored

  // Later entries win: take the last scene asset generated for this role.
  const latest = (frame.scene_assets ?? []).filter((item) => item.asset_role === role).pop()
  if (!latest) return null

  const fallbackLabel = isFirst ? "首帧" : "尾帧"
  return {
    kind: "asset",
    frame_idx: frame.index,
    element_id: latest.id,
    cutout_id: latest.id,
    label: latest.label || fallbackLabel,
  }
}
/**
 * Builds the newline-joined text prompt used to generate one endpoint frame
 * (first or last) of a storyboard row.
 *
 * @param row                  Planned storyboard row (copy, visual mode, frame plans, product/subject needs).
 * @param role                 Which endpoint is being generated; the other endpoint's plan is included for continuity.
 * @param selectedProductItems Product references chosen for this row; their count and notes are quoted in the prompt.
 * @param subjectRefs          Subject asset references; only their count is quoted in the prompt.
 * @returns The prompt, one instruction per line.
 */
function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" | "last_frame", selectedProductItems: ProductRefItem[], subjectRefs: ImageRef[]) {
  const isFirst = role === "first_frame"
  const [target, opposite] = isFirst
    ? [row.firstFramePlan, row.lastFramePlan]
    : [row.lastFramePlan, row.firstFramePlan]
  const productNotes = selectedProductItems.length ? productReferenceNotes(selectedProductItems) : ""
  // Prefer the human-readable label of the visual mode; fall back to the raw value.
  const modeLabel = VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode

  const lines: string[] = []
  lines.push(`分镜 ${row.index + 1} ${isFirst ? "首帧" : "尾帧"}`)
  lines.push(`新口播文案:${row.skgCopy}`)
  lines.push(`镜头类型:${modeLabel}`)
  lines.push(`当前要生成的画面:${target}`)
  lines.push(`另一端画面用于连续性参考:${opposite}`)
  lines.push(`画面规划:${row.visualPlan}`)

  if (row.needsSubject) {
    lines.push(`人物主体:必须使用已生成的相似主体白底视图作为人物真源;已提供 ${subjectRefs.length} 张主体参考。不要回到原视频关键帧复刻人物。`)
  } else {
    lines.push("本条不需要主角人物;如出现人物,只能是局部手部、背影或环境人物,不要生成透明骨架主角。")
  }

  if (row.needsProduct) {
    lines.push(`产品融入:${row.productPlacement}${row.productIntegration}。已提供 ${selectedProductItems.length} 张同一 SKG 肩颈按摩仪产品参考;${productNotes}。产品是套在脖子上的 U 形肩颈按摩仪,必须保持真实佩戴大小、左右非对称和贴颈位置。`)
  } else {
    lines.push("本条不露出产品,不要强行生成 SKG 产品、包装、白底图或随机商品。")
  }

  lines.push("输出一张单独的 9:16 高清首/尾帧,不要拼图,不要字幕,不要平台 UI不要水印。画面要能作为后续视频生成的明确起止帧。")
  return lines.join("\n")
}
function buildStoryboardSceneFromAudioRow(
row: AudioStoryboardRow,
frame: KeyFrame,
productItems: ProductRefItem[] = [],
subjectRefs: ImageRef[] = [],
endpointRefs: { firstImage?: ImageRef | null; lastImage?: ImageRef | null } = {},
): StoryboardScene {
const selectedProductItems = row.needsProduct ? selectProductItemsForRow(row, productItems) : []
const productRefs = selectedProductItems.map((item) => item.ref)
const notes = productReferenceNotes(selectedProductItems)
@@ -940,8 +993,8 @@ function buildStoryboardSceneFromAudioRow(row: AudioStoryboardRow, frame: KeyFra
: "未上传产品图时使用默认 SKG 产品图;生成前建议先建立同一产品素材池,锁定左右差异、厚度和佩戴比例。"
return {
duration: Number(Math.max(3.2, Math.min(6.5, row.end - row.start || 4.5)).toFixed(1)),
first_image: { kind: "keyframe", frame_idx: frame.index, label: `分镜 ${row.index + 1} 参考帧` },
last_image: nextFrame ? { kind: "keyframe", frame_idx: nextFrame.index, label: `分镜 ${row.index + 1} 尾帧` } : null,
first_image: endpointRefs.firstImage ?? null,
last_image: endpointRefs.lastImage ?? null,
visual_mode: row.visualMode,
needs_product: row.needsProduct,
needs_subject: row.needsSubject,
@@ -950,7 +1003,11 @@ function buildStoryboardSceneFromAudioRow(row: AudioStoryboardRow, frame: KeyFra
product_placement: row.productPlacement,
product_images: productRefs,
product_image: productRefs[0] ?? null,
subject: row.needsSubject ? row.keyElements : "本条不需要人物主体或相似主体参考;如画面里出现人物,只作为背景或局部,不作为主角。",
subject_images: row.needsSubject ? subjectRefs : [],
subject_image: row.needsSubject ? subjectRefs[0] ?? null : null,
subject: row.needsSubject
? `${row.keyElements}\n主体真源使用已生成的相似主体白底视图${subjectRefs.length} 张;关键帧只用于前置主体提取,不作为后续视频首尾帧参考。`
: "本条不需要人物主体或相似主体参考;如画面里出现人物,只作为背景或局部,不作为主角。",
scene: `镜头类型:${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}\n${row.visualPlan}\n首帧规划${row.firstFramePlan}\n尾帧规划${row.lastFramePlan}\n原音频依据${row.source}`,
product: `产品需求:${row.needsProduct ? "需要产品参考" : "本条不需要产品"}\n产品出现方式${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "本条以情绪、人物状态、空间或节奏过渡为主,不露出产品。"}\n${productGuidance}`,
action: `${row.skgCopy}\n连续动作从首帧规划自然过渡到尾帧规划镜头类型和产品/人物需求不能中途改变。`,
@@ -988,6 +1045,9 @@ export function AdRecreationBoard({
const visualReady = (job?.frames.length ?? 0) > 0
const subjectAssetCount = countSubjectAssetViews(job)
const productAssetCount = job?.product_refs?.length ?? 0
const statusMessage = job?.message?.startsWith("视频生成已提交")
? "历史候选视频已保留;当前已暂停直接提交视频,先逐条生成并审核首尾帧。"
: job?.message
useEffect(() => {
setDraftSegments([])
@@ -1203,8 +1263,8 @@ export function AdRecreationBoard({
<span className="font-mono text-[12px] text-white/36">02</span>
<h2 className="text-[15px] font-semibold leading-tight text-white"></h2>
</div>
<div className="mt-1 truncate text-[11px] text-white/38" title={job?.message}>
{job?.message || "下载源视频后解析音频,再抽参考帧并生成相似主体。"}
<div className="mt-1 truncate text-[11px] text-white/38" title={statusMessage}>
{statusMessage || "下载源视频后解析音频,再抽参考帧并生成相似主体。"}
</div>
</div>
<div className="flex shrink-0 items-center gap-2">
@@ -1260,7 +1320,6 @@ export function AdRecreationBoard({
job={job}
selectedFrames={data.selectedFrames}
onJobUpdate={data.onJobUpdate}
onGenerateVideo={onGenerateVideo}
runtimeModels={runtimeModels}
/>
</div>
@@ -2157,17 +2216,16 @@ function AudioStoryboardPlanPanel({
job,
selectedFrames,
onJobUpdate,
onGenerateVideo,
runtimeModels,
}: {
job: Job | null
selectedFrames: Set<number>
onJobUpdate?: (job: Job) => void
onGenerateVideo?: (frameIdx: number, scene: StoryboardScene, model: string) => Promise<void> | void
runtimeModels?: RuntimeModels
}) {
const [videoBusyRow, setVideoBusyRow] = useState<number | null>(null)
const [batchVideoBusy, setBatchVideoBusy] = useState(false)
const [storyboardSaveBusyRow, setStoryboardSaveBusyRow] = useState<number | null>(null)
const [batchStoryboardSaveBusy, setBatchStoryboardSaveBusy] = useState(false)
const [endpointFrameBusy, setEndpointFrameBusy] = useState<string | null>(null)
const [productItems, setProductItems] = useState<ProductRefItem[]>([])
const [productUploading, setProductUploading] = useState(false)
const [productAnalyzing, setProductAnalyzing] = useState(false)
@@ -2185,6 +2243,11 @@ function AudioStoryboardPlanPanel({
[orderedFrames, selectedFrames],
)
const rowReferencePool = selectedReferenceFrames.length ? selectedReferenceFrames : orderedFrames
const similarActorSource = useMemo(
() => findSimilarActorSource(selectedReferenceFrames, orderedFrames),
[selectedReferenceFrames, orderedFrames],
)
const subjectRefs = useMemo(() => subjectAssetRefsForPlanning(similarActorSource), [similarActorSource])
useEffect(() => {
setProductItems((job?.product_refs ?? []).map(normalizeStoredProductItem))
@@ -2463,56 +2526,100 @@ function AudioStoryboardPlanPanel({
}
}
const submitRowVideo = async (row: AudioStoryboardRow, frame: KeyFrame) => {
if (!job || !onGenerateVideo) return
const nextFrame = orderedFrames.find((item) => item.timestamp > frame.timestamp) ?? null
const saveRowStoryboardDraft = async (row: AudioStoryboardRow, frame: KeyFrame) => {
if (!job) return
const plannedRow = { ...planForRow(row, frame), skgCopy: copyForRow(row) }
const scene = buildStoryboardSceneFromAudioRow(plannedRow, frame, nextFrame, productItems)
const scene = buildStoryboardSceneFromAudioRow(plannedRow, frame, productItems, subjectRefs, {
firstImage: endpointAssetRef(frame, "first_frame"),
lastImage: endpointAssetRef(frame, "last_frame"),
})
const updated = await updateStoryboard(job.id, frame.index, scene)
onJobUpdate?.(updated)
await onGenerateVideo(frame.index, scene, "seedance")
}
const generateRowVideo = async (row: AudioStoryboardRow, frame: KeyFrame | null) => {
if (!job || !frame || !onGenerateVideo) return
setVideoBusyRow(row.index)
const generateEndpointFrameForRow = async (row: AudioStoryboardRow, frame: KeyFrame | null, role: "first_frame" | "last_frame") => {
if (!job || !frame) return
const plannedRow = { ...planForRow(row, frame), skgCopy: copyForRow(row) }
if (plannedRow.needsSubject && !subjectRefs.length) {
toast.warning("先在上方生成相似主体白底视图,再生成首尾帧")
return
}
if (plannedRow.needsProduct && !productItems.length) {
toast.warning("本条需要产品,请先上传并识别产品素材池")
return
}
const selectedProductItems = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems) : []
const busyKey = `${row.index}:${role}`
setEndpointFrameBusy(busyKey)
try {
await submitRowVideo(row, frame)
await saveRowStoryboardDraft(plannedRow, frame)
const updated = await generateSceneAsset(job.id, frame.index, {
size: SUBJECT_ASSET_SIZE,
scene_mode: "similar",
scene_style: "premium_product",
asset_role: role,
prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectRefs),
subject_images: plannedRow.needsSubject ? subjectRefs : [],
product_images: selectedProductItems.map((item) => item.ref),
source_frame_indices: [],
})
const updatedFrame = updated.frames.find((item) => item.index === frame.index) ?? frame
const generatedRef = endpointAssetRef(updatedFrame, role)
const scene = buildStoryboardSceneFromAudioRow(plannedRow, updatedFrame, productItems, subjectRefs, {
firstImage: role === "first_frame" ? generatedRef : endpointAssetRef(updatedFrame, "first_frame"),
lastImage: role === "last_frame" ? generatedRef : endpointAssetRef(updatedFrame, "last_frame"),
})
const saved = await updateStoryboard(job.id, frame.index, scene)
onJobUpdate?.(saved)
toast.success(`分镜 ${row.index + 1} ${role === "first_frame" ? "首帧" : "尾帧"}已生成`)
} catch (e) {
toast.error("生成本条视频失败:" + (e instanceof Error ? e.message : String(e)))
toast.error(`${role === "first_frame" ? "首帧" : "尾帧"}生成失败:` + (e instanceof Error ? e.message : String(e)))
} finally {
setVideoBusyRow(null)
setEndpointFrameBusy(null)
}
}
const generateAllRowVideos = async () => {
if (!job || !onGenerateVideo || !rows.length) return
const saveSingleRowStoryboardDraft = async (row: AudioStoryboardRow, frame: KeyFrame | null) => {
if (!job || !frame) return
setStoryboardSaveBusyRow(row.index)
try {
await saveRowStoryboardDraft(row, frame)
toast.success("已保存本条分镜规划;视频生成入口已暂停,等待首尾帧资产")
} catch (e) {
toast.error("保存本条规划失败:" + (e instanceof Error ? e.message : String(e)))
} finally {
setStoryboardSaveBusyRow(null)
}
}
const saveAllStoryboardDrafts = async () => {
if (!job || !rows.length) return
const jobsToSubmit = rows
.map((row) => ({ row: planForRow(row, referenceFrameForRow(row)), frame: referenceFrameForRow(row) }))
.filter((item): item is { row: AudioStoryboardRow; frame: KeyFrame } => !!item.frame)
if (!jobsToSubmit.length) {
toast.warning("先完成自动抽帧,或在原版视频上手动补参考帧")
toast.warning("先完成前置抽帧,让每条分镜有可保存的承载位置")
return
}
setBatchVideoBusy(true)
setBatchStoryboardSaveBusy(true)
let ok = 0
let failed = 0
try {
for (const item of jobsToSubmit) {
setVideoBusyRow(item.row.index)
setStoryboardSaveBusyRow(item.row.index)
try {
await submitRowVideo(item.row, item.frame)
await saveRowStoryboardDraft(item.row, item.frame)
ok += 1
} catch (e) {
failed += 1
console.warn("批量提交分镜失败", item.row.index, e)
console.warn("批量保存分镜规划失败", item.row.index, e)
}
}
if (failed) toast.warning(`提交 ${ok} 条,${failed} 条失败`)
else toast.success(`提交全部 ${ok} 条分镜视频`)
if (failed) toast.warning(`保存 ${ok}规划${failed} 条失败`)
else toast.success(`保存全部 ${ok} 条分镜规划;视频生成入口已暂停`)
} finally {
setVideoBusyRow(null)
setBatchVideoBusy(false)
setStoryboardSaveBusyRow(null)
setBatchStoryboardSaveBusy(false)
}
}
@@ -2528,7 +2635,7 @@ function AudioStoryboardPlanPanel({
<div className="grid shrink-0 grid-cols-3 gap-2 text-[11px] text-white/45">
<Requirement label="分镜" ready={rows.length > 0} detail={rows.length ? `${rows.length}` : "待音频"} />
<Requirement label="参考帧" ready={orderedFrames.length > 0} detail={orderedFrames.length ? `${orderedFrames.length}` : "待抽帧"} />
<Requirement label="生成" ready={(job.generated_videos?.length ?? 0) > 0} detail={`${job.generated_videos?.length ?? 0}`} />
<Requirement label="候选" ready={(job.generated_videos?.length ?? 0) > 0} detail={`${job.generated_videos?.length ?? 0}历史`} />
</div>
</div>
@@ -2636,12 +2743,12 @@ function AudioStoryboardPlanPanel({
</button>
<button
type="button"
onClick={() => void generateAllRowVideos()}
disabled={batchVideoBusy || !onGenerateVideo || !rows.length || !orderedFrames.length}
className="inline-flex h-9 items-center justify-center gap-1 rounded-md bg-rose-600 px-2.5 text-[11px] font-semibold text-white transition hover:bg-rose-500 disabled:cursor-not-allowed disabled:opacity-40"
onClick={() => void saveAllStoryboardDrafts()}
disabled={batchStoryboardSaveBusy || !rows.length || !orderedFrames.length}
className="inline-flex h-9 items-center justify-center gap-1 rounded-md bg-white px-2.5 text-[11px] font-semibold text-black transition hover:bg-white/90 disabled:cursor-not-allowed disabled:opacity-40"
>
{batchVideoBusy ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Play className="h-3.5 w-3.5" />}
{batchStoryboardSaveBusy ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Check className="h-3.5 w-3.5" />}
</button>
</div>
</div>
@@ -2650,13 +2757,13 @@ function AudioStoryboardPlanPanel({
const referenceFrame = referenceFrameForRow(row)
const plannedRow = planForRow(row, referenceFrame)
const rowVideos = videosForFrame(referenceFrame)
const generating = videoBusyRow === row.index
const savingStoryboard = storyboardSaveBusyRow === row.index
const copyText = copyForRow(row)
const selectedProductCount = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems).length : 0
return (
<article
key={row.index}
className="grid overflow-hidden rounded-md border border-white/10 bg-black/24 text-[11px] leading-snug text-white/64 xl:grid-cols-[56px_140px_minmax(220px,0.75fr)_minmax(240px,0.8fr)_minmax(320px,1fr)] 2xl:grid-cols-[58px_170px_minmax(360px,0.8fr)_minmax(380px,1fr)_520px]"
className="grid overflow-hidden rounded-md border border-white/10 bg-black/24 text-[11px] leading-snug text-white/64 xl:grid-cols-[54px_120px_minmax(170px,0.48fr)_minmax(420px,1.2fr)_360px] 2xl:grid-cols-[56px_140px_280px_minmax(560px,1fr)_420px]"
>
<StoryboardPlanCell label="分镜">
<div className="font-mono text-[11px] text-white/40">{row.start.toFixed(1)}-{row.end.toFixed(1)}s</div>
@@ -2744,8 +2851,37 @@ function AudioStoryboardPlanPanel({
placeholder="产品出现方式:不出现 / 首帧出现 / 尾帧出现 / 全程佩戴 / 产品特写"
className="min-h-[38px] w-full resize-y rounded border border-white/10 bg-black/32 px-2 py-1.5 text-[10.5px] leading-snug text-white/68 outline-none placeholder:text-white/25 focus:border-rose-300/45"
/>
<div className="grid gap-1.5 md:grid-cols-[minmax(0,1fr)_88px_88px]">
<div className="rounded border border-white/10 bg-black/24 px-2 py-1.5 text-[10px] leading-snug text-white/42">
<div className="mb-1 flex items-center justify-between gap-2">
<span className="text-white/54"></span>
<span className={endpointAssetRef(referenceFrame, "first_frame") && endpointAssetRef(referenceFrame, "last_frame") ? "text-emerald-100/75" : "text-amber-100/72"}>
{endpointAssetRef(referenceFrame, "first_frame") && endpointAssetRef(referenceFrame, "last_frame") ? "可进入视频候选" : "先看图再生视频"}
</span>
</div>
<p>
{plannedRow.needsProduct ? " + 产品素材池" : ""}
</p>
</div>
<EndpointFrameSlot
job={job}
frame={referenceFrame}
role="first_frame"
busy={endpointFrameBusy === `${row.index}:first_frame`}
disabled={!referenceFrame || (plannedRow.needsSubject && !subjectRefs.length) || (plannedRow.needsProduct && !productItems.length)}
onGenerate={() => void generateEndpointFrameForRow(plannedRow, referenceFrame, "first_frame")}
/>
<EndpointFrameSlot
job={job}
frame={referenceFrame}
role="last_frame"
busy={endpointFrameBusy === `${row.index}:last_frame`}
disabled={!referenceFrame || (plannedRow.needsSubject && !subjectRefs.length) || (plannedRow.needsProduct && !productItems.length)}
onGenerate={() => void generateEndpointFrameForRow(plannedRow, referenceFrame, "last_frame")}
/>
</div>
<div className="flex items-center justify-between gap-2 text-[10px] text-white/34">
<span>{plannedRow.needsProduct ? `将自动挑选 ${selectedProductCount || 0}产品参考图` : "本条不传产品图"}</span>
<span>{plannedRow.needsSubject ? `主体视图 ${subjectRefs.length}` : "本条不传主体"} · {plannedRow.needsProduct ? `产品参考 ${selectedProductCount || 0}` : "本条不传产品图"}</span>
<button
type="button"
onClick={() => patchRowPlan(row.index, visualModeDefaults(plannedRow.visualMode))}
@@ -2757,23 +2893,32 @@ function AudioStoryboardPlanPanel({
</div>
</StoryboardPlanCell>
<StoryboardPlanCell label="生成视频" className="xl:border-r-0">
<StoryboardVideoSlots job={job} videos={rowVideos} enabled={!!referenceFrame} />
<div className="mt-1 truncate text-[10px] text-white/34" title={referenceFrame ? `参考 ${referenceFrame.timestamp.toFixed(1)}s` : row.referencePlan}>
{referenceFrame ? `参考 ${referenceFrame.timestamp.toFixed(1)}s · 可多次生成候选` : "先在关键帧区自动抽帧 12 张"}
<StoryboardPlanCell label="视频候选 / 待生成" className="xl:border-r-0">
<StoryboardVideoSlots
job={job}
videos={rowVideos}
enabled={!!endpointAssetRef(referenceFrame, "first_frame") && !!endpointAssetRef(referenceFrame, "last_frame")}
/>
<div className="mt-1 truncate text-[10px] text-white/34" title="视频生成已暂停,首尾帧确认后再开放单条提交">
{endpointAssetRef(referenceFrame, "first_frame") && endpointAssetRef(referenceFrame, "last_frame")
? "首尾帧已就绪 · 待开放单条视频提交"
: "先生成并确认首帧 / 尾帧"}
</div>
<div className="mt-1 flex items-center justify-between gap-2">
<span className="text-[10px] text-white/34"></span>
<ModelTrace trace={videoModelTrace(runtimeModels, "seedance")} compact />
<span className="text-[10px] text-white/34"></span>
<span className="rounded border border-amber-300/18 bg-amber-300/[0.07] px-1.5 py-0.5 text-[10px] text-amber-100/70"></span>
</div>
<div className="mt-1 rounded border border-amber-300/12 bg-amber-300/[0.045] px-2 py-1 text-[10px] leading-snug text-amber-100/62">
SKG /
</div>
<button
type="button"
onClick={() => generateRowVideo(plannedRow, referenceFrame)}
disabled={!referenceFrame || !onGenerateVideo || generating}
onClick={() => void saveSingleRowStoryboardDraft(plannedRow, referenceFrame)}
disabled={!referenceFrame || savingStoryboard}
className="mt-1.5 inline-flex h-8 w-full items-center justify-center gap-1 rounded-md bg-white px-2 text-[11px] font-semibold text-black transition hover:bg-white/90 disabled:cursor-not-allowed disabled:opacity-40"
>
{generating ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Play className="h-3.5 w-3.5" />}
· Seedance
{savingStoryboard ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Check className="h-3.5 w-3.5" />}
</button>
</StoryboardPlanCell>
</article>
@@ -2782,7 +2927,7 @@ function AudioStoryboardPlanPanel({
</div>
</>
) : (
<EmptyState text="音频解析完成后,这里会按逐句时间轴生成信息流复刻分镜工作台。先在关键帧区自动抽帧 12 张并选择主角参考,再按分镜生成视频候选。" />
<EmptyState text="音频解析完成后,这里会按逐句时间轴生成信息流复刻分镜工作台。先抽帧并生成相似主体,再逐条规划首尾帧。" />
)}
</section>
)
@@ -2951,7 +3096,7 @@ function StoryboardVideoSlots({ job, videos, enabled }: { job: Job; videos: Gene
))}
{Array.from({ length: emptyCount }).map((_, index) => (
<div key={`empty-video-${index}`} className="flex aspect-[9/16] min-h-[86px] min-w-0 items-center justify-center rounded border border-dashed border-white/12 bg-black/25 px-1 text-center text-[9.5px] leading-tight text-white/26">
{enabled ? `候选 ${visible.length + index + 1}` : "先抽 12 帧"}
{enabled ? `候选 ${visible.length + index + 1}` : "待首尾帧"}
</div>
))}
</div>
@@ -2962,6 +3107,53 @@ function StoryboardVideoSlots({ job, videos, enabled }: { job: Job; videos: Gene
)
}
/**
 * Preview tile plus generate/regenerate button for one endpoint frame
 * (first or last) of a storyboard row.
 *
 * Shows the resolved endpoint image as a link that opens in a new tab, or a
 * text placeholder when no image is resolved. While `busy`, a spinner overlay
 * covers the preview and the button is disabled.
 */
function EndpointFrameSlot(props: {
  job: Job
  frame: KeyFrame | null
  role: "first_frame" | "last_frame"
  busy: boolean
  disabled: boolean
  onGenerate: () => void
}) {
  const { job, frame, role, busy, disabled, onGenerate } = props
  const assetRef = endpointAssetRef(frame, role)
  const imageUrl = assetRef ? resolveImageRefUrl(job.id, assetRef) : ""
  const label = role === "first_frame" ? "首帧" : "尾帧"

  // Either the clickable image or the empty-state caption.
  const preview = imageUrl ? (
    <a href={imageUrl} target="_blank" rel="noreferrer" className="group h-full w-full">
      <img src={imageUrl} alt={`${label}资产`} className="h-full w-full object-contain transition group-hover:scale-[1.02]" />
    </a>
  ) : (
    <div className="px-2 text-center text-[10px] leading-snug text-white/28">{label}</div>
  )
  const actionIcon = busy ? <Loader2 className="h-3 w-3 animate-spin" /> : <Sparkles className="h-3 w-3" />
  // An existing image switches the action from "generate" to "regenerate".
  const actionText = imageUrl ? `重生${label}` : `生成${label}`

  return (
    <div className="overflow-hidden rounded border border-white/10 bg-black/32">
      <div className="relative flex aspect-[9/16] min-h-[112px] items-center justify-center bg-black">
        {preview}
        {busy && (
          <div className="absolute inset-0 flex items-center justify-center bg-black/65">
            <Loader2 className="h-4 w-4 animate-spin text-white/80" />
          </div>
        )}
      </div>
      <button
        type="button"
        onClick={onGenerate}
        disabled={disabled || busy}
        className="flex h-7 w-full items-center justify-center gap-1 border-t border-white/10 bg-white/[0.045] px-1 text-[10px] font-semibold text-white/62 transition hover:bg-white/[0.09] hover:text-white disabled:cursor-not-allowed disabled:opacity-35"
      >
        {actionIcon}
        {actionText}
      </button>
    </div>
  )
}
function StoryboardVideoPreview({ job, video, className = "h-20 w-12" }: { job: Job; video: GeneratedVideo; className?: string }) {
const src = videoSrc(video)
const poster = videoPoster(job, video)

View File

@@ -117,6 +117,7 @@ export interface StoryboardScene {
first_image?: ImageRef | null
last_image?: ImageRef | null
product_images?: ImageRef[]
subject_images?: ImageRef[]
product_fusion_shots?: ProductFusionShot[]
visual_mode?: "person_only" | "person_product" | "product_only" | "environment"
needs_product?: boolean
@@ -1049,6 +1050,8 @@ export async function generateSceneAsset(
asset_role?: SceneAssetRole
prompt?: string
source_frame_indices?: number[]
subject_images?: ImageRef[]
product_images?: ImageRef[]
} = {},
): Promise<Job> {
const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/scene-asset`, {
@@ -1062,6 +1065,8 @@ export async function generateSceneAsset(
asset_role: body.asset_role ?? "scene",
prompt: body.prompt ?? "",
source_frame_indices: body.source_frame_indices ?? null,
subject_images: body.subject_images ?? [],
product_images: body.product_images ?? [],
}),
})
if (!res.ok) {