diff --git a/.memory/worklog.json b/.memory/worklog.json
index 777b36e..eb806a4 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,19 +1,5 @@
{
"entries": [
- {
- "files_changed": 3,
- "hash": "4779c26",
- "message": "auto-save 2026-05-12 16:49 (~3)",
- "ts": "2026-05-12T16:50:05+08:00",
- "type": "commit"
- },
- {
- "files_changed": 4,
- "hash": "345391d",
- "message": "auto-save 2026-05-12 16:55 (~4)",
- "ts": "2026-05-12T16:55:37+08:00",
- "type": "commit"
- },
{
"files_changed": 3,
"hash": "4138bea",
@@ -3354,6 +3340,19 @@
"type": "session-heartbeat",
"message": "Claude 会话活跃 · 最近命令:claude · 1 项未提交变更 · 最近提交:auto-save 2026-05-14 06:00 (~2)",
"files_changed": 1
+ },
+ {
+ "ts": "2026-05-14T06:05:57+08:00",
+ "type": "commit",
+ "message": "auto-save 2026-05-14 06:05 (~1)",
+ "hash": "2b54616",
+ "files_changed": 1
+ },
+ {
+ "ts": "2026-05-13T22:08:51Z",
+ "type": "session-heartbeat",
+ "message": "Codex 会话活跃 · 最近命令:codex · 4 项未提交变更 · 最近提交:auto-save 2026-05-14 06:05 (~1)",
+ "files_changed": 4
}
]
}
diff --git a/api/main.py b/api/main.py
index 680c155..c95d2bd 100644
--- a/api/main.py
+++ b/api/main.py
@@ -97,6 +97,8 @@ AssetSize = Literal["source", "1024", "1536", "2048"]
AssetQuality = Literal["hd"]
SubjectKind = Literal["object", "living"]
SubjectView = str
+SceneMode = Literal["remove_subject", "similar", "style"]
+SceneStyle = Literal["source", "premium_product", "clean_studio", "warm_lifestyle", "cinematic"]
FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = {
"balanced": "综合关键帧",
"subject": "清晰主体",
@@ -191,6 +193,8 @@ class SceneAsset(BaseModel):
height: int = 0
quality: AssetQuality = "hd"
size: AssetSize = "source"
+ scene_mode: SceneMode = "remove_subject"
+ scene_style: SceneStyle = "source"
quality_report: QualityReport | None = None
created_at: float = 0.0
@@ -1930,6 +1934,8 @@ class UpdateElementReq(BaseModel):
class GenerateSceneAssetReq(BaseModel):
quality: AssetQuality = "hd"
size: AssetSize = "source"
+ scene_mode: SceneMode = "remove_subject"
+ scene_style: SceneStyle = "source"
class GenerateSubjectAssetsReq(BaseModel):
@@ -2058,7 +2064,8 @@ def delete_element(job_id: str, idx: int, element_id: str) -> Job:
@app.post("/jobs/{job_id}/frames/{idx}/scene-asset", response_model=Job)
def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> Job:
- """为关键帧生成一张干净、高清的场景参考图。默认一帧只需要一张,重跑会保留历史供人工比对。"""
+ """为关键帧生成一张干净、高清的场景参考图。默认一帧只需要一张,重跑会保留历史供人工比对。
+ 场景图排在主体资产之后:优先依据已确认主体,去主体并补全背景,再按模式生成原场景/相似场景/换风格场景。"""
import time as _time
job = JOBS.get(job_id)
if not job:
@@ -2068,12 +2075,51 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
if not src.exists():
raise HTTPException(404, "source frame file missing")
+    # Subject names for the removal prompt. Blank/missing names are dropped so the clause
+    # can never degrade to "...to remove: , . " when an element with assets has no name.
+    named_elements = [
+        ((e.name_en or e.name_zh or "").strip(), e)
+        for e in (frame.elements or [])
+    ]
+    # Prefer subjects that already have generated subject assets.
+    confirmed_subjects = [name for name, e in named_elements if name and (e.subject_assets or [])]
+    if not confirmed_subjects:
+        # Otherwise fall back to at most three named candidates from the frame.
+        confirmed_subjects = [name for name, _ in named_elements if name][:3]
+    subject_clause = (
+        "Confirmed foreground subject(s) to remove: " + ", ".join(confirmed_subjects) + ". "
+        if confirmed_subjects
+        else "Remove the main foreground subject from the frame if present. "
+    )
+ mode_clause = {
+ "remove_subject": (
+ "Keep the original environment, camera angle, perspective, composition, lighting direction, color mood, and spatial layout. "
+ "The result should be an empty clean scene/background plate with the subject removed and the occluded background reconstructed."
+ ),
+ "similar": (
+ "Create a similar but not identical scene/background plate: keep the same camera angle, rough spatial layout, lighting direction, and usage context, "
+ "but vary props, surface details, textures, and small environmental details so it is not a duplicate of the source."
+ ),
+ "style": (
+ "Create a scene/background plate with the same camera angle and spatial layout, but reinterpret the environment in the selected visual style. "
+ "Keep it believable and useful for image-to-video generation."
+ ),
+ }[req.scene_mode]
+ style_clause = {
+ "source": "Follow the original source style.",
+ "premium_product": "Use a premium product-advertising style: polished, high-end, clean commercial lighting, refined materials.",
+ "clean_studio": "Use a clean studio style: simple surfaces, controlled lighting, minimal distractions.",
+ "warm_lifestyle": "Use a warm lifestyle style: realistic lived-in details, soft natural light, approachable atmosphere.",
+ "cinematic": "Use a cinematic style: dramatic but natural lighting, richer depth, filmic contrast, not fantasy.",
+ }[req.scene_style]
prompt = (
- "Create one clean high-definition scene reference image from this frame. "
- "Remove watermarks, platform UI, captions, usernames, hashtags, logos, and overlay graphics. "
- "Preserve the original camera angle, composition, environment, lighting style, and believable spatial layout. "
- "Do not create multiple views. Do not isolate objects. Keep it useful as the scene/background reference for image-to-video generation. "
- "Enhance clarity and texture while avoiding over-smoothing or changing important visual details."
+ "Create one clean high-definition scene/background reference image from this frame. "
+ + subject_clause
+ + "Do not include the removed subject, duplicate people, animals, products, text, watermark, platform UI, captions, usernames, hashtags, logos, or overlay graphics. "
+ + mode_clause + " "
+ + style_clause + " "
+ + "Enhance clarity and texture while avoiding over-smoothing, warped geometry, or changing important perspective details. "
+ + "Do not create multiple views. Do not isolate objects."
)
models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
try:
@@ -2093,6 +2139,8 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
height=height,
quality=req.quality,
size=req.size,
+ scene_mode=req.scene_mode,
+ scene_style=req.scene_style,
quality_report=report,
created_at=_time.time(),
)
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index 6f07a08..5f2d86c 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -555,7 +555,7 @@
2
镜头拆解
拆轨、抽关键帧、手动加帧,形成参考分镜池。
3
清洗水印
对关键帧做全图或区域清洗,必要时应用为当前参考图。
4
主体识别
识别场景和主体候选,只是候选,不应锁死。
- 5
素材准备
清洗关键帧,生成场景图和主体多视角/动作/表情资产包。
+ 5
素材准备
清洗关键帧,先生成主体多视角/动作/表情资产包,再生成去主体、相似或换风格场景图。
6
分镜改造
把参考主体、场景、动作和 SKG 产品放入分镜结构。
7
生成视频
用分镜 4 图槽、改造目标和时长调用 Seedance / Kling / Veo 3 生视频 API,结果回写到画面工作台节点。
8
合成成品
片段、字幕、配音、转场合成最终 mp4。当前未实现。
@@ -571,7 +571,7 @@
web/app/page.tsx | 产品工作台主状态:jobs、activeJobId、selectedFrames、clipboard、ReactFlow 节点和边;负责打开/找回画布工作面板。 |
web/components/nodes/index.tsx | DAG 节点定义:Input、VisualLab、Audio、Compose,以及画布工作面板 KeyframePanel / VideoFramePanel;旧 Keyframe/Storyboard/VideoGen 组件保留但不再挂主画布。 |
- web/components/lightbox.tsx | 关键帧素材准备面板:清洗、场景图、主体候选、主体资产包和审核。 |
+ web/components/lightbox.tsx | 关键帧素材准备面板:清洗、主体候选、主体资产包、去主体场景图和审核。 |
web/components/storyboard-bar.tsx | 顶部分镜编排条:展示选入编排的关键帧,并作为唯一分镜导航。 |
web/components/storyboard-workbench.tsx | 顶部分镜编排条下方的明细区:4 图槽、改造目标、时长、自动保存。 |
web/lib/api.ts | 前端类型和 API client,是前后端数据契约镜像。 |
@@ -618,13 +618,13 @@ api/main.py
你看到的区域画面工作台 · Visual Lab
-
主要源码VisualLabNode in web/components/nodes/index.tsx;它现在是素材准备看板,汇总关键帧、场景图、主体资产包和视频任务。
+
主要源码VisualLabNode in web/components/nodes/index.tsx;它现在是素材准备看板,汇总关键帧、主体资产包、场景图和视频任务。
适合怎么描述“画面工作台的素材准备进度、分组缩略图、关键帧审核入口和后续分镜入口应该如何组织”。
你看到的区域关键帧素材审核面板
-
主要源码FrameLightbox;按“原图/清洗、场景图、主体资产、审核”四个页签组织;非主体页采用左侧大图 + 右侧窄状态栏,主体资产页保留较宽右栏;清洗页支持一键批量生成待审核清洗版,相关接口包括 cleanupFrame、addElement、generateSceneAsset、generateSubjectAssets。
-
适合怎么描述“某张关键帧的水印、场景图、主体多视角/动作/表情图和质量风险应该如何审核”。
+
主要源码FrameLightbox;按“原图/清洗、主体资产、场景图、审核”四个页签组织;左侧只放主图/框选画布,右侧承载当前页操作、状态和结果;场景图依赖主体资产,支持去主体原场景、相似新场景和同构换风格。相关接口包括 cleanupFrame、addElement、generateSubjectAssets、generateSceneAsset。
+
适合怎么描述“某张关键帧的水印、主体多视角/动作/表情图、去主体场景图和质量风险应该如何审核”。
你看到的区域顶部分镜头编排下拉面板
@@ -728,8 +728,8 @@ SubjectAsset {
| 应用清洗 | POST /cleanup/apply | applyCleanedFrame | 物理覆盖 frames/{idx}.jpg,并备份原图。 |
| 元素增改删 | POST/PATCH/DELETE /elements | addElement/updateElement/deleteElement | 让用户修正 Vision 错误,避免候选结果锁死。 |
| 元素提取 | POST /elements/{element_id}/cutout | cutoutElement | 调用图像模型生成独立白底素材图,每次累积一张 cutout。 |
-
| 场景资产 | POST /frames/{idx}/scene-asset | generateSceneAsset | 为每张已选关键帧生成一张去水印、高清增强的场景图,保留历史版本用于人工审核。 |
| 主体资产包 | POST /elements/{element_id}/subject-assets | generateSubjectAssets | 根据用户选择的视图、动作和表情生成主体资产包;当多个关键帧都指向同一主体时,前端把已选关键帧作为 source_frame_indices 传入,后端拼参考板。 |
+
| 场景资产 | POST /frames/{idx}/scene-asset | generateSceneAsset | 在主体资产之后生成去主体背景板;请求包含 scene_mode 和 scene_style,可做原场景补背景、相似新场景或同构换风格,保留历史版本用于人工审核。 |
| 分镜保存 | PUT /frames/{idx}/storyboard | updateStoryboard | 保存 4 图槽、时长和改造说明。 |
| 生图 | POST /frames/{idx}/generate | generateImage | 基于关键帧或已选生成图做 image-to-image,目前可用。 |
@@ -751,7 +751,7 @@ SubjectAsset {
| 画面工作台 Visual Lab |
- 作为素材准备看板:显示准备进度、质量风险、关键帧 / 场景图 / 主体包 / 分镜视频四个入口;上方缩略图按关键帧、场景图、主体包、视频任务分组。点击关键帧进入素材审核面板,点击资产图复制到分镜编排。 |
+ 作为素材准备看板:显示准备进度、质量风险、关键帧 / 主体包 / 场景图 / 分镜视频四个入口;上方缩略图按关键帧、主体包、场景图、视频任务分组。点击关键帧进入素材审核面板,点击资产图复制到分镜编排。 |
不要在主卡片里堆复杂表单;主卡片只做状态总览和入口。 |
VisualLabNode、FrameLightbox、generateSceneAsset、generateSubjectAssets、视频任务接口 |
@@ -814,7 +814,7 @@ SubjectAsset {
改关键帧素材准备
-
“我在关键帧素材准备面板里,主体候选应该怎么编辑/删除;场景图和主体资产包怎么生成、审核、复制到分镜。”
+
“我在关键帧素材准备面板里,主体候选应该怎么编辑/删除;主体资产包怎么生成;场景图怎么基于主体去除、换风格、审核、复制到分镜。”
改 Storyboard 节点
@@ -841,13 +841,26 @@ SubjectAsset {
- 2026-05-14 · 关键帧素材面板右侧改为紧凑状态栏
+ 2026-05-14 · 场景图改为主体资产之后生成
+ FrameLightbox
+ API
+
+
+
问题:场景图如果先于主体资产生成,只能做普通背景清理,无法准确知道要移除哪个主体,也不利于后续生成相似但不同或同构换风格的场景。
+
改动:FrameLightbox 页签顺序改为“原图/清洗 → 主体资产 → 场景图 → 审核”;画面工作台缩略图和进度文案也同步为主体资产先于场景图。场景图页新增“去主体原场景 / 相似新场景 / 同构换风格”和风格选择,且在没有主体资产时提示先生成主体资产。
+
后端:generateSceneAsset 请求新增 scene_mode 和 scene_style;后端提示词会优先读取已生成主体资产对应的主体名称,生成去主体并补背景的场景图,再按模式决定是否做相似变化或风格变化。
+
影响:web/components/lightbox.tsx、web/components/nodes/index.tsx、web/lib/api.ts、api/main.py、docs/source-analysis.html。
+
+
+
+
+ 2026-05-14 · 关键帧素材面板统一右侧操作栏
FrameLightbox
Layout
-
问题:移除旧元素提取和手工加主体入口后,关键帧详情右侧内容变少,继续占用大列会压缩左侧主图和清洗操作区。
-
改动:FrameLightbox 在“原图/清洗、场景图、审核”页把右侧改成固定窄状态栏,左侧主图和操作区获得更大宽度;“主体资产”页仍保留较宽右栏,用于主体识别、主体清单和资产包。
+
问题:“原图/清洗、主体资产、场景图、审核”都应遵循同一结构:左侧负责看图和框选,右侧负责操作、状态和结果;旧布局把部分操作塞在左侧下方,导致左侧满、右侧空。
+
改动:FrameLightbox 统一为左侧主图、右侧操作栏。清洗按钮、批量清洗、清洗结果预览、场景图生成/复制、主体识别/主体资产包和审核状态都在右侧;切换到非清洗页时会退出框选模式,避免画框状态残留。
影响:web/components/lightbox.tsx、docs/source-analysis.html。
diff --git a/web/components/lightbox.tsx b/web/components/lightbox.tsx
index ff40d00..961d44b 100644
--- a/web/components/lightbox.tsx
+++ b/web/components/lightbox.tsx
@@ -6,7 +6,7 @@ import {
frameUrl, cleanedFrameUrl, apiAssetUrl,
describeFrame, cleanupFrame, applyCleanedFrame, discardCleanedFrame, addElement, updateElement, deleteElement,
generateSceneAsset, generateSubjectAssets,
- type AssetBackground, type AssetSize, type KeyFrame, type Job, type ImageRef, type SubjectKind,
+ type AssetBackground, type AssetSize, type KeyFrame, type Job, type ImageRef, type SceneMode, type SceneStyle, type SubjectKind,
} from "@/lib/api"
import { toast } from "sonner"
@@ -51,11 +51,25 @@ type LightboxTab = "clean" | "scene" | "subject" | "review"
const LIGHTBOX_TABS: Array<{ key: LightboxTab; label: string }> = [
{ key: "clean", label: "原图/清洗" },
- { key: "scene", label: "场景图" },
{ key: "subject", label: "主体资产" },
+ { key: "scene", label: "场景图" },
{ key: "review", label: "审核" },
]
+const SCENE_MODE_OPTIONS: Array<[SceneMode, string]> = [
+ ["remove_subject", "去主体原场景"],
+ ["similar", "相似新场景"],
+ ["style", "同构换风格"],
+]
+
+const SCENE_STYLE_OPTIONS: Array<[SceneStyle, string]> = [
+ ["source", "跟随原图"],
+ ["premium_product", "高端产品感"],
+ ["clean_studio", "干净工作室"],
+ ["warm_lifestyle", "真实生活感"],
+ ["cinematic", "电影感"],
+]
+
export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, onChange, onToggleSelect, onJobUpdate, onSwitchPanel, onCopyImage, embedded = false }: Props) {
const [describing, setDescribing] = useState(false)
const [cleaningFrameIds, setCleaningFrameIds] = useState
>(new Set())
@@ -65,6 +79,8 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
const [sceneGenerating, setSceneGenerating] = useState(false)
const [subjectGenerating, setSubjectGenerating] = useState(null)
const [assetSize, setAssetSize] = useState("source")
+ const [sceneMode, setSceneMode] = useState("remove_subject")
+ const [sceneStyle, setSceneStyle] = useState("source")
const [subjectKinds, setSubjectKinds] = useState>({})
const [subjectBackgrounds, setSubjectBackgrounds] = useState>({})
const [subjectViews, setSubjectViews] = useState>({})
@@ -124,6 +140,7 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
const selectedFrameIndices = Array.from(selected).sort((a, b) => a - b)
const sharedSubjectFrameIndices = selectedFrameIndices.length > 1 ? selectedFrameIndices : [f.index]
const subjectAssetCount = elements.reduce((sum, item) => sum + (item.subject_assets?.length ?? 0), 0)
+ const hasSubjectAssets = subjectAssetCount > 0
const qualityWarnings = [
...(f.quality_report?.warnings ?? []),
...(latestSceneAsset?.quality_report?.warnings ?? []),
@@ -197,9 +214,14 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
}
const handleGenerateSceneAsset = async () => {
+ if (!hasSubjectAssets) {
+ setActiveTab("subject")
+ toast.message("先生成主体资产,再生成去主体场景图")
+ return
+ }
setSceneGenerating(true)
try {
- const updated = await generateSceneAsset(jobId, f.index, { size: assetSize })
+ const updated = await generateSceneAsset(jobId, f.index, { size: assetSize, scene_mode: sceneMode, scene_style: sceneStyle })
onJobUpdate?.(updated)
toast.success(`分镜 ${f.index + 1} 场景图已生成`)
} catch (e) {
@@ -406,7 +428,15 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
))}
- {latestSceneAsset ? "场景已生成" : "场景待生成"}
+ {hasSubjectAssets ? `${subjectAssetCount} 主体资产` : "主体待生成"}
·
- {subjectAssetCount > 0 ? `${subjectAssetCount} 主体资产` : "主体待生成"}
+ {latestSceneAsset ? "场景已生成" : "场景待生成"}
- {/* 主体 — 左:大图 + 主操作;右:当前页上下文 / 主体资产 */}
+ {/* 主体 — 左:主图;右:当前页操作 / 状态 / 主体资产 */}
{/* 左侧大图区 */}
@@ -650,11 +676,50 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
+
+ 先根据主体资产确认要移除的主体,再补全空场景;之后可生成相似新场景或同构换风格场景。
+
+
+
+
+
+ {!hasSubjectAssets && (
+
+ 还没有主体资产。先在“主体资产”页生成主体图,场景图才能更准确地去主体和补背景。
+
+ )}
{latestSceneAsset ? (
-
{latestSceneAsset.width}×{latestSceneAsset.height}
+
+ {latestSceneAsset.width}×{latestSceneAsset.height}
+ {latestSceneAsset.scene_mode && (
+ <> · {SCENE_MODE_OPTIONS.find(([value]) => value === latestSceneAsset.scene_mode)?.[1] ?? latestSceneAsset.scene_mode}>
+ )}
+
{onCopyImage && (
+ {!hasSubjectAssets && (
+
+ )}
)}
{activeTab === "review" && (
@@ -697,10 +771,6 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
清洗
{f.cleaned_applied ? "已应用" : hasCleaned ? "待确认" : "未处理"}
-
-
场景
-
{latestSceneAsset ? "已生成" : "未生成"}
-
0 ? "border-violet-300/35 bg-violet-500/12 text-violet-100" : "border-white/10 bg-black/25 text-white/55"}`}>
主体
{elements.length} 个
@@ -709,6 +779,10 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
资产
{subjectAssetCount} 张
+
+
场景
+
{latestSceneAsset ? "已生成" : "未生成"}
+
{qualityWarnings.length > 0 ? (
@@ -722,7 +796,7 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
)}
- 审核通过后,把场景图和主体资产复制到分镜槽位;当前不会自动覆盖素材。
+ 审核通过后,把主体资产和场景图复制到分镜槽位;当前不会自动覆盖素材。
)}
diff --git a/web/components/nodes/index.tsx b/web/components/nodes/index.tsx
index 4ea3cda..1764da4 100644
--- a/web/components/nodes/index.tsx
+++ b/web/components/nodes/index.tsx
@@ -1257,18 +1257,6 @@ export function VisualLabNode({ data, selected }: any) {
borderClass: f.quality_report?.risk === "bad" ? "border-rose-300/70" : f.quality_report?.risk === "warn" ? "border-amber-300/70" : "border-orange-300/50",
aspect,
})) : []),
- ...sceneAssets.map((p) => ({
- id: `scene:${p.frameIdx}:${p.assetId}`,
- kind: "scene" as const,
- group: "场景图",
- frameIdx: p.frameIdx,
- assetId: p.assetId,
- src: p.src,
- label: p.label,
- caption: `${p.width}×${p.height}`,
- borderClass: p.risk === "bad" ? "border-rose-300/70" : p.risk === "warn" ? "border-amber-300/70" : "border-emerald-300/60",
- aspect: p.width && p.height ? `${p.width}/${p.height}` : aspect,
- })),
...subjectAssets.map((p) => ({
id: `subject:${p.frameIdx}:${p.assetId}`,
kind: "subject" as const,
@@ -1281,6 +1269,18 @@ export function VisualLabNode({ data, selected }: any) {
borderClass: "border-violet-300/65",
aspect: p.width && p.height ? `${p.width}/${p.height}` : "1/1",
})),
+ ...sceneAssets.map((p) => ({
+ id: `scene:${p.frameIdx}:${p.assetId}`,
+ kind: "scene" as const,
+ group: "场景图",
+ frameIdx: p.frameIdx,
+ assetId: p.assetId,
+ src: p.src,
+ label: p.label,
+ caption: `${p.width}×${p.height}`,
+ borderClass: p.risk === "bad" ? "border-rose-300/70" : p.risk === "warn" ? "border-amber-300/70" : "border-emerald-300/60",
+ aspect: p.width && p.height ? `${p.width}/${p.height}` : aspect,
+ })),
...videos.map((v, i) => {
const videoSrc = apiAssetUrl(v.url)
const posterSrc = apiAssetUrl(v.poster_url)
@@ -1556,19 +1556,6 @@ export function VisualLabNode({ data, selected }: any) {
关键帧素材
-
主体资产
+