diff --git a/.memory/worklog.json b/.memory/worklog.json
index 0ea53f1..192c7da 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -3283,6 +3283,19 @@
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令:codex · 2 项未提交变更 · 最近提交:auto-save 2026-05-14 04:54 (~1)",
"files_changed": 2
+ },
+ {
+ "ts": "2026-05-14T04:59:53+08:00",
+ "type": "commit",
+ "message": "auto-save 2026-05-14 04:59 (~3)",
+ "hash": "f1f3a0f",
+ "files_changed": 3
+ },
+ {
+ "ts": "2026-05-13T21:03:13Z",
+ "type": "session-heartbeat",
+ "message": "Claude 会话活跃 · 最近命令:claude · 4 项未提交变更 · 最近提交:auto-save 2026-05-14 04:59 (~3)",
+ "files_changed": 4
}
]
}
diff --git a/api/main.py b/api/main.py
index 3948db2..b23559e 100644
--- a/api/main.py
+++ b/api/main.py
@@ -96,7 +96,7 @@ AssetBackground = Literal["white", "black"]
AssetSize = Literal["source", "1024", "1536", "2048"]
AssetQuality = Literal["hd"]
SubjectKind = Literal["object", "living"]
-SubjectView = Literal["front", "back", "left", "right", "side", "side_walk", "top", "bottom", "expression"]
+SubjectView = str
FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = {
"balanced": "综合关键帧",
"subject": "清晰主体",
@@ -690,16 +690,64 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat
     return out_path
 
 
-def _subject_view_labels(kind: SubjectKind) -> list[tuple[SubjectView, str]]:
+# Display labels for the known subject view keys. Keys outside this table are
+# still accepted; their label is derived from the key itself.
+SUBJECT_VIEW_LABELS: dict[str, str] = {
+    "front": "正面",
+    "back": "背面",
+    "left": "左侧",
+    "right": "右侧",
+    "side": "侧面",
+    "side_walk": "侧面走路",
+    "top": "顶部视角",
+    "bottom": "底部视角",
+    "expression_neutral": "中性表情",
+    "expression_happy": "开心表情",
+    "expression_angry": "生气表情",
+    "expression_sad": "难过表情",
+    "expression_relaxed": "放松表情",
+    "action_walk": "走路动作",
+    "action_sit": "坐姿动作",
+    "action_hold": "手持动作",
+    "action_use": "使用动作",
+}
+
+
+def _subject_view_labels(kind: SubjectKind, requested: list[str] | None = None) -> list[tuple[SubjectView, str]]:
+    """Return the ordered ``(view_key, label)`` pairs to generate for a subject.
+
+    Args:
+        kind: Subject category; selects the built-in default views.
+        requested: Optional caller-chosen view keys. Each key is stripped,
+            lower-cased and reduced to alphanumerics and underscores;
+            duplicates are dropped (first occurrence wins) and at most 12
+            keys are kept. A key missing from ``SUBJECT_VIEW_LABELS`` gets a
+            label made by replacing its underscores with spaces.
+
+    Returns:
+        The requested views when at least one key survives normalization,
+        otherwise the built-in defaults for ``kind``.
+    """
+    if requested:
+        cleaned = (
+            "".join(ch for ch in str(raw).strip().lower() if ch.isalnum() or ch == "_")
+            for raw in requested
+        )
+        # dict.fromkeys de-duplicates in first-seen order without an O(n^2)
+        # list membership scan; empty keys are dropped before the cap applies.
+        normalized = [key for key in dict.fromkeys(cleaned) if key][:12]
+        # A selection made only of unusable keys (e.g. ["", "!!"]) must not
+        # yield an empty package; fall through to the defaults instead.
+        if normalized:
+            return [(key, SUBJECT_VIEW_LABELS.get(key, key.replace("_", " "))) for key in normalized]
     if kind == "living":
         return [
             ("front", "正面站立"),
             ("back", "背面站立"),
             ("side", "侧面站立"),
             ("side_walk", "侧面走路"),
-            ("top", "顶部视角"),
-            ("bottom", "底部视角"),
-            ("expression", "表情参考"),
+            ("expression_neutral", "中性表情"),
+            ("expression_relaxed", "放松表情"),
         ]
     return [
         ("front", "正面"),
@@ -1911,6 +1938,7 @@ class GenerateSubjectAssetsReq(BaseModel):
quality: AssetQuality = "hd"
size: AssetSize = "source"
source_frame_indices: list[int] | None = None
+ views: list[str] | None = None
@app.post("/jobs/{job_id}/frames/{idx}/elements", response_model=Job)
@@ -2205,11 +2233,17 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
     models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
     generated: list[SubjectAsset] = []
     try:
-        for view, view_label in _subject_view_labels(req.subject_kind):
+        for view, view_label in _subject_view_labels(req.subject_kind, req.views):
             if view == "side_walk":
                 view_prompt = "side view in a natural walking pose, same identity and proportions"
-            elif view == "expression":
-                view_prompt = "clear expression reference, frontal or three-quarter standing pose, preserving the same identity"
+            elif view.startswith("expression_"):
+                # Take the emotion from the view key rather than the display label:
+                # labels of keys outside SUBJECT_VIEW_LABELS still contain the word
+                # "expression", which produced "clear expression xyz facial expression".
+                emotion = view.removeprefix("expression_").replace("_", " ") or "neutral"
+                view_prompt = f"clear {emotion} facial expression reference, frontal or three-quarter standing pose, preserving the same identity"
+            elif view.startswith("action_"):
+                view_prompt = f"{view_label} reference pose, same identity and proportions"
             else:
                 view_prompt = f"{view_label} view"
             prompt = (
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index ed03041..f6e98ae 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -655,6 +655,8 @@ api/main.py
index, timestamp, url,
description,
cleaned_url, cleaned_applied,
+ quality_report,
+ scene_assets: SceneAsset[],
elements: KeyElement[],
storyboard: StoryboardScene,
generated_images: GeneratedImage[]
@@ -669,7 +671,25 @@ api/main.py
source: auto | manual | region,
region,
cutouts: string[],
- cutout_id
+ cutout_id,
+ subject_kind: object | living,
+ subject_assets: SubjectAsset[]
+}
+
+
+
SceneAsset / SubjectAsset
+
画面工作台素材准备阶段生成的组图资产。实际图片保存在 jobs/<jobId>/assets,可作为 asset 类型复制到分镜槽位。
+
SceneAsset {
+ id, label, url,
+ width, height, quality, size,
+ quality_report
+}
+
+SubjectAsset {
+ id, view, label, url,
+ background: white | black,
+ width, height, size,
+ source_frame_indices[]
}
@@ -708,6 +728,8 @@ api/main.py
| 应用清洗 | POST /cleanup/apply | applyCleanedFrame | 物理覆盖 frames/{idx}.jpg,并备份原图。 |
| 元素增改删 | POST/PATCH/DELETE /elements | addElement/updateElement/deleteElement | 让用户修正 Vision 错误,避免候选结果锁死。 |
| 元素提取 | POST /elements/{element_id}/cutout | cutoutElement | 调用图像模型生成独立白底素材图,每次累积一张 cutout。 |
+
| 场景资产 | POST /frames/{idx}/scene-asset | generateSceneAsset | 为每张已选关键帧生成一张去水印、高清增强的场景图,保留历史版本用于人工审核。 |
+
| 主体资产包 | POST /elements/{element_id}/subject-assets | generateSubjectAssets | 根据用户选择的视图、动作和表情生成主体资产包;当多个关键帧都指向同一主体时,前端把已选关键帧作为 source_frame_indices 传入,后端拼参考板。 |
| 分镜保存 | PUT /frames/{idx}/storyboard | updateStoryboard | 保存 4 图槽、时长和改造说明。 |
| 生图 | POST /frames/{idx}/generate | generateImage | 基于关键帧或已选生成图做 image-to-image,目前可用。 |
@@ -729,9 +751,9 @@ api/main.py
| 画面工作台 Visual Lab |
- 在一个画布卡片里展示关键帧、元素 cutout 和视频任务;点击缩略图进入对应处理面板。 |
+ 在一个画布卡片里展示关键帧、元素 cutout、场景图、主体资产包和视频任务状态;点击缩略图进入对应处理面板。关键帧详情面板负责生成场景资产和主体多视角/动作/表情资产。 |
不要在主卡片里堆复杂表单;主卡片只做状态总览和入口。 |
- VisualLabNode、FrameLightbox、StoryboardWorkbench、视频任务接口 |
+ VisualLabNode、FrameLightbox、generateSceneAsset、generateSubjectAssets、视频任务接口 |
| 分镜工作台 |
diff --git a/web/components/lightbox.tsx b/web/components/lightbox.tsx
index 0914354..7d00f36 100644
--- a/web/components/lightbox.tsx
+++ b/web/components/lightbox.tsx
@@ -3,10 +3,10 @@ import { useEffect, useRef, useState } from "react"
import { createPortal } from "react-dom"
import { X, ChevronLeft, ChevronRight, Check, Sparkles, Wand2, Loader2, Eye, RefreshCw, Plus, Sparkle, Crop, Copy, PencilLine, Trash2, Save } from "lucide-react"
import {
- frameUrl, cleanedFrameUrl, cutoutUrl,
+ frameUrl, cleanedFrameUrl, cutoutUrl, apiAssetUrl,
describeFrame, cleanupFrame, applyCleanedFrame, discardCleanedFrame, addElement, updateElement, deleteElement, cutoutElement, deleteCutout,
- pushStoryboardImage,
- type KeyFrame, type Job, type ImageRef,
+ pushStoryboardImage, generateSceneAsset, generateSubjectAssets,
+ type AssetBackground, type AssetSize, type KeyFrame, type Job, type ImageRef, type SubjectKind,
} from "@/lib/api"
import { toast } from "sonner"
@@ -24,13 +24,42 @@ interface Props {
embedded?: boolean
}
+const OBJECT_VIEW_OPTIONS = [
+ ["front", "正面"],
+ ["back", "背面"],
+ ["left", "左侧"],
+ ["right", "右侧"],
+ ["top", "顶部"],
+ ["bottom", "底部"],
+]
+
+const LIVING_VIEW_OPTIONS = [
+ ["front", "正面"],
+ ["back", "背面"],
+ ["side", "侧面"],
+ ["side_walk", "走路"],
+ ["expression_happy", "喜"],
+ ["expression_angry", "怒"],
+ ["expression_sad", "哀"],
+ ["expression_relaxed", "乐/放松"],
+ ["action_sit", "坐"],
+ ["action_hold", "手持"],
+ ["action_use", "使用"],
+]
+
export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, onChange, onToggleSelect, onJobUpdate, onSwitchPanel, onCopyImage, embedded = false }: Props) {
const [describing, setDescribing] = useState(false)
const [cleaning, setCleaning] = useState(false)
const [applying, setApplying] = useState(false)
const [cuttingId, setCuttingId] = useState(null)
+  const [sceneGenerating, setSceneGenerating] = useState(false)
+  const [subjectGenerating, setSubjectGenerating] = useState<string | null>(null) // id of the element whose package is being generated
   const [addingZh, setAddingZh] = useState(false)
   const [addInput, setAddInput] = useState("")
+  const [assetSize, setAssetSize] = useState<AssetSize>("source") // sent as `size` by both asset requests
+  const [subjectKinds, setSubjectKinds] = useState<Record<string, SubjectKind>>({}) // the three maps below are keyed by element id
+  const [subjectBackgrounds, setSubjectBackgrounds] = useState<Record<string, AssetBackground>>({})
+  const [subjectViews, setSubjectViews] = useState<Record<string, string[]>>({})
const [editingElement, setEditingElement] = useState<{
id: string
name_zh: string
@@ -87,6 +116,9 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
const desc = f.description
const elements = f.elements ?? []
const hasCleaned = !!f.cleaned_url
+ const latestSceneAsset = f.scene_assets?.[f.scene_assets.length - 1] ?? null
+ const selectedFrameIndices = Array.from(selected).sort((a, b) => a - b)
+ const sharedSubjectFrameIndices = selectedFrameIndices.length > 1 ? selectedFrameIndices : [f.index]
const handleDescribe = async () => {
setDescribing(true)
@@ -116,6 +148,58 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
     }
   }
 
+  // Every view key offered for a subject kind; doubles as the default selection.
+  const defaultViewsFor = (kind: SubjectKind) =>
+    (kind === "living" ? LIVING_VIEW_OPTIONS : OBJECT_VIEW_OPTIONS).map(([value]) => value)
+
+  // Requests a scene asset for the current frame at the selected size and
+  // hands the updated job to the parent.
+  const handleGenerateSceneAsset = async () => {
+    setSceneGenerating(true)
+    try {
+      const updated = await generateSceneAsset(jobId, f.index, { size: assetSize })
+      onJobUpdate?.(updated)
+      toast.success(`分镜 ${f.index + 1} 场景图已生成`)
+    } catch (e) {
+      toast.error("场景图生成失败:" + (e instanceof Error ? e.message : String(e)))
+    } finally {
+      setSceneGenerating(false)
+    }
+  }
+
+  // Requests a subject asset package for one element. A missing or empty view
+  // selection means "every view offered for the element's kind".
+  const handleGenerateSubjectPackage = async (elementId: string) => {
+    const kind = subjectKinds[elementId] ?? "object"
+    const chosen = subjectViews[elementId]
+    const views = chosen?.length ? chosen : defaultViewsFor(kind)
+    setSubjectGenerating(elementId)
+    try {
+      const updated = await generateSubjectAssets(jobId, f.index, elementId, {
+        subject_kind: kind,
+        background: subjectBackgrounds[elementId] ?? "white",
+        size: assetSize,
+        source_frame_indices: sharedSubjectFrameIndices,
+        views,
+      })
+      onJobUpdate?.(updated)
+      toast.success(`主体资产包已生成 · ${views.length} 张`)
+    } catch (e) {
+      toast.error("主体资产包生成失败:" + (e instanceof Error ? e.message : String(e)))
+    } finally {
+      setSubjectGenerating(null)
+    }
+  }
+
+  // Toggles one view for an element; the first toggle starts from the full default set.
+  const toggleSubjectView = (elementId: string, view: string, kind: SubjectKind) => {
+    setSubjectViews((prev) => {
+      const current = prev[elementId] ?? defaultViewsFor(kind)
+      const next = current.includes(view) ? current.filter((x) => x !== view) : [...current, view]
+      return { ...prev, [elementId]: next }
+    })
+  }
+
   const handleExtractRegion = async () => {
     // 提取语义只在恰好 1 个框时支持
     if (regions.length !== 1 || !extractName.trim()) return
@@ -548,6 +624,56 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
{cleaning ? "清洗中…(5-15 秒)" : hasCleaned ? "重新清洗" : f.cleaned_applied ? "再次清洗" : "🧹 清洗水印"}
+
+
+
场景图
+
+
+ {latestSceneAsset ? (
+
+
})
+
+ {latestSceneAsset.width}×{latestSceneAsset.height}
+ {onCopyImage && (
+
+ )}
+
+
+ ) : null}
+ {latestSceneAsset?.quality_report?.warnings?.length ? (
+
+ {latestSceneAsset.quality_report.warnings[0]}
+
+ ) : null}
+
+
+