auto-save 2026-05-14 06:33 (~5)

2026-05-14 06:33:37 +08:00
parent 6480d69c63
commit 0d86b4cff2
5 changed files with 271 additions and 96 deletions
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,26 +1,5 @@
 {
  "entries": [
-    {
-      "files_changed": 1,
-      "hash": "f3ec026",
-      "message": "auto-save 2026-05-12 17:34 (~1)",
-      "ts": "2026-05-12T17:34:26+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "hash": "0a2cfe2",
-      "message": "auto-save 2026-05-12 17:39 (~1)",
-      "ts": "2026-05-12T17:39:59+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "hash": "92bd66f",
-      "message": "auto-save 2026-05-12 17:45 (~1)",
-      "ts": "2026-05-12T17:45:30+08:00",
-      "type": "commit"
-    },
    {
      "files_changed": 1,
      "hash": "440164e",
@@ -3350,6 +3329,25 @@
      "type": "session-heartbeat",
      "message": "Claude 会话活跃 · 最近命令：claude · 1 项未提交变更 · 最近提交：auto-save 2026-05-14 06:22 (~2)",
      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-14T06:28:04+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-14 06:27 (~4)",
+      "hash": "6480d69",
+      "files_changed": 4
+    },
+    {
+      "ts": "2026-05-13T22:28:51Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 2 项未提交变更 · 最近提交：auto-save 2026-05-14 06:27 (~4)",
+      "files_changed": 2
+    },
+    {
+      "ts": "2026-05-13T22:33:14Z",
+      "type": "session-heartbeat",
+      "message": "Claude 会话活跃 · 最近命令：claude · 5 项未提交变更 · 最近提交：auto-save 2026-05-14 06:27 (~4)",
+      "files_changed": 5
    }
  ]
 }
--- a/api/main.py
+++ b/api/main.py
@@ -1957,6 +1957,8 @@ class GenerateSceneAssetReq(BaseModel):
    size: AssetSize = "source"
    scene_mode: SceneMode = "remove_subject"
    scene_style: SceneStyle = "source"
+    prompt: str = ""
+    source_frame_indices: list[int] | None = None


 class GenerateSubjectAssetsReq(BaseModel):
@@ -2096,6 +2098,18 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
    if not src.exists():
        raise HTTPException(404, "source frame file missing")

+    source_indices = [int(x) for x in (req.source_frame_indices or [idx]) if isinstance(x, int) or str(x).isdigit()]
+    if not source_indices:
+        source_indices = [idx]
+    source_indices = list(dict.fromkeys(source_indices))[:8]
+    model_src = src
+    sheet_tmp: Path | None = None
+    if len(source_indices) > 1:
+        sheet_tmp = job_dir(job_id) / "tmp" / f"scene_refs_{idx:03d}_{uuid.uuid4().hex[:6]}.jpg"
+        sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp)
+        if sheet:
+            model_src = sheet
+
    confirmed_subjects = [
        (e.name_en or e.name_zh).strip()
        for ref_frame in job.frames
@@ -2136,10 +2150,23 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
        "warm_lifestyle": "Use a warm lifestyle style: realistic lived-in details, soft natural light, approachable atmosphere.",
        "cinematic": "Use a cinematic style: dramatic but natural lighting, richer depth, filmic contrast, not fantasy.",
    }[req.scene_style]
+    user_prompt = req.prompt.strip()
+    user_prompt_clause = (
+        "User scene direction: " + user_prompt[:1200] + " "
+        if user_prompt
+        else ""
+    )
+    reference_clause = (
+        f"Use the selected reference frame contact sheet as visual evidence for location, composition, lighting, materials, and atmosphere. Reference frame indices: {', '.join(str(i + 1) for i in source_indices)}. "
+        if len(source_indices) > 1
+        else "Use the provided frame as the primary visual reference. "
+    )
    prompt = (
        "Create one clean high-definition scene/background reference image from this frame. "
        + subject_clause
        + "Do not include the removed subject, duplicate people, animals, products, text, watermark, platform UI, captions, usernames, hashtags, logos, or overlay graphics. "
+        + reference_clause
+        + user_prompt_clause
        + mode_clause + " "
        + style_clause + " "
        + "Enhance clarity and texture while avoiding over-smoothing, warped geometry, or changing important perspective details. "
@@ -2147,9 +2174,13 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
    )
    models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
    try:
-        img_bytes, _mode = _image_edit_call(src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
+        img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
    except RuntimeError as e:
        raise HTTPException(500, f"scene asset failed: {e}")
+    finally:
+        if sheet_tmp and sheet_tmp.exists():
+            try: sheet_tmp.unlink()
+            except OSError: pass

    asset_id = f"scene_{idx:03d}_{uuid.uuid4().hex[:8]}"
    out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
@@ -2306,23 +2337,27 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
    generated: list[SubjectAsset] = []
    try:
        for view, view_label in _subject_view_labels(req.subject_kind, req.views):
-            if view == "side_walk":
-                view_prompt = "side view in a natural walking pose, same identity and proportions"
-            elif view.startswith("expression_"):
-                emotion = view_label.replace("表情", "")
-                view_prompt = f"clear {emotion} facial expression reference, frontal or three-quarter standing pose, preserving the same identity"
-            elif view.startswith("action_"):
-                view_prompt = f"{view_label} reference pose, same identity and proportions"
+            if req.subject_kind == "living":
+                if view.startswith("expression_"):
+                    emotion = view_label.replace("表情", "")
+                    view_prompt = f"full-body upright standing character reference with a clear {emotion} facial expression"
+                elif view.startswith("action_") or view == "side_walk":
+                    view_prompt = f"full-body upright standing character reference, {view_label}, same identity and proportions"
+                else:
+                    view_prompt = f"full-body upright standing character reference, {view_label}"
            else:
-                view_prompt = f"{view_label} view"
+                view_prompt = f"complete object/product reference, {view_label} view"
            prompt = (
-                f"Use the reference image(s) to generate a single {view_prompt} of the same {target}. "
+                f"Use the reference image(s) only as visual evidence to redraw the same {target}; do not crop, cut out, paste, or extract pixels from the source. "
+                f"Generate one newly rendered {view_prompt} of the same subject. "
                f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. "
                "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
-                f"Create a high-definition standalone asset on a {bg_phrase} background. "
-                "No extra objects, no original scene fragments, no text, no watermark, no UI. "
-                "If the source is incomplete or occluded, intelligently complete missing parts while staying consistent with the reference. "
-                "For living subjects, keep the body standing and readable; do not create medical, horror, or distorted anatomy."
+                "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
+                "Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
+                f"Create a high-definition standalone asset on a solid {bg_phrase} background. "
+                "No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. "
+                "If the source is incomplete, partially visible, occluded, or low resolution, reconstruct the missing parts by redrawing a clean complete subject while staying consistent with the reference. "
+                "For living subjects, keep a normal upright standing pose for the standard views; do not create sitting, walking, medical, horror, or distorted anatomy unless explicitly requested by the view label."
            )
            try:
                img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
@@ -2331,7 +2366,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat

            asset_id = f"subject_{idx:03d}_{element_id}_{view}_{uuid.uuid4().hex[:8]}"
            out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
-            width, height = _normalize_asset_image(img_bytes, out_path, _source_frame_path(job_id, idx), req.size, req.background, square=False)
+            width, height = _normalize_asset_image(img_bytes, out_path, _source_frame_path(job_id, idx), req.size, req.background, square=False, fill_subject=True)
            generated.append(SubjectAsset(
                id=asset_id,
                view=view,
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -555,7 +555,7 @@
          <div class="step"><div class="num">2</div><h3>镜头拆解</h3><p>拆轨、抽关键帧、手动加帧，形成参考分镜池。</p></div>
          <div class="step"><div class="num">3</div><h3>清洗水印</h3><p>对关键帧做全图或区域清洗，必要时应用为当前参考图。</p></div>
          <div class="step"><div class="num">4</div><h3>主体识别</h3><p>识别场景和主体候选，只是候选，不应锁死。</p></div>
-          <div class="step"><div class="num">5</div><h3>素材准备</h3><p>清洗关键帧，把多张关键帧作为同一主体的参考，先生成一个统一主体资产包，再按关键帧生成多个去主体、相似或换风格场景图。</p></div>
+          <div class="step"><div class="num">5</div><h3>素材准备</h3><p>清洗关键帧，把多张关键帧作为同一主体的参考，先重绘六张标准站立主体资产图，再按关键帧生成多个去主体、相似或换风格场景图。</p></div>
          <div class="step"><div class="num">6</div><h3>分镜改造</h3><p>把参考主体、场景、动作和 SKG 产品放入分镜结构。</p></div>
          <div class="step"><div class="num">7</div><h3>生成视频</h3><p>用分镜 4 图槽、改造目标和时长调用 Seedance / Kling / Veo 3 生视频 API，结果回写到画面工作台节点。</p></div>
          <div class="step"><div class="num">8</div><h3>合成成品</h3><p>片段、字幕、配音、转场合成最终 mp4。当前未实现。</p></div>
@@ -571,7 +571,7 @@
              <tbody>
                <tr><td><code>web/app/page.tsx</code></td><td>产品工作台主状态：jobs、activeJobId、selectedFrames、clipboard、ReactFlow 节点和边；负责打开/找回画布工作面板。</td></tr>
                <tr><td><code>web/components/nodes/index.tsx</code></td><td>DAG 节点定义：Input、VisualLab、Audio、Compose，以及画布工作面板 KeyframePanel / VideoFramePanel；旧 Keyframe/Storyboard/VideoGen 组件保留但不再挂主画布。</td></tr>
-                <tr><td><code>web/components/lightbox.tsx</code></td><td>关键帧素材准备面板：清洗、统一主体候选、统一主体资产包、每帧去主体场景图和审核。</td></tr>
+                <tr><td><code>web/components/lightbox.tsx</code></td><td>关键帧素材准备面板：清洗、统一主体候选、参考帧网格、六张主体重绘图、每帧去主体场景图和审核。</td></tr>
                <tr><td><code>web/components/storyboard-bar.tsx</code></td><td>顶部分镜编排条：展示选入编排的关键帧，并作为唯一分镜导航。</td></tr>
                <tr><td><code>web/components/storyboard-workbench.tsx</code></td><td>顶部分镜编排条下方的明细区：4 图槽、改造目标、时长、自动保存。</td></tr>
                <tr><td><code>web/lib/api.ts</code></td><td>前端类型和 API client，是前后端数据契约镜像。</td></tr>
@@ -623,7 +623,7 @@ api/main.py
          </div>
          <div class="flow-row">
            <div><strong>你看到的区域</strong><span>关键帧素材审核面板</span></div>
-            <div><strong>主要源码</strong><span><code>FrameLightbox</code>；按“原图/清洗、主体资产、场景图、审核”四个页签组织；左侧只放主图/框选画布，右侧承载当前页操作、状态和结果；主体资产页只确认一个统一主体，默认用全部关键帧或已选关键帧作为参考；场景图依赖主体资产，按当前关键帧生成去主体原场景、相似新场景或同构换风格。相关接口包括 <code>cleanupFrame</code>、<code>addElement</code>、<code>generateSubjectAssets</code>、<code>generateSceneAsset</code>。</span></div>
+            <div><strong>主要源码</strong><span><code>FrameLightbox</code>；按“原图/清洗、主体资产、场景图、审核”四个页签组织；左侧只放主图/框选画布，但主体资产页左侧改为全部参考帧网格；右侧承载当前页操作、状态和结果；主体资产页只确认一个统一主体，默认用全部关键帧作为参考，存在已清洗或已选关键帧时改用这两类帧的并集作为参考，后端按参考重绘六张纯背景、占满画面的标准站立主体图；场景图依赖主体资产，按当前关键帧生成去主体原场景、相似新场景或同构换风格。相关接口包括 <code>cleanupFrame</code>、<code>addElement</code>、<code>generateSubjectAssets</code>、<code>generateSceneAsset</code>。</span></div>
            <div><strong>适合怎么描述</strong><span>“这一组关键帧如何共同生成一个统一主体包；某张关键帧的水印、去主体场景图和质量风险应该如何审核”。</span></div>
          </div>
          <div class="flow-row">
@@ -730,7 +730,7 @@ SubjectAsset {
            <tr><td>应用清洗</td><td><code>POST /cleanup/apply</code></td><td><code>applyCleanedFrame</code></td><td>物理覆盖 frames/{idx}.jpg，并备份原图。</td></tr>
            <tr><td>元素增改删</td><td><code>POST/PATCH/DELETE /elements</code></td><td><code>addElement/updateElement/deleteElement</code></td><td>让用户修正 Vision 错误，避免候选结果锁死。</td></tr>
            <tr><td>元素提取</td><td><code>POST /elements/{element_id}/cutout</code></td><td><code>cutoutElement</code></td><td>调用图像模型生成独立白底素材图，每次累积一张 cutout。</td></tr>
-            <tr><td>主体资产包</td><td><code>POST /elements/{element_id}/subject-assets</code></td><td><code>generateSubjectAssets</code></td><td>根据用户选择的视图、动作和表情生成一个统一主体资产包；前端默认把全部关键帧作为 <code>source_frame_indices</code>，如果用户手动选择了关键帧则只传已选帧，后端拼参考板。</td></tr>
+            <tr><td>主体资产包</td><td><code>POST /elements/{element_id}/subject-assets</code></td><td><code>generateSubjectAssets</code></td><td>根据参考帧重新绘制一个统一主体资产包；前端默认把全部关键帧作为 <code>source_frame_indices</code>；如果存在已清洗或用户手动选择的关键帧，则改传已清洗帧与已选帧的并集（去重），后端拼参考板。默认输出六张标准站立/转身参考图，纯白/黑背景，不含其他元素，并裁去空白让主体占满画面。</td></tr>
            <tr><td>场景资产</td><td><code>POST /frames/{idx}/scene-asset</code></td><td><code>generateSceneAsset</code></td><td>在统一主体资产之后，按当前关键帧生成去主体背景板；请求包含 <code>scene_mode</code> 和 <code>scene_style</code>，可做原场景补背景、相似新场景或同构换风格，保留历史版本用于人工审核。</td></tr>
            <tr><td>分镜保存</td><td><code>PUT /frames/{idx}/storyboard</code></td><td><code>updateStoryboard</code></td><td>保存 4 图槽、时长和改造说明。</td></tr>
            <tr><td>生图</td><td><code>POST /frames/{idx}/generate</code></td><td><code>generateImage</code></td><td>基于关键帧或已选生成图做 image-to-image，目前可用。</td></tr>
@@ -841,6 +841,19 @@ SubjectAsset {
        <h2>变更记录</h2>
        <p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
        <div class="changelog">
+          <article class="change">
+            <header>
+              <h3>2026-05-14 · 主体资产改为参考重绘六张标准图</h3>
+              <span class="tag violet">FrameLightbox</span>
+              <span class="tag blue">Assets</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>主体资产不是抠图，也不是只看当前单帧生成多角度；主体页需要看到全部参考帧，并用这些参考重新绘制一个完整主体。</p>
+              <p><strong>改动：</strong><code>FrameLightbox</code> 在“主体资产”页左侧显示全部参考帧网格，小图排列，可点击切换当前帧；右侧仍负责统一主体确认和生成。人物/生物默认视图改为六张标准站立/转身图：正面、背面、左侧、右侧、左前 45°、右前 45°。</p>
+              <p><strong>后端：</strong><code>generateSubjectAssets</code> prompt 改为“参考重绘”，明确禁止裁剪/抠图/粘贴源像素，要求主体完整居中、纯白/黑背景、无其他元素，并占画面约 85-95% 高度；落盘时会裁掉纯背景空白并放大主体。</p>
+              <p><strong>影响：</strong><code>web/components/lightbox.tsx</code>、<code>web/components/nodes/index.tsx</code>、<code>api/main.py</code>、<code>docs/source-analysis.html</code>。</p>
+            </div>
+          </article>
          <article class="change">
            <header>
              <h3>2026-05-14 · 主体资产改为统一主体参考帧生成</h3>
--- a/web/components/lightbox.tsx
+++ b/web/components/lightbox.tsx
@@ -65,6 +65,25 @@ const SCENE_STYLE_OPTIONS: Array<[SceneStyle, string]> = [
  ["cinematic", "电影感"],
 ]

+const SCENE_LOCATION_OPTIONS = [
+  ["modern living room", "现代客厅"],
+  ["minimal studio", "极简影棚"],
+  ["premium bathroom", "高端浴室"],
+  ["bedroom nightstand", "卧室床头"],
+  ["office desk", "办公桌面"],
+  ["retail display", "零售陈列"],
+  ["outdoor patio", "户外露台"],
+]
+
+const SCENE_REFERENCE_OPTIONS = [
+  ["camera angle and composition", "构图/机位"],
+  ["lighting direction", "光线方向"],
+  ["material textures", "材质纹理"],
+  ["color palette", "色彩氛围"],
+  ["spatial layout", "空间层次"],
+  ["social media realism", "真实生活感"],
+]
+
 export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, onChange, onToggleSelect, onJobUpdate, onSwitchPanel, onCopyImage, embedded = false }: Props) {
  const [describing, setDescribing] = useState(false)
  const [cleaningFrameIds, setCleaningFrameIds] = useState<Set<number>>(new Set())
@@ -76,6 +95,10 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
  const [assetSize, setAssetSize] = useState<AssetSize>("source")
  const [sceneMode, setSceneMode] = useState<SceneMode>("remove_subject")
  const [sceneStyle, setSceneStyle] = useState<SceneStyle>("source")
+  const [sceneLocation, setSceneLocation] = useState("modern living room")
+  const [sceneReferenceKeys, setSceneReferenceKeys] = useState<string[]>(["camera angle and composition", "lighting direction", "spatial layout"])
+  const [sceneExtraKeywords, setSceneExtraKeywords] = useState("")
+  const [scenePrompt, setScenePrompt] = useState("")
  const [subjectKinds, setSubjectKinds] = useState<Record<string, SubjectKind>>({})
  const [subjectBackgrounds, setSubjectBackgrounds] = useState<Record<string, AssetBackground>>({})
  const [subjectViews, setSubjectViews] = useState<Record<string, string[]>>({})
@@ -134,11 +157,23 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
  const cleanedFrameCount = frames.filter((frame) => frame.cleaned_applied || frame.cleaned_url).length
  const pendingCleanFrames = frames.filter((frame) => !frame.cleaned_applied && !frame.cleaned_url)
  const selectedFrameIndices = Array.from(selected).sort((a, b) => a - b)
-  const subjectReferenceFrameIndices = (selectedFrameIndices.length > 0 ? selectedFrameIndices : frames.map((frame) => frame.index))
+  const cleanedFrameIndices = frames
+    .filter((frame) => frame.cleaned_applied || frame.cleaned_url)
+    .map((frame) => frame.index)
+  const subjectReferenceFrameIndices = (
+    cleanedFrameIndices.length > 0 || selectedFrameIndices.length > 0
+      ? [...cleanedFrameIndices, ...selectedFrameIndices]
+      : frames.map((frame) => frame.index)
+  )
    .filter((idx, pos, arr) => arr.indexOf(idx) === pos)
+  const subjectReferenceFrames = subjectReferenceFrameIndices
+    .map((idx) => frames.find((frame) => frame.index === idx))
+    .filter((frame): frame is KeyFrame => Boolean(frame))
  const subjectReferenceLabel = selectedFrameIndices.length > 0
-    ? `${subjectReferenceFrameIndices.length} 已选帧参考`
-    : `${subjectReferenceFrameIndices.length} 全部帧参考`
+    ? `${subjectReferenceFrameIndices.length} 清洗/已选帧参考`
+    : cleanedFrameIndices.length > 0
+      ? `${subjectReferenceFrameIndices.length} 已清洗帧参考`
+      : `${subjectReferenceFrameIndices.length} 全部帧参考`
  const subjectElementRefs = frames.flatMap((frame) =>
    (frame.elements ?? []).map((element) => ({
      frameIndex: frame.index,
@@ -162,7 +197,29 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
    ...(latestSceneAsset?.quality_report?.warnings ?? []),
  ]
  const isSubjectTab = activeTab === "subject"
+  const isSceneTab = activeTab === "scene"
  const isCleanTab = activeTab === "clean"
+  const sceneReferenceFrameIndices = (selectedFrameIndices.length > 0 ? selectedFrameIndices : [f.index])
+    .filter((idx, pos, arr) => arr.indexOf(idx) === pos)
+  const sceneReferenceFrames = sceneReferenceFrameIndices
+    .map((idx) => frames.find((frame) => frame.index === idx))
+    .filter((frame): frame is KeyFrame => Boolean(frame))
+  const unifiedSubjectName = subjectElementRefs[0]?.element.name_zh || "统一主体"
+  const sceneLocationLabel = SCENE_LOCATION_OPTIONS.find(([value]) => value === sceneLocation)?.[1] ?? sceneLocation
+  const sceneStyleLabel = SCENE_STYLE_OPTIONS.find(([value]) => value === sceneStyle)?.[1] ?? sceneStyle
+  const sceneModeLabel = SCENE_MODE_OPTIONS.find(([value]) => value === sceneMode)?.[1] ?? sceneMode
+  const sceneReferenceLabels = sceneReferenceKeys
+    .map((key) => SCENE_REFERENCE_OPTIONS.find(([value]) => value === key)?.[1] ?? key)
+  const scenePromptDraft = [
+    `主体：移除 ${unifiedSubjectName} 后生成空场景。`,
+    `地点：${sceneLocationLabel}。`,
+    `生成方式：${sceneModeLabel}。`,
+    `风格：${sceneStyleLabel}。`,
+    `参考帧：${sceneReferenceFrames.map((frame) => `分镜${frame.index + 1}`).join("、") || `分镜${f.index + 1}`}。`,
+    sceneReferenceLabels.length > 0 ? `保留参考：${sceneReferenceLabels.join("、")}。` : "",
+    sceneExtraKeywords.trim() ? `额外关键词：${sceneExtraKeywords.trim()}。` : "",
+    "要求：无主体、无人物动物产品、无文字水印，保持可用于后续视频生成的干净背景板。",
+  ].filter(Boolean).join("\n")

  const handleDescribe = async () => {
    setDescribing(true)
@@ -237,7 +294,13 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
    }
    setSceneGenerating(true)
    try {
-      const updated = await generateSceneAsset(jobId, f.index, { size: assetSize, scene_mode: sceneMode, scene_style: sceneStyle })
+      const updated = await generateSceneAsset(jobId, f.index, {
+        size: assetSize,
+        scene_mode: sceneMode,
+        scene_style: sceneStyle,
+        prompt: scenePrompt.trim() || scenePromptDraft,
+        source_frame_indices: sceneReferenceFrameIndices,
+      })
      onJobUpdate?.(updated)
      toast.success(`分镜 ${f.index + 1} 场景图已生成`)
    } catch (e) {
@@ -388,6 +451,13 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
  })()
  // bust cache：替换后 frames/{idx}.jpg 内容已变，要刷新
  const mainSrc = `${frameUrl(jobId, f.index)}${f.cleaned_applied ? "?applied=1" : ""}`
+  const referenceFrameSrc = (frame: KeyFrame) => {
+    if (frame.cleaned_url) {
+      const ts = frame.cleaned_url.match(/t=(\d+)/)?.[1]
+      return cleanedFrameUrl(jobId, frame.index, ts)
+    }
+    return `${frameUrl(jobId, frame.index)}${frame.cleaned_applied ? "?applied=1" : ""}`
+  }

  const content = (
    <div
@@ -484,61 +554,112 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
              ? { flex: "1 1 500px", minWidth: 300, maxWidth: 600, minHeight: 0 }
              : { flex: "1 1 560px", minWidth: 300, maxWidth: 680, minHeight: 0 }}
        >
-          {/* 上方：主图 + 画框 overlay */}
-          <div
-            ref={imgWrapRef}
-            className={`relative ${cropMode ? "cursor-crosshair select-none" : ""}`}
-            onMouseDown={onCropMouseDown}
-            onMouseMove={onCropMouseMove}
-            onMouseUp={onCropMouseUp}
-            onMouseLeave={onCropMouseUp}
-          >
-            <img
-              src={mainSrc}
-              alt={`frame ${f.index}`}
-              className="rounded-lg object-contain w-full pointer-events-none"
-              style={{
-                maxHeight: isSubjectTab ? "64vh" : "68vh",
-              }}
-              draggable={false}
-            />
-            <div className="absolute top-2 left-2 text-[9.5px] px-1.5 py-0.5 rounded backdrop-blur bg-black/50 text-white/80 pointer-events-none">
-              {f.cleaned_applied ? "✨ 已替换为清洗版" : "原图"}
-            </div>
-
-            {/* 已确认的多个选区 */}
-            {cropMode && regions.map((r, i) => (
-              <div key={i} className="absolute pointer-events-none border-2 border-cyan-300/90 bg-cyan-300/10"
-                style={{
-                  left: `${r.x * 100}%`,
-                  top: `${r.y * 100}%`,
-                  width: `${r.w * 100}%`,
-                  height: `${r.h * 100}%`,
-                }}
+          {isSubjectTab ? (
+            <section className="rounded-lg border border-violet-300/15 bg-violet-500/[0.06] p-2.5">
+              <div className="mb-2 flex items-center justify-between gap-2">
+                <div className="text-[12px] font-semibold text-white">主体参考帧</div>
+                <span className="text-[9.5px] font-mono text-white/38">{subjectReferenceLabel}</span>
+              </div>
+              <div
+                className="grid gap-2"
+                style={{ gridTemplateColumns: "repeat(auto-fill, minmax(104px, 1fr))" }}
              >
-                <span className="absolute -top-4 left-0 text-[9px] px-1 py-0 rounded-sm bg-cyan-300 text-black font-bold leading-tight">#{i + 1}</span>
+                {subjectReferenceFrames.map((frame) => {
+                  const active = frame.index === f.index
+                  return (
+                    <button
+                      key={frame.index}
+                      type="button"
+                      onClick={() => onChange(frame.index)}
+                      className={`group overflow-hidden rounded-md border bg-black/35 text-left transition ${
+                        active
+                          ? "border-violet-300/70 shadow-[0_0_0_1px_rgba(196,181,253,0.25)]"
+                          : "border-white/10 hover:border-violet-300/45"
+                      }`}
+                      title={`切换到分镜 ${frame.index + 1}`}
+                    >
+                      <div className="relative aspect-[9/13] bg-black">
+                        <img
+                          src={referenceFrameSrc(frame)}
+                          alt={`subject reference ${frame.index}`}
+                          className="h-full w-full object-contain"
+                          draggable={false}
+                        />
+                        <span className="absolute left-1 top-1 rounded bg-black/65 px-1 py-0.5 text-[8.5px] font-mono text-white/80">
+                          {String(frame.index + 1).padStart(2, "0")}
+                        </span>
+                        {frame.cleaned_url || frame.cleaned_applied ? (
+                          <span className="absolute right-1 top-1 rounded bg-emerald-500/80 px-1 py-0.5 text-[8px] text-white">
+                            净
+                          </span>
+                        ) : null}
+                      </div>
+                      <div className="flex items-center justify-between gap-1 px-1.5 py-1 text-[9.5px] text-white/52">
+                        <span>{frame.timestamp.toFixed(2)}s</span>
+                        {active && <span className="text-violet-200">当前</span>}
+                      </div>
+                    </button>
+                  )
+                })}
              </div>
-            ))}
-
-            {/* 当前正在拖的草稿框 */}
-            {cropMode && draftRegion && draftRegion.w > 0 && draftRegion.h > 0 && (
-              <div className="absolute pointer-events-none border-2 border-cyan-300 border-dashed shadow-[0_0_0_1px_rgba(0,0,0,0.4)]"
-                style={{
-                  left: `${draftRegion.x * 100}%`,
-                  top: `${draftRegion.y * 100}%`,
-                  width: `${draftRegion.w * 100}%`,
-                  height: `${draftRegion.h * 100}%`,
-                }}
+              <div className="mt-2 text-[10px] leading-relaxed text-white/38">
+                这些参考帧会一起传给模型，用来重绘同一个主体；不是逐张抠图。
+              </div>
+            </section>
+          ) : (
+            <div
+              ref={imgWrapRef}
+              className={`relative ${cropMode ? "cursor-crosshair select-none" : ""}`}
+              onMouseDown={onCropMouseDown}
+              onMouseMove={onCropMouseMove}
+              onMouseUp={onCropMouseUp}
+              onMouseLeave={onCropMouseUp}
+            >
+              <img
+                src={mainSrc}
+                alt={`frame ${f.index}`}
+                className="rounded-lg object-contain w-full pointer-events-none"
+                style={{ maxHeight: "68vh" }}
+                draggable={false}
              />
-            )}
-
-            {/* 画框模式角标（小，左上） — 不再遮挡画面 */}
-            {cropMode && (
-              <div className="absolute top-2 right-2 text-[9.5px] px-1.5 py-0.5 rounded backdrop-blur bg-cyan-500/85 text-white pointer-events-none font-medium">
-                画框 · 已选 {regions.length}
+              <div className="absolute top-2 left-2 text-[9.5px] px-1.5 py-0.5 rounded backdrop-blur bg-black/50 text-white/80 pointer-events-none">
+                {f.cleaned_applied ? "✨ 已替换为清洗版" : "原图"}
              </div>
-            )}
-          </div>
+
+              {/* 已确认的多个选区 */}
+              {cropMode && regions.map((r, i) => (
+                <div key={i} className="absolute pointer-events-none border-2 border-cyan-300/90 bg-cyan-300/10"
+                  style={{
+                    left: `${r.x * 100}%`,
+                    top: `${r.y * 100}%`,
+                    width: `${r.w * 100}%`,
+                    height: `${r.h * 100}%`,
+                  }}
+                >
+                  <span className="absolute -top-4 left-0 text-[9px] px-1 py-0 rounded-sm bg-cyan-300 text-black font-bold leading-tight">#{i + 1}</span>
+                </div>
+              ))}
+
+              {/* 当前正在拖的草稿框 */}
+              {cropMode && draftRegion && draftRegion.w > 0 && draftRegion.h > 0 && (
+                <div className="absolute pointer-events-none border-2 border-cyan-300 border-dashed shadow-[0_0_0_1px_rgba(0,0,0,0.4)]"
+                  style={{
+                    left: `${draftRegion.x * 100}%`,
+                    top: `${draftRegion.y * 100}%`,
+                    width: `${draftRegion.w * 100}%`,
+                    height: `${draftRegion.h * 100}%`,
+                  }}
+                />
+              )}
+
+              {/* Crop-mode badge (small, top-right) — kept out of the way so it no longer covers the image */}
+              {cropMode && (
+                <div className="absolute top-2 right-2 text-[9.5px] px-1.5 py-0.5 rounded backdrop-blur bg-cyan-500/85 text-white pointer-events-none font-medium">
+                  画框 · 已选 {regions.length}
+                </div>
+              )}
+            </div>
+          )}

        </div>

--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -657,7 +657,13 @@ export async function cutoutElement(jobId: string, frameIdx: number, elementId:
 export async function generateSceneAsset(
  jobId: string,
  frameIdx: number,
-  body: { size?: AssetSize; scene_mode?: SceneMode; scene_style?: SceneStyle } = {},
+  body: {
+    size?: AssetSize
+    scene_mode?: SceneMode
+    scene_style?: SceneStyle
+    prompt?: string
+    source_frame_indices?: number[]
+  } = {},
 ): Promise<Job> {
  const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/scene-asset`, {
    method: "POST",
@@ -667,6 +673,8 @@ export async function generateSceneAsset(
      size: body.size ?? "source",
      scene_mode: body.scene_mode ?? "remove_subject",
      scene_style: body.scene_style ?? "source",
+      prompt: body.prompt ?? "",
+      source_frame_indices: body.source_frame_indices ?? null,
    }),
  })
  if (!res.ok) {