From 646f945fe9e840091c49cc3e0edb7dda6c3f2c82 Mon Sep 17 00:00:00 2001
From: kang <wankang2050@gmail.com>
Date: Thu, 14 May 2026 13:10:42 +0800
Subject: [PATCH] auto-save 2026-05-14 13:10 (~5)

---
 .memory/worklog.json        |  40 +++---
 api/main.py                 |   2 +-
 docs/source-analysis.html   |  26 +++-
 web/app/page.tsx            |  30 ++---
 web/components/lightbox.tsx | 241 +++++++++++++++++-------------------
 5 files changed, 171 insertions(+), 168 deletions(-)
diff --git a/.memory/worklog.json b/.memory/worklog.json
index 4de0a0c..11b30ff 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,26 +1,5 @@
 {
   "entries": [
-    {
-      "files_changed": 1,
-      "hash": "3417408",
-      "message": "auto-save 2026-05-13 06:57 (~1)",
-      "ts": "2026-05-13T06:57:49+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "hash": "3472551",
-      "message": "auto-save 2026-05-13 07:03 (~1)",
-      "ts": "2026-05-13T07:03:42+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "hash": "cbb8e7f",
-      "message": "auto-save 2026-05-13 07:09 (~1)",
-      "ts": "2026-05-13T07:09:36+08:00",
-      "type": "commit"
-    },
     {
       "files_changed": 1,
       "hash": "1e4fd9e",
@@ -3283,6 +3262,25 @@
       "message": "auto-save 2026-05-14 12:59 (~1)",
       "hash": "887c9a0",
       "files_changed": 1
+    },
+    {
+      "ts": "2026-05-14T13:05:12+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-14 13:04 (+1, ~3)",
+      "hash": "69e73d4",
+      "files_changed": 39
+    },
+    {
+      "ts": "2026-05-14T05:06:11Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 2 项未提交变更 · 最近提交：auto-save 2026-05-14 13:04 (+1, ~3)",
+      "files_changed": 2
+    },
+    {
+      "ts": "2026-05-14T05:08:40Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 3 项未提交变更 · 最近提交：auto-save 2026-05-14 13:04 (+1, ~3)",
+      "files_changed": 3
     }
   ]
 }
diff --git a/api/main.py b/api/main.py
index 32ec6b1..14e179b 100644
--- a/api/main.py
+++ b/api/main.py
@@ -3677,7 +3677,7 @@ def generate_storyboard_video(job_id: str, idx: int, req: GenerateStoryboardVide
     product_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in raw_product_refs) if p]
     subject_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in req.subject_images[:8]) if p]
     reference_ref_paths = []
-    seen_ref_paths: set[str] = set()
+    seen_ref_paths: set[str] = {str(ref_path)}
     for p in [*subject_ref_paths, *product_ref_paths]:
         key = str(p)
         if key not in seen_ref_paths:
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index a9e3d83..658473d 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -556,8 +556,8 @@
           <div class="step"><div class="num">3</div><h3>清洗水印</h3><p>对关键帧做全图或区域清洗，清洗版先进入待审核状态；确认后可单张替换，也可一键替换全部待应用清洗版。</p></div>
           <div class="step"><div class="num">4</div><h3>主体识别</h3><p>识别场景和主体候选，只是候选，不应锁死。</p></div>
           <div class="step"><div class="num">5</div><h3>素材准备</h3><p>清洗关键帧，把多张关键帧作为同一主体的参考，先重绘六张标准站立主体资产图，再按关键帧生成多个去主体、相似或换风格场景图。</p></div>
-          <div class="step"><div class="num">6</div><h3>分镜改造</h3><p>把参考主体、场景、动作和 SKG 产品放入分镜结构；产品融合使用纵向 6 行镜头工作表，只补人物首帧、尾帧、描述词和秒数，产品图固定内置。</p></div>
-          <div class="step"><div class="num">7</div><h3>生成视频</h3><p>普通分镜可调用 Seedance / Kling / Veo 3；产品融合自动传入固定 4 张 SKG 产品图和每行首尾帧，用 Seedance 按秒数生成视频，结果回写到对应行。</p></div>
+          <div class="step"><div class="num">6</div><h3>分镜改造</h3><p>把内置透明骨架人角色、场景/镜头描述和固定 SKG 产品放入分镜结构；产品融合使用纵向 6 行镜头工作表，只选角色、微调描述词和秒数。</p></div>
+          <div class="step"><div class="num">7</div><h3>生成视频</h3><p>普通分镜可调用 Seedance / Kling / Veo 3；产品融合自动传入所选角色 7 张参考图和固定 4 张 SKG 产品图，用 Seedance 按秒数生成视频，结果回写到对应行。</p></div>
           <div class="step"><div class="num">8</div><h3>声音文案</h3><p>音频轨独立处理：提取原音频并按实际秒数生成 SKG 英文产品介绍 voice-over，ASR/翻译只作为改前对照和节奏参考；配置 MiniMax 后从男声、女声、成熟声池随机生成自然英文配音 mp3。底部音频条播放原音频时，指针会按时间走过字幕节点。</p></div>
           <div class="step"><div class="num">9</div><h3>合成成品</h3><p>片段、字幕、配音、转场合成最终 mp4。当前未实现。</p></div>
         </div>
@@ -629,7 +629,7 @@ api/main.py
           </div>
           <div class="flow-row">
             <div><strong>你看到的区域</strong><span>关键帧素材审核面板</span></div>
-            <div><strong>主要源码</strong><span><code>FrameLightbox</code>；按“原图/清洗、主体资产、首尾帧、产品融合、审核”五个页签组织；左侧只放主图/框选画布，但主体资产页左侧改为全部已清洗/已选参考帧网格，首尾帧页左侧显示全部关键帧并可勾选人物/机位参考。主体识别页会显示透明骨架人目标和 Vision 验收分数。清洗页右侧支持一键清洗未处理帧、单张替换清洗版和一键替换全部待应用清洗版；批量替换顺序调用 <code>applyCleanedFrame</code>，不新增后端接口。产品融合页左侧是纵向 6 行镜头工作表：每行只显示首帧、尾帧、已预填动作描述、秒数、生成按钮和对应视频结果；描述词内置 36 条镜头语言模板，按“建立出场、产品入画、佩戴贴合、使用感受、生活延展、收尾记忆”排列，点击“换一组”只刷新 6 行描述词。四张桌面 SKG 产品图作为固定产品参考，生成时通过 <code>copyProductLibraryAsset</code> 自动写入镜头，不再暴露产品角度槽、产品融合辅助栏或产品图库选择器。产品融合槽位的“粘贴”优先使用应用内 <code>clipboard</code>，也支持选中槽位后 Cmd+V 粘贴系统图片。主体资产页只确认一个统一主体，后端按参考重绘六张纯背景、占满画面的标准站立透明骨架人资产图；首尾帧页通过地点、风格、参考要素和可编辑 prompt 做文字生图，生成结果写入 <code>scene_assets</code> 但以 <code>asset_role=first_frame/last_frame</code> 标记，并自动传入当前产品融合镜头。相关接口包括 <code>cleanupFrame</code>、<code>applyCleanedFrame</code>、<code>addElement</code>、<code>generateSubjectAssets</code>、<code>generateSceneAsset</code> 和 <code>copyProductLibraryAsset</code>。</span></div>
+            <div><strong>主要源码</strong><span><code>FrameLightbox</code>；按“原图/清洗、主体资产、首尾帧、产品融合、审核”五个页签组织；左侧只放主图/框选画布，但主体资产页左侧改为全部已清洗/已选参考帧网格，首尾帧页左侧显示全部关键帧并可勾选人物/机位参考。主体识别页会显示透明骨架人目标和 Vision 验收分数。清洗页右侧支持一键清洗未处理帧、单张替换清洗版和一键替换全部待应用清洗版；批量替换顺序调用 <code>applyCleanedFrame</code>，不新增后端接口。产品融合页左侧是纵向 6 行镜头工作表：顶部选择 5 个内置透明骨架人角色之一，每行只显示已预填场景/产品使用/享受描述、秒数、生成按钮和对应视频结果；描述词内置 36 条镜头语言模板，按“建立出场、产品入画、佩戴贴合、使用感受、生活延展、收尾记忆”排列，点击“换一组”只刷新 6 行描述词。四张桌面 SKG 产品图和所选角色 7 张参考图作为固定参考，生成时分别通过 <code>copyProductLibraryAsset</code> 与 <code>copyCharacterLibraryAssets</code> 自动写入当前 job，不再暴露产品角度槽、产品融合辅助栏、产品图库选择器或首尾帧槽。主体资产页只确认一个统一主体，后端按参考重绘六张纯背景、占满画面的标准站立透明骨架人资产图；首尾帧页保留给旧流程/单独生图，不再是产品融合必填步骤。相关接口包括 <code>cleanupFrame</code>、<code>applyCleanedFrame</code>、<code>addElement</code>、<code>generateSubjectAssets</code>、<code>generateSceneAsset</code>、<code>copyProductLibraryAsset</code> 和 <code>copyCharacterLibraryAssets</code>。</span></div>
             <div><strong>适合怎么描述</strong><span>“这一组关键帧如何共同生成一个统一主体包；某张关键帧的水印、去主体场景图、产品融合镜头组和质量风险应该如何审核”。</span></div>
           </div>
           <div class="flow-row">
@@ -748,7 +748,7 @@ SubjectAsset {
           </div>
           <div class="card">
             <h3>ProductFusionShot</h3>
-            <p>产品融合镜头组的单行数据。每个关键帧最多 6 行，用户只补首帧、尾帧和必要时微调动作描述、秒数；四张桌面 SKG 产品角度图固定隐藏填充，生成时直接把首尾帧和固定产品图作为 Seedance 垫图提交。</p>
+            <p>产品融合镜头组的单行数据。每个关键帧最多 6 行，用户选择一个内置角色后只微调场景/产品使用/享受描述和秒数；四张桌面 SKG 产品角度图与所选角色 7 张参考图固定隐藏填充，生成时作为 Seedance 参考图提交。</p>
             <pre>ProductFusionShot {
   id,
   first_image,
@@ -805,7 +805,9 @@ SubjectAsset {
             <tr><td>首尾帧资产</td><td><code>POST /frames/{idx}/scene-asset</code></td><td><code>generateSceneAsset</code></td><td>同一接口兼容旧场景图和新首尾帧；新流程传 <code>asset_role=first_frame/last_frame</code>，后端走文字生图，参考帧只用于理解透明骨架人形象、比例、机位和光线，生成结果仍保存在 <code>scene_assets</code> 并自动填入产品融合镜头。</td></tr>
             <tr><td>产品图库</td><td><code>GET /product-library/skg</code></td><td><code>listProductLibrary</code></td><td>读取内置 SKG 白底图库 manifest，返回产品标题、品类、尺寸、白底评分和预览图 URL。</td></tr>
             <tr><td>产品图入库到 job</td><td><code>POST /jobs/{id}/assets/product-library</code></td><td><code>copyProductLibraryAsset</code></td><td>把一个内置产品图库条目复制为当前 job 的普通 asset，返回 <code>ImageRef(kind="asset")</code>，用于画面工作台产品融合和分镜产品参考组。</td></tr>
-            <tr><td>产品融合引导图</td><td><code>POST /jobs/{id}/product-fusion/guide</code></td><td><code>createProductFusionGuide</code></td><td>旧流程兼容接口：读取产品图和白底人物图，按 <code>product_region</code> 合成位置引导图。当前首尾帧流程不再主动调用它。</td></tr>
+            <tr><td>角色库</td><td><code>GET /character-library/skg</code></td><td><code>listCharacterLibrary</code></td><td>读取内置 5 个透明骨架人角色 manifest，每个角色含正面、左右 45 度、侧面、背面、半身近景和背部特写 7 张参考图。</td></tr>
+            <tr><td>角色图入库到 job</td><td><code>POST /jobs/{id}/assets/character-library</code></td><td><code>copyCharacterLibraryAssets</code></td><td>把所选角色的 7 张参考图复制为当前 job asset，返回 <code>subject_images</code>，产品融合生成视频时作为人物身份参考图提交。</td></tr>
+            <tr><td>产品融合引导图</td><td><code>POST /jobs/{id}/product-fusion/guide</code></td><td><code>createProductFusionGuide</code></td><td>旧流程兼容接口：读取产品图和白底人物图，按 <code>product_region</code> 合成位置引导图。当前内置角色 + 产品 + 描述流程不再主动调用它。</td></tr>
             <tr><td>产品融合描述词</td><td><code>POST /jobs/{id}/product-fusion/descriptions</code></td><td><code>generateProductFusionDescriptions</code></td><td>兼容接口：可生成产品融合动作描述库。当前前端默认直接用本地 36 条镜头语言模板预填 6 行镜头，并通过“换一组”按钮按 6 条一组轮换。</td></tr>
             <tr><td>分镜保存</td><td><code>PUT /frames/{idx}/storyboard</code></td><td><code>updateStoryboard</code></td><td>保存 4 图槽、时长和改造说明。</td></tr>
             <tr><td>生图</td><td><code>POST /frames/{idx}/generate</code></td><td><code>generateImage</code></td><td>基于关键帧或已选生成图做 image-to-image，目前可用。</td></tr>
@@ -917,6 +919,20 @@ SubjectAsset {
         <h2>变更记录</h2>
         <p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
         <div class="changelog">
+          <article class="change">
+            <header>
+              <h3>2026-05-14 · 产品融合改为内置角色 + 产品 + 描述生成</h3>
+              <span class="tag violet">FrameLightbox</span>
+              <span class="tag orange">产品融合</span>
+              <span class="tag blue">角色库</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>当前产品融合不再需要手动首帧/尾帧，用户要的是从内置透明骨架人角色、场景描述、产品使用方式和享受状态直接生成视频。</p>
+              <p><strong>改动：</strong>桌面 <code>skg_anatomy_characters_20260514_120852</code> 的 5 个角色、35 张图内置为 <code>api/character_library/skg-characters</code>。产品融合页新增角色下拉和角色预览，每行只保留场景/产品使用/享受描述、秒数、生成按钮和结果视频；生成前自动复制所选角色 7 张参考图和固定 4 张 SKG 产品图到当前 job。</p>
+              <p><strong>后端：</strong>新增 <code>GET /character-library/skg</code>、<code>GET /character-library/skg/images/{filename}</code>、<code>POST /jobs/{job_id}/assets/character-library</code>。视频提交新增 <code>subject_images</code>，无首帧时主人物图以 <code>reference_image</code> role 传入 Ark/Seedance，而不是强制作为 <code>first_frame</code>。</p>
+              <p><strong>影响：</strong><code>api/main.py</code>、<code>api/character_library/skg-characters</code>、<code>web/lib/api.ts</code>、<code>web/app/page.tsx</code>、<code>web/components/lightbox.tsx</code>、<code>docs/source-analysis.html</code>。</p>
+            </div>
+          </article>
           <article class="change">
             <header>
               <h3>2026-05-14 · 产品融合内置多组镜头语言</h3>
diff --git a/web/app/page.tsx b/web/app/page.tsx
index 261230a..b1f5776 100644
--- a/web/app/page.tsx
+++ b/web/app/page.tsx
@@ -521,8 +521,10 @@ export default function Home() {
     const frame = job.frames.find((f) => f.index === frameIdx)
     if (!frame) return
     const productRefs = (shot.product_images ?? []).filter(Boolean).slice(0, 4) as ImageRef[]
-    if (!shot.first_image || !shot.last_image || productRefs.length < 4 || !shot.action_text?.trim()) {
-      toast.error("产品融合镜头缺少首帧、尾帧、固定产品图或描述词")
+    const subjectRefs = (shot.subject_images ?? []).filter(Boolean).slice(0, 7) as ImageRef[]
+    const primarySubject = shot.subject_image ?? subjectRefs[0] ?? null
+    if (!primarySubject || subjectRefs.length < 1 || productRefs.length < 4 || !shot.action_text?.trim()) {
+      toast.error("产品融合镜头缺少内置角色、固定产品图或描述词")
       return
     }
     const duration = shot.duration && shot.duration > 0 ? shot.duration : 5
@@ -530,22 +532,21 @@ export default function Home() {
     try {
       const prompt = [
         `产品融合镜头ID：${shot.id || `shot-${frameIdx + 1}`}`,
-        `竖屏 9:16，${duration.toFixed(1)} 秒，Seedance 图生视频。`,
-        "图片模型固定为 GPT Image 2：首帧和尾帧已经由文字生图生成，用来锁定透明骨架人角色、场景构图和动作起止状态。",
-        "视频模型固定为 Seedance：使用首帧作为起始画面、尾帧作为结束画面，并用四张同一 SKG 产品不同角度白底图作为垫图/产品身份参考。",
-        `首帧：${labelOf(shot.first_image, "透明骨架人首帧")}。起始人物形象、姿态、构图和场景氛围以这张图为准。`,
-        `尾帧：${labelOf(shot.last_image, "透明骨架人尾帧")}。结束人物状态、画面落点和场景延续以这张图为准。`,
+        `竖屏 9:16，${duration.toFixed(1)} 秒，Seedance 参考图生视频。`,
+        "没有首帧和尾帧：请根据内置人物角色参考图、固定 SKG 产品图、场景/使用/享受描述直接生成完整视频。",
+        `人物角色：${shot.character_name || "透明骨架人"}。必须保持同一透明/半透明人体外壳、干净白色骨架、体型比例、服装风格和非恐怖广告气质。`,
+        `人物参考图：${subjectRefs.map((ref, index) => `角色图${index + 1}=${labelOf(ref, "透明骨架人参考")}`).join("；")}。`,
         `产品角度图 1：${labelOf(productRefs[0], "SKG 产品正面/主视角")}。`,
         `产品角度图 2：${labelOf(productRefs[1], "SKG 产品侧面/斜侧视角")}。`,
         `产品角度图 3：${labelOf(productRefs[2], "SKG 产品背面/细节视角")}。`,
         `产品角度图 4：${labelOf(productRefs[3], "SKG 产品补充/底部或佩戴视角")}。`,
         "产品使用部位：这是颈部/肩颈按摩仪，只能自然佩戴或贴合在脖子、后颈、颈肩交界处；不要放到手臂、腰、腿、胸口、眼部或背景里。",
         "比例尺寸：产品应符合真实颈部按摩仪大小，U 形结构环绕后颈但不能巨大化、缩小成饰品、嵌入身体、悬浮或穿透透明人体。",
-        "镜头语言：严格按动作描述里的出场方式、景别、运镜、产品进入方式、佩戴贴合动作和收尾方式执行。",
-        `动作描述：${shot.action_text.trim()}`,
+        "镜头语言：严格按描述里的出场方式、场景、景别、运镜、产品进入方式、佩戴贴合动作、使用过程和收尾方式执行。",
+        `场景/使用/享受描述：${shot.action_text.trim()}`,
         TRANSPARENT_HUMAN_VIDEO_PROMPT,
         "融合要求：产品必须自然出现在透明骨架人动作中，尺寸可信，透视一致，只贴合手部拿取和后颈/颈肩使用区域，不能悬浮、漂移、融化、扭曲或变成其他物体。",
-        "首尾连续性：镜头从首帧自然运动到尾帧，中间不要跳切，不换角色，不换产品，不突然改变场景。",
+        "连续性：镜头必须完整连贯，中间不要跳切，不换角色，不换产品，不突然改变场景。",
         "产品一致性：严格保持 SKG 产品外观、颜色、材质、U 形结构、按摩触点、按键和比例；四张产品角度图是产品身份真源。",
         "场景要求：背景、空间、光线和阴影要自然统一，不要出现水印、平台 UI、字幕或竞品包装。",
         "商业质感：真实拍摄感、干净高级、产品清楚可辨、人物动作自然、镜头稳定。",
@@ -555,11 +556,12 @@ export default function Home() {
       const updated = await generateStoryboardVideo(job.id, frameIdx, {
         prompt,
         duration,
-        first_image: shot.first_image,
-        last_image: shot.last_image,
+        first_image: null,
+        last_image: null,
         product_images: productRefs,
-        subject_image: shot.first_image,
-        scene_image: shot.last_image,
+        subject_image: primarySubject,
+        subject_images: subjectRefs,
+        scene_image: null,
         product_image: productRefs[0] ?? null,
         action_image: null,
         source_ref: null,
diff --git a/web/components/lightbox.tsx b/web/components/lightbox.tsx
index 78f9066..2adea17 100644
--- a/web/components/lightbox.tsx
+++ b/web/components/lightbox.tsx
@@ -1,12 +1,13 @@
 "use client"
 import { useEffect, useRef, useState } from "react"
 import { createPortal } from "react-dom"
-import { X, ChevronLeft, ChevronRight, Check, Sparkles, Wand2, Loader2, Eye, RefreshCw, Plus, Sparkle, Crop, Copy, PencilLine, Trash2, Save, Upload, Play } from "lucide-react"
+import { X, ChevronLeft, ChevronRight, Check, Sparkles, Wand2, Loader2, Eye, RefreshCw, Plus, Sparkle, Crop, Copy, PencilLine, Trash2, Save, Play } from "lucide-react"
 import {
   frameUrl, cleanedFrameUrl, apiAssetUrl,
   describeFrame, cleanupFrame, applyCleanedFrame, discardCleanedFrame, addElement, updateElement, deleteElement,
-  generateSceneAsset, generateSubjectAssets, resolveImageRefUrl, uploadStoryboardAsset, updateStoryboard, copyProductLibraryAsset,
-  type AssetBackground, type AssetSize, type KeyFrame, type Job, type ImageRef, type ProductFusionShot, type SceneAssetRole, type SceneStyle, type SubjectKind,
+  generateSceneAsset, generateSubjectAssets, resolveImageRefUrl, updateStoryboard, copyProductLibraryAsset,
+  listCharacterLibrary, copyCharacterLibraryAssets, characterLibraryImageUrl,
+  type AssetBackground, type AssetSize, type CharacterLibraryItem, type KeyFrame, type Job, type ImageRef, type ProductFusionShot, type SceneAssetRole, type SceneStyle, type SubjectKind,
 } from "@/lib/api"
 import { TRANSPARENT_HUMAN_FRAME_STANDARD, TRANSPARENT_HUMAN_UI_SUMMARY } from "@/lib/workflow-target"
 import { toast } from "sonner"
@@ -114,6 +115,8 @@ const DESKTOP_PRODUCT_ANGLE_IDS = [
   "desktop-skg-product-angle-03",
   "desktop-skg-product-angle-04",
 ]
+const DEFAULT_CHARACTER_ID = "character-01"
+const DEFAULT_CHARACTER_NAME = "运动阳光男"
 type FusionUploadTarget = {
   shotIndex: number
   slot: "first_image" | "last_image"
@@ -204,6 +207,10 @@ const createFusionShots = (): ProductFusionShot[] =>
     last_image: null,
     product_images: [],
     product_image: null,
+    character_id: DEFAULT_CHARACTER_ID,
+    character_name: DEFAULT_CHARACTER_NAME,
+    subject_image: null,
+    subject_images: [],
     person_image: null,
     product_region: null,
     scene_image: null,
@@ -223,6 +230,10 @@ const normalizeFusionShots = (shots?: ProductFusionShot[] | null): ProductFusion
       ...item,
       ...shot,
       product_images: shot.product_images?.slice(0, PRODUCT_ANGLE_COUNT) ?? [],
+      character_id: shot.character_id || item.character_id,
+      character_name: shot.character_name || item.character_name,
+      subject_image: shot.subject_image ?? item.subject_image,
+      subject_images: shot.subject_images ?? item.subject_images,
       action_text: shouldUseDefaultFusionDescription(shot.action_text) ? item.action_text : shot.action_text,
       id: shot.id || item.id,
     }
@@ -251,10 +262,11 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
   const [activeTab, setActiveTab] = useState<LightboxTab>("clean")
   const [fusionShots, setFusionShots] = useState<ProductFusionShot[]>(() => createFusionShots())
   const [activeFusionShot, setActiveFusionShot] = useState(0)
-  const [fusionUploadTarget, setFusionUploadTarget] = useState<FusionUploadTarget | null>(null)
   const [fusionGenerating, setFusionGenerating] = useState<number | "all" | null>(null)
   const [fusionSaving, setFusionSaving] = useState(false)
   const [fusionPresetPage, setFusionPresetPage] = useState(0)
+  const [characterLibrary, setCharacterLibrary] = useState<CharacterLibraryItem[]>([])
+  const [selectedCharacterId, setSelectedCharacterId] = useState(DEFAULT_CHARACTER_ID)
   const [editingElement, setEditingElement] = useState<{
     frameIndex: number
     id: string
@@ -270,12 +282,23 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
   const [draftRegion, setDraftRegion] = useState<Region | null>(null)  // 当前正在拖的
   const [dragStart, setDragStart] = useState<{ x: number; y: number } | null>(null)
   const imgWrapRef = useRef<HTMLDivElement>(null)
-  const fusionFileInputRef = useRef<HTMLInputElement | null>(null)
   const loadedFusionKey = useRef("")
   const activeIndexRef = useRef<number | null>(activeIndex)
   useEffect(() => setMounted(true), [])
   useEffect(() => { activeIndexRef.current = activeIndex }, [activeIndex])
 
+  useEffect(() => {
+    let cancelled = false
+    listCharacterLibrary()
+      .then((items) => {
+        if (!cancelled) setCharacterLibrary(items)
+      })
+      .catch((e) => {
+        if (!cancelled) toast.error("角色库读取失败：" + (e instanceof Error ? e.message : String(e)))
+      })
+    return () => { cancelled = true }
+  }, [])
+
   useEffect(() => {
     if (activeIndex === null) {
       loadedFusionKey.current = ""
@@ -286,7 +309,9 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
     const key = `${jobId}:${activeIndex}`
     if (loadedFusionKey.current === key) return
     const frame = frames.find((x) => x.index === activeIndex)
-    setFusionShots(normalizeFusionShots(frame?.storyboard?.product_fusion_shots as ProductFusionShot[] | undefined))
+    const nextShots = normalizeFusionShots(frame?.storyboard?.product_fusion_shots as ProductFusionShot[] | undefined)
+    setFusionShots(nextShots)
+    setSelectedCharacterId(nextShots[0]?.character_id || DEFAULT_CHARACTER_ID)
     setActiveFusionShot(0)
     loadedFusionKey.current = key
   }, [activeIndex, frames, jobId])
@@ -391,9 +416,8 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
     sceneExtraKeywords.trim() ? `额外关键词：${sceneExtraKeywords.trim()}。` : "",
     "要求：单一透明骨架人清晰可见，人物占画面主体，首尾帧可连续生成视频；无文字、水印、平台 UI、恐怖解剖感。",
   ].filter(Boolean).join("\n")
-  const fusionReadyCount = fusionShots.filter((shot) =>
-    shot.first_image && shot.last_image && shot.action_text?.trim()
-  ).length
+  const fusionReadyCount = fusionShots.filter((shot) => shot.action_text?.trim()).length
+  const selectedCharacter = characterLibrary.find((item) => item.id === selectedCharacterId) ?? characterLibrary[0]
 
   const persistFusionShots = async (nextShots: ProductFusionShot[]) => {
     setFusionSaving(true)
@@ -425,47 +449,42 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
       : { last_image: ref, guide_image: null }, true)
   }
 
-  const uploadFusionFiles = async (files: FileList | File[]) => {
-    if (!fusionUploadTarget) return
-    const file = Array.from(files).find((item) => item.type.startsWith("image/"))
-    if (!file) {
-      toast.error("请上传图片文件")
-      return
-    }
-    try {
-      const ref = await uploadStoryboardAsset(jobId, file)
-      assignFusionImage(fusionUploadTarget, ref)
-      toast.success("已加入当前融合镜头")
-    } catch (e) {
-      toast.error("上传失败：" + (e instanceof Error ? e.message : String(e)))
-    } finally {
-      setFusionUploadTarget(null)
-    }
-  }
-
-  const openFusionUpload = (target: FusionUploadTarget) => {
-    setActiveFusionShot(target.shotIndex)
-    setFusionUploadTarget(target)
-    requestAnimationFrame(() => fusionFileInputRef.current?.click())
-  }
-
-  const ensureFixedProductAngles = async (indexes: number[]) => {
+  const prepareFusionReferences = async (indexes: number[]) => {
     try {
       const reusableRefs = fusionShots.find((shot) => (shot.product_images ?? []).filter(Boolean).length >= PRODUCT_ANGLE_COUNT)
         ?.product_images?.slice(0, PRODUCT_ANGLE_COUNT)
-      const refs = reusableRefs?.length === PRODUCT_ANGLE_COUNT
+      const productRefs = reusableRefs?.length === PRODUCT_ANGLE_COUNT
         ? reusableRefs
         : await Promise.all(DESKTOP_PRODUCT_ANGLE_IDS.map((id) => copyProductLibraryAsset(jobId, id)))
+      const reusableSubjectRefs = fusionShots.find((shot) =>
+        shot.character_id === selectedCharacterId && (shot.subject_images ?? []).filter(Boolean).length > 0
+      )?.subject_images?.filter(Boolean)
+      const copiedCharacter = reusableSubjectRefs?.length
+        ? {
+            character_id: selectedCharacterId,
+            character_name: selectedCharacter?.name || fusionShots.find((shot) => shot.character_id === selectedCharacterId)?.character_name || DEFAULT_CHARACTER_NAME,
+            images: reusableSubjectRefs,
+          }
+        : await copyCharacterLibraryAssets(jobId, selectedCharacterId)
       const next = fusionShots.map((shot, index) => (
         indexes.includes(index)
-          ? { ...shot, product_images: refs, product_image: refs[0] ?? null, guide_image: null }
+          ? {
+              ...shot,
+              product_images: productRefs,
+              product_image: productRefs[0] ?? null,
+              character_id: copiedCharacter.character_id,
+              character_name: copiedCharacter.character_name,
+              subject_image: copiedCharacter.images[0] ?? null,
+              subject_images: copiedCharacter.images,
+              guide_image: null,
+            }
           : shot
       ))
       setFusionShots(next)
       void persistFusionShots(next)
       return next
     } catch (e) {
-      toast.error("桌面产品角度填充失败：" + (e instanceof Error ? e.message : String(e)))
+      toast.error("内置角色/产品参考准备失败：" + (e instanceof Error ? e.message : String(e)))
       return null
     }
   }
@@ -483,15 +502,29 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
     toast.success(`已换第 ${Math.floor(start / FUSION_SHOT_COUNT) + 1} 组镜头语言`)
   }
 
+  const selectFusionCharacter = (characterId: string) => {
+    const character = characterLibrary.find((item) => item.id === characterId)
+    const next = fusionShots.map((shot) => ({
+      ...shot,
+      character_id: characterId,
+      character_name: character?.name || shot.character_name || DEFAULT_CHARACTER_NAME,
+      subject_image: null,
+      subject_images: [],
+    }))
+    setSelectedCharacterId(characterId)
+    setFusionShots(next)
+    void persistFusionShots(next)
+  }
+
   const runFusionVideo = async (index: number) => {
     const shot = fusionShots[index]
-    if (!shot?.first_image || !shot.last_image || !shot.action_text?.trim()) {
-      toast.error(`镜头 ${index + 1} 还缺首帧或尾帧`)
+    if (!shot?.action_text?.trim()) {
+      toast.error(`镜头 ${index + 1} 还缺场景/使用描述`)
       return
     }
     setFusionGenerating(index)
     try {
-      const next = await ensureFixedProductAngles([index])
+      const next = await prepareFusionReferences([index])
       if (!next) return
       await onGenerateProductFusionVideo?.(f.index, next[index])
     } finally {
@@ -502,15 +535,15 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
   const runAllFusionVideos = async () => {
     const indexes = fusionShots
       .map((shot, i) => ({ shot, i }))
-      .filter(({ shot }) => shot.first_image && shot.last_image && shot.action_text?.trim())
+      .filter(({ shot }) => shot.action_text?.trim())
       .map(({ i }) => i)
     if (indexes.length === 0) {
-      toast.error("还没有完整的融合镜头")
+      toast.error("还没有可生成的融合镜头")
       return
     }
     setFusionGenerating("all")
     try {
-      const next = await ensureFixedProductAngles(indexes)
+      const next = await prepareFusionReferences(indexes)
       if (!next) return
       for (const index of indexes) {
         await onGenerateProductFusionVideo?.(f.index, next[index])
@@ -1034,23 +1067,7 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
               </div>
             </section>
           ) : isProductTab ? (
-            <section
-              className="rounded-lg border border-amber-300/15 bg-amber-500/[0.06] p-2.5"
-              onPaste={(e) => {
-                if (fusionUploadTarget && e.clipboardData.files?.length) void uploadFusionFiles(e.clipboardData.files)
-              }}
-            >
-              <input
-                ref={fusionFileInputRef}
-                type="file"
-                accept="image/*"
-                className="hidden"
-                onChange={(e) => {
-                  const files = e.target.files
-                  if (files) void uploadFusionFiles(files)
-                  e.currentTarget.value = ""
-                }}
-              />
+            <section className="rounded-lg border border-amber-300/15 bg-amber-500/[0.06] p-2.5">
               <div className="mb-2 flex items-center justify-between gap-2">
                 <div className="text-[12px] font-semibold text-white">产品融合镜头组</div>
                 <div className="flex items-center gap-1.5">
@@ -1062,7 +1079,7 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
                     onClick={rotateFusionDescriptions}
                     disabled={!!fusionGenerating}
                     className="inline-flex h-6 items-center justify-center gap-1 rounded bg-white/10 px-2 text-[9.5px] font-medium text-white/65 transition hover:bg-white/18 hover:text-white disabled:cursor-not-allowed disabled:opacity-40"
-                    title="换一组内置镜头语言，不改变首帧和尾帧"
+                    title="换一组内置镜头语言，不改变角色和视频结果"
                   >
                     <RefreshCw className="h-3 w-3" />
                     换一组
@@ -1078,77 +1095,51 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
                   </button>
                 </div>
               </div>
+              <div className="mb-2 grid grid-cols-[minmax(220px,300px)_1fr] gap-2 rounded-md border border-white/10 bg-black/25 p-2">
+                <label className="block">
+                  <div className="mb-1 text-[9px] text-white/38">内置角色</div>
+                  <select
+                    value={selectedCharacterId}
+                    onChange={(e) => selectFusionCharacter(e.target.value)}
+                    className="h-8 w-full rounded-md border border-white/10 bg-black/45 px-2 text-[10.5px] text-white/80 outline-none focus:border-amber-300/45"
+                  >
+                    {(characterLibrary.length ? characterLibrary : [{ id: DEFAULT_CHARACTER_ID, name: DEFAULT_CHARACTER_NAME } as CharacterLibraryItem]).map((character) => (
+                      <option key={character.id} value={character.id}>{character.name}</option>
+                    ))}
+                  </select>
+                </label>
+                <div className="min-w-0">
+                  <div className="mb-1 flex items-center justify-between gap-2">
+                    <span className="text-[9px] text-white/38">角色参考</span>
+                    <span className="text-[8.5px] text-white/30">产品固定 4 图 · 无首尾帧</span>
+                  </div>
+                  <div className="flex gap-1.5 overflow-hidden">
+                    {(selectedCharacter?.images ?? []).slice(0, 7).map((image) => (
+                      <div key={image.id} className="h-12 w-10 overflow-hidden rounded border border-white/10 bg-black/35">
+                        <img src={characterLibraryImageUrl(image.filename)} alt={image.label} className="h-full w-full object-cover" draggable={false} />
+                      </div>
+                    ))}
+                    {!selectedCharacter?.images?.length && (
+                      <div className="flex h-12 items-center rounded border border-dashed border-white/10 px-2 text-[9.5px] text-white/32">
+                        角色库加载中
+                      </div>
+                    )}
+                  </div>
+                </div>
+              </div>
               <div className="mb-2 rounded-md border border-white/10 bg-black/25 px-2 py-1.5 text-[10px] leading-relaxed text-white/50">
-                描述词已预填，产品固定使用桌面 4 张 SKG 角度图；这里只需要填每行的首帧和尾帧。
+                角色和产品已内置；每行只写场景、产品如何在脖子/后颈使用，以及人物舒适享受的状态。
               </div>
               <div className="space-y-2">
                 {fusionShots.map((shot, i) => {
                   const active = i === activeFusionShot
-                  const firstUrl = shot.first_image ? resolveImageRefUrl(jobId, shot.first_image) : ""
-                  const lastUrl = shot.last_image ? resolveImageRefUrl(jobId, shot.last_image) : ""
                   const shotMarker = `${FUSION_PROMPT_MARKER_PREFIX}${shot.id}`
                   const shotVideos = generatedVideos.filter((video) => video.frame_idx === f.index && video.prompt.includes(shotMarker))
                   const latestShotVideo = shotVideos[0]
                   const latestVideoUrl = latestShotVideo?.url ? apiAssetUrl(latestShotVideo.url) : ""
-                  const ready = !!(shot.first_image && shot.last_image && shot.action_text?.trim())
+                  const ready = !!shot.action_text?.trim()
                   const busy = fusionGenerating === i || fusionGenerating === "all"
                   const lensStageLabel = PRODUCT_FUSION_LENS_STAGES[i] ?? `镜头 ${i + 1}`
-                  const pasteIntoSlot = (target: FusionUploadTarget, label: string) => {
-                    setActiveFusionShot(i)
-                    if (clipboard) {
-                      assignFusionImage(target, clipboard)
-                      toast.success(`已粘贴到镜头 ${i + 1}「${label}」：${clipboard.label || "剪贴板图片"}`)
-                      return
-                    }
-                    setFusionUploadTarget(target)
-                    toast.message(`镜头 ${i + 1} 已选中「${label}」槽位，现在可 Cmd+V 粘贴系统图片`)
-                  }
-                  const imageSlot = (target: FusionUploadTarget, label: string, url: string, ref?: ImageRef | null, white = false) => (
-                    <div className="overflow-hidden rounded-md border border-white/10 bg-black/24">
-                      <div className={`relative aspect-[4/5] ${white ? "bg-white" : "bg-black"}`}>
-                        {url ? (
-                          <button
-                            type="button"
-                            onClick={() => setActiveFusionShot(i)}
-                            className="absolute inset-0 cursor-pointer"
-                            title={`选中镜头 ${i + 1}`}
-                          >
-                            <img src={url} alt={label} className="h-full w-full object-contain" draggable={false} />
-                          </button>
-                        ) : (
-                          <button
-                            type="button"
-                            onClick={() => openFusionUpload(target)}
-                            className={`absolute inset-0 flex flex-col items-center justify-center gap-1 text-[9.5px] ${white ? "text-black/35 hover:text-black/65" : "text-white/35 hover:text-white/65"}`}
-                          >
-                            <Upload className="h-3.5 w-3.5" />
-                            {label}
-                          </button>
-                        )}
-                      </div>
-                      <div className="border-t border-white/10 px-1 py-1">
-                        <div className="mb-1 truncate text-[8.5px] text-white/42">{ref?.label || label}</div>
-                        <div className="grid grid-cols-2 gap-1">
-                          <button
-                            type="button"
-                            onClick={() => pasteIntoSlot(target, label)}
-                            className={`rounded px-1 py-0.5 text-[8.5px] transition ${
-                              clipboard ? "bg-violet-500/60 text-white hover:bg-violet-400/70" : "bg-white/10 text-white/58 hover:bg-white/18 hover:text-white"
-                            }`}
-                          >
-                            粘贴
-                          </button>
-                          <button
-                            type="button"
-                            onClick={() => openFusionUpload(target)}
-                            className="rounded bg-white/10 px-1 py-0.5 text-[8.5px] text-white/65 transition hover:bg-white/18 hover:text-white"
-                          >
-                            上传
-                          </button>
-                        </div>
-                      </div>
-                    </div>
-                  )
                   const resultPanel = latestShotVideo ? (
                     <div className="overflow-hidden rounded-md border border-white/10 bg-black/30">
                       <div className="relative aspect-video bg-black">
@@ -1200,7 +1191,7 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
                           : "border-white/10 bg-black/20 hover:border-amber-300/35"
                       }`}
                     >
-                      <div className="grid grid-cols-[34px_92px_92px_minmax(220px,1fr)_78px_190px] items-start gap-2">
+                      <div className="grid grid-cols-[34px_minmax(360px,1fr)_78px_190px] items-start gap-2">
                         <div className="flex flex-col items-center gap-1 pt-1">
                           <button
                             type="button"
@@ -1219,10 +1210,6 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
                           </span>
                         </div>
 
-                        {imageSlot({ shotIndex: i, slot: "first_image" }, "首帧", firstUrl, shot.first_image)}
-
-                        {imageSlot({ shotIndex: i, slot: "last_image" }, "尾帧", lastUrl, shot.last_image)}
-
                         <label className="block">
                           <div className="mb-1 flex items-center justify-between gap-2">
                             <span className="text-[9px] text-amber-100/65">{lensStageLabel}</span>
@@ -1236,7 +1223,7 @@ export function FrameLightbox({ jobId, frames, generatedVideos = [], activeIndex
                               const next = fusionShots.map((item, idx) => (idx === i ? { ...item, action_text: e.currentTarget.value } : item))
                               void persistFusionShots(next)
                             }}
-                            placeholder="描述这个镜头里透明骨架人、SKG 产品和动作起止状态。"
+                            placeholder="写清场景、产品如何佩戴到脖子/后颈，以及人物舒适享受状态。"
                             className="h-[92px] w-full resize-none rounded-md border border-white/10 bg-black/35 px-2 py-1.5 text-[10px] leading-relaxed text-white/75 outline-none placeholder:text-white/25 focus:border-amber-300/45"
                           />
                         </label>