From a6773a86906e80d9c5ee8d6d03dadb7b8d7af459 Mon Sep 17 00:00:00 2001
From: kang <wankang2050@gmail.com>
Date: Thu, 14 May 2026 07:23:13 +0800
Subject: [PATCH] auto-save 2026-05-14 07:23 (~4)

---
 .memory/worklog.json        |  27 ++-
 api/main.py                 |  92 ++++++++++
 web/components/lightbox.tsx | 325 ++++++++++++++++++++++++++++++++++--
 web/lib/api.ts              |  37 ++++
 4 files changed, 456 insertions(+), 25 deletions(-)

diff --git a/.memory/worklog.json b/.memory/worklog.json
index 4fc4a1f..0fef4c2 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,19 +1,5 @@
 {
   "entries": [
-    {
-      "files_changed": 2,
-      "hash": "c481da4",
-      "message": "auto-save 2026-05-12 19:53 (~2)",
-      "ts": "2026-05-12T19:53:40+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 5,
-      "hash": "375494e",
-      "message": "auto-save 2026-05-12 19:58 (+1, ~4)",
-      "ts": "2026-05-12T19:59:15+08:00",
-      "type": "commit"
-    },
     {
       "files_changed": 3,
       "hash": "ca0d6f1",
@@ -3340,6 +3326,19 @@
       "type": "session-heartbeat",
       "message": "Claude 会话活跃 · 最近命令：claude · 1 项未提交变更 · 最近提交：auto-save 2026-05-14 07:12 (~1)",
       "files_changed": 1
+    },
+    {
+      "ts": "2026-05-14T07:17:42+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-14 07:17 (~1)",
+      "hash": "76412d2",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-13T23:18:52Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 1 项未提交变更 · 最近提交：auto-save 2026-05-14 07:17 (~1)",
+      "files_changed": 1
     }
   ]
 }
diff --git a/api/main.py b/api/main.py
index 4dc8077..535b247 100644
--- a/api/main.py
+++ b/api/main.py
@@ -156,6 +156,7 @@ class StoryboardScene(BaseModel):
     first_image: dict | None = None
     last_image: dict | None = None
     product_images: list[dict] = Field(default_factory=list)
+    product_fusion_shots: list[dict] = Field(default_factory=list)
     # 4 图槽：dict 含 {kind, frame_idx, element_id?, cutout_id?, label}
     subject_image: dict | None = None
     scene_image: dict | None = None
@@ -236,6 +237,26 @@ class ProductLibraryItem(BaseModel):
     tags: list[str] = Field(default_factory=list)
 
 
+class ProductFusionRegion(BaseModel):  # rectangle on the person image that the product is composited into
+    x: float = 0  # left edge as a fraction of image width (the guide endpoint clamps it and multiplies by the width)
+    y: float = 0  # top edge as a fraction of image height
+    w: float = 0  # width as a fraction of image width; the guide endpoint rejects values <= 0
+    h: float = 0  # height as a fraction of image height; the guide endpoint rejects values <= 0
+
+
+class ProductFusionShot(BaseModel):  # one product-fusion shot; also the request body of the guide endpoint
+    id: str = ""
+    product_image: dict | None = None  # image ref, resolved to a file via storyboard_ref_path
+    person_image: dict | None = None  # image ref used as the base layer of the guide composite
+    product_region: ProductFusionRegion | None = None  # where the product is placed on the person image
+    scene_image: dict | None = None  # not read by the guide endpoint
+    action_text: str = ""
+    duration: float = 5  # presumably seconds - TODO confirm
+    image_model: str = "gpt-image-2"  # the guide endpoint only echoes this into the returned label
+    video_model: str = "seedance"
+    guide_image: dict | None = None
+
+
 class KeyElement(BaseModel):
     """关键帧里识别 / 用户提取的元素 · 多次提取累积多张图，让用户挑选满意的"""
     id: str             # uuid hex 8
@@ -2488,6 +2509,10 @@ def delete_cutout(job_id: str, idx: int, element_id: str, cutout_id: str) -> Job
 
 class UpdateStoryboardReq(BaseModel):
     duration: float = 0
+    first_image: dict | None = None
+    last_image: dict | None = None
+    product_images: list[dict] = Field(default_factory=list)
+    product_fusion_shots: list[dict] = Field(default_factory=list)
     subject_image: dict | None = None
     scene_image: dict | None = None
     product_image: dict | None = None
@@ -2909,6 +2934,69 @@ def copy_product_library_asset(job_id: str, req: CopyProductLibraryAssetReq) ->
     }
 
 
+def product_image_alpha(img: Image.Image) -> Image.Image:
+    # RGBA copy whose alpha grows with distance from pure white: below 18 is transparent, above that 2.4x gain, then a light blur.
+    cutout = img.convert("RGBA")
+    opaque = cutout.convert("RGB")
+    distance = ImageChops.difference(opaque, Image.new("RGB", opaque.size, (255, 255, 255))).convert("L")
+    alpha = distance.point(lambda v: min(255, int(v * 2.4)) if v >= 18 else 0).filter(ImageFilter.GaussianBlur(0.7))
+    cutout.putalpha(alpha)
+    return cutout
+
+
+@app.post("/jobs/{job_id}/product-fusion/guide")
+def create_product_fusion_guide(job_id: str, req: ProductFusionShot) -> dict:  # paste the product cut-out into the region of the person image, save a JPEG asset, return its ref
+    if job_id not in JOBS:
+        raise HTTPException(404, "job not found")
+    person_path = storyboard_ref_path(job_id, req.person_image)
+    product_path = storyboard_ref_path(job_id, req.product_image)
+    if not person_path or not person_path.exists():
+        raise HTTPException(400, "person image required")
+    if not product_path or not product_path.exists():
+        raise HTTPException(400, "product image required")
+    if not req.product_region or req.product_region.w <= 0 or req.product_region.h <= 0:
+        raise HTTPException(400, "product region required")
+
+    region = req.product_region
+    x = max(0.0, min(0.98, float(region.x)))  # origin capped at 0.98 so the 0.02 minimum width below still fits inside the image
+    y = max(0.0, min(0.98, float(region.y)))  # same cap for the vertical origin
+    w = max(0.02, min(1.0 - x, float(region.w)))  # at least 2% wide, never past the right edge
+    h = max(0.02, min(1.0 - y, float(region.h)))  # at least 2% tall, never past the bottom edge
+
+    try:
+        base = Image.open(person_path).convert("RGB")
+        base.thumbnail((1600, 1600), Image.Resampling.LANCZOS)  # cap the working size at 1600 px per side
+        product = product_image_alpha(Image.open(product_path))  # white backdrop turned into transparency
+        bw, bh = base.size
+        box = (  # (left, top, width, height) of the region in pixels
+            int(round(x * bw)),
+            int(round(y * bh)),
+            max(1, int(round(w * bw))),
+            max(1, int(round(h * bh))),
+        )
+        product.thumbnail((box[2], box[3]), Image.Resampling.LANCZOS)  # shrink to fit the box, keeping aspect ratio
+        px = box[0] + max(0, (box[2] - product.width) // 2)  # center horizontally inside the box
+        py = box[1] + max(0, (box[3] - product.height) // 2)  # center vertically inside the box
+        guide = base.convert("RGBA")
+        guide.alpha_composite(product, (px, py))
+        out = guide.convert("RGB")
+        asset_id = uuid.uuid4().hex[:12]
+        out_dir = job_dir(job_id) / "assets"
+        out_dir.mkdir(parents=True, exist_ok=True)
+        out_path = out_dir / f"{asset_id}.jpg"
+        out.save(out_path, "JPEG", quality=94)
+    except Exception as e:
+        raise HTTPException(400, f"product fusion guide failed: {e}") from e  # keep the original traceback as __cause__
+
+    return {
+        "kind": "asset",
+        "frame_idx": -1,
+        "element_id": asset_id,
+        "cutout_id": asset_id,
+        "label": f"产品融合引导图 · {req.image_model or 'gpt-image-2'}",
+    }
+
+
 @app.get("/jobs/{job_id}/assets/{asset_id}.jpg")
 def get_storyboard_asset(job_id: str, asset_id: str):
     p = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
@@ -2953,6 +3041,10 @@ def update_storyboard(job_id: str, idx: int, req: UpdateStoryboardReq) -> Job:
         if f.index == idx:
             f.storyboard = StoryboardScene(
                 duration=max(0.0, float(req.duration)),
+                first_image=req.first_image,
+                last_image=req.last_image,
+                product_images=list(req.product_images),
+                product_fusion_shots=list(req.product_fusion_shots),
                 subject_image=req.subject_image,
                 scene_image=req.scene_image,
                 product_image=req.product_image,
diff --git a/web/components/lightbox.tsx b/web/components/lightbox.tsx
index 17833e7..6640979 100644
--- a/web/components/lightbox.tsx
+++ b/web/components/lightbox.tsx
@@ -1,12 +1,12 @@
 "use client"
 import { useEffect, useRef, useState } from "react"
 import { createPortal } from "react-dom"
-import { X, ChevronLeft, ChevronRight, Check, Sparkles, Wand2, Loader2, Eye, RefreshCw, Plus, Sparkle, Crop, Copy, PencilLine, Trash2, Save } from "lucide-react"
+import { X, ChevronLeft, ChevronRight, Check, Sparkles, Wand2, Loader2, Eye, RefreshCw, Plus, Sparkle, Crop, Copy, PencilLine, Trash2, Save, Upload, Play } from "lucide-react"
 import {
   frameUrl, cleanedFrameUrl, apiAssetUrl,
   describeFrame, cleanupFrame, applyCleanedFrame, discardCleanedFrame, addElement, updateElement, deleteElement,
-  generateSceneAsset, generateSubjectAssets,
-  type AssetBackground, type AssetSize, type KeyFrame, type Job, type ImageRef, type SceneMode, type SceneStyle, type SubjectKind,
+  generateSceneAsset, generateSubjectAssets, resolveImageRefUrl, uploadStoryboardAsset, updateStoryboard,
+  type AssetBackground, type AssetSize, type KeyFrame, type Job, type ImageRef, type ProductFusionShot, type SceneMode, type SceneStyle, type SubjectKind,
 } from "@/lib/api"
 import { ProductLibraryPicker } from "@/components/product-library-picker"
 import { toast } from "sonner"
@@ -22,6 +22,7 @@ interface Props {
   onJobUpdate?: (job: Job) => void
   onSwitchPanel?: (key: string) => void
   onCopyImage?: (ref: ImageRef) => void
+  onGenerateProductFusionVideo?: (frameIdx: number, shot: ProductFusionShot) => Promise<void> | void
   embedded?: boolean
 }
 
@@ -107,7 +108,30 @@ const SCENE_REFERENCE_OPTIONS = [
   ["social media realism", "真实生活感"],
 ]
 
-export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, onChange, onToggleSelect, onJobUpdate, onSwitchPanel, onCopyImage, embedded = false }: Props) {
+const FUSION_SHOT_COUNT = 6
+const FUSION_DURATIONS = [4, 5, 6, 8, 10, 12, 15]
+
+// Blank shot list: FUSION_SHOT_COUNT empty slots with ids "shot-1", "shot-2", ...
+const createFusionShots = (): ProductFusionShot[] =>
+  Array.from({ length: FUSION_SHOT_COUNT }, (_, slot): ProductFusionShot => ({
+    id: `shot-${slot + 1}`,
+    product_image: null,
+    person_image: null,
+    product_region: null,
+    scene_image: null,
+    action_text: "",
+    duration: 5,
+    image_model: "gpt-image-2", video_model: "seedance",
+    guide_image: null,
+  }))
+
+// Always yields FUSION_SHOT_COUNT shots: saved entries overlay the blank defaults by position; a missing or empty id falls back to the default id.
+const normalizeFusionShots = (shots?: ProductFusionShot[] | null): ProductFusionShot[] => {
+  const saved = shots ?? []
+  return createFusionShots().map((blank, slot) => ({ ...blank, ...saved[slot], id: saved[slot]?.id || blank.id }))
+}
+
+export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, onChange, onToggleSelect, onJobUpdate, onSwitchPanel, onCopyImage, onGenerateProductFusionVideo, embedded = false }: Props) {
   const [describing, setDescribing] = useState(false)
   const [cleaningFrameIds, setCleaningFrameIds] = useState<Set<number>>(new Set())
   const [batchCleaning, setBatchCleaning] = useState(false)
@@ -126,6 +150,13 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
   const [subjectBackgrounds, setSubjectBackgrounds] = useState<Record<string, AssetBackground>>({})
   const [subjectViews, setSubjectViews] = useState<Record<string, string[]>>({})
   const [activeTab, setActiveTab] = useState<LightboxTab>("clean")
+  const [fusionShots, setFusionShots] = useState<ProductFusionShot[]>(() => createFusionShots()) // local working copy of the frame's fusion shots
+  const [activeFusionShot, setActiveFusionShot] = useState(0) // index into fusionShots of the shot being edited
+  const [fusionUploadTarget, setFusionUploadTarget] = useState<"product_image" | "person_image" | "scene_image" | null>(null) // slot the next uploaded or pasted file is assigned to
+  const [fusionGenerating, setFusionGenerating] = useState<number | "all" | null>(null) // shot index being generated, "all" during a batch run, null when idle
+  const [fusionSaving, setFusionSaving] = useState(false) // true while persistFusionShots is awaiting the server
+  const [fusionDraftRegion, setFusionDraftRegion] = useState<{ x: number; y: number; w: number; h: number } | null>(null) // rectangle currently being dragged, as 0-1 fractions
+  const [fusionDragStart, setFusionDragStart] = useState<{ x: number; y: number } | null>(null) // drag anchor point; non-null only while dragging
   const [editingElement, setEditingElement] = useState<{
     frameIndex: number
     id: string
@@ -141,10 +172,28 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
   const [draftRegion, setDraftRegion] = useState<Region | null>(null)  // 当前正在拖的
   const [dragStart, setDragStart] = useState<{ x: number; y: number } | null>(null)
   const imgWrapRef = useRef<HTMLDivElement>(null)
+  const fusionPersonWrapRef = useRef<HTMLDivElement>(null) // wrapper of the person image; its bounding box defines the 0-1 region coordinates
+  const fusionFileInputRef = useRef<HTMLInputElement | null>(null) // hidden file input that openFusionUpload clicks
+  const loadedFusionKey = useRef("") // "jobId:frameIndex" whose shots were last loaded into state; "" when none
   const activeIndexRef = useRef<number | null>(activeIndex)
   useEffect(() => setMounted(true), [])
   useEffect(() => { activeIndexRef.current = activeIndex }, [activeIndex])
 
+  useEffect(() => { // load the saved fusion shots once per (job, frame)
+    if (activeIndex === null) { // no active frame: reset to blank shots
+      loadedFusionKey.current = ""
+      setFusionShots(createFusionShots())
+      setActiveFusionShot(0)
+      return
+    }
+    const key = `${jobId}:${activeIndex}`
+    if (loadedFusionKey.current === key) return // already loaded for this frame, so a later `frames` update does not overwrite local edits
+    const frame = frames.find((x) => x.index === activeIndex)
+    setFusionShots(normalizeFusionShots(frame?.storyboard?.product_fusion_shots as ProductFusionShot[] | undefined)) // blank shots when the frame or its storyboard is missing
+    setActiveFusionShot(0)
+    loadedFusionKey.current = key
+  }, [activeIndex, frames, jobId])
+
   // 切换分镜时清空选区
   useEffect(() => {
     setCropMode(false)
@@ -244,6 +293,149 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
     sceneExtraKeywords.trim() ? `额外关键词：${sceneExtraKeywords.trim()}。` : "",
     "要求：无主体、无人物动物产品、无文字水印，保持可用于后续视频生成的干净背景板。",
   ].filter(Boolean).join("\n")
+  const currentFusionShot = fusionShots[activeFusionShot] ?? fusionShots[0] // falls back to the first shot if the active index is out of range
+  const currentFusionProductUrl = currentFusionShot?.product_image ? resolveImageRefUrl(jobId, currentFusionShot.product_image) : "" // "" when the slot is empty
+  const currentFusionPersonUrl = currentFusionShot?.person_image ? resolveImageRefUrl(jobId, currentFusionShot.person_image) : "" // "" when the slot is empty
+  const currentFusionSceneUrl = currentFusionShot?.scene_image ? resolveImageRefUrl(jobId, currentFusionShot.scene_image) : "" // "" when the slot is empty
+  const fusionReadyCount = fusionShots.filter((shot) => shot.product_image && shot.person_image && shot.product_region && shot.scene_image && shot.action_text?.trim()).length // shots with all five inputs filled in
+
+  const persistFusionShots = async (nextShots: ProductFusionShot[]) => { // save the given shot list into the frame's storyboard on the server
+    setFusionSaving(true)
+    try {
+      const updated = await updateStoryboard(jobId, f.index, { // `f` is defined outside this block - presumably the active frame, confirm
+        ...(f.storyboard ?? { duration: 0 }), // keep the other storyboard fields; start from duration 0 when there is no storyboard yet
+        product_fusion_shots: nextShots,
+      })
+      onJobUpdate?.(updated)
+    } catch (e) { // failures are reported as a toast and not rethrown
+      toast.error("产品融合镜头保存失败：" + (e instanceof Error ? e.message : String(e)))
+    } finally {
+      setFusionSaving(false)
+    }
+  }
+
+  const updateFusionShot = (index: number, patch: Partial<ProductFusionShot>, persist = false) => { // merge `patch` into one shot; optionally save the whole list
+    const next = fusionShots.map((shot, i) => (i === index ? { ...shot, ...patch } : shot)) // NOTE(review): `fusionShots` comes from the render closure; a call made after an await may start from an outdated list - confirm
+    setFusionShots(next)
+    if (persist) void persistFusionShots(next) // fire-and-forget; persistFusionShots reports its own errors
+  }
+
+  const assignFusionImage = (slot: "product_image" | "person_image" | "scene_image", ref: ImageRef, index = activeFusionShot) => { // put an image into one slot of a shot (default: the active shot) and save
+    updateFusionShot(index, { [slot]: ref, guide_image: null }, true) // clears guide_image together with the slot change
+  }
+
+  const uploadFusionFiles = async (files: FileList | File[]) => { // upload the first image file and assign it to the armed slot
+    if (!fusionUploadTarget) return // no slot is armed, nothing to do
+    const file = Array.from(files).find((item) => item.type.startsWith("image/")) // only the first file with an image MIME type is used
+    if (!file) {
+      toast.error("请上传图片文件")
+      return // returns before the try block, so the armed slot is not cleared on this path
+    }
+    try {
+      const ref = await uploadStoryboardAsset(jobId, file)
+      assignFusionImage(fusionUploadTarget, ref)
+      toast.success("已加入当前融合镜头")
+    } catch (e) {
+      toast.error("上传失败：" + (e instanceof Error ? e.message : String(e)))
+    } finally {
+      setFusionUploadTarget(null) // disarm the slot after both success and failure
+    }
+  }
+
+  const openFusionUpload = (slot: "product_image" | "person_image" | "scene_image") => { // arm a slot, then open the hidden file picker
+    setFusionUploadTarget(slot)
+    fusionFileInputRef.current?.click() // no-op when the input is not mounted
+  }
+
+  const draftFusionDescriptions = () => { // fill every blank action_text with a canned draft, then save
+    const actions = [ // one draft per shot position
+      "人物双手拿起 SKG 颈部按摩仪，准备戴到脖子上，镜头轻微推近产品。",
+      "人物把 SKG 按摩仪贴合到肩颈位置，手部轻轻调整两侧机身角度。",
+      "人物坐在场景中轻按侧边控制区，产品保持在画框指定区域内清晰可见。",
+      "人物闭眼放松，肩颈从紧绷变舒展，产品佩戴位置稳定不漂移。",
+      "镜头靠近展示 SKG 产品材质、按键和内侧触点，手部不要遮挡产品主体。",
+      "使用后的放松状态收尾，人物自然抬头，产品仍保持白色 U 形外观和真实比例。",
+    ]
+    const next = fusionShots.map((shot, i) => ({
+      ...shot,
+      action_text: shot.action_text?.trim() || (actions[i] ?? ""), // text already entered wins; "" if there are more shots than drafts
+    }))
+    setFusionShots(next)
+    void persistFusionShots(next)
+    toast.success(`已生成 ${next.length} 条动作描述草稿，可继续手工修改`) // count follows the shot list instead of a hard-coded 6
+  }
+
+  const fusionPointerPosition = (ev: React.MouseEvent<HTMLDivElement>) => {
+    // Pointer position as 0-1 fractions of the person-image wrapper; null when the wrapper is missing or has no size.
+    const box = fusionPersonWrapRef.current?.getBoundingClientRect()
+    if (!box || box.width <= 0 || box.height <= 0) return null
+    const clamp01 = (v: number) => Math.max(0, Math.min(1, v))
+    const x = clamp01((ev.clientX - box.left) / box.width)
+    return { x, y: clamp01((ev.clientY - box.top) / box.height) }
+  }
+
+  const onFusionRegionDown = (ev: React.MouseEvent<HTMLDivElement>) => { // start dragging a new product region
+    if (activeTab !== "product" || !currentFusionPersonUrl) return // only on the product tab and only once a person image is present
+    ev.preventDefault()
+    const p = fusionPointerPosition(ev)
+    if (!p) return
+    setFusionDragStart(p) // anchor corner of the drag
+    setFusionDraftRegion({ x: p.x, y: p.y, w: 0, h: 0 }) // zero-size draft at the anchor
+  }
+
+  const onFusionRegionMove = (ev: React.MouseEvent<HTMLDivElement>) => {
+    // While a drag is active, rebuild the draft rectangle spanned by the anchor and the pointer, whatever the drag direction.
+    const here = fusionDragStart ? fusionPointerPosition(ev) : null
+    if (!fusionDragStart || !here) return
+    setFusionDraftRegion({
+      x: Math.min(fusionDragStart.x, here.x),
+      y: Math.min(fusionDragStart.y, here.y),
+      w: Math.abs(here.x - fusionDragStart.x),
+      h: Math.abs(here.y - fusionDragStart.y),
+    })
+  }
+
+  const onFusionRegionUp = () => { // finish the drag; wired to both onMouseUp and onMouseLeave of the person-image wrapper
+    if (!fusionDraftRegion || !fusionDragStart) return // no drag in progress
+    const region = fusionDraftRegion.w >= 0.02 && fusionDraftRegion.h >= 0.02 ? fusionDraftRegion : null // drafts under 2% in either dimension are discarded
+    if (region) updateFusionShot(activeFusionShot, { product_region: region, guide_image: null }, true) // save the region and clear guide_image
+    setFusionDraftRegion(null)
+    setFusionDragStart(null)
+  }
+
+  const runFusionVideo = async (index: number) => { // hand one complete shot to onGenerateProductFusionVideo
+    const shot = fusionShots[index]
+    if (!shot?.product_image || !shot.person_image || !shot.scene_image || !shot.product_region || !shot.action_text?.trim()) { // all five inputs are required
+      toast.error(`镜头 ${index + 1} 还缺产品图、人物图、区域、场景图或描述词`)
+      return
+    }
+    setFusionGenerating(index)
+    try {
+      await onGenerateProductFusionVideo?.(f.index, shot) // no catch here: a rejection propagates to the caller
+    } finally {
+      setFusionGenerating(null) // reset the busy flag on success and on failure
+    }
+  }
+
+  const runAllFusionVideos = async () => { // hand every complete shot to onGenerateProductFusionVideo, one after another
+    const indexes = fusionShots
+      .map((shot, i) => ({ shot, i }))
+      .filter(({ shot }) => shot.product_image && shot.person_image && shot.scene_image && shot.product_region && shot.action_text?.trim()) // keep only shots with all five inputs
+      .map(({ i }) => i)
+    if (indexes.length === 0) {
+      toast.error("还没有完整的融合镜头")
+      return
+    }
+    setFusionGenerating("all")
+    try {
+      for (const index of indexes) { // sequential: each call is awaited before the next starts
+        await onGenerateProductFusionVideo?.(f.index, fusionShots[index]) // a rejection stops the loop and propagates to the caller
+      }
+      toast.success(`已提交 ${indexes.length} 条产品融合视频队列`) // NOTE(review): also shown when the callback prop is undefined - confirm that is intended
+    } finally {
+      setFusionGenerating(null)
+    }
+  }
 
   const handleDescribe = async () => {
     setDescribing(true)
@@ -705,13 +897,124 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
               </div>
             </section>
           ) : isProductTab ? (
-            <ProductLibraryPicker
-              jobId={jobId}
-              buttonLabel="复制"
-              title="产品融合 · SKG 白底图库"
-              disabled={!onCopyImage}
-              onPick={(ref) => onCopyImage?.(ref)}
-            />
+            <section
+              className="rounded-lg border border-amber-300/15 bg-amber-500/[0.06] p-2.5"
+              onPaste={(e) => {
+                if (fusionUploadTarget && e.clipboardData.files?.length) void uploadFusionFiles(e.clipboardData.files)
+              }}
+            >
+              <input
+                ref={fusionFileInputRef}
+                type="file"
+                accept="image/*"
+                className="hidden"
+                onChange={(e) => {
+                  const files = e.target.files
+                  if (files) void uploadFusionFiles(files)
+                  e.currentTarget.value = ""
+                }}
+              />
+              <div className="mb-2 flex items-center justify-between gap-2">
+                <div className="text-[12px] font-semibold text-white">产品融合镜头组</div>
+                <span className="rounded bg-black/35 px-1.5 py-0.5 text-[9.5px] font-mono text-white/55">
+                  {fusionReadyCount}/6 可生成
+                </span>
+              </div>
+              <div className="mb-2 rounded-md border border-white/10 bg-black/25 px-2 py-1.5 text-[10px] leading-relaxed text-white/50">
+                每行一条镜头：产品图、白底人物图、人物图上的产品区域、场景图和描述词一一对应。
+              </div>
+              <div className="mb-3 space-y-1.5">
+                {fusionShots.map((shot, i) => {
+                  const active = i === activeFusionShot
+                  const ready = !!(shot.product_image && shot.person_image && shot.product_region && shot.scene_image && shot.action_text?.trim())
+                  return (
+                    <button
+                      key={shot.id}
+                      type="button"
+                      onClick={() => setActiveFusionShot(i)}
+                      className={`grid w-full grid-cols-[40px_1fr_1fr_54px] items-center gap-1.5 rounded-md border px-2 py-1.5 text-left transition ${
+                        active
+                          ? "border-amber-300/70 bg-amber-500/18 text-white"
+                          : "border-white/10 bg-black/22 text-white/55 hover:border-amber-300/35 hover:text-white"
+                      }`}
+                    >
+                      <span className="font-mono text-[10px]">#{i + 1}</span>
+                      <span className="truncate text-[10px]">{shot.product_image?.label || "产品图空"}</span>
+                      <span className="truncate text-[10px]">{shot.scene_image?.label || "场景图空"}</span>
+                      <span className={`rounded px-1 py-0.5 text-center text-[9px] ${ready ? "bg-emerald-400/80 text-black" : "bg-white/10 text-white/45"}`}>
+                        {ready ? "就绪" : "待补"}
+                      </span>
+                    </button>
+                  )
+                })}
+              </div>
+              <div className="grid grid-cols-3 gap-2">
+                {([
+                  ["product_image", "产品图", currentFusionProductUrl],
+                  ["person_image", "白底人物图", currentFusionPersonUrl],
+                  ["scene_image", "场景图", currentFusionSceneUrl],
+                ] as const).map(([slot, label, url]) => (
+                  <div key={slot} className="overflow-hidden rounded-md border border-white/10 bg-black/25">
+                    <div className="relative bg-white" style={{ aspectRatio: "1/1" }}>
+                      {url ? (
+                        <img src={url} alt={label} className="absolute inset-0 h-full w-full object-contain" />
+                      ) : (
+                        <button
+                          type="button"
+                          onClick={() => openFusionUpload(slot)}
+                          className="absolute inset-0 flex flex-col items-center justify-center gap-1 text-[10px] text-black/35 hover:text-black/65"
+                        >
+                          <Upload className="h-4 w-4" />
+                          上传/粘贴
+                        </button>
+                      )}
+                    </div>
+                    <div className="flex items-center justify-between gap-1 border-t border-white/10 px-1.5 py-1">
+                      <span className="truncate text-[9.5px] text-white/55">{label}</span>
+                      <button
+                        type="button"
+                        onClick={() => openFusionUpload(slot)}
+                        className="rounded bg-white/10 px-1.5 py-0.5 text-[9px] text-white/70 hover:bg-white/18 hover:text-white"
+                      >
+                        上传
+                      </button>
+                    </div>
+                  </div>
+                ))}
+              </div>
+              <div className="mt-3 rounded-lg border border-white/10 bg-black/30 p-2">
+                <div className="mb-2 flex items-center justify-between gap-2">
+                  <div className="text-[11px] font-semibold text-white">产品区域 · 在人物图上画框</div>
+                  <span className="text-[9px] text-white/35">产品只在框内融合</span>
+                </div>
+                <div
+                  ref={fusionPersonWrapRef}
+                  onMouseDown={onFusionRegionDown}
+                  onMouseMove={onFusionRegionMove}
+                  onMouseUp={onFusionRegionUp}
+                  onMouseLeave={onFusionRegionUp}
+                  className={`relative overflow-hidden rounded-md border border-white/10 bg-white ${currentFusionPersonUrl ? "cursor-crosshair" : ""}`}
+                >
+                  {currentFusionPersonUrl ? (
+                    <img src={currentFusionPersonUrl} alt="fusion person" className="block w-full select-none object-contain" draggable={false} />
+                  ) : (
+                    <div className="flex h-64 items-center justify-center text-[11px] text-black/35">先上传白底人物图</div>
+                  )}
+                  {[currentFusionShot?.product_region, fusionDraftRegion].map((region, i) => region && (
+                    <div
+                      key={i} // positional index: 0 = saved region, 1 = live drag draft; the array is not filtered so the draft never takes slot 0
+                      className={`absolute pointer-events-none border-2 ${i === 0 ? "border-amber-300 bg-amber-300/10" : "border-dashed border-cyan-300"}`}
+                      style={{
+                        left: `${region.x * 100}%`, // region values are 0-1 fractions of the wrapper
+                        top: `${region.y * 100}%`,
+                        width: `${region.w * 100}%`,
+                        height: `${region.h * 100}%`,
+                      }}
+                    />
+                  ))}
+                </div>
+              </div>
+            </section>
           ) : (
             <div
               ref={imgWrapRef}
diff --git a/web/lib/api.ts b/web/lib/api.ts
index b4e9857..e78f13f 100644
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -57,11 +57,32 @@ export interface ImageRef {
   label?: string
 }
 
+export interface ProductFusionRegion { // rectangle on the person image; the lightbox produces values as 0-1 fractions
+  x: number // left edge, fraction of image width
+  y: number // top edge, fraction of image height
+  w: number // width, fraction of image width
+  h: number // height, fraction of image height
+}
+
+export interface ProductFusionShot { // one product-fusion shot; also the body sent by createProductFusionGuide
+  id: string
+  product_image?: ImageRef | null
+  person_image?: ImageRef | null
+  product_region?: ProductFusionRegion | null // where the product goes on the person image
+  scene_image?: ImageRef | null
+  action_text?: string
+  duration?: number // presumably seconds - TODO confirm
+  image_model?: "gpt-image-2" // the only value the type currently admits
+  video_model?: "seedance" // the only value the type currently admits
+  guide_image?: ImageRef | null
+}
+
 export interface StoryboardScene {
   duration: number
   first_image?: ImageRef | null
   last_image?: ImageRef | null
   product_images?: ImageRef[]
+  product_fusion_shots?: ProductFusionShot[]
   subject_image?: ImageRef | null
   scene_image?: ImageRef | null
   product_image?: ImageRef | null
@@ -140,6 +161,22 @@ export async function copyProductLibraryAsset(jobId: string, productId: string):
   return res.json()
 }
 
+/**
+ * POSTs one fusion shot to the job's product-fusion guide endpoint; resolves with the returned image ref, throws an Error (status + body excerpt) on a non-OK reply.
+ */
+export async function createProductFusionGuide(jobId: string, body: ProductFusionShot): Promise<ImageRef> {
+  const res = await fetch(`${API_BASE}/jobs/${encodeURIComponent(jobId)}/product-fusion/guide`, { // escape the id so it cannot alter the path
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify(body),
+  })
+  if (!res.ok) {
+    const txt = await res.text().catch(() => "") // the body is best-effort; an unreadable body becomes ""
+    throw new Error(`createProductFusionGuide ${res.status} ${txt.slice(0, 300)}`)
+  }
+  return res.json()
+}
+
 export interface KeyFrame {
   index: number
   timestamp: number