auto-save 2026-05-13 14:43 (~6)

2026-05-13 14:44:00 +08:00
parent 9421836a6d
commit 59f6c16225
6 changed files with 106 additions and 69 deletions
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1695,6 +1695,13 @@
      "type": "session-heartbeat",
      "message": "Claude 会话活跃 · 最近命令：claude · 4 项未提交变更 · 最近提交：auto-save 2026-05-13 14:32 (~3)",
      "files_changed": 4
+    },
+    {
+      "ts": "2026-05-13T14:38:26+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-13 14:38 (~4)",
+      "hash": "9421836",
+      "files_changed": 4
    }
  ]
 }
--- a/api/main.py
+++ b/api/main.py
@@ -1267,10 +1267,12 @@ def delete_element(job_id: str, idx: int, element_id: str) -> Job:

@app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/cutout", response_model=Job)
 def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
-    """提取元素 · 每次调用累积一张新图（不覆盖之前的）：
-    - 有 region → PIL crop（瞬时 · 保留原表情/形体）
-    - 无 region → 调 nano-banana 模型生成白底图（5-15s）"""
+    """AI 提取元素 · 每次累积一张新图：
+    调 nano-banana 模型生成**完整、清晰**的元素图（即使原图只露出部分也补全）。
+    region 元素：先把 region + 30% padding 区域裁出作为 focus，再发给模型聚焦补全。"""
    from PIL import Image as _PILImage
+    import io as _io
+    import tempfile as _tempfile
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
@@ -1288,10 +1290,12 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job:

    out_dir = job_dir(job_id) / "elements"
    out_dir.mkdir(parents=True, exist_ok=True)
-    # 新建一个 cutout_id append 到 element.cutouts（而非覆盖）
    new_cutout_id = uuid.uuid4().hex[:8]
    out_path = out_dir / f"{idx:03d}_{element_id}_{new_cutout_id}.jpg"

+    # region 元素：先 PIL 裁出 region + 30% padding 作为 focus 给模型（让它聚焦在该元素）
+    tmp_focus: Path | None = None
+    model_src = src
    if el.region:
        try:
            im = _PILImage.open(src).convert("RGB")
@@ -1301,31 +1305,46 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
            y = max(0.0, min(1.0, float(r.get("y", 0))))
            w = max(0.0, min(1.0 - x, float(r.get("w", 0))))
            h = max(0.0, min(1.0 - y, float(r.get("h", 0))))
-            left, top = int(x * W), int(y * H)
-            right, bottom = int((x + w) * W), int((y + h) * H)
-            if right - left < 4 or bottom - top < 4:
-                raise HTTPException(400, "region 太小，无法提取")
-            cropped = im.crop((left, top, right, bottom))
-            cropped.save(out_path, format="JPEG", quality=92)
-        except HTTPException:
-            raise
+            cx, cy = x + w / 2, y + h / 2
+            # Pad the region by 30% of its size on each side (1.6x width/height overall) so the model keeps context for completing the element
+            ew, eh = w * 1.6, h * 1.6
+            x0 = max(0.0, cx - ew / 2); y0 = max(0.0, cy - eh / 2)
+            x1 = min(1.0, cx + ew / 2); y1 = min(1.0, cy + eh / 2)
+            left, top, right, bottom = int(x0 * W), int(y0 * H), int(x1 * W), int(y1 * H)
+            if right - left > 8 and bottom - top > 8:
+                cropped = im.crop((left, top, right, bottom))
+                tmp = _tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
+                cropped.save(tmp.name, format="JPEG", quality=92)
+                tmp.close()
+                tmp_focus = Path(tmp.name)
+                model_src = tmp_focus
        except Exception as e:
-            raise HTTPException(500, f"extract failed: {e}")
-    else:
-        target = (el.name_en or el.name_zh).strip()
-        position_hint = f" Located in the {el.position} area." if el.position else ""
-        prompt = (
-            f"Extract the {target} from this image as a standalone asset.{position_hint} "
-            "Place it on a pure white background, isolated, no other objects."
-        )
-        models = [IMAGE_MODEL, "gemini-2.5-flash-image"]
+            print(f"[cutout region crop failed, fallback to full frame] {e}", flush=True)
+
+    target = (el.name_en or el.name_zh).strip()
+    prompt = (
+        f"Identify the {target} in this image. "
+        f"Generate a complete, high-resolution, sharply detailed image of the entire {target} as a standalone asset. "
+        f"If the {target} is only partially visible in the source (cropped at edges, occluded by other objects, or out of frame), "
+        "intelligently reconstruct the missing parts based on visual context so the result shows the FULL element. "
+        "Place the complete element on a pure white background, isolated, with no other objects, no scene fragments, no shadows from the original scene. "
+        "Preserve the element's original color palette, style, lighting character, and proportions. "
+        "Output must be a clean, high-quality asset image suitable for downstream composition."
+    )
+    models = [IMAGE_MODEL, "gemini-2.5-flash-image"]
+    img_bytes: bytes
+    try:
        try:
            img_bytes, _mode = _image_edit_call(
-                src, prompt, models=models, fallback_text=False, max_attempts=3,
+                model_src, prompt, models=models, fallback_text=False, max_attempts=3,
            )
        except RuntimeError as e:
            raise HTTPException(500, f"extract failed: {e}")
-        out_path.write_bytes(img_bytes)
+    finally:
+        if tmp_focus and tmp_focus.exists():
+            try: tmp_focus.unlink()
+            except OSError: pass
+    out_path.write_bytes(img_bytes)

    new_frames = []
    for f in job.frames:
@@ -1333,12 +1352,10 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
            for e in f.elements:
                if e.id == element_id:
                    e.cutouts = (e.cutouts or []) + [new_cutout_id]
-                    # 兼容：若旧字段 cutout_id 未设置，记一下让旧 UI 仍能读到一张
                    if not e.cutout_id:
                        e.cutout_id = new_cutout_id
        new_frames.append(f)
-    msg_label = "提取（PIL）" if el.region else "提取（模型）"
-    update(job, frames=new_frames, message=f"{msg_label}完成 · {el.name_zh}")
+    update(job, frames=new_frames, message=f"提取完成 · {el.name_zh}")
    return job


--- a/web/components/lightbox.tsx
+++ b/web/components/lightbox.tsx
@@ -648,23 +648,15 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
                          </div>
                        </div>

-                        {/* 提取按钮（每次新增一张，不覆盖） */}
+                        {/* 提取按钮 — AI 补全完整元素（每次累积一张） */}
                        <button
                          onClick={() => handleCutout(e.id)}
                          disabled={isCutting}
-                          title={
-                            hasRegion
-                              ? `${hasAny ? "再提取一张" : "提取"}（从原图裁切框内 · 保留原表情形体 · 瞬时）`
-                              : `${hasAny ? "再提取一张" : "提取"}（调 nano-banana 模型生白底图 · 5-15s）`
-                          }
-                          className={`shrink-0 text-[10.5px] px-2 py-1 rounded inline-flex items-center gap-1 transition disabled:opacity-50 disabled:cursor-not-allowed font-medium ${
-                            hasRegion
-                              ? "bg-cyan-500/30 text-white/90 hover:bg-cyan-500/50"
-                              : "bg-violet-500/30 text-white/90 hover:bg-violet-500/50"
-                          }`}
+                          title={`${hasAny ? "再提取一张" : "AI 提取"} · 模型识别 + 补全缺失部分（如缺手脚 / 半个台灯）→ 完整清晰白底图（5-15s）`}
+                          className="shrink-0 text-[10.5px] px-2 py-1 rounded inline-flex items-center gap-1 transition disabled:opacity-50 disabled:cursor-not-allowed font-medium bg-violet-500/30 text-white/90 hover:bg-violet-500/50"
                        >
                          {isCutting ? <Loader2 className="h-3 w-3 animate-spin" /> : <Sparkle className="h-3 w-3" />}
-                          {isCutting ? "提取中" : hasAny ? "再提取" : "提取"}
+                          {isCutting ? "提取中…" : hasAny ? "再提取" : "AI 提取"}
                        </button>

                        {/* 删除整条元素 */}
--- a/web/components/nodes/index.tsx
+++ b/web/components/nodes/index.tsx
@@ -7,7 +7,7 @@ import {
  Mic, Languages, FileEdit, Sparkles, Film, FileVideo, Loader2, Plus, X, LayoutGrid,
 } from "lucide-react"
 import { NodeShell, type NodeStatus, type NodeKind } from "./node-shell"
-import { type Job, frameUrl, effectiveFrameUrl, videoUrl, generatedImageUrl, cutoutUrl } from "@/lib/api"
+import { type Job, frameUrl, effectiveFrameUrl, videoUrl, generatedImageUrl, cutoutUrl, hasCutout, representativeCutoutUrl } from "@/lib/api"

 export interface NodeData {
  job: Job | null            // 当前 active job
@@ -389,13 +389,13 @@ export function KeyframeNode({ data, selected }: any) {
                {isSel && (
                  <div className="absolute inset-0 bg-emerald-400/15 rounded-md pointer-events-none" />
                )}
-                {(f.cleaned_url || (f.elements?.some((e) => e.cutout_id))) && (
+                {(f.cleaned_url || (f.elements?.some((e) => hasCutout(e)))) && (
                  <div className="absolute top-0 left-0 flex items-center gap-0.5 px-1 py-0.5 rounded-br-md leading-none">
                    {f.cleaned_url && (
                      <span title="已清洗" className="bg-cyan-500/85 text-white text-[8px] font-bold px-1 py-0.5 rounded-sm">✨</span>
                    )}
                    {(() => {
-                      const cutN = f.elements?.filter((e) => e.cutout_id).length ?? 0
+                      const cutN = f.elements?.filter((e) => hasCutout(e)).length ?? 0
                      return cutN > 0 ? (
                        <span title={`${cutN} 个元素已抠图`} className="bg-violet-500/85 text-white text-[8px] font-mono font-bold px-1 py-0.5 rounded-sm">
                          {cutN}
@@ -440,7 +440,7 @@ export function KeyframeNode({ data, selected }: any) {
        {frames.length > 0 ? (() => {
          const cleanedCount = frames.filter((x) => x.cleaned_url).length
          const elementsCount = frames.reduce((s, x) => s + (x.elements?.length ?? 0), 0)
-          const cutoutCount = frames.reduce((s, x) => s + (x.elements?.filter((e) => e.cutout_id).length ?? 0), 0)
+          const cutoutCount = frames.reduce((s, x) => s + (x.elements?.filter((e) => hasCutout(e)).length ?? 0), 0)
          return (
            <div className="text-[11.5px] leading-relaxed text-[var(--text-soft)]">
              自动 <span className="text-[var(--text-strong)] font-medium">{frames.length}</span> 张
@@ -606,13 +606,17 @@ export function ImageGenNode({ data, selected }: any) {
  const [mounted, setMounted] = useState(false)
  useEffect(() => setMounted(true), [])

-  // 上方浮条 = 所有 frame 的 elements crop（"分镜头编排"的输入素材）
-  type ElPreview = { frameIdx: number; elementId: string; name: string }
+  // 上方浮条 = 所有 frame 的 elements 已提取图（"分镜头编排"的输入素材）
+  type ElPreview = { frameIdx: number; elementId: string; name: string; src: string }
  const elementCrops: ElPreview[] = job
    ? job.frames.flatMap((f) =>
        (f.elements ?? [])
-          .filter((e) => !!e.cutout_id)
-          .map((e) => ({ frameIdx: f.index, elementId: e.id, name: e.name_zh })),
+          .filter((e) => hasCutout(e))
+          .map((e) => {
+            const src = representativeCutoutUrl(job.id, f.index, e) || ""
+            return { frameIdx: f.index, elementId: e.id, name: e.name_zh, src }
+          })
+          .filter((p) => p.src),
      )
    : []

@@ -644,7 +648,7 @@ export function ImageGenNode({ data, selected }: any) {
                  className="absolute inset-0 w-full h-full"
                >
                  <img
-                    src={cutoutUrl(job.id, p.frameIdx, p.elementId)}
+                    src={p.src}
                    alt={p.name}
                    className="absolute inset-0 w-full h-full object-contain"
                  />
@@ -704,7 +708,7 @@ export function ImageGenNode({ data, selected }: any) {
          >
            <div className="rounded-2xl overflow-hidden border border-white/25 bg-black" style={{ boxShadow: "0 30px 80px -10px rgba(0,0,0,0.85), 0 0 0 1px rgba(255,255,255,0.06)" }}>
              <img
-                src={cutoutUrl(job.id, p.frameIdx, p.elementId)}
+                src={p.src}
                alt={`preview ${p.elementId}`}
                className="block"
                style={{ width: w, height: h, objectFit: "contain" }}
--- a/web/components/storyboard-bar.tsx
+++ b/web/components/storyboard-bar.tsx
@@ -2,7 +2,7 @@
 import { useEffect, useRef, useState } from "react"
 import { createPortal } from "react-dom"
 import { LayoutGrid, ChevronDown, ChevronUp, Sparkle, X, Wand2, Brush } from "lucide-react"
-import { type Job, type KeyFrame, effectiveFrameUrl, cutoutUrl } from "@/lib/api"
+import { type Job, type KeyFrame, effectiveFrameUrl, hasCutout, representativeCutoutUrl } from "@/lib/api"

 interface Props {
  job: Job | null
@@ -27,7 +27,7 @@ export function StoryboardBar({ job, selectedFrames, focusedFrame, onFocusFrame

  const aspect = job.height > 0 ? `${job.width}/${job.height}` : "9/16"
  const totalElements = frames.reduce(
-    (sum, f) => sum + (f.elements?.filter((e) => e.cutout_id).length ?? 0),
+    (sum, f) => sum + (f.elements?.filter((e) => hasCutout(e)).length ?? 0),
    0,
  )

@@ -39,7 +39,7 @@ export function StoryboardBar({ job, selectedFrames, focusedFrame, onFocusFrame
    ? job.frames.filter((f) => selectedFrames.has(f.index) && f.timestamp <= focusFrame.timestamp).length
    : 0
  const focusElements = focusFrame?.elements ?? []
-  const focusCutCount = focusElements.filter((e) => e.cutout_id).length
+  const focusCutCount = focusElements.filter((e) => hasCutout(e)).length

  return (
    <div className="relative z-20 flex-shrink-0 border-b border-white/5 bg-black/30 backdrop-blur-xl">
@@ -92,7 +92,7 @@ export function StoryboardBar({ job, selectedFrames, focusedFrame, onFocusFrame
        ) : (
          <div className="px-4 pb-3 flex gap-2 overflow-x-auto">
            {frames.map((f, i) => {
-              const elementCount = f.elements?.filter((e) => e.cutout_id).length ?? 0
+              const elementCount = f.elements?.filter((e) => hasCutout(e)).length ?? 0
              const totalElCount = f.elements?.length ?? 0
              const cleaned = f.cleaned_applied
              const isFocused = focusedFrame === f.index
@@ -177,24 +177,23 @@ export function StoryboardBar({ job, selectedFrames, focusedFrame, onFocusFrame
                  </div>
                ) : (
                  <div className="grid grid-cols-5 gap-1.5">
-                    {focusElements.map((e) => (
-                      <div key={e.id} className="rounded-md bg-white/[0.04] border border-white/10 p-1.5">
-                        <div className="w-full aspect-square rounded bg-black/40 overflow-hidden mb-1">
-                          {e.cutout_id ? (
-                            <img
-                              src={cutoutUrl(job.id, focusFrame.index, e.id)}
-                              alt={e.name_zh}
-                              className="w-full h-full object-contain"
-                            />
-                          ) : (
-                            <div className="w-full h-full inline-flex items-center justify-center">
-                              <Sparkle className="h-3.5 w-3.5 text-white/20" />
-                            </div>
-                          )}
+                    {focusElements.map((e) => {
+                      const src = representativeCutoutUrl(job.id, focusFrame.index, e)
+                      return (
+                        <div key={e.id} className="rounded-md bg-white/[0.04] border border-white/10 p-1.5">
+                          <div className="w-full aspect-square rounded bg-white overflow-hidden mb-1">
+                            {src ? (
+                              <img src={src} alt={e.name_zh} className="w-full h-full object-contain" />
+                            ) : (
+                              <div className="w-full h-full inline-flex items-center justify-center bg-black/40">
+                                <Sparkle className="h-3.5 w-3.5 text-white/20" />
+                              </div>
+                            )}
+                          </div>
+                          <div className="text-[10px] text-white truncate">{e.name_zh}</div>
                        </div>
-                        <div className="text-[10px] text-white truncate">{e.name_zh}</div>
-                      </div>
-                    ))}
+                      )
+                    })}
                  </div>
                )}
              </section>
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -220,6 +220,24 @@ export function cutoutUrl(jobId: string, frameIndex: number, elementId: string,
  return `${API_BASE}/jobs/${jobId}/frames/${frameIndex}/elements/${elementId}/cutout.jpg`
 }

+// True when the element has at least one extracted image: v1 stores a single cutout_id, v2 a cutouts array.
+export function hasCutout(e: KeyElement): boolean {
+  return Boolean(e.cutout_id) || (Array.isArray(e.cutouts) && e.cutouts.length > 0)
+}
+
+// URL of the element's representative cutout, or null if none: newest v2 entry, else the legacy v1 path.
+export function representativeCutoutUrl(
+  jobId: string,
+  frameIndex: number,
+  e: KeyElement,
+): string | null {
+  const ids = Array.isArray(e.cutouts) ? e.cutouts : []
+  // v2: the most recently appended id is the last array entry.
+  if (ids.length > 0) return cutoutUrl(jobId, frameIndex, e.id, ids[ids.length - 1])
+  // v1: only the single legacy cutout_id exists, so call cutoutUrl without a cutout id.
+  return e.cutout_id ? cutoutUrl(jobId, frameIndex, e.id) : null
+}
+
 export async function deleteCutout(jobId: string, frameIdx: number, elementId: string, cutoutId: string): Promise<Job> {
  const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/elements/${elementId}/cutouts/${cutoutId}`, {
    method: "DELETE",