auto-save 2026-05-13 11:06 (~4)

2026-05-13 11:06:41 +08:00
parent 08d7cb470c
commit eb0f935bfe
4 changed files with 164 additions and 48 deletions
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1290,6 +1290,13 @@
      "type": "session-heartbeat",
      "message": "Claude 会话活跃 · 最近命令：claude · 2 项未提交变更 · 最近提交：auto-save 2026-05-13 10:55 (~4)",
      "files_changed": 2
+    },
+    {
+      "ts": "2026-05-13T11:01:06+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-13 11:00 (~2)",
+      "hash": "08d7cb4",
+      "files_changed": 2
    }
  ]
 }
--- a/api/main.py
+++ b/api/main.py
@@ -69,7 +69,8 @@ class KeyElement(BaseModel):
    name_zh: str
    name_en: str = ""
    position: str = ""  # 在画面中的位置描述（vision 给的）
-    source: Literal["auto", "manual"] = "manual"  # auto=vision 识别 / manual=用户加
+    source: Literal["auto", "manual", "region"] = "manual"  # auto=vision / manual=用户加 / region=画框
+    region: dict | None = None  # 用户画框的相对坐标 {x,y,w,h}（用于精准抠图）
    cutout_id: str | None = None  # 已抠图 → /jobs/{id}/frames/{idx}/elements/{element_id}/cutout.png
    created_at: float = 0.0

@@ -476,17 +477,33 @@ def _image_edit_call(
    model: str | None = None,
    fallback_text: bool = False,
    max_attempts: int = 3,
+    max_side: int = 1024,
 ) -> tuple[bytes, str]:
    """通用 image edit 调用 · 失败重试 + 可选 text fallback。
    返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
-    失败 raise RuntimeError。"""
+    失败 raise RuntimeError。
+    输入图自动 resize 到 max_side（默认 1024）边长后再 base64，避免大图把 Gemini
+    function call 输入挤超阈值导致 incomplete_generation。"""
    import base64 as b64lib
+    import io as _io
    import time as _time
    import httpx
+    from PIL import Image as _PILImage
    if not LLM_API_KEY:
        raise RuntimeError("LLM_API_KEY 未配置")
    model = model or IMAGE_MODEL
-    img_b64 = b64lib.b64encode(image_path.read_bytes()).decode("ascii")
+    # 缩到 max_side 内
+    try:
+        im = _PILImage.open(image_path)
+        if max(im.size) > max_side:
+            im.thumbnail((max_side, max_side), _PILImage.LANCZOS)
+        buf = _io.BytesIO()
+        im.convert("RGB").save(buf, format="JPEG", quality=88)
+        img_bytes_in = buf.getvalue()
+    except Exception:
+        # PIL 失败兜底走原文件
+        img_bytes_in = image_path.read_bytes()
+    img_b64 = b64lib.b64encode(img_bytes_in).decode("ascii")
    data_uri = f"data:image/jpeg;base64,{img_b64}"

    plan: list[str] = ["edit"] * max_attempts
@@ -985,7 +1002,7 @@ class CleanupReq(BaseModel):


 def _region_to_phrase(r: dict) -> str:
-    """把相对坐标矩形转成方位描述给 prompt 用"""
+    """把相对坐标矩形转成简短方位描述给 prompt 用（避免百分号 / 括号触发模型异常）"""
    x = max(0.0, min(1.0, float(r.get("x", 0))))
    y = max(0.0, min(1.0, float(r.get("y", 0))))
    w = max(0.0, min(1.0 - x, float(r.get("w", 0))))
@@ -993,15 +1010,15 @@ def _region_to_phrase(r: dict) -> str:
    if w <= 0 or h <= 0:
        return ""
    cx, cy = x + w / 2, y + h / 2
-    hpos = "left" if cx < 0.4 else "right" if cx > 0.6 else "center"
+    hpos = "left" if cx < 0.4 else "right" if cx > 0.6 else "middle"
    vpos = "top" if cy < 0.4 else "bottom" if cy > 0.6 else "middle"
-    quadrant = f"{vpos}-{hpos}" if hpos != "center" else vpos
-    x_pct = (int(x * 100), int((x + w) * 100))
-    y_pct = (int(y * 100), int((y + h) * 100))
-    return (
-        f"the {quadrant} area of the image "
-        f"(roughly horizontal {x_pct[0]}%-{x_pct[1]}%, vertical {y_pct[0]}%-{y_pct[1]}%)"
-    )
+    if hpos == "middle" and vpos == "middle":
+        return "center"
+    if hpos == "middle":
+        return vpos
+    if vpos == "middle":
+        return hpos
+    return f"{vpos} {hpos}"


@app.post("/jobs/{job_id}/frames/{idx}/cleanup", response_model=Job)
@@ -1023,14 +1040,11 @@ def cleanup_frame(job_id: str, idx: int, req: CleanupReq | None = None) -> Job:
    region_phrase = _region_to_phrase(req.region) if (req and req.region) else ""
    if region_phrase:
        prompt = (
-            f"Remove text overlays only within {region_phrase}: watermarks, usernames, captions, hashtags, "
-            "platform logos. Keep every other part of the image exactly unchanged."
+            f"Erase the text and graphics in the {region_phrase} part of the image. "
+            "Keep all other parts unchanged."
        )
    else:
-        prompt = (
-            "Remove all text overlays from this image: watermarks, usernames, captions, hashtags, "
-            "platform logos. Keep the rest of the scene intact and natural."
-        )
+        prompt = "Erase all watermarks and text overlays. Keep the scene natural."
    try:
        img_bytes, _mode = _image_edit_call(frame_path, prompt, fallback_text=False, max_attempts=3)
    except RuntimeError as e:
@@ -1105,7 +1119,8 @@ class AddElementReq(BaseModel):
    name_zh: str
    name_en: str = ""
    position: str = ""
-    source: Literal["auto", "manual"] = "manual"
+    source: Literal["auto", "manual", "region"] = "manual"
+    region: dict | None = None


@app.post("/jobs/{job_id}/frames/{idx}/elements", response_model=Job)
@@ -1152,6 +1167,7 @@ def add_element(job_id: str, idx: int, req: AddElementReq) -> Job:
        name_en=name_en,
        position=req.position.strip(),
        source=req.source,
+        region=req.region,
        created_at=_time.time(),
    )
    new_frames = []
@@ -1211,13 +1227,18 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
        raise HTTPException(404, "source frame file missing")

    target = (el.name_en or el.name_zh).strip()
-    position_hint = f" Located {el.position}." if el.position else ""
-    prompt = (
-        f"Extract the element '{target}' from this image as a standalone asset.{position_hint} "
-        "Output: the element on a fully transparent background (alpha channel), "
-        "isolated cleanly with no surrounding scene, no other objects, no shadows from the original scene. "
-        "Preserve the element's original colors, lighting, shape and proportions."
-    )
+    region_phrase = _region_to_phrase(el.region) if el.region else ""
+    if region_phrase:
+        prompt = (
+            f"Extract whatever is in the {region_phrase} part of the image as a standalone asset. "
+            "Output on transparent background, isolated, no other objects."
+        )
+    else:
+        position_hint = f" Located in the {el.position} area." if el.position else ""
+        prompt = (
+            f"Extract the {target} from this image as a standalone asset.{position_hint} "
+            "Output on transparent background, isolated, no other objects."
+        )
    try:
        img_bytes, _mode = _image_edit_call(src, prompt, fallback_text=False, max_attempts=3)
    except RuntimeError as e:
--- a/web/components/lightbox.tsx
+++ b/web/components/lightbox.tsx
@@ -34,6 +34,9 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
  const [cropMode, setCropMode] = useState(false)
  const [region, setRegion] = useState<{ x: number; y: number; w: number; h: number } | null>(null)
  const [dragStart, setDragStart] = useState<{ x: number; y: number } | null>(null)
+  const [extractNamePrompt, setExtractNamePrompt] = useState(false)  // 提取模式：要用户填名字
+  const [extractName, setExtractName] = useState("")
+  const [extracting, setExtracting] = useState(false)
  const imgWrapRef = useRef<HTMLDivElement>(null)
  useEffect(() => setMounted(true), [])

@@ -42,6 +45,8 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
    setCropMode(false)
    setRegion(null)
    setDragStart(null)
+    setExtractNamePrompt(false)
+    setExtractName("")
  }, [activeIndex])

  useEffect(() => {
@@ -99,6 +104,35 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o
    }
  }

+  const handleExtractRegion = async () => {
+    if (!region || !extractName.trim()) return
+    setExtracting(true)
+    try {
+      // 先加 element 拿到 id
+      const added = await addElement(jobId, f.index, {
+        name_zh: extractName.trim(),
+        source: "region",
+        region,
+      })
+      onJobUpdate?.(added)
+      // 找到新加的 element id（按 created_at desc 取最新一条）
+      const fr = added.frames.find((x) => x.index === f.index)
+      const newEl = fr?.elements?.sort((a, b) => (b.created_at ?? 0) - (a.created_at ?? 0))[0]
+      if (newEl) {
+        const cut = await cutoutElement(jobId, f.index, newEl.id)
+        onJobUpdate?.(cut)
+        toast.success(`「${extractName.trim()}」已提取并加入元素清单`)
+      } else {
+        toast.success(`「${extractName.trim()}」已加入元素清单 · 但抠图未触发`)
+      }
+      setCropMode(false); setRegion(null); setExtractNamePrompt(false); setExtractName("")
+    } catch (e) {
+      toast.error("提取失败：" + (e instanceof Error ? e.message : String(e)))
+    } finally {
+      setExtracting(false)
+    }
+  }
+
  // 画框 mouse handlers — 坐标基于 img wrapper 相对位置
  const getRelXY = (clientX: number, clientY: number) => {
    const el = imgWrapRef.current
@@ -294,32 +328,79 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o

          {/* 画框工具栏 */}
          {cropMode ? (
-            <div className="flex items-center gap-1.5">
-              <button
-                onClick={() => handleCleanup(true)}
-                disabled={cleaning || !region || region.w < 0.03 || region.h < 0.03}
-                className="flex-1 px-2 py-1.5 rounded-md text-[11px] font-medium inline-flex items-center justify-center gap-1 transition bg-cyan-500 hover:bg-cyan-400 text-white disabled:opacity-40 disabled:cursor-not-allowed"
-                title="只清洗框内"
-              >
-                {cleaning ? <Loader2 className="h-3 w-3 animate-spin" /> : <Sparkle className="h-3 w-3" />}
-                {cleaning ? "清洗框内…" : "✓ 清洗框内"}
-              </button>
-              <button
-                onClick={() => { setCropMode(false); setRegion(null); setDragStart(null) }}
-                className="px-2 py-1.5 rounded-md text-[11px] bg-white/10 hover:bg-white/20 text-white"
-                title="取消画框"
-              >
-                <X className="h-3 w-3" />
-              </button>
-            </div>
+            extractNamePrompt ? (
+              // 提取模式：要用户填名字
+              <div className="space-y-1.5">
+                <input
+                  autoFocus
+                  value={extractName}
+                  onChange={(e) => setExtractName(e.target.value)}
+                  onKeyDown={(e) => {
+                    if (e.key === "Enter" && !e.nativeEvent.isComposing && extractName.trim()) {
+                      e.preventDefault()
+                      handleExtractRegion()
+                    }
+                    if (e.key === "Escape") { setExtractNamePrompt(false); setExtractName("") }
+                  }}
+                  placeholder="给这个元素起个中文名（如：左下角药瓶）"
+                  className="w-full text-[11.5px] px-2 py-1.5 rounded-md bg-black/40 border border-violet-300/50 outline-none text-white placeholder:text-white/30 focus:ring-2 focus:ring-violet-400/40"
+                />
+                <div className="flex items-center gap-1.5">
+                  <button
+                    onClick={handleExtractRegion}
+                    disabled={extracting || !extractName.trim()}
+                    className="flex-1 px-2 py-1.5 rounded-md text-[11px] font-medium inline-flex items-center justify-center gap-1 transition bg-violet-500 hover:bg-violet-400 text-white disabled:opacity-40 disabled:cursor-not-allowed"
+                  >
+                    {extracting ? <Loader2 className="h-3 w-3 animate-spin" /> : <Wand2 className="h-3 w-3" />}
+                    {extracting ? "提取中…（5-15 秒）" : "✓ 提取"}
+                  </button>
+                  <button
+                    onClick={() => { setExtractNamePrompt(false); setExtractName("") }}
+                    className="px-2 py-1.5 rounded-md text-[11px] bg-white/10 hover:bg-white/20 text-white"
+                    title="返回"
+                  >
+                    <ChevronLeft className="h-3 w-3" />
+                  </button>
+                </div>
+              </div>
+            ) : (
+              // 画框完成 → 选操作
+              <div className="flex items-center gap-1">
+                <button
+                  onClick={() => handleCleanup(true)}
+                  disabled={cleaning || !region || region.w < 0.03 || region.h < 0.03}
+                  className="flex-1 px-1.5 py-1.5 rounded-md text-[10.5px] font-medium inline-flex items-center justify-center gap-1 transition bg-cyan-500 hover:bg-cyan-400 text-white disabled:opacity-40 disabled:cursor-not-allowed"
+                  title="清洗框内（去掉）"
+                >
+                  {cleaning ? <Loader2 className="h-3 w-3 animate-spin" /> : <Sparkle className="h-3 w-3" />}
+                  {cleaning ? "去掉中" : "✓ 去掉"}
+                </button>
+                <button
+                  onClick={() => { if (region && region.w >= 0.03 && region.h >= 0.03) setExtractNamePrompt(true) }}
+                  disabled={!region || region.w < 0.03 || region.h < 0.03}
+                  className="flex-1 px-1.5 py-1.5 rounded-md text-[10.5px] font-medium inline-flex items-center justify-center gap-1 transition bg-violet-500 hover:bg-violet-400 text-white disabled:opacity-40 disabled:cursor-not-allowed"
+                  title="提取框内为元素（加入元素清单）"
+                >
+                  <Wand2 className="h-3 w-3" />
+                  🪄 提取
+                </button>
+                <button
+                  onClick={() => { setCropMode(false); setRegion(null); setDragStart(null) }}
+                  className="px-1.5 py-1.5 rounded-md text-[11px] bg-white/10 hover:bg-white/20 text-white"
+                  title="取消"
+                >
+                  <X className="h-3 w-3" />
+                </button>
+              </div>
+            )
          ) : (
            <button
              onClick={() => { setCropMode(true); setRegion(null) }}
              className="w-full px-3 py-1.5 rounded-md text-[10.5px] font-medium inline-flex items-center justify-center gap-1.5 transition bg-white/[0.06] hover:bg-cyan-500/30 border border-white/15 hover:border-cyan-300/50 text-white/80 hover:text-white"
-              title="拖框限定清洗范围（推荐用于精确去掉某个角落的水印）"
+              title="拖框 → 去掉框内（清洗）或提取框内（抠为元素）"
            >
              <Crop className="h-3 w-3" />
-              📐 框选清洗范围
+              📐 框选区域 · 去掉 / 提取
            </button>
          )}

--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -39,7 +39,8 @@ export interface KeyElement {
  name_zh: string
  name_en: string
  position?: string
-  source: "auto" | "manual"
+  source: "auto" | "manual" | "region"
+  region?: { x: number; y: number; w: number; h: number } | null
  cutout_id?: string | null
  created_at?: number
 }
@@ -237,7 +238,13 @@ export async function applyCleanedFrame(jobId: string, frameIdx: number): Promis
 export async function addElement(
  jobId: string,
  frameIdx: number,
-  body: { name_zh: string; name_en?: string; position?: string; source?: "auto" | "manual" },
+  body: {
+    name_zh: string
+    name_en?: string
+    position?: string
+    source?: "auto" | "manual" | "region"
+    region?: { x: number; y: number; w: number; h: number } | null
+  },
 ): Promise<Job> {
  const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/elements`, {
    method: "POST",