diff --git a/.memory/worklog.json b/.memory/worklog.json index 83203e0..2a69737 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1695,6 +1695,13 @@ "type": "session-heartbeat", "message": "Claude 会话活跃 · 最近命令:claude · 4 项未提交变更 · 最近提交:auto-save 2026-05-13 14:32 (~3)", "files_changed": 4 + }, + { + "ts": "2026-05-13T14:38:26+08:00", + "type": "commit", + "message": "auto-save 2026-05-13 14:38 (~4)", + "hash": "9421836", + "files_changed": 4 } ] } diff --git a/api/main.py b/api/main.py index d5a89cb..e32c6d8 100644 --- a/api/main.py +++ b/api/main.py @@ -1267,10 +1267,12 @@ def delete_element(job_id: str, idx: int, element_id: str) -> Job: @app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/cutout", response_model=Job) def cutout_element(job_id: str, idx: int, element_id: str) -> Job: - """提取元素 · 每次调用累积一张新图(不覆盖之前的): - - 有 region → PIL crop(瞬时 · 保留原表情/形体) - - 无 region → 调 nano-banana 模型生成白底图(5-15s)""" + """AI 提取元素 · 每次累积一张新图: + 调 nano-banana 模型生成**完整、清晰**的元素图(即使原图只露出部分也补全)。 + region 元素:先把 region + 30% padding 区域裁出作为 focus,再发给模型聚焦补全。""" from PIL import Image as _PILImage + import io as _io + import tempfile as _tempfile job = JOBS.get(job_id) if not job: raise HTTPException(404, "job not found") @@ -1288,10 +1290,12 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job: out_dir = job_dir(job_id) / "elements" out_dir.mkdir(parents=True, exist_ok=True) - # 新建一个 cutout_id append 到 element.cutouts(而非覆盖) new_cutout_id = uuid.uuid4().hex[:8] out_path = out_dir / f"{idx:03d}_{element_id}_{new_cutout_id}.jpg" + # region 元素:先 PIL 裁出 region + 30% padding 作为 focus 给模型(让它聚焦在该元素) + tmp_focus: Path | None = None + model_src = src if el.region: try: im = _PILImage.open(src).convert("RGB") @@ -1301,31 +1305,46 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job: y = max(0.0, min(1.0, float(r.get("y", 0)))) w = max(0.0, min(1.0 - x, float(r.get("w", 0)))) h = max(0.0, min(1.0 - y, float(r.get("h", 0)))) - left, top = int(x * W), int(y * H) - right, bottom = int((x + w) * W), int((y + h) * H) - if right - left < 4 or bottom - top < 4: - raise HTTPException(400, "region 太小,无法提取") - cropped = im.crop((left, top, right, bottom)) - cropped.save(out_path, format="JPEG", quality=92) - except HTTPException: - raise + cx, cy = x + w / 2, y + h / 2 + # 扩大 30% 给上下文(避免裁到正好边界丢失补全 hint) + ew, eh = w * 1.6, h * 1.6 + x0 = max(0.0, cx - ew / 2); y0 = max(0.0, cy - eh / 2) + x1 = min(1.0, cx + ew / 2); y1 = min(1.0, cy + eh / 2) + left, top, right, bottom = int(x0 * W), int(y0 * H), int(x1 * W), int(y1 * H) + if right - left > 8 and bottom - top > 8: + cropped = im.crop((left, top, right, bottom)) + tmp = _tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) + cropped.save(tmp.name, format="JPEG", quality=92) + tmp.close() + tmp_focus = Path(tmp.name) + model_src = tmp_focus except Exception as e: - raise HTTPException(500, f"extract failed: {e}") - else: - target = (el.name_en or el.name_zh).strip() - position_hint = f" Located in the {el.position} area." if el.position else "" - prompt = ( - f"Extract the {target} from this image as a standalone asset.{position_hint} " - "Place it on a pure white background, isolated, no other objects." - ) - models = [IMAGE_MODEL, "gemini-2.5-flash-image"] + print(f"[cutout region crop failed, fallback to full frame] {e}", flush=True) + + target = (el.name_en or el.name_zh).strip() + prompt = ( + f"Identify the {target} in this image. " + f"Generate a complete, high-resolution, sharply detailed image of the entire {target} as a standalone asset. " + f"If the {target} is only partially visible in the source (cropped at edges, occluded by other objects, or out of frame), " + "intelligently reconstruct the missing parts based on visual context so the result shows the FULL element. " + "Place the complete element on a pure white background, isolated, with no other objects, no scene fragments, no shadows from the original scene. " + "Preserve the element's original color palette, style, lighting character, and proportions. " + "Output must be a clean, high-quality asset image suitable for downstream composition." + ) + models = [IMAGE_MODEL, "gemini-2.5-flash-image"] + img_bytes: bytes + try: try: img_bytes, _mode = _image_edit_call( - src, prompt, models=models, fallback_text=False, max_attempts=3, + model_src, prompt, models=models, fallback_text=False, max_attempts=3, ) except RuntimeError as e: raise HTTPException(500, f"extract failed: {e}") - out_path.write_bytes(img_bytes) + finally: + if tmp_focus and tmp_focus.exists(): + try: tmp_focus.unlink() + except OSError: pass + out_path.write_bytes(img_bytes) new_frames = [] for f in job.frames: @@ -1333,12 +1352,10 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job: for e in f.elements: if e.id == element_id: e.cutouts = (e.cutouts or []) + [new_cutout_id] - # 兼容:若旧字段 cutout_id 未设置,记一下让旧 UI 仍能读到一张 if not e.cutout_id: e.cutout_id = new_cutout_id new_frames.append(f) - msg_label = "提取(PIL)" if el.region else "提取(模型)" - update(job, frames=new_frames, message=f"{msg_label}完成 · {el.name_zh}") + update(job, frames=new_frames, message=f"提取完成 · {el.name_zh}") return job diff --git a/web/components/lightbox.tsx b/web/components/lightbox.tsx index e20450e..6f9f6aa 100644 --- a/web/components/lightbox.tsx +++ b/web/components/lightbox.tsx @@ -648,23 +648,15 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o - {/* 提取按钮(每次新增一张,不覆盖) */} + {/* 提取按钮 — AI 补全完整元素(每次累积一张) */} {/* 删除整条元素 */} diff --git a/web/components/nodes/index.tsx b/web/components/nodes/index.tsx index c76bbce..53274c7 100644 --- a/web/components/nodes/index.tsx +++ b/web/components/nodes/index.tsx @@ -7,7 +7,7 @@ import { Mic, Languages, FileEdit, Sparkles, Film, FileVideo, Loader2, Plus, X, LayoutGrid, } from "lucide-react" import { NodeShell, type NodeStatus, type NodeKind } from "./node-shell" -import { type Job, frameUrl, effectiveFrameUrl, videoUrl, generatedImageUrl, cutoutUrl } from "@/lib/api" +import { type Job, frameUrl, effectiveFrameUrl, videoUrl, generatedImageUrl, cutoutUrl, hasCutout, representativeCutoutUrl } from "@/lib/api" export interface NodeData { job: Job | null // 当前 active job @@ -389,13 +389,13 @@ export function KeyframeNode({ data, selected }: any) { {isSel && (
)} - {(f.cleaned_url || (f.elements?.some((e) => e.cutout_id))) && ( + {(f.cleaned_url || (f.elements?.some((e) => hasCutout(e)))) && (
{f.cleaned_url && ( )} {(() => { - const cutN = f.elements?.filter((e) => e.cutout_id).length ?? 0 + const cutN = f.elements?.filter((e) => hasCutout(e)).length ?? 0 return cutN > 0 ? ( {cutN} @@ -440,7 +440,7 @@ export function KeyframeNode({ data, selected }: any) { {frames.length > 0 ? (() => { const cleanedCount = frames.filter((x) => x.cleaned_url).length const elementsCount = frames.reduce((s, x) => s + (x.elements?.length ?? 0), 0) - const cutoutCount = frames.reduce((s, x) => s + (x.elements?.filter((e) => e.cutout_id).length ?? 0), 0) + const cutoutCount = frames.reduce((s, x) => s + (x.elements?.filter((e) => hasCutout(e)).length ?? 0), 0) return (
自动 {frames.length} 张 @@ -606,13 +606,17 @@ export function ImageGenNode({ data, selected }: any) { const [mounted, setMounted] = useState(false) useEffect(() => setMounted(true), []) - // 上方浮条 = 所有 frame 的 elements crop("分镜头编排"的输入素材) - type ElPreview = { frameIdx: number; elementId: string; name: string } + // 上方浮条 = 所有 frame 的 elements 已提取图("分镜头编排"的输入素材) + type ElPreview = { frameIdx: number; elementId: string; name: string; src: string } const elementCrops: ElPreview[] = job ? job.frames.flatMap((f) => (f.elements ?? []) - .filter((e) => !!e.cutout_id) - .map((e) => ({ frameIdx: f.index, elementId: e.id, name: e.name_zh })), + .filter((e) => hasCutout(e)) + .map((e) => { + const src = representativeCutoutUrl(job.id, f.index, e) || "" + return { frameIdx: f.index, elementId: e.id, name: e.name_zh, src } + }) + .filter((p) => p.src), ) : [] @@ -644,7 +648,7 @@ export function ImageGenNode({ data, selected }: any) { className="absolute inset-0 w-full h-full" > {p.name} @@ -704,7 +708,7 @@ export function ImageGenNode({ data, selected }: any) { >
{`preview 0 ? `${job.width}/${job.height}` : "9/16" const totalElements = frames.reduce( - (sum, f) => sum + (f.elements?.filter((e) => e.cutout_id).length ?? 0), + (sum, f) => sum + (f.elements?.filter((e) => hasCutout(e)).length ?? 0), 0, ) @@ -39,7 +39,7 @@ export function StoryboardBar({ job, selectedFrames, focusedFrame, onFocusFrame ? job.frames.filter((f) => selectedFrames.has(f.index) && f.timestamp <= focusFrame.timestamp).length : 0 const focusElements = focusFrame?.elements ?? [] - const focusCutCount = focusElements.filter((e) => e.cutout_id).length + const focusCutCount = focusElements.filter((e) => hasCutout(e)).length return (
@@ -92,7 +92,7 @@ export function StoryboardBar({ job, selectedFrames, focusedFrame, onFocusFrame ) : (
{frames.map((f, i) => { - const elementCount = f.elements?.filter((e) => e.cutout_id).length ?? 0 + const elementCount = f.elements?.filter((e) => hasCutout(e)).length ?? 0 const totalElCount = f.elements?.length ?? 0 const cleaned = f.cleaned_applied const isFocused = focusedFrame === f.index @@ -177,24 +177,23 @@ export function StoryboardBar({ job, selectedFrames, focusedFrame, onFocusFrame
) : (
- {focusElements.map((e) => ( -
-
- {e.cutout_id ? ( - {e.name_zh} - ) : ( -
- -
- )} + {focusElements.map((e) => { + const src = representativeCutoutUrl(job.id, focusFrame.index, e) + return ( +
+
+ {src ? ( + {e.name_zh} + ) : ( +
+ +
+ )} +
+
{e.name_zh}
-
{e.name_zh}
-
- ))} + ) + })}
)} diff --git a/web/lib/api.ts b/web/lib/api.ts index 8f668b0..4e48f77 100644 --- a/web/lib/api.ts +++ b/web/lib/api.ts @@ -220,6 +220,24 @@ export function cutoutUrl(jobId: string, frameIndex: number, elementId: string, return `${API_BASE}/jobs/${jobId}/frames/${frameIndex}/elements/${elementId}/cutout.jpg` } +// 兼容 v1 (cutout_id) / v2 (cutouts 数组) — 返回"有没有提取图" +export function hasCutout(e: KeyElement): boolean { + return (Array.isArray(e.cutouts) && e.cutouts.length > 0) || !!e.cutout_id +} + +// 返回代表性 cutout 的 URL(v2 取最新一张,v1 用旧路径) +export function representativeCutoutUrl( + jobId: string, + frameIndex: number, + e: KeyElement, +): string | null { + if (Array.isArray(e.cutouts) && e.cutouts.length > 0) { + return cutoutUrl(jobId, frameIndex, e.id, e.cutouts[e.cutouts.length - 1]) + } + if (e.cutout_id) return cutoutUrl(jobId, frameIndex, e.id) + return null +} + export async function deleteCutout(jobId: string, frameIdx: number, elementId: string, cutoutId: string): Promise { const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/elements/${elementId}/cutouts/${cutoutId}`, { method: "DELETE",