# api/main.py — keyframe cleanup and element-extraction additions.
# NOTE(review): reconstructed from a whitespace-collapsed diff; neighbouring
# definitions that were only partially visible (GeneratedImage,
# pipeline_transcribe, CreateJobReq, describe_frame) are not reproduced here.


class KeyElement(BaseModel):
    """An element recognised in (or manually added to) a key frame.

    Each element can be cut out on its own so downstream steps can reuse it
    as a derivative-work asset layer.
    """

    id: str  # first 8 hex chars of a uuid4
    name_zh: str
    name_en: str = ""
    position: str = ""  # where the element sits in the picture (as described by the vision model)
    source: Literal["auto", "manual"] = "manual"  # auto = vision model, manual = added by the user
    # Set once a cutout exists; served at
    # /jobs/{id}/frames/{idx}/elements/{element_id}/cutout.png
    cutout_id: str | None = None
    created_at: float = 0.0


class KeyFrame(BaseModel):
    index: int
    timestamp: float
    url: str
    description: dict | None = None  # vision result {scene, objects, style, suggested_prompt}
    cleaned_url: str | None = None  # cleaned version -> /jobs/{id}/frames/{idx}/cleaned.jpg
    elements: list[KeyElement] = []  # extracted elements (persisted with the frame)
    generated_images: list[GeneratedImage] = []


def _sniff_image_mime(head: bytes, default: str = "image/jpeg") -> str:
    """Return the MIME type implied by an image's leading magic bytes.

    ``head`` needs at least the first 12 bytes of the file for the WebP check.
    ``default`` is returned when no known signature matches.
    """
    if head.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if head.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if head[:4] == b"RIFF" and head[8:12] == b"WEBP":
        return "image/webp"
    if head.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    return default


def _image_edit_call(
    image_path: Path,
    prompt: str,
    model: str | None = None,
    fallback_text: bool = False,
    max_attempts: int = 3,
) -> tuple[bytes, str]:
    """Run an image-edit request with retries and an optional text-only fallback.

    Args:
        image_path: Reference image sent to the model as a base64 data URI.
        prompt: Edit instruction.
        model: Model name; falls back to ``IMAGE_MODEL`` when falsy.
        fallback_text: When true, one final text-to-image attempt (without the
            reference image) is made after all edit attempts have failed.
        max_attempts: Number of "edit" attempts before the optional fallback.

    Returns:
        ``(image_bytes, effective_mode)`` where ``effective_mode`` is
        ``"edit"`` or ``"text"``.

    Raises:
        RuntimeError: the API key is missing, a non-transient HTTP error was
            returned, every attempt failed, or the response had no ``b64_json``.
    """
    import base64 as b64lib
    import time as _time

    import httpx

    if not LLM_API_KEY:
        raise RuntimeError("LLM_API_KEY 未配置")
    model = model or IMAGE_MODEL

    raw = image_path.read_bytes()
    # The reference file may hold whatever format a previous model call
    # returned (cleanup output is stored unconverted), so label the data URI
    # from the actual bytes rather than assuming JPEG.
    mime = _sniff_image_mime(raw[:16])
    data_uri = f"data:{mime};base64,{b64lib.b64encode(raw).decode('ascii')}"

    plan: list[str] = ["edit"] * max_attempts
    if fallback_text:
        plan.append("text")

    last_err = ""
    data_arr: list = []
    effective_mode = "edit"
    for attempt, current_mode in enumerate(plan):
        try:
            if current_mode == "edit":
                with httpx.Client(timeout=120) as client:
                    r = client.post(
                        f"{LLM_BASE_URL}/images/generations",
                        headers={
                            "Authorization": f"Bearer {LLM_API_KEY}",
                            "Content-Type": "application/json",
                        },
                        json={"model": model, "prompt": prompt, "image": data_uri, "n": 1},
                    )
                    r.raise_for_status()
                    payload = r.json()
            else:
                resp = llm().images.generate(model=model, prompt=prompt, n=1)
                if hasattr(resp, "model_dump"):
                    payload = resp.model_dump()
                else:
                    payload = {"data": [{"b64_json": resp.data[0].b64_json}]}
            # A malformed (non-dict) payload raises here and is handled by the
            # generic handler below as one more failed attempt.
            if payload.get("data"):
                data_arr = payload["data"]
                effective_mode = current_mode
                break
            err_obj = payload.get("error") or {}
            last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]}"
        except httpx.HTTPStatusError as e:
            body = e.response.text
            transient = (
                e.response.status_code >= 500
                or "incomplete_generation" in body
                or "rate_limit" in body
                or "timeout" in body.lower()
            )
            last_err = f"HTTP {e.response.status_code}: {body[:200]}"
            if not transient:
                # Client-side errors will not get better on retry; fail fast.
                raise RuntimeError(f"image edit HTTP {e.response.status_code}: {body[:300]}") from e
        except Exception as e:  # retry boundary: network errors, bad payloads, SDK errors
            last_err = f"{type(e).__name__}: {e}"

        if attempt < len(plan) - 1:
            next_mode = plan[attempt + 1]
            if next_mode != current_mode:
                tag = f"fallback → {next_mode}"
            else:
                tag = f"retry {attempt + 1}/{len(plan)}"
            print(f"[image edit {tag}] {last_err}", flush=True)
            _time.sleep(1.5 * (attempt + 1))  # linear back-off between attempts

    if not data_arr:
        raise RuntimeError(f"image edit failed after {len(plan)} attempts: {last_err}")
    b64 = data_arr[0].get("b64_json")
    if not b64:
        raise RuntimeError("image edit returned no b64_json")
    return b64lib.b64decode(b64), effective_mode


# ---------- Watermark cleanup / element extraction (second-stage keyframe processing) ----------

def _get_job_and_frame(job_id: str, idx: int) -> "tuple[Job, KeyFrame]":
    """Look up a job and the frame with index ``idx``; raise HTTP 404 if either is missing."""
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    frame = next((f for f in job.frames if f.index == idx), None)
    if not frame:
        raise HTTPException(404, "frame not found")
    return job, frame


def _serve_image(path: Path, default_mime: str, missing_detail: str):
    """Return a FileResponse for ``path`` with a Content-Type sniffed from its bytes.

    The stored files are written exactly as the model returned them, so the
    extension alone does not establish the format. Raises HTTP 404 with
    ``missing_detail`` when the file does not exist.
    """
    try:
        with path.open("rb") as fh:
            head = fh.read(16)
    except FileNotFoundError:
        raise HTTPException(404, missing_detail) from None
    return FileResponse(path, media_type=_sniff_image_mime(head, default_mime))


@app.post("/jobs/{job_id}/frames/{idx}/cleanup", response_model=Job)
def cleanup_frame(job_id: str, idx: int) -> Job:
    """Clean a key frame via the image-edit model.

    Asks the model to remove watermarks, @usernames, subtitles and platform
    logos, writes the result to ``<job dir>/cleaned/<idx>.jpg`` and stores the
    URL in ``frame.cleaned_url``.

    Raises:
        HTTPException: 404 when the job, frame or frame file is missing;
            500 when the image-edit call fails.
    """
    import time as _time

    job, frame = _get_job_and_frame(job_id, idx)
    frame_path = job_dir(job_id) / "frames" / f"{idx:03d}.jpg"
    if not frame_path.exists():
        raise HTTPException(404, "frame file missing")

    prompt = (
        "Clean this image by removing all overlay graphics that obstruct the main content: "
        "watermarks, social media usernames or @handles, platform logos (TikTok, Instagram, etc.), "
        "subtitles, captions, overlay text, sticker text, hashtags. "
        "Keep all original scene elements (characters, props, background, lighting) intact. "
        "The result should look like the same photograph with overlay UI removed — "
        "natural, seamless, no visible patches or artifacts."
    )
    try:
        img_bytes, _mode = _image_edit_call(frame_path, prompt, fallback_text=False, max_attempts=3)
    except RuntimeError as e:
        raise HTTPException(500, f"cleanup failed: {e}") from e

    out_dir = job_dir(job_id) / "cleaned"
    out_dir.mkdir(parents=True, exist_ok=True)
    (out_dir / f"{idx:03d}.jpg").write_bytes(img_bytes)

    # The timestamp query string busts client caches when a frame is re-cleaned.
    frame.cleaned_url = f"/jobs/{job_id}/frames/{idx}/cleaned.jpg?t={int(_time.time())}"
    update(job, frames=list(job.frames), message=f"清洗完成 · 分镜 {idx + 1}")
    return job


@app.get("/jobs/{job_id}/frames/{idx}/cleaned.jpg")
def get_cleaned_frame(job_id: str, idx: int):
    """Serve the cleaned version of a key frame, or 404 if it has not been generated."""
    p = job_dir(job_id) / "cleaned" / f"{idx:03d}.jpg"
    return _serve_image(p, "image/jpeg", "cleaned frame not found")


class AddElementReq(BaseModel):
    name_zh: str
    name_en: str = ""
    position: str = ""
    source: Literal["auto", "manual"] = "manual"


def _translate_element_name(name_zh: str) -> str:
    """Best-effort zh→en translation of an element label; returns "" on any failure."""
    import re as _re

    prompt = (
        "Translate the following text into concise English, suitable as an element label "
        "in an image-generation prompt. Output only the translation — no quotes, no punctuation, "
        f"no explanation.\n\nInput: {name_zh}"
    )
    try:
        resp = llm().chat.completions.create(
            model=TRANSLATE_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            max_tokens=200,
        )
        message = resp.choices[0].message
        out = (message.content or "").strip()
        if not out:
            # Some models leave `content` empty and put the answer at the end
            # of `reasoning_content`; use its last line in that case.
            rc = (getattr(message, "reasoning_content", "") or "").strip()
            if rc:
                out = rc.splitlines()[-1].strip()
        # Strip quote characters the model may have wrapped around the answer.
        return _re.sub(r'^[\'"「『]+|[\'"」』]+$', "", out).strip()
    except Exception as e:  # translation is optional; never fail the request over it
        print(f"[add_element translate failed] {e}", flush=True)
        return ""


@app.post("/jobs/{job_id}/frames/{idx}/elements", response_model=Job)
def add_element(job_id: str, idx: int, req: AddElementReq) -> Job:
    """Append an element to a frame, auto-translating zh→en when ``name_en`` is empty.

    Raises:
        HTTPException: 404 when the job or frame is missing; 400 when
            ``name_zh`` is blank.
    """
    import time as _time

    job, frame = _get_job_and_frame(job_id, idx)
    name_zh = req.name_zh.strip()
    if not name_zh:
        raise HTTPException(400, "name_zh required")
    name_en = req.name_en.strip()
    if not name_en and LLM_API_KEY:
        name_en = _translate_element_name(name_zh)

    el = KeyElement(
        id=uuid.uuid4().hex[:8],
        name_zh=name_zh,
        name_en=name_en,
        position=req.position.strip(),
        source=req.source,
        created_at=_time.time(),
    )
    frame.elements = frame.elements + [el]
    update(job, frames=list(job.frames), message=f"加入元素 · 分镜 {idx + 1} · {name_zh}")
    return job


@app.delete("/jobs/{job_id}/frames/{idx}/elements/{element_id}", response_model=Job)
def delete_element(job_id: str, idx: int, element_id: str) -> Job:
    """Remove an element from a frame and delete its cutout file, if any.

    Raises:
        HTTPException: 404 when the job is missing, or when no element with
            ``element_id`` exists on frame ``idx`` (including a missing frame).
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    frame = next((f for f in job.frames if f.index == idx), None)
    kept = [e for e in frame.elements if e.id != element_id] if frame else []
    if frame is None or len(kept) == len(frame.elements):
        raise HTTPException(404, "element not found")
    frame.elements = kept

    # Best-effort removal of the cutout file: a failure here must not block
    # the deletion of the element itself, but it should leave a trace.
    cutout = job_dir(job_id) / "elements" / f"{idx:03d}_{element_id}.png"
    try:
        cutout.unlink(missing_ok=True)
    except OSError as e:
        print(f"[delete_element cutout unlink failed] {e}", flush=True)

    update(job, frames=list(job.frames), message=f"删除元素 · 分镜 {idx + 1}")
    return job


@app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/cutout", response_model=Job)
def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
    """Cut a single element out of a frame via the image-edit model.

    The prompt asks for the element on a transparent background; the returned
    bytes are written to ``<job dir>/elements/<idx>_<element_id>.png``.

    Raises:
        HTTPException: 404 when the job, frame, element or source image is
            missing; 500 when the image-edit call fails.
    """
    job, frame = _get_job_and_frame(job_id, idx)
    el = next((e for e in frame.elements if e.id == element_id), None)
    if not el:
        raise HTTPException(404, "element not found")

    # Prefer the cleaned frame as reference (logos / watermarks already
    # removed); fall back to the original frame.
    cleaned_path = job_dir(job_id) / "cleaned" / f"{idx:03d}.jpg"
    src = cleaned_path if cleaned_path.exists() else job_dir(job_id) / "frames" / f"{idx:03d}.jpg"
    if not src.exists():
        raise HTTPException(404, "source frame file missing")

    target = (el.name_en or el.name_zh).strip()
    position_hint = f" Located {el.position}." if el.position else ""
    prompt = (
        f"Extract the element '{target}' from this image as a standalone asset.{position_hint} "
        "Output: the element on a fully transparent background (alpha channel), "
        "isolated cleanly with no surrounding scene, no other objects, no shadows from the original scene. "
        "Preserve the element's original colors, lighting, shape and proportions."
    )
    try:
        img_bytes, _mode = _image_edit_call(src, prompt, fallback_text=False, max_attempts=3)
    except RuntimeError as e:
        raise HTTPException(500, f"cutout failed: {e}") from e

    out_dir = job_dir(job_id) / "elements"
    out_dir.mkdir(parents=True, exist_ok=True)
    (out_dir / f"{idx:03d}_{element_id}.png").write_bytes(img_bytes)

    el.cutout_id = element_id  # marker that cutout exists; URL derived from id
    update(job, frames=list(job.frames), message=f"抠图完成 · {el.name_zh}")
    return job


@app.get("/jobs/{job_id}/frames/{idx}/elements/{element_id}/cutout.png")
def get_cutout(job_id: str, idx: int, element_id: str):
    """Serve the cutout image of an element, or 404 if it has not been generated."""
    p = job_dir(job_id) / "elements" / f"{idx:03d}_{element_id}.png"
    return _serve_image(p, "image/png", "cutout not found")