diff --git a/.memory/worklog.json b/.memory/worklog.json index 8c619e6..fe616cd 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1483,6 +1483,19 @@ "message": "auto-save 2026-05-13 12:40 (~4)", "hash": "95b1354", "files_changed": 4 + }, + { + "ts": "2026-05-13T12:46:27+08:00", + "type": "commit", + "message": "auto-save 2026-05-13 12:46 (~1)", + "hash": "fbbf3c7", + "files_changed": 1 + }, + { + "ts": "2026-05-13T04:47:38Z", + "type": "session-heartbeat", + "message": "Claude 会话活跃 · 最近命令:claude · 1 项未提交变更 · 最近提交:auto-save 2026-05-13 12:46 (~1)", + "files_changed": 1 } ] } diff --git a/api/main.py b/api/main.py index a2c0ab5..1826533 100644 --- a/api/main.py +++ b/api/main.py @@ -477,6 +477,7 @@ def _image_edit_call( image_path: Path, prompt: str, model: str | None = None, + models: list[str] | None = None, fallback_text: bool = False, max_attempts: int = 3, max_side: int = 1024, @@ -485,7 +486,8 @@ def _image_edit_call( 返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。 失败 raise RuntimeError。 输入图自动 resize 到 max_side(默认 1024)边长后再 base64,避免大图把 Gemini - function call 输入挤超阈值导致 incomplete_generation。""" + function call 输入挤超阈值导致 incomplete_generation。 + models: 多模型轮换列表,重试时换 model;不传则单一 model 重试。""" import base64 as b64lib import io as _io import time as _time @@ -493,7 +495,12 @@ def _image_edit_call( from PIL import Image as _PILImage if not LLM_API_KEY: raise RuntimeError("LLM_API_KEY 未配置") - model = model or IMAGE_MODEL + # model 优先级:models 列表 > 单个 model 参数 > IMAGE_MODEL + if models and len(models) > 0: + models_cycle = list(models) + else: + models_cycle = [model or IMAGE_MODEL] + model = models_cycle[0] # 缩到 max_side 内 try: im = _PILImage.open(image_path) @@ -516,6 +523,8 @@ def _image_edit_call( resp_data: dict = {} effective_mode = "edit" for attempt, current_mode in enumerate(plan): + # 多模型轮换:第 N 次重试用第 N 个 model(不够时用最后一个) + current_model = models_cycle[min(attempt, len(models_cycle) - 1)] try: if current_mode == "edit": with httpx.Client(timeout=120) as client: @@ -525,18 +534,19 @@ def _image_edit_call( "Authorization": f"Bearer {LLM_API_KEY}", "Content-Type": "application/json", }, - json={"model": model, "prompt": prompt, "image": data_uri, "n": 1}, + json={"model": current_model, "prompt": prompt, "image": data_uri, "n": 1}, ) r.raise_for_status() resp_data = r.json() else: - resp = llm().images.generate(model=model, prompt=prompt, n=1) + resp = llm().images.generate(model=current_model, prompt=prompt, n=1) resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]} if resp_data.get("data"): effective_mode = current_mode + model = current_model # 记录实际成功的 model break err_obj = resp_data.get("error") or {} - last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]}" + last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}" except httpx.HTTPStatusError as e: body = e.response.text transient = ( @@ -545,17 +555,17 @@ def _image_edit_call( or "rate_limit" in body or "timeout" in body.lower() ) - last_err = f"HTTP {e.response.status_code}: {body[:200]}" + last_err = f"HTTP {e.response.status_code}: {body[:200]} · model={current_model}" if not transient: raise RuntimeError(f"image edit HTTP {e.response.status_code}: {body[:300]}") except Exception as e: - last_err = f"{type(e).__name__}: {e}" + last_err = f"{type(e).__name__}: {e} · model={current_model}" if attempt < len(plan) - 1: - next_mode = plan[attempt + 1] - tag = f"fallback → {next_mode}" if next_mode != current_mode else f"retry {attempt + 1}/{len(plan)}" + next_model = models_cycle[min(attempt + 1, len(models_cycle) - 1)] + tag = f"retry {attempt + 1}/{len(plan)} → {next_model}" print(f"[image edit {tag}] {last_err}", flush=True) - _time.sleep(1.5 * (attempt + 1)) + _time.sleep(1.0) data_arr = resp_data.get("data", []) if not data_arr: @@ -1045,22 +1055,35 @@ def cleanup_frame(job_id: str, idx: int, req: CleanupReq | None = None) -> Job: p = _region_to_phrase(r) if p: region_phrases.append(p) - # 去重保序 region_phrases = list(dict.fromkeys(region_phrases)) + # prompt 用"重画一张副本"语义而非"erase / remove only X" — 避免 Gemini 走 mask/inpainting + # function call 路径(实测该路径在 SKG 网关上 100% 触发 incomplete_generation) if region_phrases: if len(region_phrases) == 1: - zones = f"the {region_phrases[0]} part" + zones = f"the {region_phrases[0]} area" else: - zones = "these parts: " + ", ".join(region_phrases) + zones = ", ".join(region_phrases) + " areas" prompt = ( - f"Erase the text and graphics in {zones} of the image. " - "Keep all other parts unchanged." + f"Recreate this image as a clean version: remove the text and graphics in {zones}, " + "keep the rest of the scene identical." ) else: - prompt = "Erase all watermarks and text overlays. Keep the scene natural." + prompt = ( + "Recreate this image as a clean version without watermarks, captions, " + "hashtags, usernames, or platform logos. Keep the composition and style." + ) + + # 模型轮换:nano-banana-pro 失败时换 flash 系列 + models = [ + IMAGE_MODEL, # gemini-3-pro-image-preview (nano-banana-pro) + "gemini-3.1-flash-image-preview", + "gemini-2.5-flash-image", + ] try: - img_bytes, _mode = _image_edit_call(frame_path, prompt, fallback_text=False, max_attempts=3) + img_bytes, _mode = _image_edit_call( + frame_path, prompt, models=models, fallback_text=False, max_attempts=3, + ) except RuntimeError as e: raise HTTPException(500, f"cleanup failed: {e}") @@ -1281,8 +1304,15 @@ def cutout_element(job_id: str, idx: int, element_id: str, req: CutoutReq | None f"Extract the {target} from this image as a standalone asset.{position_hint} " f"Place it on a {bg_phrase} background, isolated, no other objects." ) + models = [ + IMAGE_MODEL, + "gemini-3.1-flash-image-preview", + "gemini-2.5-flash-image", + ] try: - img_bytes, _mode = _image_edit_call(src, prompt, fallback_text=False, max_attempts=3) + img_bytes, _mode = _image_edit_call( + src, prompt, models=models, fallback_text=False, max_attempts=3, + ) except RuntimeError as e: raise HTTPException(500, f"cutout failed: {e}")