auto-save 2026-05-13 12:51 (~2)

2026-05-13 12:51:59 +08:00
parent fbbf3c7775
commit 4e3850ecc0
2 changed files with 61 additions and 18 deletions
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1483,6 +1483,19 @@
      "message": "auto-save 2026-05-13 12:40 (~4)",
      "hash": "95b1354",
      "files_changed": 4
+    },
+    {
+      "ts": "2026-05-13T12:46:27+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-13 12:46 (~1)",
+      "hash": "fbbf3c7",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-13T04:47:38Z",
+      "type": "session-heartbeat",
+      "message": "Claude 会话活跃 · 最近命令：claude · 1 项未提交变更 · 最近提交：auto-save 2026-05-13 12:46 (~1)",
+      "files_changed": 1
    }
  ]
 }
--- a/api/main.py
+++ b/api/main.py
@@ -477,6 +477,7 @@ def _image_edit_call(
    image_path: Path,
    prompt: str,
    model: str | None = None,
+    models: list[str] | None = None,
    fallback_text: bool = False,
    max_attempts: int = 3,
    max_side: int = 1024,
@@ -485,7 +486,8 @@ def _image_edit_call(
    返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
    失败 raise RuntimeError。
    输入图自动 resize 到 max_side（默认 1024）边长后再 base64，避免大图把 Gemini
-    function call 输入挤超阈值导致 incomplete_generation。"""
+    function call 输入挤超阈值导致 incomplete_generation。
+    models: 多模型轮换列表，重试时换 model；不传则单一 model 重试。"""
    import base64 as b64lib
    import io as _io
    import time as _time
@@ -493,7 +495,12 @@ def _image_edit_call(
    from PIL import Image as _PILImage
    if not LLM_API_KEY:
        raise RuntimeError("LLM_API_KEY 未配置")
-    model = model or IMAGE_MODEL
+    # model 优先级：models 列表 > 单个 model 参数 > IMAGE_MODEL
+    if models and len(models) > 0:
+        models_cycle = list(models)
+    else:
+        models_cycle = [model or IMAGE_MODEL]
+    model = models_cycle[0]
    # 缩到 max_side 内
    try:
        im = _PILImage.open(image_path)
@@ -516,6 +523,8 @@ def _image_edit_call(
    resp_data: dict = {}
    effective_mode = "edit"
    for attempt, current_mode in enumerate(plan):
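+        # Model rotation: attempt k (0-based, so k=0 is the initial try) uses models_cycle[k]; once the list is exhausted the last model is reused.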
+        current_model = models_cycle[min(attempt, len(models_cycle) - 1)]
        try:
            if current_mode == "edit":
                with httpx.Client(timeout=120) as client:
@@ -525,18 +534,19 @@ def _image_edit_call(
                            "Authorization": f"Bearer {LLM_API_KEY}",
                            "Content-Type": "application/json",
                        },
-                        json={"model": model, "prompt": prompt, "image": data_uri, "n": 1},
+                        json={"model": current_model, "prompt": prompt, "image": data_uri, "n": 1},
                    )
                    r.raise_for_status()
                    resp_data = r.json()
            else:
-                resp = llm().images.generate(model=model, prompt=prompt, n=1)
+                resp = llm().images.generate(model=current_model, prompt=prompt, n=1)
                resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]}
            if resp_data.get("data"):
                effective_mode = current_mode
+                model = current_model  # 记录实际成功的 model
                break
            err_obj = resp_data.get("error") or {}
-            last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]}"
+            last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
        except httpx.HTTPStatusError as e:
            body = e.response.text
            transient = (
@@ -545,17 +555,17 @@ def _image_edit_call(
                or "rate_limit" in body
                or "timeout" in body.lower()
            )
-            last_err = f"HTTP {e.response.status_code}: {body[:200]}"
+            last_err = f"HTTP {e.response.status_code}: {body[:200]} · model={current_model}"
            if not transient:
                raise RuntimeError(f"image edit HTTP {e.response.status_code}: {body[:300]}")
        except Exception as e:
-            last_err = f"{type(e).__name__}: {e}"
+            last_err = f"{type(e).__name__}: {e} · model={current_model}"

        if attempt < len(plan) - 1:
-            next_mode = plan[attempt + 1]
-            tag = f"fallback → {next_mode}" if next_mode != current_mode else f"retry {attempt + 1}/{len(plan)}"
+            next_model = models_cycle[min(attempt + 1, len(models_cycle) - 1)]
+            tag = f"retry {attempt + 1}/{len(plan)} → {next_model}"
            print(f"[image edit {tag}] {last_err}", flush=True)
-            _time.sleep(1.5 * (attempt + 1))
+            _time.sleep(1.0)

    data_arr = resp_data.get("data", [])
    if not data_arr:
@@ -1045,22 +1055,35 @@ def cleanup_frame(job_id: str, idx: int, req: CleanupReq | None = None) -> Job:
            p = _region_to_phrase(r)
            if p:
                region_phrases.append(p)
-    # 去重保序
    region_phrases = list(dict.fromkeys(region_phrases))

+    # prompt 用"重画一张副本"语义而非"erase / remove only X" — 避免 Gemini 走 mask/inpainting
+    # function call 路径（实测该路径在 SKG 网关上 100% 触发 incomplete_generation）
    if region_phrases:
        if len(region_phrases) == 1:
-            zones = f"the {region_phrases[0]} part"
+            zones = f"the {region_phrases[0]} area"
        else:
-            zones = "these parts: " + ", ".join(region_phrases)
+            zones = ", ".join(region_phrases) + " areas"
        prompt = (
-            f"Erase the text and graphics in {zones} of the image. "
-            "Keep all other parts unchanged."
+            f"Recreate this image as a clean version: remove the text and graphics in {zones}, "
+            "keep the rest of the scene identical."
        )
    else:
-        prompt = "Erase all watermarks and text overlays. Keep the scene natural."
+        prompt = (
+            "Recreate this image as a clean version without watermarks, captions, "
+            "hashtags, usernames, or platform logos. Keep the composition and style."
+        )
+
+    # 模型轮换：nano-banana-pro 失败时换 flash 系列
+    models = [
+        IMAGE_MODEL,                          # gemini-3-pro-image-preview (nano-banana-pro)
+        "gemini-3.1-flash-image-preview",
+        "gemini-2.5-flash-image",
+    ]
    try:
-        img_bytes, _mode = _image_edit_call(frame_path, prompt, fallback_text=False, max_attempts=3)
+        img_bytes, _mode = _image_edit_call(
+            frame_path, prompt, models=models, fallback_text=False, max_attempts=3,
+        )
    except RuntimeError as e:
        raise HTTPException(500, f"cleanup failed: {e}")

@@ -1281,8 +1304,15 @@ def cutout_element(job_id: str, idx: int, element_id: str, req: CutoutReq | None
            f"Extract the {target} from this image as a standalone asset.{position_hint} "
            f"Place it on a {bg_phrase} background, isolated, no other objects."
        )
+    models = [
+        IMAGE_MODEL,
+        "gemini-3.1-flash-image-preview",
+        "gemini-2.5-flash-image",
+    ]
    try:
-        img_bytes, _mode = _image_edit_call(src, prompt, fallback_text=False, max_attempts=3)
+        img_bytes, _mode = _image_edit_call(
+            src, prompt, models=models, fallback_text=False, max_attempts=3,
+        )
    except RuntimeError as e:
        raise HTTPException(500, f"cutout failed: {e}")