diff --git a/.memory/worklog.json b/.memory/worklog.json
index 8c619e6..fe616cd 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1483,6 +1483,19 @@
       "message": "auto-save 2026-05-13 12:40 (~4)",
       "hash": "95b1354",
       "files_changed": 4
+    },
+    {
+      "ts": "2026-05-13T12:46:27+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-13 12:46 (~1)",
+      "hash": "fbbf3c7",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-13T04:47:38Z",
+      "type": "session-heartbeat",
+      "message": "Claude 会话活跃 · 最近命令：claude · 1 项未提交变更 · 最近提交：auto-save 2026-05-13 12:46 (~1)",
+      "files_changed": 1
     }
   ]
 }
diff --git a/api/main.py b/api/main.py
index a2c0ab5..1826533 100644
--- a/api/main.py
+++ b/api/main.py
@@ -477,6 +477,7 @@ def _image_edit_call(
     image_path: Path,
     prompt: str,
     model: str | None = None,
+    models: list[str] | None = None,
     fallback_text: bool = False,
     max_attempts: int = 3,
     max_side: int = 1024,
@@ -485,7 +486,8 @@ def _image_edit_call(
     返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
     失败 raise RuntimeError。
     输入图自动 resize 到 max_side（默认 1024）边长后再 base64，避免大图把 Gemini
-    function call 输入挤超阈值导致 incomplete_generation。"""
+    function call 输入挤超阈值导致 incomplete_generation。
+    models: 多模型轮换列表，重试时换 model；不传则单一 model 重试。"""
     import base64 as b64lib
     import io as _io
     import time as _time
@@ -493,7 +495,12 @@ def _image_edit_call(
     from PIL import Image as _PILImage
     if not LLM_API_KEY:
         raise RuntimeError("LLM_API_KEY 未配置")
-    model = model or IMAGE_MODEL
+    # model 优先级：models 列表 > 单个 model 参数 > IMAGE_MODEL
+    if models and len(models) > 0:
+        models_cycle = list(models)
+    else:
+        models_cycle = [model or IMAGE_MODEL]
+    model = models_cycle[0]
     # 缩到 max_side 内
     try:
         im = _PILImage.open(image_path)
@@ -516,6 +523,8 @@ def _image_edit_call(
     resp_data: dict = {}
     effective_mode = "edit"
     for attempt, current_mode in enumerate(plan):
+        # 多模型轮换：第 N 次重试用第 N 个 model（不够时用最后一个）
+        current_model = models_cycle[min(attempt, len(models_cycle) - 1)]
         try:
             if current_mode == "edit":
                 with httpx.Client(timeout=120) as client:
@@ -525,18 +534,19 @@ def _image_edit_call(
                             "Authorization": f"Bearer {LLM_API_KEY}",
                             "Content-Type": "application/json",
                         },
-                        json={"model": model, "prompt": prompt, "image": data_uri, "n": 1},
+                        json={"model": current_model, "prompt": prompt, "image": data_uri, "n": 1},
                     )
                     r.raise_for_status()
                     resp_data = r.json()
             else:
-                resp = llm().images.generate(model=model, prompt=prompt, n=1)
+                resp = llm().images.generate(model=current_model, prompt=prompt, n=1)
                 resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]}
             if resp_data.get("data"):
                 effective_mode = current_mode
+                model = current_model  # 记录实际成功的 model
                 break
             err_obj = resp_data.get("error") or {}
-            last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]}"
+            last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
         except httpx.HTTPStatusError as e:
             body = e.response.text
             transient = (
@@ -545,17 +555,17 @@ def _image_edit_call(
                 or "rate_limit" in body
                 or "timeout" in body.lower()
             )
-            last_err = f"HTTP {e.response.status_code}: {body[:200]}"
+            last_err = f"HTTP {e.response.status_code}: {body[:200]} · model={current_model}"
             if not transient:
                 raise RuntimeError(f"image edit HTTP {e.response.status_code}: {body[:300]}")
         except Exception as e:
-            last_err = f"{type(e).__name__}: {e}"
+            last_err = f"{type(e).__name__}: {e} · model={current_model}"
 
         if attempt < len(plan) - 1:
-            next_mode = plan[attempt + 1]
-            tag = f"fallback → {next_mode}" if next_mode != current_mode else f"retry {attempt + 1}/{len(plan)}"
+            next_model = models_cycle[min(attempt + 1, len(models_cycle) - 1)]
+            tag = f"retry {attempt + 1}/{len(plan)} → {next_model}"
             print(f"[image edit {tag}] {last_err}", flush=True)
-            _time.sleep(1.5 * (attempt + 1))
+            _time.sleep(1.0)
 
     data_arr = resp_data.get("data", [])
     if not data_arr:
@@ -1045,22 +1055,35 @@ def cleanup_frame(job_id: str, idx: int, req: CleanupReq | None = None) -> Job:
             p = _region_to_phrase(r)
             if p:
                 region_phrases.append(p)
-    # 去重保序
     region_phrases = list(dict.fromkeys(region_phrases))
 
+    # prompt 用"重画一张副本"语义而非"erase / remove only X" — 避免 Gemini 走 mask/inpainting
+    # function call 路径（实测该路径在 SKG 网关上 100% 触发 incomplete_generation）
     if region_phrases:
         if len(region_phrases) == 1:
-            zones = f"the {region_phrases[0]} part"
+            zones = f"the {region_phrases[0]} area"
         else:
-            zones = "these parts: " + ", ".join(region_phrases)
+            zones = ", ".join(region_phrases) + " areas"
         prompt = (
-            f"Erase the text and graphics in {zones} of the image. "
-            "Keep all other parts unchanged."
+            f"Recreate this image as a clean version: remove the text and graphics in {zones}, "
+            "keep the rest of the scene identical."
         )
     else:
-        prompt = "Erase all watermarks and text overlays. Keep the scene natural."
+        prompt = (
+            "Recreate this image as a clean version without watermarks, captions, "
+            "hashtags, usernames, or platform logos. Keep the composition and style."
+        )
+
+    # 模型轮换：nano-banana-pro 失败时换 flash 系列
+    models = [
+        IMAGE_MODEL,                          # gemini-3-pro-image-preview (nano-banana-pro)
+        "gemini-3.1-flash-image-preview",
+        "gemini-2.5-flash-image",
+    ]
     try:
-        img_bytes, _mode = _image_edit_call(frame_path, prompt, fallback_text=False, max_attempts=3)
+        img_bytes, _mode = _image_edit_call(
+            frame_path, prompt, models=models, fallback_text=False, max_attempts=3,
+        )
     except RuntimeError as e:
         raise HTTPException(500, f"cleanup failed: {e}")
 
@@ -1281,8 +1304,15 @@ def cutout_element(job_id: str, idx: int, element_id: str, req: CutoutReq | None
             f"Extract the {target} from this image as a standalone asset.{position_hint} "
             f"Place it on a {bg_phrase} background, isolated, no other objects."
         )
+    models = [
+        IMAGE_MODEL,
+        "gemini-3.1-flash-image-preview",
+        "gemini-2.5-flash-image",
+    ]
     try:
-        img_bytes, _mode = _image_edit_call(src, prompt, fallback_text=False, max_attempts=3)
+        img_bytes, _mode = _image_edit_call(
+            src, prompt, models=models, fallback_text=False, max_attempts=3,
+        )
     except RuntimeError as e:
         raise HTTPException(500, f"cutout failed: {e}")