diff --git a/.memory/worklog.json b/.memory/worklog.json
index 5b3d04d..44ec84c 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,32 +1,5 @@
 {
   "entries": [
-    {
-      "files_changed": 3,
-      "hash": "ecc5894",
-      "message": "auto-save 2026-05-15 16:27 (~3)",
-      "ts": "2026-05-15T16:27:46+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 4,
-      "hash": "1336fc6",
-      "message": "auto-save 2026-05-15 16:33 (~4)",
-      "ts": "2026-05-15T16:33:17+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "message": "Codex 会话活跃 · 最近命令：codex · 1 项未提交变更 · 最近提交：auto-save 2026-05-15 16:33 (~4)",
-      "ts": "2026-05-15T08:34:47Z",
-      "type": "session-heartbeat"
-    },
-    {
-      "files_changed": 1,
-      "hash": "2ad7884",
-      "message": "auto-save 2026-05-15 16:38 (~1)",
-      "ts": "2026-05-15T16:38:48+08:00",
-      "type": "commit"
-    },
     {
       "files_changed": 2,
       "hash": "6fdcb47",
@@ -3255,6 +3228,32 @@
       "type": "session-heartbeat",
       "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 1 项未提交变更 · 最近提交：fix: force gpt image model",
       "files_changed": 1
+    },
+    {
+      "ts": "2026-05-18T06:33:24+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-18 06:33 (~5)",
+      "hash": "18fd8c9",
+      "files_changed": 5
+    },
+    {
+      "ts": "2026-05-17T22:33:44Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 1 项未提交变更 · 最近提交：auto-save 2026-05-18 06:33 (~5)",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-18T06:38:01+08:00",
+      "type": "commit",
+      "message": "feat: manage subject view thumbnails",
+      "hash": "58c3e5c",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-17T22:43:44Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 2 项未提交变更 · 最近提交：feat: manage subject view thumbnails",
+      "files_changed": 2
     }
   ]
 }
diff --git a/api/main.py b/api/main.py
index 51debf3..5116a84 100644
--- a/api/main.py
+++ b/api/main.py
@@ -2540,6 +2540,42 @@ def start_audio_processing(job_id: str, manage_job_status: bool = True) -> bool:
     return True
 
 
+def _image_is_capacity_error(status_code: int, body: str) -> bool:
+    """Report whether an upstream image API failure looks like overload or rate limiting.
+
+    HTTP 429 always qualifies; 500/502/503/504 qualify only when the body contains a capacity hint.
+    Rate limiting is matched as whole phrases because a bare "rate" also matches "generate"/"generation".
+    """
+    hints = ("saturated", "rate limit", "rate_limit", "rate-limit", "ratelimit", "quota", "capacity", "overload", "timeout", "繁忙", "饱和", "过载")
+    lower = body.lower()
+    return status_code == 429 or (status_code in (500, 502, 503, 504) and any(hint in lower for hint in hints))
+
+
+def _image_retry_delay(attempt: int, status_code: int = 0, body: str = "", retry_after: str | None = None) -> float:
+    """Seconds to sleep before the next image attempt: a numeric Retry-After clamped to 1-60s, else a stepped backoff."""
+    if retry_after:
+        try:
+            return max(1.0, min(60.0, float(retry_after)))
+        except ValueError:
+            pass  # Retry-After is not a number: fall back to the backoff table below
+    schedule = (6.0, 14.0, 30.0, 45.0) if _image_is_capacity_error(status_code, body) else (1.0, 2.0, 4.0, 8.0)
+    return schedule[min(attempt, 3)]
+
+
+def _image_failure_message(kind: str, attempts: int, last_err: str, capacity_seen: bool) -> str:
+    """Build the final error text once all attempts are used up; capacity failures get a retry-later hint."""
+    prefix = f"{kind} failed after {attempts} attempts: "
+    if not capacity_seen:
+        return f"{prefix}{last_err}"
+    # The "上游负载饱和" marker in this text is one of the substrings _image_error_status checks to return 503.
+    return f"{prefix}gpt-image-2 上游负载饱和，已自动退避重试仍失败，请稍后点重试。最后错误：{last_err}"
+
+
+def _image_error_status(error: Exception) -> int:
+    """Map a failure to an HTTP status: 503 when its message shows upstream saturation or HTTP 429, otherwise 500."""
+    return 503 if any(mark in str(error) for mark in ("上游负载饱和", "HTTP 429")) or "saturated" in str(error).lower() else 500
+
+
 def _image_edit_call(
     image_path: Path,
     prompt: str,
@@ -2574,9 +2610,6 @@ def _image_edit_call(
     except Exception:
         # PIL 失败兜底走原文件
         img_bytes_in = image_path.read_bytes()
-    img_b64 = b64lib.b64encode(img_bytes_in).decode("ascii")
-    data_uri = f"data:image/jpeg;base64,{img_b64}"
-
     plan: list[str] = ["edit"] * max_attempts
     if fallback_text:
         plan.append("text")
@@ -2584,18 +2617,22 @@ def _image_edit_call(
     last_err = ""
     resp_data: dict = {}
     effective_mode = "edit"
+    capacity_seen = False
     for attempt, current_mode in enumerate(plan):
         current_model = models_cycle[min(attempt, len(models_cycle) - 1)]
+        status_code = 0
+        body = ""
+        retry_after: str | None = None
         try:
             if current_mode == "edit":
                 with httpx.Client(timeout=120) as client:
                     r = client.post(
-                        f"{IMAGE_BASE_URL}/images/generations",
+                        f"{IMAGE_BASE_URL}/images/edits",
                         headers={
                             "Authorization": f"Bearer {IMAGE_API_KEY}",
-                            "Content-Type": "application/json",
                         },
-                        json={"model": current_model, "prompt": prompt, "image": data_uri, "n": 1},
+                        data={"model": current_model, "prompt": prompt, "n": "1"},
+                        files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")},
                     )
                     r.raise_for_status()
                     resp_data = r.json()
@@ -2610,23 +2647,32 @@ def _image_edit_call(
             last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
         except httpx.HTTPStatusError as e:
             body = e.response.text
-            sc = e.response.status_code
-            fatal = sc in (401, 403)
-            last_err = f"HTTP {sc}: {body[:200]} · model={current_model}"
+            status_code = e.response.status_code
+            retry_after = e.response.headers.get("retry-after")
+            capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body)
+            fatal = status_code in (401, 403)
+            last_err = f"HTTP {status_code}: {body[:200]} · model={current_model}"
             if fatal:
-                raise RuntimeError(f"image edit HTTP {sc}: {body[:300]}")
+                raise RuntimeError(f"image edit HTTP {status_code}: {body[:300]}")
         except Exception as e:
             last_err = f"{type(e).__name__}: {e} · model={current_model}"
 
         if attempt < len(plan) - 1:
             tag = f"retry {attempt + 1}/{len(plan)} → {GPT_IMAGE_MODEL}"
-            print(f"[image edit {tag}] {last_err}", flush=True)
-            _time.sleep(1.0)
+            delay = _image_retry_delay(attempt, status_code, body, retry_after)
+            print(f"[image edit {tag}, sleep {delay:.0f}s] {last_err}", flush=True)
+            _time.sleep(delay)
 
     data_arr = resp_data.get("data", [])
     if not data_arr:
-        raise RuntimeError(f"image edit failed after {len(plan)} attempts: {last_err}")
-    b64 = data_arr[0].get("b64_json")
+        raise RuntimeError(_image_failure_message("image edit", len(plan), last_err, capacity_seen))
+    item = data_arr[0]
+    b64 = item.get("b64_json")
+    if not b64 and item.get("url"):
+        with httpx.Client(timeout=120) as client:
+            image_resp = client.get(item["url"])
+            image_resp.raise_for_status()
+            return image_resp.content, effective_mode
     if not b64:
         raise RuntimeError("image edit returned no b64_json")
     return b64lib.b64decode(b64), effective_mode
@@ -2646,8 +2692,11 @@ def _image_text_call(
     models_cycle = [GPT_IMAGE_MODEL]
     last_err = ""
     resp_data: dict = {}
+    capacity_seen = False
     for attempt in range(max_attempts):
         current_model = models_cycle[min(attempt, len(models_cycle) - 1)]
+        status_code = 0
+        body = ""
         try:
             resp = image_llm().images.generate(model=current_model, prompt=prompt, n=1)
             resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]}
@@ -2659,10 +2708,14 @@ def _image_text_call(
             last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
         except Exception as e:
             last_err = f"{type(e).__name__}: {e} · model={current_model}"
+            body = str(e)
+            status_code = 429 if "429" in body or "saturated" in body.lower() or "饱和" in body else 0
+            capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body)
         if attempt < max_attempts - 1:
-            print(f"[image text retry {attempt + 1}/{max_attempts} → {GPT_IMAGE_MODEL}] {last_err}", flush=True)
-            _time.sleep(1.0)
-    raise RuntimeError(f"image text failed after {max_attempts} attempts: {last_err}")
+            delay = _image_retry_delay(attempt, status_code, body)
+            print(f"[image text retry {attempt + 1}/{max_attempts} → {GPT_IMAGE_MODEL}, sleep {delay:.0f}s] {last_err}", flush=True)
+            _time.sleep(delay)
+    raise RuntimeError(_image_failure_message("image text", max_attempts, last_err, capacity_seen))
 
 
 # ---------- API 路由 ----------
@@ -3166,9 +3219,9 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
     import time as _time
     import httpx
 
-    img_b64: str | None = None
+    img_bytes_in: bytes | None = None
     if req.mode == "edit":
-        img_b64 = b64lib.b64encode(reference_path.read_bytes()).decode("ascii")
+        img_bytes_in = reference_path.read_bytes()
 
     # 尝试 i2i 最多 3 次，全失败时降级 text-only 再试 1 次
     plan: list[str] = ([req.mode] * 3) if req.mode == "edit" else [req.mode]
@@ -3177,24 +3230,23 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
     resp_data: dict = {}
     last_err = ""
     effective_mode = req.mode
+    capacity_seen = False
     for attempt, current_mode in enumerate(plan):
+        status_code = 0
+        body = ""
+        retry_after: str | None = None
         try:
             if current_mode == "edit":
-                data_uri = f"data:image/jpeg;base64,{img_b64}"
-                # OpenAI SDK 不直接支持 image 参数，用底层 httpx
+                if img_bytes_in is None:
+                    raise RuntimeError("edit mode reference image missing")
                 with httpx.Client(timeout=120) as client:
                     r = client.post(
-                        f"{IMAGE_BASE_URL}/images/generations",
+                        f"{IMAGE_BASE_URL}/images/edits",
                         headers={
                             "Authorization": f"Bearer {IMAGE_API_KEY}",
-                            "Content-Type": "application/json",
-                        },
-                        json={
-                            "model": model,
-                            "prompt": full_prompt,
-                            "image": data_uri,
-                            "n": 1,
                         },
+                        data={"model": model, "prompt": full_prompt, "n": "1"},
+                        files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")},
                     )
                     r.raise_for_status()
                     resp_data = r.json()
@@ -3210,15 +3262,20 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
             last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]}"
         except httpx.HTTPStatusError as e:
             body = e.response.text
+            status_code = e.response.status_code
+            retry_after = e.response.headers.get("retry-after")
+            capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body)
             transient = (
-                e.response.status_code >= 500
+                status_code == 429
+                or status_code >= 500
                 or "incomplete_generation" in body
                 or "rate_limit" in body
                 or "timeout" in body.lower()
+                or _image_is_capacity_error(status_code, body)
             )
-            last_err = f"HTTP {e.response.status_code}: {body[:200]}"
+            last_err = f"HTTP {status_code}: {body[:200]}"
             if not transient:
-                raise HTTPException(500, f"image gen HTTP {e.response.status_code}: {body[:300]}")
+                raise HTTPException(500, f"image gen HTTP {status_code}: {body[:300]}")
         except Exception as e:
             last_err = f"{type(e).__name__}: {e}"
 
@@ -3226,22 +3283,29 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
             next_mode = plan[attempt + 1]
             tag = f"fallback → {next_mode}" if next_mode != current_mode else f"retry {attempt + 1}/{len(plan)}"
             print(f"[image gen {tag}] {last_err}", flush=True)
-            _time.sleep(1.5 * (attempt + 1))
+            _time.sleep(_image_retry_delay(attempt, status_code, body, retry_after))
 
     data_arr = resp_data.get("data", [])
     if not data_arr:
-        raise HTTPException(500, f"image gen failed after {len(plan)} attempts: {last_err}")
+        raise HTTPException(503 if capacity_seen else 500, _image_failure_message("image gen", len(plan), last_err, capacity_seen))
 
     item = data_arr[0]
     b64 = item.get("b64_json")
-    if not b64:
+    if b64:
+        out_bytes = b64lib.b64decode(b64)
+    elif item.get("url"):
+        with httpx.Client(timeout=120) as client:
+            image_resp = client.get(item["url"])
+            image_resp.raise_for_status()
+            out_bytes = image_resp.content
+    else:
         raise HTTPException(500, "image gen returned no b64_json")
 
     # 保存到本地 jobs/<id>/gen/<idx>_<gen_id>.jpg
     gen_dir = job_dir(job_id) / "gen"
     gen_dir.mkdir(parents=True, exist_ok=True)
     out_path = gen_dir / f"{idx:03d}_{gen_id}.jpg"
-    out_path.write_bytes(b64lib.b64decode(b64))
+    out_path.write_bytes(out_bytes)
 
     new_gen = GeneratedImage(
         id=gen_id,
@@ -4073,7 +4137,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
             try:
                 img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
             except RuntimeError as e:
-                raise HTTPException(500, f"subject asset {view} failed: {e}")
+                raise HTTPException(_image_error_status(e), f"subject asset {view} failed: {e}")
 
             asset_id = f"subject_{idx:03d}_{element_id}_{view}_{uuid.uuid4().hex[:8]}"
             out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
@@ -5073,9 +5137,9 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
     )
     models = [GPT_IMAGE_MODEL]
     try:
-        img_bytes, _mode = _image_edit_call(source_path, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
+        img_bytes, _mode = _image_edit_call(source_path, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1280)
     except RuntimeError as e:
-        raise HTTPException(500, f"product angle generation failed: {e}")
+        raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}")
     asset_id = f"product_angle_{uuid.uuid4().hex[:10]}"
     out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
     _normalize_asset_image(img_bytes, out_path, source_path, "1024", "white", square=True, fill_subject=True)
diff --git a/web/lib/api.ts b/web/lib/api.ts
index c4b345b..c51065f 100644
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -1,5 +1,16 @@
 const API_BASE = process.env.NEXT_PUBLIC_API_BASE ?? "http://localhost:4291"
 
+/** Build an Error for a failed API call; a JSON `detail` is preferred over the raw body, and a structured `detail` is serialized so it does not print as "[object Object]". */
+function apiError(prefix: string, status: number, text: string): Error {
+  let detail = text
+  try {
+    const parsed: unknown = JSON.parse(text)
+    const raw = typeof parsed === "object" && parsed !== null ? (parsed as { detail?: unknown }).detail : undefined
+    if (raw) detail = typeof raw === "string" ? raw : JSON.stringify(raw)
+  } catch { /* body is not JSON: keep the raw text */ }
+  return new Error(`${prefix} ${status} ${detail.slice(0, 300)}`)
+}
+
 export type JobStatus =
   | "created"
   | "downloading"