feat: add Gemini image fallback circuit breaker
This commit is contained in:
6
RULES.md
6
RULES.md
@@ -73,9 +73,11 @@
|
|||||||
- `AUDIO_REWRITE_MODEL`:后续音频口播改写模型,默认跟随 `REWRITE_MODEL`;如果旧环境仍写 `gemini-*`,后端会自动改用 `REWRITE_MODEL`
|
- `AUDIO_REWRITE_MODEL`:后续音频口播改写模型,默认跟随 `REWRITE_MODEL`;如果旧环境仍写 `gemini-*`,后端会自动改用 `REWRITE_MODEL`
|
||||||
- `AUDIO_PRODUCT_BRIEF`:音频口播改写时注入的 SKG 产品卖点
|
- `AUDIO_PRODUCT_BRIEF`:音频口播改写时注入的 SKG 产品卖点
|
||||||
- `PRODUCT_VIEW_MODEL`:同一产品素材池的视角标注/自动识别模型;当前按项目要求强制使用 `gpt-image-2`
|
- `PRODUCT_VIEW_MODEL`:同一产品素材池的视角标注/自动识别模型;当前按项目要求强制使用 `gpt-image-2`
|
||||||
- `IMAGE_BASE_URL` / `IMAGE_API_KEY` / `IMAGE_MODEL`:OpenAI 兼容生图网关;当前所有生图入口一律强制使用 `gpt-image-2`,不做其他图片模型 fallback
|
- `IMAGE_BASE_URL` / `IMAGE_API_KEY` / `IMAGE_MODEL`:OpenAI 兼容生图网关;当前所有生图入口主模型仍为 `gpt-image-2`
|
||||||
- `IMAGE_REQUEST_TIMEOUT_SECONDS`:单次图片网关请求超时,默认 60 秒;超时会直接把该视图标失败并继续下一张,避免主体 6 视图整包长时间无反馈
|
- `IMAGE_REQUEST_TIMEOUT_SECONDS`:单次图片网关请求超时,默认 60 秒;超时会直接把该视图标失败并继续下一张,避免主体 6 视图整包长时间无反馈
|
||||||
- `GPT_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODELS`:保留兼容旧环境变量名,但服务端会强制主体 6 视图和所有其他生图入口都只使用 `gpt-image-2`
|
- `IMAGE_FALLBACK_ENABLED` / `IMAGE_FALLBACK_MODEL`:图片主模型故障兜底;当前允许在 `gpt-image-2` 超时、429、5xx 或网络错误时临时使用 `gemini-3-pro-image-preview`,400/401/403/404 和参数错误不兜底
|
||||||
|
- `IMAGE_CIRCUIT_FAILURE_THRESHOLD` / `IMAGE_CIRCUIT_COOLDOWN_SECONDS`:短时熔断配置,默认 `gpt-image-2` 连续 2 次上游类失败后 600 秒内直接走 Gemini 兜底;成功恢复后自动清空失败计数
|
||||||
|
- `GPT_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODELS`:保留兼容旧环境变量名;主体 6 视图先用 `gpt-image-2`,同一套图内一旦触发 Gemini 兜底,后续视图沿用 Gemini,避免一张张等待主模型超时
|
||||||
- `AI_HTTP_PROXY` / `IMAGE_HTTP_PROXY`:可选的 AI 网关出站代理;本地 launchd 后台进程不一定继承 shell 的 `http_proxy/https_proxy`,如生图报 DNS / ConnectError,可在本地 `api/.env` 配置后重启后端。`/health` 只回传是否配置代理,不回传代理地址。
|
- `AI_HTTP_PROXY` / `IMAGE_HTTP_PROXY`:可选的 AI 网关出站代理;本地 launchd 后台进程不一定继承 shell 的 `http_proxy/https_proxy`,如生图报 DNS / ConnectError,可在本地 `api/.env` 配置后重启后端。`/health` 只回传是否配置代理,不回传代理地址。
|
||||||
- `YTDLP_COOKIES_FILE` / `YTDLP_COOKIES_FROM_BROWSER`:可选 TikTok 下载登录态;生产云端固定使用 cookies 文件 `/run/secrets/tiktok_cookies.txt`(宿主机 `./secrets/tiktok_cookies.txt` 挂载进容器),本地开发可临时用浏览器 cookies。cookies 文件属于敏感登录态,只能放本机或服务器私有路径,不允许入库。
|
- `YTDLP_COOKIES_FILE` / `YTDLP_COOKIES_FROM_BROWSER`:可选 TikTok 下载登录态;生产云端固定使用 cookies 文件 `/run/secrets/tiktok_cookies.txt`(宿主机 `./secrets/tiktok_cookies.txt` 挂载进容器),本地开发可临时用浏览器 cookies。cookies 文件属于敏感登录态,只能放本机或服务器私有路径,不允许入库。
|
||||||
- `VOICE_PROVIDER`:配音通道,服务端固定使用 `azure_openai`;旧环境若写 `minimax` 会被忽略
|
- `VOICE_PROVIDER`:配音通道,服务端固定使用 `azure_openai`;旧环境若写 `minimax` 会被忽略
|
||||||
|
|||||||
@@ -25,9 +25,13 @@ IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
|
|||||||
IMAGE_API_KEY=
|
IMAGE_API_KEY=
|
||||||
IMAGE_MODEL=gpt-image-2
|
IMAGE_MODEL=gpt-image-2
|
||||||
IMAGE_REQUEST_TIMEOUT_SECONDS=60
|
IMAGE_REQUEST_TIMEOUT_SECONDS=60
|
||||||
|
IMAGE_FALLBACK_ENABLED=true
|
||||||
|
IMAGE_FALLBACK_MODEL=gemini-3-pro-image-preview
|
||||||
|
IMAGE_CIRCUIT_FAILURE_THRESHOLD=2
|
||||||
|
IMAGE_CIRCUIT_COOLDOWN_SECONDS=600
|
||||||
GPT_IMAGE_MODEL=gpt-image-2
|
GPT_IMAGE_MODEL=gpt-image-2
|
||||||
SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2
|
SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2
|
||||||
SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2
|
SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2,gemini-3-pro-image-preview
|
||||||
# 可选:本地网络需要代理访问 ai.skg.com 时配置;launchd 不一定继承 shell 代理变量。
|
# 可选:本地网络需要代理访问 ai.skg.com 时配置;launchd 不一定继承 shell 代理变量。
|
||||||
AI_HTTP_PROXY=
|
AI_HTTP_PROXY=
|
||||||
YTDLP_COOKIES_FILE=
|
YTDLP_COOKIES_FILE=
|
||||||
|
|||||||
202
api/main.py
202
api/main.py
@@ -97,14 +97,24 @@ AI_HTTP_PROXY = (
|
|||||||
or os.getenv("http_proxy")
|
or os.getenv("http_proxy")
|
||||||
or ""
|
or ""
|
||||||
).strip()
|
).strip()
|
||||||
# Product decision: every image-generation/editing path is locked to gpt-image-2.
|
# Product decision: gpt-image-2 remains the primary image model. Gemini is only
|
||||||
# Environment variables may still choose the gateway URL/key, but not the model.
|
# allowed as an outage fallback when the primary gateway times out or returns
|
||||||
|
# transient upstream failures.
|
||||||
GPT_IMAGE_MODEL = "gpt-image-2"
|
GPT_IMAGE_MODEL = "gpt-image-2"
|
||||||
|
IMAGE_FALLBACK_MODEL = os.getenv("IMAGE_FALLBACK_MODEL", "gemini-3-pro-image-preview").strip() or ""
|
||||||
|
IMAGE_FALLBACK_ENABLED = os.getenv("IMAGE_FALLBACK_ENABLED", "true").strip().lower() not in {"0", "false", "no", "off"}
|
||||||
IMAGE_MODEL = GPT_IMAGE_MODEL
|
IMAGE_MODEL = GPT_IMAGE_MODEL
|
||||||
PRODUCT_VIEW_MODEL = GPT_IMAGE_MODEL
|
PRODUCT_VIEW_MODEL = GPT_IMAGE_MODEL
|
||||||
SUBJECT_ASSET_IMAGE_MODEL = GPT_IMAGE_MODEL
|
SUBJECT_ASSET_IMAGE_MODEL = GPT_IMAGE_MODEL
|
||||||
SUBJECT_ASSET_IMAGE_MODELS = [GPT_IMAGE_MODEL]
|
SUBJECT_ASSET_IMAGE_MODELS = [GPT_IMAGE_MODEL] + (
|
||||||
|
[IMAGE_FALLBACK_MODEL] if IMAGE_FALLBACK_ENABLED and IMAGE_FALLBACK_MODEL and IMAGE_FALLBACK_MODEL != GPT_IMAGE_MODEL else []
|
||||||
|
)
|
||||||
IMAGE_REQUEST_TIMEOUT_SECONDS = max(15, min(180, int(os.getenv("IMAGE_REQUEST_TIMEOUT_SECONDS", "60"))))
|
IMAGE_REQUEST_TIMEOUT_SECONDS = max(15, min(180, int(os.getenv("IMAGE_REQUEST_TIMEOUT_SECONDS", "60"))))
|
||||||
|
IMAGE_CIRCUIT_FAILURE_THRESHOLD = max(1, int(os.getenv("IMAGE_CIRCUIT_FAILURE_THRESHOLD", "2")))
|
||||||
|
IMAGE_CIRCUIT_COOLDOWN_SECONDS = max(60, int(os.getenv("IMAGE_CIRCUIT_COOLDOWN_SECONDS", "600")))
|
||||||
|
_IMAGE_CIRCUIT_LOCK = threading.Lock()
|
||||||
|
_IMAGE_PRIMARY_FAILURES = 0
|
||||||
|
_IMAGE_PRIMARY_OPEN_UNTIL = 0.0
|
||||||
PRODUCT_ASSET_MAX_SIDE = max(1024, int(os.getenv("PRODUCT_ASSET_MAX_SIDE", "1600")))
|
PRODUCT_ASSET_MAX_SIDE = max(1024, int(os.getenv("PRODUCT_ASSET_MAX_SIDE", "1600")))
|
||||||
PRODUCT_ASSET_MIN_LONG_SIDE = max(512, int(os.getenv("PRODUCT_ASSET_MIN_LONG_SIDE", "900")))
|
PRODUCT_ASSET_MIN_LONG_SIDE = max(512, int(os.getenv("PRODUCT_ASSET_MIN_LONG_SIDE", "900")))
|
||||||
PRODUCT_ASSET_MIN_SHORT_SIDE = max(320, int(os.getenv("PRODUCT_ASSET_MIN_SHORT_SIDE", "600")))
|
PRODUCT_ASSET_MIN_SHORT_SIDE = max(320, int(os.getenv("PRODUCT_ASSET_MIN_SHORT_SIDE", "600")))
|
||||||
@@ -3511,6 +3521,83 @@ def _image_is_transport_error(message: str) -> bool:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _image_fallback_models() -> list[str]:
    """Return the fallback image models allowed to stand in for the primary.

    The list is empty when fallback is disabled, when no fallback model is
    configured, or when the configured fallback is the primary model itself.
    """
    usable = (
        IMAGE_FALLBACK_ENABLED
        and bool(IMAGE_FALLBACK_MODEL)
        and IMAGE_FALLBACK_MODEL != GPT_IMAGE_MODEL
    )
    return [IMAGE_FALLBACK_MODEL] if usable else []
|
||||||
|
|
||||||
|
|
||||||
|
def _image_circuit_snapshot() -> dict:
    """Return a point-in-time view of the primary image model circuit breaker.

    Returns:
        A dict with the primary model name, the usable fallback models, the
        configured failure threshold and cooldown, the current primary
        failure count, whether the circuit is currently open, the timestamp
        it stays open until (0 when closed) and the whole seconds remaining
        (0 when closed).
    """
    import math

    now = time.time()
    # Copy both counters inside one critical section so the reported failure
    # count and open-until timestamp always describe the same state.
    with _IMAGE_CIRCUIT_LOCK:
        open_until = _IMAGE_PRIMARY_OPEN_UNTIL
        failures = _IMAGE_PRIMARY_FAILURES
    is_open = open_until > now
    return {
        "primary": GPT_IMAGE_MODEL,
        "fallbacks": _image_fallback_models(),
        "failure_threshold": IMAGE_CIRCUIT_FAILURE_THRESHOLD,
        "cooldown_seconds": IMAGE_CIRCUIT_COOLDOWN_SECONDS,
        "primary_failures": failures,
        "primary_open": is_open,
        "primary_open_until": open_until if is_open else 0,
        # Round up so an open circuit never reports 0 seconds remaining.
        "primary_open_remaining_seconds": math.ceil(open_until - now) if is_open else 0,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _image_primary_circuit_open() -> bool:
    """Report whether the primary image model circuit is currently open."""
    snapshot = _image_circuit_snapshot()
    return snapshot["primary_open"]
|
||||||
|
|
||||||
|
|
||||||
|
def _image_model_candidates(force_fallback: bool = False) -> list[str]:
    """Return the image models to try, in order, for one request.

    Without usable fallbacks only the primary model is returned. With
    fallbacks, the primary is skipped when the caller forces fallback or the
    primary circuit is open; otherwise the primary comes first, followed by
    the fallbacks.
    """
    backups = _image_fallback_models()
    # The circuit state is only consulted when a fallback actually exists.
    if backups and (force_fallback or _image_primary_circuit_open()):
        return backups
    return [GPT_IMAGE_MODEL, *backups]
|
||||||
|
|
||||||
|
|
||||||
|
def _image_failure_can_fallback(status_code: int, body: str, last_err: str) -> bool:
    """Decide whether a primary-model failure may be retried on a fallback.

    HTTP 400/401/403/404 never fall back. HTTP 429 and any status >= 500
    always do. Otherwise the decision rests on the capacity-error check, the
    transport-error check on ``last_err``, or the word "timeout" appearing in
    the response body.
    """
    if status_code in (400, 401, 403, 404):
        return False
    upstream_status = status_code == 429 or status_code >= 500
    if upstream_status:
        return True
    return (
        _image_is_capacity_error(status_code, body)
        or _image_is_transport_error(last_err)
        or "timeout" in (body or "").lower()
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _image_record_primary_success() -> None:
    """Reset the primary circuit breaker after a successful primary call.

    Clears the failure counter and the open-until timestamp. A recovery line
    is logged only when there was recorded failure state to clear.
    """
    global _IMAGE_PRIMARY_FAILURES, _IMAGE_PRIMARY_OPEN_UNTIL
    with _IMAGE_CIRCUIT_LOCK:
        recovered = bool(_IMAGE_PRIMARY_FAILURES or _IMAGE_PRIMARY_OPEN_UNTIL)
        _IMAGE_PRIMARY_FAILURES = 0
        _IMAGE_PRIMARY_OPEN_UNTIL = 0.0
    # Log after releasing the lock so flushed stdout I/O never blocks other
    # threads that need the circuit state.
    if recovered:
        print(f"[image circuit] primary {GPT_IMAGE_MODEL} recovered", flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _image_record_primary_failure(reason: str) -> None:
    """Count one upstream failure of the primary model and open the circuit if due.

    Does nothing when no fallback model is usable. Otherwise the failure
    counter is incremented; once it reaches the configured threshold the
    circuit is opened for the configured cooldown, counted from now.

    Args:
        reason: Description of the failure; at most 220 characters are logged.
    """
    global _IMAGE_PRIMARY_FAILURES, _IMAGE_PRIMARY_OPEN_UNTIL
    if not _image_fallback_models():
        return
    # Stringify up front so an unexpected non-string reason cannot raise
    # after the counter has already been modified.
    detail = str(reason)[:220]
    with _IMAGE_CIRCUIT_LOCK:
        _IMAGE_PRIMARY_FAILURES += 1
        failures = _IMAGE_PRIMARY_FAILURES
        opened = failures >= IMAGE_CIRCUIT_FAILURE_THRESHOLD
        if opened:
            _IMAGE_PRIMARY_OPEN_UNTIL = time.time() + IMAGE_CIRCUIT_COOLDOWN_SECONDS
    # Log outside the lock: flushed stdout I/O must not serialize image threads.
    if opened:
        print(
            f"[image circuit] primary {GPT_IMAGE_MODEL} opened for {IMAGE_CIRCUIT_COOLDOWN_SECONDS}s "
            f"after {failures} failures; fallback={IMAGE_FALLBACK_MODEL}; reason={detail}",
            flush=True,
        )
    else:
        print(
            f"[image circuit] primary {GPT_IMAGE_MODEL} failure {failures}/{IMAGE_CIRCUIT_FAILURE_THRESHOLD}; "
            f"fallback={IMAGE_FALLBACK_MODEL}; reason={detail}",
            flush=True,
        )
|
||||||
|
|
||||||
|
|
||||||
def _image_failure_message(kind: str, attempts: int, last_err: str, capacity_seen: bool) -> str:
|
def _image_failure_message(kind: str, attempts: int, last_err: str, capacity_seen: bool) -> str:
|
||||||
if capacity_seen:
|
if capacity_seen:
|
||||||
return (
|
return (
|
||||||
@@ -3604,36 +3691,37 @@ def _image_edit_call(
|
|||||||
fallback_text: bool = False,
|
fallback_text: bool = False,
|
||||||
max_attempts: int = 3,
|
max_attempts: int = 3,
|
||||||
max_side: int = 1024,
|
max_side: int = 1024,
|
||||||
|
force_fallback_model: bool = False,
|
||||||
) -> tuple[bytes, str]:
|
) -> tuple[bytes, str]:
|
||||||
"""通用 image edit 调用 · 失败重试 + 可选 text fallback。
|
"""通用 image edit 调用 · 失败重试 + 可选 text fallback。
|
||||||
返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
|
返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
|
||||||
失败 raise RuntimeError。
|
失败 raise RuntimeError。
|
||||||
输入图自动 resize 到 max_side(默认 1024)边长后再用 multipart 上传;多参考图使用 image[]。
|
输入图自动 resize 到 max_side(默认 1024)边长后再用 multipart 上传;多参考图使用 image[]。
|
||||||
生图模型按产品规则强制使用 gpt-image-2;model/models 参数只保留兼容旧调用。"""
|
生图模型主路径使用 gpt-image-2;Gemini 只在主模型上游异常时兜底。model/models 参数只保留兼容旧调用。"""
|
||||||
import base64 as b64lib
|
import base64 as b64lib
|
||||||
import time as _time
|
import time as _time
|
||||||
import httpx
|
import httpx
|
||||||
if not IMAGE_API_KEY:
|
if not IMAGE_API_KEY:
|
||||||
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
||||||
models_cycle = [GPT_IMAGE_MODEL]
|
|
||||||
model = GPT_IMAGE_MODEL
|
model = GPT_IMAGE_MODEL
|
||||||
image_paths = image_path if isinstance(image_path, list) else [image_path]
|
image_paths = image_path if isinstance(image_path, list) else [image_path]
|
||||||
image_paths = [path for path in image_paths if path and path.exists()][:10]
|
image_paths = [path for path in image_paths if path and path.exists()][:10]
|
||||||
if not image_paths:
|
if not image_paths:
|
||||||
raise RuntimeError("image edit reference image missing")
|
raise RuntimeError("image edit reference image missing")
|
||||||
img_bytes_list = [_prepare_image_edit_bytes(path, max_side) for path in image_paths]
|
img_bytes_list = [_prepare_image_edit_bytes(path, max_side) for path in image_paths]
|
||||||
plan: list[str] = ["edit"] * max_attempts
|
model_candidates = _image_model_candidates(force_fallback=force_fallback_model)
|
||||||
|
mode_plan: list[str] = ["edit"] if model_candidates != [GPT_IMAGE_MODEL] else ["edit"] * max_attempts
|
||||||
if fallback_text:
|
if fallback_text:
|
||||||
plan.append("text")
|
mode_plan.append("text")
|
||||||
|
attempt_steps = [(current_mode, current_model) for current_mode in mode_plan for current_model in model_candidates]
|
||||||
|
|
||||||
last_err = ""
|
last_err = ""
|
||||||
resp_data: dict = {}
|
resp_data: dict = {}
|
||||||
effective_mode = "edit"
|
effective_mode = "edit"
|
||||||
capacity_seen = False
|
capacity_seen = False
|
||||||
attempts_done = 0
|
attempts_done = 0
|
||||||
for attempt, current_mode in enumerate(plan):
|
for attempt, (current_mode, current_model) in enumerate(attempt_steps):
|
||||||
attempts_done = attempt + 1
|
attempts_done = attempt + 1
|
||||||
current_model = models_cycle[min(attempt, len(models_cycle) - 1)]
|
|
||||||
status_code = 0
|
status_code = 0
|
||||||
body = ""
|
body = ""
|
||||||
retry_after: str | None = None
|
retry_after: str | None = None
|
||||||
@@ -3660,8 +3748,10 @@ def _image_edit_call(
|
|||||||
else:
|
else:
|
||||||
resp_data = _image_generation_response(prompt, current_model)
|
resp_data = _image_generation_response(prompt, current_model)
|
||||||
if resp_data.get("data"):
|
if resp_data.get("data"):
|
||||||
effective_mode = current_mode
|
effective_mode = f"{current_mode}:{current_model}"
|
||||||
model = current_model # 记录实际成功的 model
|
model = current_model # 记录实际成功的 model
|
||||||
|
if current_model == GPT_IMAGE_MODEL:
|
||||||
|
_image_record_primary_success()
|
||||||
break
|
break
|
||||||
err_obj = resp_data.get("error") or {}
|
err_obj = resp_data.get("error") or {}
|
||||||
last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
|
last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
|
||||||
@@ -3677,9 +3767,15 @@ def _image_edit_call(
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
last_err = f"{type(e).__name__}: {e} · model={current_model}"
|
last_err = f"{type(e).__name__}: {e} · model={current_model}"
|
||||||
|
|
||||||
next_mode_changed = attempt < len(plan) - 1 and plan[attempt + 1] != current_mode
|
fallbackable = current_model == GPT_IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err)
|
||||||
if _image_should_retry(attempt, len(plan), status_code, body, last_err, next_mode_changed):
|
if fallbackable:
|
||||||
tag = f"retry {attempt + 1}/{len(plan)} → {GPT_IMAGE_MODEL}"
|
_image_record_primary_failure(last_err)
|
||||||
|
if any(next_model != GPT_IMAGE_MODEL for _next_mode, next_model in attempt_steps[attempt + 1:]):
|
||||||
|
print(f"[image edit fallback → {IMAGE_FALLBACK_MODEL}] {last_err}", flush=True)
|
||||||
|
continue
|
||||||
|
next_mode_changed = attempt < len(attempt_steps) - 1 and attempt_steps[attempt + 1][0] != current_mode
|
||||||
|
if _image_should_retry(attempt, len(attempt_steps), status_code, body, last_err, next_mode_changed):
|
||||||
|
tag = f"retry {attempt + 1}/{len(attempt_steps)} → {current_model}"
|
||||||
delay = _image_retry_delay(attempt, status_code, body, retry_after)
|
delay = _image_retry_delay(attempt, status_code, body, retry_after)
|
||||||
print(f"[image edit {tag}, sleep {delay:.0f}s] {last_err}", flush=True)
|
print(f"[image edit {tag}, sleep {delay:.0f}s] {last_err}", flush=True)
|
||||||
_time.sleep(delay)
|
_time.sleep(delay)
|
||||||
@@ -3706,20 +3802,21 @@ def _image_text_call(
|
|||||||
model: str | None = None,
|
model: str | None = None,
|
||||||
models: list[str] | None = None,
|
models: list[str] | None = None,
|
||||||
max_attempts: int = 3,
|
max_attempts: int = 3,
|
||||||
|
force_fallback_model: bool = False,
|
||||||
) -> tuple[bytes, str]:
|
) -> tuple[bytes, str]:
|
||||||
"""Text-only image generation. 生图模型强制使用 gpt-image-2。"""
|
"""Text-only image generation. gpt-image-2 primary, Gemini only as outage fallback."""
|
||||||
import base64 as b64lib
|
import base64 as b64lib
|
||||||
import time as _time
|
import time as _time
|
||||||
import httpx
|
import httpx
|
||||||
if not IMAGE_API_KEY:
|
if not IMAGE_API_KEY:
|
||||||
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
||||||
models_cycle = [GPT_IMAGE_MODEL]
|
candidates = _image_model_candidates(force_fallback=force_fallback_model)
|
||||||
|
attempt_models = candidates if candidates != [GPT_IMAGE_MODEL] else [GPT_IMAGE_MODEL] * max_attempts
|
||||||
last_err = ""
|
last_err = ""
|
||||||
capacity_seen = False
|
capacity_seen = False
|
||||||
attempts_done = 0
|
attempts_done = 0
|
||||||
for attempt in range(max_attempts):
|
for attempt, current_model in enumerate(attempt_models):
|
||||||
attempts_done = attempt + 1
|
attempts_done = attempt + 1
|
||||||
current_model = models_cycle[min(attempt, len(models_cycle) - 1)]
|
|
||||||
status_code = 0
|
status_code = 0
|
||||||
body = ""
|
body = ""
|
||||||
retry_after: str | None = None
|
retry_after: str | None = None
|
||||||
@@ -3729,12 +3826,16 @@ def _image_text_call(
|
|||||||
item = resp_data["data"][0]
|
item = resp_data["data"][0]
|
||||||
b64 = item.get("b64_json")
|
b64 = item.get("b64_json")
|
||||||
if b64:
|
if b64:
|
||||||
return b64lib.b64decode(b64), "text"
|
if current_model == GPT_IMAGE_MODEL:
|
||||||
|
_image_record_primary_success()
|
||||||
|
return b64lib.b64decode(b64), f"text:{current_model}"
|
||||||
if item.get("url"):
|
if item.get("url"):
|
||||||
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
|
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
|
||||||
image_resp = client.get(item["url"])
|
image_resp = client.get(item["url"])
|
||||||
image_resp.raise_for_status()
|
image_resp.raise_for_status()
|
||||||
return image_resp.content, "text"
|
if current_model == GPT_IMAGE_MODEL:
|
||||||
|
_image_record_primary_success()
|
||||||
|
return image_resp.content, f"text:{current_model}"
|
||||||
err_obj = resp_data.get("error") or {}
|
err_obj = resp_data.get("error") or {}
|
||||||
last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
|
last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
@@ -3748,9 +3849,15 @@ def _image_text_call(
|
|||||||
body = str(e)
|
body = str(e)
|
||||||
status_code = 429 if "429" in body or "saturated" in body.lower() or "饱和" in body else 0
|
status_code = 429 if "429" in body or "saturated" in body.lower() or "饱和" in body else 0
|
||||||
capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body)
|
capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body)
|
||||||
if _image_should_retry(attempt, max_attempts, status_code, body, last_err):
|
fallbackable = current_model == GPT_IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err)
|
||||||
|
if fallbackable:
|
||||||
|
_image_record_primary_failure(last_err)
|
||||||
|
if any(next_model != GPT_IMAGE_MODEL for next_model in attempt_models[attempt + 1:]):
|
||||||
|
print(f"[image text fallback → {IMAGE_FALLBACK_MODEL}] {last_err}", flush=True)
|
||||||
|
continue
|
||||||
|
if _image_should_retry(attempt, len(attempt_models), status_code, body, last_err):
|
||||||
delay = _image_retry_delay(attempt, status_code, body, retry_after)
|
delay = _image_retry_delay(attempt, status_code, body, retry_after)
|
||||||
print(f"[image text retry {attempt + 1}/{max_attempts} → {GPT_IMAGE_MODEL}, sleep {delay:.0f}s] {last_err}", flush=True)
|
print(f"[image text retry {attempt + 1}/{len(attempt_models)} → {current_model}, sleep {delay:.0f}s] {last_err}", flush=True)
|
||||||
_time.sleep(delay)
|
_time.sleep(delay)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
@@ -4116,7 +4223,8 @@ def health() -> dict:
|
|||||||
"image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
|
"image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
|
||||||
"image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS,
|
"image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS,
|
||||||
"ai_proxy_configured": bool(AI_HTTP_PROXY),
|
"ai_proxy_configured": bool(AI_HTTP_PROXY),
|
||||||
"image_fallbacks": [GPT_IMAGE_MODEL],
|
"image_fallbacks": _image_fallback_models(),
|
||||||
|
"image_circuit": _image_circuit_snapshot(),
|
||||||
"subject_image": SUBJECT_ASSET_IMAGE_MODEL,
|
"subject_image": SUBJECT_ASSET_IMAGE_MODEL,
|
||||||
"subject_image_fallbacks": SUBJECT_ASSET_IMAGE_MODELS,
|
"subject_image_fallbacks": SUBJECT_ASSET_IMAGE_MODELS,
|
||||||
"voice_provider": VOICE_PROVIDER,
|
"voice_provider": VOICE_PROVIDER,
|
||||||
@@ -4447,16 +4555,18 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
|
|||||||
if req.mode == "edit":
|
if req.mode == "edit":
|
||||||
img_bytes_in = reference_path.read_bytes()
|
img_bytes_in = reference_path.read_bytes()
|
||||||
|
|
||||||
# 尝试 i2i 最多 3 次,全失败时降级 text-only 再试 1 次
|
# 尝试 i2i;主模型上游异常时允许 Gemini 兜底。无兜底时保留旧的多次重试。
|
||||||
plan: list[str] = ([req.mode] * 3) if req.mode == "edit" else [req.mode]
|
model_candidates = _image_model_candidates()
|
||||||
|
plan: list[str] = ([req.mode] if model_candidates != [GPT_IMAGE_MODEL] else [req.mode] * 3) if req.mode == "edit" else [req.mode]
|
||||||
if req.mode == "edit":
|
if req.mode == "edit":
|
||||||
plan.append("text") # i2i 都失败时自动降级
|
plan.append("text") # i2i 都失败时自动降级
|
||||||
|
attempt_steps = [(current_mode, current_model) for current_mode in plan for current_model in model_candidates]
|
||||||
resp_data: dict = {}
|
resp_data: dict = {}
|
||||||
last_err = ""
|
last_err = ""
|
||||||
effective_mode = req.mode
|
effective_mode = req.mode
|
||||||
capacity_seen = False
|
capacity_seen = False
|
||||||
attempts_done = 0
|
attempts_done = 0
|
||||||
for attempt, current_mode in enumerate(plan):
|
for attempt, (current_mode, current_model) in enumerate(attempt_steps):
|
||||||
attempts_done = attempt + 1
|
attempts_done = attempt + 1
|
||||||
status_code = 0
|
status_code = 0
|
||||||
body = ""
|
body = ""
|
||||||
@@ -4471,20 +4581,23 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
|
|||||||
headers={
|
headers={
|
||||||
"Authorization": f"Bearer {IMAGE_API_KEY}",
|
"Authorization": f"Bearer {IMAGE_API_KEY}",
|
||||||
},
|
},
|
||||||
data={"model": model, "prompt": full_prompt, "n": "1"},
|
data={"model": current_model, "prompt": full_prompt, "n": "1"},
|
||||||
files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")},
|
files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")},
|
||||||
)
|
)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
resp_data = r.json()
|
resp_data = r.json()
|
||||||
else:
|
else:
|
||||||
# text-only
|
# text-only
|
||||||
resp_data = _image_generation_response(full_prompt, model)
|
resp_data = _image_generation_response(full_prompt, current_model)
|
||||||
|
|
||||||
if resp_data.get("data"):
|
if resp_data.get("data"):
|
||||||
effective_mode = current_mode
|
effective_mode = f"{current_mode}:{current_model}"
|
||||||
|
model = current_model
|
||||||
|
if current_model == GPT_IMAGE_MODEL:
|
||||||
|
_image_record_primary_success()
|
||||||
break
|
break
|
||||||
err_obj = resp_data.get("error") or {}
|
err_obj = resp_data.get("error") or {}
|
||||||
last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]}"
|
last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
body = e.response.text
|
body = e.response.text
|
||||||
status_code = e.response.status_code
|
status_code = e.response.status_code
|
||||||
@@ -4498,16 +4611,22 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
|
|||||||
or "timeout" in body.lower()
|
or "timeout" in body.lower()
|
||||||
or _image_is_capacity_error(status_code, body)
|
or _image_is_capacity_error(status_code, body)
|
||||||
)
|
)
|
||||||
last_err = f"HTTP {status_code}: {body[:200]}"
|
last_err = f"HTTP {status_code}: {body[:200]} · model={current_model}"
|
||||||
if not transient:
|
if not transient:
|
||||||
raise HTTPException(500, f"image gen HTTP {status_code}: {body[:300]}")
|
raise HTTPException(500, f"image gen HTTP {status_code}: {body[:300]}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
last_err = f"{type(e).__name__}: {e}"
|
last_err = f"{type(e).__name__}: {e} · model={current_model}"
|
||||||
|
|
||||||
next_mode_changed = attempt < len(plan) - 1 and plan[attempt + 1] != current_mode
|
fallbackable = current_model == GPT_IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err)
|
||||||
if _image_should_retry(attempt, len(plan), status_code, body, last_err, next_mode_changed):
|
if fallbackable:
|
||||||
next_mode = plan[attempt + 1]
|
_image_record_primary_failure(last_err)
|
||||||
tag = f"fallback → {next_mode}" if next_mode != current_mode else f"retry {attempt + 1}/{len(plan)}"
|
if any(next_model != GPT_IMAGE_MODEL for _next_mode, next_model in attempt_steps[attempt + 1:]):
|
||||||
|
print(f"[image gen fallback → {IMAGE_FALLBACK_MODEL}] {last_err}", flush=True)
|
||||||
|
continue
|
||||||
|
next_mode_changed = attempt < len(attempt_steps) - 1 and attempt_steps[attempt + 1][0] != current_mode
|
||||||
|
if _image_should_retry(attempt, len(attempt_steps), status_code, body, last_err, next_mode_changed):
|
||||||
|
next_mode = attempt_steps[attempt + 1][0]
|
||||||
|
tag = f"fallback → {next_mode}" if next_mode != current_mode else f"retry {attempt + 1}/{len(attempt_steps)}"
|
||||||
print(f"[image gen {tag}] {last_err}", flush=True)
|
print(f"[image gen {tag}] {last_err}", flush=True)
|
||||||
_time.sleep(_image_retry_delay(attempt, status_code, body, retry_after))
|
_time.sleep(_image_retry_delay(attempt, status_code, body, retry_after))
|
||||||
else:
|
else:
|
||||||
@@ -5677,10 +5796,11 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
|
|||||||
"Avoid bulky collars, scarves, hair, hoods, props, or poses that hide the neck/shoulder placement area. "
|
"Avoid bulky collars, scarves, hair, hoods, props, or poses that hide the neck/shoulder placement area. "
|
||||||
"For back and close-up views, prioritize the cervical spine, shoulder blades, upper trapezius, and clean wearable-device contact area. "
|
"For back and close-up views, prioritize the cervical spine, shoulder blades, upper trapezius, and clean wearable-device contact area. "
|
||||||
)
|
)
|
||||||
models = [GPT_IMAGE_MODEL]
|
models = SUBJECT_ASSET_IMAGE_MODELS
|
||||||
generated: list[SubjectAsset] = []
|
generated: list[SubjectAsset] = []
|
||||||
generation_errors: list[str] = []
|
generation_errors: list[str] = []
|
||||||
first_generation_error: RuntimeError | None = None
|
first_generation_error: RuntimeError | None = None
|
||||||
|
pack_force_fallback_model = _image_primary_circuit_open()
|
||||||
try:
|
try:
|
||||||
for view, view_label in _subject_view_labels(req.subject_kind, req.views):
|
for view, view_label in _subject_view_labels(req.subject_kind, req.views):
|
||||||
closeup_view = view in {"bust", "back_detail", "bust_front", "bust_left_45", "bust_right_45", "back_neck_detail"} or "detail" in view
|
closeup_view = view in {"bust", "back_detail", "bust_front", "bust_left_45", "bust_right_45", "back_neck_detail"} or "detail" in view
|
||||||
@@ -5741,14 +5861,18 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
|
|||||||
try:
|
try:
|
||||||
if similar_mode:
|
if similar_mode:
|
||||||
print(
|
print(
|
||||||
f"[subject assets] reconstruction_mode=similar endpoint=/images/generations view={view} image_refs=0 model={GPT_IMAGE_MODEL}",
|
f"[subject assets] reconstruction_mode=similar endpoint=/images/generations view={view} image_refs=0 model={'fallback' if pack_force_fallback_model else GPT_IMAGE_MODEL}",
|
||||||
flush=True,
|
flush=True,
|
||||||
)
|
)
|
||||||
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3)
|
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3, force_fallback_model=pack_force_fallback_model)
|
||||||
|
if _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
|
||||||
|
pack_force_fallback_model = True
|
||||||
else:
|
else:
|
||||||
if model_src is None:
|
if model_src is None:
|
||||||
raise RuntimeError("subject asset edit reference image missing")
|
raise RuntimeError("subject asset edit reference image missing")
|
||||||
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
|
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model)
|
||||||
|
if _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
|
||||||
|
pack_force_fallback_model = True
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
if first_generation_error is None:
|
if first_generation_error is None:
|
||||||
first_generation_error = e
|
first_generation_error = e
|
||||||
|
|||||||
@@ -43,9 +43,13 @@ IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
|
|||||||
IMAGE_API_KEY=
|
IMAGE_API_KEY=
|
||||||
IMAGE_MODEL=gpt-image-2
|
IMAGE_MODEL=gpt-image-2
|
||||||
IMAGE_REQUEST_TIMEOUT_SECONDS=60
|
IMAGE_REQUEST_TIMEOUT_SECONDS=60
|
||||||
|
IMAGE_FALLBACK_ENABLED=true
|
||||||
|
IMAGE_FALLBACK_MODEL=gemini-3-pro-image-preview
|
||||||
|
IMAGE_CIRCUIT_FAILURE_THRESHOLD=2
|
||||||
|
IMAGE_CIRCUIT_COOLDOWN_SECONDS=600
|
||||||
GPT_IMAGE_MODEL=gpt-image-2
|
GPT_IMAGE_MODEL=gpt-image-2
|
||||||
SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2
|
SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2
|
||||||
SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2
|
SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2,gemini-3-pro-image-preview
|
||||||
# Optional outbound proxy for AI gateway calls. Leave blank on normal VPS networking.
|
# Optional outbound proxy for AI gateway calls. Leave blank on normal VPS networking.
|
||||||
AI_HTTP_PROXY=
|
AI_HTTP_PROXY=
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1189,11 +1189,11 @@ function modelList(values: Array<string | undefined>) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function imageModelChain(models?: RuntimeModels) {
|
function imageModelChain(models?: RuntimeModels) {
|
||||||
return modelList([models?.image || "gpt-image-2"])
|
return modelList([models?.image || "gpt-image-2", ...(models?.image_fallbacks || [])])
|
||||||
}
|
}
|
||||||
|
|
||||||
function subjectImageModelChain(models?: RuntimeModels) {
|
function subjectImageModelChain(models?: RuntimeModels) {
|
||||||
return modelList([models?.subject_image || "gpt-image-2"])
|
return modelList([models?.subject_image || "gpt-image-2", ...(models?.subject_image_fallbacks || [])])
|
||||||
}
|
}
|
||||||
|
|
||||||
function resolveVideoModelLabel(models: RuntimeModels | undefined, model: string) {
|
function resolveVideoModelLabel(models: RuntimeModels | undefined, model: string) {
|
||||||
@@ -1224,7 +1224,7 @@ function productModelTrace(models?: RuntimeModels): ModelTraceSpec {
|
|||||||
chain: [
|
chain: [
|
||||||
`批量视角识别:${modelValue(models?.product_view)} 多图读取同一产品素材,标注视角、佩戴者左右、上下、内外侧、用途和风险`,
|
`批量视角识别:${modelValue(models?.product_view)} 多图读取同一产品素材,标注视角、佩戴者左右、上下、内外侧、用途和风险`,
|
||||||
"识别兜底:批量失败会按单图重试;仍失败或文件缺失时写入本地默认视角,并在 risk/note 标明兜底原因",
|
"识别兜底:批量失败会按单图重试;仍失败或文件缺失时写入本地默认视角,并在 risk/note 标明兜底原因",
|
||||||
`缺角度补图:${imageModelChain(models)} 走 /images/edits,最多读取 6 张已上传参考图补齐缺失视角;失败保留重试入口,不自动换模型`,
|
`缺角度补图:${imageModelChain(models)} 走 /images/edits,最多读取 6 张已上传参考图补齐缺失视角;只有 gpt-image-2 超时、限流或 5xx 上游异常时才自动兜底`,
|
||||||
"前端只保存标注和 AI 补图结果;后续首尾帧/视频规划每条最多挑 6 张相关产品图",
|
"前端只保存标注和 AI 补图结果;后续首尾帧/视频规划每条最多挑 6 张相关产品图",
|
||||||
],
|
],
|
||||||
note: "上传产品图、重新识别、缺视角重试都会使用这组模型链路。",
|
note: "上传产品图、重新识别、缺视角重试都会使用这组模型链路。",
|
||||||
@@ -1244,7 +1244,7 @@ function similarSubjectModelTrace(models: RuntimeModels | undefined, subjectStyl
|
|||||||
`视觉 brief:${modelValue(models?.vision)} 把关键帧/模板图转成非身份化文字 brief;失败时继续用用户方向和模板文字`,
|
`视觉 brief:${modelValue(models?.vision)} 把关键帧/模板图转成非身份化文字 brief;失败时继续用用户方向和模板文字`,
|
||||||
`主体类型:${typeLabel}`,
|
`主体类型:${typeLabel}`,
|
||||||
"主体设定:前端把随机组合或手动选择的性别、年龄、着装、地域人种、肤色、体型、发型和气质锁定为结构化 profile",
|
"主体设定:前端把随机组合或手动选择的性别、年龄、着装、地域人种、肤色、体型、发型和气质锁定为结构化 profile",
|
||||||
`图像生成:${subjectImageModelChain(models)} 走 /images/generations 逐张文字生图;当前 similar 模式不上传原帧或模板图作为 image-edit 参考`,
|
`图像生成:${subjectImageModelChain(models)} 走 /images/generations 逐张文字生图;gpt-image-2 是主模型,超时、429 或 5xx 时短时熔断并兜底 Gemini;当前 similar 模式不上传原帧或模板图作为 image-edit 参考`,
|
||||||
"身份锁定:整套图必须是同一个主体,性别表现、年龄段、体型、材质和风格保持一致",
|
"身份锁定:整套图必须是同一个主体,性别表现、年龄段、体型、材质和风格保持一致",
|
||||||
],
|
],
|
||||||
note: "这是生成类似但创新的主体,不是复制、抠出或复刻源视频人物身份;内置形象也只作为方向参考。",
|
note: "这是生成类似但创新的主体,不是复制、抠出或复刻源视频人物身份;内置形象也只作为方向参考。",
|
||||||
|
|||||||
@@ -272,6 +272,16 @@ export interface RuntimeModels {
|
|||||||
image?: string
|
image?: string
|
||||||
image_base_url?: string
|
image_base_url?: string
|
||||||
image_fallbacks?: string[]
|
image_fallbacks?: string[]
|
||||||
|
image_circuit?: {
|
||||||
|
primary?: string
|
||||||
|
fallbacks?: string[]
|
||||||
|
failure_threshold?: number
|
||||||
|
cooldown_seconds?: number
|
||||||
|
primary_failures?: number
|
||||||
|
primary_open?: boolean
|
||||||
|
primary_open_until?: number
|
||||||
|
primary_open_remaining_seconds?: number
|
||||||
|
}
|
||||||
subject_image?: string
|
subject_image?: string
|
||||||
subject_image_fallbacks?: string[]
|
subject_image_fallbacks?: string[]
|
||||||
voice_provider?: string
|
voice_provider?: string
|
||||||
|
|||||||
Reference in New Issue
Block a user