auto-save 2026-05-27 17:18 (~9)

This commit is contained in:
2026-05-27 17:18:45 +08:00
parent 8999fe0baf
commit 9ab541796b
9 changed files with 420 additions and 243 deletions

View File

@@ -39,11 +39,22 @@ IMAGE_MODEL=gpt-image-2
IMAGE_REQUEST_TIMEOUT_SECONDS=60
IMAGE_FALLBACK_ENABLED=true
IMAGE_FALLBACK_MODEL=gemini-3-pro-image-preview
# 多备用模型用逗号分隔;未设置时兼容 IMAGE_FALLBACK_MODEL。
IMAGE_FALLBACK_MODELS=gemini-3-pro-image-preview
# 可选:把其它 OpenAI-compatible 图片模型加入 /health 和前端白名单,默认走 IMAGE_BASE_URL/IMAGE_API_KEY。
IMAGE_EXTRA_MODELS=
# 可选:JSON 覆盖/扩展模型配置,建议只写 api_key_env,不把真实 key 写入 JSON。
# IMAGE_MODEL_CONFIGS_JSON={"custom-model":{"label":"Custom Image","base_url_env":"CUSTOM_IMAGE_BASE_URL","api_key_env":"CUSTOM_IMAGE_API_KEY","provider":"openai","sizes":["1024x1024"],"default_size":"1024x1024"}}
IMAGE_CIRCUIT_FAILURE_THRESHOLD=2
IMAGE_CIRCUIT_COOLDOWN_SECONDS=600
GPT_IMAGE_MODEL=gpt-image-2
SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2
SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2,gemini-3-pro-image-preview
# 火山方舟 Seedream 图片模型。真实 key 只填本地/服务器 .env,不提交到 git。
ARK_SEEDREAM_ENABLED=true
ARK_IMAGE_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
ARK_IMAGE_API_KEY=
ARK_SEEDREAM_IMAGE_MODEL=doubao-seedream-4-5-251128
# 可选:本地网络需要代理访问 ai.skg.com 时配置;launchd 不一定继承 shell 代理变量。
AI_HTTP_PROXY=
YTDLP_COOKIES_FILE=

View File

@@ -858,7 +858,7 @@ class ProductFusionShot(BaseModel):
scene_image: dict | None = None
action_text: str = ""
duration: float = 5
image_model: str = "gpt-image-2"
image_model: str = "auto"
video_model: str = "seedance"
guide_image: dict | None = None
@@ -4600,10 +4600,144 @@ def _image_is_transport_error(message: str) -> bool:
)
def _dedupe_keep_order(items: list[str]) -> list[str]:
seen: set[str] = set()
result: list[str] = []
for item in items:
value = (item or "").strip()
if not value or value in seen:
continue
seen.add(value)
result.append(value)
return result
def _image_override(model: str) -> dict:
    """Return the ``IMAGE_MODEL_CONFIG_OVERRIDES`` entry for *model*, or an empty dict if there is none."""
    if model in IMAGE_MODEL_CONFIG_OVERRIDES:
        return IMAGE_MODEL_CONFIG_OVERRIDES[model]
    return {}
def _image_is_ark_seedream(model: str | None) -> bool:
    """Report whether *model* should be handled as an Ark Seedream image model.

    A model qualifies when its config override declares one of the providers
    ``ark`` / ``ark_seedream`` / ``seedream``, when it equals
    ``ARK_SEEDREAM_IMAGE_MODEL`` (case-insensitively), or when its id starts
    with ``doubao-seedream-``. Empty or ``None`` ids never match.
    """
    model_id = (model or "").strip()
    if not model_id:
        return False
    lowered = model_id.lower()
    # The sibling helpers look overrides up by the id as given, so check that
    # spelling too; a lowercase-only lookup would disagree with them for
    # mixed-case override keys. The lowercase lookup is kept for backward
    # compatibility.
    # NOTE(review): if override keys are already lower-cased at load time the
    # extra lookup is simply redundant - confirm against the config loader.
    declared = {
        str(_image_override(key).get("provider", "")).strip().lower()
        for key in (model_id, lowered)
    }
    if declared & {"ark", "ark_seedream", "seedream"}:
        return True
    return lowered == ARK_SEEDREAM_IMAGE_MODEL.lower() or lowered.startswith("doubao-seedream-")
def _image_provider(model: str) -> str:
    """Return the provider id for *model*.

    An explicit ``provider`` in the model's config override wins (stripped and
    lower-cased); otherwise the result is ``"ark_seedream"`` for Ark Seedream
    models and ``"openai"`` for everything else.
    """
    declared = str(_image_override(model).get("provider", "")).strip().lower()
    if not declared:
        declared = "ark_seedream" if _image_is_ark_seedream(model) else "openai"
    return declared
def _image_model_label(model: str) -> str:
    """Return a human-readable label for *model*.

    Precedence: a truthy ``label`` in the model's config override, then the
    built-in label table, then the generic ``"Seedream"`` for Ark Seedream
    models, and finally the model id itself.
    """
    custom = _image_override(model).get("label")
    if custom:
        return str(custom)
    builtin = {
        GPT_IMAGE_MODEL: "GPT Image 2",
        "gemini-3-pro-image-preview": "Gemini 图片",
        ARK_SEEDREAM_IMAGE_MODEL: "Seedream 4.5",
    }.get(model)
    if builtin is not None:
        return builtin
    return "Seedream" if _image_is_ark_seedream(model) else model
def _image_model_base_url(model: str | None = None) -> str:
    """Resolve the API base URL for *model* (defaults to ``IMAGE_MODEL``).

    Lookup order:
      1. the environment variable named by the override's ``base_url_env``;
      2. the override's literal ``base_url``;
      3. for Ark Seedream models, ``ARK_IMAGE_BASE_URL`` then ``IMAGE_BASE_URL``;
      4. otherwise ``IMAGE_BASE_URL`` then ``LLM_BASE_URL``.

    Returns the URL stripped of surrounding whitespace and trailing slashes;
    the result is an empty string when the selected fallback is empty.
    """
    model_id = (model or IMAGE_MODEL).strip()
    override = _image_override(model_id)
    env_name = str(override.get("base_url_env", "")).strip()
    if env_name:
        # Normalise before testing so a blank / whitespace-only variable falls
        # through to the next source instead of yielding an empty URL.
        from_env = os.getenv(env_name, "").strip().rstrip("/")
        if from_env:
            return from_env
    literal = str(override.get("base_url") or "").strip().rstrip("/")
    if literal:
        return literal
    if _image_is_ark_seedream(model_id):
        return (ARK_IMAGE_BASE_URL or IMAGE_BASE_URL).strip().rstrip("/")
    return (IMAGE_BASE_URL or LLM_BASE_URL).strip().rstrip("/")
def _image_model_api_key(model: str | None = None) -> str:
    """Resolve the API key for *model* (defaults to ``IMAGE_MODEL``).

    Lookup order:
      1. the environment variable named by the override's ``api_key_env``;
      2. the override's literal ``api_key``;
      3. for Ark Seedream models, ``ARK_IMAGE_API_KEY``; failing that,
         ``IMAGE_API_KEY`` only when ``IMAGE_BASE_URL`` points at the Ark host,
         else an empty string;
      4. otherwise ``IMAGE_API_KEY``.

    A falsy result means the model is treated as not configured by callers.
    """
    model_id = (model or IMAGE_MODEL).strip()
    override = _image_override(model_id)
    env_name = str(override.get("api_key_env", "")).strip()
    if env_name:
        # Strip before testing: a whitespace-only variable must not shadow the
        # remaining sources by returning an empty key.
        from_env = os.getenv(env_name, "").strip()
        if from_env:
            return from_env
    literal = str(override.get("api_key") or "").strip()
    if literal:
        return literal
    if _image_is_ark_seedream(model_id):
        if ARK_IMAGE_API_KEY:
            return ARK_IMAGE_API_KEY
        # Reuse the generic image key only when the generic gateway is Ark itself.
        if "ark.cn-beijing.volces.com" in (IMAGE_BASE_URL or ""):
            return IMAGE_API_KEY
        return ""
    return IMAGE_API_KEY
def _image_model_headers(model: str) -> dict:
    """Build the bearer-token ``Authorization`` header for *model*.

    Raises:
        RuntimeError: when no API key resolves for the model.
    """
    token = _image_model_api_key(model)
    if token:
        return {"Authorization": f"Bearer {token}"}
    raise RuntimeError(f"{_image_model_label(model)} API key 未配置")
def _image_any_configured() -> bool:
    """Return True as soon as one configured image model resolves to a non-empty API key."""
    for candidate in _configured_image_models():
        if _image_model_api_key(candidate):
            return True
    return False
def _image_model_size_choices(model: str | None = None) -> list[dict]:
    """Return the size choices offered for *model* (defaults to ``IMAGE_MODEL``).

    A non-empty list under the override's ``size_options`` (or ``sizes``) is
    normalised into ``{"id", "label", "value", "description"}`` dicts; entries
    that yield no size value are skipped. If nothing usable remains, the
    result is ``ARK_SEEDREAM_SIZE_CHOICES`` for Ark Seedream models and
    ``IMAGE_SIZE_CHOICES`` otherwise.
    """
    model_id = (model or IMAGE_MODEL).strip()
    override = _image_override(model_id)
    declared = override.get("size_options") or override.get("sizes")

    def _to_choice(entry) -> dict | None:
        # Dict entries may carry the size under "value", "key" or "id".
        if isinstance(entry, dict):
            size = str(entry.get("value") or entry.get("key") or entry.get("id") or "").strip()
            if not size:
                return None
            return {
                "id": str(entry.get("id") or size),
                "label": str(entry.get("label") or size),
                "value": size,
                "description": str(entry.get("description") or ""),
            }
        # Any other entry is used as the size string itself.
        size = str(entry).strip()
        if not size:
            return None
        return {"id": size, "label": size, "value": size, "description": ""}

    if isinstance(declared, list) and declared:
        choices = [choice for choice in map(_to_choice, declared) if choice is not None]
        if choices:
            return choices
    if _image_is_ark_seedream(model_id):
        return ARK_SEEDREAM_SIZE_CHOICES
    return IMAGE_SIZE_CHOICES
def _image_default_size(model: str | None = None) -> str:
    """Return the default output size for *model* (defaults to ``IMAGE_MODEL``).

    A truthy ``default_size`` in the model's config override wins; otherwise
    Ark Seedream models get ``2048x2048`` and all others ``1024x1536``.
    """
    model_id = (model or IMAGE_MODEL).strip()
    configured = _image_override(model_id).get("default_size")
    if configured:
        return str(configured).strip()
    if _image_is_ark_seedream(model_id):
        return "2048x2048"
    return "1024x1536"
def _configured_image_models() -> list[str]:
    """List every image model id known to the runtime config, de-duplicated in order.

    Order: primary model, fallback models (when fallback is enabled), the Ark
    Seedream model (when enabled), extra models, then every model id that has
    a config override.
    """
    fallback_part = IMAGE_FALLBACK_MODELS if IMAGE_FALLBACK_ENABLED else ()
    ark_part = (ARK_SEEDREAM_IMAGE_MODEL,) if ARK_SEEDREAM_ENABLED else ()
    ordered = [
        IMAGE_MODEL,
        *fallback_part,
        *ark_part,
        *IMAGE_EXTRA_MODELS,
        *IMAGE_MODEL_CONFIG_OVERRIDES.keys(),
    ]
    return _dedupe_keep_order(ordered)
def _image_fallback_models() -> list[str]:
if not IMAGE_FALLBACK_ENABLED or not IMAGE_FALLBACK_MODEL or IMAGE_FALLBACK_MODEL == GPT_IMAGE_MODEL:
if not IMAGE_FALLBACK_ENABLED:
return []
return [IMAGE_FALLBACK_MODEL]
return [model for model in _dedupe_keep_order(IMAGE_FALLBACK_MODELS) if model != IMAGE_MODEL]
def _image_circuit_snapshot() -> dict:
@@ -4611,7 +4745,7 @@ def _image_circuit_snapshot() -> dict:
with _IMAGE_CIRCUIT_LOCK:
open_until = _IMAGE_PRIMARY_OPEN_UNTIL
return {
"primary": GPT_IMAGE_MODEL,
"primary": IMAGE_MODEL,
"fallbacks": _image_fallback_models(),
"failure_threshold": IMAGE_CIRCUIT_FAILURE_THRESHOLD,
"cooldown_seconds": IMAGE_CIRCUIT_COOLDOWN_SECONDS,
@@ -4626,64 +4760,72 @@ def _image_primary_circuit_open() -> bool:
return _image_circuit_snapshot()["primary_open"]
def _image_mode_used_fallback(mode: str) -> bool:
    """Return True when *mode* ends with ``:<model>`` for one of the current fallback models."""
    # str.endswith accepts a tuple of suffixes; an empty tuple never matches.
    suffixes = tuple(f":{name}" for name in _image_fallback_models())
    return mode.endswith(suffixes)
def _normalize_image_model_preference(value: str | None) -> str:
raw = (value or "auto").strip().lower()
if raw in {"", "auto", "default"}:
return "auto"
if raw in {"gpt", "gpt-image", GPT_IMAGE_MODEL.lower()}:
return GPT_IMAGE_MODEL
if raw in {"seedream", "ark-seedream", "doubao-seedream", ARK_SEEDREAM_IMAGE_MODEL.lower()}:
return ARK_SEEDREAM_IMAGE_MODEL
for model in _configured_image_models():
if raw == model.lower():
return model
if IMAGE_FALLBACK_MODEL and raw in {"gemini", IMAGE_FALLBACK_MODEL.lower()}:
return IMAGE_FALLBACK_MODEL
return "auto"
return (value or "").strip() or "auto"
def _image_model_candidates(force_fallback: bool = False, preference: str | None = "auto") -> list[str]:
normalized = _normalize_image_model_preference(preference)
fallbacks = _image_fallback_models()
if normalized == GPT_IMAGE_MODEL:
return [GPT_IMAGE_MODEL]
if normalized == IMAGE_FALLBACK_MODEL and fallbacks:
return [IMAGE_FALLBACK_MODEL]
if normalized != "auto":
return [normalized]
if not fallbacks:
return [GPT_IMAGE_MODEL]
return [IMAGE_MODEL]
if force_fallback or _image_primary_circuit_open():
return fallbacks
return [GPT_IMAGE_MODEL, *fallbacks]
return [IMAGE_MODEL, *fallbacks]
def image_model_options() -> list[dict]:
fallback_labels = ", ".join(_image_model_label(model) for model in _image_fallback_models()) or ""
options = [
{
"id": "auto",
"label": "自动",
"model": GPT_IMAGE_MODEL,
"description": "优先 GPT Image 2必要时按后端熔断和兜底策略切到备用图片模型",
"available": bool(IMAGE_API_KEY),
},
{
"id": GPT_IMAGE_MODEL,
"label": "GPT Image 2",
"model": GPT_IMAGE_MODEL,
"description": "主生图模型,适合营销图和参考图重绘",
"available": bool(IMAGE_API_KEY),
"model": IMAGE_MODEL,
"provider": _image_provider(IMAGE_MODEL),
"description": f"优先 {_image_model_label(IMAGE_MODEL)};备用:{fallback_labels}",
"available": bool(_image_model_api_key(IMAGE_MODEL)),
"default_size": _image_default_size(IMAGE_MODEL),
"size_options": _image_model_size_choices(IMAGE_MODEL),
},
]
if IMAGE_FALLBACK_ENABLED and IMAGE_FALLBACK_MODEL and IMAGE_FALLBACK_MODEL != GPT_IMAGE_MODEL:
for model in _configured_image_models():
options.append({
"id": IMAGE_FALLBACK_MODEL,
"label": "Gemini 图片",
"model": IMAGE_FALLBACK_MODEL,
"description": "备用图片模型,适合主模型慢或失败时手动选择",
"available": bool(IMAGE_API_KEY),
"id": model,
"label": _image_model_label(model),
"model": model,
"provider": _image_provider(model),
"description": "图片生成模型可通过环境变量切换网关、key 和尺寸能力",
"available": bool(_image_model_api_key(model)),
"default_size": _image_default_size(model),
"size_options": _image_model_size_choices(model),
})
return options
def image_size_options() -> list[dict]:
return IMAGE_SIZE_CHOICES
return ALL_IMAGE_SIZE_CHOICES
def _normalize_image_size(raw: str | None) -> str:
def _normalize_image_size(raw: str | None, model: str | None = None) -> str:
model_id = (model or IMAGE_MODEL).strip()
value = (raw or "auto").strip().lower()
aliases = {
"vertical": "1024x1536",
@@ -4696,14 +4838,30 @@ def _normalize_image_size(raw: str | None) -> str:
"横图": "1536x1024",
}
value = aliases.get(value, value)
allowed = {str(item["value"]) for item in IMAGE_SIZE_CHOICES}
if value not in allowed:
if _image_is_ark_seedream(model_id):
seedream_aliases = {
"auto": _image_default_size(model_id),
"1024x1536": "1440x2560",
"1536x1024": "2560x1440",
"1024x1024": "2048x2048",
"square": "2048x2048",
"方图": "2048x2048",
"vertical": "1440x2560",
"portrait": "1440x2560",
"竖图": "1440x2560",
"horizontal": "2560x1440",
"landscape": "2560x1440",
"横图": "2560x1440",
}
value = seedream_aliases.get(value, value)
canonical = {str(item["value"]).lower(): str(item["value"]) for item in _image_model_size_choices(model_id)}
if value not in canonical:
raise HTTPException(400, f"unsupported image size: {raw}")
return value
return canonical[value]
def _image_size_payload(raw: str | None) -> dict:
size = _normalize_image_size(raw)
def _image_size_payload(raw: str | None, model: str | None = None) -> dict:
size = _normalize_image_size(raw, model)
return {} if size == "auto" else {"size": size}
@@ -4801,7 +4959,7 @@ def _image_record_primary_success() -> None:
global _IMAGE_PRIMARY_FAILURES, _IMAGE_PRIMARY_OPEN_UNTIL
with _IMAGE_CIRCUIT_LOCK:
if _IMAGE_PRIMARY_FAILURES or _IMAGE_PRIMARY_OPEN_UNTIL:
print(f"[image circuit] primary {GPT_IMAGE_MODEL} recovered", flush=True)
print(f"[image circuit] primary {IMAGE_MODEL} recovered", flush=True)
_IMAGE_PRIMARY_FAILURES = 0
_IMAGE_PRIMARY_OPEN_UNTIL = 0.0
@@ -4815,14 +4973,14 @@ def _image_record_primary_failure(reason: str) -> None:
if _IMAGE_PRIMARY_FAILURES >= IMAGE_CIRCUIT_FAILURE_THRESHOLD:
_IMAGE_PRIMARY_OPEN_UNTIL = time.time() + IMAGE_CIRCUIT_COOLDOWN_SECONDS
print(
f"[image circuit] primary {GPT_IMAGE_MODEL} opened for {IMAGE_CIRCUIT_COOLDOWN_SECONDS}s "
f"after {_IMAGE_PRIMARY_FAILURES} failures; fallback={IMAGE_FALLBACK_MODEL}; reason={reason[:220]}",
f"[image circuit] primary {IMAGE_MODEL} opened for {IMAGE_CIRCUIT_COOLDOWN_SECONDS}s "
f"after {_IMAGE_PRIMARY_FAILURES} failures; fallback={','.join(_image_fallback_models())}; reason={reason[:220]}",
flush=True,
)
else:
print(
f"[image circuit] primary {GPT_IMAGE_MODEL} failure {_IMAGE_PRIMARY_FAILURES}/{IMAGE_CIRCUIT_FAILURE_THRESHOLD}; "
f"fallback={IMAGE_FALLBACK_MODEL}; reason={reason[:220]}",
f"[image circuit] primary {IMAGE_MODEL} failure {_IMAGE_PRIMARY_FAILURES}/{IMAGE_CIRCUIT_FAILURE_THRESHOLD}; "
f"fallback={','.join(_image_fallback_models())}; reason={reason[:220]}",
flush=True,
)
@@ -4830,14 +4988,14 @@ def _image_record_primary_failure(reason: str) -> None:
def _image_failure_message(kind: str, attempts: int, last_err: str, capacity_seen: bool) -> str:
if capacity_seen:
return (
f"{kind} failed after {attempts} attempts: gpt-image-2 上游负载饱和,"
f"{kind} failed after {attempts} attempts: {_image_model_label(IMAGE_MODEL)} 上游负载饱和,"
f"已自动退避重试仍失败,请稍后点重试。最后错误:{last_err}"
)
if "timeout" in last_err.lower():
return (
f"{kind} failed after {attempts} attempts: gpt-image-2 图片网关响应超时"
f"{kind} failed after {attempts} attempts: {_image_model_label(IMAGE_MODEL)} 图片网关响应超时"
f"(单次 {IMAGE_REQUEST_TIMEOUT_SECONDS}s模型未更改。"
f"请检查 {IMAGE_BASE_URL or LLM_BASE_URL or 'image gateway'} 的 gpt-image-2 上游渠道或稍后重试。"
f"请检查 {_image_model_base_url(IMAGE_MODEL) or 'image gateway'}{_image_model_label(IMAGE_MODEL)} 上游渠道或稍后重试。"
f"最后错误:{last_err}"
)
if _image_is_transport_error(last_err):
@@ -4859,24 +5017,69 @@ def _image_error_status(error: Exception) -> int:
) else 500
def _image_endpoint(path: str) -> str:
base = (IMAGE_BASE_URL or "").strip().rstrip("/")
def _image_endpoint(path: str, model: str | None = None) -> str:
base = _image_model_base_url(model)
if not base:
raise RuntimeError("IMAGE_BASE_URL 或 LLM_BASE_URL 未配置")
raise RuntimeError(f"{_image_model_label(model or IMAGE_MODEL)} base URL 未配置")
return f"{base}/{path.lstrip('/')}"
def _image_generation_response(prompt: str, model: str, size: str | None = "auto") -> dict:
def _image_reference_data_urls(img_bytes_list: list[bytes]) -> list[str]:
return [f"data:image/jpeg;base64,{base64.b64encode(img_bytes).decode('ascii')}" for img_bytes in img_bytes_list]
def _image_generation_payload(
    prompt: str,
    model: str,
    size: str | None = "auto",
    reference_images: list[str] | None = None,
) -> dict:
    """Build the JSON body for an ``/images/generations`` request.

    Non-Ark models get ``model`` / ``prompt`` / ``n`` plus the size fields;
    *reference_images* is ignored for them. Ark Seedream models get the
    Seedream-specific fields plus the size fields and, when references are
    given, at most the first 10 of them under ``reference_images``.
    """
    if not _image_is_ark_seedream(model):
        return {"model": model, "prompt": prompt, "n": 1, **_image_size_payload(size, model)}

    body = dict(
        model=model,
        prompt=prompt,
        watermark=False,
        response_format="url",
        sequential_image_generation="disabled",
    )
    body.update(_image_size_payload(size, model))
    if reference_images:
        body["reference_images"] = reference_images[:10]
    return body
def _image_generation_response(
prompt: str,
model: str,
size: str | None = "auto",
reference_images: list[str] | None = None,
) -> dict:
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
r = client.post(
_image_endpoint("/images/generations"),
headers={"Authorization": f"Bearer {IMAGE_API_KEY}"},
json={"model": model, "prompt": prompt, "n": 1, **_image_size_payload(size)},
_image_endpoint("/images/generations", model),
headers=_image_model_headers(model),
json=_image_generation_payload(prompt, model, size, reference_images),
)
r.raise_for_status()
return r.json()
def _image_response_item_bytes(item: dict, kind: str) -> bytes:
import base64 as b64lib
b64 = item.get("b64_json") or item.get("b64")
if b64:
return b64lib.b64decode(b64)
for key in ("url", "image_url", "output_url", "download_url"):
url = item.get(key)
if isinstance(url, str) and url:
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
image_resp = client.get(url)
image_resp.raise_for_status()
return image_resp.content
raise RuntimeError(f"{kind} returned no b64_json or url")
def _image_should_retry(
attempt: int,
total_attempts: int,
@@ -4922,25 +5125,25 @@ def _image_edit_call(
max_side: int = 1024,
force_fallback_model: bool = False,
image_model_preference: str | None = "auto",
size: str | None = "auto",
) -> tuple[bytes, str]:
"""通用 image edit 调用 · 失败重试 + 可选 text fallback。
返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
失败 raise RuntimeError。
输入图自动 resize 到 max_side默认 1024边长后再用 multipart 上传;多参考图使用 image[]
生图模型主路径使用 gpt-image-2Gemini 只在主模型上游异常时兜底。model/models 参数只保留兼容旧调用。"""
import base64 as b64lib
输入图自动 resize 到 max_side默认 1024边长后再上传
OpenAI-compatible 模型走 /images/editsArk Seedream 走 /images/generations + reference_images。
主模型、备用模型和 Ark 模型均来自运行时配置model/models 参数只保留兼容旧调用。"""
import time as _time
import httpx
if not IMAGE_API_KEY:
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
model = GPT_IMAGE_MODEL
if not _image_any_configured():
raise RuntimeError("图片模型 API key 未配置")
image_paths = image_path if isinstance(image_path, list) else [image_path]
image_paths = [path for path in image_paths if path and path.exists()][:10]
if not image_paths:
raise RuntimeError("image edit reference image missing")
img_bytes_list = [_prepare_image_edit_bytes(path, max_side) for path in image_paths]
model_candidates = _image_model_candidates(force_fallback=force_fallback_model, preference=image_model_preference)
mode_plan: list[str] = ["edit"] if model_candidates != [GPT_IMAGE_MODEL] else ["edit"] * max_attempts
mode_plan: list[str] = ["edit"] if model_candidates != [IMAGE_MODEL] else ["edit"] * max_attempts
if fallback_text:
mode_plan.append("text")
attempt_steps = [(current_mode, current_model) for current_mode in mode_plan for current_model in model_candidates]
@@ -4957,30 +5160,35 @@ def _image_edit_call(
retry_after: str | None = None
try:
if current_mode == "edit":
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
r = client.post(
_image_endpoint("/images/edits"),
headers={
"Authorization": f"Bearer {IMAGE_API_KEY}",
},
data={"model": current_model, "prompt": prompt, "n": "1"},
files=(
{"image": ("reference.jpg", img_bytes_list[0], "image/jpeg")}
if len(img_bytes_list) == 1
else [
("image[]", (f"reference_{idx + 1}.jpg", img_bytes, "image/jpeg"))
for idx, img_bytes in enumerate(img_bytes_list)
]
),
if _image_is_ark_seedream(current_model):
resp_data = _image_generation_response(
prompt,
current_model,
size,
reference_images=_image_reference_data_urls(img_bytes_list),
)
r.raise_for_status()
resp_data = r.json()
else:
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
r = client.post(
_image_endpoint("/images/edits", current_model),
headers=_image_model_headers(current_model),
data={"model": current_model, "prompt": prompt, "n": "1", **_image_size_payload(size, current_model)},
files=(
{"image": ("reference.jpg", img_bytes_list[0], "image/jpeg")}
if len(img_bytes_list) == 1
else [
("image[]", (f"reference_{idx + 1}.jpg", img_bytes, "image/jpeg"))
for idx, img_bytes in enumerate(img_bytes_list)
]
),
)
r.raise_for_status()
resp_data = r.json()
else:
resp_data = _image_generation_response(prompt, current_model)
resp_data = _image_generation_response(prompt, current_model, size)
if resp_data.get("data"):
effective_mode = f"{current_mode}:{current_model}"
model = current_model # 记录实际成功的 model
if current_model == GPT_IMAGE_MODEL:
if current_model == IMAGE_MODEL:
_image_record_primary_success()
break
err_obj = resp_data.get("error") or {}
@@ -4997,11 +5205,11 @@ def _image_edit_call(
except Exception as e:
last_err = f"{type(e).__name__}: {e} · model={current_model}"
fallbackable = current_model == GPT_IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err)
fallbackable = current_model == IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err)
if fallbackable:
_image_record_primary_failure(last_err)
if any(next_model != GPT_IMAGE_MODEL for _next_mode, next_model in attempt_steps[attempt + 1:]):
print(f"[image edit fallback → {IMAGE_FALLBACK_MODEL}] {last_err}", flush=True)
if any(next_model != IMAGE_MODEL for _next_mode, next_model in attempt_steps[attempt + 1:]):
print(f"[image edit fallback → {','.join(_image_fallback_models())}] {last_err}", flush=True)
continue
next_mode_changed = attempt < len(attempt_steps) - 1 and attempt_steps[attempt + 1][0] != current_mode
if _image_should_retry(attempt, len(attempt_steps), status_code, body, last_err, next_mode_changed):
@@ -5015,16 +5223,7 @@ def _image_edit_call(
data_arr = resp_data.get("data", [])
if not data_arr:
raise RuntimeError(_image_failure_message("image edit", attempts_done, last_err, capacity_seen))
item = data_arr[0]
b64 = item.get("b64_json")
if not b64 and item.get("url"):
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
image_resp = client.get(item["url"])
image_resp.raise_for_status()
return image_resp.content, effective_mode
if not b64:
raise RuntimeError("image edit returned no b64_json")
return b64lib.b64decode(b64), effective_mode
return _image_response_item_bytes(data_arr[0], "image edit"), effective_mode
def _image_text_call(
@@ -5034,15 +5233,15 @@ def _image_text_call(
max_attempts: int = 3,
force_fallback_model: bool = False,
image_model_preference: str | None = "auto",
size: str | None = "auto",
) -> tuple[bytes, str]:
"""Text-only image generation. gpt-image-2 primary, Gemini only as outage fallback."""
import base64 as b64lib
"""Text-only image generation. Primary and fallback models are selected from runtime config."""
import time as _time
import httpx
if not IMAGE_API_KEY:
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
if not _image_any_configured():
raise RuntimeError("图片模型 API key 未配置")
candidates = _image_model_candidates(force_fallback=force_fallback_model, preference=image_model_preference)
attempt_models = candidates if candidates != [GPT_IMAGE_MODEL] else [GPT_IMAGE_MODEL] * max_attempts
attempt_models = candidates if candidates != [IMAGE_MODEL] else [IMAGE_MODEL] * max_attempts
last_err = ""
capacity_seen = False
attempts_done = 0
@@ -5052,21 +5251,12 @@ def _image_text_call(
body = ""
retry_after: str | None = None
try:
resp_data = _image_generation_response(prompt, current_model)
resp_data = _image_generation_response(prompt, current_model, size)
if resp_data.get("data"):
item = resp_data["data"][0]
b64 = item.get("b64_json")
if b64:
if current_model == GPT_IMAGE_MODEL:
_image_record_primary_success()
return b64lib.b64decode(b64), f"text:{current_model}"
if item.get("url"):
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
image_resp = client.get(item["url"])
image_resp.raise_for_status()
if current_model == GPT_IMAGE_MODEL:
_image_record_primary_success()
return image_resp.content, f"text:{current_model}"
out_bytes = _image_response_item_bytes(resp_data["data"][0], "image text")
if current_model == IMAGE_MODEL:
_image_record_primary_success()
return out_bytes, f"text:{current_model}"
err_obj = resp_data.get("error") or {}
last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
except httpx.HTTPStatusError as e:
@@ -5080,11 +5270,11 @@ def _image_text_call(
body = str(e)
status_code = 429 if "429" in body or "saturated" in body.lower() or "饱和" in body else 0
capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body)
fallbackable = current_model == GPT_IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err)
fallbackable = current_model == IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err)
if fallbackable:
_image_record_primary_failure(last_err)
if any(next_model != GPT_IMAGE_MODEL for next_model in attempt_models[attempt + 1:]):
print(f"[image text fallback → {IMAGE_FALLBACK_MODEL}] {last_err}", flush=True)
if any(next_model != IMAGE_MODEL for next_model in attempt_models[attempt + 1:]):
print(f"[image text fallback → {','.join(_image_fallback_models())}] {last_err}", flush=True)
continue
if _image_should_retry(attempt, len(attempt_models), status_code, body, last_err):
delay = _image_retry_delay(attempt, status_code, body, retry_after)
@@ -5190,7 +5380,7 @@ def _subject_agent_model(bundle: SubjectModelBundle) -> str:
def _subject_agent_image_model(bundle: SubjectModelBundle) -> str:
return IMAGE_FALLBACK_MODEL if bundle == "gemini" and IMAGE_FALLBACK_MODEL else GPT_IMAGE_MODEL
return IMAGE_FALLBACK_MODEL if bundle == "gemini" and IMAGE_FALLBACK_MODEL else SUBJECT_ASSET_IMAGE_MODEL
def _list_of_strings(value, limit: int = 18) -> list[str]:
@@ -6200,6 +6390,18 @@ def health() -> dict:
"image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS,
"image_options": image_model_options(),
"image_size_options": image_size_options(),
"image_model_routes": [
{
"model": model,
"label": _image_model_label(model),
"provider": _image_provider(model),
"base_url": _image_model_base_url(model) or "",
"configured": bool(_image_model_api_key(model)),
"default_size": _image_default_size(model),
"size_options": _image_model_size_choices(model),
}
for model in _configured_image_models()
],
"ai_proxy_configured": bool(AI_HTTP_PROXY),
"image_fallbacks": _image_fallback_models(),
"image_circuit": _image_circuit_snapshot(),
@@ -6687,8 +6889,8 @@ class GenerateReq(BaseModel):
prompt: str
extra_prompt: str = "" # ✓ 需要的元素(正向)
negative_prompt: str = "" # ✗ 不需要的元素(负向)
model: str = "auto" # auto / gpt-image-2 / gemini-3-pro-image-preview
size: str = "auto" # auto / 1024x1536 / 1024x1024 / 1536x1024
model: str = "auto" # auto / configured image model id
size: str = "auto" # auto / model-specific size
mode: str = "edit" # "edit" 带参考图,"text" 纯文字
from_selected: bool = False # True 时优先用 frame.selected 的生成图作 reference迭代否则原关键帧
@@ -6725,113 +6927,34 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
if not raw_prompt:
raise HTTPException(400, "prompt required")
full_prompt = _ensure_english(raw_prompt)
image_size = _normalize_image_size(req.size)
if not IMAGE_API_KEY:
raise HTTPException(503, "IMAGE_API_KEY 或 LLM_API_KEY 未配置")
if not _image_any_configured():
raise HTTPException(503, "图片模型 API key 未配置")
model = GPT_IMAGE_MODEL
gen_id = uuid.uuid4().hex[:12]
import base64 as b64lib
import time as _time
import httpx
img_bytes_in: bytes | None = None
if req.mode == "edit":
img_bytes_in = reference_path.read_bytes()
# 尝试 i2iauto 允许按熔断策略兜底,显式模型只走用户所选模型。
model_candidates = _image_model_candidates(preference=req.model)
plan: list[str] = ([req.mode] if model_candidates != [GPT_IMAGE_MODEL] else [req.mode] * 3) if req.mode == "edit" else [req.mode]
if req.mode == "edit":
plan.append("text") # i2i 都失败时自动降级
attempt_steps = [(current_mode, current_model) for current_mode in plan for current_model in model_candidates]
resp_data: dict = {}
last_err = ""
effective_mode = req.mode
capacity_seen = False
attempts_done = 0
for attempt, (current_mode, current_model) in enumerate(attempt_steps):
attempts_done = attempt + 1
status_code = 0
body = ""
retry_after: str | None = None
try:
if current_mode == "edit":
if img_bytes_in is None:
raise RuntimeError("edit mode reference image missing")
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
r = client.post(
_image_endpoint("/images/edits"),
headers={
"Authorization": f"Bearer {IMAGE_API_KEY}",
},
data={"model": current_model, "prompt": full_prompt, "n": "1", **_image_size_payload(image_size)},
files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")},
)
r.raise_for_status()
resp_data = r.json()
else:
# text-only
resp_data = _image_generation_response(full_prompt, current_model, image_size)
if resp_data.get("data"):
effective_mode = f"{current_mode}:{current_model}"
model = current_model
if current_model == GPT_IMAGE_MODEL:
_image_record_primary_success()
break
err_obj = resp_data.get("error") or {}
last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
except httpx.HTTPStatusError as e:
body = e.response.text
status_code = e.response.status_code
retry_after = e.response.headers.get("retry-after")
capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body)
transient = (
status_code == 429
or status_code >= 500
or "incomplete_generation" in body
or "rate_limit" in body
or "timeout" in body.lower()
or _image_is_capacity_error(status_code, body)
mode = req.mode if req.mode in {"edit", "text"} else "edit"
try:
if mode == "edit":
out_bytes, effective_mode = _image_edit_call(
reference_path,
full_prompt,
fallback_text=True,
max_attempts=3,
image_model_preference=req.model,
size=req.size,
)
last_err = f"HTTP {status_code}: {body[:200]} · model={current_model}"
if not transient:
raise HTTPException(500, f"image gen HTTP {status_code}: {body[:300]}")
except Exception as e:
last_err = f"{type(e).__name__}: {e} · model={current_model}"
fallbackable = current_model == GPT_IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err)
if fallbackable:
_image_record_primary_failure(last_err)
if any(next_model != GPT_IMAGE_MODEL for _next_mode, next_model in attempt_steps[attempt + 1:]):
print(f"[image gen fallback → {IMAGE_FALLBACK_MODEL}] {last_err}", flush=True)
continue
next_mode_changed = attempt < len(attempt_steps) - 1 and attempt_steps[attempt + 1][0] != current_mode
if _image_should_retry(attempt, len(attempt_steps), status_code, body, last_err, next_mode_changed):
next_mode = attempt_steps[attempt + 1][0]
tag = f"fallback → {next_mode}" if next_mode != current_mode else f"retry {attempt + 1}/{len(attempt_steps)}"
print(f"[image gen {tag}] {last_err}", flush=True)
_time.sleep(_image_retry_delay(attempt, status_code, body, retry_after))
else:
break
out_bytes, effective_mode = _image_text_call(
full_prompt,
max_attempts=3,
image_model_preference=req.model,
size=req.size,
)
except RuntimeError as e:
raise HTTPException(_image_error_status(e), f"image gen failed: {e}")
data_arr = resp_data.get("data", [])
if not data_arr:
raise HTTPException(503 if capacity_seen else 500, _image_failure_message("image gen", attempts_done, last_err, capacity_seen))
item = data_arr[0]
b64 = item.get("b64_json")
if b64:
out_bytes = b64lib.b64decode(b64)
elif item.get("url"):
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
image_resp = client.get(item["url"])
image_resp.raise_for_status()
out_bytes = image_resp.content
else:
raise HTTPException(500, "image gen returned no b64_json")
model = effective_mode.rsplit(":", 1)[-1] if ":" in effective_mode else IMAGE_MODEL
# 保存到本地 jobs/<id>/gen/<idx>_<gen_id>.jpg
gen_dir = job_dir(job_id) / "gen"
@@ -6999,7 +7122,7 @@ def _region_to_phrase(r: dict) -> str:
@app.post("/jobs/{job_id}/frames/{idx}/cleanup", response_model=Job)
def cleanup_frame(job_id: str, idx: int, req: CleanupReq | None = None) -> Job:
""" gpt-image-2 image edit 清洗关键帧:去水印 / @用户名 / 字幕 / 平台 logo。
"""当前图片模型 image edit 清洗关键帧:去水印 / @用户名 / 字幕 / 平台 logo。
输出干净版到 jobs/<id>/cleaned/<idx>.jpg写回 frame.cleaned_url。
可选 region: 限定只清洗框内区域。"""
import time as _time
@@ -7038,10 +7161,9 @@ def cleanup_frame(job_id: str, idx: int, req: CleanupReq | None = None) -> Job:
"hashtags, usernames, or platform logos. Keep the composition and style."
)
models = [GPT_IMAGE_MODEL]
try:
img_bytes, _mode = _image_edit_call(
frame_path, prompt, models=models, fallback_text=False, max_attempts=3,
frame_path, prompt, fallback_text=False, max_attempts=3,
)
except RuntimeError as e:
raise HTTPException(500, f"cleanup failed: {e}")
@@ -7504,22 +7626,21 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
+ "Do not create a plain background plate. Do not include SKG product unless the user prompt explicitly asks for it. "
+ "The output should be ready as a first/last frame for Seedance video generation, with stable composition, believable perspective, clear subject, no text, no watermark, no gore, no medical surgery imagery."
)
models = [GPT_IMAGE_MODEL]
try:
if req.asset_role == "scene":
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
img_bytes, _mode = _image_edit_call(model_src, prompt, fallback_text=False, max_attempts=3, max_side=1280)
elif product_ref_paths:
print(
f"[scene asset] role={req.asset_role} endpoint=/images/edits product_refs={len(product_ref_paths)} subject_refs=0 contact_sheet=0 model={GPT_IMAGE_MODEL}",
f"[scene asset] role={req.asset_role} endpoint=/images/edits product_refs={len(product_ref_paths)} subject_refs=0 contact_sheet=0 model={IMAGE_MODEL}",
flush=True,
)
img_bytes, _mode = _image_edit_call(product_ref_paths, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1600)
img_bytes, _mode = _image_edit_call(product_ref_paths, prompt, fallback_text=False, max_attempts=3, max_side=1600)
else:
print(
f"[scene asset] role={req.asset_role} endpoint=/images/generations product_refs=0 subject_refs=0 contact_sheet=0 model={GPT_IMAGE_MODEL}",
f"[scene asset] role={req.asset_role} endpoint=/images/generations product_refs=0 subject_refs=0 contact_sheet=0 model={IMAGE_MODEL}",
flush=True,
)
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3)
img_bytes, _mode = _image_text_call(prompt, max_attempts=3)
except RuntimeError as e:
raise HTTPException(500, f"{req.asset_role} asset failed: {e}")
finally:
@@ -7564,7 +7685,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
@app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/cutout", response_model=Job)
def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
"""AI 提取元素 · 每次累积一张新图:
gpt-image-2 生成**完整、清晰**的元素图(即使原图只露出部分也补全)。
当前图片模型生成**完整、清晰**的元素图(即使原图只露出部分也补全)。
region 元素:先把 region + 30% padding 区域裁出作为 focus再发给模型聚焦补全。"""
from PIL import Image as _PILImage
import io as _io
@@ -7627,12 +7748,11 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
"Preserve the element's original color palette, style, lighting character, and proportions. "
"Output must be a clean, high-quality asset image suitable for downstream composition."
)
models = [GPT_IMAGE_MODEL]
img_bytes: bytes
try:
try:
img_bytes, _mode = _image_edit_call(
model_src, prompt, models=models, fallback_text=False, max_attempts=3,
model_src, prompt, fallback_text=False, max_attempts=3,
)
except RuntimeError as e:
raise HTTPException(500, f"extract failed: {e}")
@@ -8116,7 +8236,7 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
flush=True,
)
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference)
if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
if model_preference == "auto" and _image_mode_used_fallback(_mode):
pack_force_fallback_model = True
elif similar_mode:
print(
@@ -8124,13 +8244,13 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
flush=True,
)
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference)
if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
if model_preference == "auto" and _image_mode_used_fallback(_mode):
pack_force_fallback_model = True
else:
if model_src is None:
raise RuntimeError("subject asset edit reference image missing")
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference)
if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
if model_preference == "auto" and _image_mode_used_fallback(_mode):
pack_force_fallback_model = True
except RuntimeError as e:
if first_generation_error is None:
@@ -9857,9 +9977,8 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
"If the target view is not fully visible in the source, infer the missing surfaces conservatively from the same product design without inventing a new model. "
+ (f"Additional operator note: {note}. " if note else "")
)
models = [GPT_IMAGE_MODEL]
try:
img_bytes, _mode = _image_edit_call(source_paths, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1600)
img_bytes, _mode = _image_edit_call(source_paths, prompt, fallback_text=False, max_attempts=5, max_side=1600)
except RuntimeError as e:
raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}")
asset_id = f"product_angle_{uuid.uuid4().hex[:10]}"
@@ -10414,7 +10533,7 @@ def create_product_fusion_guide(job_id: str, req: ProductFusionShot) -> dict:
"frame_idx": -1,
"element_id": asset_id,
"cutout_id": asset_id,
"label": f"产品融合引导图 · {req.image_model or 'gpt-image-2'}",
"label": f"产品融合引导图 · {req.image_model or IMAGE_MODEL}",
}