From 9ab541796b9dda2cf73950b2f9e21005d063cc5f Mon Sep 17 00:00:00 2001 From: kang Date: Wed, 27 May 2026 17:18:45 +0800 Subject: [PATCH] auto-save 2026-05-27 17:18 (~9) --- .memory/worklog.json | 14 +- api/.env.example | 11 + api/main.py | 583 +++++++++++++++++----------- deploy/.env.local.example | 8 + deploy/.env.production.example | 9 + web/canvas-app/src/config/models.js | 26 +- web/canvas-app/src/stores/models.js | 6 + web/components/lightbox.tsx | 2 +- web/lib/api.ts | 4 +- 9 files changed, 420 insertions(+), 243 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index e4bf203..da1df88 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1,12 +1,5 @@ { "entries": [ - { - "files_changed": 1, - "hash": "5b13a5c", - "message": "docs: record collapsed agent confirmation deployment", - "ts": "2026-05-20T19:36:21+08:00", - "type": "commit" - }, { "files_changed": 3, "hash": "5e0afce", @@ -3196,6 +3189,13 @@ "message": "fix: persist uploaded canvas reference images", "hash": "ec38215", "files_changed": 4 + }, + { + "ts": "2026-05-27T17:13:16+08:00", + "type": "commit", + "message": "auto-save 2026-05-27 17:13 (~2)", + "hash": "8999fe0", + "files_changed": 2 } ] } diff --git a/api/.env.example b/api/.env.example index b56fe99..cfe04c5 100644 --- a/api/.env.example +++ b/api/.env.example @@ -39,11 +39,22 @@ IMAGE_MODEL=gpt-image-2 IMAGE_REQUEST_TIMEOUT_SECONDS=60 IMAGE_FALLBACK_ENABLED=true IMAGE_FALLBACK_MODEL=gemini-3-pro-image-preview +# 多备用模型用逗号分隔;未设置时兼容 IMAGE_FALLBACK_MODEL。 +IMAGE_FALLBACK_MODELS=gemini-3-pro-image-preview +# 可选:把其它 OpenAI-compatible 图片模型加入 /health 和前端白名单,默认走 IMAGE_BASE_URL/IMAGE_API_KEY。 +IMAGE_EXTRA_MODELS= +# 可选:JSON 覆盖/扩展模型配置,建议只写 api_key_env,不把真实 key 写入 JSON。 +# IMAGE_MODEL_CONFIGS_JSON={"custom-model":{"label":"Custom Image","base_url_env":"CUSTOM_IMAGE_BASE_URL","api_key_env":"CUSTOM_IMAGE_API_KEY","provider":"openai","sizes":["1024x1024"],"default_size":"1024x1024"}} IMAGE_CIRCUIT_FAILURE_THRESHOLD=2 IMAGE_CIRCUIT_COOLDOWN_SECONDS=600 GPT_IMAGE_MODEL=gpt-image-2 SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2 SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2,gemini-3-pro-image-preview +# 火山方舟 Seedream 图片模型。真实 key 只填本地/服务器 .env,不提交到 git。 +ARK_SEEDREAM_ENABLED=true +ARK_IMAGE_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 +ARK_IMAGE_API_KEY= +ARK_SEEDREAM_IMAGE_MODEL=doubao-seedream-4-5-251128 # 可选:本地网络需要代理访问 ai.skg.com 时配置;launchd 不一定继承 shell 代理变量。 AI_HTTP_PROXY= YTDLP_COOKIES_FILE= diff --git a/api/main.py b/api/main.py index bb0ee3e..0d84dfe 100644 --- a/api/main.py +++ b/api/main.py @@ -858,7 +858,7 @@ class ProductFusionShot(BaseModel): scene_image: dict | None = None action_text: str = "" duration: float = 5 - image_model: str = "gpt-image-2" + image_model: str = "auto" video_model: str = "seedance" guide_image: dict | None = None @@ -4600,10 +4600,144 @@ def _image_is_transport_error(message: str) -> bool: ) +def _dedupe_keep_order(items: list[str]) -> list[str]: + seen: set[str] = set() + result: list[str] = [] + for item in items: + value = (item or "").strip() + if not value or value in seen: + continue + seen.add(value) + result.append(value) + return result + + +def _image_override(model: str) -> dict: + return IMAGE_MODEL_CONFIG_OVERRIDES.get(model, {}) + + +def _image_is_ark_seedream(model: str | None) -> bool: + value = (model or "").strip().lower() + if not value: + return False + provider = str(_image_override(value).get("provider", "")).lower() + return provider in {"ark", "ark_seedream", "seedream"} or value == ARK_SEEDREAM_IMAGE_MODEL.lower() or value.startswith("doubao-seedream-") + + +def _image_provider(model: str) -> str: + override = _image_override(model) + provider = str(override.get("provider", "")).strip().lower() + if provider: + return provider + return "ark_seedream" if _image_is_ark_seedream(model) else "openai" + + +def _image_model_label(model: str) -> str: + override = _image_override(model) + if override.get("label"): + return str(override["label"]) + labels = { + GPT_IMAGE_MODEL: "GPT Image 2", + "gemini-3-pro-image-preview": "Gemini 图片", + ARK_SEEDREAM_IMAGE_MODEL: "Seedream 4.5", + } + if model in labels: + return labels[model] + if _image_is_ark_seedream(model): + return "Seedream" + return model + + +def _image_model_base_url(model: str | None = None) -> str: + model_id = (model or IMAGE_MODEL).strip() + override = _image_override(model_id) + env_name = str(override.get("base_url_env", "")).strip() + if env_name and os.getenv(env_name): + return os.getenv(env_name, "").strip().rstrip("/") + if override.get("base_url"): + return str(override["base_url"]).strip().rstrip("/") + if _image_is_ark_seedream(model_id): + return (ARK_IMAGE_BASE_URL or IMAGE_BASE_URL).strip().rstrip("/") + return (IMAGE_BASE_URL or LLM_BASE_URL).strip().rstrip("/") + + +def _image_model_api_key(model: str | None = None) -> str: + model_id = (model or IMAGE_MODEL).strip() + override = _image_override(model_id) + env_name = str(override.get("api_key_env", "")).strip() + if env_name and os.getenv(env_name): + return os.getenv(env_name, "").strip() + if override.get("api_key"): + return str(override["api_key"]).strip() + if _image_is_ark_seedream(model_id): + if ARK_IMAGE_API_KEY: + return ARK_IMAGE_API_KEY + if "ark.cn-beijing.volces.com" in (IMAGE_BASE_URL or ""): + return IMAGE_API_KEY + return "" + return IMAGE_API_KEY + + +def _image_model_headers(model: str) -> dict: + api_key = _image_model_api_key(model) + if not api_key: + raise RuntimeError(f"{_image_model_label(model)} API key 未配置") + return {"Authorization": f"Bearer {api_key}"} + + +def _image_any_configured() -> bool: + return any(_image_model_api_key(model) for model in _configured_image_models()) + + +def _image_model_size_choices(model: str | None = None) -> list[dict]: + model_id = (model or IMAGE_MODEL).strip() + override = _image_override(model_id) + sizes = override.get("size_options") or override.get("sizes") + if isinstance(sizes, list) and sizes: + result: list[dict] = [] + for item in sizes: + if isinstance(item, dict): + value = str(item.get("value") or item.get("key") or item.get("id") or "").strip() + if not value: + continue + result.append({ + "id": str(item.get("id") or value), + "label": str(item.get("label") or value), + "value": value, + "description": str(item.get("description") or ""), + }) + else: + value = str(item).strip() + if value: + result.append({"id": value, "label": value, "value": value, "description": ""}) + if result: + return result + return ARK_SEEDREAM_SIZE_CHOICES if _image_is_ark_seedream(model_id) else IMAGE_SIZE_CHOICES + + +def _image_default_size(model: str | None = None) -> str: + model_id = (model or IMAGE_MODEL).strip() + override = _image_override(model_id) + if override.get("default_size"): + return str(override["default_size"]).strip() + return "2048x2048" if _image_is_ark_seedream(model_id) else "1024x1536" + + +def _configured_image_models() -> list[str]: + models = [IMAGE_MODEL] + if IMAGE_FALLBACK_ENABLED: + models.extend(IMAGE_FALLBACK_MODELS) + if ARK_SEEDREAM_ENABLED: + models.append(ARK_SEEDREAM_IMAGE_MODEL) + models.extend(IMAGE_EXTRA_MODELS) + models.extend(IMAGE_MODEL_CONFIG_OVERRIDES.keys()) + return _dedupe_keep_order(models) + + def _image_fallback_models() -> list[str]: - if not IMAGE_FALLBACK_ENABLED or not IMAGE_FALLBACK_MODEL or IMAGE_FALLBACK_MODEL == GPT_IMAGE_MODEL: + if not IMAGE_FALLBACK_ENABLED: return [] - return [IMAGE_FALLBACK_MODEL] + return [model for model in _dedupe_keep_order(IMAGE_FALLBACK_MODELS) if model != IMAGE_MODEL] def _image_circuit_snapshot() -> dict: @@ -4611,7 +4745,7 @@ def _image_circuit_snapshot() -> dict: with _IMAGE_CIRCUIT_LOCK: open_until = _IMAGE_PRIMARY_OPEN_UNTIL return { - "primary": GPT_IMAGE_MODEL, + "primary": IMAGE_MODEL, "fallbacks": _image_fallback_models(), "failure_threshold": IMAGE_CIRCUIT_FAILURE_THRESHOLD, "cooldown_seconds": IMAGE_CIRCUIT_COOLDOWN_SECONDS, @@ -4626,64 +4760,72 @@ def _image_primary_circuit_open() -> bool: return _image_circuit_snapshot()["primary_open"] +def _image_mode_used_fallback(mode: str) -> bool: + return any(mode.endswith(f":{fallback}") for fallback in _image_fallback_models()) + + def _normalize_image_model_preference(value: str | None) -> str: raw = (value or "auto").strip().lower() if raw in {"", "auto", "default"}: return "auto" if raw in {"gpt", "gpt-image", GPT_IMAGE_MODEL.lower()}: return GPT_IMAGE_MODEL + if raw in {"seedream", "ark-seedream", "doubao-seedream", ARK_SEEDREAM_IMAGE_MODEL.lower()}: + return ARK_SEEDREAM_IMAGE_MODEL + for model in _configured_image_models(): + if raw == model.lower(): + return model if IMAGE_FALLBACK_MODEL and raw in {"gemini", IMAGE_FALLBACK_MODEL.lower()}: return IMAGE_FALLBACK_MODEL - return "auto" + return (value or "").strip() or "auto" def _image_model_candidates(force_fallback: bool = False, preference: str | None = "auto") -> list[str]: normalized = _normalize_image_model_preference(preference) fallbacks = _image_fallback_models() - if normalized == GPT_IMAGE_MODEL: - return [GPT_IMAGE_MODEL] - if normalized == IMAGE_FALLBACK_MODEL and fallbacks: - return [IMAGE_FALLBACK_MODEL] + if normalized != "auto": + return [normalized] if not fallbacks: - return [GPT_IMAGE_MODEL] + return [IMAGE_MODEL] if force_fallback or _image_primary_circuit_open(): return fallbacks - return [GPT_IMAGE_MODEL, *fallbacks] + return [IMAGE_MODEL, *fallbacks] def image_model_options() -> list[dict]: + fallback_labels = ", ".join(_image_model_label(model) for model in _image_fallback_models()) or "无" options = [ { "id": "auto", "label": "自动", - "model": GPT_IMAGE_MODEL, - "description": "优先 GPT Image 2,必要时按后端熔断和兜底策略切到备用图片模型", - "available": bool(IMAGE_API_KEY), - }, - { - "id": GPT_IMAGE_MODEL, - "label": "GPT Image 2", - "model": GPT_IMAGE_MODEL, - "description": "主生图模型,适合营销图和参考图重绘", - "available": bool(IMAGE_API_KEY), + "model": IMAGE_MODEL, + "provider": _image_provider(IMAGE_MODEL), + "description": f"优先 {_image_model_label(IMAGE_MODEL)};备用:{fallback_labels}", + "available": bool(_image_model_api_key(IMAGE_MODEL)), + "default_size": _image_default_size(IMAGE_MODEL), + "size_options": _image_model_size_choices(IMAGE_MODEL), }, ] - if IMAGE_FALLBACK_ENABLED and IMAGE_FALLBACK_MODEL and IMAGE_FALLBACK_MODEL != GPT_IMAGE_MODEL: + for model in _configured_image_models(): options.append({ - "id": IMAGE_FALLBACK_MODEL, - "label": "Gemini 图片", - "model": IMAGE_FALLBACK_MODEL, - "description": "备用图片模型,适合主模型慢或失败时手动选择", - "available": bool(IMAGE_API_KEY), + "id": model, + "label": _image_model_label(model), + "model": model, + "provider": _image_provider(model), + "description": "图片生成模型,可通过环境变量切换网关、key 和尺寸能力", + "available": bool(_image_model_api_key(model)), + "default_size": _image_default_size(model), + "size_options": _image_model_size_choices(model), }) return options def image_size_options() -> list[dict]: - return IMAGE_SIZE_CHOICES + return ALL_IMAGE_SIZE_CHOICES -def _normalize_image_size(raw: str | None) -> str: +def _normalize_image_size(raw: str | None, model: str | None = None) -> str: + model_id = (model or IMAGE_MODEL).strip() value = (raw or "auto").strip().lower() aliases = { "vertical": "1024x1536", @@ -4696,14 +4838,30 @@ def _normalize_image_size(raw: str | None) -> str: "横图": "1536x1024", } value = aliases.get(value, value) - allowed = {str(item["value"]) for item in IMAGE_SIZE_CHOICES} - if value not in allowed: + if _image_is_ark_seedream(model_id): + seedream_aliases = { + "auto": _image_default_size(model_id), + "1024x1536": "1440x2560", + "1536x1024": "2560x1440", + "1024x1024": "2048x2048", + "square": "2048x2048", + "方图": "2048x2048", + "vertical": "1440x2560", + "portrait": "1440x2560", + "竖图": "1440x2560", + "horizontal": "2560x1440", + "landscape": "2560x1440", + "横图": "2560x1440", + } + value = seedream_aliases.get(value, value) + canonical = {str(item["value"]).lower(): str(item["value"]) for item in _image_model_size_choices(model_id)} + if value not in canonical: raise HTTPException(400, f"unsupported image size: {raw}") - return value + return canonical[value] -def _image_size_payload(raw: str | None) -> dict: - size = _normalize_image_size(raw) +def _image_size_payload(raw: str | None, model: str | None = None) -> dict: + size = _normalize_image_size(raw, model) return {} if size == "auto" else {"size": size} @@ -4801,7 +4959,7 @@ def _image_record_primary_success() -> None: global _IMAGE_PRIMARY_FAILURES, _IMAGE_PRIMARY_OPEN_UNTIL with _IMAGE_CIRCUIT_LOCK: if _IMAGE_PRIMARY_FAILURES or _IMAGE_PRIMARY_OPEN_UNTIL: - print(f"[image circuit] primary {GPT_IMAGE_MODEL} recovered", flush=True) + print(f"[image circuit] primary {IMAGE_MODEL} recovered", flush=True) _IMAGE_PRIMARY_FAILURES = 0 _IMAGE_PRIMARY_OPEN_UNTIL = 0.0 @@ -4815,14 +4973,14 @@ def _image_record_primary_failure(reason: str) -> None: if _IMAGE_PRIMARY_FAILURES >= IMAGE_CIRCUIT_FAILURE_THRESHOLD: _IMAGE_PRIMARY_OPEN_UNTIL = time.time() + IMAGE_CIRCUIT_COOLDOWN_SECONDS print( - f"[image circuit] primary {GPT_IMAGE_MODEL} opened for {IMAGE_CIRCUIT_COOLDOWN_SECONDS}s " - f"after {_IMAGE_PRIMARY_FAILURES} failures; fallback={IMAGE_FALLBACK_MODEL}; reason={reason[:220]}", + f"[image circuit] primary {IMAGE_MODEL} opened for {IMAGE_CIRCUIT_COOLDOWN_SECONDS}s " + f"after {_IMAGE_PRIMARY_FAILURES} failures; fallback={','.join(_image_fallback_models())}; reason={reason[:220]}", flush=True, ) else: print( - f"[image circuit] primary {GPT_IMAGE_MODEL} failure {_IMAGE_PRIMARY_FAILURES}/{IMAGE_CIRCUIT_FAILURE_THRESHOLD}; " - f"fallback={IMAGE_FALLBACK_MODEL}; reason={reason[:220]}", + f"[image circuit] primary {IMAGE_MODEL} failure {_IMAGE_PRIMARY_FAILURES}/{IMAGE_CIRCUIT_FAILURE_THRESHOLD}; " + f"fallback={','.join(_image_fallback_models())}; reason={reason[:220]}", flush=True, ) @@ -4830,14 +4988,14 @@ def _image_record_primary_failure(reason: str) -> None: def _image_failure_message(kind: str, attempts: int, last_err: str, capacity_seen: bool) -> str: if capacity_seen: return ( - f"{kind} failed after {attempts} attempts: gpt-image-2 上游负载饱和," + f"{kind} failed after {attempts} attempts: {_image_model_label(IMAGE_MODEL)} 上游负载饱和," f"已自动退避重试仍失败,请稍后点重试。最后错误:{last_err}" ) if "timeout" in last_err.lower(): return ( - f"{kind} failed after {attempts} attempts: gpt-image-2 图片网关响应超时" + f"{kind} failed after {attempts} attempts: {_image_model_label(IMAGE_MODEL)} 图片网关响应超时" f"(单次 {IMAGE_REQUEST_TIMEOUT_SECONDS}s),模型未更改。" - f"请检查 {IMAGE_BASE_URL or LLM_BASE_URL or 'image gateway'} 的 gpt-image-2 上游渠道或稍后重试。" + f"请检查 {_image_model_base_url(IMAGE_MODEL) or 'image gateway'} 的 {_image_model_label(IMAGE_MODEL)} 上游渠道或稍后重试。" f"最后错误:{last_err}" ) if _image_is_transport_error(last_err): @@ -4859,24 +5017,69 @@ def _image_error_status(error: Exception) -> int: ) else 500 -def _image_endpoint(path: str) -> str: - base = (IMAGE_BASE_URL or "").strip().rstrip("/") +def _image_endpoint(path: str, model: str | None = None) -> str: + base = _image_model_base_url(model) if not base: - raise RuntimeError("IMAGE_BASE_URL 或 LLM_BASE_URL 未配置") + raise RuntimeError(f"{_image_model_label(model or IMAGE_MODEL)} base URL 未配置") return f"{base}/{path.lstrip('/')}" -def _image_generation_response(prompt: str, model: str, size: str | None = "auto") -> dict: +def _image_reference_data_urls(img_bytes_list: list[bytes]) -> list[str]: + return [f"data:image/jpeg;base64,{base64.b64encode(img_bytes).decode('ascii')}" for img_bytes in img_bytes_list] + + +def _image_generation_payload( + prompt: str, + model: str, + size: str | None = "auto", + reference_images: list[str] | None = None, +) -> dict: + if _image_is_ark_seedream(model): + payload = { + "model": model, + "prompt": prompt, + "watermark": False, + "response_format": "url", + "sequential_image_generation": "disabled", + **_image_size_payload(size, model), + } + if reference_images: + payload["reference_images"] = reference_images[:10] + return payload + return {"model": model, "prompt": prompt, "n": 1, **_image_size_payload(size, model)} + + +def _image_generation_response( + prompt: str, + model: str, + size: str | None = "auto", + reference_images: list[str] | None = None, +) -> dict: with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: r = client.post( - _image_endpoint("/images/generations"), - headers={"Authorization": f"Bearer {IMAGE_API_KEY}"}, - json={"model": model, "prompt": prompt, "n": 1, **_image_size_payload(size)}, + _image_endpoint("/images/generations", model), + headers=_image_model_headers(model), + json=_image_generation_payload(prompt, model, size, reference_images), ) r.raise_for_status() return r.json() +def _image_response_item_bytes(item: dict, kind: str) -> bytes: + import base64 as b64lib + b64 = item.get("b64_json") or item.get("b64") + if b64: + return b64lib.b64decode(b64) + for key in ("url", "image_url", "output_url", "download_url"): + url = item.get(key) + if isinstance(url, str) and url: + with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: + image_resp = client.get(url) + image_resp.raise_for_status() + return image_resp.content + raise RuntimeError(f"{kind} returned no b64_json or url") + + def _image_should_retry( attempt: int, total_attempts: int, @@ -4922,25 +5125,25 @@ def _image_edit_call( max_side: int = 1024, force_fallback_model: bool = False, image_model_preference: str | None = "auto", + size: str | None = "auto", ) -> tuple[bytes, str]: """通用 image edit 调用 · 失败重试 + 可选 text fallback。 返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。 失败 raise RuntimeError。 - 输入图自动 resize 到 max_side(默认 1024)边长后再用 multipart 上传;多参考图使用 image[]。 - 生图模型主路径使用 gpt-image-2;Gemini 只在主模型上游异常时兜底。model/models 参数只保留兼容旧调用。""" - import base64 as b64lib + 输入图自动 resize 到 max_side(默认 1024)边长后再上传。 + OpenAI-compatible 模型走 /images/edits;Ark Seedream 走 /images/generations + reference_images。 + 主模型、备用模型和 Ark 模型均来自运行时配置;model/models 参数只保留兼容旧调用。""" import time as _time import httpx - if not IMAGE_API_KEY: - raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置") - model = GPT_IMAGE_MODEL + if not _image_any_configured(): + raise RuntimeError("图片模型 API key 未配置") image_paths = image_path if isinstance(image_path, list) else [image_path] image_paths = [path for path in image_paths if path and path.exists()][:10] if not image_paths: raise RuntimeError("image edit reference image missing") img_bytes_list = [_prepare_image_edit_bytes(path, max_side) for path in image_paths] model_candidates = _image_model_candidates(force_fallback=force_fallback_model, preference=image_model_preference) - mode_plan: list[str] = ["edit"] if model_candidates != [GPT_IMAGE_MODEL] else ["edit"] * max_attempts + mode_plan: list[str] = ["edit"] if model_candidates != [IMAGE_MODEL] else ["edit"] * max_attempts if fallback_text: mode_plan.append("text") attempt_steps = [(current_mode, current_model) for current_mode in mode_plan for current_model in model_candidates] @@ -4957,30 +5160,35 @@ def _image_edit_call( retry_after: str | None = None try: if current_mode == "edit": - with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: - r = client.post( - _image_endpoint("/images/edits"), - headers={ - "Authorization": f"Bearer {IMAGE_API_KEY}", - }, - data={"model": current_model, "prompt": prompt, "n": "1"}, - files=( - {"image": ("reference.jpg", img_bytes_list[0], "image/jpeg")} - if len(img_bytes_list) == 1 - else [ - ("image[]", (f"reference_{idx + 1}.jpg", img_bytes, "image/jpeg")) - for idx, img_bytes in enumerate(img_bytes_list) - ] - ), + if _image_is_ark_seedream(current_model): + resp_data = _image_generation_response( + prompt, + current_model, + size, + reference_images=_image_reference_data_urls(img_bytes_list), ) - r.raise_for_status() - resp_data = r.json() + else: + with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: + r = client.post( + _image_endpoint("/images/edits", current_model), + headers=_image_model_headers(current_model), + data={"model": current_model, "prompt": prompt, "n": "1", **_image_size_payload(size, current_model)}, + files=( + {"image": ("reference.jpg", img_bytes_list[0], "image/jpeg")} + if len(img_bytes_list) == 1 + else [ + ("image[]", (f"reference_{idx + 1}.jpg", img_bytes, "image/jpeg")) + for idx, img_bytes in enumerate(img_bytes_list) + ] + ), + ) + r.raise_for_status() + resp_data = r.json() else: - resp_data = _image_generation_response(prompt, current_model) + resp_data = _image_generation_response(prompt, current_model, size) if resp_data.get("data"): effective_mode = f"{current_mode}:{current_model}" - model = current_model # 记录实际成功的 model - if current_model == GPT_IMAGE_MODEL: + if current_model == IMAGE_MODEL: _image_record_primary_success() break err_obj = resp_data.get("error") or {} @@ -4997,11 +5205,11 @@ def _image_edit_call( except Exception as e: last_err = f"{type(e).__name__}: {e} · model={current_model}" - fallbackable = current_model == GPT_IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err) + fallbackable = current_model == IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err) if fallbackable: _image_record_primary_failure(last_err) - if any(next_model != GPT_IMAGE_MODEL for _next_mode, next_model in attempt_steps[attempt + 1:]): - print(f"[image edit fallback → {IMAGE_FALLBACK_MODEL}] {last_err}", flush=True) + if any(next_model != IMAGE_MODEL for _next_mode, next_model in attempt_steps[attempt + 1:]): + print(f"[image edit fallback → {','.join(_image_fallback_models())}] {last_err}", flush=True) continue next_mode_changed = attempt < len(attempt_steps) - 1 and attempt_steps[attempt + 1][0] != current_mode if _image_should_retry(attempt, len(attempt_steps), status_code, body, last_err, next_mode_changed): @@ -5015,16 +5223,7 @@ def _image_edit_call( data_arr = resp_data.get("data", []) if not data_arr: raise RuntimeError(_image_failure_message("image edit", attempts_done, last_err, capacity_seen)) - item = data_arr[0] - b64 = item.get("b64_json") - if not b64 and item.get("url"): - with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: - image_resp = client.get(item["url"]) - image_resp.raise_for_status() - return image_resp.content, effective_mode - if not b64: - raise RuntimeError("image edit returned no b64_json") - return b64lib.b64decode(b64), effective_mode + return _image_response_item_bytes(data_arr[0], "image edit"), effective_mode def _image_text_call( @@ -5034,15 +5233,15 @@ def _image_text_call( max_attempts: int = 3, force_fallback_model: bool = False, image_model_preference: str | None = "auto", + size: str | None = "auto", ) -> tuple[bytes, str]: - """Text-only image generation. gpt-image-2 primary, Gemini only as outage fallback.""" - import base64 as b64lib + """Text-only image generation. Primary and fallback models are selected from runtime config.""" import time as _time import httpx - if not IMAGE_API_KEY: - raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置") + if not _image_any_configured(): + raise RuntimeError("图片模型 API key 未配置") candidates = _image_model_candidates(force_fallback=force_fallback_model, preference=image_model_preference) - attempt_models = candidates if candidates != [GPT_IMAGE_MODEL] else [GPT_IMAGE_MODEL] * max_attempts + attempt_models = candidates if candidates != [IMAGE_MODEL] else [IMAGE_MODEL] * max_attempts last_err = "" capacity_seen = False attempts_done = 0 @@ -5052,21 +5251,12 @@ def _image_text_call( body = "" retry_after: str | None = None try: - resp_data = _image_generation_response(prompt, current_model) + resp_data = _image_generation_response(prompt, current_model, size) if resp_data.get("data"): - item = resp_data["data"][0] - b64 = item.get("b64_json") - if b64: - if current_model == GPT_IMAGE_MODEL: - _image_record_primary_success() - return b64lib.b64decode(b64), f"text:{current_model}" - if item.get("url"): - with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: - image_resp = client.get(item["url"]) - image_resp.raise_for_status() - if current_model == GPT_IMAGE_MODEL: - _image_record_primary_success() - return image_resp.content, f"text:{current_model}" + out_bytes = _image_response_item_bytes(resp_data["data"][0], "image text") + if current_model == IMAGE_MODEL: + _image_record_primary_success() + return out_bytes, f"text:{current_model}" err_obj = resp_data.get("error") or {} last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}" except httpx.HTTPStatusError as e: @@ -5080,11 +5270,11 @@ def _image_text_call( body = str(e) status_code = 429 if "429" in body or "saturated" in body.lower() or "饱和" in body else 0 capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body) - fallbackable = current_model == GPT_IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err) + fallbackable = current_model == IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err) if fallbackable: _image_record_primary_failure(last_err) - if any(next_model != GPT_IMAGE_MODEL for next_model in attempt_models[attempt + 1:]): - print(f"[image text fallback → {IMAGE_FALLBACK_MODEL}] {last_err}", flush=True) + if any(next_model != IMAGE_MODEL for next_model in attempt_models[attempt + 1:]): + print(f"[image text fallback → {','.join(_image_fallback_models())}] {last_err}", flush=True) continue if _image_should_retry(attempt, len(attempt_models), status_code, body, last_err): delay = _image_retry_delay(attempt, status_code, body, retry_after) @@ -5190,7 +5380,7 @@ def _subject_agent_model(bundle: SubjectModelBundle) -> str: def _subject_agent_image_model(bundle: SubjectModelBundle) -> str: - return IMAGE_FALLBACK_MODEL if bundle == "gemini" and IMAGE_FALLBACK_MODEL else GPT_IMAGE_MODEL + return IMAGE_FALLBACK_MODEL if bundle == "gemini" and IMAGE_FALLBACK_MODEL else SUBJECT_ASSET_IMAGE_MODEL def _list_of_strings(value, limit: int = 18) -> list[str]: @@ -6200,6 +6390,18 @@ def health() -> dict: "image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS, "image_options": image_model_options(), "image_size_options": image_size_options(), + "image_model_routes": [ + { + "model": model, + "label": _image_model_label(model), + "provider": _image_provider(model), + "base_url": _image_model_base_url(model) or "", + "configured": bool(_image_model_api_key(model)), + "default_size": _image_default_size(model), + "size_options": _image_model_size_choices(model), + } + for model in _configured_image_models() + ], "ai_proxy_configured": bool(AI_HTTP_PROXY), "image_fallbacks": _image_fallback_models(), "image_circuit": _image_circuit_snapshot(), @@ -6687,8 +6889,8 @@ class GenerateReq(BaseModel): prompt: str extra_prompt: str = "" # ✓ 需要的元素(正向) negative_prompt: str = "" # ✗ 不需要的元素(负向) - model: str = "auto" # auto / gpt-image-2 / gemini-3-pro-image-preview - size: str = "auto" # auto / 1024x1536 / 1024x1024 / 1536x1024 + model: str = "auto" # auto / configured image model id + size: str = "auto" # auto / model-specific size mode: str = "edit" # "edit" 带参考图,"text" 纯文字 from_selected: bool = False # True 时优先用 frame.selected 的生成图作 reference(迭代),否则原关键帧 @@ -6725,113 +6927,34 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job: if not raw_prompt: raise HTTPException(400, "prompt required") full_prompt = _ensure_english(raw_prompt) - image_size = _normalize_image_size(req.size) - if not IMAGE_API_KEY: - raise HTTPException(503, "IMAGE_API_KEY 或 LLM_API_KEY 未配置") + if not _image_any_configured(): + raise HTTPException(503, "图片模型 API key 未配置") - model = GPT_IMAGE_MODEL gen_id = uuid.uuid4().hex[:12] - import base64 as b64lib import time as _time - import httpx - - img_bytes_in: bytes | None = None - if req.mode == "edit": - img_bytes_in = reference_path.read_bytes() - - # 尝试 i2i;auto 允许按熔断策略兜底,显式模型只走用户所选模型。 - model_candidates = _image_model_candidates(preference=req.model) - plan: list[str] = ([req.mode] if model_candidates != [GPT_IMAGE_MODEL] else [req.mode] * 3) if req.mode == "edit" else [req.mode] - if req.mode == "edit": - plan.append("text") # i2i 都失败时自动降级 - attempt_steps = [(current_mode, current_model) for current_mode in plan for current_model in model_candidates] - resp_data: dict = {} - last_err = "" - effective_mode = req.mode - capacity_seen = False - attempts_done = 0 - for attempt, (current_mode, current_model) in enumerate(attempt_steps): - attempts_done = attempt + 1 - status_code = 0 - body = "" - retry_after: str | None = None - try: - if current_mode == "edit": - if img_bytes_in is None: - raise RuntimeError("edit mode reference image missing") - with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: - r = client.post( - _image_endpoint("/images/edits"), - headers={ - "Authorization": f"Bearer {IMAGE_API_KEY}", - }, - data={"model": current_model, "prompt": full_prompt, "n": "1", **_image_size_payload(image_size)}, - files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")}, - ) - r.raise_for_status() - resp_data = r.json() - else: - # text-only - resp_data = _image_generation_response(full_prompt, current_model, image_size) - - if resp_data.get("data"): - effective_mode = f"{current_mode}:{current_model}" - model = current_model - if current_model == GPT_IMAGE_MODEL: - _image_record_primary_success() - break - err_obj = resp_data.get("error") or {} - last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}" - except httpx.HTTPStatusError as e: - body = e.response.text - status_code = e.response.status_code - retry_after = e.response.headers.get("retry-after") - capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body) - transient = ( - status_code == 429 - or status_code >= 500 - or "incomplete_generation" in body - or "rate_limit" in body - or "timeout" in body.lower() - or _image_is_capacity_error(status_code, body) + mode = req.mode if req.mode in {"edit", "text"} else "edit" + try: + if mode == "edit": + out_bytes, effective_mode = _image_edit_call( + reference_path, + full_prompt, + fallback_text=True, + max_attempts=3, + image_model_preference=req.model, + size=req.size, ) - last_err = f"HTTP {status_code}: {body[:200]} · model={current_model}" - if not transient: - raise HTTPException(500, f"image gen HTTP {status_code}: {body[:300]}") - except Exception as e: - last_err = f"{type(e).__name__}: {e} · model={current_model}" - - fallbackable = current_model == GPT_IMAGE_MODEL and _image_failure_can_fallback(status_code, body, last_err) - if fallbackable: - _image_record_primary_failure(last_err) - if any(next_model != GPT_IMAGE_MODEL for _next_mode, next_model in attempt_steps[attempt + 1:]): - print(f"[image gen fallback → {IMAGE_FALLBACK_MODEL}] {last_err}", flush=True) - continue - next_mode_changed = attempt < len(attempt_steps) - 1 and attempt_steps[attempt + 1][0] != current_mode - if _image_should_retry(attempt, len(attempt_steps), status_code, body, last_err, next_mode_changed): - next_mode = attempt_steps[attempt + 1][0] - tag = f"fallback → {next_mode}" if next_mode != current_mode else f"retry {attempt + 1}/{len(attempt_steps)}" - print(f"[image gen {tag}] {last_err}", flush=True) - _time.sleep(_image_retry_delay(attempt, status_code, body, retry_after)) else: - break + out_bytes, effective_mode = _image_text_call( + full_prompt, + max_attempts=3, + image_model_preference=req.model, + size=req.size, + ) + except RuntimeError as e: + raise HTTPException(_image_error_status(e), f"image gen failed: {e}") - data_arr = resp_data.get("data", []) - if not data_arr: - raise HTTPException(503 if capacity_seen else 500, _image_failure_message("image gen", attempts_done, last_err, capacity_seen)) - - item = data_arr[0] - b64 = item.get("b64_json") - if b64: - out_bytes = b64lib.b64decode(b64) - elif item.get("url"): - with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: - image_resp = client.get(item["url"]) - image_resp.raise_for_status() - out_bytes = image_resp.content - else: - raise HTTPException(500, "image gen returned no b64_json") + model = effective_mode.rsplit(":", 1)[-1] if ":" in effective_mode else IMAGE_MODEL # 保存到本地 jobs//gen/_.jpg gen_dir = job_dir(job_id) / "gen" @@ -6999,7 +7122,7 @@ def _region_to_phrase(r: dict) -> str: @app.post("/jobs/{job_id}/frames/{idx}/cleanup", response_model=Job) def cleanup_frame(job_id: str, idx: int, req: CleanupReq | None = None) -> Job: - """调 gpt-image-2 image edit 清洗关键帧:去水印 / @用户名 / 字幕 / 平台 logo。 + """调当前图片模型 image edit 清洗关键帧:去水印 / @用户名 / 字幕 / 平台 logo。 输出干净版到 jobs//cleaned/.jpg,写回 frame.cleaned_url。 可选 region: 限定只清洗框内区域。""" import time as _time @@ -7038,10 +7161,9 @@ def cleanup_frame(job_id: str, idx: int, req: CleanupReq | None = None) -> Job: "hashtags, usernames, or platform logos. Keep the composition and style." ) - models = [GPT_IMAGE_MODEL] try: img_bytes, _mode = _image_edit_call( - frame_path, prompt, models=models, fallback_text=False, max_attempts=3, + frame_path, prompt, fallback_text=False, max_attempts=3, ) except RuntimeError as e: raise HTTPException(500, f"cleanup failed: {e}") @@ -7504,22 +7626,21 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J + "Do not create a plain background plate. Do not include SKG product unless the user prompt explicitly asks for it. " + "The output should be ready as a first/last frame for Seedance video generation, with stable composition, believable perspective, clear subject, no text, no watermark, no gore, no medical surgery imagery." ) - models = [GPT_IMAGE_MODEL] try: if req.asset_role == "scene": - img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280) + img_bytes, _mode = _image_edit_call(model_src, prompt, fallback_text=False, max_attempts=3, max_side=1280) elif product_ref_paths: print( - f"[scene asset] role={req.asset_role} endpoint=/images/edits product_refs={len(product_ref_paths)} subject_refs=0 contact_sheet=0 model={GPT_IMAGE_MODEL}", + f"[scene asset] role={req.asset_role} endpoint=/images/edits product_refs={len(product_ref_paths)} subject_refs=0 contact_sheet=0 model={IMAGE_MODEL}", flush=True, ) - img_bytes, _mode = _image_edit_call(product_ref_paths, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1600) + img_bytes, _mode = _image_edit_call(product_ref_paths, prompt, fallback_text=False, max_attempts=3, max_side=1600) else: print( - f"[scene asset] role={req.asset_role} endpoint=/images/generations product_refs=0 subject_refs=0 contact_sheet=0 model={GPT_IMAGE_MODEL}", + f"[scene asset] role={req.asset_role} endpoint=/images/generations product_refs=0 subject_refs=0 contact_sheet=0 model={IMAGE_MODEL}", flush=True, ) - img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3) + img_bytes, _mode = _image_text_call(prompt, max_attempts=3) except RuntimeError as e: raise HTTPException(500, f"{req.asset_role} asset failed: {e}") finally: @@ -7564,7 +7685,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J @app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/cutout", response_model=Job) def cutout_element(job_id: str, idx: int, element_id: str) -> Job: """AI 提取元素 · 每次累积一张新图: - 调 gpt-image-2 生成**完整、清晰**的元素图(即使原图只露出部分也补全)。 + 调当前图片模型生成**完整、清晰**的元素图(即使原图只露出部分也补全)。 region 元素:先把 region + 30% padding 区域裁出作为 focus,再发给模型聚焦补全。""" from PIL import Image as _PILImage import io as _io @@ -7627,12 +7748,11 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job: "Preserve the element's original color palette, style, lighting character, and proportions. " "Output must be a clean, high-quality asset image suitable for downstream composition." ) - models = [GPT_IMAGE_MODEL] img_bytes: bytes try: try: img_bytes, _mode = _image_edit_call( - model_src, prompt, models=models, fallback_text=False, max_attempts=3, + model_src, prompt, fallback_text=False, max_attempts=3, ) except RuntimeError as e: raise HTTPException(500, f"extract failed: {e}") @@ -8116,7 +8236,7 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G flush=True, ) img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference) - if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"): + if model_preference == "auto" and _image_mode_used_fallback(_mode): pack_force_fallback_model = True elif similar_mode: print( @@ -8124,13 +8244,13 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G flush=True, ) img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference) - if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"): + if model_preference == "auto" and _image_mode_used_fallback(_mode): pack_force_fallback_model = True else: if model_src is None: raise RuntimeError("subject asset edit reference image missing") img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference) - if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"): + if model_preference == "auto" and _image_mode_used_fallback(_mode): pack_force_fallback_model = True except RuntimeError as e: if first_generation_error is None: @@ -9857,9 +9977,8 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) "If the target view is not fully visible in the source, infer the missing surfaces conservatively from the same product design without inventing a new model. " + (f"Additional operator note: {note}. " if note else "") ) - models = [GPT_IMAGE_MODEL] try: - img_bytes, _mode = _image_edit_call(source_paths, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1600) + img_bytes, _mode = _image_edit_call(source_paths, prompt, fallback_text=False, max_attempts=5, max_side=1600) except RuntimeError as e: raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}") asset_id = f"product_angle_{uuid.uuid4().hex[:10]}" @@ -10414,7 +10533,7 @@ def create_product_fusion_guide(job_id: str, req: ProductFusionShot) -> dict: "frame_idx": -1, "element_id": asset_id, "cutout_id": asset_id, - "label": f"产品融合引导图 · {req.image_model or 'gpt-image-2'}", + "label": f"产品融合引导图 · {req.image_model or IMAGE_MODEL}", } diff --git a/deploy/.env.local.example b/deploy/.env.local.example index 01579d3..14d9415 100644 --- a/deploy/.env.local.example +++ b/deploy/.env.local.example @@ -37,11 +37,19 @@ IMAGE_MODEL=gpt-image-2 IMAGE_REQUEST_TIMEOUT_SECONDS=60 IMAGE_FALLBACK_ENABLED=true IMAGE_FALLBACK_MODEL=gemini-3-pro-image-preview +IMAGE_FALLBACK_MODELS=gemini-3-pro-image-preview +IMAGE_EXTRA_MODELS= +# Optional JSON model overrides. Use api_key_env/base_url_env; do not put real keys in git. +# IMAGE_MODEL_CONFIGS_JSON={"custom-model":{"label":"Custom Image","base_url_env":"CUSTOM_IMAGE_BASE_URL","api_key_env":"CUSTOM_IMAGE_API_KEY","provider":"openai","sizes":["1024x1024"],"default_size":"1024x1024"}} IMAGE_CIRCUIT_FAILURE_THRESHOLD=2 IMAGE_CIRCUIT_COOLDOWN_SECONDS=600 GPT_IMAGE_MODEL=gpt-image-2 SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2 SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2,gemini-3-pro-image-preview +ARK_SEEDREAM_ENABLED=true +ARK_IMAGE_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 +ARK_IMAGE_API_KEY= +ARK_SEEDREAM_IMAGE_MODEL=doubao-seedream-4-5-251128 AI_HTTP_PROXY= # Text/vision/audio model names diff --git a/deploy/.env.production.example b/deploy/.env.production.example index aaf8524..817ae93 100644 --- a/deploy/.env.production.example +++ b/deploy/.env.production.example @@ -65,11 +65,20 @@ IMAGE_MODEL=gpt-image-2 IMAGE_REQUEST_TIMEOUT_SECONDS=60 IMAGE_FALLBACK_ENABLED=true IMAGE_FALLBACK_MODEL=gemini-3-pro-image-preview +IMAGE_FALLBACK_MODELS=gemini-3-pro-image-preview +IMAGE_EXTRA_MODELS= +# Optional JSON model overrides. Use api_key_env/base_url_env; do not put real keys in git. +# IMAGE_MODEL_CONFIGS_JSON={"custom-model":{"label":"Custom Image","base_url_env":"CUSTOM_IMAGE_BASE_URL","api_key_env":"CUSTOM_IMAGE_API_KEY","provider":"openai","sizes":["1024x1024"],"default_size":"1024x1024"}} IMAGE_CIRCUIT_FAILURE_THRESHOLD=2 IMAGE_CIRCUIT_COOLDOWN_SECONDS=600 GPT_IMAGE_MODEL=gpt-image-2 SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2 SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2,gemini-3-pro-image-preview +# Optional Ark Seedream image channel. Keep the real key only in deploy/.env.production on the VPS. +ARK_SEEDREAM_ENABLED=true +ARK_IMAGE_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 +ARK_IMAGE_API_KEY= +ARK_SEEDREAM_IMAGE_MODEL=doubao-seedream-4-5-251128 # Optional outbound proxy for AI gateway calls. Leave blank on normal VPS networking. AI_HTTP_PROXY= diff --git a/web/canvas-app/src/config/models.js b/web/canvas-app/src/config/models.js index 6a85923..e2fb4be 100644 --- a/web/canvas-app/src/config/models.js +++ b/web/canvas-app/src/config/models.js @@ -11,6 +11,16 @@ export const SEEDREAM_SIZE_OPTIONS = [ { label: '横图 3:2', key: '1536x1024' } ] +export const ARK_SEEDREAM_SIZE_OPTIONS = [ + { label: '自动 2K', key: '2K' }, + { label: '方图 2048', key: '2048x2048' }, + { label: '竖图 9:16', key: '1440x2560' }, + { label: '横图 16:9', key: '2560x1440' }, + { label: '方图 2160', key: '2160x2160' }, + { label: '竖图 4K', key: '2160x3840' }, + { label: '横图 4K', key: '3840x2160' } +] + // Kept for compatibility with upstream model helpers. export const SEEDREAM_4K_SIZE_OPTIONS = SEEDREAM_SIZE_OPTIONS @@ -67,6 +77,19 @@ export const IMAGE_MODELS = [ style: 'vivid' } }, + { + label: 'Seedream 4.5', + key: 'doubao-seedream-4-5-251128', + provider: ['chatfire'], + sizes: ARK_SEEDREAM_SIZE_OPTIONS.map(s => s.key), + sizeOptions: ARK_SEEDREAM_SIZE_OPTIONS, + qualities: SEEDREAM_QUALITY_OPTIONS, + defaultParams: { + size: '2048x2048', + quality: 'standard', + style: 'commercial' + } + }, ] @@ -121,7 +144,8 @@ export const IMAGE_SIZE_OPTIONS = [ { label: '自动', key: 'auto' }, { label: '竖图 2:3', key: '1024x1536' }, { label: '方图 1:1', key: '1024x1024' }, - { label: '横图 3:2', key: '1536x1024' } + { label: '横图 3:2', key: '1536x1024' }, + ...ARK_SEEDREAM_SIZE_OPTIONS ] // Image quality options | 图片质量选项 diff --git a/web/canvas-app/src/stores/models.js b/web/canvas-app/src/stores/models.js index c0beb02..dc6bcae 100644 --- a/web/canvas-app/src/stores/models.js +++ b/web/canvas-app/src/stores/models.js @@ -9,6 +9,7 @@ import { VIDEO_MODELS, CHAT_MODELS, SEEDREAM_SIZE_OPTIONS, + ARK_SEEDREAM_SIZE_OPTIONS, SEEDREAM_4K_SIZE_OPTIONS, SEEDREAM_QUALITY_OPTIONS, SEEDANCE_RESOLUTION_OPTIONS, @@ -68,6 +69,10 @@ export const getModelConfig = (modelKey) => { */ export const getModelSizeOptions = (modelKey, quality = 'standard') => { const model = IMAGE_MODELS.find(m => m.key === modelKey) + + if (model?.sizeOptions) { + return model.sizeOptions + } // If model has getSizesByQuality function, use it | 如果模型有 getSizesByQuality 函数,使用它 if (model?.getSizesByQuality) { @@ -208,6 +213,7 @@ export { // Export options | 导出选项 export { SEEDREAM_SIZE_OPTIONS, SEEDREAM_4K_SIZE_OPTIONS, SEEDREAM_QUALITY_OPTIONS, SEEDANCE_RESOLUTION_OPTIONS, VIDEO_RATIO_OPTIONS, VIDEO_DURATION_OPTIONS } +export { ARK_SEEDREAM_SIZE_OPTIONS } // Export state | 导出状态 export { loading, error } diff --git a/web/components/lightbox.tsx b/web/components/lightbox.tsx index e5b3ffc..86dfacd 100644 --- a/web/components/lightbox.tsx +++ b/web/components/lightbox.tsx @@ -354,7 +354,7 @@ const createFusionShots = (): ProductFusionShot[] => scene_image: null, action_text: fusionDescriptionForCharacter(DEFAULT_CHARACTER_ID, i), duration: 5, - image_model: "gpt-image-2", + image_model: "auto", video_model: "seedance", guide_image: null, })) diff --git a/web/lib/api.ts b/web/lib/api.ts index 5b23cea..8bcd58d 100644 --- a/web/lib/api.ts +++ b/web/lib/api.ts @@ -172,7 +172,7 @@ export interface ProductFusionShot { scene_image?: ImageRef | null action_text?: string duration?: number - image_model?: "gpt-image-2" + image_model?: string video_model?: "seedance" guide_image?: ImageRef | null } @@ -796,7 +796,7 @@ export type AssetSize = "source" | "1024" | "1536" | "2048" export type SubjectKind = "object" | "living" export type SubjectView = string export type SubjectAssetStatus = "queued" | "in_progress" | "completed" | "failed" -export type SubjectImageModelPreference = "auto" | "gpt-image-2" | "gemini-3-pro-image-preview" +export type SubjectImageModelPreference = "auto" | "gpt-image-2" | "gemini-3-pro-image-preview" | "doubao-seedream-4-5-251128" export type SubjectModelBundle = "gpt" | "gemini" export type SceneMode = "remove_subject" | "similar" | "style" export type SceneStyle = "source" | "premium_product" | "clean_studio" | "warm_lifestyle" | "cinematic"