auto-save 2026-05-18 01:07 (~8)

2026-05-18 01:07:51 +08:00
parent 4c43d89346
commit 7ca5a95a5e
8 changed files with 43 additions and 41 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -2851,7 +2851,7 @@ def health() -> dict:
            "vision": VISION_MODEL,
            "image": IMAGE_MODEL,
            "image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
-            "image_fallbacks": list(dict.fromkeys([IMAGE_MODEL, GPT_IMAGE_MODEL, "gpt-image-1.5"])),
+            "image_fallbacks": [GPT_IMAGE_MODEL],
            "subject_image": SUBJECT_ASSET_IMAGE_MODEL,
            "subject_image_fallbacks": SUBJECT_ASSET_IMAGE_MODELS,
            "voice_provider": VOICE_PROVIDER,
@@ -3109,7 +3109,7 @@ class GenerateReq(BaseModel):
    prompt: str
    extra_prompt: str = ""        # ✓ 需要的元素（正向）
    negative_prompt: str = ""     # ✗ 不需要的元素（负向）
-    model: str = ""  # 留空用 IMAGE_MODEL 默认
+    model: str = ""  # 兼容旧前端字段；服务端强制使用 gpt-image-2
    mode: str = "edit"  # "edit" 带参考图，"text" 纯文字
    from_selected: bool = False   # True 时优先用 frame.selected 的生成图作 reference（迭代），否则原关键帧

@@ -3148,7 +3148,7 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
    if not IMAGE_API_KEY:
        raise HTTPException(503, "IMAGE_API_KEY 或 LLM_API_KEY 未配置")

-    model = req.model or IMAGE_MODEL
+    model = GPT_IMAGE_MODEL
    gen_id = uuid.uuid4().hex[:12]

    import base64 as b64lib
@@ -3392,7 +3392,7 @@ def _region_to_phrase(r: dict) -> str:

@app.post("/jobs/{job_id}/frames/{idx}/cleanup", response_model=Job)
 def cleanup_frame(job_id: str, idx: int, req: CleanupReq | None = None) -> Job:
-    """调 nano-banana image edit 清洗关键帧：去水印 / @用户名 / 字幕 / 平台 logo。
+    """调 gpt-image-2 image edit 清洗关键帧：去水印 / @用户名 / 字幕 / 平台 logo。
    输出干净版到 jobs/<id>/cleaned/<idx>.jpg，写回 frame.cleaned_url。
    可选 region: 限定只清洗框内区域。"""
    import time as _time
@@ -3431,12 +3431,7 @@ def cleanup_frame(job_id: str, idx: int, req: CleanupReq | None = None) -> Job:
            "hashtags, usernames, or platform logos. Keep the composition and style."
        )

-    # 模型轮换：nano-banana-pro 失败时换 flash 系列
-    models = [
-        IMAGE_MODEL,                          # gemini-3-pro-image-preview (nano-banana-pro)
-        "gemini-3.1-flash-image-preview",
-        "gemini-2.5-flash-image",
-    ]
+    models = [GPT_IMAGE_MODEL]
    try:
        img_bytes, _mode = _image_edit_call(
            frame_path, prompt, models=models, fallback_text=False, max_attempts=3,
@@ -3812,7 +3807,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
            + "Do not create a plain background plate. Do not remove the character. Do not include SKG product unless the user prompt explicitly asks for it. "
            + "The output should be ready as a first/last frame for Seedance video generation, with stable composition, believable perspective, clear subject, no text, no watermark, no gore, no medical surgery imagery."
        )
-    models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
+    models = [GPT_IMAGE_MODEL]
    try:
        if req.asset_role == "scene":
            img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
@@ -3862,7 +3857,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
@app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/cutout", response_model=Job)
 def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
    """AI 提取元素 · 每次累积一张新图：
-    调 nano-banana 模型生成**完整、清晰**的元素图（即使原图只露出部分也补全）。
+    调 gpt-image-2 生成**完整、清晰**的元素图（即使原图只露出部分也补全）。
    region 元素：先把 region + 30% padding 区域裁出作为 focus，再发给模型聚焦补全。"""
    from PIL import Image as _PILImage
    import io as _io
@@ -3925,7 +3920,7 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
        "Preserve the element's original color palette, style, lighting character, and proportions. "
        "Output must be a clean, high-quality asset image suitable for downstream composition."
    )
-    models = [IMAGE_MODEL, "gemini-2.5-flash-image"]
+    models = [GPT_IMAGE_MODEL]
    img_bytes: bytes
    try:
        try:
@@ -4013,7 +4008,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
        "If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same six-view pack. "
        "For transparent humanoids, keep the same transparent skin shell, skeleton proportions, visible spine/rib cage/pelvis/limb bones, and non-horror wellness character style in every view. "
    )
-    models = SUBJECT_ASSET_IMAGE_MODELS
+    models = [GPT_IMAGE_MODEL]
    generated: list[SubjectAsset] = []
    try:
        for view, view_label in _subject_view_labels(req.subject_kind, req.views):
@@ -5019,7 +5014,7 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
        "If the target view is not fully visible in the source, infer the missing surfaces conservatively from the same product design without inventing a new model. "
        + (f"Additional operator note: {note}. " if note else "")
    )
-    models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
+    models = [GPT_IMAGE_MODEL]
    try:
        img_bytes, _mode = _image_edit_call(source_path, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
    except RuntimeError as e: