feat: add subject image model controls

2026-05-20 09:16:28 +08:00
parent b4a7968c1b
commit c245bff4b8
5 changed files with 226 additions and 16 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -3547,8 +3547,24 @@ def _image_primary_circuit_open() -> bool:
    return _image_circuit_snapshot()["primary_open"]


-def _image_model_candidates(force_fallback: bool = False) -> list[str]:
+def _normalize_image_model_preference(value: str | None) -> str:  # returns "auto", GPT_IMAGE_MODEL, or IMAGE_FALLBACK_MODEL
+    raw = (value or "auto").strip().lower()  # None/empty becomes "auto"; matching below is trimmed and case-insensitive
+    if raw in {"", "auto", "default"}:  # "" covers whitespace-only input emptied by strip()
+        return "auto"
+    if raw in {"gpt", "gpt-image", GPT_IMAGE_MODEL.lower()}:  # short aliases or the configured primary model name
+        return GPT_IMAGE_MODEL
+    if IMAGE_FALLBACK_MODEL and raw in {"gemini", IMAGE_FALLBACK_MODEL.lower()}:  # only honoured when a fallback model is configured (truthy)
+        return IMAGE_FALLBACK_MODEL
+    return "auto"  # unrecognized values are not rejected; they silently degrade to "auto"
+
+
+def _image_model_candidates(force_fallback: bool = False, preference: str | None = "auto") -> list[str]:
+    normalized = _normalize_image_model_preference(preference)
    fallbacks = _image_fallback_models()
+    if normalized == GPT_IMAGE_MODEL:
+        return [GPT_IMAGE_MODEL]
+    if normalized == IMAGE_FALLBACK_MODEL and fallbacks:
+        return [IMAGE_FALLBACK_MODEL]
    if not fallbacks:
        return [GPT_IMAGE_MODEL]
    if force_fallback or _image_primary_circuit_open():
@@ -3692,6 +3708,7 @@ def _image_edit_call(
    max_attempts: int = 3,
    max_side: int = 1024,
    force_fallback_model: bool = False,
+    image_model_preference: str | None = "auto",
 ) -> tuple[bytes, str]:
    """通用 image edit 调用 · 失败重试 + 可选 text fallback。
    返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
@@ -3709,7 +3726,7 @@ def _image_edit_call(
    if not image_paths:
        raise RuntimeError("image edit reference image missing")
    img_bytes_list = [_prepare_image_edit_bytes(path, max_side) for path in image_paths]
-    model_candidates = _image_model_candidates(force_fallback=force_fallback_model)
+    model_candidates = _image_model_candidates(force_fallback=force_fallback_model, preference=image_model_preference)
    mode_plan: list[str] = ["edit"] if model_candidates != [GPT_IMAGE_MODEL] else ["edit"] * max_attempts
    if fallback_text:
        mode_plan.append("text")
@@ -3803,6 +3820,7 @@ def _image_text_call(
    models: list[str] | None = None,
    max_attempts: int = 3,
    force_fallback_model: bool = False,
+    image_model_preference: str | None = "auto",
 ) -> tuple[bytes, str]:
    """Text-only image generation. gpt-image-2 primary, Gemini only as outage fallback."""
    import base64 as b64lib
@@ -3810,7 +3828,7 @@ def _image_text_call(
    import httpx
    if not IMAGE_API_KEY:
        raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
-    candidates = _image_model_candidates(force_fallback=force_fallback_model)
+    candidates = _image_model_candidates(force_fallback=force_fallback_model, preference=image_model_preference)
    attempt_models = candidates if candidates != [GPT_IMAGE_MODEL] else [GPT_IMAGE_MODEL] * max_attempts
    last_err = ""
    capacity_seen = False
@@ -5004,6 +5022,7 @@ class GenerateSubjectAssetsReq(BaseModel):
    reconstruction_mode: Literal["same", "similar"] = "same"
    subject_profile: SubjectProfilePreference | None = None
    prompt: str = ""
+    image_model_preference: str = "auto"
    replace_views: bool = False
    source_subject_brief: str = ""
    pack_id: str = ""
@@ -5787,9 +5806,17 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
        "Identity lock: these API calls generate one high-definition multi-view pack for ONE single subject, but each individual output file must show only its one requested view. "
        "Before rendering, infer one consistent character bible from the supplied text brief and generation instructions: gender presentation, age range, body proportions, head shape, face direction cues, material, silhouette, wardrobe/material style, and commercial mood. "
        "Keep that same character bible unchanged across every generated view in separate files. "
+        "By default, inherit the reference frames' broad gender presentation, regional/ethnic appearance category, skin-tone family, body-proportion category, and ad-role energy unless the user explicitly overrides them. "
+        "The pack must depict the same newly designed person or character in every view: same face design, same hair design, same body proportions, same skin tone, same age range, and same commercial styling. "
        "If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same pack. "
        "For transparent humanoids, keep the same transparent skin shell, skeleton proportions, visible spine/rib cage/pelvis/limb bones, and non-horror wellness character style in every view. "
    )
+    wardrobe_lock_clause = (
+        "Wardrobe lock: choose one outfit bible before rendering and keep it identical across all views. "
+        "The same garment type, color palette, neckline, sleeve shape, straps, fabric/material, fit, seam logic, and visible accessories must remain consistent from front, side, three-quarter, and back views. "
+        "Do not change clothing between views; do not switch from sportswear to casualwear, dress, coat, hoodie, uniform, or underwear unless the user explicitly requests that single outfit for the whole pack. "
+        "If the reference outfit is useful, inherit its broad wardrobe category and color family, but redraw it as a new non-identical clean commercial outfit. "
+    )
    neck_product_clause = (
        "This subject pack is for SKG neck-and-shoulder wearable massage device videos. "
        "Make the neck, collarbone, shoulder line, upper back, side neck, and shoulder slope clear and product-ready. "
@@ -5797,10 +5824,11 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
        "For back and close-up views, prioritize the cervical spine, shoulder blades, upper trapezius, and clean wearable-device contact area. "
    )
    models = SUBJECT_ASSET_IMAGE_MODELS
+    model_preference = _normalize_image_model_preference(req.image_model_preference)
    generated: list[SubjectAsset] = []
    generation_errors: list[str] = []
    first_generation_error: RuntimeError | None = None
-    pack_force_fallback_model = _image_primary_circuit_open()
+    pack_force_fallback_model = model_preference == "auto" and _image_primary_circuit_open()
    try:
        for view, view_label in _subject_view_labels(req.subject_kind, req.views):
            closeup_view = view in {"bust", "back_detail", "bust_front", "bust_left_45", "bust_right_45", "back_neck_detail"} or "detail" in view
@@ -5845,6 +5873,7 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
                + single_view_clause
                + identity_clause
                + identity_lock_clause
+                + wardrobe_lock_clause
                + neck_product_clause
                + canvas_clause
                + prompt_extra_clause
@@ -5861,17 +5890,17 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
            try:
                if similar_mode:
                    print(
-                        f"[subject assets] reconstruction_mode=similar endpoint=/images/generations view={view} image_refs=0 model={'fallback' if pack_force_fallback_model else GPT_IMAGE_MODEL}",
+                        f"[subject assets] reconstruction_mode=similar endpoint=/images/generations view={view} image_refs=0 model_preference={model_preference}",
                        flush=True,
                    )
-                    img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3, force_fallback_model=pack_force_fallback_model)
-                    if _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
+                    img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference)
+                    if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
                        pack_force_fallback_model = True
                else:
                    if model_src is None:
                        raise RuntimeError("subject asset edit reference image missing")
-                    img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model)
-                    if _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
+                    img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference)
+                    if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
                        pack_force_fallback_model = True
            except RuntimeError as e:
                if first_generation_error is None: