feat: add subject image model controls

This commit is contained in:
2026-05-20 09:16:28 +08:00
parent b4a7968c1b
commit c245bff4b8
5 changed files with 226 additions and 16 deletions

View File

@@ -3547,8 +3547,24 @@ def _image_primary_circuit_open() -> bool:
return _image_circuit_snapshot()["primary_open"]
def _image_model_candidates(force_fallback: bool = False) -> list[str]:
def _normalize_image_model_preference(value: str | None) -> str:
raw = (value or "auto").strip().lower()
if raw in {"", "auto", "default"}:
return "auto"
if raw in {"gpt", "gpt-image", GPT_IMAGE_MODEL.lower()}:
return GPT_IMAGE_MODEL
if IMAGE_FALLBACK_MODEL and raw in {"gemini", IMAGE_FALLBACK_MODEL.lower()}:
return IMAGE_FALLBACK_MODEL
return "auto"
def _image_model_candidates(force_fallback: bool = False, preference: str | None = "auto") -> list[str]:
normalized = _normalize_image_model_preference(preference)
fallbacks = _image_fallback_models()
if normalized == GPT_IMAGE_MODEL:
return [GPT_IMAGE_MODEL]
if normalized == IMAGE_FALLBACK_MODEL and fallbacks:
return [IMAGE_FALLBACK_MODEL]
if not fallbacks:
return [GPT_IMAGE_MODEL]
if force_fallback or _image_primary_circuit_open():
@@ -3692,6 +3708,7 @@ def _image_edit_call(
max_attempts: int = 3,
max_side: int = 1024,
force_fallback_model: bool = False,
image_model_preference: str | None = "auto",
) -> tuple[bytes, str]:
"""通用 image edit 调用 · 失败重试 + 可选 text fallback。
返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
@@ -3709,7 +3726,7 @@ def _image_edit_call(
if not image_paths:
raise RuntimeError("image edit reference image missing")
img_bytes_list = [_prepare_image_edit_bytes(path, max_side) for path in image_paths]
model_candidates = _image_model_candidates(force_fallback=force_fallback_model)
model_candidates = _image_model_candidates(force_fallback=force_fallback_model, preference=image_model_preference)
mode_plan: list[str] = ["edit"] if model_candidates != [GPT_IMAGE_MODEL] else ["edit"] * max_attempts
if fallback_text:
mode_plan.append("text")
@@ -3803,6 +3820,7 @@ def _image_text_call(
models: list[str] | None = None,
max_attempts: int = 3,
force_fallback_model: bool = False,
image_model_preference: str | None = "auto",
) -> tuple[bytes, str]:
"""Text-only image generation. gpt-image-2 primary, Gemini only as outage fallback."""
import base64 as b64lib
@@ -3810,7 +3828,7 @@ def _image_text_call(
import httpx
if not IMAGE_API_KEY:
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
candidates = _image_model_candidates(force_fallback=force_fallback_model)
candidates = _image_model_candidates(force_fallback=force_fallback_model, preference=image_model_preference)
attempt_models = candidates if candidates != [GPT_IMAGE_MODEL] else [GPT_IMAGE_MODEL] * max_attempts
last_err = ""
capacity_seen = False
@@ -5004,6 +5022,7 @@ class GenerateSubjectAssetsReq(BaseModel):
reconstruction_mode: Literal["same", "similar"] = "same"
subject_profile: SubjectProfilePreference | None = None
prompt: str = ""
image_model_preference: str = "auto"
replace_views: bool = False
source_subject_brief: str = ""
pack_id: str = ""
@@ -5787,9 +5806,17 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
"Identity lock: these API calls generate one high-definition multi-view pack for ONE single subject, but each individual output file must show only its one requested view. "
"Before rendering, infer one consistent character bible from the supplied text brief and generation instructions: gender presentation, age range, body proportions, head shape, face direction cues, material, silhouette, wardrobe/material style, and commercial mood. "
"Keep that same character bible unchanged across every generated view in separate files. "
"By default, inherit the reference frames' broad gender presentation, regional/ethnic appearance category, skin-tone family, body-proportion category, and ad-role energy unless the user explicitly overrides them. "
"The pack must depict the same newly designed person or character in every view: same face design, same hair design, same body proportions, same skin tone, same age range, and same commercial styling. "
"If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same pack. "
"For transparent humanoids, keep the same transparent skin shell, skeleton proportions, visible spine/rib cage/pelvis/limb bones, and non-horror wellness character style in every view. "
)
wardrobe_lock_clause = (
"Wardrobe lock: choose one outfit bible before rendering and keep it identical across all views. "
"The same garment type, color palette, neckline, sleeve shape, straps, fabric/material, fit, seam logic, and visible accessories must remain consistent from front, side, three-quarter, and back views. "
"Do not change clothing between views; do not switch from sportswear to casualwear, dress, coat, hoodie, uniform, or underwear unless the user explicitly requests that single outfit for the whole pack. "
"If the reference outfit is useful, inherit its broad wardrobe category and color family, but redraw it as a new non-identical clean commercial outfit. "
)
neck_product_clause = (
"This subject pack is for SKG neck-and-shoulder wearable massage device videos. "
"Make the neck, collarbone, shoulder line, upper back, side neck, and shoulder slope clear and product-ready. "
@@ -5797,10 +5824,11 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
"For back and close-up views, prioritize the cervical spine, shoulder blades, upper trapezius, and clean wearable-device contact area. "
)
models = SUBJECT_ASSET_IMAGE_MODELS
model_preference = _normalize_image_model_preference(req.image_model_preference)
generated: list[SubjectAsset] = []
generation_errors: list[str] = []
first_generation_error: RuntimeError | None = None
pack_force_fallback_model = _image_primary_circuit_open()
pack_force_fallback_model = model_preference == "auto" and _image_primary_circuit_open()
try:
for view, view_label in _subject_view_labels(req.subject_kind, req.views):
closeup_view = view in {"bust", "back_detail", "bust_front", "bust_left_45", "bust_right_45", "back_neck_detail"} or "detail" in view
@@ -5845,6 +5873,7 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
+ single_view_clause
+ identity_clause
+ identity_lock_clause
+ wardrobe_lock_clause
+ neck_product_clause
+ canvas_clause
+ prompt_extra_clause
@@ -5861,17 +5890,17 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
try:
if similar_mode:
print(
f"[subject assets] reconstruction_mode=similar endpoint=/images/generations view={view} image_refs=0 model={'fallback' if pack_force_fallback_model else GPT_IMAGE_MODEL}",
f"[subject assets] reconstruction_mode=similar endpoint=/images/generations view={view} image_refs=0 model_preference={model_preference}",
flush=True,
)
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3, force_fallback_model=pack_force_fallback_model)
if _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference)
if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
pack_force_fallback_model = True
else:
if model_src is None:
raise RuntimeError("subject asset edit reference image missing")
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model)
if _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference)
if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
pack_force_fallback_model = True
except RuntimeError as e:
if first_generation_error is None: