feat: add subject image model controls
This commit is contained in:
47
api/main.py
47
api/main.py
@@ -3547,8 +3547,24 @@ def _image_primary_circuit_open() -> bool:
|
||||
return _image_circuit_snapshot()["primary_open"]
|
||||
|
||||
|
||||
def _image_model_candidates(force_fallback: bool = False) -> list[str]:
|
||||
def _normalize_image_model_preference(value: str | None) -> str:
    """Map a caller-supplied image model preference onto a canonical value.

    Matching ignores case and surrounding whitespace.

    Returns:
        ``"auto"`` when *value* is ``None``, empty, ``"auto"``, ``"default"``,
        or not recognized; ``GPT_IMAGE_MODEL`` for the GPT aliases
        (``"gpt"``, ``"gpt-image"``, or the model name itself);
        ``IMAGE_FALLBACK_MODEL`` for ``"gemini"`` or the fallback model name,
        but only when ``IMAGE_FALLBACK_MODEL`` is set to a non-empty value.
    """
    token = (value if value else "auto").strip().lower()

    if token in ("", "auto", "default"):
        return "auto"

    gpt_aliases = ("gpt", "gpt-image", GPT_IMAGE_MODEL.lower())
    if token in gpt_aliases:
        return GPT_IMAGE_MODEL

    # The Gemini aliases only resolve when a fallback model is configured;
    # otherwise the request degrades to "auto" below.
    if IMAGE_FALLBACK_MODEL:
        if token in ("gemini", IMAGE_FALLBACK_MODEL.lower()):
            return IMAGE_FALLBACK_MODEL

    return "auto"
|
||||
|
||||
|
||||
def _image_model_candidates(force_fallback: bool = False, preference: str | None = "auto") -> list[str]:
|
||||
normalized = _normalize_image_model_preference(preference)
|
||||
fallbacks = _image_fallback_models()
|
||||
if normalized == GPT_IMAGE_MODEL:
|
||||
return [GPT_IMAGE_MODEL]
|
||||
if normalized == IMAGE_FALLBACK_MODEL and fallbacks:
|
||||
return [IMAGE_FALLBACK_MODEL]
|
||||
if not fallbacks:
|
||||
return [GPT_IMAGE_MODEL]
|
||||
if force_fallback or _image_primary_circuit_open():
|
||||
@@ -3692,6 +3708,7 @@ def _image_edit_call(
|
||||
max_attempts: int = 3,
|
||||
max_side: int = 1024,
|
||||
force_fallback_model: bool = False,
|
||||
image_model_preference: str | None = "auto",
|
||||
) -> tuple[bytes, str]:
|
||||
"""通用 image edit 调用 · 失败重试 + 可选 text fallback。
|
||||
返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
|
||||
@@ -3709,7 +3726,7 @@ def _image_edit_call(
|
||||
if not image_paths:
|
||||
raise RuntimeError("image edit reference image missing")
|
||||
img_bytes_list = [_prepare_image_edit_bytes(path, max_side) for path in image_paths]
|
||||
model_candidates = _image_model_candidates(force_fallback=force_fallback_model)
|
||||
model_candidates = _image_model_candidates(force_fallback=force_fallback_model, preference=image_model_preference)
|
||||
mode_plan: list[str] = ["edit"] if model_candidates != [GPT_IMAGE_MODEL] else ["edit"] * max_attempts
|
||||
if fallback_text:
|
||||
mode_plan.append("text")
|
||||
@@ -3803,6 +3820,7 @@ def _image_text_call(
|
||||
models: list[str] | None = None,
|
||||
max_attempts: int = 3,
|
||||
force_fallback_model: bool = False,
|
||||
image_model_preference: str | None = "auto",
|
||||
) -> tuple[bytes, str]:
|
||||
"""Text-only image generation. gpt-image-2 primary, Gemini only as outage fallback."""
|
||||
import base64 as b64lib
|
||||
@@ -3810,7 +3828,7 @@ def _image_text_call(
|
||||
import httpx
|
||||
if not IMAGE_API_KEY:
|
||||
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
||||
candidates = _image_model_candidates(force_fallback=force_fallback_model)
|
||||
candidates = _image_model_candidates(force_fallback=force_fallback_model, preference=image_model_preference)
|
||||
attempt_models = candidates if candidates != [GPT_IMAGE_MODEL] else [GPT_IMAGE_MODEL] * max_attempts
|
||||
last_err = ""
|
||||
capacity_seen = False
|
||||
@@ -5004,6 +5022,7 @@ class GenerateSubjectAssetsReq(BaseModel):
|
||||
reconstruction_mode: Literal["same", "similar"] = "same"
|
||||
subject_profile: SubjectProfilePreference | None = None
|
||||
prompt: str = ""
|
||||
image_model_preference: str = "auto"
|
||||
replace_views: bool = False
|
||||
source_subject_brief: str = ""
|
||||
pack_id: str = ""
|
||||
@@ -5787,9 +5806,17 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
|
||||
"Identity lock: these API calls generate one high-definition multi-view pack for ONE single subject, but each individual output file must show only its one requested view. "
|
||||
"Before rendering, infer one consistent character bible from the supplied text brief and generation instructions: gender presentation, age range, body proportions, head shape, face direction cues, material, silhouette, wardrobe/material style, and commercial mood. "
|
||||
"Keep that same character bible unchanged across every generated view in separate files. "
|
||||
"By default, inherit the reference frames' broad gender presentation, regional/ethnic appearance category, skin-tone family, body-proportion category, and ad-role energy unless the user explicitly overrides them. "
|
||||
"The pack must depict the same newly designed person or character in every view: same face design, same hair design, same body proportions, same skin tone, same age range, and same commercial styling. "
|
||||
"If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same pack. "
|
||||
"For transparent humanoids, keep the same transparent skin shell, skeleton proportions, visible spine/rib cage/pelvis/limb bones, and non-horror wellness character style in every view. "
|
||||
)
|
||||
wardrobe_lock_clause = (
|
||||
"Wardrobe lock: choose one outfit bible before rendering and keep it identical across all views. "
|
||||
"The same garment type, color palette, neckline, sleeve shape, straps, fabric/material, fit, seam logic, and visible accessories must remain consistent from front, side, three-quarter, and back views. "
|
||||
"Do not change clothing between views; do not switch from sportswear to casualwear, dress, coat, hoodie, uniform, or underwear unless the user explicitly requests that single outfit for the whole pack. "
|
||||
"If the reference outfit is useful, inherit its broad wardrobe category and color family, but redraw it as a new non-identical clean commercial outfit. "
|
||||
)
|
||||
neck_product_clause = (
|
||||
"This subject pack is for SKG neck-and-shoulder wearable massage device videos. "
|
||||
"Make the neck, collarbone, shoulder line, upper back, side neck, and shoulder slope clear and product-ready. "
|
||||
@@ -5797,10 +5824,11 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
|
||||
"For back and close-up views, prioritize the cervical spine, shoulder blades, upper trapezius, and clean wearable-device contact area. "
|
||||
)
|
||||
models = SUBJECT_ASSET_IMAGE_MODELS
|
||||
model_preference = _normalize_image_model_preference(req.image_model_preference)
|
||||
generated: list[SubjectAsset] = []
|
||||
generation_errors: list[str] = []
|
||||
first_generation_error: RuntimeError | None = None
|
||||
pack_force_fallback_model = _image_primary_circuit_open()
|
||||
pack_force_fallback_model = model_preference == "auto" and _image_primary_circuit_open()
|
||||
try:
|
||||
for view, view_label in _subject_view_labels(req.subject_kind, req.views):
|
||||
closeup_view = view in {"bust", "back_detail", "bust_front", "bust_left_45", "bust_right_45", "back_neck_detail"} or "detail" in view
|
||||
@@ -5845,6 +5873,7 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
|
||||
+ single_view_clause
|
||||
+ identity_clause
|
||||
+ identity_lock_clause
|
||||
+ wardrobe_lock_clause
|
||||
+ neck_product_clause
|
||||
+ canvas_clause
|
||||
+ prompt_extra_clause
|
||||
@@ -5861,17 +5890,17 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
|
||||
try:
|
||||
if similar_mode:
|
||||
print(
|
||||
f"[subject assets] reconstruction_mode=similar endpoint=/images/generations view={view} image_refs=0 model={'fallback' if pack_force_fallback_model else GPT_IMAGE_MODEL}",
|
||||
f"[subject assets] reconstruction_mode=similar endpoint=/images/generations view={view} image_refs=0 model_preference={model_preference}",
|
||||
flush=True,
|
||||
)
|
||||
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3, force_fallback_model=pack_force_fallback_model)
|
||||
if _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
|
||||
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference)
|
||||
if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
|
||||
pack_force_fallback_model = True
|
||||
else:
|
||||
if model_src is None:
|
||||
raise RuntimeError("subject asset edit reference image missing")
|
||||
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model)
|
||||
if _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
|
||||
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference)
|
||||
if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
|
||||
pack_force_fallback_model = True
|
||||
except RuntimeError as e:
|
||||
if first_generation_error is None:
|
||||
|
||||
Reference in New Issue
Block a user