diff --git a/api/main.py b/api/main.py
index b1a0215..ff1f1ab 100644
--- a/api/main.py
+++ b/api/main.py
@@ -339,6 +339,7 @@ class StoryboardScene(BaseModel):
visual_mode: Literal["person_only", "person_product", "product_only", "environment"] = "person_product"
needs_product: bool = True
needs_subject: bool = True
+ subject_brief: str = ""
first_frame_plan: str = ""
last_frame_plan: str = ""
product_placement: str = ""
@@ -532,6 +533,7 @@ class KeyElement(BaseModel):
cutout_background: Literal["white", "black"] = "white"
subject_kind: SubjectKind = "object"
subject_assets: list[SubjectAsset] = Field(default_factory=list)
+ subject_consensus_brief: str = ""
created_at: float = 0.0
@@ -3014,6 +3016,19 @@ def _describe_subject_template_from_images(name: str, subject_style: str, image_
return _vision_brief_from_images(image_paths, prompt, max_images=10)
+def _describe_subject_consensus_from_images(name: str, subject_style: str, image_paths: list[Path], note: str = "") -> str:  # Builds a "character bible" extraction prompt for a generated subject view pack and returns the result of _vision_brief_from_images for it.
+ prompt = (  # Instruction text passed to the helper together with image_paths.
+ f"You are extracting the stable character bible from a generated SKG subject view pack named '{name}'. "
+ f"Subject style: {subject_style}. User/profile note: {note[:700]}. "  # note is cut to its first 700 characters before interpolation.
+ "These images are multiple views of ONE generated subject. Summarize the reusable identity as text for future first/last-frame generation. "
+ "Do NOT identify a real person and do NOT mention exact facial identity. "
+ "Output strict JSON only with keys: gender_presentation, age_range, body_proportion, hair, skin_tone, "  # NOTE(review): the prompt demands strict JSON while the function is annotated to return a plain str; confirm whether _vision_brief_from_images post-processes the JSON before the value is reused as prose.
+ "wardrobe_or_material_style, pose_language, camera_readability, neck_shoulder_readiness, commercial_mood, brief. "
+ "The brief should be 90-160 words, describe one consistent subject, and explicitly allow new poses, new framing, new expressions, and new environments while preserving identity, proportions, material/style, and ad role."
+ )
+ return _vision_brief_from_images(image_paths, prompt, max_images=10)  # max_images=10 presumably caps how many images the helper uses; confirm in the helper.
+
+
# ---------- API 路由 ----------
class CreateJobReq(BaseModel):
@@ -3934,6 +3949,7 @@ class UpdateElementReq(BaseModel):
name_zh: str | None = None
name_en: str | None = None
position: str | None = None
+ subject_consensus_brief: str | None = None
class GenerateSceneAssetReq(BaseModel):
@@ -3943,6 +3959,7 @@ class GenerateSceneAssetReq(BaseModel):
scene_style: SceneStyle = "source"
asset_role: SceneAssetRole = "scene"
prompt: str = ""
+ subject_brief: str = ""
source_frame_indices: list[int] | None = None
subject_images: list[dict] = Field(default_factory=list)
product_images: list[dict] = Field(default_factory=list)
@@ -4107,6 +4124,8 @@ def update_element(job_id: str, idx: int, element_id: str, req: UpdateElementReq
e.name_en = req.name_en.strip()
if req.position is not None:
e.position = req.position.strip()
+ if req.subject_consensus_brief is not None:
+ e.subject_consensus_brief = req.subject_consensus_brief.strip()[:2200]
new_frames.append(f)
if not found:
raise HTTPException(404, "element not found")
@@ -4161,20 +4180,14 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
source_indices = list(dict.fromkeys(source_indices))[:8]
model_src = src
sheet_tmp: Path | None = None
- asset_sheet_tmp: Path | None = None
- if len(source_indices) > 1:
+ if req.asset_role == "scene" and len(source_indices) > 1:
sheet_tmp = job_dir(job_id) / "tmp" / f"scene_refs_{idx:03d}_{uuid.uuid4().hex[:6]}.jpg"
sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp)
if sheet:
model_src = sheet
- subject_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in req.subject_images[:8]) if p and p.exists()]
- product_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in req.product_images[:6]) if p and p.exists()]
- asset_ref_paths = [*subject_ref_paths, *product_ref_paths]
- if req.asset_role != "scene" and asset_ref_paths:
- asset_sheet_tmp = job_dir(job_id) / "tmp" / f"endpoint_refs_{idx:03d}_{uuid.uuid4().hex[:6]}.jpg"
- asset_sheet = _make_paths_contact_sheet(asset_ref_paths, asset_sheet_tmp, max_items=10)
- if asset_sheet:
- model_src = asset_sheet
+ # Endpoint frames deliberately ignore subject image references. Character identity comes
+ # from subject_brief text only; at most two product images are kept as the sole hard visual truth.
+ product_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in req.product_images[:2]) if p and p.exists()]
confirmed_subjects = [
(e.name_en or e.name_zh).strip()
@@ -4195,12 +4208,13 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
if confirmed_subjects
else "Remove the main foreground subject from the frame if present. "
)
- identity_clause = (
- f"Use the generated subject asset references as the primary character identity lock ({len(subject_ref_paths)} image(s)); preserve the subject type, material, proportions, style, age/gender presentation, pose vocabulary, and ad-friendly identity exactly as shown in those selected views. "
- if subject_ref_paths
- else (
- "No generated subject reference was provided for this endpoint. Do not add a main character unless the user scene direction explicitly asks for one. "
- )
+ subject_brief = req.subject_brief.strip()
+ subject_brief_clause = (
+ f"Subject identity (text only, no image reference): {subject_brief[:1800]}. "
+ "Maintain this identity across this and other endpoint frames in the same storyboard. "
+ "Vary pose, framing, expression, gesture, camera distance, and environment freely according to the user prompt; do not fall back to any specific reference photo or ID-card pose. "
+ if subject_brief
+ else "No subject identity brief was provided. Do not add a main character unless the user scene direction explicitly asks for one. "
)
mode_clause = {
"remove_subject": (
@@ -4229,9 +4243,14 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
if user_prompt
else ""
)
- if req.asset_role != "scene" and asset_ref_paths:
+ if req.asset_role != "scene" and product_ref_paths:
reference_clause = (
- f"Use the provided asset contact sheet as the primary visual reference: {len(subject_ref_paths)} generated subject image(s) and {len(product_ref_paths)} SKG product image(s). "
+ f"Use the provided {len(product_ref_paths)} SKG product image(s) only as rigid product reference. "
+ "Do not use the original keyframe as the first/last-frame truth; it is only a storage anchor for this row. No subject image reference is attached. "
+ )
+ elif req.asset_role != "scene":
+ reference_clause = (
+ "No image reference is attached for this endpoint frame. Generate from text only. "
"Do not use the original keyframe as the first/last-frame truth; it is only a storage anchor for this row. "
)
else:
@@ -4241,18 +4260,14 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
else "Use the provided frame as the primary visual reference. "
)
product_asset_clause = (
- "Use the provided SKG product references as the rigid product truth when the user prompt asks for product presence: a white U-shaped neck-and-shoulder wearable massage device worn around the neck/shoulders, not headphones, a collar pillow, skincare, food, or a medical prop. Keep product scale believable, preserve left/right asymmetry, side thickness, inner contact pads, buttons, white material, and real wearable placement. "
+ "The provided product image(s) are the only product truth. The product is a white U-shaped neck-and-shoulder wearable massage device worn around the neck/shoulders, not headphones, a collar pillow, skincare, food, or a medical prop. Do not vary left/right asymmetry, button placement, contact pad position, side thickness, opening direction, inner/outer shell relationship, or wearable scale relative to the human neck. Preserve all structural details exactly while integrating it into the new scene. "
if product_ref_paths
else "Do not invent a random product. Only include an SKG product if the user prompt explicitly asks for it. "
)
subject_asset_clause = (
- TRANSPARENT_HUMAN_POSITIVE_PROMPT + " "
- + TRANSPARENT_HUMAN_NEGATIVE_PROMPT + " "
- + "If the selected subject references are transparent humanoid assets, keep the same friendly transparent or translucent human character: glass/acrylic/vinyl-like transparent outer body, visible clean white skeleton inside, clean commercial wellness style, non-horror. "
- + "If the selected subject references are normal actor assets, keep them as a normal believable commercial actor and do not convert them into a transparent skeleton. "
- + "Use the selected subject views only to understand identity, proportions, material, pose vocabulary, camera language, and lighting; do not copy watermarks, subtitles, platform UI, logos, or accidental artifacts. "
- if subject_ref_paths
- else "No main character should be generated unless the user scene direction explicitly requires one; product-only and environment-only frames should stay product-only or scene-only. "
+ (TRANSPARENT_HUMAN_POSITIVE_PROMPT + " " + TRANSPARENT_HUMAN_NEGATIVE_PROMPT + " ")
+ if subject_brief and ("透明" in subject_brief or "transparent" in subject_brief.lower() or "skeleton" in subject_brief.lower())
+ else ""
)
if req.asset_role == "scene":
prompt = (
@@ -4275,7 +4290,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
prompt = (
"Create one premium 9:16 high-definition video endpoint frame from text direction. "
+ role_clause
- + identity_clause
+ + subject_brief_clause
+ reference_clause
+ user_prompt_clause
+ style_clause + " "
@@ -4288,9 +4303,17 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
try:
if req.asset_role == "scene":
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
- elif asset_ref_paths:
- img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1600)
+ elif product_ref_paths:
+ print(
+ f"[scene asset] role={req.asset_role} endpoint=/images/edits product_refs={len(product_ref_paths)} subject_refs=0 contact_sheet=0 model={GPT_IMAGE_MODEL}",
+ flush=True,
+ )
+ img_bytes, _mode = _image_edit_call(product_ref_paths, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1600)
else:
+ print(
+ f"[scene asset] role={req.asset_role} endpoint=/images/generations product_refs=0 subject_refs=0 contact_sheet=0 model={GPT_IMAGE_MODEL}",
+ flush=True,
+ )
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3)
except RuntimeError as e:
raise HTTPException(500, f"{req.asset_role} asset failed: {e}")
@@ -4298,9 +4321,6 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
if sheet_tmp and sheet_tmp.exists():
try: sheet_tmp.unlink()
except OSError: pass
- if asset_sheet_tmp and asset_sheet_tmp.exists():
- try: asset_sheet_tmp.unlink()
- except OSError: pass
asset_id = f"scene_{idx:03d}_{uuid.uuid4().hex[:8]}"
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
@@ -4451,6 +4471,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
similar_mode = req.reconstruction_mode == "similar"
character_reference_paths: list[Path] = []
template_brief_clause = ""
+ selected_template_brief = ""
character_label = ""
subject_template_id = (req.subject_template_id or "").strip()
character_id = (req.character_id or "").strip()
@@ -4462,6 +4483,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
brief = template.prompt_brief.strip() or template.note.strip() or template.description.strip()
if similar_mode and not brief:
brief = _describe_subject_template_from_images(template.name, template.subject_style, template_paths, template.note)
+ selected_template_brief = brief.strip()
template_brief_clause = (
f"Reference character brief from saved database template '{template.name}': {brief}. "
"Use this as a high-quality creative direction and identity bible only; do not copy a face, exact pose, pixels, file artifacts, labels, or accidental defects. "
@@ -4474,6 +4496,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
character_label = character.name
character_reference_paths.extend(character_library_file(image.filename) for image in character.images[:7])
brief = character.prompt_brief.strip() or character.description.strip()
+ selected_template_brief = brief.strip()
template_brief_clause = (
f"Reference character brief from built-in creative character '{character.name}': {brief}. "
"Use this planned character brief as a high-quality creative direction and anatomy/style bible only; "
@@ -4672,7 +4695,36 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
if old_asset.view in replaced_views:
_delete_subject_asset_file(job_id, old_asset.id)
current_assets = [asset for asset in current_assets if asset.view not in replaced_views]
- e.subject_assets = current_assets + generated
+ final_assets = current_assets + generated
+ e.subject_assets = final_assets
+ if req.subject_kind == "living":
+ current_brief = (e.subject_consensus_brief or "").strip()
+ should_refresh_brief = bool(selected_template_brief) or not current_brief or len(generated) >= 3
+ if should_refresh_brief:
+ fallback_parts = [
+ selected_template_brief,
+ (req.subject_profile.resolved_summary if req.subject_profile else ""),
+ source_subject_brief,
+ prompt_extra,
+ ]
+ fallback_brief = " ".join(part.strip() for part in fallback_parts if part and part.strip())[:1800]
+ if selected_template_brief:
+ e.subject_consensus_brief = selected_template_brief[:1800]
+ else:
+ asset_paths = [
+ job_dir(job_id) / "assets" / f"{asset.id}.jpg"
+ for asset in final_assets[:10]
+ if asset.id
+ ]
+ brief = _describe_subject_consensus_from_images(
+ e.name_zh or e.name_en or "generated subject",
+ req.subject_style,
+ asset_paths,
+ fallback_brief,
+ )
+ e.subject_consensus_brief = brief or current_brief or fallback_brief or (
+ "Generated SKG ad subject; identity brief unavailable. Keep one consistent commercial subject with clear neck and shoulder placement area."
+ )
new_frames.append(f)
if generation_errors:
msg = f"主体资产包部分生成完成 · {el.name_zh} · {len(generated)} 张,失败 {len(generation_errors)} 张"
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index 5d38c7c..8fb1e54 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -593,7 +593,7 @@
web/next.config.mjs
Next.js 构建配置:静态导出、图片不走优化、禁用开发环境左下角 Next Dev Indicator,并移除 Next 16 已不支持的 eslint 顶层配置,避免本地 dev 出现配置 Issue 提示。