fix: isolate subject reference generation

2026-05-20 11:39:33 +08:00
parent e64bf40267
commit 7acbfd5214
3 changed files with 160 additions and 57 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -5547,7 +5547,7 @@ def _subject_assets_background_worker(
    req: GenerateSubjectAssetsReq,
    queued: list[tuple[SubjectView, str, str]],
 ) -> None:
-    if req.reconstruction_mode == "similar" and not req.source_subject_brief.strip():
+    if not req.source_subject_brief.strip() and _subject_source_indices(req, idx):
        try:
            req.source_subject_brief = _describe_source_subject(job_id, _subject_source_indices(req, idx))
        except Exception as e:
@@ -5738,21 +5738,35 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
    frame_reference_paths = [p for p in (_source_frame_path(job_id, i) for i in source_indices) if p.exists()]
    source_subject_brief = (
        _ensure_english(req.source_subject_brief.strip())
-        if similar_mode and req.source_subject_brief.strip()
-        else (_describe_source_subject(job_id, source_indices) if similar_mode else "")
+        if req.source_subject_brief.strip()
+        else (_describe_source_subject(job_id, source_indices) if source_indices else "")
    )
    source_subject_clause = (
        f"Source video role brief from selected keyframes: {source_subject_brief}. "
-        "Use this brief to preserve role category, creator-ad energy, camera readability, and broad styling, while creating a new non-identical subject. "
+        + (
+            "Use this brief as secondary text evidence while preserving the same visible source subject from the attached reference image(s). "
+            if req.reconstruction_mode == "same"
+            else "Use this brief to preserve role category, creator-ad energy, camera readability, and broad styling, while creating a new non-identical subject. "
+        )
        if source_subject_brief else
-        "Source video role brief unavailable; create a new non-identical ad subject guided by the user direction, template brief, and requested view. "
+        (
+            "Source video role brief unavailable; use the attached source reference image(s) as primary evidence for the same visible subject. "
+            if req.reconstruction_mode == "same"
+            else "Source video role brief unavailable; create a new non-identical ad subject guided by the user direction, template brief, and requested view. "
+        )
    )
-    if not similar_mode:
+    if similar_mode:
+        if character_reference_paths:
+            remaining = max(0, 10 - len(character_reference_paths))
+            model_src = character_reference_paths + frame_reference_paths[:remaining]
+        elif frame_reference_paths:
+            model_src = frame_reference_paths[:10]
+    else:
        model_src, tmp_focus = _focus_source_for_element(job_id, idx, el)
        if character_reference_paths:
            remaining = max(0, 10 - len(character_reference_paths))
            model_src = character_reference_paths + frame_reference_paths[:remaining]
-        elif len(frame_reference_paths) > 1:
+        elif frame_reference_paths:
            model_src = frame_reference_paths[:10]

    try:
@@ -5823,14 +5837,25 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
        "If the reference outfit is useful, inherit its broad wardrobe category and color family, but redraw it as a new non-identical clean commercial outfit. "
    )
    pack_bible_clause = (
-        "PACK BIBLE - this exact bible applies to every view in this generated set. "
-        "Subject bible: one newly designed commercial wellness-ad subject; inherit only broad non-identifying casting traits from the source such as gender presentation, regional/ethnic appearance category, skin-tone family, age range, body-proportion category, hair-length family, posture energy, and neck/shoulder readability. "
-        "Do not copy the source person's biometric identity, exact face, exact hairstyle, marks, tattoos, captions, logos, or watermarks. "
-        "Keep the same new face design, same head shape, same hair color and hair silhouette, same skin tone, same body proportions, same height impression, and same character age across front, side, three-quarter, and back views. "
-        "Wardrobe bible: if the user direction names a specific outfit, use that one outfit uniformly across every view. Otherwise use one clean SKG wellness-ad activewear outfit for the entire pack: fitted short-sleeve performance top with a visible neck/collarbone area, slim athletic pants, and low-profile sneakers. "
-        "Lock the exact top color, bottom color, shoe color, neckline shape, sleeve/strap structure, seams, trim, fabric finish, fit, and accessories before rendering the first view, then repeat those same clothing decisions in every other view. "
-        "Never add or remove a jacket, blazer, hoodie, coat, dress, skirt, scarf, hat, bag, jewelry, logo, stripe pattern, or extra layer in only one view. "
-        "Back and side views must show the same garment wrapping around the same body, not a redesigned outfit. "
+        (
+            "PACK BIBLE - source-locked mode. "
+            "Subject bible: use the attached source frame(s) as the primary identity and wardrobe reference for one same visible subject. "
+            "Preserve the visible gender presentation, regional/ethnic appearance category, skin-tone family, age range impression, body-proportion category, hair length/color/silhouette, face-structure impression, posture energy, neck/shoulder readability, outfit category, garment colors, material finish, and accessory logic across every generated view. "
+            "Do not replace the source subject with a different actor, different body type, different ethnicity, different gender, different hairstyle, different outfit, or generic wellness model. "
+            "Remove only source-video artifacts such as background, captions, watermarks, platform UI, compression noise, and accidental occlusion; redraw missing angles as the same subject. "
+            "Lock the exact top color, bottom color, shoe color, neckline shape, sleeve/strap structure, seams, trim, fabric finish, fit, and accessories before rendering the first view, then repeat those same clothing decisions in every other view. "
+        )
+        if req.reconstruction_mode == "same" else
+        (
+            "PACK BIBLE - this exact bible applies to every view in this generated set. "
+            "Subject bible: one newly designed commercial wellness-ad subject; inherit only broad non-identifying casting traits from the source such as gender presentation, regional/ethnic appearance category, skin-tone family, age range, body-proportion category, hair-length family, posture energy, and neck/shoulder readability. "
+            "Do not copy the source person's biometric identity, exact face, exact hairstyle, marks, tattoos, captions, logos, or watermarks. "
+            "Keep the same new face design, same head shape, same hair color and hair silhouette, same skin tone, same body proportions, same height impression, and same character age across front, side, three-quarter, and back views. "
+            "Wardrobe bible: if the user direction names a specific outfit, use that one outfit uniformly across every view. Otherwise use one clean SKG wellness-ad activewear outfit for the entire pack: fitted short-sleeve performance top with a visible neck/collarbone area, slim athletic pants, and low-profile sneakers. "
+            "Lock the exact top color, bottom color, shoe color, neckline shape, sleeve/strap structure, seams, trim, fabric finish, fit, and accessories before rendering the first view, then repeat those same clothing decisions in every other view. "
+            "Never add or remove a jacket, blazer, hoodie, coat, dress, skirt, scarf, hat, bag, jewelry, logo, stripe pattern, or extra layer in only one view. "
+            "Back and side views must show the same garment wrapping around the same body, not a redesigned outfit. "
+        )
    )
    neck_product_clause = (
        "This subject pack is for SKG neck-and-shoulder wearable massage device videos. "
@@ -5840,6 +5865,7 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
    )
    models = SUBJECT_ASSET_IMAGE_MODELS
    model_preference = _normalize_image_model_preference(req.image_model_preference)
+    reference_image_count = len(model_src) if isinstance(model_src, list) else (1 if model_src else 0)
    generated: list[SubjectAsset] = []
    generation_errors: list[str] = []
    first_generation_error: RuntimeError | None = None
@@ -5872,14 +5898,30 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
                if closeup_view and req.subject_kind == "living"
                else "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. Make the subject large and readable: it should occupy about 88-94% of the image height, with the head close to the top margin and feet close to the bottom margin. No tiny character, no miniature person, no distant full-body figure, no large empty white margins. "
            )
-            reference_strategy_clause = (
-                "Text-only generation mode: no source image is attached to this image request. Use only the written source/video/template briefs below as creative constraints. "
-                "This is intentionally NOT image editing and NOT identity replication. "
-                + source_subject_clause
-                + template_brief_clause
-                if similar_mode else
-                "Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. "
-            )
+            if similar_mode and reference_image_count:
+                reference_strategy_clause = (
+                    f"Image-conditioned reference reconstruction mode: {reference_image_count} selected source reference image(s) are attached to this request. "
+                    "First read the attached frames and the written source brief, then generate a new similar but non-identical subject. "
+                    "Use the images as visual evidence for broad role, gender presentation, regional/ethnic appearance category, skin-tone family, body proportion, hair family, outfit category/color family, pose language, and creator-ad energy. "
+                    "Do not copy exact face, biometric identity, unique marks, source pixels, captions, watermarks, or background. "
+                    + source_subject_clause
+                    + template_brief_clause
+                )
+            elif similar_mode:
+                reference_strategy_clause = (
+                    "Text-only generation mode: no source image is attached to this image request. Use only the written source/video/template briefs below as creative constraints. "
+                    "This is intentionally NOT image editing and NOT identity replication. "
+                    + source_subject_clause
+                    + template_brief_clause
+                )
+            else:
+                reference_strategy_clause = (
+                    f"Source-locked image reference mode: {reference_image_count} selected source reference image(s) are attached and are the primary visual evidence. "
+                    "Preserve the visible source subject's identity impression, proportions, silhouette, material, colors, wardrobe, styling, and distinctive non-artifact details across all generated views. "
+                    "Do not crop, cut out, paste, trace, or extract pixels from the source; redraw a clean production-ready asset of the same visible subject. "
+                    + source_subject_clause
+                    + template_brief_clause
+                )
            prompt = (
                reference_strategy_clause
                +
@@ -5904,7 +5946,15 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
                + transparent_character_clause
            )
            try:
-                if similar_mode:
+                if similar_mode and model_src is not None:
+                    print(
+                        f"[subject assets] reconstruction_mode=similar endpoint=/images/edits view={view} image_refs={reference_image_count} model_preference={model_preference}",
+                        flush=True,
+                    )
+                    img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280, force_fallback_model=pack_force_fallback_model, image_model_preference=model_preference)
+                    if model_preference == "auto" and _mode.endswith(f":{IMAGE_FALLBACK_MODEL}"):
+                        pack_force_fallback_model = True
+                elif similar_mode:
                    print(
                        f"[subject assets] reconstruction_mode=similar endpoint=/images/generations view={view} image_refs=0 model_preference={model_preference}",
                        flush=True,