auto-save 2026-05-17 21:09 (~4)

2026-05-17 21:09:20 +08:00
parent 096f201470
commit 252cdf441d
4 changed files with 117 additions and 50 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -1140,9 +1140,10 @@ def _focus_source_for_element(job_id: str, idx: int, el: KeyElement) -> tuple[Pa
    return model_src, tmp_focus


-def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_path: Path) -> Path | None:
+def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_path: Path, max_items: int = 6) -> Path | None:
    paths: list[Path] = []
    seen: set[int] = set()
+    max_items = max(2, min(12, int(max_items or 6)))
    for idx in frame_indices:
        if idx in seen:
            continue
@@ -1150,7 +1151,7 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat
        p = _source_frame_path(job_id, idx)
        if p.exists():
            paths.append(p)
-        if len(paths) >= 6:
+        if len(paths) >= max_items:
            break
    if len(paths) <= 1:
        return None
@@ -1168,7 +1169,7 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat
    if len(thumbs) <= 1:
        return None

-    cols = 3 if len(thumbs) > 2 else 2
+    cols = 4 if len(thumbs) > 6 else (3 if len(thumbs) > 2 else 2)
    rows = (len(thumbs) + cols - 1) // cols
    sheet = Image.new("RGB", (cols * 420, rows * 420), (245, 245, 245))
    for i, thumb in enumerate(thumbs):
@@ -3447,6 +3448,9 @@ class GenerateSubjectAssetsReq(BaseModel):
    size: AssetSize = "source"
    source_frame_indices: list[int] | None = None
    views: list[str] | None = None
+    subject_style: Literal["transparent_human", "source_actor"] = "transparent_human"
+    reconstruction_mode: Literal["same", "similar"] = "same"
+    prompt: str = ""


@app.post("/jobs/{job_id}/frames/{idx}/elements", response_model=Job)
@@ -3834,27 +3838,43 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
    source_indices = [int(x) for x in (req.source_frame_indices or [idx]) if isinstance(x, int) or str(x).isdigit()]
    if idx not in source_indices:
        source_indices = [idx] + source_indices
-    source_indices = list(dict.fromkeys(source_indices))[:6]
+    source_indices = list(dict.fromkeys(source_indices))[:12]

    model_src, tmp_focus = _focus_source_for_element(job_id, idx, el)
    sheet_tmp: Path | None = None
    if len(source_indices) > 1:
        sheet_tmp = job_dir(job_id) / "tmp" / f"subject_refs_{idx:03d}_{element_id}_{uuid.uuid4().hex[:6]}.jpg"
-        sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp)
+        sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp, max_items=12)
        if sheet:
            model_src = sheet

    target = (el.name_en or el.name_zh).strip()
    bg_phrase = "pure white" if req.background == "white" else "pure black"
-    kind_phrase = "person, animal, or living character" if req.subject_kind == "living" else "object or product-like subject"
+    similar_actor = req.subject_kind == "living" and req.subject_style == "source_actor" and req.reconstruction_mode == "similar"
+    kind_phrase = "human actor or living character" if req.subject_kind == "living" else "object or product-like subject"
    transparent_character_clause = (
        TRANSPARENT_HUMAN_POSITIVE_PROMPT
        + " The generated living character must be a friendly transparent humanoid with transparent or translucent outer body and clean white skeleton visible inside the same body. "
        + TRANSPARENT_HUMAN_NEGATIVE_PROMPT
        + " Do not render a normal human, ordinary skeleton-only character, horror skeleton, medical anatomy, organs, veins, blood, corpse, zombie, hospital, surgery, or autopsy visual. "
-        if req.subject_kind == "living"
+        if req.subject_kind == "living" and req.subject_style == "transparent_human"
        else ""
    )
+    actor_style_clause = (
+        "Generate a believable normal commercial video actor, not a transparent or skeleton character. "
+        "Use the references to understand the source video's casting direction, age range, gender presentation, body proportion, wardrobe category, gesture vocabulary, framing, energy, lighting, and creator-ad style. "
+        "Do not recreate the exact person's face, biometric identity, unique likeness, tattoos, scars, logos, watermarks, captions, or platform UI. "
+        "The output must be a newly designed similar actor that could play the same role in a new ad, with consistent identity across all views. "
+        if similar_actor
+        else ""
+    )
+    identity_clause = (
+        "Create a similar but non-identical original subject: match the performance role, silhouette category, styling direction, camera-readability, and commercial mood, while changing exact identity and unique personal features. "
+        if req.reconstruction_mode == "similar"
+        else "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
+    )
+    prompt_extra = req.prompt.strip()
+    prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else ""
    models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
    generated: list[SubjectAsset] = []
    try:
@@ -3864,17 +3884,19 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
                    emotion = view_label.replace("表情", "")
                    view_prompt = f"full-body upright standing character reference with a clear {emotion} facial expression"
                elif view.startswith("action_") or view == "side_walk":
-                    view_prompt = f"full-body upright standing character reference, {view_label}, same identity and proportions"
+                    view_prompt = f"full-body upright standing character reference, {view_label}, consistent actor proportions"
                else:
                    view_prompt = f"full-body upright standing character reference, {view_label}"
            else:
                view_prompt = f"complete object/product reference, {view_label} view"
            prompt = (
-                f"Use the reference image(s) only as visual evidence to redraw the same {target}; do not crop, cut out, paste, or extract pixels from the source. "
-                f"Generate one newly rendered {view_prompt} of the same subject. "
+                f"Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. "
+                f"Generate one newly rendered {view_prompt} for {target}. "
                f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. "
-                "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
-                "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
+                + identity_clause
+                + prompt_extra_clause
+                + actor_style_clause
+                + "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
                "Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
                f"Create a high-definition standalone asset on a solid {bg_phrase} background. "
                "No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. "