auto-save 2026-05-18 07:27 (~6)

2026-05-18 07:27:45 +08:00
parent 4653108baf
commit 9790e5bedb
6 changed files with 213 additions and 60 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -4056,7 +4056,7 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
@app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/subject-assets", response_model=Job)
 def generate_subject_assets(job_id: str, idx: int, element_id: str, req: GenerateSubjectAssetsReq) -> Job:
    """为一个主体生成多视角资产包。
-    如果传入 source_frame_indices，则把多张已选关键帧拼成参考板，表示这些帧都在服务同一个主体。"""
+    如果传入 source_frame_indices 或内置 character_id，则把多张参考图作为独立 image[] 证据提交。"""
    import time as _time
    job = JOBS.get(job_id)
    if not job:
@@ -4071,13 +4071,30 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
        source_indices = [idx] + source_indices
    source_indices = list(dict.fromkeys(source_indices))[:12]

+    character_reference_paths: list[Path] = []
+    character_reference_clause = ""
+    character_label = ""
+    character_id = (req.character_id or "").strip()
+    if character_id:
+        character = find_character_library_item(character_id)
+        character_label = character.name
+        for image in character.images[:7]:
+            character_reference_paths.append(character_library_file(image.filename))
+        character_reference_clause = (
+            f"Selected built-in creative character reference: {character.name}. "
+            "Use these planned character images as a high-quality creative direction and anatomy/style bible only; "
+            "do not copy the exact face, exact pose, exact silhouette, pixels, or make a duplicate. "
+            "Create a new innovative variation that keeps the same broad role, transparent wellness character language, "
+            "camera readability, and shoulder/neck product compatibility. "
+        )
+
    model_src, tmp_focus = _focus_source_for_element(job_id, idx, el)
-    sheet_tmp: Path | None = None
-    if len(source_indices) > 1:
-        sheet_tmp = job_dir(job_id) / "tmp" / f"subject_refs_{idx:03d}_{element_id}_{uuid.uuid4().hex[:6]}.jpg"
-        sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp, max_items=12)
-        if sheet:
-            model_src = sheet
+    frame_reference_paths = [p for p in (_source_frame_path(job_id, i) for i in source_indices) if p.exists()]
+    if character_reference_paths:
+        remaining = max(0, 10 - len(character_reference_paths))
+        model_src = character_reference_paths + frame_reference_paths[:remaining]
+    elif len(frame_reference_paths) > 1:
+        model_src = frame_reference_paths[:10]

    try:
        with Image.open(_source_frame_path(job_id, idx)) as src_im:
@@ -4118,18 +4135,27 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
    prompt_extra = req.prompt.strip()
    prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else ""
    identity_lock_clause = (
-        "Identity lock: these API calls generate a six-view pack for ONE single subject, but each individual output file must show only its one requested view. "
+        "Identity lock: these API calls generate one high-definition multi-view pack for ONE single subject, but each individual output file must show only its one requested view. "
        "Before rendering, infer one consistent character bible from the reference image(s): gender presentation, age range, body proportions, head shape, face direction cues, material, silhouette, wardrobe/material style, and commercial mood. "
        "Keep that same character bible unchanged across every generated view in separate files. "
-        "If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same six-view pack. "
+        "If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same pack. "
        "For transparent humanoids, keep the same transparent skin shell, skeleton proportions, visible spine/rib cage/pelvis/limb bones, and non-horror wellness character style in every view. "
    )
+    neck_product_clause = (
+        "This subject pack is for SKG neck-and-shoulder wearable massage device videos. "
+        "Make the neck, collarbone, shoulder line, upper back, side neck, and shoulder slope clear and product-ready. "
+        "Avoid bulky collars, scarves, hair, hoods, props, or poses that hide the neck/shoulder placement area. "
+        "For back and close-up views, prioritize the cervical spine, shoulder blades, upper trapezius, and clean wearable-device contact area. "
+    )
    models = [GPT_IMAGE_MODEL]
    generated: list[SubjectAsset] = []
    try:
        for view, view_label in _subject_view_labels(req.subject_kind, req.views):
+            closeup_view = view in {"bust", "back_detail", "bust_front", "bust_left_45", "bust_right_45", "back_neck_detail"} or "detail" in view
            if req.subject_kind == "living":
-                if view.startswith("expression_"):
+                if closeup_view:
+                    view_prompt = f"upper-body shoulder-and-neck close-up character reference, {view_label}"
+                elif view.startswith("expression_"):
                    emotion = view_label.replace("表情", "")
                    view_prompt = f"full-body upright standing character reference with a clear {emotion} facial expression"
                elif view.startswith("action_") or view == "side_walk":
@@ -4142,8 +4168,14 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
            single_view_clause = (
                f"Single-image output rule: this output file is ONLY for the {view_label} view ({view_name}). "
                "Render exactly one subject, one time, in one pose and one camera angle. "
-                "Do not create a six-view sheet, contact sheet, grid, storyboard, lineup, comparison layout, before/after layout, mirrored pair, duplicate subjects, thumbnails, labels, captions, arrows, view names, panel borders, or multiple versions in the same image. "
-                "Do not include any of the other five views in this image. "
+                "Do not create a multi-view sheet, contact sheet, grid, storyboard, lineup, comparison layout, before/after layout, mirrored pair, duplicate subjects, thumbnails, labels, captions, arrows, view names, panel borders, or multiple versions in the same image. "
+                "Do not include any other views in this image. "
+            )
+            framing_clause = (
+                "For this close-up view, intentionally crop as an upper-body asset from head/neck to chest or upper back; the neck, shoulders, collarbone or upper spine area must be large, clear, and useful for placing a neck-and-shoulder massage device. "
+                "Do not force full-body framing for close-ups. "
+                if closeup_view and req.subject_kind == "living"
+                else "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
            )
            prompt = (
                f"Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. "
@@ -4152,15 +4184,16 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
                + single_view_clause
                + identity_clause
                + identity_lock_clause
+                + character_reference_clause
+                + neck_product_clause
                + canvas_clause
                + prompt_extra_clause
                + actor_style_clause
-                + "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
-                "Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
+                + framing_clause
                f"Create a high-definition standalone asset on a solid {bg_phrase} background. "
                "No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. "
                "If the source is incomplete, partially visible, occluded, or low resolution, reconstruct the missing parts by redrawing a clean complete subject while staying consistent with the reference. "
-                "For living subjects, keep a normal upright standing pose for the standard views; do not create sitting, walking, medical, horror, or distorted anatomy unless explicitly requested by the view label. "
+                "For living standard full-body views, keep a normal upright standing pose; do not create sitting, walking, medical, horror, or distorted anatomy unless explicitly requested by the view label. "
                + transparent_character_clause
            )
            try:
@@ -4174,7 +4207,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
            generated.append(SubjectAsset(
                id=asset_id,
                view=view,
-                label=f"{el.name_zh} · {view_label}",
+                label=f"{el.name_zh} · {view_label}" + (f" · {character_label}" if character_label else ""),
                url=_asset_url(job_id, asset_id),
                width=width,
                height=height,
@@ -4185,7 +4218,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
                created_at=_time.time(),
            ))
    finally:
-        for p in (tmp_focus, sheet_tmp):
+        for p in (tmp_focus,):
            if p and p.exists():
                try: p.unlink()
                except OSError: pass