auto-save 2026-05-14 06:11 (~6)

2026-05-14 06:11:29 +08:00
parent 2b546168f7
commit 871ced6d2d
6 changed files with 225 additions and 82 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -97,6 +97,8 @@ AssetSize = Literal["source", "1024", "1536", "2048"]
 AssetQuality = Literal["hd"]
 SubjectKind = Literal["object", "living"]
 SubjectView = str
+SceneMode = Literal["remove_subject", "similar", "style"]
+SceneStyle = Literal["source", "premium_product", "clean_studio", "warm_lifestyle", "cinematic"]
 FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = {
    "balanced": "综合关键帧",
    "subject": "清晰主体",
@@ -191,6 +193,8 @@ class SceneAsset(BaseModel):
    height: int = 0
    quality: AssetQuality = "hd"
    size: AssetSize = "source"
+    scene_mode: SceneMode = "remove_subject"
+    scene_style: SceneStyle = "source"
    quality_report: QualityReport | None = None
    created_at: float = 0.0

@@ -1930,6 +1934,8 @@ class UpdateElementReq(BaseModel):
 class GenerateSceneAssetReq(BaseModel):
    quality: AssetQuality = "hd"
    size: AssetSize = "source"
+    scene_mode: SceneMode = "remove_subject"
+    scene_style: SceneStyle = "source"


 class GenerateSubjectAssetsReq(BaseModel):
@@ -2058,7 +2064,8 @@ def delete_element(job_id: str, idx: int, element_id: str) -> Job:

@app.post("/jobs/{job_id}/frames/{idx}/scene-asset", response_model=Job)
 def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> Job:
-    """为关键帧生成一张干净、高清的场景参考图。默认一帧只需要一张，重跑会保留历史供人工比对。"""
+    """为关键帧生成一张干净、高清的场景参考图。默认一帧只需要一张，重跑会保留历史供人工比对。
+    场景图排在主体资产之后：优先依据已确认主体，去主体并补全背景，再按模式生成原场景/相似场景/换风格场景。"""
    import time as _time
    job = JOBS.get(job_id)
    if not job:
@@ -2068,12 +2075,51 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
    if not src.exists():
        raise HTTPException(404, "source frame file missing")

+    confirmed_subjects = [  # names of elements that already have subject assets
+        (e.name_en or e.name_zh or "").strip()
+        for e in (frame.elements or [])
+        if (e.subject_assets or []) and (e.name_en or e.name_zh or "").strip()  # skip blank names so the prompt never lists empty subjects
+    ]
+    if not confirmed_subjects:  # fall back to at most three named elements of the frame
+        confirmed_subjects = [
+            (e.name_en or e.name_zh or "").strip()
+            for e in (frame.elements or [])
+            if (e.name_en or e.name_zh or "").strip()
+        ][:3]
+    subject_clause = (
+        "Confirmed foreground subject(s) to remove: " + ", ".join(confirmed_subjects) + ". "
+        if confirmed_subjects
+        else "Remove the main foreground subject from the frame if present. "
+    )
+    mode_clause = {
+        "remove_subject": (
+            "Keep the original environment, camera angle, perspective, composition, lighting direction, color mood, and spatial layout. "
+            "The result should be an empty clean scene/background plate with the subject removed and the occluded background reconstructed."
+        ),
+        "similar": (
+            "Create a similar but not identical scene/background plate: keep the same camera angle, rough spatial layout, lighting direction, and usage context, "
+            "but vary props, surface details, textures, and small environmental details so it is not a duplicate of the source."
+        ),
+        "style": (
+            "Create a scene/background plate with the same camera angle and spatial layout, but reinterpret the environment in the selected visual style. "
+            "Keep it believable and useful for image-to-video generation."
+        ),
+    }[req.scene_mode]
+    style_clause = {
+        "source": "Follow the original source style.",
+        "premium_product": "Use a premium product-advertising style: polished, high-end, clean commercial lighting, refined materials.",
+        "clean_studio": "Use a clean studio style: simple surfaces, controlled lighting, minimal distractions.",
+        "warm_lifestyle": "Use a warm lifestyle style: realistic lived-in details, soft natural light, approachable atmosphere.",
+        "cinematic": "Use a cinematic style: dramatic but natural lighting, richer depth, filmic contrast, not fantasy.",
+    }[req.scene_style]
    prompt = (
-        "Create one clean high-definition scene reference image from this frame. "
-        "Remove watermarks, platform UI, captions, usernames, hashtags, logos, and overlay graphics. "
-        "Preserve the original camera angle, composition, environment, lighting style, and believable spatial layout. "
-        "Do not create multiple views. Do not isolate objects. Keep it useful as the scene/background reference for image-to-video generation. "
-        "Enhance clarity and texture while avoiding over-smoothing or changing important visual details."
+        "Create one clean high-definition scene/background reference image from this frame. "
+        + subject_clause
+        + "Do not include the removed subject, duplicate people, animals, products, text, watermark, platform UI, captions, usernames, hashtags, logos, or overlay graphics. "
+        + mode_clause + " "
+        + style_clause + " "
+        + "Enhance clarity and texture while avoiding over-smoothing, warped geometry, or changing important perspective details. "
+        + "Do not create multiple views. Do not isolate objects."
    )
    models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
    try:
@@ -2093,6 +2139,8 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
        height=height,
        quality=req.quality,
        size=req.size,
+        scene_mode=req.scene_mode,
+        scene_style=req.scene_style,
        quality_report=report,
        created_at=_time.time(),
    )