auto-save 2026-05-14 06:33 (~5)

This commit is contained in:
2026-05-14 06:33:37 +08:00
parent 6480d69c63
commit 0d86b4cff2
5 changed files with 271 additions and 96 deletions

View File

@@ -1957,6 +1957,8 @@ class GenerateSceneAssetReq(BaseModel):
size: AssetSize = "source"
scene_mode: SceneMode = "remove_subject"
scene_style: SceneStyle = "source"
prompt: str = ""
source_frame_indices: list[int] | None = None
class GenerateSubjectAssetsReq(BaseModel):
@@ -2096,6 +2098,18 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
if not src.exists():
raise HTTPException(404, "source frame file missing")
source_indices = [int(x) for x in (req.source_frame_indices or [idx]) if isinstance(x, int) or str(x).isdigit()]
if not source_indices:
source_indices = [idx]
source_indices = list(dict.fromkeys(source_indices))[:8]
model_src = src
sheet_tmp: Path | None = None
if len(source_indices) > 1:
sheet_tmp = job_dir(job_id) / "tmp" / f"scene_refs_{idx:03d}_{uuid.uuid4().hex[:6]}.jpg"
sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp)
if sheet:
model_src = sheet
confirmed_subjects = [
(e.name_en or e.name_zh).strip()
for ref_frame in job.frames
@@ -2136,10 +2150,23 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
"warm_lifestyle": "Use a warm lifestyle style: realistic lived-in details, soft natural light, approachable atmosphere.",
"cinematic": "Use a cinematic style: dramatic but natural lighting, richer depth, filmic contrast, not fantasy.",
}[req.scene_style]
user_prompt = req.prompt.strip()
user_prompt_clause = (
"User scene direction: " + user_prompt[:1200] + " "
if user_prompt
else ""
)
reference_clause = (
f"Use the selected reference frame contact sheet as visual evidence for location, composition, lighting, materials, and atmosphere. Reference frame indices: {', '.join(str(i + 1) for i in source_indices)}. "
if len(source_indices) > 1
else "Use the provided frame as the primary visual reference. "
)
prompt = (
"Create one clean high-definition scene/background reference image from this frame. "
+ subject_clause
+ "Do not include the removed subject, duplicate people, animals, products, text, watermark, platform UI, captions, usernames, hashtags, logos, or overlay graphics. "
+ reference_clause
+ user_prompt_clause
+ mode_clause + " "
+ style_clause + " "
+ "Enhance clarity and texture while avoiding over-smoothing, warped geometry, or changing important perspective details. "
@@ -2147,9 +2174,13 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
)
models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
try:
img_bytes, _mode = _image_edit_call(src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
except RuntimeError as e:
raise HTTPException(500, f"scene asset failed: {e}")
finally:
if sheet_tmp and sheet_tmp.exists():
try: sheet_tmp.unlink()
except OSError: pass
asset_id = f"scene_{idx:03d}_{uuid.uuid4().hex[:8]}"
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
@@ -2306,23 +2337,27 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
generated: list[SubjectAsset] = []
try:
for view, view_label in _subject_view_labels(req.subject_kind, req.views):
if view == "side_walk":
view_prompt = "side view in a natural walking pose, same identity and proportions"
elif view.startswith("expression_"):
emotion = view_label.replace("表情", "")
view_prompt = f"clear {emotion} facial expression reference, frontal or three-quarter standing pose, preserving the same identity"
elif view.startswith("action_"):
view_prompt = f"{view_label} reference pose, same identity and proportions"
if req.subject_kind == "living":
if view.startswith("expression_"):
emotion = view_label.replace("表情", "")
view_prompt = f"full-body upright standing character reference with a clear {emotion} facial expression"
elif view.startswith("action_") or view == "side_walk":
view_prompt = f"full-body upright standing character reference, {view_label}, same identity and proportions"
else:
view_prompt = f"full-body upright standing character reference, {view_label}"
else:
view_prompt = f"{view_label} view"
view_prompt = f"complete object/product reference, {view_label} view"
prompt = (
f"Use the reference image(s) to generate a single {view_prompt} of the same {target}. "
f"Use the reference image(s) only as visual evidence to redraw the same {target}; do not crop, cut out, paste, or extract pixels from the source. "
f"Generate one newly rendered {view_prompt} of the same subject. "
f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. "
"Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
f"Create a high-definition standalone asset on a {bg_phrase} background. "
"No extra objects, no original scene fragments, no text, no watermark, no UI. "
"If the source is incomplete or occluded, intelligently complete missing parts while staying consistent with the reference. "
"For living subjects, keep the body standing and readable; do not create medical, horror, or distorted anatomy."
"The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
"Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
f"Create a high-definition standalone asset on a solid {bg_phrase} background. "
"No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. "
"If the source is incomplete, partially visible, occluded, or low resolution, reconstruct the missing parts by redrawing a clean complete subject while staying consistent with the reference. "
"For living subjects, keep a normal upright standing pose for the standard views; do not create sitting, walking, medical, horror, or distorted anatomy unless explicitly requested by the view label."
)
try:
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
@@ -2331,7 +2366,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
asset_id = f"subject_{idx:03d}_{element_id}_{view}_{uuid.uuid4().hex[:8]}"
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
width, height = _normalize_asset_image(img_bytes, out_path, _source_frame_path(job_id, idx), req.size, req.background, square=False)
width, height = _normalize_asset_image(img_bytes, out_path, _source_frame_path(job_id, idx), req.size, req.background, square=False, fill_subject=True)
generated.append(SubjectAsset(
id=asset_id,
view=view,