auto-save 2026-05-14 06:33 (~5)
This commit is contained in:
65
api/main.py
65
api/main.py
@@ -1957,6 +1957,8 @@ class GenerateSceneAssetReq(BaseModel):
|
||||
size: AssetSize = "source"
|
||||
scene_mode: SceneMode = "remove_subject"
|
||||
scene_style: SceneStyle = "source"
|
||||
prompt: str = ""
|
||||
source_frame_indices: list[int] | None = None
|
||||
|
||||
|
||||
class GenerateSubjectAssetsReq(BaseModel):
|
||||
@@ -2096,6 +2098,18 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
|
||||
if not src.exists():
|
||||
raise HTTPException(404, "source frame file missing")
|
||||
|
||||
source_indices = [int(x) for x in (req.source_frame_indices or [idx]) if isinstance(x, int) or str(x).isdigit()]
|
||||
if not source_indices:
|
||||
source_indices = [idx]
|
||||
source_indices = list(dict.fromkeys(source_indices))[:8]
|
||||
model_src = src
|
||||
sheet_tmp: Path | None = None
|
||||
if len(source_indices) > 1:
|
||||
sheet_tmp = job_dir(job_id) / "tmp" / f"scene_refs_{idx:03d}_{uuid.uuid4().hex[:6]}.jpg"
|
||||
sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp)
|
||||
if sheet:
|
||||
model_src = sheet
|
||||
|
||||
confirmed_subjects = [
|
||||
(e.name_en or e.name_zh).strip()
|
||||
for ref_frame in job.frames
|
||||
@@ -2136,10 +2150,23 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
|
||||
"warm_lifestyle": "Use a warm lifestyle style: realistic lived-in details, soft natural light, approachable atmosphere.",
|
||||
"cinematic": "Use a cinematic style: dramatic but natural lighting, richer depth, filmic contrast, not fantasy.",
|
||||
}[req.scene_style]
|
||||
user_prompt = req.prompt.strip()
|
||||
user_prompt_clause = (
|
||||
"User scene direction: " + user_prompt[:1200] + " "
|
||||
if user_prompt
|
||||
else ""
|
||||
)
|
||||
reference_clause = (
|
||||
f"Use the selected reference frame contact sheet as visual evidence for location, composition, lighting, materials, and atmosphere. Reference frame indices: {', '.join(str(i + 1) for i in source_indices)}. "
|
||||
if len(source_indices) > 1
|
||||
else "Use the provided frame as the primary visual reference. "
|
||||
)
|
||||
prompt = (
|
||||
"Create one clean high-definition scene/background reference image from this frame. "
|
||||
+ subject_clause
|
||||
+ "Do not include the removed subject, duplicate people, animals, products, text, watermark, platform UI, captions, usernames, hashtags, logos, or overlay graphics. "
|
||||
+ reference_clause
|
||||
+ user_prompt_clause
|
||||
+ mode_clause + " "
|
||||
+ style_clause + " "
|
||||
+ "Enhance clarity and texture while avoiding over-smoothing, warped geometry, or changing important perspective details. "
|
||||
@@ -2147,9 +2174,13 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
|
||||
)
|
||||
models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
|
||||
try:
|
||||
img_bytes, _mode = _image_edit_call(src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
|
||||
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
|
||||
except RuntimeError as e:
|
||||
raise HTTPException(500, f"scene asset failed: {e}")
|
||||
finally:
|
||||
if sheet_tmp and sheet_tmp.exists():
|
||||
try: sheet_tmp.unlink()
|
||||
except OSError: pass
|
||||
|
||||
asset_id = f"scene_{idx:03d}_{uuid.uuid4().hex[:8]}"
|
||||
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
|
||||
@@ -2306,23 +2337,27 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
|
||||
generated: list[SubjectAsset] = []
|
||||
try:
|
||||
for view, view_label in _subject_view_labels(req.subject_kind, req.views):
|
||||
if view == "side_walk":
|
||||
view_prompt = "side view in a natural walking pose, same identity and proportions"
|
||||
elif view.startswith("expression_"):
|
||||
emotion = view_label.replace("表情", "")
|
||||
view_prompt = f"clear {emotion} facial expression reference, frontal or three-quarter standing pose, preserving the same identity"
|
||||
elif view.startswith("action_"):
|
||||
view_prompt = f"{view_label} reference pose, same identity and proportions"
|
||||
if req.subject_kind == "living":
|
||||
if view.startswith("expression_"):
|
||||
emotion = view_label.replace("表情", "")
|
||||
view_prompt = f"full-body upright standing character reference with a clear {emotion} facial expression"
|
||||
elif view.startswith("action_") or view == "side_walk":
|
||||
view_prompt = f"full-body upright standing character reference, {view_label}, same identity and proportions"
|
||||
else:
|
||||
view_prompt = f"full-body upright standing character reference, {view_label}"
|
||||
else:
|
||||
view_prompt = f"{view_label} view"
|
||||
view_prompt = f"complete object/product reference, {view_label} view"
|
||||
prompt = (
|
||||
f"Use the reference image(s) to generate a single {view_prompt} of the same {target}. "
|
||||
f"Use the reference image(s) only as visual evidence to redraw the same {target}; do not crop, cut out, paste, or extract pixels from the source. "
|
||||
f"Generate one newly rendered {view_prompt} of the same subject. "
|
||||
f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. "
|
||||
"Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
|
||||
f"Create a high-definition standalone asset on a {bg_phrase} background. "
|
||||
"No extra objects, no original scene fragments, no text, no watermark, no UI. "
|
||||
"If the source is incomplete or occluded, intelligently complete missing parts while staying consistent with the reference. "
|
||||
"For living subjects, keep the body standing and readable; do not create medical, horror, or distorted anatomy."
|
||||
"The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
|
||||
"Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
|
||||
f"Create a high-definition standalone asset on a solid {bg_phrase} background. "
|
||||
"No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. "
|
||||
"If the source is incomplete, partially visible, occluded, or low resolution, reconstruct the missing parts by redrawing a clean complete subject while staying consistent with the reference. "
|
||||
"For living subjects, keep a normal upright standing pose for the standard views; do not create sitting, walking, medical, horror, or distorted anatomy unless explicitly requested by the view label."
|
||||
)
|
||||
try:
|
||||
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
|
||||
@@ -2331,7 +2366,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
|
||||
|
||||
asset_id = f"subject_{idx:03d}_{element_id}_{view}_{uuid.uuid4().hex[:8]}"
|
||||
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
|
||||
width, height = _normalize_asset_image(img_bytes, out_path, _source_frame_path(job_id, idx), req.size, req.background, square=False)
|
||||
width, height = _normalize_asset_image(img_bytes, out_path, _source_frame_path(job_id, idx), req.size, req.background, square=False, fill_subject=True)
|
||||
generated.append(SubjectAsset(
|
||||
id=asset_id,
|
||||
view=view,
|
||||
|
||||
Reference in New Issue
Block a user