auto-save 2026-05-14 11:53 (~4)
This commit is contained in:
@@ -1,19 +1,5 @@
|
||||
{
|
||||
"entries": [
|
||||
{
|
||||
"files_changed": 1,
|
||||
"hash": "7a5b09a",
|
||||
"message": "auto-save 2026-05-13 04:05 (~1)",
|
||||
"ts": "2026-05-13T04:06:09+08:00",
|
||||
"type": "commit"
|
||||
},
|
||||
{
|
||||
"files_changed": 1,
|
||||
"hash": "6304eab",
|
||||
"message": "auto-save 2026-05-13 04:11 (~1)",
|
||||
"ts": "2026-05-13T04:12:02+08:00",
|
||||
"type": "commit"
|
||||
},
|
||||
{
|
||||
"files_changed": 1,
|
||||
"hash": "9fcc418",
|
||||
@@ -3298,6 +3284,19 @@
|
||||
"type": "session-heartbeat",
|
||||
"message": "Codex 会话活跃 · 最近命令:codex · 6 项未提交变更 · 最近提交:auto-save 2026-05-14 11:41 (~1)",
|
||||
"files_changed": 6
|
||||
},
|
||||
{
|
||||
"ts": "2026-05-14T11:47:40+08:00",
|
||||
"type": "commit",
|
||||
"message": "auto-save 2026-05-14 11:47 (~7)",
|
||||
"hash": "ba491c0",
|
||||
"files_changed": 7
|
||||
},
|
||||
{
|
||||
"ts": "2026-05-14T03:48:39Z",
|
||||
"type": "session-heartbeat",
|
||||
"message": "Codex 会话活跃 · 最近命令:codex · 2 项未提交变更 · 最近提交:auto-save 2026-05-14 11:47 (~7)",
|
||||
"files_changed": 2
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
107
api/main.py
107
api/main.py
@@ -120,6 +120,7 @@ SubjectKind = Literal["object", "living"]
|
||||
SubjectView = str
|
||||
SceneMode = Literal["remove_subject", "similar", "style"]
|
||||
SceneStyle = Literal["source", "premium_product", "clean_studio", "warm_lifestyle", "cinematic"]
|
||||
SceneAssetRole = Literal["scene", "first_frame", "last_frame"]
|
||||
FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = {
|
||||
"transparent_human": "透明骨架人",
|
||||
"balanced": "综合关键帧",
|
||||
@@ -256,6 +257,7 @@ class SceneAsset(BaseModel):
|
||||
size: AssetSize = "source"
|
||||
scene_mode: SceneMode = "remove_subject"
|
||||
scene_style: SceneStyle = "source"
|
||||
asset_role: SceneAssetRole = "scene"
|
||||
quality_report: QualityReport | None = None
|
||||
created_at: float = 0.0
|
||||
|
||||
@@ -302,6 +304,9 @@ class ProductFusionRegion(BaseModel):
|
||||
|
||||
class ProductFusionShot(BaseModel):
|
||||
id: str = ""
|
||||
first_image: dict | None = None
|
||||
last_image: dict | None = None
|
||||
product_images: list[dict] = Field(default_factory=list)
|
||||
product_image: dict | None = None
|
||||
person_image: dict | None = None
|
||||
product_region: ProductFusionRegion | None = None
|
||||
@@ -1897,6 +1902,40 @@ def _image_edit_call(
|
||||
return b64lib.b64decode(b64), effective_mode
|
||||
|
||||
|
||||
def _image_text_call(
    prompt: str,
    model: str | None = None,
    models: list[str] | None = None,
    max_attempts: int = 3,
) -> tuple[bytes, str]:
    """Generate one image from a text prompt only, rotating models across retries.

    Args:
        prompt: Text prompt forwarded to the image generation endpoint.
        model: Single model name, used only when ``models`` is empty or None;
            falls back to ``IMAGE_MODEL`` when this is also unset.
        models: Ordered candidate models. Attempt *i* uses the *i*-th entry;
            once the list is exhausted the last entry is reused.
        max_attempts: Total number of generation attempts before giving up.

    Returns:
        A ``(image_bytes, "text")`` tuple, where ``image_bytes`` is the decoded
        ``b64_json`` payload of the first returned image.

    Raises:
        RuntimeError: If ``LLM_API_KEY`` is not configured, or if every attempt
            failed or returned no image data.
    """
    import base64 as b64lib
    import time as _time

    if not LLM_API_KEY:
        raise RuntimeError("LLM_API_KEY 未配置")

    candidates = list(models) if models else [model or IMAGE_MODEL]
    final_slot = len(candidates) - 1
    last_err = ""

    for attempt in range(max_attempts):
        # Walk the candidate list one step per attempt, pinning to the last entry.
        current_model = candidates[min(attempt, final_slot)]
        try:
            resp = llm().images.generate(model=current_model, prompt=prompt, n=1)
            if hasattr(resp, "model_dump"):
                payload = resp.model_dump()
            else:
                # Responses without model_dump(): rebuild the one field that is read.
                payload = {"data": [{"b64_json": resp.data[0].b64_json}]}

            encoded = payload["data"][0].get("b64_json") if payload.get("data") else None
            if encoded:
                return b64lib.b64decode(encoded), "text"

            err_obj = payload.get("error") or {}
            last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}"
        except Exception as exc:
            # Any failure (request, payload shape, base64 decode) is recorded and retried.
            last_err = f"{type(exc).__name__}: {exc} · model={current_model}"

        if attempt == max_attempts - 1:
            # Last attempt already used: skip the retry log and the pause.
            break

        next_model = candidates[min(attempt + 1, final_slot)]
        print(f"[image text retry {attempt + 1}/{max_attempts} → {next_model}] {last_err}", flush=True)
        _time.sleep(1.0)

    raise RuntimeError(f"image text failed after {max_attempts} attempts: {last_err}")
|
||||
|
||||
|
||||
# ---------- API 路由 ----------
|
||||
|
||||
class CreateJobReq(BaseModel):
|
||||
@@ -2642,6 +2681,7 @@ class GenerateSceneAssetReq(BaseModel):
|
||||
size: AssetSize = "source"
|
||||
scene_mode: SceneMode = "remove_subject"
|
||||
scene_style: SceneStyle = "source"
|
||||
asset_role: SceneAssetRole = "scene"
|
||||
prompt: str = ""
|
||||
source_frame_indices: list[int] | None = None
|
||||
|
||||
@@ -2772,8 +2812,8 @@ def delete_element(job_id: str, idx: int, element_id: str) -> Job:
|
||||
|
||||
@app.post("/jobs/{job_id}/frames/{idx}/scene-asset", response_model=Job)
|
||||
def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> Job:
|
||||
"""为关键帧生成一张干净、高清的场景参考图。默认一帧只需要一张,重跑会保留历史供人工比对。
|
||||
场景图排在主体资产之后:优先依据已确认主体,去主体并补全背景,再按模式生成原场景/相似场景/换风格场景。"""
|
||||
"""为关键帧生成一张资产图。
|
||||
scene: 去主体背景板;first_frame/last_frame: 纯文字生成视频首尾帧,参考帧只用于理解统一人物形象。"""
|
||||
import time as _time
|
||||
job = JOBS.get(job_id)
|
||||
if not job:
|
||||
@@ -2814,6 +2854,11 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
|
||||
if confirmed_subjects
|
||||
else "Remove the main foreground subject from the frame if present. "
|
||||
)
|
||||
identity_clause = (
|
||||
"Known character identity cues: " + ", ".join(confirmed_subjects) + ". "
|
||||
if confirmed_subjects
|
||||
else "Infer one consistent friendly transparent human character identity from the provided references. "
|
||||
)
|
||||
mode_clause = {
|
||||
"remove_subject": (
|
||||
"Keep the original environment, camera angle, perspective, composition, lighting direction, color mood, and spatial layout. "
|
||||
@@ -2846,22 +2891,44 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
|
||||
if len(source_indices) > 1
|
||||
else "Use the provided frame as the primary visual reference. "
|
||||
)
|
||||
prompt = (
|
||||
"Create one clean high-definition scene/background reference image from this frame. "
|
||||
+ subject_clause
|
||||
+ "Do not include the removed subject, duplicate people, animals, products, text, watermark, platform UI, captions, usernames, hashtags, logos, or overlay graphics. "
|
||||
+ reference_clause
|
||||
+ user_prompt_clause
|
||||
+ mode_clause + " "
|
||||
+ style_clause + " "
|
||||
+ "Enhance clarity and texture while avoiding over-smoothing, warped geometry, or changing important perspective details. "
|
||||
+ "Do not create multiple views. Do not isolate objects."
|
||||
)
|
||||
if req.asset_role == "scene":
|
||||
prompt = (
|
||||
"Create one clean high-definition scene/background reference image from this frame. "
|
||||
+ subject_clause
|
||||
+ "Do not include the removed subject, duplicate people, animals, products, text, watermark, platform UI, captions, usernames, hashtags, logos, or overlay graphics. "
|
||||
+ reference_clause
|
||||
+ user_prompt_clause
|
||||
+ mode_clause + " "
|
||||
+ style_clause + " "
|
||||
+ "Enhance clarity and texture while avoiding over-smoothing, warped geometry, or changing important perspective details. "
|
||||
+ "Do not create multiple views. Do not isolate objects."
|
||||
)
|
||||
else:
|
||||
role_clause = (
|
||||
"This is the FIRST frame for an image-to-video clip: create a clear beginning pose and composition. "
|
||||
if req.asset_role == "first_frame"
|
||||
else "This is the LAST frame for an image-to-video clip: create a clear ending pose that can naturally follow the first frame, not a duplicate. "
|
||||
)
|
||||
prompt = (
|
||||
"Create one premium 9:16 high-definition video endpoint frame from text direction. "
|
||||
+ role_clause
|
||||
+ identity_clause
|
||||
+ reference_clause
|
||||
+ user_prompt_clause
|
||||
+ style_clause + " "
|
||||
+ "The frame must feature the same friendly transparent or translucent human character: glass/acrylic/vinyl-like transparent outer body, visible clean white skeleton inside, clean commercial wellness style, non-horror. "
|
||||
+ "Use the references only to understand character identity, proportions, transparent shell, white bones, pose vocabulary, camera language, and lighting; do not copy watermarks, subtitles, platform UI, logos, or accidental artifacts. "
|
||||
+ "Do not create a plain background plate. Do not remove the character. Do not include SKG product unless the user prompt explicitly asks for it. "
|
||||
+ "The output should be ready as a first/last frame for Seedance video generation, with stable composition, believable perspective, clear subject, no text, no watermark, no gore, no medical surgery imagery."
|
||||
)
|
||||
models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
|
||||
try:
|
||||
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
|
||||
if req.asset_role == "scene":
|
||||
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
|
||||
else:
|
||||
img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3)
|
||||
except RuntimeError as e:
|
||||
raise HTTPException(500, f"scene asset failed: {e}")
|
||||
raise HTTPException(500, f"{req.asset_role} asset failed: {e}")
|
||||
finally:
|
||||
if sheet_tmp and sheet_tmp.exists():
|
||||
try: sheet_tmp.unlink()
|
||||
@@ -2873,7 +2940,11 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
|
||||
report = _image_quality_report(out_path)
|
||||
scene = SceneAsset(
|
||||
id=asset_id,
|
||||
label=f"分镜 {idx + 1} 场景图",
|
||||
label=(
|
||||
f"分镜 {idx + 1} 场景图"
|
||||
if req.asset_role == "scene"
|
||||
else f"分镜 {idx + 1} {'首帧' if req.asset_role == 'first_frame' else '尾帧'}"
|
||||
),
|
||||
url=_asset_url(job_id, asset_id),
|
||||
width=width,
|
||||
height=height,
|
||||
@@ -2881,6 +2952,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
|
||||
size=req.size,
|
||||
scene_mode=req.scene_mode,
|
||||
scene_style=req.scene_style,
|
||||
asset_role=req.asset_role,
|
||||
quality_report=report,
|
||||
created_at=_time.time(),
|
||||
)
|
||||
@@ -2891,7 +2963,8 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
|
||||
f.quality_report = _image_quality_report(src)
|
||||
f.scene_assets = (f.scene_assets or []) + [scene]
|
||||
new_frames.append(f)
|
||||
update(job, frames=new_frames, message=f"场景图生成完成 · 分镜 {idx + 1}")
|
||||
asset_label = "场景图" if req.asset_role == "scene" else ("首帧" if req.asset_role == "first_frame" else "尾帧")
|
||||
update(job, frames=new_frames, message=f"{asset_label}生成完成 · 分镜 {idx + 1}")
|
||||
return job
|
||||
|
||||
|
||||
|
||||
@@ -2143,13 +2143,13 @@ export function AudioNode({ data, selected }: any) {
|
||||
onTogglePin={() => d.onToggleNodePin?.("audio")}
|
||||
>
|
||||
<div
|
||||
className="space-y-2 text-[11px] text-[var(--text-soft)] leading-snug"
|
||||
className="cursor-pointer space-y-2 text-[11px] text-[var(--text-soft)] leading-snug"
|
||||
onClick={() => {
|
||||
if (job?.video_url) d.onOpenAudioStrip?.(job.id)
|
||||
}}
|
||||
>
|
||||
<div>
|
||||
音轨 → ASR 转录 → 英中翻译 → SKG 口播改写 → MiniMax 配音<br />
|
||||
音轨 → ASR 转录 → 英中翻译 → SKG 英文口播 → MiniMax 英文配音<br />
|
||||
<span className="text-[var(--text-faint)] font-mono">
|
||||
{audioScript?.rewrite_model || "AUDIO_REWRITE_MODEL"} → {audioScript?.voice_model || "MiniMax T2A"}
|
||||
</span>
|
||||
@@ -2186,7 +2186,7 @@ export function AudioNode({ data, selected }: any) {
|
||||
)}
|
||||
{rewrittenText && (
|
||||
<div className="rounded-md border border-emerald-400/25 bg-emerald-400/10 px-2.5 py-2">
|
||||
<div className="mb-1 text-[9.5px] uppercase tracking-widest text-emerald-200/80">改后 · SKG 口播</div>
|
||||
<div className="mb-1 text-[9.5px] uppercase tracking-widest text-emerald-200/80">改后 · SKG English VO</div>
|
||||
<div className="line-clamp-4 text-[11.5px] leading-relaxed text-[var(--text-strong)] break-words">{rewrittenText}</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -67,6 +67,9 @@ export interface ProductFusionRegion {
|
||||
|
||||
export interface ProductFusionShot {
|
||||
id: string
|
||||
first_image?: ImageRef | null
|
||||
last_image?: ImageRef | null
|
||||
product_images?: ImageRef[]
|
||||
product_image?: ImageRef | null
|
||||
person_image?: ImageRef | null
|
||||
product_region?: ProductFusionRegion | null
|
||||
@@ -218,6 +221,7 @@ export type SubjectKind = "object" | "living"
|
||||
export type SubjectView = string
|
||||
export type SceneMode = "remove_subject" | "similar" | "style"
|
||||
export type SceneStyle = "source" | "premium_product" | "clean_studio" | "warm_lifestyle" | "cinematic"
|
||||
export type SceneAssetRole = "scene" | "first_frame" | "last_frame"
|
||||
|
||||
export interface QualityReport {
|
||||
width: number
|
||||
@@ -251,6 +255,7 @@ export interface SceneAsset {
|
||||
size: AssetSize
|
||||
scene_mode?: SceneMode
|
||||
scene_style?: SceneStyle
|
||||
asset_role?: SceneAssetRole
|
||||
quality_report?: QualityReport | null
|
||||
created_at: number
|
||||
}
|
||||
@@ -794,6 +799,7 @@ export async function generateSceneAsset(
|
||||
size?: AssetSize
|
||||
scene_mode?: SceneMode
|
||||
scene_style?: SceneStyle
|
||||
asset_role?: SceneAssetRole
|
||||
prompt?: string
|
||||
source_frame_indices?: number[]
|
||||
} = {},
|
||||
@@ -806,6 +812,7 @@ export async function generateSceneAsset(
|
||||
size: body.size ?? "source",
|
||||
scene_mode: body.scene_mode ?? "remove_subject",
|
||||
scene_style: body.scene_style ?? "source",
|
||||
asset_role: body.asset_role ?? "scene",
|
||||
prompt: body.prompt ?? "",
|
||||
source_frame_indices: body.source_frame_indices ?? null,
|
||||
}),
|
||||
|
||||
Reference in New Issue
Block a user