From 252cdf441d82b2c0b2c46bf3b4f1c695e2c70f52 Mon Sep 17 00:00:00 2001 From: kang Date: Sun, 17 May 2026 21:09:20 +0800 Subject: [PATCH] auto-save 2026-05-17 21:09 (~4) --- .memory/worklog.json | 65 +++++++++++++------------- api/main.py | 46 +++++++++++++----- web/components/ad-recreation-board.tsx | 50 ++++++++++++++++++-- web/lib/api.ts | 6 +++ 4 files changed, 117 insertions(+), 50 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index 736aa75..c1e9993 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1,38 +1,5 @@ { "entries": [ - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-15 12:29 (~1)", - "ts": "2026-05-15T04:34:45Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "hash": "a6466d0", - "message": "auto-save 2026-05-15 12:35 (~1)", - "ts": "2026-05-15T12:35:55+08:00", - "type": "commit" - }, - { - "files_changed": 1, - "hash": "fe24202", - "message": "auto-save 2026-05-15 12:41 (~1)", - "ts": "2026-05-15T12:41:49+08:00", - "type": "commit" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-15 12:41 (~1)", - "ts": "2026-05-15T04:44:45Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "hash": "275b154", - "message": "auto-save 2026-05-15 12:47 (~1)", - "ts": "2026-05-15T12:47:42+08:00", - "type": "commit" - }, { "files_changed": 1, "hash": "1cb9861", @@ -3262,6 +3229,38 @@ "type": "session-heartbeat", "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:fix: harden product view parsing", "files_changed": 1 + }, + { + "ts": "2026-05-17T20:47:53+08:00", + "type": "commit", + "message": "auto-save 2026-05-17 20:47 (~4)", + "hash": "db24822", + "files_changed": 4 + }, + { + "ts": "2026-05-17T12:48:29Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 2 项未提交变更 · 最近提交:auto-save 2026-05-17 20:47 (~4)", + "files_changed": 2 + }, + { + "ts": "2026-05-17T20:52:52+08:00", + "type": "commit", + "message": "feat: add storyboard script rewriting", + "hash": "096f201", + "files_changed": 2 + }, + { + "ts": "2026-05-17T12:58:29Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: add storyboard script rewriting", + "files_changed": 1 + }, + { + "ts": "2026-05-17T13:08:29Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 3 项未提交变更 · 最近提交:feat: add storyboard script rewriting", + "files_changed": 3 } ] } diff --git a/api/main.py b/api/main.py index be364d8..998573c 100644 --- a/api/main.py +++ b/api/main.py @@ -1140,9 +1140,10 @@ def _focus_source_for_element(job_id: str, idx: int, el: KeyElement) -> tuple[Pa return model_src, tmp_focus -def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_path: Path) -> Path | None: +def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_path: Path, max_items: int = 6) -> Path | None: paths: list[Path] = [] seen: set[int] = set() + max_items = max(2, min(12, int(max_items or 6))) for idx in frame_indices: if idx in seen: continue @@ -1150,7 +1151,7 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat p = _source_frame_path(job_id, idx) if p.exists(): paths.append(p) - if len(paths) >= 6: + if len(paths) >= max_items: break if len(paths) <= 1: return None @@ -1168,7 +1169,7 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat if len(thumbs) <= 1: return None - cols = 3 if len(thumbs) > 2 else 2 + cols = 4 if len(thumbs) > 6 else (3 if len(thumbs) > 2 else 2) rows = (len(thumbs) + cols - 1) // cols sheet = Image.new("RGB", (cols * 420, rows * 420), (245, 245, 245)) for i, thumb in enumerate(thumbs): @@ -3447,6 +3448,9 @@ class GenerateSubjectAssetsReq(BaseModel): size: AssetSize = "source" source_frame_indices: list[int] | None = None views: list[str] | None = None + subject_style: Literal["transparent_human", "source_actor"] = "transparent_human" + reconstruction_mode: Literal["same", "similar"] = "same" + prompt: str = "" @app.post("/jobs/{job_id}/frames/{idx}/elements", response_model=Job) @@ -3834,27 +3838,43 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat source_indices = [int(x) for x in (req.source_frame_indices or [idx]) if isinstance(x, int) or str(x).isdigit()] if idx not in source_indices: source_indices = [idx] + source_indices - source_indices = list(dict.fromkeys(source_indices))[:6] + source_indices = list(dict.fromkeys(source_indices))[:12] model_src, tmp_focus = _focus_source_for_element(job_id, idx, el) sheet_tmp: Path | None = None if len(source_indices) > 1: sheet_tmp = job_dir(job_id) / "tmp" / f"subject_refs_{idx:03d}_{element_id}_{uuid.uuid4().hex[:6]}.jpg" - sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp) + sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp, max_items=12) if sheet: model_src = sheet target = (el.name_en or el.name_zh).strip() bg_phrase = "pure white" if req.background == "white" else "pure black" - kind_phrase = "person, animal, or living character" if req.subject_kind == "living" else "object or product-like subject" + similar_actor = req.subject_kind == "living" and req.subject_style == "source_actor" and req.reconstruction_mode == "similar" + kind_phrase = "human actor or living character" if req.subject_kind == "living" else "object or product-like subject" transparent_character_clause = ( TRANSPARENT_HUMAN_POSITIVE_PROMPT + " The generated living character must be a friendly transparent humanoid with transparent or translucent outer body and clean white skeleton visible inside the same body. " + TRANSPARENT_HUMAN_NEGATIVE_PROMPT + " Do not render a normal human, ordinary skeleton-only character, horror skeleton, medical anatomy, organs, veins, blood, corpse, zombie, hospital, surgery, or autopsy visual. " - if req.subject_kind == "living" + if req.subject_kind == "living" and req.subject_style == "transparent_human" else "" ) + actor_style_clause = ( + "Generate a believable normal commercial video actor, not a transparent or skeleton character. " + "Use the references to understand the source video's casting direction, age range, gender presentation, body proportion, wardrobe category, gesture vocabulary, framing, energy, lighting, and creator-ad style. " + "Do not recreate the exact person's face, biometric identity, unique likeness, tattoos, scars, logos, watermarks, captions, or platform UI. " + "The output must be a newly designed similar actor that could play the same role in a new ad, with consistent identity across all views. " + if similar_actor + else "" + ) + identity_clause = ( + "Create a similar but non-identical original subject: match the performance role, silhouette category, styling direction, camera-readability, and commercial mood, while changing exact identity and unique personal features. " + if req.reconstruction_mode == "similar" + else "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. " + ) + prompt_extra = req.prompt.strip() + prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else "" models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"] generated: list[SubjectAsset] = [] try: @@ -3864,17 +3884,19 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat emotion = view_label.replace("表情", "") view_prompt = f"full-body upright standing character reference with a clear {emotion} facial expression" elif view.startswith("action_") or view == "side_walk": - view_prompt = f"full-body upright standing character reference, {view_label}, same identity and proportions" + view_prompt = f"full-body upright standing character reference, {view_label}, consistent actor proportions" else: view_prompt = f"full-body upright standing character reference, {view_label}" else: view_prompt = f"complete object/product reference, {view_label} view" prompt = ( - f"Use the reference image(s) only as visual evidence to redraw the same {target}; do not crop, cut out, paste, or extract pixels from the source. " - f"Generate one newly rendered {view_prompt} of the same subject. " + f"Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. " + f"Generate one newly rendered {view_prompt} for {target}. " f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. " - "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. " - "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. " + + identity_clause + + prompt_extra_clause + + actor_style_clause + + "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. " "Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. " f"Create a high-definition standalone asset on a solid {bg_phrase} background. " "No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. " diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx index 45ae852..91dbe26 100644 --- a/web/components/ad-recreation-board.tsx +++ b/web/components/ad-recreation-board.tsx @@ -19,8 +19,10 @@ import { type ProductViewAnalysisItem, type StoryboardScriptRewriteSegment, type StoryboardScene, + type SubjectAsset, type SubjectKind, addElement, + analyzeJob, analyzeProductViews, apiAssetUrl, cutoutElement, @@ -250,6 +252,23 @@ function guessSubjectKind(name: string): SubjectKind { : "object" } +function closestFrameForTime(frames: KeyFrame[], time: number) { + if (!frames.length) return null + const first = frames[0] as KeyFrame + return frames.reduce((best, frame) => + Math.abs(frame.timestamp - time) < Math.abs(best.timestamp - time) ? frame : best, + first) +} + +function isSimilarActorElement(element: KeyElement) { + const name = `${element.name_zh || ""} ${element.name_en || ""}`.toLowerCase() + return name.includes("相似主角") || name.includes("similar ad actor") || name.includes("similar actor") +} + +function subjectAssetUrl(job: Job, asset: SubjectAsset) { + return apiAssetUrl(asset.url) || resolveImageRefUrl(job.id, { kind: "asset", frame_idx: 0, element_id: asset.id }) +} + function buildFallbackScene(job: Job, frame: KeyFrame, order: number): StoryboardScene { const frames = [...job.frames].sort((a, b) => a.timestamp - b.timestamp) const nextFrame = frames.find((item) => item.timestamp > frame.timestamp) ?? null @@ -816,11 +835,15 @@ export function AdRecreationBoard({
- + @@ -951,7 +974,17 @@ function AudioIntakeStatus({ job, audioReady }: { job: Job | null; audioReady: b ) } -function AudioIntakePanel({ job }: { job: Job | null }) { +function AudioIntakePanel({ + job, + selectedFrames, + onToggleFrame, + onJobUpdate, +}: { + job: Job | null + selectedFrames: Set + onToggleFrame: (idx: number) => void + onJobUpdate: (job: Job) => void +}) { const [currentTime, setCurrentTime] = useState(0) const [mediaDuration, setMediaDuration] = useState(0) const [audioFeatures, setAudioFeatures] = useState([]) @@ -1082,7 +1115,7 @@ function AudioIntakePanel({ job }: { job: Job | null }) { />
-
+
} title="原版视频" /> @@ -1113,6 +1146,13 @@ function AudioIntakePanel({ job }: { job: Job | null }) {
+ +
} title="逐句时间轴" /> diff --git a/web/lib/api.ts b/web/lib/api.ts index cb00bcc..b913806 100644 --- a/web/lib/api.ts +++ b/web/lib/api.ts @@ -986,6 +986,9 @@ export async function generateSubjectAssets( size?: AssetSize source_frame_indices?: number[] views?: string[] + subject_style?: "transparent_human" | "source_actor" + reconstruction_mode?: "same" | "similar" + prompt?: string } = {}, ): Promise { const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/elements/${elementId}/subject-assets`, { @@ -998,6 +1001,9 @@ export async function generateSubjectAssets( size: body.size ?? "source", source_frame_indices: body.source_frame_indices ?? null, views: body.views ?? null, + subject_style: body.subject_style ?? "transparent_human", + reconstruction_mode: body.reconstruction_mode ?? "same", + prompt: body.prompt ?? "", }), }) if (!res.ok) {