auto-save 2026-05-17 21:09 (~4)

This commit is contained in:
2026-05-17 21:09:20 +08:00
parent 096f201470
commit 252cdf441d
4 changed files with 117 additions and 50 deletions

View File

@@ -1,38 +1,5 @@
{
"entries": [
{
"files_changed": 1,
"message": "Codex 会话活跃 · 最近命令codex · 1 项未提交变更 · 最近提交auto-save 2026-05-15 12:29 (~1)",
"ts": "2026-05-15T04:34:45Z",
"type": "session-heartbeat"
},
{
"files_changed": 1,
"hash": "a6466d0",
"message": "auto-save 2026-05-15 12:35 (~1)",
"ts": "2026-05-15T12:35:55+08:00",
"type": "commit"
},
{
"files_changed": 1,
"hash": "fe24202",
"message": "auto-save 2026-05-15 12:41 (~1)",
"ts": "2026-05-15T12:41:49+08:00",
"type": "commit"
},
{
"files_changed": 1,
"message": "Codex 会话活跃 · 最近命令codex · 1 项未提交变更 · 最近提交auto-save 2026-05-15 12:41 (~1)",
"ts": "2026-05-15T04:44:45Z",
"type": "session-heartbeat"
},
{
"files_changed": 1,
"hash": "275b154",
"message": "auto-save 2026-05-15 12:47 (~1)",
"ts": "2026-05-15T12:47:42+08:00",
"type": "commit"
},
{
"files_changed": 1,
"hash": "1cb9861",
@@ -3262,6 +3229,38 @@
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 分支 main · 1 项未提交变更 · 最近提交fix: harden product view parsing",
"files_changed": 1
},
{
"ts": "2026-05-17T20:47:53+08:00",
"type": "commit",
"message": "auto-save 2026-05-17 20:47 (~4)",
"hash": "db24822",
"files_changed": 4
},
{
"ts": "2026-05-17T12:48:29Z",
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 分支 main · 2 项未提交变更 · 最近提交auto-save 2026-05-17 20:47 (~4)",
"files_changed": 2
},
{
"ts": "2026-05-17T20:52:52+08:00",
"type": "commit",
"message": "feat: add storyboard script rewriting",
"hash": "096f201",
"files_changed": 2
},
{
"ts": "2026-05-17T12:58:29Z",
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 分支 main · 1 项未提交变更 · 最近提交feat: add storyboard script rewriting",
"files_changed": 1
},
{
"ts": "2026-05-17T13:08:29Z",
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 分支 main · 3 项未提交变更 · 最近提交feat: add storyboard script rewriting",
"files_changed": 3
}
]
}

View File

@@ -1140,9 +1140,10 @@ def _focus_source_for_element(job_id: str, idx: int, el: KeyElement) -> tuple[Pa
return model_src, tmp_focus
def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_path: Path) -> Path | None:
def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_path: Path, max_items: int = 6) -> Path | None:
paths: list[Path] = []
seen: set[int] = set()
max_items = max(2, min(12, int(max_items or 6)))
for idx in frame_indices:
if idx in seen:
continue
@@ -1150,7 +1151,7 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat
p = _source_frame_path(job_id, idx)
if p.exists():
paths.append(p)
if len(paths) >= 6:
if len(paths) >= max_items:
break
if len(paths) <= 1:
return None
@@ -1168,7 +1169,7 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat
if len(thumbs) <= 1:
return None
cols = 3 if len(thumbs) > 2 else 2
cols = 4 if len(thumbs) > 6 else (3 if len(thumbs) > 2 else 2)
rows = (len(thumbs) + cols - 1) // cols
sheet = Image.new("RGB", (cols * 420, rows * 420), (245, 245, 245))
for i, thumb in enumerate(thumbs):
@@ -3447,6 +3448,9 @@ class GenerateSubjectAssetsReq(BaseModel):
size: AssetSize = "source"
source_frame_indices: list[int] | None = None
views: list[str] | None = None
subject_style: Literal["transparent_human", "source_actor"] = "transparent_human"
reconstruction_mode: Literal["same", "similar"] = "same"
prompt: str = ""
@app.post("/jobs/{job_id}/frames/{idx}/elements", response_model=Job)
@@ -3834,27 +3838,43 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
source_indices = [int(x) for x in (req.source_frame_indices or [idx]) if isinstance(x, int) or str(x).isdigit()]
if idx not in source_indices:
source_indices = [idx] + source_indices
source_indices = list(dict.fromkeys(source_indices))[:6]
source_indices = list(dict.fromkeys(source_indices))[:12]
model_src, tmp_focus = _focus_source_for_element(job_id, idx, el)
sheet_tmp: Path | None = None
if len(source_indices) > 1:
sheet_tmp = job_dir(job_id) / "tmp" / f"subject_refs_{idx:03d}_{element_id}_{uuid.uuid4().hex[:6]}.jpg"
sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp)
sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp, max_items=12)
if sheet:
model_src = sheet
target = (el.name_en or el.name_zh).strip()
bg_phrase = "pure white" if req.background == "white" else "pure black"
kind_phrase = "person, animal, or living character" if req.subject_kind == "living" else "object or product-like subject"
similar_actor = req.subject_kind == "living" and req.subject_style == "source_actor" and req.reconstruction_mode == "similar"
kind_phrase = "human actor or living character" if req.subject_kind == "living" else "object or product-like subject"
transparent_character_clause = (
TRANSPARENT_HUMAN_POSITIVE_PROMPT
+ " The generated living character must be a friendly transparent humanoid with transparent or translucent outer body and clean white skeleton visible inside the same body. "
+ TRANSPARENT_HUMAN_NEGATIVE_PROMPT
+ " Do not render a normal human, ordinary skeleton-only character, horror skeleton, medical anatomy, organs, veins, blood, corpse, zombie, hospital, surgery, or autopsy visual. "
if req.subject_kind == "living"
if req.subject_kind == "living" and req.subject_style == "transparent_human"
else ""
)
actor_style_clause = (
"Generate a believable normal commercial video actor, not a transparent or skeleton character. "
"Use the references to understand the source video's casting direction, age range, gender presentation, body proportion, wardrobe category, gesture vocabulary, framing, energy, lighting, and creator-ad style. "
"Do not recreate the exact person's face, biometric identity, unique likeness, tattoos, scars, logos, watermarks, captions, or platform UI. "
"The output must be a newly designed similar actor that could play the same role in a new ad, with consistent identity across all views. "
if similar_actor
else ""
)
identity_clause = (
"Create a similar but non-identical original subject: match the performance role, silhouette category, styling direction, camera-readability, and commercial mood, while changing exact identity and unique personal features. "
if req.reconstruction_mode == "similar"
else "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
)
prompt_extra = req.prompt.strip()
prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else ""
models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
generated: list[SubjectAsset] = []
try:
@@ -3864,17 +3884,19 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
emotion = view_label.replace("表情", "")
view_prompt = f"full-body upright standing character reference with a clear {emotion} facial expression"
elif view.startswith("action_") or view == "side_walk":
view_prompt = f"full-body upright standing character reference, {view_label}, same identity and proportions"
view_prompt = f"full-body upright standing character reference, {view_label}, consistent actor proportions"
else:
view_prompt = f"full-body upright standing character reference, {view_label}"
else:
view_prompt = f"complete object/product reference, {view_label} view"
prompt = (
f"Use the reference image(s) only as visual evidence to redraw the same {target}; do not crop, cut out, paste, or extract pixels from the source. "
f"Generate one newly rendered {view_prompt} of the same subject. "
f"Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. "
f"Generate one newly rendered {view_prompt} for {target}. "
f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. "
"Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
"The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
+ identity_clause
+ prompt_extra_clause
+ actor_style_clause
+ "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
"Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
f"Create a high-definition standalone asset on a solid {bg_phrase} background. "
"No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. "

View File

@@ -19,8 +19,10 @@ import {
type ProductViewAnalysisItem,
type StoryboardScriptRewriteSegment,
type StoryboardScene,
type SubjectAsset,
type SubjectKind,
addElement,
analyzeJob,
analyzeProductViews,
apiAssetUrl,
cutoutElement,
@@ -250,6 +252,23 @@ function guessSubjectKind(name: string): SubjectKind {
: "object"
}
/**
 * Picks the frame whose `timestamp` lies nearest to `time`.
 *
 * Returns `null` for an empty list. When several frames are equally close,
 * the one that appears earliest in `frames` is kept, because a later frame
 * only replaces the current pick when it is strictly closer.
 */
function closestFrameForTime(frames: KeyFrame[], time: number) {
  let nearest: KeyFrame | null = null
  let nearestGap = 0
  for (const candidate of frames) {
    const gap = Math.abs(candidate.timestamp - time)
    // The first frame always seeds the search; afterwards only a strictly
    // smaller distance wins, so ties keep the earlier frame.
    if (nearest === null || gap < nearestGap) {
      nearest = candidate
      nearestGap = gap
    }
  }
  return nearest
}
/**
 * Reports whether an element's name marks it as a "similar actor" subject.
 *
 * The Chinese and English names are joined with a single space (missing or
 * empty names count as ""), lower-cased, and then searched for any of the
 * known marker phrases.
 */
function isSimilarActorElement(element: KeyElement) {
  const markers = ["相似主角", "similar ad actor", "similar actor"]
  const zhName = element.name_zh || ""
  const enName = element.name_en || ""
  const haystack = [zhName, enName].join(" ").toLowerCase()
  return markers.some((marker) => haystack.includes(marker))
}
/**
 * Resolves the URL used to display a subject asset.
 *
 * Uses the result of `apiAssetUrl(asset.url)` when it is truthy. Otherwise
 * falls back to `resolveImageRefUrl` for the job, with an "asset" reference
 * whose `frame_idx` is 0 and whose `element_id` is the asset's id.
 */
function subjectAssetUrl(job: Job, asset: SubjectAsset) {
  const directUrl = apiAssetUrl(asset.url)
  if (directUrl) {
    return directUrl
  }
  // The fallback is only evaluated when the direct URL is missing or empty.
  return resolveImageRefUrl(job.id, { kind: "asset", frame_idx: 0, element_id: asset.id })
}
function buildFallbackScene(job: Job, frame: KeyFrame, order: number): StoryboardScene {
const frames = [...job.frames].sort((a, b) => a.timestamp - b.timestamp)
const nextFrame = frames.find((item) => item.timestamp > frame.timestamp) ?? null
@@ -816,11 +835,15 @@ export function AdRecreationBoard({
</header>
<div className="min-h-0 flex-1 overflow-y-auto p-4">
<AudioIntakePanel job={job} />
<AudioIntakePanel
job={job}
selectedFrames={data.selectedFrames}
onToggleFrame={data.onToggleFrame}
onJobUpdate={data.onJobUpdate}
/>
<AudioStoryboardPlanPanel
job={job}
onAddFrame={data.onAddManualFrameForJob}
onOpenFrame={data.onOpenFramePanel}
selectedFrames={data.selectedFrames}
onJobUpdate={data.onJobUpdate}
onGenerateVideo={onGenerateVideo}
/>
@@ -951,7 +974,17 @@ function AudioIntakeStatus({ job, audioReady }: { job: Job | null; audioReady: b
)
}
function AudioIntakePanel({ job }: { job: Job | null }) {
function AudioIntakePanel({
job,
selectedFrames,
onToggleFrame,
onJobUpdate,
}: {
job: Job | null
selectedFrames: Set<number>
onToggleFrame: (idx: number) => void
onJobUpdate: (job: Job) => void
}) {
const [currentTime, setCurrentTime] = useState(0)
const [mediaDuration, setMediaDuration] = useState(0)
const [audioFeatures, setAudioFeatures] = useState<AudioFeature[]>([])
@@ -1082,7 +1115,7 @@ function AudioIntakePanel({ job }: { job: Job | null }) {
/>
</div>
<div className="grid gap-2 xl:grid-cols-[230px_minmax(0,1fr)]">
<div className="grid gap-2 xl:grid-cols-[230px_320px_minmax(0,1fr)]">
<div className="min-w-0">
<div className="mb-2 flex items-center justify-between gap-3">
<SectionTitle icon={<Play className="h-4 w-4" />} title="原版视频" />
@@ -1113,6 +1146,13 @@ function AudioIntakePanel({ job }: { job: Job | null }) {
</div>
</div>
<SourceReferenceBuildPanel
job={job}
selectedFrames={selectedFrames}
onToggleFrame={onToggleFrame}
onJobUpdate={onJobUpdate}
/>
<div className="min-w-0">
<div className="mb-2 flex items-center justify-between gap-3">
<SectionTitle icon={<FileText className="h-4 w-4" />} title="逐句时间轴" />

View File

@@ -986,6 +986,9 @@ export async function generateSubjectAssets(
size?: AssetSize
source_frame_indices?: number[]
views?: string[]
subject_style?: "transparent_human" | "source_actor"
reconstruction_mode?: "same" | "similar"
prompt?: string
} = {},
): Promise<Job> {
const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/elements/${elementId}/subject-assets`, {
@@ -998,6 +1001,9 @@ export async function generateSubjectAssets(
size: body.size ?? "source",
source_frame_indices: body.source_frame_indices ?? null,
views: body.views ?? null,
subject_style: body.subject_style ?? "transparent_human",
reconstruction_mode: body.reconstruction_mode ?? "same",
prompt: body.prompt ?? "",
}),
})
if (!res.ok) {