auto-save 2026-05-17 21:09 (~4)

2026-05-17 21:09:20 +08:00
parent 096f201470
commit 252cdf441d
4 changed files with 117 additions and 50 deletions
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,38 +1,5 @@
 {
  "entries": [
-    {
-      "files_changed": 1,
-      "message": "Codex 会话活跃 · 最近命令：codex · 1 项未提交变更 · 最近提交：auto-save 2026-05-15 12:29 (~1)",
-      "ts": "2026-05-15T04:34:45Z",
-      "type": "session-heartbeat"
-    },
-    {
-      "files_changed": 1,
-      "hash": "a6466d0",
-      "message": "auto-save 2026-05-15 12:35 (~1)",
-      "ts": "2026-05-15T12:35:55+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "hash": "fe24202",
-      "message": "auto-save 2026-05-15 12:41 (~1)",
-      "ts": "2026-05-15T12:41:49+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "message": "Codex 会话活跃 · 最近命令：codex · 1 项未提交变更 · 最近提交：auto-save 2026-05-15 12:41 (~1)",
-      "ts": "2026-05-15T04:44:45Z",
-      "type": "session-heartbeat"
-    },
-    {
-      "files_changed": 1,
-      "hash": "275b154",
-      "message": "auto-save 2026-05-15 12:47 (~1)",
-      "ts": "2026-05-15T12:47:42+08:00",
-      "type": "commit"
-    },
    {
      "files_changed": 1,
      "hash": "1cb9861",
@@ -3262,6 +3229,38 @@
      "type": "session-heartbeat",
      "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 1 项未提交变更 · 最近提交：fix: harden product view parsing",
      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-17T20:47:53+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-17 20:47 (~4)",
+      "hash": "db24822",
+      "files_changed": 4
+    },
+    {
+      "ts": "2026-05-17T12:48:29Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 2 项未提交变更 · 最近提交：auto-save 2026-05-17 20:47 (~4)",
+      "files_changed": 2
+    },
+    {
+      "ts": "2026-05-17T20:52:52+08:00",
+      "type": "commit",
+      "message": "feat: add storyboard script rewriting",
+      "hash": "096f201",
+      "files_changed": 2
+    },
+    {
+      "ts": "2026-05-17T12:58:29Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 1 项未提交变更 · 最近提交：feat: add storyboard script rewriting",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-17T13:08:29Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 3 项未提交变更 · 最近提交：feat: add storyboard script rewriting",
+      "files_changed": 3
    }
  ]
 }
--- a/api/main.py
+++ b/api/main.py
@@ -1140,9 +1140,10 @@ def _focus_source_for_element(job_id: str, idx: int, el: KeyElement) -> tuple[Pa
    return model_src, tmp_focus


-def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_path: Path) -> Path | None:
+def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_path: Path, max_items: int = 6) -> Path | None:
    paths: list[Path] = []
    seen: set[int] = set()
+    max_items = max(2, min(12, int(max_items or 6)))
    for idx in frame_indices:
        if idx in seen:
            continue
@@ -1150,7 +1151,7 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat
        p = _source_frame_path(job_id, idx)
        if p.exists():
            paths.append(p)
-        if len(paths) >= 6:
+        if len(paths) >= max_items:
            break
    if len(paths) <= 1:
        return None
@@ -1168,7 +1169,7 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat
    if len(thumbs) <= 1:
        return None

-    cols = 3 if len(thumbs) > 2 else 2
+    cols = 4 if len(thumbs) > 6 else (3 if len(thumbs) > 2 else 2)
    rows = (len(thumbs) + cols - 1) // cols
    sheet = Image.new("RGB", (cols * 420, rows * 420), (245, 245, 245))
    for i, thumb in enumerate(thumbs):
@@ -3447,6 +3448,9 @@ class GenerateSubjectAssetsReq(BaseModel):
    size: AssetSize = "source"
    source_frame_indices: list[int] | None = None
    views: list[str] | None = None
+    subject_style: Literal["transparent_human", "source_actor"] = "transparent_human"
+    reconstruction_mode: Literal["same", "similar"] = "same"
+    prompt: str = ""


@app.post("/jobs/{job_id}/frames/{idx}/elements", response_model=Job)
@@ -3834,27 +3838,43 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
    source_indices = [int(x) for x in (req.source_frame_indices or [idx]) if isinstance(x, int) or str(x).isdigit()]
    if idx not in source_indices:
        source_indices = [idx] + source_indices
-    source_indices = list(dict.fromkeys(source_indices))[:6]
+    source_indices = list(dict.fromkeys(source_indices))[:12]

    model_src, tmp_focus = _focus_source_for_element(job_id, idx, el)
    sheet_tmp: Path | None = None
    if len(source_indices) > 1:
        sheet_tmp = job_dir(job_id) / "tmp" / f"subject_refs_{idx:03d}_{element_id}_{uuid.uuid4().hex[:6]}.jpg"
-        sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp)
+        sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp, max_items=12)
        if sheet:
            model_src = sheet

    target = (el.name_en or el.name_zh).strip()
    bg_phrase = "pure white" if req.background == "white" else "pure black"
-    kind_phrase = "person, animal, or living character" if req.subject_kind == "living" else "object or product-like subject"
+    similar_actor = req.subject_kind == "living" and req.subject_style == "source_actor" and req.reconstruction_mode == "similar"
+    kind_phrase = "human actor or living character" if req.subject_kind == "living" else "object or product-like subject"
    transparent_character_clause = (
        TRANSPARENT_HUMAN_POSITIVE_PROMPT
        + " The generated living character must be a friendly transparent humanoid with transparent or translucent outer body and clean white skeleton visible inside the same body. "
        + TRANSPARENT_HUMAN_NEGATIVE_PROMPT
        + " Do not render a normal human, ordinary skeleton-only character, horror skeleton, medical anatomy, organs, veins, blood, corpse, zombie, hospital, surgery, or autopsy visual. "
-        if req.subject_kind == "living"
+        if req.subject_kind == "living" and req.subject_style == "transparent_human"
        else ""
    )
+    actor_style_clause = (
+        "Generate a believable normal commercial video actor, not a transparent or skeleton character. "
+        "Use the references to understand the source video's casting direction, age range, gender presentation, body proportion, wardrobe category, gesture vocabulary, framing, energy, lighting, and creator-ad style. "
+        "Do not recreate the exact person's face, biometric identity, unique likeness, tattoos, scars, logos, watermarks, captions, or platform UI. "
+        "The output must be a newly designed similar actor that could play the same role in a new ad, with consistent identity across all views. "
+        if similar_actor
+        else ""
+    )
+    identity_clause = (
+        "Create a similar but non-identical original subject: match the performance role, silhouette category, styling direction, camera-readability, and commercial mood, while changing exact identity and unique personal features. "
+        if req.reconstruction_mode == "similar"
+        else "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
+    )
+    prompt_extra = req.prompt.strip()
+    prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else ""
    models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
    generated: list[SubjectAsset] = []
    try:
@@ -3864,17 +3884,19 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
                    emotion = view_label.replace("表情", "")
                    view_prompt = f"full-body upright standing character reference with a clear {emotion} facial expression"
                elif view.startswith("action_") or view == "side_walk":
-                    view_prompt = f"full-body upright standing character reference, {view_label}, same identity and proportions"
+                    view_prompt = f"full-body upright standing character reference, {view_label}, consistent actor proportions"
                else:
                    view_prompt = f"full-body upright standing character reference, {view_label}"
            else:
                view_prompt = f"complete object/product reference, {view_label} view"
            prompt = (
-                f"Use the reference image(s) only as visual evidence to redraw the same {target}; do not crop, cut out, paste, or extract pixels from the source. "
-                f"Generate one newly rendered {view_prompt} of the same subject. "
+                f"Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. "
+                f"Generate one newly rendered {view_prompt} for {target}. "
                f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. "
-                "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
-                "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
+                + identity_clause
+                + prompt_extra_clause
+                + actor_style_clause
+                + "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
                "Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
                f"Create a high-definition standalone asset on a solid {bg_phrase} background. "
                "No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. "
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -19,8 +19,10 @@ import {
  type ProductViewAnalysisItem,
  type StoryboardScriptRewriteSegment,
  type StoryboardScene,
+  type SubjectAsset,
  type SubjectKind,
  addElement,
+  analyzeJob,
  analyzeProductViews,
  apiAssetUrl,
  cutoutElement,
@@ -250,6 +252,23 @@ function guessSubjectKind(name: string): SubjectKind {
    : "object"
 }

+function closestFrameForTime(frames: KeyFrame[], time: number) {
+  if (!frames.length) return null
+  const first = frames[0] as KeyFrame
+  return frames.reduce((best, frame) =>
+    Math.abs(frame.timestamp - time) < Math.abs(best.timestamp - time) ? frame : best,
+  first)
+}
+
+function isSimilarActorElement(element: KeyElement) {
+  const name = `${element.name_zh || ""} ${element.name_en || ""}`.toLowerCase()
+  return name.includes("相似主角") || name.includes("similar ad actor") || name.includes("similar actor")
+}
+
+function subjectAssetUrl(job: Job, asset: SubjectAsset) {
+  return apiAssetUrl(asset.url) || resolveImageRefUrl(job.id, { kind: "asset", frame_idx: 0, element_id: asset.id })
+}
+
 function buildFallbackScene(job: Job, frame: KeyFrame, order: number): StoryboardScene {
  const frames = [...job.frames].sort((a, b) => a.timestamp - b.timestamp)
  const nextFrame = frames.find((item) => item.timestamp > frame.timestamp) ?? null
@@ -816,11 +835,15 @@ export function AdRecreationBoard({
            </header>

            <div className="min-h-0 flex-1 overflow-y-auto p-4">
-              <AudioIntakePanel job={job} />
+              <AudioIntakePanel
+                job={job}
+                selectedFrames={data.selectedFrames}
+                onToggleFrame={data.onToggleFrame}
+                onJobUpdate={data.onJobUpdate}
+              />
              <AudioStoryboardPlanPanel
                job={job}
-                onAddFrame={data.onAddManualFrameForJob}
-                onOpenFrame={data.onOpenFramePanel}
+                selectedFrames={data.selectedFrames}
                onJobUpdate={data.onJobUpdate}
                onGenerateVideo={onGenerateVideo}
              />
@@ -951,7 +974,17 @@ function AudioIntakeStatus({ job, audioReady }: { job: Job | null; audioReady: b
  )
 }

-function AudioIntakePanel({ job }: { job: Job | null }) {
+function AudioIntakePanel({
+  job,
+  selectedFrames,
+  onToggleFrame,
+  onJobUpdate,
+}: {
+  job: Job | null
+  selectedFrames: Set<number>
+  onToggleFrame: (idx: number) => void
+  onJobUpdate: (job: Job) => void
+}) {
  const [currentTime, setCurrentTime] = useState(0)
  const [mediaDuration, setMediaDuration] = useState(0)
  const [audioFeatures, setAudioFeatures] = useState<AudioFeature[]>([])
@@ -1082,7 +1115,7 @@ function AudioIntakePanel({ job }: { job: Job | null }) {
          />
        </div>

-        <div className="grid gap-2 xl:grid-cols-[230px_minmax(0,1fr)]">
+        <div className="grid gap-2 xl:grid-cols-[230px_320px_minmax(0,1fr)]">
          <div className="min-w-0">
            <div className="mb-2 flex items-center justify-between gap-3">
              <SectionTitle icon={<Play className="h-4 w-4" />} title="原版视频" />
@@ -1113,6 +1146,13 @@ function AudioIntakePanel({ job }: { job: Job | null }) {
            </div>
          </div>

+          <SourceReferenceBuildPanel
+            job={job}
+            selectedFrames={selectedFrames}
+            onToggleFrame={onToggleFrame}
+            onJobUpdate={onJobUpdate}
+          />
+
          <div className="min-w-0">
            <div className="mb-2 flex items-center justify-between gap-3">
              <SectionTitle icon={<FileText className="h-4 w-4" />} title="逐句时间轴" />
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -986,6 +986,9 @@ export async function generateSubjectAssets(
    size?: AssetSize
    source_frame_indices?: number[]
    views?: string[]
+    subject_style?: "transparent_human" | "source_actor"
+    reconstruction_mode?: "same" | "similar"
+    prompt?: string
  } = {},
 ): Promise<Job> {
  const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/elements/${elementId}/subject-assets`, {
@@ -998,6 +1001,9 @@ export async function generateSubjectAssets(
      size: body.size ?? "source",
      source_frame_indices: body.source_frame_indices ?? null,
      views: body.views ?? null,
+      subject_style: body.subject_style ?? "transparent_human",
+      reconstruction_mode: body.reconstruction_mode ?? "same",
+      prompt: body.prompt ?? "",
    }),
  })
  if (!res.ok) {