auto-save 2026-05-13 21:24 (~6)

2026-05-13 21:24:32 +08:00
parent d4eb18e5f8
commit 2befdf4e40
6 changed files with 81 additions and 17 deletions
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -2408,6 +2408,19 @@
      "message": "auto-save 2026-05-13 21:13 (~3)",
      "hash": "a8b752b",
      "files_changed": 3
+    },
+    {
+      "ts": "2026-05-13T21:19:00+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-13 21:18 (~2)",
+      "hash": "d4eb18e",
+      "files_changed": 2
+    },
+    {
+      "ts": "2026-05-13T13:19:30Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 3 项未提交变更 · 最近提交：auto-save 2026-05-13 21:18 (~2)",
+      "files_changed": 3
    }
  ]
 }
--- a/api/main.py
+++ b/api/main.py
@@ -115,6 +115,11 @@ class GeneratedVideo(BaseModel):
    created_at: float = 0.0


+class VideoSourceRef(BaseModel):
+    kind: Literal["image", "source_video"] = "image"
+    url: str = ""
+
+
 class StoryboardScene(BaseModel):
    """分镜头编排：每个 selected 分镜对应一个 scene 描述
    v2: 4 图槽 + 时长（复制粘贴模式）— 主体 / 场景 / 产品 / 动作 各一张图
@@ -265,6 +270,9 @@ def storyboard_ref_path(job_id: str, ref: dict | None) -> Path | None:
    except Exception:
        return None
    if kind == "keyframe":
+        clean = job_dir(job_id) / "cleaned" / f"{frame_idx:03d}.jpg"
+        if clean.exists():
+            return clean
        p = job_dir(job_id) / "frames" / f"{frame_idx:03d}.jpg"
        return p if p.exists() else None
    if kind == "cutout":
@@ -1643,6 +1651,7 @@ class GenerateStoryboardVideoReq(BaseModel):
    scene_image: dict | None = None
    product_image: dict | None = None
    action_image: dict | None = None
+    source_ref: VideoSourceRef | None = None
    model: str = ""
    size: str = "720x1280"

@@ -1749,18 +1758,27 @@ def ark_reference_data_url(ref_img: Path) -> str:
    return f"data:{mime};base64,{base64.b64encode(ref_img.read_bytes()).decode('ascii')}"


-def submit_video_create(client, url: str, headers: dict, ref_img: Path, payload: dict):
+def submit_video_create(client, url: str, headers: dict, ref_img: Path, payload: dict, source_ref: VideoSourceRef | None = None):
    if video_uses_ark():
+        content = [{"type": "text", "text": payload["prompt"]}]
+        if source_ref and source_ref.kind == "source_video" and source_ref.url:
+            content.append(
+                {
+                    "type": "video_url",
+                    "video_url": {"url": source_ref.url},
+                    "role": "reference_video",
+                }
+            )
+        content.append(
+            {
+                "type": "image_url",
+                "image_url": {"url": ark_reference_data_url(ref_img)},
+                "role": "first_frame",
+            }
+        )
        data = {
            "model": payload["model"],
-            "content": [
-                {"type": "text", "text": payload["prompt"]},
-                {
-                    "type": "image_url",
-                    "image_url": {"url": ark_reference_data_url(ref_img)},
-                    "role": "first_frame",
-                },
-            ],
+            "content": content,
            "ratio": size_to_video_ratio(str(payload.get("size", ""))),
            "duration": int(float(str(payload.get(VIDEO_DURATION_FIELD, 5)))),
            "watermark": False,
@@ -1783,7 +1801,7 @@ def submit_video_create(client, url: str, headers: dict, ref_img: Path, payload:
        )


-def render_storyboard_video(job_id: str, local_id: str, provider_id: str, ref_path: Path, prompt: str, model: str, seconds: str, size: str) -> None:
+def render_storyboard_video(job_id: str, local_id: str, provider_id: str, ref_path: Path, prompt: str, model: str, seconds: str, size: str, source_ref: VideoSourceRef | None = None) -> None:
    import httpx

    out_dir = job_dir(job_id) / "storyboard_videos" / local_id
@@ -1801,7 +1819,10 @@ def render_storyboard_video(job_id: str, local_id: str, provider_id: str, ref_pa
            create = None
            create_errors: list[str] = []
            for create_path in VIDEO_CREATE_PATHS:
-                resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload)
+                resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, source_ref)
+                if video_uses_ark() and source_ref and resp.status_code in {400, 422}:
+                    create_errors.append(f"{video_path(create_path)} + reference_video -> HTTP {resp.status_code}: {resp.text[:160]}")
+                    resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, None)
                if resp.status_code < 400:
                    create = resp
                    break
@@ -1881,7 +1902,10 @@ def generate_storyboard_video(job_id: str, idx: int, req: GenerateStoryboardVide
        created_at=time.time(),
    )
    update(job, generated_videos=[item] + job.generated_videos, message=f"视频生成已提交 · 分镜 {idx + 1}")
-    bg.add_task(render_storyboard_video, job_id, local_id, "", ref_path, prompt, model, seconds, req.size)
+    source_ref = req.source_ref
+    if source_ref and source_ref.kind == "source_video" and not source_ref.url:
+        source_ref = None
+    bg.add_task(render_storyboard_video, job_id, local_id, "", ref_path, prompt, model, seconds, req.size, source_ref)
    return job


--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -830,6 +830,30 @@ api/main.py
        <h2>变更记录</h2>
        <p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
        <div class="changelog">
+          <article class="change">
+            <header>
+              <h3>2026-05-13 · 生视频携带原视频链接做节奏参考</h3>
+              <span class="tag violet">StoryboardWorkbench</span>
+              <span class="tag blue">API</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>用户赶交付，希望直接把上传的原视频链接给视频模型参考，而不是只靠单张关键帧。</p>
+              <p><strong>改动：</strong>前端提交生视频时增加 <code>source_ref: { kind: "source_video", url: job.url }</code>；Ark 请求体在文本 prompt 和首帧之外追加 <code>video_url</code> 参考视频，用于模仿节奏、镜头运动和动作顺序。如果 Ark 返回 400/422 不接受参考视频字段，后端自动回退到“当前关键帧首帧生成”，保证这次不会直接阻断出片。</p>
+              <p><strong>影响：</strong><code>web/app/page.tsx</code>、<code>web/lib/api.ts</code>、<code>api/main.py</code>、<code>docs/source-analysis.html</code>。</p>
+            </div>
+          </article>
+          <article class="change">
+            <header>
+              <h3>2026-05-13 · 快速出片改为关键帧直生视频</h3>
+              <span class="tag violet">StoryboardWorkbench</span>
+              <span class="tag blue">Prompt</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>赶交付时不适合再让 4 图槽决定首帧；如果某个槽里是抠图元素，模型会拿碎元素当第一帧，视频容易不连贯。</p>
+              <p><strong>改动：</strong>“生成视频”按钮改成直接用当前分镜关键帧作为首帧提交，4 图槽和改造目标只作为提示词参考；提示词强调一镜到底、首帧稳定、时间线连续、禁止跳切/换场景/主体变形。后端取关键帧时优先使用未应用的清洗版，否则使用当前 frame 文件。</p>
+              <p><strong>影响：</strong><code>web/app/page.tsx</code>、<code>web/components/storyboard-workbench.tsx</code>、<code>api/main.py</code>。</p>
+            </div>
+          </article>
          <article class="change">
            <header>
              <h3>2026-05-13 · 生视频支持火山方舟 Ark 异步任务</h3>
--- a/web/app/page.tsx
+++ b/web/app/page.tsx
@@ -250,6 +250,7 @@ export default function Home() {
      `竖屏 9:16，${duration.toFixed(1)} 秒，SKG 产品短视频广告。`,
      "直接根据当前分镜关键帧生成视频。必须使用输入的完整视频关键帧作为第一帧和视觉锚点：第一帧构图、主体位置、透视关系和光线方向保持稳定，然后从这一帧自然动起来。",
      "生成一段单镜头连续视频，一镜到底，不要跳切，不要突然换场景，不要突然换主体，不要蒙太奇，不要多镜头拼接。",
+      "如果提供了原视频链接，把它只作为节奏、镜头运动、动作顺序和画面调度参考；不要照搬原视频里的品牌、文字、水印、竞品产品或具体人物。",
      "时间线：0%-25% 保持首帧构图并轻微启动；25%-70% 做一个清晰、缓慢、可信的产品展示动作；70%-100% 镜头自然停稳在 SKG 产品或使用效果特写。",
      `主体改造：${subjectDirection}`,
      `产品替换：${productDirection}`,
@@ -274,6 +275,7 @@ export default function Home() {
        frame_idx: frameIdx,
        label: `分镜 ${frameIdx + 1} 关键帧`,
      }
+      const sourceUrl = job.url?.trim()
      const updated = await generateStoryboardVideo(job.id, frameIdx, {
        prompt,
        duration,
@@ -281,6 +283,7 @@ export default function Home() {
        scene_image: null,
        product_image: null,
        action_image: null,
+        source_ref: sourceUrl ? { kind: "source_video", url: sourceUrl } : null,
        model,
        size: "720x1280",
      })
--- a/web/components/storyboard-workbench.tsx
+++ b/web/components/storyboard-workbench.tsx
@@ -122,7 +122,6 @@ export function StoryboardWorkbench({ job, selectedFrames, open, onClose, onJobU
    window.addEventListener("pointerup", onUp)
  }

-  const hasVideoRefs = !!(form.subject_image || form.scene_image || form.product_image || form.action_image)
  const currentModelLabel = VIDEO_MODELS.find((m) => m.value === videoModel)?.label ?? "Seedance"

  return (
@@ -303,7 +302,7 @@ export function StoryboardWorkbench({ job, selectedFrames, open, onClose, onJobU
                  </div>
                </div>
                <button
-                  disabled={!hasVideoRefs || focusedIdx === null || generating}
+                  disabled={focusedIdx === null || generating}
                  onClick={async () => {
                    if (focusedIdx === null) return
                    queueSave(form)
@@ -315,13 +314,13 @@ export function StoryboardWorkbench({ job, selectedFrames, open, onClose, onJobU
                    }
                  }}
                  className="w-full py-3 rounded-lg text-[13.5px] font-semibold inline-flex items-center justify-center gap-2 bg-gradient-to-r from-rose-500 to-violet-500 text-white border border-violet-300/40 shadow-lg shadow-violet-500/20 hover:from-rose-400 hover:to-violet-400 disabled:opacity-40 disabled:cursor-not-allowed"
-                  title={hasVideoRefs ? `调用 ${currentModelLabel} 生视频 API，结果进入 Video Gen 节点` : "先粘贴至少一张参考图"}
+                  title={`用当前分镜关键帧作为首帧，调用 ${currentModelLabel} 生视频 API`}
                >
                  {generating ? <Loader2 className="h-4 w-4 animate-spin" /> : <Wand2 className="h-4 w-4" />}
-                  调用 {currentModelLabel} 生成视频
+                  用当前关键帧生成视频
                </button>
                <div className="mt-2 text-[10.5px] text-white/35 leading-relaxed">
-                  用当前 4 图槽、改造目标和时长提交生视频 API；生成中的进度和完成后的 MP4 会显示在 Video Gen 节点。
+                  直接用当前分镜关键帧作为首帧快速出片；4 图槽和改造目标只作为提示词参考，生成中的进度和完成后的 MP4 会显示在 Video Gen 节点。
                </div>
              </section>
          </div>
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -382,6 +382,7 @@ export async function generateStoryboardVideo(
    scene_image?: ImageRef | null
    product_image?: ImageRef | null
    action_image?: ImageRef | null
+    source_ref?: { kind: "image" | "source_video"; url: string } | null
    model?: string
    size?: string
  },