auto-save 2026-05-13 21:29 (~7)

This commit is contained in:
2026-05-13 21:30:04 +08:00
parent 2befdf4e40
commit 7b59ed9bf1
7 changed files with 123 additions and 159 deletions

View File

@@ -2421,6 +2421,19 @@
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 3 项未提交变更 · 最近提交auto-save 2026-05-13 21:18 (~2)",
"files_changed": 3
},
{
"ts": "2026-05-13T21:24:32+08:00",
"type": "commit",
"message": "auto-save 2026-05-13 21:24 (~6)",
"hash": "2befdf4",
"files_changed": 6
},
{
"ts": "2026-05-13T13:29:31Z",
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 7 项未提交变更 · 最近提交auto-save 2026-05-13 21:24 (~6)",
"files_changed": 7
}
]
}

View File

@@ -125,6 +125,8 @@ class StoryboardScene(BaseModel):
v2: 4 图槽 + 时长(复制粘贴模式)— 主体 / 场景 / 产品 / 动作 各一张图
v1 字段保留兼容subject/product/scene/action/reference_ids"""
duration: float = 0
first_image: dict | None = None
last_image: dict | None = None
# 4 图槽dict 含 {kind, frame_idx, element_id?, cutout_id?, label}
subject_image: dict | None = None
scene_image: dict | None = None
@@ -1647,6 +1649,8 @@ class UpdateStoryboardReq(BaseModel):
class GenerateStoryboardVideoReq(BaseModel):
prompt: str
duration: float = 4
first_image: dict | None = None
last_image: dict | None = None
subject_image: dict | None = None
scene_image: dict | None = None
product_image: dict | None = None
@@ -1758,7 +1762,15 @@ def ark_reference_data_url(ref_img: Path) -> str:
return f"data:{mime};base64,{base64.b64encode(ref_img.read_bytes()).decode('ascii')}"
def submit_video_create(client, url: str, headers: dict, ref_img: Path, payload: dict, source_ref: VideoSourceRef | None = None):
def submit_video_create(
client,
url: str,
headers: dict,
ref_img: Path,
payload: dict,
source_ref: VideoSourceRef | None = None,
last_img: Path | None = None,
):
if video_uses_ark():
content = [{"type": "text", "text": payload["prompt"]}]
if source_ref and source_ref.kind == "source_video" and source_ref.url:
@@ -1776,6 +1788,14 @@ def submit_video_create(client, url: str, headers: dict, ref_img: Path, payload:
"role": "first_frame",
}
)
if last_img and last_img.exists():
content.append(
{
"type": "image_url",
"image_url": {"url": ark_reference_data_url(last_img)},
"role": "last_frame",
}
)
data = {
"model": payload["model"],
"content": content,
@@ -1801,17 +1821,33 @@ def submit_video_create(client, url: str, headers: dict, ref_img: Path, payload:
)
def render_storyboard_video(job_id: str, local_id: str, provider_id: str, ref_path: Path, prompt: str, model: str, seconds: str, size: str, source_ref: VideoSourceRef | None = None) -> None:
def render_storyboard_video(
job_id: str,
local_id: str,
provider_id: str,
ref_path: Path,
prompt: str,
model: str,
seconds: str,
size: str,
source_ref: VideoSourceRef | None = None,
last_ref_path: Path | None = None,
) -> None:
import httpx
out_dir = job_dir(job_id) / "storyboard_videos" / local_id
ref_img = out_dir / "reference.jpg"
last_img = out_dir / "last_reference.jpg"
out_mp4 = out_dir / "video.mp4"
base = video_api_base()
headers = {"Authorization": f"Bearer {video_api_key()}"}
try:
prepare_video_reference(ref_path, ref_img)
prepared_last_img: Path | None = None
if last_ref_path and last_ref_path.exists():
prepare_video_reference(last_ref_path, last_img)
prepared_last_img = last_img
update_generated_video(job_id, local_id, status="in_progress", progress=5)
with httpx.Client(timeout=120) as client:
payload = {"model": model, "prompt": prompt, "size": size}
@@ -1819,10 +1855,13 @@ def render_storyboard_video(job_id: str, local_id: str, provider_id: str, ref_pa
create = None
create_errors: list[str] = []
for create_path in VIDEO_CREATE_PATHS:
resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, source_ref)
resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, source_ref, prepared_last_img)
if video_uses_ark() and source_ref and resp.status_code in {400, 422}:
create_errors.append(f"{video_path(create_path)} + reference_video -> HTTP {resp.status_code}: {resp.text[:160]}")
resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, None)
resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, None, prepared_last_img)
if video_uses_ark() and prepared_last_img and resp.status_code in {400, 422}:
create_errors.append(f"{video_path(create_path)} + last_frame -> HTTP {resp.status_code}: {resp.text[:160]}")
resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, None, None)
if resp.status_code < 400:
create = resp
break
@@ -1879,11 +1918,12 @@ def generate_storyboard_video(job_id: str, idx: int, req: GenerateStoryboardVide
if not prompt:
raise HTTPException(400, "prompt required")
ref = req.product_image or req.subject_image or req.scene_image or req.action_image
ref = req.first_image or req.subject_image or req.product_image or req.scene_image or req.action_image
ref_path = storyboard_ref_path(job_id, ref) or (job_dir(job_id) / "frames" / f"{idx:03d}.jpg")
if not ref_path.exists():
raise HTTPException(404, "reference image missing")
poster = storyboard_ref_url(job_id, ref) or f"/jobs/{job_id}/frames/{idx}.jpg"
last_ref_path = storyboard_ref_path(job_id, req.last_image)
local_id = uuid.uuid4().hex[:12]
model = resolve_video_model(req.model)
@@ -1905,7 +1945,7 @@ def generate_storyboard_video(job_id: str, idx: int, req: GenerateStoryboardVide
source_ref = req.source_ref
if source_ref and source_ref.kind == "source_video" and not source_ref.url:
source_ref = None
bg.add_task(render_storyboard_video, job_id, local_id, "", ref_path, prompt, model, seconds, req.size, source_ref)
bg.add_task(render_storyboard_video, job_id, local_id, "", ref_path, prompt, model, seconds, req.size, source_ref, last_ref_path)
return job

View File

@@ -830,6 +830,18 @@ api/main.py
<h2>变更记录</h2>
<p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
<div class="changelog">
<article class="change">
<header>
<h3>2026-05-13 · 分镜编排改为首尾帧生成</h3>
<span class="tag violet">StoryboardWorkbench</span>
<span class="tag blue">API</span>
</header>
<div class="body">
<p><strong>问题:</strong>赶交付时顶部横向分镜缩略条占空间,4 图槽也不如“首帧到尾帧”直接;用户希望直接做首尾帧视频生成。</p>
<p><strong>改动:</strong>移除 <code>StoryboardBar</code> 的横向分镜缩略图区域,只保留标题栏和展开按钮;<code>StoryboardWorkbench</code> 改成首帧 / 尾帧两个槽,首帧默认当前分镜,尾帧默认下一张已选分镜,也可从剪贴板粘贴指定结束画面。后端 <code>/storyboard/video</code> 支持 <code>first_image</code>/<code>last_image</code>;Ark 请求同时传 <code>first_frame</code>/<code>last_frame</code>,如果接口不接受尾帧字段则自动回退到单首帧。</p>
<p><strong>影响:</strong><code>web/components/storyboard-bar.tsx</code>、<code>web/components/storyboard-workbench.tsx</code>、<code>web/app/page.tsx</code>、<code>web/lib/api.ts</code>、<code>api/main.py</code></p>
</div>
</article>
<article class="change">
<header>
<h3>2026-05-13 · 生视频携带原视频链接做节奏参考</h3>

View File

@@ -232,6 +232,20 @@ export default function Home() {
if (!frame) return
const labelOf = (ref?: ImageRef | null, fallback = "未提供") => ref?.label || fallback
const keyframeRef: ImageRef = {
kind: "keyframe",
frame_idx: frameIdx,
label: `分镜 ${frameIdx + 1} 首帧`,
}
const orderedSelected = job.frames
.filter((f) => selectedFrames.has(f.index))
.sort((a, b) => a.timestamp - b.timestamp)
const nextFrame = orderedSelected.find((f) => f.timestamp > frame.timestamp) ?? null
const defaultLastRef: ImageRef | null = nextFrame
? { kind: "keyframe", frame_idx: nextFrame.index, label: `分镜 ${nextFrame.index + 1} 尾帧` }
: null
const firstRef = scene.first_image ?? keyframeRef
const lastRef = scene.last_image ?? defaultLastRef
const duration = scene.duration && scene.duration > 0 ? scene.duration : 5
const sourceScene = frame.description?.scene ? `参考画面识别:${frame.description.scene}` : ""
const sourceStyle = frame.description?.style ? `参考风格:${frame.description.style}` : ""
@@ -245,21 +259,21 @@ export default function Home() {
const sceneDirection = scene.scene?.trim()
|| "借鉴参考画面的构图、可信感和空间层次,但改造成适合 SKG 产品广告的现代家居、办公或零售场景。"
const actionDirection = scene.action?.trim()
|| "一镜到底缓慢推进,先建立画面,再出现自然手部互动,最后停在产品细节或使用状态特写。"
|| "按首帧到尾帧做平滑过渡,动作连续自然,镜头运动稳定,最后准确停在尾帧意图。"
const prompt = [
`竖屏 9:16${duration.toFixed(1)}SKG 产品短视频广告。`,
"直接根据当前分镜关键帧生成视频。必须使用输入的完整视频关键帧作为第一帧和视觉锚点:第一帧构图、主体位置、透视关系和光线方向保持稳定,然后从这一帧自然动起来。",
"生成一段单镜头连续视频,一镜到底,不要跳切,不要突然换场景,不要突然换主体,不要蒙太奇,不要多镜头拼接。",
"使用首帧和尾帧生成连续过渡视频:首帧必须严格作为视频开始画面,尾帧必须作为视频结束目标画面,中间只做自然运动补间。",
"生成一段单镜头连续视频,一镜到底,从首帧平滑过渡到尾帧;不要跳切,不要突然换场景,不要突然换主体,不要蒙太奇,不要多镜头拼接。",
"如果提供了原视频链接,把它只作为节奏、镜头运动、动作顺序和画面调度参考;不要照搬原视频里的品牌、文字、水印、竞品产品或具体人物。",
"时间线0%-25% 保持首帧构图并轻微启动;25%-70% 做一个清晰、缓慢、可信的产品展示动作70%-100% 镜头自然停稳在 SKG 产品或使用效果特写。",
"时间线0%-15% 锁住首帧构图并轻微启动;15%-85% 做平滑连续运动85%-100% 缓慢贴近尾帧并稳定收住。",
`主体改造:${subjectDirection}`,
`产品替换:${productDirection}`,
`场景改造:${sceneDirection}`,
`连续动作和镜头:${actionDirection}`,
`参考主体图槽${labelOf(scene.subject_image, "产品演示主体或手部姿态")}`,
`参考场景图槽${labelOf(scene.scene_image, "现代健康生活场景")}`,
`SKG 产品图槽${labelOf(scene.product_image, "SKG 产品视觉主角")}`,
`参考动作图槽${labelOf(scene.action_image, "自然拿取、佩戴、展示或靠近产品的动作")}`,
`首帧${labelOf(firstRef, "当前分镜关键帧")}`,
`尾帧${labelOf(lastRef, "未指定,按首帧小幅自然运动收尾")}`,
`SKG 产品参考${labelOf(scene.product_image, "SKG 产品视觉主角")}`,
`动作参考:${labelOf(scene.action_image, "自然拿取、佩戴、展示或靠近产品的动作")}`,
sourceScene,
sourceStyle,
sourceObjects,
@@ -270,16 +284,13 @@ export default function Home() {
try {
toast.info(`已提交 ${model} 生视频 · 分镜 ${frameIdx + 1}`)
const keyframeRef: ImageRef = {
kind: "keyframe",
frame_idx: frameIdx,
label: `分镜 ${frameIdx + 1} 关键帧`,
}
const sourceUrl = job.url?.trim()
const updated = await generateStoryboardVideo(job.id, frameIdx, {
prompt,
duration,
subject_image: keyframeRef,
first_image: firstRef,
last_image: lastRef,
subject_image: firstRef,
scene_image: null,
product_image: null,
action_image: null,
@@ -293,7 +304,7 @@ export default function Home() {
} catch (e) {
toast.error("提交视频失败:" + (e instanceof Error ? e.message : String(e)))
}
}, [job, setJob])
}, [job, selectedFrames, setJob])
// URL ?job=xxx,yyy 自动恢复多个 job
useEffect(() => {

View File

@@ -1,8 +1,6 @@
"use client"
import { useEffect, useRef, useState } from "react"
import { createPortal } from "react-dom"
import { LayoutGrid, ChevronDown, ChevronUp, Sparkle } from "lucide-react"
import { type Job, effectiveFrameUrl, hasCutout } from "@/lib/api"
import { LayoutGrid, ChevronDown, ChevronUp } from "lucide-react"
import { type Job, hasCutout } from "@/lib/api"
interface Props {
job: Job | null
@@ -15,19 +13,12 @@ interface Props {
}
export function StoryboardBar({ job, selectedFrames, focusedFrame, onFocusFrame, workbenchOpen = false, onOpenWorkbench, onCloseWorkbench }: Props) {
const [collapsed, setCollapsed] = useState(false)
const [mounted, setMounted] = useState(false)
useEffect(() => setMounted(true), [])
const [hover, setHover] = useState<{ src: string; topLabel: string; subLabel: string; rect: DOMRect } | null>(null)
const btnRefs = useRef<Record<number, HTMLButtonElement | null>>({})
if (!job) return null
const frames = job.frames
.filter((f) => selectedFrames.has(f.index))
.sort((a, b) => a.timestamp - b.timestamp)
const aspect = job.height > 0 ? `${job.width}/${job.height}` : "9/16"
const totalElements = frames.reduce(
(sum, f) => sum + (f.elements?.filter((e) => hasCutout(e)).length ?? 0),
0,
@@ -73,7 +64,6 @@ export function StoryboardBar({ job, selectedFrames, focusedFrame, onFocusFrame,
if (frames.length === 0) return
const nextFrame = focusedFrame ?? frames[0].index
if (focusedFrame === null) onFocusFrame(nextFrame)
setCollapsed(false)
onOpenWorkbench?.(nextFrame)
}}
disabled={frames.length === 0}
@@ -83,123 +73,8 @@ export function StoryboardBar({ job, selectedFrames, focusedFrame, onFocusFrame,
{workbenchOpen ? <ChevronUp className="h-3 w-3" /> : <ChevronDown className="h-3 w-3" />}
{workbenchOpen ? "收起编排" : "展开编排"}
</button>
<button
onClick={() => {
const nextCollapsed = !collapsed
setCollapsed(nextCollapsed)
if (nextCollapsed) onCloseWorkbench?.()
}}
className="text-white/50 hover:text-white text-[11px] inline-flex items-center gap-1"
title={collapsed ? "展开" : "折叠"}
>
{collapsed ? <ChevronDown className="h-3 w-3" /> : <ChevronUp className="h-3 w-3" />}
{collapsed ? "展开" : "折叠"}
</button>
</div>
</div>
{/* thumbnails row */}
{!collapsed && (
frames.length === 0 ? (
<div className="px-4 pb-3 text-[11px] text-white/40">
·
</div>
) : (
<div className="px-4 pb-3 flex gap-2 overflow-x-auto">
{frames.map((f, i) => {
const elementCount = f.elements?.filter((e) => hasCutout(e)).length ?? 0
const totalElCount = f.elements?.length ?? 0
const cleaned = f.cleaned_applied
const isFocused = focusedFrame === f.index
return (
<button
key={f.index}
ref={(el) => { btnRefs.current[f.index] = el }}
onClick={() => {
onFocusFrame(f.index)
setCollapsed(false)
}}
onMouseEnter={() => {
const el = btnRefs.current[f.index]
if (el) setHover({
src: effectiveFrameUrl(job.id, f),
topLabel: `分镜 ${i + 1}`,
subLabel: `${f.timestamp.toFixed(2)}s`,
rect: el.getBoundingClientRect(),
})
}}
onMouseLeave={() => setHover(null)}
title={`分镜 ${i + 1} · ${f.timestamp.toFixed(2)}s${cleaned ? " · 已清洗" : ""} · ${elementCount}/${totalElCount} 元素 · 点击聚焦`}
className={`relative shrink-0 rounded-md border transition shadow-lg hover:-translate-y-0.5 ${
isFocused
? "border-violet-300 ring-2 ring-violet-300/70"
: "border-white/15 hover:border-violet-300/60"
}`}
style={{ width: 88, aspectRatio: aspect }}
>
<img
src={effectiveFrameUrl(job.id, f)}
alt={`frame ${f.index}`}
className="absolute inset-0 w-full h-full object-cover rounded-md"
/>
<div className="absolute top-1 left-1 text-[9.5px] font-bold text-white bg-violet-500/85 backdrop-blur px-1.5 py-0.5 rounded">
#{i + 1}
</div>
{cleaned && (
<div className="absolute top-1 right-1 text-[9px] text-white bg-cyan-500/85 backdrop-blur px-1 py-0.5 rounded font-bold" title="已清洗">
</div>
)}
<div className="absolute bottom-0 right-0 left-0 px-1.5 py-0.5 text-[9px] font-mono text-white bg-gradient-to-t from-black/85 to-transparent flex items-center justify-between rounded-b-md">
<span>{f.timestamp.toFixed(1)}s</span>
{totalElCount > 0 && (
<span className="inline-flex items-center gap-0.5">
<Sparkle className="h-2 w-2" />
{elementCount}/{totalElCount}
</span>
)}
</div>
</button>
)
})}
</div>
)
)}
{/* Hover 预览 · 浮在缩略图正下方bar 在顶部 fixed下方是 DAG 画布区) */}
{mounted && hover && (() => {
const vidAspect = job.height > 0 ? job.height / job.width : 16 / 9
const w = 280
const h = w * vidAspect
const gap = 10
const centerX = hover.rect.left + hover.rect.width / 2
const left = Math.max(12, Math.min(window.innerWidth - w - 12, centerX - w / 2))
const top = hover.rect.bottom + gap
return createPortal(
<div
className="fixed z-[120] pointer-events-none"
style={{
left, top,
animation: "drawer-in 0.18s cubic-bezier(0.32, 0.72, 0, 1)",
}}
>
<div className="rounded-lg overflow-hidden border-2 border-violet-300/50 bg-white shadow-2xl">
<img
src={hover.src}
alt="preview"
className="block"
style={{ width: w, height: h, objectFit: "cover" }}
/>
<div className="px-2 py-1 bg-black/80 text-white text-[10.5px] flex items-center justify-between gap-2">
<span className="truncate">{hover.topLabel}</span>
<span className="text-white/60 font-mono shrink-0">{hover.subLabel}</span>
</div>
</div>
</div>,
document.body,
)
})()}
</div>
)
}

View File

@@ -82,6 +82,13 @@ export function StoryboardWorkbench({ job, selectedFrames, open, onClose, onJobU
.sort((a, b) => a.timestamp - b.timestamp)
const focusFrame = focusedIdx !== null ? job.frames.find((f) => f.index === focusedIdx) ?? null : null
const focusSeq = focusFrame ? frames.findIndex((f) => f.index === focusFrame.index) + 1 : 0
const defaultFirstRef: ImageRef | null = focusFrame
? { kind: "keyframe", frame_idx: focusFrame.index, label: `分镜 ${focusSeq || focusFrame.index + 1} 首帧` }
: null
const nextFrame = focusFrame ? frames.find((f) => f.timestamp > focusFrame.timestamp) ?? null : null
const defaultLastRef: ImageRef | null = nextFrame
? { kind: "keyframe", frame_idx: nextFrame.index, label: `分镜 ${frames.findIndex((f) => f.index === nextFrame.index) + 1} 尾帧` }
: null
const queueSave = (next: StoryboardScene) => {
setForm(next)
@@ -176,21 +183,20 @@ export function StoryboardWorkbench({ job, selectedFrames, open, onClose, onJobU
</div>
</div>
{/* 4 图槽 grid图片是参考不是最终复刻素材 */}
<div className="grid grid-cols-4 gap-4">
{/* 首尾帧:图片直接参与视频生成 */}
<div className="grid grid-cols-2 gap-4">
{([
{ key: "subject_image" as const, label: "参考主体", placeholder: "人物 / 手部 / 模特姿态" },
{ key: "scene_image" as const, label: "参考场景", placeholder: "药店柜台 / 卧室 / 浴室" },
{ key: "product_image" as const, label: "SKG 产品", placeholder: "产品图 / 包装 / 使用状态" },
{ key: "action_image" as const, label: "参考动作", placeholder: "拿起 / 佩戴 / 展示 / 递给顾客" },
{ key: "first_image" as const, label: "首帧", placeholder: "默认当前分镜关键帧" },
{ key: "last_image" as const, label: "尾帧", placeholder: defaultLastRef ? "默认下一张已选分镜" : "粘贴一张结束画面" },
]).map(({ key, label, placeholder }) => {
const ref = form[key]
const fallback = key === "first_image" ? defaultFirstRef : defaultLastRef
const ref = form[key] ?? fallback
const url = ref ? resolveImageRefUrl(job.id, ref) : ""
return (
<div key={key} className="rounded-lg bg-white/[0.04] border border-white/10 p-2.5">
<div className="text-[12px] text-white font-semibold mb-2 flex items-center justify-between">
<span>{label}</span>
{ref && (
{form[key] && (
<button
onClick={() => queueSave({ ...form, [key]: null })}
className="text-[10px] text-white/40 hover:text-rose-300"
@@ -238,6 +244,9 @@ export function StoryboardWorkbench({ job, selectedFrames, open, onClose, onJobU
)
})}
</div>
<div className="rounded-md border border-violet-300/20 bg-violet-500/10 px-3 py-2 text-[11px] leading-relaxed text-violet-100/75">
📋
</div>
{/* 改造 brief明确“借鉴参考 → 变成 SKG 产品视频”,避免直接复刻 */}
<section className="rounded-lg bg-white/[0.035] border border-white/10 p-3">
@@ -314,13 +323,13 @@ export function StoryboardWorkbench({ job, selectedFrames, open, onClose, onJobU
}
}}
className="w-full py-3 rounded-lg text-[13.5px] font-semibold inline-flex items-center justify-center gap-2 bg-gradient-to-r from-rose-500 to-violet-500 text-white border border-violet-300/40 shadow-lg shadow-violet-500/20 hover:from-rose-400 hover:to-violet-400 disabled:opacity-40 disabled:cursor-not-allowed"
title={`当前分镜关键帧作为首帧,调用 ${currentModelLabel} 生视频 API`}
title={`首帧和尾帧调用 ${currentModelLabel} 生视频 API`}
>
{generating ? <Loader2 className="h-4 w-4 animate-spin" /> : <Wand2 className="h-4 w-4" />}
</button>
<div className="mt-2 text-[10.5px] text-white/35 leading-relaxed">
4 MP4 Video Gen
+ / MP4 Video Gen
</div>
</section>
</div>

View File

@@ -57,6 +57,8 @@ export interface ImageRef {
export interface StoryboardScene {
duration: number
first_image?: ImageRef | null
last_image?: ImageRef | null
subject_image?: ImageRef | null
scene_image?: ImageRef | null
product_image?: ImageRef | null
@@ -378,6 +380,8 @@ export async function generateStoryboardVideo(
body: {
prompt: string
duration?: number
first_image?: ImageRef | null
last_image?: ImageRef | null
subject_image?: ImageRef | null
scene_image?: ImageRef | null
product_image?: ImageRef | null