feat: simplify storyboard video card flow

This commit is contained in:
2026-05-19 11:05:57 +08:00
parent ff7bf00f6d
commit 3462758585
4 changed files with 1133 additions and 80 deletions

View File

@@ -376,6 +376,13 @@ class StoryboardScene(BaseModel):
needs_product: bool = True
needs_subject: bool = True
subject_brief: str = ""
skg_copy_en: str = ""
skg_copy_zh: str = ""
scene_one_line_en: str = ""
scene_one_line_zh: str = ""
action_one_line_en: str = ""
action_one_line_zh: str = ""
selected_video_id: str = ""
first_frame_plan: str = ""
last_frame_plan: str = ""
product_placement: str = ""
@@ -5599,6 +5606,14 @@ class UpdateStoryboardReq(BaseModel):
visual_mode: Literal["person_only", "person_product", "product_only", "environment"] = "person_product"
needs_product: bool = True
needs_subject: bool = True
subject_brief: str = ""
skg_copy_en: str = ""
skg_copy_zh: str = ""
scene_one_line_en: str = ""
scene_one_line_zh: str = ""
action_one_line_en: str = ""
action_one_line_zh: str = ""
selected_video_id: str = ""
first_frame_plan: str = ""
last_frame_plan: str = ""
product_placement: str = ""
@@ -5615,8 +5630,10 @@ class UpdateStoryboardReq(BaseModel):
class GenerateStoryboardVideoReq(BaseModel):
prompt: str
prompt: str = ""
duration: float = 4
count: int = 1
seed: int | None = None
first_image: dict | None = None
last_image: dict | None = None
product_images: list[dict] = Field(default_factory=list)
@@ -5630,6 +5647,175 @@ class GenerateStoryboardVideoReq(BaseModel):
size: str = "720x1280"
class QuickStoryboardPlanReq(BaseModel):
    # Request body describing one compact storyboard row: a voice-over copy
    # line, a one-line scene and a one-line action, each with an English
    # (`_en`) and a Chinese (`_zh`) variant. The quick-plan helpers in this
    # file use the `_en` value when it is non-blank and otherwise fall back
    # to the `_zh` value.
    skg_copy_en: str = ""
    skg_copy_zh: str = ""
    scene_one_line_en: str = ""
    scene_one_line_zh: str = ""
    action_one_line_en: str = ""
    action_one_line_zh: str = ""
    # When blank, the plan builders derive a brief from the key frame instead.
    subject_brief: str = ""
    # The plan builders clamp this to the 3.2-8.0 range; a falsy value counts as 4.
    duration: float = 4
    visual_mode: Literal["person_only", "person_product", "product_only", "environment"] = "person_product"
    # Selects which product-placement sentence the fallback plan uses.
    needs_product: bool = True
    # Selects the default subject sentence when no subject brief is available.
    needs_subject: bool = True
class RefineStoryboardReq(BaseModel):
    # Request body for the storyboard refine endpoint: the row as it currently
    # stands plus free-text feedback describing how it should be rewritten.
    current_plan: QuickStoryboardPlanReq = Field(default_factory=QuickStoryboardPlanReq)
    # The refine endpoint answers HTTP 400 when this is blank after stripping.
    user_feedback: str = ""
class BatchGenerateStoryboardReq(BaseModel):
    # Request body for generating videos for every frame of a job in one go.
    # Number of video variations per frame; the batch worker clamps it to 1-12.
    count_per_row: int = 4
    # Size of the worker thread pool; the batch worker clamps it to 1-8.
    concurrency: int = 4
    # Forwarded unchanged as the `model` of each per-frame video request.
    model: str = ""
    # Forwarded unchanged as the `size` of each per-frame video request.
    size: str = "720x1280"
def _quick_field_en(en: str, zh: str) -> str:
    """Return the English form of a bilingual quick-plan field.

    The stripped ``en`` value is used when it is non-empty; otherwise the
    stripped ``zh`` value is used. Whichever is chosen (possibly the empty
    string) is passed through ``_ensure_english`` before being returned.
    """
    chosen = (en or "").strip() or (zh or "").strip()
    return _ensure_english(chosen)
def _subject_brief_for_frame(frame: KeyFrame | None) -> str:
    """Collect up to three subject briefs from a frame's elements.

    For every element the ``subject_consensus_brief`` is preferred and
    ``subject_consensus_brief_zh`` is the fallback; blank briefs are skipped.
    The first three remaining briefs are joined with newlines. A missing
    frame, or one without usable briefs, yields the empty string.
    """
    if not frame:
        return ""
    collected: list[str] = []
    for element in frame.elements or []:
        brief = (element.subject_consensus_brief or element.subject_consensus_brief_zh or "").strip()
        if brief:
            collected.append(brief)
    return "\n".join(collected[:3])
def _fallback_quick_storyboard_plan(req: QuickStoryboardPlanReq, frame: KeyFrame | None = None) -> StoryboardScene:
    """Build a complete storyboard scene from a compact row without the LLM.

    Each one-line field is resolved to English through ``_quick_field_en`` and
    replaced by a stock sentence when that comes back empty. The subject brief
    is taken from the request, or derived from ``frame`` when the request
    leaves it blank. The duration is clamped to the 3.2-8.0 range, with a
    falsy duration counting as 4.
    """
    voice_over = _quick_field_en(req.skg_copy_en, req.skg_copy_zh)
    if not voice_over:
        voice_over = "Show the SKG massage product as a natural upgrade in this short-video beat."

    setting = _quick_field_en(req.scene_one_line_en, req.scene_one_line_zh)
    if not setting:
        setting = "Clean vertical short-video scene with premium wellness lighting."

    motion = _quick_field_en(req.action_one_line_en, req.action_one_line_zh)
    if not motion:
        motion = "A natural creator-style subject introduces and uses the SKG neck-and-shoulder massager."

    brief = (req.subject_brief or _subject_brief_for_frame(frame)).strip()

    if req.needs_product:
        placement = (
            "Show the SKG white U-shaped neck-and-shoulder massager worn externally around the neck and shoulders; "
            "preserve realistic scale, contact pads, button placement, side thickness, and left-right asymmetry."
        )
    else:
        placement = "Do not show the SKG product in this beat unless it is only a subtle background context."

    # An explicit brief wins; otherwise the subject text depends on whether a
    # main character is wanted at all.
    if brief:
        subject_text = brief
    elif req.needs_subject:
        subject_text = "Use a consistent similar commercial subject with clear neck and shoulder area."
    else:
        subject_text = "No main character required."

    seconds = float(req.duration or 4)
    seconds = max(3.2, min(8.0, seconds))

    return StoryboardScene(
        duration=seconds,
        visual_mode=req.visual_mode,
        needs_product=bool(req.needs_product),
        needs_subject=bool(req.needs_subject),
        subject_brief=brief,
        skg_copy_en=voice_over,
        skg_copy_zh=(req.skg_copy_zh or "").strip(),
        scene_one_line_en=setting,
        scene_one_line_zh=(req.scene_one_line_zh or "").strip(),
        action_one_line_en=motion,
        action_one_line_zh=(req.action_one_line_zh or "").strip(),
        first_frame_plan=f"First frame: {setting}. Establish the subject state and visual problem clearly. {motion}",
        last_frame_plan=f"Last frame: continue from the first frame and land on a clearer SKG product benefit moment. {motion}",
        product_placement=placement,
        subject=subject_text,
        scene=f"{setting}\nVoice-over reference: {voice_over}",
        product=placement,
        action=f"{motion}\nEnglish voice-over: {voice_over}",
        reference_ids=[],
    )
def _quick_storyboard_plan_sync(req: QuickStoryboardPlanReq, frame: KeyFrame | None = None) -> StoryboardScene:
    """Expand a compact storyboard row into a full scene, using the LLM when available.

    The deterministic fallback plan is always built first. It is returned
    unchanged when ``LLM_API_KEY`` is not set, or when the LLM call or the
    parsing of its reply raises. Otherwise each field of the reply overrides
    the fallback value; empty or missing fields keep the fallback.

    Args:
        req: The compact row to expand.
        frame: Optional key frame used to derive a subject brief when the
            request does not carry one.

    Returns:
        A ``StoryboardScene`` with an empty ``reference_ids`` list and a
        duration clamped to the 3.2-8.0 range.
    """
    fallback = _fallback_quick_storyboard_plan(req, frame)
    if not LLM_API_KEY:
        return fallback

    def as_flag(value: object, default: bool) -> bool:
        # bool("false") is True, so string replies are parsed explicitly; a
        # null or unrecognised value keeps the fallback instead of flipping it.
        if isinstance(value, bool):
            return value
        if value is None:
            return default
        if isinstance(value, str):
            token = value.strip().lower()
            if token in {"true", "yes", "1"}:
                return True
            if token in {"false", "no", "0"}:
                return False
            return default
        return bool(value)

    subject_brief = (req.subject_brief or _subject_brief_for_frame(frame)).strip()
    payload = {
        "skg_copy_en": _quick_field_en(req.skg_copy_en, req.skg_copy_zh),
        "skg_copy_zh": req.skg_copy_zh,
        "scene_one_line_en": _quick_field_en(req.scene_one_line_en, req.scene_one_line_zh),
        "scene_one_line_zh": req.scene_one_line_zh,
        "action_one_line_en": _quick_field_en(req.action_one_line_en, req.action_one_line_zh),
        "action_one_line_zh": req.action_one_line_zh,
        "subject_brief": subject_brief,
        "duration": req.duration,
        "visual_mode": req.visual_mode,
        "needs_product": req.needs_product,
        "needs_subject": req.needs_subject,
    }
    prompt = (
        "Expand this compact SKG TikTok recreation row into a complete video generation storyboard plan. "
        "Return strict JSON only. All English fields must be English. Chinese mirror fields may be Simplified Chinese.\n"
        "Schema: {\"visual_mode\":\"person_only|person_product|product_only|environment\","
        "\"needs_product\":true,\"needs_subject\":true,"
        "\"skg_copy_en\":\"...\",\"skg_copy_zh\":\"...\","
        "\"scene_one_line_en\":\"...\",\"scene_one_line_zh\":\"...\","
        "\"action_one_line_en\":\"...\",\"action_one_line_zh\":\"...\","
        "\"subject_brief\":\"...\",\"first_frame_plan\":\"...\",\"last_frame_plan\":\"...\","
        "\"product_placement\":\"...\",\"subject\":\"...\",\"scene\":\"...\",\"product\":\"...\",\"action\":\"...\"}.\n"
        "Rules: keep the row compact semantics; do not add medical treatment claims; product is an SKG white U-shaped neck-and-shoulder wearable massager; "
        "the final video prompt must be usable without user-visible first/last frame steps.\n\n"
        f"Input:\n{json.dumps(payload, ensure_ascii=False)}"
    )
    try:
        resp = llm().chat.completions.create(
            model=REWRITE_MODEL,
            messages=[
                {"role": "system", "content": "Return valid JSON only. No markdown. No commentary."},
                {"role": "user", "content": prompt},
            ],
            response_format={"type": "json_object"},
            temperature=0.35,
            max_tokens=1400,
        )
        raw = (resp.choices[0].message.content or "").strip()
        # Some models wrap the object in a markdown fence despite the
        # instructions; keep only the outermost {...} span in that case.
        if raw.startswith("```"):
            match = re.search(r"\{[\s\S]*\}", raw)
            raw = match.group(0) if match else raw
        data = json.loads(raw)
        if not isinstance(data, dict):
            raise ValueError("LLM reply is not a JSON object")

        def text(key: str) -> str:
            # Reply value when truthy, otherwise the fallback attribute of the same name.
            return str(data.get(key) or getattr(fallback, key)).strip()

        def english(key: str) -> str:
            return _ensure_english(text(key))

        mode = data.get("visual_mode")
        if mode not in ("person_only", "person_product", "product_only", "environment"):
            mode = fallback.visual_mode

        return StoryboardScene(
            duration=max(3.2, min(8.0, float(req.duration or 4))),
            visual_mode=mode,
            needs_product=as_flag(data.get("needs_product"), fallback.needs_product),
            needs_subject=as_flag(data.get("needs_subject"), fallback.needs_subject),
            subject_brief=text("subject_brief"),
            skg_copy_en=english("skg_copy_en"),
            skg_copy_zh=text("skg_copy_zh"),
            scene_one_line_en=english("scene_one_line_en"),
            scene_one_line_zh=text("scene_one_line_zh"),
            action_one_line_en=english("action_one_line_en"),
            action_one_line_zh=text("action_one_line_zh"),
            first_frame_plan=english("first_frame_plan"),
            last_frame_plan=english("last_frame_plan"),
            product_placement=english("product_placement"),
            subject=english("subject"),
            scene=english("scene"),
            product=english("product"),
            action=english("action"),
            reference_ids=[],
        )
    except Exception as e:
        # Deliberate best-effort: any LLM or parsing failure degrades to the
        # deterministic plan rather than failing the request.
        print(f"[quick storyboard fallback] {e}", flush=True)
        return fallback
def _storyboard_video_prompt(scene: StoryboardScene, seed: int | None = None) -> str:
    """Compose the text prompt sent to the video model for one storyboard scene.

    The prompt is a newline-joined list: a fixed opening instruction, one
    labelled line per scene field, a fixed style/safety line and, when
    ``seed`` is given, a variation-seed line. Compact one-line fields are
    preferred over the longer legacy fields. A labelled line is left out
    when its value is blank, so the model never receives a dangling label
    such as ``Subject brief: ``.

    Args:
        scene: The storyboard scene to describe.
        seed: Optional creative variation seed appended as the last line.

    Returns:
        The assembled prompt; never empty because of the fixed lines.
    """
    labelled_values = [
        ("English voice-over line", scene.skg_copy_en or scene.action or ""),
        ("Scene", scene.scene_one_line_en or scene.scene or ""),
        ("Subject + product + action", scene.action_one_line_en or scene.action or ""),
        ("First frame intent", scene.first_frame_plan or ""),
        ("Last frame intent", scene.last_frame_plan or ""),
        ("Product placement", scene.product_placement or scene.product or ""),
        ("Subject brief", scene.subject_brief or scene.subject or ""),
    ]
    parts = ["Create one vertical 9:16 short-form ad video clip for SKG."]
    for label, value in labelled_values:
        # Filter on the value, not the finished line: the label alone would
        # make every line look non-empty.
        if not value.strip():
            continue
        parts.append(f"{label}: {_ensure_english(value)}")
    parts.append(
        "Keep motion natural, creator-ad style, premium clean wellness lighting, no subtitles, no platform UI, no watermark, no medical treatment claims."
    )
    if seed is not None:
        parts.append(f"Creative variation seed: {seed}.")
    return "\n".join(parts)
class ProductFusionDescriptionReq(BaseModel):
shots: list[ProductFusionShot] = Field(default_factory=list)
@@ -5901,29 +6087,93 @@ def render_storyboard_video(
update_generated_video(job_id, local_id, status="failed", error=str(e)[:500])
@app.post("/jobs/{job_id}/frames/{idx}/storyboard/video", response_model=Job)
def generate_storyboard_video(job_id: str, idx: int, req: GenerateStoryboardVideoReq, bg: BackgroundTasks) -> Job:
@app.post("/jobs/{job_id}/frames/{idx}/storyboard/quick-plan", response_model=StoryboardScene)
def quick_plan_storyboard(job_id: str, idx: int, req: QuickStoryboardPlanReq) -> StoryboardScene:
    """Expand a compact storyboard row for one frame into a full scene plan.

    Raises:
        HTTPException: 404 when the job or the frame with index ``idx`` does
            not exist.
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    matches = [candidate for candidate in job.frames if candidate.index == idx]
    target = matches[0] if matches else None
    if not target:
        raise HTTPException(404, "frame not found")
    return _quick_storyboard_plan_sync(req, target)
@app.post("/jobs/{job_id}/frames/{idx}/storyboard/refine")
def refine_storyboard(job_id: str, idx: int, req: RefineStoryboardReq) -> dict:
    """Rewrite the six compact row fields of a storyboard according to user feedback.

    Returns:
        ``{"items": {...}, "model": ...}`` where ``items`` holds the six
        ``*_en`` / ``*_zh`` fields. When ``LLM_API_KEY`` is unset, or the LLM
        call or reply parsing fails, ``items`` is the normalised current row
        and ``model`` is ``"fallback"``; on failure an ``error`` entry with
        the truncated exception text is added.

    Raises:
        HTTPException: 404 when the job or frame does not exist, 400 when
            ``user_feedback`` is blank.
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    # The frame is looked up only to validate that it exists.
    frame = next((f for f in job.frames if f.index == idx), None)
    if not frame:
        raise HTTPException(404, "frame not found")
    current = req.current_plan
    feedback = req.user_feedback.strip()
    if not feedback:
        raise HTTPException(400, "user_feedback required")
    fallback = {
        "skg_copy_en": _quick_field_en(current.skg_copy_en, current.skg_copy_zh),
        "skg_copy_zh": current.skg_copy_zh.strip(),
        "scene_one_line_en": _quick_field_en(current.scene_one_line_en, current.scene_one_line_zh),
        "scene_one_line_zh": current.scene_one_line_zh.strip(),
        "action_one_line_en": _quick_field_en(current.action_one_line_en, current.action_one_line_zh),
        "action_one_line_zh": current.action_one_line_zh.strip(),
    }
    if not LLM_API_KEY:
        return {"items": fallback, "model": "fallback"}
    prompt = (
        "Rewrite this compact SKG storyboard row according to user feedback. "
        "Keep meaning and timing, improve clarity and video-generation usefulness. "
        "Return strict JSON only with exactly these fields: "
        "skg_copy_en, skg_copy_zh, scene_one_line_en, scene_one_line_zh, action_one_line_en, action_one_line_zh. "
        "English fields must be English; Chinese fields must be Simplified Chinese. "
        "No medical treatment claims.\n\n"
        f"Current:\n{json.dumps(fallback, ensure_ascii=False)}\n\n"
        f"User feedback:\n{feedback}"
    )
    try:
        resp = llm().chat.completions.create(
            model=REWRITE_MODEL,
            messages=[
                {"role": "system", "content": "Return valid JSON only. No markdown. No explanation."},
                {"role": "user", "content": prompt},
            ],
            response_format={"type": "json_object"},
            temperature=0.55,
            max_tokens=900,
        )
        raw = (resp.choices[0].message.content or "{}").strip()
        # Mirror _quick_storyboard_plan_sync: tolerate a markdown-fenced reply
        # by keeping only the outermost {...} span.
        if raw.startswith("```"):
            match = re.search(r"\{[\s\S]*\}", raw)
            raw = match.group(0) if match else raw
        data = json.loads(raw)
        if not isinstance(data, dict):
            raise ValueError("LLM reply is not a JSON object")

        def pick(key: str) -> str:
            # Reply value when truthy, otherwise the current row's value.
            return str(data.get(key) or fallback[key]).strip()

        out = {
            "skg_copy_en": _ensure_english(pick("skg_copy_en")),
            "skg_copy_zh": pick("skg_copy_zh"),
            "scene_one_line_en": _ensure_english(pick("scene_one_line_en")),
            "scene_one_line_zh": pick("scene_one_line_zh"),
            "action_one_line_en": _ensure_english(pick("action_one_line_en")),
            "action_one_line_zh": pick("action_one_line_zh"),
        }
        return {"items": out, "model": REWRITE_MODEL}
    except Exception as e:
        # Deliberate best-effort: hand the unchanged row back and report why.
        return {"items": fallback, "model": "fallback", "error": str(e)[:300]}
def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboardVideoReq, bg: BackgroundTasks | None = None) -> list[str]:
ensure_video_api_configured()
prompt = req.prompt.strip()
prompt = _ensure_english(req.prompt.strip())
if not prompt and frame.storyboard:
prompt = _storyboard_video_prompt(frame.storyboard, req.seed)
if not prompt:
raise HTTPException(400, "prompt required")
count = max(1, min(12, int(req.count or 1)))
ref = req.first_image or req.subject_image or req.product_image or req.scene_image or req.action_image
primary_role = "first_frame" if req.first_image else "reference_image"
ref_path = storyboard_ref_path(job_id, ref) or (job_dir(job_id) / "frames" / f"{idx:03d}.jpg")
ref_path = storyboard_ref_path(job.id, ref) or (job_dir(job.id) / "frames" / f"{frame.index:03d}.jpg")
if not ref_path.exists():
raise HTTPException(404, "reference image missing")
poster = storyboard_ref_url(job_id, ref) or f"/jobs/{job_id}/frames/{idx}.jpg"
last_ref_path = storyboard_ref_path(job_id, req.last_image)
poster = storyboard_ref_url(job.id, ref) or f"/jobs/{job.id}/frames/{frame.index}.jpg"
last_ref_path = storyboard_ref_path(job.id, req.last_image)
raw_product_refs = req.product_images[:6] if req.product_images else ([req.product_image] if req.product_image else [])
product_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in raw_product_refs) if p]
subject_ref_paths = [p for p in (storyboard_ref_path(job_id, r) for r in req.subject_images[:8]) if p]
product_ref_paths = [p for p in (storyboard_ref_path(job.id, r) for r in raw_product_refs) if p]
subject_ref_paths = [p for p in (storyboard_ref_path(job.id, r) for r in req.subject_images[:8]) if p]
reference_ref_paths = []
seen_ref_paths: set[str] = {str(ref_path)}
# Product fusion is sensitive to object drift. Send product references before
@@ -5934,27 +6184,117 @@ def generate_storyboard_video(job_id: str, idx: int, req: GenerateStoryboardVide
reference_ref_paths.append(p)
seen_ref_paths.add(key)
local_id = uuid.uuid4().hex[:12]
model = resolve_video_model(req.model)
seconds = video_seconds(float(req.duration or 4))
item = GeneratedVideo(
id=local_id,
provider_id="",
frame_idx=idx,
prompt=prompt,
model=model,
status="queued",
url="",
poster_url=poster,
duration=float(seconds),
progress=0,
created_at=time.time(),
)
update(job, generated_videos=[item] + job.generated_videos, message=f"视频生成已提交 · 分镜 {idx + 1}")
source_ref = req.source_ref
if source_ref and source_ref.kind == "source_video" and not source_ref.url:
source_ref = None
bg.add_task(render_storyboard_video, job_id, local_id, "", ref_path, prompt, model, seconds, req.size, source_ref, last_ref_path, reference_ref_paths, primary_role)
items: list[GeneratedVideo] = []
ids: list[str] = []
for i in range(count):
local_id = uuid.uuid4().hex[:12]
ids.append(local_id)
variant_seed = (req.seed + i) if req.seed is not None else random.randint(100000, 999999)
variant_prompt = _ensure_english(f"{prompt}\n\nCreate variation {i + 1} of {count}. Variation seed: {variant_seed}. Keep the same compact row meaning but vary camera motion, gesture timing, and composition.")
items.append(GeneratedVideo(
id=local_id,
provider_id="",
frame_idx=frame.index,
prompt=variant_prompt,
model=model,
status="queued",
url="",
poster_url=poster,
duration=float(seconds),
progress=0,
created_at=time.time(),
))
task_args = (job.id, local_id, "", ref_path, variant_prompt, model, seconds, req.size, source_ref, last_ref_path, reference_ref_paths, primary_role)
if bg is not None:
bg.add_task(render_storyboard_video, *task_args)
else:
threading.Thread(target=render_storyboard_video, args=task_args, daemon=True).start()
update(job, generated_videos=items + job.generated_videos, message=f"视频抽卡已提交 · 分镜 {frame.index + 1} · {count}")
return ids
@app.post("/jobs/{job_id}/frames/{idx}/storyboard/video", response_model=Job)
def generate_storyboard_video(job_id: str, idx: int, req: GenerateStoryboardVideoReq, bg: BackgroundTasks) -> Job:
    """Queue video generation for one storyboard frame and return the job.

    Raises:
        HTTPException: 404 when the job or the frame with index ``idx`` does
            not exist.
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    matches = [candidate for candidate in job.frames if candidate.index == idx]
    target = matches[0] if matches else None
    if not target:
        raise HTTPException(404, "frame not found")
    # Rendering is scheduled on the request's background tasks.
    _enqueue_storyboard_videos(job, target, req, bg)
    return job
def _batch_generate_worker(job_id: str, req: BatchGenerateStoryboardReq) -> None:
    """Queue video generation for every frame of a job, a few frames at a time.

    Frames without a storyboard first get one from the quick-plan helper.
    Each frame is then submitted through ``_enqueue_storyboard_videos``.
    A failure on one frame is recorded in the job's ``error`` field and does
    not stop the remaining frames. Progress is reported through the job's
    ``message`` field.

    Args:
        job_id: Id of the job to process; the worker returns silently when it
            is unknown.
        req: Batch settings. ``count_per_row`` is clamped to 1-12 and
            ``concurrency`` to 1-8.
    """
    from concurrent.futures import ThreadPoolExecutor, wait

    job = JOBS.get(job_id)
    if not job:
        return
    count = max(1, min(12, int(req.count_per_row or 4)))
    concurrency = max(1, min(8, int(req.concurrency or 4)))
    frames = list(job.frames)
    total = len(frames)
    update(job, message=f"整片一键抽卡已启动 · 0/{total}", error="")
    done = 0
    # submit_one runs on several pool threads. The counter increment and the
    # progress message are published under one lock so no increment is lost
    # and the reported count never goes backwards.
    progress_lock = threading.Lock()

    def submit_one(frame: KeyFrame) -> None:
        nonlocal done
        try:
            scene = frame.storyboard
            if scene is None:
                quick_req = QuickStoryboardPlanReq(
                    scene_one_line_en=(frame.description or {}).get("scene", "") if isinstance(frame.description, dict) else "",
                    action_one_line_en="Use the source beat as a compact SKG product ad action with a clear subject, product, and motion.",
                    subject_brief=_subject_brief_for_frame(frame),
                    duration=5,
                )
                scene = _quick_storyboard_plan_sync(quick_req, frame)
                frame.storyboard = scene
                update(job, frames=job.frames)
            prompt = _storyboard_video_prompt(scene)
            video_req = GenerateStoryboardVideoReq(
                prompt=prompt,
                duration=scene.duration or 4,
                count=count,
                first_image=scene.first_image,
                last_image=scene.last_image,
                product_images=scene.product_images,
                subject_images=scene.subject_images,
                subject_image=scene.subject_image,
                scene_image=scene.scene_image,
                product_image=scene.product_image,
                action_image=scene.action_image,
                model=req.model,
                size=req.size,
            )
            # No BackgroundTasks object exists outside a request, so the
            # enqueue helper starts its own threads.
            _enqueue_storyboard_videos(job, frame, video_req, None)
        except Exception as e:
            # Best-effort per frame: record the failure and keep going.
            update(job, error=f"分镜 {frame.index + 1} 抽卡失败:{str(e)[:220]}")
        finally:
            with progress_lock:
                done += 1
                update(job, message=f"整片一键抽卡进行中 · {done}/{total}")

    with ThreadPoolExecutor(max_workers=concurrency) as executor:
        futures = [executor.submit(submit_one, frame) for frame in frames]
        wait(futures)
    update(job, message=f"整片一键抽卡已提交 · {total}/{total} 条 · 每条 {count}")
@app.post("/jobs/{job_id}/storyboard/batch-generate-all", response_model=Job)
def batch_generate_all_storyboard(job_id: str, req: BatchGenerateStoryboardReq) -> Job:
    """Start background video generation for every frame of a job.

    The work runs on a daemon thread; the job is returned immediately and
    progress is reported through its ``message`` field.

    Raises:
        HTTPException: 404 when the job does not exist, 400 when it has no
            frames. ``ensure_video_api_configured`` is called first and may
            reject the request as well.
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    ensure_video_api_configured()
    if not job.frames:
        raise HTTPException(400, "no frames to generate")
    per_row = max(1, min(12, int(req.count_per_row or 4)))
    # Publish the "started" status before the worker exists, so this write
    # can never overwrite a progress message the worker has already posted.
    update(job, message=f"整片一键抽卡已启动 · {len(job.frames)} 条 · 每条 {per_row}")
    threading.Thread(target=_batch_generate_worker, args=(job_id, req), daemon=True).start()
    return job
@@ -6901,8 +7241,12 @@ def delete_storyboard_video(job_id: str, video_id: str) -> Job:
shutil.rmtree(out_dir)
except OSError:
pass
if removed:
for frame in job.frames:
if frame.index == removed.frame_idx and frame.storyboard and frame.storyboard.selected_video_id == video_id:
frame.storyboard.selected_video_id = ""
msg = f"删除视频任务 · 分镜 {removed.frame_idx + 1}" if removed else "删除视频任务"
update(job, generated_videos=kept, message=msg)
update(job, generated_videos=kept, frames=job.frames, message=msg)
return job
@@ -6928,6 +7272,14 @@ def update_storyboard(job_id: str, idx: int, req: UpdateStoryboardReq) -> Job:
visual_mode=req.visual_mode,
needs_product=bool(req.needs_product),
needs_subject=bool(req.needs_subject),
subject_brief=req.subject_brief.strip(),
skg_copy_en=req.skg_copy_en.strip(),
skg_copy_zh=req.skg_copy_zh.strip(),
scene_one_line_en=req.scene_one_line_en.strip(),
scene_one_line_zh=req.scene_one_line_zh.strip(),
action_one_line_en=req.action_one_line_en.strip(),
action_one_line_zh=req.action_one_line_zh.strip(),
selected_video_id=req.selected_video_id.strip(),
first_frame_plan=req.first_frame_plan.strip(),
last_frame_plan=req.last_frame_plan.strip(),
product_placement=req.product_placement.strip(),