From 989cc912ec2d4daa606f9436fa5be8ddd804d5d6 Mon Sep 17 00:00:00 2001
From: kang <wankang2050@gmail.com>
Date: Wed, 13 May 2026 20:23:53 +0800
Subject: [PATCH] auto-save 2026-05-13 20:23 (~2)

---
 .memory/worklog.json |  13 +++
 api/main.py          | 218 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 231 insertions(+)

diff --git a/.memory/worklog.json b/.memory/worklog.json
index 332f0ef..aeba63c 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -2295,6 +2295,19 @@
       "message": "auto-save 2026-05-13 20:12 (~5)",
       "hash": "0b6a463",
       "files_changed": 5
+    },
+    {
+      "ts": "2026-05-13T20:18:24+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-13 20:18 (~4)",
+      "hash": "40a665a",
+      "files_changed": 4
+    },
+    {
+      "ts": "2026-05-13T12:19:30Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 2 项未提交变更 · 最近提交：auto-save 2026-05-13 20:18 (~4)",
+      "files_changed": 2
     }
   ]
 }
diff --git a/api/main.py b/api/main.py
index ed89673..1f61d4b 100644
--- a/api/main.py
+++ b/api/main.py
@@ -5,6 +5,7 @@ import json
 import os
 import shutil
 import subprocess
+import time
 import uuid
 from contextlib import asynccontextmanager
 from pathlib import Path
@@ -29,6 +30,13 @@ TRANSLATE_MODEL = os.getenv("TRANSLATE_MODEL", "gemini-2.5-flash")
 REWRITE_MODEL = os.getenv("REWRITE_MODEL", "gemini-2.5-pro")
 VISION_MODEL = os.getenv("VISION_MODEL", "gemini-2.5-flash")
 IMAGE_MODEL = os.getenv("IMAGE_MODEL", "gemini-3-pro-image-preview")
+VIDEO_MODEL = os.getenv("VIDEO_MODEL", "seedance").strip() or "seedance"  # default video model; an unset or blank env var yields "seedance"
+VIDEO_MODEL_ALIASES = {  # alias -> model name; each entry is overridable via VIDEO_MODEL_<ALIAS>, blank values fall back to the alias itself
+    "seedance": os.getenv("VIDEO_MODEL_SEEDANCE", "seedance").strip() or "seedance",
+    "kling": os.getenv("VIDEO_MODEL_KLING", "kling").strip() or "kling",
+    "veo3": os.getenv("VIDEO_MODEL_VEO3", "veo3").strip() or "veo3",
+}  # NOTE(review): no line added by this patch reads VIDEO_MODEL_ALIASES - confirm where aliases are meant to be resolved
+VIDEO_DURATION_FIELD = os.getenv("VIDEO_DURATION_FIELD", "seconds").strip() or "seconds"  # presumably the request field name carrying the clip length; NOTE(review): not read by any line added in this patch - confirm
 
 # OpenAI 客户端（OpenAI 兼容网关，含 SKG ezlink）
 from openai import OpenAI
@@ -63,6 +71,21 @@ class GeneratedImage(BaseModel):
     created_at: float = 0.0
 
 
+class GeneratedVideo(BaseModel):  # one storyboard video generation, tracked in Job.generated_videos
+    id: str  # local id; the submitting endpoint uses the first 12 hex chars of a uuid4
+    provider_id: str = ""  # id reported by the video API's create response
+    frame_idx: int  # index of the frame the video was requested for
+    prompt: str  # text prompt sent to the video API
+    model: str = ""  # model name sent to the video API
+    status: Literal["queued", "in_progress", "completed", "failed"] = "queued"  # lifecycle state; other values fail validation
+    url: str = ""  # API-relative URL of the finished mp4; filled in on completion
+    poster_url: str = ""  # URL of the reference image shown as the poster
+    duration: float = 4.0  # clip length in seconds
+    progress: int = 0  # set to 100 on completion; NOTE(review): presumably a percentage - confirm against the provider's `progress` field
+    error: str = ""  # failure detail; the render task stores at most 500 chars of the exception text
+    created_at: float = 0.0  # time.time() at submission
+
+
 class StoryboardScene(BaseModel):
     """分镜头编排：每个 selected 分镜对应一个 scene 描述
     v2: 4 图槽 + 时长（复制粘贴模式）— 主体 / 场景 / 产品 / 动作 各一张图
@@ -141,6 +164,7 @@ class Job(BaseModel):
     frames: list[KeyFrame] = Field(default_factory=list)
     transcript: list[TranscriptSegment] = Field(default_factory=list)
     storyboard_images: list[StoryboardImage] = Field(default_factory=list)
+    generated_videos: list[GeneratedVideo] = Field(default_factory=list)
     error: str = ""
 
 
@@ -163,6 +187,132 @@ def update(job: Job, **kw) -> None:
     save_state(job)
 
 
+def public_api_base() -> str:
+    """Return the base URL of the OpenAI-compatible API, without a trailing slash.
+
+    Falls back to ``https://api.openai.com/v1`` when ``LLM_BASE_URL`` is empty.
+    """
+    return (LLM_BASE_URL or "https://api.openai.com/v1").rstrip("/")
+
+
+def _safe_ref_id(value: object) -> str:
+    """Normalise a client-supplied element/cutout id for use in a file name.
+
+    Returns the stripped string form, or ``""`` when the value is falsy or
+    contains a path separator or ``..`` (so it cannot point outside the
+    directory the file name is joined to).
+    """
+    text = str(value or "").strip()
+    if "/" in text or "\\" in text or ".." in text:
+        return ""
+    return text
+
+
+def storyboard_ref_path(job_id: str, ref: dict | None) -> Path | None:
+    """Resolve a storyboard image reference to an existing file on disk.
+
+    ``ref`` is the client-supplied dict with ``kind`` (``"keyframe"`` or
+    ``"cutout"``), ``frame_idx`` and, for cutouts, ``element_id`` plus an
+    optional ``cutout_id``.  Returns ``None`` when the reference is empty,
+    malformed, of an unknown kind, or no matching file exists.
+    """
+    if not ref:
+        return None
+    try:
+        frame_idx = int(ref.get("frame_idx"))
+    except (TypeError, ValueError, OverflowError):
+        return None
+    kind = ref.get("kind")
+    if kind == "keyframe":
+        p = job_dir(job_id) / "frames" / f"{frame_idx:03d}.jpg"
+        return p if p.exists() else None
+    if kind == "cutout":
+        # Ids arrive from the request body: coerce to str (a numeric id used to
+        # crash on .strip()) and refuse anything that could leave elements/.
+        element_id = _safe_ref_id(ref.get("element_id"))
+        cutout_id = _safe_ref_id(ref.get("cutout_id"))
+        if not element_id:
+            return None
+        elements = job_dir(job_id) / "elements"
+        stem = f"{frame_idx:03d}_{element_id}"
+        candidates = []
+        # A cutout id distinct from the element id is looked up first, under
+        # its own file name, before the element-level .jpg / .png.
+        if cutout_id and cutout_id != element_id:
+            candidates.append(elements / f"{stem}_{cutout_id}.jpg")
+        candidates.append(elements / f"{stem}.jpg")
+        candidates.append(elements / f"{stem}.png")
+        for p in candidates:
+            if p.exists():
+                return p
+    return None
+
+
+def storyboard_ref_url(job_id: str, ref: dict | None) -> str:
+    """Build the API-relative URL for a storyboard image reference.
+
+    Returns ``""`` when the reference is empty, has no usable ``frame_idx``,
+    is a cutout without ``element_id``, or is of an unknown kind.  Unlike
+    :func:`storyboard_ref_path`, this does not check that the file exists.
+    """
+    if not ref:
+        return ""
+    try:
+        # Guarded so a non-numeric frame_idx yields "" instead of raising
+        # inside the request handler (None is rejected here as well).
+        frame_idx = int(ref.get("frame_idx"))
+    except (TypeError, ValueError, OverflowError):
+        return ""
+    kind = ref.get("kind")
+    if kind == "keyframe":
+        return f"/jobs/{job_id}/frames/{frame_idx}.jpg"
+    if kind == "cutout" and ref.get("element_id"):
+        element_id = ref.get("element_id")
+        cutout_id = ref.get("cutout_id")
+        base = f"/jobs/{job_id}/frames/{frame_idx}/elements/{element_id}"
+        if cutout_id and cutout_id != element_id:
+            return f"{base}/cutouts/{cutout_id}.jpg"
+        return f"{base}/cutout.jpg"
+    return ""
+
+
+def prepare_video_reference(src: Path, dst: Path, size: tuple[int, int] = (720, 1280)) -> None:
+    """Write ``src`` to ``dst`` as a JPEG letterboxed onto a ``size`` canvas.
+
+    The image is converted to RGB, shrunk (never enlarged) to fit inside
+    ``size`` while keeping its aspect ratio, and centred on a near-black
+    background.  Parent directories of ``dst`` are created as needed.
+    """
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    # Context manager closes the source file handle; convert() returns an
+    # independent in-memory copy, so it stays usable after the block.
+    with Image.open(src) as raw:
+        img = raw.convert("RGB")
+    img.thumbnail(size, Image.Resampling.LANCZOS)
+    canvas = Image.new("RGB", size, (8, 8, 10))
+    x = (size[0] - img.width) // 2
+    y = (size[1] - img.height) // 2
+    canvas.paste(img, (x, y))
+    canvas.save(dst, "JPEG", quality=94)
+
+
+def update_generated_video(job_id: str, video_id: str, **kw) -> None:
+    """Apply ``kw`` to the job's generated video whose id is ``video_id``.
+
+    The matching entry is rebuilt through ``GeneratedVideo(**data)`` so the
+    new values are validated; other entries are kept as-is.  The resulting
+    list is stored via ``update``.  Does nothing when the job is unknown.
+    """
+    job = JOBS.get(job_id)
+    if not job:
+        return
+    updated = [
+        GeneratedVideo(**{**v.model_dump(), **kw}) if v.id == video_id else v
+        for v in job.generated_videos
+    ]
+    update(job, generated_videos=updated)
+
+
 @asynccontextmanager
 async def lifespan(_: FastAPI):
     # 启动时从磁盘恢复 jobs（简化版：只列目录）
@@ -1468,6 +1565,189 @@ class UpdateStoryboardReq(BaseModel):
     reference_ids: list[str] = []
 
 
+class GenerateStoryboardVideoReq(BaseModel):
+    # Request body for POST /jobs/{job_id}/frames/{idx}/storyboard/video.
+    prompt: str  # text prompt; must be non-blank after stripping
+    duration: float = 4  # requested length in seconds; snapped by video_seconds()
+    # Optional image references ({"kind", "frame_idx", ...} dicts).  The first
+    # one present, in the order product > subject > scene > action, becomes
+    # the video's reference image.
+    subject_image: dict | None = None
+    scene_image: dict | None = None
+    product_image: dict | None = None
+    action_image: dict | None = None
+    model: str = ""  # blank selects VIDEO_MODEL
+    size: str = "720x1280"  # "WIDTHxHEIGHT" forwarded to the video API
+
+
+def video_seconds(duration: float) -> str:
+    """Snap a requested duration to the clip length sent to the video API.
+
+    Returns ``"4"`` for durations up to 6 s, ``"8"`` up to 10 s, else ``"12"``.
+    """
+    if duration <= 6:
+        return "4"
+    if duration <= 10:
+        return "8"
+    return "12"
+
+
+def _parse_video_size(size: str, default: tuple[int, int] = (720, 1280)) -> tuple[int, int]:
+    """Parse a ``"WIDTHxHEIGHT"`` string, returning ``default`` if malformed."""
+    try:
+        width, height = (int(part) for part in size.lower().split("x"))
+    except (AttributeError, ValueError):
+        return default
+    if width <= 0 or height <= 0:
+        return default
+    return width, height
+
+
+def render_storyboard_video(job_id: str, local_id: str, provider_id: str, ref_path: Path, prompt: str, model: str, seconds: str, size: str) -> None:
+    """Background task: create a video from a reference image and track it.
+
+    Uploads a letterboxed copy of ``ref_path`` with the prompt to
+    ``POST {base}/videos``, polls ``GET {base}/videos/{id}`` every 8 s for up
+    to 420 s, then saves ``/videos/{id}/content`` as
+    ``storyboard_videos/<local_id>/video.mp4`` under the job directory.
+    Progress and the final state are recorded on the job's ``GeneratedVideo``
+    entry ``local_id``; any exception marks that entry as failed rather than
+    propagating.
+
+    ``provider_id`` is only used as a fallback when the create response
+    carries no ``id``.
+    """
+    import httpx
+
+    out_dir = job_dir(job_id) / "storyboard_videos" / local_id
+    ref_img = out_dir / "reference.jpg"
+    out_mp4 = out_dir / "video.mp4"
+    base = public_api_base()
+    headers = {"Authorization": f"Bearer {LLM_API_KEY}"}
+
+    try:
+        # Letterbox onto the requested output size so the uploaded reference
+        # has the same dimensions as the video being asked for (previously it
+        # was always 720x1280, whatever ``size`` said).
+        prepare_video_reference(ref_path, ref_img, _parse_video_size(size))
+        update_generated_video(job_id, local_id, status="in_progress", progress=5)
+        with httpx.Client(timeout=120) as client:
+            with ref_img.open("rb") as fh:
+                create = client.post(
+                    f"{base}/videos",
+                    headers=headers,
+                    data={
+                        "model": model,
+                        "prompt": prompt,
+                        # Field name is configurable for gateways that do not
+                        # call it "seconds" (the default).
+                        VIDEO_DURATION_FIELD: seconds,
+                        "size": size,
+                    },
+                    files={"input_reference": ("reference.jpg", fh, "image/jpeg")},
+                )
+            create.raise_for_status()
+            data = create.json()
+            video_api_id = data.get("id") or provider_id
+            status = data.get("status", "queued")
+            progress = int(data.get("progress") or 5)
+            update_generated_video(job_id, local_id, provider_id=video_api_id, status=status, progress=progress)
+
+            deadline = time.time() + 420
+            while status in {"queued", "in_progress"} and time.time() < deadline:
+                time.sleep(8)
+                poll = client.get(f"{base}/videos/{video_api_id}", headers=headers)
+                poll.raise_for_status()
+                pdata = poll.json()
+                status = pdata.get("status", status)
+                progress = int(pdata.get("progress") or progress)
+                update_generated_video(job_id, local_id, status=status, progress=progress)
+
+            if status != "completed":
+                # Covers both a provider-side failure and the polling deadline
+                # running out while the job is still pending.
+                update_generated_video(job_id, local_id, status="failed", error=f"video status: {status}", progress=progress)
+                return
+
+            content = client.get(f"{base}/videos/{video_api_id}/content", headers=headers)
+            content.raise_for_status()
+            out_mp4.write_bytes(content.content)
+            update_generated_video(
+                job_id,
+                local_id,
+                status="completed",
+                progress=100,
+                url=f"/jobs/{job_id}/storyboard-videos/{local_id}.mp4",
+                error="",
+            )
+    except Exception as e:
+        # Boundary of a background task: record the failure on the entry.
+        update_generated_video(job_id, local_id, status="failed", error=str(e)[:500])
+
+
+@app.post("/jobs/{job_id}/frames/{idx}/storyboard/video", response_model=Job)
+def generate_storyboard_video(job_id: str, idx: int, req: GenerateStoryboardVideoReq, bg: BackgroundTasks) -> Job:
+    """Queue video generation for one storyboard frame and return the job.
+
+    Registers a ``queued`` ``GeneratedVideo`` at the front of
+    ``job.generated_videos`` and schedules ``render_storyboard_video`` as a
+    background task.
+
+    Raises:
+        HTTPException: 404 for an unknown job or frame, or when no reference
+            image exists on disk; 500 when ``LLM_API_KEY`` is not configured;
+            400 when the prompt is blank.
+    """
+    job = JOBS.get(job_id)
+    if not job:
+        raise HTTPException(404, "job not found")
+    frame = next((f for f in job.frames if f.index == idx), None)
+    if not frame:
+        raise HTTPException(404, "frame not found")
+    if not LLM_API_KEY:
+        raise HTTPException(500, "LLM_API_KEY 未配置，无法调用视频生成 API")
+    prompt = req.prompt.strip()
+    if not prompt:
+        raise HTTPException(400, "prompt required")
+
+    # Default reference and poster: the frame's own keyframe image.
+    ref_path = job_dir(job_id) / "frames" / f"{idx:03d}.jpg"
+    poster = f"/jobs/{job_id}/frames/{idx}.jpg"
+    ref = req.product_image or req.subject_image or req.scene_image or req.action_image
+    chosen = storyboard_ref_path(job_id, ref)
+    if chosen is not None:
+        # Only advertise the requested reference as poster when its file
+        # really exists; otherwise the poster would show an image other than
+        # the one uploaded to the video API.
+        ref_path = chosen
+        poster = storyboard_ref_url(job_id, ref) or poster
+    if not ref_path.exists():
+        raise HTTPException(404, "reference image missing")
+
+    local_id = uuid.uuid4().hex[:12]
+    # Resolve well-known aliases to the gateway model name configured via
+    # VIDEO_MODEL_<ALIAS>; any other name is passed through unchanged.
+    requested_model = req.model.strip() or VIDEO_MODEL
+    model = VIDEO_MODEL_ALIASES.get(requested_model, requested_model)
+    seconds = video_seconds(float(req.duration or 4))
+    item = GeneratedVideo(
+        id=local_id,
+        provider_id="",
+        frame_idx=idx,
+        prompt=prompt,
+        model=model,
+        status="queued",
+        url="",
+        poster_url=poster,
+        duration=float(seconds),
+        progress=0,
+        created_at=time.time(),
+    )
+    update(job, generated_videos=[item] + job.generated_videos, message=f"视频生成已提交 · 分镜 {idx + 1}")
+    bg.add_task(render_storyboard_video, job_id, local_id, "", ref_path, prompt, model, seconds, req.size)
+    return job
+
+
 @app.put("/jobs/{job_id}/frames/{idx}/storyboard", response_model=Job)
 def update_storyboard(job_id: str, idx: int, req: UpdateStoryboardReq) -> Job:
     """更新分镜的编排字段（subject / product / scene / action / duration / reference_ids）"""