# Reconstructed from a whitespace-collapsed git patch
# (commit 989cc912, "auto-save 2026-05-13 20:23 (~2)").
#
# Only the code that the patch ADDS to api/main.py is reproduced below.
# `os`, `uuid`, `Path`, `Literal`, `BaseModel`, `Field`, `Image`, `app`,
# `HTTPException`, `BackgroundTasks`, `Job`, `JOBS`, `job_dir`, `update`,
# `LLM_BASE_URL` and `LLM_API_KEY` are referenced here but defined in parts
# of api/main.py that the patch does not show.
#
# The patch additionally:
#   * appends a "commit" entry (hash 40a665a) and a "session-heartbeat" entry
#     to .memory/worklog.json;
#   * adds this field to the existing `Job` model:
#       generated_videos: list[GeneratedVideo] = Field(default_factory=list)

import time

# Default video model name, plus per-name overrides so that the name used by
# callers can be mapped to whatever identifier the gateway expects.
VIDEO_MODEL = os.getenv("VIDEO_MODEL", "seedance").strip() or "seedance"
VIDEO_MODEL_ALIASES = {
    "seedance": os.getenv("VIDEO_MODEL_SEEDANCE", "seedance").strip() or "seedance",
    "kling": os.getenv("VIDEO_MODEL_KLING", "kling").strip() or "kling",
    "veo3": os.getenv("VIDEO_MODEL_VEO3", "veo3").strip() or "veo3",
}
# Name of the form field that carries the clip length in the create request.
VIDEO_DURATION_FIELD = os.getenv("VIDEO_DURATION_FIELD", "seconds").strip() or "seconds"

# Canvas used for the reference image when the requested size cannot be parsed.
_DEFAULT_VIDEO_SIZE = (720, 1280)
# Statuses that mean "still running" while polling.
_PENDING_VIDEO_STATUSES = frozenset({"queued", "in_progress"})


class GeneratedVideo(BaseModel):
    """One video generation attempt attached to a job."""

    id: str  # local id (also the directory name under storyboard_videos/)
    provider_id: str = ""  # id returned by the video API, once known
    frame_idx: int
    prompt: str
    model: str = ""
    status: Literal["queued", "in_progress", "completed", "failed"] = "queued"
    url: str = ""
    poster_url: str = ""
    duration: float = 4.0
    progress: int = 0
    error: str = ""
    created_at: float = 0.0


def public_api_base() -> str:
    """Return the API base URL without a trailing slash.

    Falls back to the public OpenAI endpoint when ``LLM_BASE_URL`` is empty.
    """
    return (LLM_BASE_URL or "https://api.openai.com/v1").rstrip("/")


def _clean_ref_id(value: object) -> str:
    """Normalise an id taken from a client-supplied reference dict.

    Returns the stripped text, or ``""`` when the value is missing or contains
    a path separator (such an id could never name a file directly inside the
    ``elements`` directory, and must not be allowed to point outside it).
    """
    text = str(value or "").strip()
    if "/" in text or "\\" in text:
        return ""
    return text


def storyboard_ref_path(job_id: str, ref: dict | None) -> Path | None:
    """Resolve a storyboard image reference to an existing file on disk.

    Args:
        job_id: Job whose directory is searched.
        ref: Reference dict with ``kind`` ("keyframe" or "cutout"),
            ``frame_idx`` and, for cutouts, ``element_id`` / ``cutout_id``.

    Returns:
        The first matching file that exists, or ``None`` when the reference
        is empty, malformed, of an unknown kind, or no file is found.
    """
    if not ref:
        return None
    try:
        frame_idx = int(ref.get("frame_idx"))
    except (TypeError, ValueError):
        # Missing or non-numeric frame index: treat as "no reference".
        return None

    kind = ref.get("kind")
    if kind == "keyframe":
        frame_path = job_dir(job_id) / "frames" / f"{frame_idx:03d}.jpg"
        return frame_path if frame_path.exists() else None
    if kind != "cutout":
        return None

    element_id = _clean_ref_id(ref.get("element_id"))
    cutout_id = _clean_ref_id(ref.get("cutout_id"))
    if not element_id:
        return None

    elements_dir = job_dir(job_id) / "elements"
    names = []
    if cutout_id and cutout_id != element_id:
        # A specific cutout is preferred over the element's default cutout.
        names.append(f"{frame_idx:03d}_{element_id}_{cutout_id}.jpg")
    names.append(f"{frame_idx:03d}_{element_id}.jpg")
    names.append(f"{frame_idx:03d}_{element_id}.png")
    for name in names:
        candidate = elements_dir / name
        if candidate.exists():
            return candidate
    return None


def storyboard_ref_url(job_id: str, ref: dict | None) -> str:
    """Build the URL path for a storyboard image reference.

    Returns ``""`` when the reference is empty, has no usable ``frame_idx``,
    is of an unknown kind, or is a cutout without an ``element_id``.
    """
    if not ref:
        return ""
    raw_idx = ref.get("frame_idx")
    if raw_idx is None:
        return ""
    try:
        frame_idx = int(raw_idx)
    except (TypeError, ValueError):
        # Same tolerance as storyboard_ref_path: a bad index is not an error.
        return ""

    kind = ref.get("kind")
    if kind == "keyframe":
        return f"/jobs/{job_id}/frames/{frame_idx}.jpg"

    element_id = ref.get("element_id")
    if kind == "cutout" and element_id:
        cutout_id = ref.get("cutout_id")
        if cutout_id and cutout_id != element_id:
            return f"/jobs/{job_id}/frames/{frame_idx}/elements/{element_id}/cutouts/{cutout_id}.jpg"
        return f"/jobs/{job_id}/frames/{frame_idx}/elements/{element_id}/cutout.jpg"
    return ""


def prepare_video_reference(src: Path, dst: Path, size: tuple[int, int] = (720, 1280)) -> None:
    """Write ``src`` to ``dst`` as a JPEG letterboxed onto a ``size`` canvas.

    The image is scaled down to fit inside ``size`` (aspect ratio kept),
    centred on a near-black background and saved at quality 94. The parent
    directory of ``dst`` is created if needed.
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    # convert() returns an independent, fully loaded image, so the source
    # file handle can be released as soon as the conversion is done.
    with Image.open(src) as opened:
        img = opened.convert("RGB")
    img.thumbnail(size, Image.Resampling.LANCZOS)
    canvas = Image.new("RGB", size, (8, 8, 10))
    offset = ((size[0] - img.width) // 2, (size[1] - img.height) // 2)
    canvas.paste(img, offset)
    canvas.save(dst, "JPEG", quality=94)


def update_generated_video(job_id: str, video_id: str, **kw) -> None:
    """Apply ``kw`` to one GeneratedVideo of a job and persist via ``update``.

    Does nothing when the job is unknown. The matching entry is rebuilt
    through the model constructor so the new values are validated.
    """
    job = JOBS.get(job_id)
    if not job:
        return
    refreshed = [
        GeneratedVideo(**{**video.model_dump(), **kw}) if video.id == video_id else video
        for video in job.generated_videos
    ]
    update(job, generated_videos=refreshed)


class GenerateStoryboardVideoReq(BaseModel):
    """Request body for the storyboard video generation endpoint."""

    prompt: str
    duration: float = 4
    subject_image: dict | None = None
    scene_image: dict | None = None
    product_image: dict | None = None
    action_image: dict | None = None
    model: str = ""
    size: str = "720x1280"


def video_seconds(duration: float) -> str:
    """Snap a requested duration to one of the clip lengths "4", "8", "12"."""
    if duration <= 6:
        return "4"
    if duration <= 10:
        return "8"
    return "12"


def _parse_video_size(size: str) -> tuple[int, int]:
    """Parse "WIDTHxHEIGHT" into a tuple; fall back to the default canvas."""
    try:
        width_text, height_text = size.lower().split("x")
        width, height = int(width_text), int(height_text)
    except (AttributeError, ValueError):
        return _DEFAULT_VIDEO_SIZE
    if width <= 0 or height <= 0:
        return _DEFAULT_VIDEO_SIZE
    return (width, height)


def _coerce_progress(value: object, fallback: int) -> int:
    """Turn a progress value from the API into an int, else ``fallback``."""
    if not value:
        return fallback
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return fallback


def render_storyboard_video(job_id: str, local_id: str, provider_id: str, ref_path: Path, prompt: str, model: str, seconds: str, size: str) -> None:
    """Background task: create a video, poll until done, download the result.

    Progress and the final outcome are recorded on the job's GeneratedVideo
    entry identified by ``local_id``. Any exception is caught and stored as a
    "failed" status, so this function never raises to the task runner.

    Args:
        job_id: Owning job.
        local_id: Local id of the GeneratedVideo entry / output directory.
        provider_id: Fallback id used if the create response carries none.
        ref_path: Source image used as the visual reference.
        prompt: Text prompt sent to the video API.
        model: Model name; mapped through ``VIDEO_MODEL_ALIASES`` if listed.
        seconds: Clip length, sent under ``VIDEO_DURATION_FIELD``.
        size: Output size as "WIDTHxHEIGHT".
    """
    import httpx

    out_dir = job_dir(job_id) / "storyboard_videos" / local_id
    ref_img = out_dir / "reference.jpg"
    out_mp4 = out_dir / "video.mp4"
    base = public_api_base()
    headers = {"Authorization": f"Bearer {LLM_API_KEY}"}

    try:
        # Letterbox the reference to the SAME dimensions that are requested
        # from the API; a fixed 720x1280 canvas would mismatch any other size.
        prepare_video_reference(ref_path, ref_img, _parse_video_size(size))
        update_generated_video(job_id, local_id, status="in_progress", progress=5)
        with httpx.Client(timeout=120) as client:
            with ref_img.open("rb") as fh:
                create = client.post(
                    f"{base}/videos",
                    headers=headers,
                    data={
                        "model": VIDEO_MODEL_ALIASES.get(model, model),
                        "prompt": prompt,
                        VIDEO_DURATION_FIELD: seconds,
                        "size": size,
                    },
                    files={"input_reference": ("reference.jpg", fh, "image/jpeg")},
                )
            create.raise_for_status()
            data = create.json()
            video_api_id = data.get("id") or provider_id
            if not video_api_id:
                # Without an id there is nothing to poll or download.
                raise RuntimeError("video API returned no id")
            # `or` (not a .get default) so an explicit null also falls back.
            status = data.get("status") or "queued"
            progress = _coerce_progress(data.get("progress"), 5)
            update_generated_video(job_id, local_id, provider_id=video_api_id, status=status, progress=progress)

            # Monotonic clock: the 7-minute budget must not be affected by
            # wall-clock adjustments.
            deadline = time.monotonic() + 420
            while status in _PENDING_VIDEO_STATUSES and time.monotonic() < deadline:
                time.sleep(8)
                poll = client.get(f"{base}/videos/{video_api_id}", headers=headers)
                poll.raise_for_status()
                pdata = poll.json()
                status = pdata.get("status") or status
                progress = _coerce_progress(pdata.get("progress"), progress)
                update_generated_video(job_id, local_id, status=status, progress=progress)

            if status != "completed":
                if status in _PENDING_VIDEO_STATUSES:
                    # Loop ended because the deadline passed, not because the
                    # provider reported a terminal state.
                    reason = f"video generation timed out (last status: {status})"
                else:
                    reason = f"video status: {status}"
                update_generated_video(job_id, local_id, status="failed", error=reason, progress=progress)
                return

            content = client.get(f"{base}/videos/{video_api_id}/content", headers=headers)
            content.raise_for_status()
            out_mp4.write_bytes(content.content)
            update_generated_video(
                job_id,
                local_id,
                status="completed",
                progress=100,
                url=f"/jobs/{job_id}/storyboard-videos/{local_id}.mp4",
                error="",
            )
    except Exception as e:
        # Task boundary: record the failure on the job instead of raising.
        update_generated_video(job_id, local_id, status="failed", error=str(e)[:500])


@app.post("/jobs/{job_id}/frames/{idx}/storyboard/video", response_model=Job)
def generate_storyboard_video(job_id: str, idx: int, req: GenerateStoryboardVideoReq, bg: BackgroundTasks) -> Job:
    """Queue a video generation for one storyboard frame and return the job.

    The reference image is the first provided of product / subject / scene /
    action; if it cannot be resolved on disk, the frame's own key frame is
    used for both the reference and the poster.

    Raises:
        HTTPException: 404 if the job, frame or reference image is missing,
            500 if no API key is configured, 400 if the prompt is blank.
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    frame = next((f for f in job.frames if f.index == idx), None)
    if not frame:
        raise HTTPException(404, "frame not found")
    if not LLM_API_KEY:
        raise HTTPException(500, "LLM_API_KEY 未配置,无法调用视频生成 API")
    prompt = req.prompt.strip()
    if not prompt:
        raise HTTPException(400, "prompt required")

    frame_url = f"/jobs/{job_id}/frames/{idx}.jpg"
    ref = req.product_image or req.subject_image or req.scene_image or req.action_image
    ref_path = storyboard_ref_path(job_id, ref)
    if ref_path is not None:
        poster = storyboard_ref_url(job_id, ref) or frame_url
    else:
        # Keep poster and reference in sync: when the requested image is not
        # on disk, both fall back to the key frame.
        ref_path = job_dir(job_id) / "frames" / f"{idx:03d}.jpg"
        poster = frame_url
    if not ref_path.exists():
        raise HTTPException(404, "reference image missing")

    local_id = uuid.uuid4().hex[:12]
    model = req.model.strip() or VIDEO_MODEL
    seconds = video_seconds(float(req.duration or 4))
    item = GeneratedVideo(
        id=local_id,
        provider_id="",
        frame_idx=idx,
        prompt=prompt,
        model=model,
        status="queued",
        url="",
        poster_url=poster,
        duration=float(seconds),
        progress=0,
        created_at=time.time(),
    )
    # Newest first.
    update(job, generated_videos=[item] + job.generated_videos, message=f"视频生成已提交 · 分镜 {idx + 1}")
    bg.add_task(render_storyboard_video, job_id, local_id, "", ref_path, prompt, model, seconds, req.size)
    return job