From efe984bb025fc2ccec3c30a08322398cdb86735f Mon Sep 17 00:00:00 2001 From: kang Date: Wed, 13 May 2026 21:07:56 +0800 Subject: [PATCH] auto-save 2026-05-13 21:07 (~4) --- .memory/worklog.json | 7 +++ api/.env.example | 21 ++++++-- api/main.py | 103 ++++++++++++++++++++++++++++++++++---- docs/source-analysis.html | 26 +++++++++- 4 files changed, 141 insertions(+), 16 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index 0afe6ff..3d285a1 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -2381,6 +2381,13 @@ "type": "session-heartbeat", "message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-13 20:56 (~7)", "files_changed": 1 + }, + { + "ts": "2026-05-13T21:02:26+08:00", + "type": "commit", + "message": "auto-save 2026-05-13 21:02 (~2)", + "hash": "21c5a2b", + "files_changed": 2 } ] } diff --git a/api/.env.example b/api/.env.example index 345c97c..b7feb73 100644 --- a/api/.env.example +++ b/api/.env.example @@ -8,13 +8,26 @@ TRANSLATE_MODEL=gemini-2.5-flash REWRITE_MODEL=gemini-2.5-pro IMAGE_MODEL=gemini-3-pro-image-preview VIDEO_MODEL=seedance -VIDEO_MODEL_SEEDANCE=seedance -VIDEO_MODEL_KLING=kling -VIDEO_MODEL_VEO3=veo3 +VIDEO_MODEL_SEEDANCE=seedance-2-fast +VIDEO_MODEL_KLING=kling-omni +VIDEO_MODEL_VEO3=veo-3.1-fast + +# Poe 视频 API(优先用于 Seedance / Kling / Veo) +POE_API_BASE_URL=https://api.poe.com/v1 +POE_API_KEY= + +# 火山方舟 Seedance 视频 API 可直接覆盖这里: +# VIDEO_API_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 +# VIDEO_API_KEY= +# VIDEO_MODEL_SEEDANCE=doubao-seedance-1-0-pro-fast-250528 +# VIDEO_CREATE_PATHS=/contents/generations/tasks +# VIDEO_STATUS_PATH=/contents/generations/tasks/{id} +# +# 自定义视频网关覆盖;留空时如配置 POE_API_KEY,则走 Poe。 VIDEO_API_BASE_URL= VIDEO_API_KEY= VIDEO_CREATE_PATH=/videos -VIDEO_CREATE_PATHS=/videos,/videos/generations,/video/generations +VIDEO_CREATE_PATHS=/videos VIDEO_STATUS_PATH=/videos/{id} VIDEO_CONTENT_PATH=/videos/{id}/content VIDEO_DURATION_FIELD=seconds diff --git a/api/main.py 
b/api/main.py index 09654af..e563c2a 100644 --- a/api/main.py +++ b/api/main.py @@ -220,12 +220,30 @@ def public_api_base() -> str: return (LLM_BASE_URL or "https://api.openai.com/v1").rstrip("/") +def video_uses_poe() -> bool: + if VIDEO_API_BASE_URL: + return VIDEO_API_BASE_URL.rstrip("/") == POE_API_BASE_URL.rstrip("/") + return bool(POE_API_KEY) + + +def video_uses_ark() -> bool: + return "ark.cn-beijing.volces.com" in video_api_base() + + def video_api_base() -> str: - return (VIDEO_API_BASE_URL or LLM_BASE_URL or "https://api.openai.com/v1").rstrip("/") + if VIDEO_API_BASE_URL: + return VIDEO_API_BASE_URL.rstrip("/") + if POE_API_KEY: + return POE_API_BASE_URL.rstrip("/") + return (LLM_BASE_URL or "https://api.openai.com/v1").rstrip("/") def video_api_key() -> str: - return VIDEO_API_KEY or LLM_API_KEY + if VIDEO_API_KEY: + return VIDEO_API_KEY + if video_uses_poe(): + return POE_API_KEY + return LLM_API_KEY def video_path(template: str, **values: str) -> str: @@ -235,7 +253,7 @@ def video_path(template: str, **values: str) -> str: def ensure_video_api_configured() -> None: if not video_api_key(): - raise HTTPException(503, "VIDEO_API_KEY 或 LLM_API_KEY 未配置,无法调用生视频 API") + raise HTTPException(503, "POE_API_KEY、VIDEO_API_KEY 或 LLM_API_KEY 未配置,无法调用生视频 API") def storyboard_ref_path(job_id: str, ref: dict | None) -> Path | None: @@ -805,7 +823,8 @@ def health() -> dict: "rewrite": REWRITE_MODEL, "video": VIDEO_MODEL, "video_aliases": VIDEO_MODEL_ALIASES, - "video_base_url": video_api_base() if VIDEO_API_BASE_URL else "", + "video_provider": "poe" if video_uses_poe() else ("ark" if video_uses_ark() else "custom"), + "video_base_url": video_api_base(), "video_configured": bool(video_api_key()), "video_create_paths": VIDEO_CREATE_PATHS, }, @@ -1629,6 +1648,10 @@ class GenerateStoryboardVideoReq(BaseModel): def video_seconds(duration: float) -> str: + if video_uses_ark(): + if duration <= 0: + return "5" + return str(max(4, min(15, round(duration)))) if duration <= 6: 
return "4" if duration <= 10: @@ -1683,6 +1706,12 @@ def video_url_from_response(data: dict) -> str: v = output.get(key) if isinstance(v, str) and v: return v + content = data.get("content") + if isinstance(content, dict): + for key in ("video_url", "url", "download_url", "file_url"): + v = content.get(key) + if isinstance(v, str) and v: + return v return "" @@ -1696,6 +1725,64 @@ def download_generated_video(client, base: str, headers: dict, provider_id: str, out_mp4.write_bytes(r.content) +def size_to_video_ratio(size: str) -> str: + try: + w, h = [int(x) for x in size.lower().replace(" ", "").split("x", 1)] + except Exception: + return "9:16" + if w <= 0 or h <= 0: + return "9:16" + ratio = w / h + known = { + "16:9": 16 / 9, + "9:16": 9 / 16, + "1:1": 1, + "4:3": 4 / 3, + "3:4": 3 / 4, + "21:9": 21 / 9, + } + return min(known, key=lambda key: abs(known[key] - ratio)) + + +def ark_reference_data_url(ref_img: Path) -> str: + mime = "image/png" if ref_img.suffix.lower() == ".png" else "image/jpeg" + return f"data:{mime};base64,{base64.b64encode(ref_img.read_bytes()).decode('ascii')}" + + +def submit_video_create(client, url: str, headers: dict, ref_img: Path, payload: dict): + if video_uses_ark(): + data = { + "model": payload["model"], + "content": [ + {"type": "text", "text": payload["prompt"]}, + { + "type": "image_url", + "image_url": {"url": ark_reference_data_url(ref_img)}, + "role": "first_frame", + }, + ], + "ratio": size_to_video_ratio(str(payload.get("size", ""))), + "duration": int(float(str(payload.get(VIDEO_DURATION_FIELD, 5)))), + "watermark": False, + "resolution": "720p", + } + return client.post(url, headers={**headers, "Content-Type": "application/json"}, json=data) + + if video_uses_poe(): + data = dict(payload) + data[VIDEO_DURATION_FIELD] = int(float(str(data.get(VIDEO_DURATION_FIELD, 4)))) + data["input_image"] = base64.b64encode(ref_img.read_bytes()).decode("ascii") + return client.post(url, headers=headers, json=data) + + with 
ref_img.open("rb") as fh: + return client.post( + url, + headers=headers, + data=payload, + files={"input_reference": ("reference.jpg", fh, "image/jpeg")}, + ) + + def render_storyboard_video(job_id: str, local_id: str, provider_id: str, ref_path: Path, prompt: str, model: str, seconds: str, size: str) -> None: import httpx @@ -1714,13 +1801,7 @@ def render_storyboard_video(job_id: str, local_id: str, provider_id: str, ref_pa create = None create_errors: list[str] = [] for create_path in VIDEO_CREATE_PATHS: - with ref_img.open("rb") as fh: - resp = client.post( - f"{base}{video_path(create_path)}", - headers=headers, - data=payload, - files={"input_reference": ("reference.jpg", fh, "image/jpeg")}, - ) + resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload) if resp.status_code < 400: create = resp break diff --git a/docs/source-analysis.html b/docs/source-analysis.html index e931182..21c1eab 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -790,7 +790,7 @@ api/main.py
  • ASR:SKG 网关 audio endpoint 404 或渠道不可用。
  • Translate:本身 text 通,但产品流里依赖 ASR 段落。
  • Rewrite:需要 SKG 产品信息模板和目标脚本结构。
  • -
  • Video Gen:模型层按业务保留 Seedance / Kling / Veo/Voe 选择;网关调用层通过 VIDEO_CREATE_PATHS 多入口尝试,当前常见入口实测返回 404/unsupported,若平台后台有其它入口要直接配置到该变量。
  • +
  • Video Gen:模型层按业务保留 Seedance / Kling / Veo/Voe 选择;后端已支持 Poe 视频通道,别名默认映射到 seedance-2-fast、kling-omni、veo-3.1-fast,提交后写入 Video Gen 节点。
  • Compose:还没做本地 ffmpeg 字幕/TTS 合成。
  • @@ -830,6 +830,30 @@ api/main.py

    变更记录

    这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。

    +
    +
    +

    2026-05-13 · 生视频支持火山方舟 Ark 异步任务

    + VideoGenNode + API +
    +
    +

    问题:用户提供火山方舟 https://ark.cn-beijing.volces.com/api/v3 作为生视频通道;这个通道不是 Poe 的 /videos 形态,而是内容生成异步任务。

    +

    改动:后端识别 Ark base 后,提交改为 POST /contents/generations/tasks,请求体使用 content 数组:文本 prompt + 首帧 image_url data URL;轮询改为 GET /contents/generations/tasks/{id},成功后读取 content.video_url 下载 MP4。

    +

    影响:api/main.py、api/.env.example、docs/source-analysis.html。本机 api/.env 需要把 VIDEO_API_BASE_URL/VIDEO_API_KEY/VIDEO_CREATE_PATHS/VIDEO_STATUS_PATH 指向 Ark。

    +
    +
    +
    +
    +

    2026-05-13 · 生视频改接 Poe 视频模型

    + VideoGenNode + API +
    +
    +

    问题:SKG ezlink 的 OpenAI 兼容 base 可列出部分模型,但常规 /videos 入口返回 404/unsupported;用户确认可用的视频模型在 Poe 通道里。

    +

    改动:后端新增 POE_API_BASE_URL/POE_API_KEY 配置,未显式配置 VIDEO_API_BASE_URL 时优先走 Poe;Seedance / Kling / Veo/Voe 业务别名默认映射到 Poe 真实模型 seedance-2-fast、kling-omni、veo-3.1-fast。Poe 提交使用 input_image base64,继续轮询 /videos/{id} 并下载 /videos/{id}/content。

    +

    影响:api/main.py、api/.env.example、docs/source-analysis.html。密钥只放本地 api/.env,不进入源码解析页。

    +
    +

    2026-05-13 · 生视频提交不再被前端锁死