diff --git a/RULES.md b/RULES.md
index b4e43c5..90cbdcb 100644
--- a/RULES.md
+++ b/RULES.md
@@ -163,7 +163,8 @@
- `VOICE_PROVIDER`:配音通道,服务端固定使用 `azure_openai`;旧环境若写 `minimax` 会被忽略
- `AZURE_OPENAI_BASE_URL` / `AZURE_OPENAI_API_KEY`:微软 Azure OpenAI 协议配音网关;本地未单独配置 Key 时回退复用 `LLM_API_KEY`
- `AZURE_TTS_MODEL` / `AZURE_TTS_VOICE_ID` / `AZURE_TTS_VOICE_POOL` / `AZURE_TTS_PATH` / `AZURE_TTS_PATHS`:Azure OpenAI TTS 模型、默认音色、音色池和 OpenAI 协议语音路径;后端会按 `AZURE_TTS_PATHS` 依次尝试,便于区分路径不对和整条语音服务不可用
-- `POE_API_KEY` / `VIDEO_API_KEY`:视频生成通道 Key,只能放本地环境变量
+- `POE_API_KEY` / `VIDEO_API_KEY`:默认视频生成通道 Key,只能放本地环境变量
+- `XAI_VIDEO_API_BASE_URL` / `XAI_VIDEO_API_KEY` / `VIDEO_MODEL_XAI`:xAI / Grok Imagine Video 独立视频通道;默认 base 为 `https://ai.skg.com/ezlink/xai`,模型为 `grok-imagine-video`,真实 key 只放本地 `api/.env`、本地 Docker `deploy/.env.local` 或服务器 `deploy/.env.production`,不入库。后端依次读取 `XAI_VIDEO_API_KEY`、`XAI_GATEWAY_KEY`,仅当 `VIDEO_API_BASE_URL` 本身含 `xai` 时才回退复用 `VIDEO_API_KEY`;三者都拿不到 key 时 `/health` 会标记 xAI 视频不可用,画布不显示该模型。
- `PASSWORD_AUTH_ENABLED`:生产密码登录总开关;当前固定为 `false`,只允许飞书免登录。若应急恢复密码入口,必须显式改成 `true` 并重启 API。
- `WEB_AUTH_USERNAME` / `WEB_AUTH_PASSWORD` / `WEB_AUTH_SESSION_SECRET`:生产备用网页登录和会话签名配置;密码和 session secret 只放服务器环境变量,不入库。当前密码入口被 `PASSWORD_AUTH_ENABLED=false` 禁用;即使只开飞书免登录,也必须配置 `WEB_AUTH_SESSION_SECRET` 用于签名会话 Cookie。
- `FEISHU_APP_ID` / `FEISHU_APP_SECRET`:飞书免登录 OAuth 应用凭证;只放服务器 `deploy/.env.production` 或本地 `api/.env`,不入库。
diff --git a/api/.env.example b/api/.env.example
index cfe04c5..92e40e2 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -61,6 +61,7 @@ YTDLP_COOKIES_FILE=
YTDLP_COOKIES_FROM_BROWSER=
VIDEO_MODEL=seedance
VIDEO_MODEL_SEEDANCE=seedance-2-fast
+VIDEO_MODEL_XAI=grok-imagine-video
VIDEO_MODEL_KLING=kling-omni
VIDEO_MODEL_VEO3=veo-3.1-fast
@@ -96,6 +97,13 @@ POE_API_KEY=
# VIDEO_STATUS_PATH=/api/v3/contents/generations/tasks/{id}
# VIDEO_CONTENT_PATH=/api/v3/contents/generations/tasks/{id}/content
#
+# SKG xAI/Grok Imagine 视频网关。真实 key 只填本地/服务器私有 .env。
+# XAI_VIDEO_CONTENT_PATH 留空时,后端只下载接口响应里返回的视频地址;响应里没有地址则该任务报错。
+XAI_VIDEO_API_BASE_URL=https://ai.skg.com/ezlink/xai
+XAI_VIDEO_API_KEY=
+XAI_VIDEO_CREATE_PATH=/v1/videos/generations
+XAI_VIDEO_STATUS_PATH=/v1/videos/{id}
+XAI_VIDEO_CONTENT_PATH=
+#
# 自定义视频网关覆盖;留空时如配置 POE_API_KEY,则走 Poe。
VIDEO_API_BASE_URL=
VIDEO_API_KEY=
diff --git a/api/main.py b/api/main.py
index 4c3312b..e71910d 100644
--- a/api/main.py
+++ b/api/main.py
@@ -350,9 +350,31 @@ VIDEO_MODEL_ALIASES = {
"veo3": env_video_model("VIDEO_MODEL_VEO3", "veo-3.1-fast"),
"veo": env_video_model("VIDEO_MODEL_VEO3", "veo-3.1-fast"),
"voe": env_video_model("VIDEO_MODEL_VEO3", "veo-3.1-fast"),
+ "grok_imagine_video": env_video_model("VIDEO_MODEL_XAI", "grok-imagine-video"),
+ "grok-imagine-video": env_video_model("VIDEO_MODEL_XAI", "grok-imagine-video"),
+ "xai": env_video_model("VIDEO_MODEL_XAI", "grok-imagine-video"),
}
VIDEO_API_BASE_URL = os.getenv("VIDEO_API_BASE_URL", "").strip()
VIDEO_API_KEY = os.getenv("VIDEO_API_KEY", "").strip()
+# xAI / Grok Imagine Video gateway settings, resolved once at import time.
+# If the generic VIDEO_API_BASE_URL already contains "xai", it doubles as the
+# default xAI base; otherwise the SKG ezlink gateway URL is the default.
+# NOTE(review): this is a plain substring match, so any base URL containing
+# "xai" is treated as an xAI gateway — confirm that is intended.
+_VIDEO_XAI_BASE_DEFAULT = (
+    VIDEO_API_BASE_URL
+    if "xai" in VIDEO_API_BASE_URL.lower()
+    else "https://ai.skg.com/ezlink/xai"
+)
+# Base URL precedence: XAI_VIDEO_API_BASE_URL, then XAI_GATEWAY_BASE, then the
+# default above. Surrounding whitespace and trailing slashes are removed.
+XAI_VIDEO_API_BASE_URL = (
+    os.getenv("XAI_VIDEO_API_BASE_URL")
+    or os.getenv("XAI_GATEWAY_BASE")
+    or _VIDEO_XAI_BASE_DEFAULT
+).strip().rstrip("/")
+# Key precedence: XAI_VIDEO_API_KEY, then XAI_GATEWAY_KEY, then VIDEO_API_KEY
+# (only when VIDEO_API_BASE_URL contains "xai"); otherwise the key is empty.
+XAI_VIDEO_API_KEY = (
+    os.getenv("XAI_VIDEO_API_KEY")
+    or os.getenv("XAI_GATEWAY_KEY")
+    or (VIDEO_API_KEY if "xai" in VIDEO_API_BASE_URL.lower() else "")
+).strip()
+# Concrete model name that the "xai" alias maps to in VIDEO_MODEL_ALIASES.
+XAI_VIDEO_MODEL = VIDEO_MODEL_ALIASES["xai"]
+# Create / status path templates; an unset or blank env value falls back to
+# the literal default on the right-hand side.
+XAI_VIDEO_CREATE_PATH = os.getenv("XAI_VIDEO_CREATE_PATH", "/v1/videos/generations").strip() or "/v1/videos/generations"
+XAI_VIDEO_STATUS_PATH = os.getenv("XAI_VIDEO_STATUS_PATH", "/v1/videos/{id}").strip() or "/v1/videos/{id}"
+# Optional download path template; empty unless XAI_VIDEO_CONTENT_PATH is set.
+XAI_VIDEO_CONTENT_PATH = os.getenv("XAI_VIDEO_CONTENT_PATH", "").strip()
WEB_AUTH_USERNAME = os.getenv("WEB_AUTH_USERNAME", "").strip()
WEB_AUTH_PASSWORD = os.getenv("WEB_AUTH_PASSWORD", "").strip()
WEB_AUTH_SESSION_SECRET = os.getenv("WEB_AUTH_SESSION_SECRET", "").strip()
@@ -389,6 +411,12 @@ WEB_AUTH_CONFIGURED = bool(PASSWORD_AUTH_CONFIGURED or FEISHU_AUTH_CONFIGURED)
def default_video_gateway_paths(base_url: str) -> tuple[str, str, str]:
base = base_url.strip().rstrip("/").lower()
+ if "api.x.ai" in base or "/ezlink/xai" in base:
+ return (
+ "/v1/videos/generations",
+ "/v1/videos/{id}",
+ "",
+ )
if "ai.skg.com/doubao" in base:
return (
"/api/v3/contents/generations/tasks",
@@ -1446,13 +1474,30 @@ def video_uses_poe() -> bool:
return bool(POE_API_KEY)
-def video_uses_ark() -> bool:
- base = video_api_base()
+def is_xai_video_model(model: str | None) -> bool:
+    """Return True when ``model`` resolves to the xAI / Grok Imagine video model.
+
+    An empty or missing ``model`` falls back to the configured ``VIDEO_MODEL``.
+    The name is resolved through ``VIDEO_MODEL_ALIASES`` and compared
+    case-insensitively against ``XAI_VIDEO_MODEL``; any resolved name that
+    starts with ``grok-imagine-video`` also counts as xAI.
+    """
+    requested = (model or "").strip().lower() or (VIDEO_MODEL or "").strip().lower()
+    concrete = VIDEO_MODEL_ALIASES.get(requested, requested).strip().lower()
+    expected = (XAI_VIDEO_MODEL or "grok-imagine-video").strip().lower()
+    if concrete == expected:
+        return True
+    return concrete.startswith("grok-imagine-video")
+
+
+def video_uses_xai(model: str | None = None) -> bool:
+    """Return True when requests for ``model`` go through an xAI gateway.
+
+    True if the model itself is an xAI model, or if the base URL selected for
+    it contains ``api.x.ai`` or ``/ezlink/xai``.
+    """
+    if is_xai_video_model(model):
+        return True
+    gateway = video_api_base(model).lower()
+    return "api.x.ai" in gateway or "/ezlink/xai" in gateway
+
+
+def video_uses_ark(model: str | None = None) -> bool:
+ if video_uses_xai(model):
+ return False
+ base = video_api_base(model)
return "ark.cn-beijing.volces.com" in base or "ai.skg.com/doubao" in base
-def video_provider_name() -> str:
- base = video_api_base()
+def video_provider_name(model: str | None = None) -> str:
+ base = video_api_base(model)
+ if video_uses_xai(model):
+ return "xai"
if video_uses_poe():
return "poe"
if "ai.skg.com/doubao" in base:
@@ -1462,7 +1507,9 @@ def video_provider_name() -> str:
return "custom"
-def video_api_base() -> str:
+def video_api_base(model: str | None = None) -> str:
+ if is_xai_video_model(model):
+ return XAI_VIDEO_API_BASE_URL.rstrip("/")
if VIDEO_API_BASE_URL:
return VIDEO_API_BASE_URL.rstrip("/")
if POE_API_KEY:
@@ -1470,7 +1517,13 @@ def video_api_base() -> str:
return (LLM_BASE_URL or "https://api.openai.com/v1").rstrip("/")
-def video_api_key() -> str:
+def video_api_key(model: str | None = None) -> str:
+ if is_xai_video_model(model):
+ if XAI_VIDEO_API_KEY:
+ return XAI_VIDEO_API_KEY
+ if "xai" in VIDEO_API_BASE_URL.lower() and VIDEO_API_KEY:
+ return VIDEO_API_KEY
+ return ""
if VIDEO_API_KEY:
return VIDEO_API_KEY
if video_uses_poe():
@@ -1478,14 +1531,26 @@ def video_api_key() -> str:
return LLM_API_KEY
+def video_create_paths(model: str | None = None) -> list[str]:
+    """Return the create-endpoint paths to try for ``model``.
+
+    xAI requests use the single ``XAI_VIDEO_CREATE_PATH``; every other model
+    uses the shared ``VIDEO_CREATE_PATHS`` list.
+    """
+    if video_uses_xai(model):
+        return [XAI_VIDEO_CREATE_PATH]
+    return VIDEO_CREATE_PATHS
+
+
+def video_status_path(model: str | None = None) -> str:
+    """Return the status-polling path template for ``model``.
+
+    xAI requests use ``XAI_VIDEO_STATUS_PATH``; all others use
+    ``VIDEO_STATUS_PATH``.
+    """
+    if video_uses_xai(model):
+        return XAI_VIDEO_STATUS_PATH
+    return VIDEO_STATUS_PATH
+
+
+def video_content_path(model: str | None = None) -> str:
+    """Return the content-download path template for ``model``.
+
+    xAI requests use ``XAI_VIDEO_CONTENT_PATH`` (which may be empty); all
+    others use ``VIDEO_CONTENT_PATH``.
+    """
+    if video_uses_xai(model):
+        return XAI_VIDEO_CONTENT_PATH
+    return VIDEO_CONTENT_PATH
+
+
def video_path(template: str, **values: str) -> str:
path = template.format(**values)
return path if path.startswith("/") else f"/{path}"
-def ensure_video_api_configured() -> None:
- if not video_api_key():
- raise HTTPException(503, "POE_API_KEY、VIDEO_API_KEY 或 LLM_API_KEY 未配置,无法调用生视频 API")
+def ensure_video_api_configured(model: str | None = None) -> None:
+    """Raise HTTP 503 when no API key is available for ``model``.
+
+    The error names only the keys that can actually satisfy the selected
+    provider: xAI models never fall back to the Poe or LLM keys, so listing
+    those would send the operator to the wrong setting.
+
+    Raises:
+        HTTPException: status 503 if ``video_api_key(model)`` is empty.
+    """
+    if video_api_key(model):
+        return
+    if is_xai_video_model(model):
+        raise HTTPException(503, "XAI_VIDEO_API_KEY 未配置,无法调用 xAI 生视频 API")
+    raise HTTPException(503, "POE_API_KEY、VIDEO_API_KEY 或 LLM_API_KEY 未配置,无法调用生视频 API")
def storyboard_ref_path(job_id: str, ref: dict | None) -> Path | None:
@@ -4973,13 +5038,16 @@ def _image_size_payload(raw: str | None, model: str | None = None) -> dict:
return {} if size == "auto" else {"size": size}
-def video_duration_options() -> list[int]:
- if video_uses_ark():
+def video_duration_options(model: str | None = None) -> list[int]:
+ if video_uses_ark(model) or video_uses_xai(model):
return [5, 8, 10, 12, 15]
return [4, 8, 12]
-def video_size_options() -> list[dict]:
+def video_size_options(model: str | None = None) -> list[dict]:
+ if video_uses_xai(model):
+ allowed = {"720x1280", "1280x720", "1024x1024"}
+ return [item for item in VIDEO_SIZE_CHOICES if str(item["value"]) in allowed]
return VIDEO_SIZE_CHOICES
@@ -4992,7 +5060,9 @@ def _video_resolution_choice(value: str) -> dict:
def _video_resolution_values_for_model(model: str | None) -> list[str]:
concrete = (model or "").strip().lower()
- if video_uses_ark():
+ if video_uses_xai(concrete):
+ return ["480p", "720p"]
+ if video_uses_ark(concrete):
if "seedance-2-0-fast" in concrete:
return ["480p", "720p"]
if "seedance-2-0" in concrete or "seedance-1-5-pro" in concrete or "seedance-1-0-pro" in concrete:
@@ -5029,7 +5099,7 @@ def _normalize_video_resolution(raw: str | None, model: str | None = None) -> st
return value
-def _normalize_video_size(raw: str | None) -> str:
+def _normalize_video_size(raw: str | None, model: str | None = None) -> str:
value = (raw or "720x1280").strip().lower().replace(" ", "")
aliases = {
"vertical": "720x1280",
@@ -5046,7 +5116,7 @@ def _normalize_video_size(raw: str | None) -> str:
"3:4": "960x1280",
}
value = aliases.get(value, value)
- allowed = {str(item["value"]) for item in VIDEO_SIZE_CHOICES}
+ allowed = {str(item["value"]) for item in video_size_options(model)}
if value not in allowed:
raise HTTPException(400, f"unsupported video size: {raw}")
return value
@@ -5060,14 +5130,18 @@ def video_model_options() -> list[dict]:
"veo3": "Veo 3",
"veo": "Veo",
"voe": "Veo",
+ "xai": "Grok Imagine Video",
+ "grok_imagine_video": "Grok Imagine Video",
+ "grok-imagine-video": "Grok Imagine Video",
}
concrete_label_map = {
"doubao-seedance-2-0-fast-260128": "Seedance 2.0 Fast",
"doubao-seedance-2-0-260128": "Seedance 2.0 高清",
+ "grok-imagine-video": "Grok Imagine Video",
}
seen_models: set[str] = set()
options: list[dict] = []
- for key in ["seedance", "seedance_hd", "kling", "veo3", "veo"]:
+ for key in ["seedance", "seedance_hd", "xai", "kling", "veo3", "veo"]:
if key not in VIDEO_MODEL_ALIASES:
continue
model = VIDEO_MODEL_ALIASES[key]
@@ -5078,13 +5152,14 @@ def video_model_options() -> list[dict]:
"id": key,
"label": concrete_label_map.get(model, label_map.get(key, key)),
"model": model,
- "description": f"当前视频网关可选模型;单次时长最高 {max(video_duration_options())} 秒",
- "duration_options": video_duration_options(),
- "size_options": video_size_options(),
+ "provider": video_provider_name(model),
+ "description": f"当前视频网关可选模型;单次时长最高 {max(video_duration_options(model))} 秒",
+ "duration_options": video_duration_options(model),
+ "size_options": video_size_options(model),
"resolution_options": video_resolution_options(model),
"default_resolution": default_video_resolution(model),
- "max_duration_seconds": max(video_duration_options()),
- "available": bool(video_api_key()),
+ "max_duration_seconds": max(video_duration_options(model)),
+ "available": bool(video_api_key(model)),
})
default_model = resolve_video_model(VIDEO_MODEL)
if not any(item["id"] == VIDEO_MODEL or item["model"] == default_model for item in options):
@@ -5092,13 +5167,14 @@ def video_model_options() -> list[dict]:
"id": VIDEO_MODEL,
"label": label_map.get(VIDEO_MODEL, VIDEO_MODEL),
"model": default_model,
+ "provider": video_provider_name(default_model),
"description": "默认视频模型",
- "duration_options": video_duration_options(),
- "size_options": video_size_options(),
+ "duration_options": video_duration_options(default_model),
+ "size_options": video_size_options(default_model),
"resolution_options": video_resolution_options(default_model),
"default_resolution": default_video_resolution(default_model),
- "max_duration_seconds": max(video_duration_options()),
- "available": bool(video_api_key()),
+ "max_duration_seconds": max(video_duration_options(default_model)),
+ "available": bool(video_api_key(default_model)),
})
return options
@@ -6585,6 +6661,10 @@ def health() -> dict:
"video_base_url": video_api_base(),
"video_configured": bool(video_api_key()),
"video_create_paths": VIDEO_CREATE_PATHS,
+ "xai_video_model": XAI_VIDEO_MODEL,
+ "xai_video_base_url": XAI_VIDEO_API_BASE_URL,
+ "xai_video_configured": bool(video_api_key(XAI_VIDEO_MODEL)),
+ "xai_video_create_path": XAI_VIDEO_CREATE_PATH,
},
}
@@ -8832,8 +8912,8 @@ class ProductFusionDescriptionReq(BaseModel):
shots: list[ProductFusionShot] = Field(default_factory=list)
-def video_seconds(duration: float) -> str:
- if video_uses_ark():
+def video_seconds(duration: float, model: str | None = None) -> str:
+ if video_uses_ark(model) or video_uses_xai(model):
if duration <= 0:
return "5"
return str(max(4, min(15, round(duration))))
@@ -8848,7 +8928,7 @@ def resolve_video_model(raw: str | None) -> str:
requested = (raw or VIDEO_MODEL or "seedance").strip()
lowered = requested.lower()
if lowered in {"sora", "sora-2", "sora_2"}:
- raise HTTPException(400, "Sora 已停用,请选择当前已接入的 Seedance")
+ raise HTTPException(400, "Sora 已停用,请选择当前已接入的 Seedance 或 Grok Imagine Video")
return VIDEO_MODEL_ALIASES.get(lowered, requested)
@@ -8897,6 +8977,12 @@ def video_url_from_response(data: dict) -> str:
v = content.get(key)
if isinstance(v, str) and v:
return v
+ video = data.get("video")
+ if isinstance(video, dict):
+ for key in ("url", "video_url", "download_url", "file_url"):
+ v = video.get(key)
+ if isinstance(v, str) and v:
+ return v
return ""
@@ -8987,12 +9073,15 @@ def _video_create_failure_message(create_errors: list[str]) -> str:
return "视频生成失败:视频模型没有接受本次请求。请换一张参考图或简化提示词后重试;如果持续失败,请联系管理员。"
-def download_generated_video(client, base: str, headers: dict, provider_id: str, direct_url: str, out_mp4: Path) -> None:
+def download_generated_video(client, base: str, headers: dict, provider_id: str, direct_url: str, out_mp4: Path, model: str | None = None) -> None:
if direct_url:
url = direct_url if direct_url.startswith("http") else f"{base}{direct_url if direct_url.startswith('/') else '/' + direct_url}"
r = client.get(url, headers=headers if url.startswith(base) else None)
else:
- r = client.get(f"{base}{video_path(VIDEO_CONTENT_PATH, id=provider_id)}", headers=headers)
+ content_path = video_content_path(model)
+ if not content_path:
+ raise RuntimeError("视频生成完成但未返回可下载地址")
+ r = client.get(f"{base}{video_path(content_path, id=provider_id)}", headers=headers)
r.raise_for_status()
out_mp4.write_bytes(r.content)
@@ -9032,7 +9121,33 @@ def submit_video_create(
product_imgs: list[Path] | None = None,
primary_role: str = "first_frame",
):
- if video_uses_ark():
+ model = str(payload.get("model") or "")
+ if video_uses_xai(model):
+ duration = int(float(str(payload.get("duration") or payload.get(VIDEO_DURATION_FIELD) or 8)))
+ data: dict = {
+ "model": model,
+ "prompt": payload["prompt"],
+ "duration": max(1, duration),
+ "aspect_ratio": size_to_video_ratio(str(payload.get("size", ""))),
+ "resolution": _normalize_video_resolution(str(payload.get("resolution") or ""), model),
+ }
+ reference_images: list[dict] = []
+ if ref_img.exists() and primary_role:
+ ref_payload = {"url": ark_reference_data_url(ref_img)}
+ if primary_role == "first_frame":
+ data["image"] = ref_payload
+ else:
+ reference_images.append(ref_payload)
+ if last_img and last_img.exists():
+ reference_images.append({"url": ark_reference_data_url(last_img)})
+ for product_img in (product_imgs or [])[:6]:
+ if product_img.exists():
+ reference_images.append({"url": ark_reference_data_url(product_img)})
+ if reference_images:
+ data["reference_images"] = reference_images[:6]
+ return client.post(url, headers={**headers, "Content-Type": "application/json"}, json=data)
+
+ if video_uses_ark(model):
content = [{"type": "text", "text": payload["prompt"]}]
if source_ref and source_ref.kind == "source_video" and source_ref.url:
content.append(
@@ -9046,7 +9161,7 @@ def submit_video_create(
{
"type": "image_url",
"image_url": {"url": ark_reference_data_url(ref_img)},
- "role": primary_role,
+ "role": primary_role or "reference_image",
}
)
if last_img and last_img.exists():
@@ -9112,8 +9227,8 @@ def render_storyboard_video(
ref_img = out_dir / "reference.jpg"
last_img = out_dir / "last_reference.jpg"
out_mp4 = out_dir / "video.mp4"
- base = video_api_base()
- headers = {"Authorization": f"Bearer {video_api_key()}"}
+ base = video_api_base(model)
+ headers = {"Authorization": f"Bearer {video_api_key(model)}"}
try:
prepare_video_reference(ref_path, ref_img)
@@ -9133,15 +9248,15 @@ def render_storyboard_video(
payload[VIDEO_DURATION_FIELD] = seconds
create = None
create_errors: list[str] = []
- for create_path in VIDEO_CREATE_PATHS:
+ for create_path in video_create_paths(model):
resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, source_ref, prepared_last_img, prepared_product_imgs, primary_role)
- if video_uses_ark() and source_ref and resp.status_code in {400, 422}:
+ if video_uses_ark(model) and source_ref and resp.status_code in {400, 422}:
create_errors.append(f"{video_path(create_path)} + reference_video -> HTTP {resp.status_code}: {resp.text[:700]}")
resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, None, prepared_last_img, prepared_product_imgs, primary_role)
- if video_uses_ark() and prepared_last_img and resp.status_code in {400, 422}:
+ if video_uses_ark(model) and prepared_last_img and resp.status_code in {400, 422}:
create_errors.append(f"{video_path(create_path)} + last_frame -> HTTP {resp.status_code}: {resp.text[:700]}")
resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, None, None, prepared_product_imgs, primary_role)
- if video_uses_ark() and prepared_product_imgs and resp.status_code in {400, 422}:
+ if video_uses_ark(model) and prepared_product_imgs and resp.status_code in {400, 422}:
create_errors.append(f"{video_path(create_path)} + product_reference -> HTTP {resp.status_code}: {resp.text[:700]}")
resp = submit_video_create(client, f"{base}{video_path(create_path)}", headers, ref_img, payload, None, prepared_last_img, None, primary_role)
if resp.status_code < 400:
@@ -9154,7 +9269,7 @@ def render_storyboard_video(
print(f"[video create failed] job={job_id} video={local_id} errors={' | '.join(create_errors)[:1800]}", flush=True)
raise RuntimeError(_video_create_failure_message(create_errors))
data = create.json()
- video_api_id = data.get("id") or provider_id or local_id
+ video_api_id = data.get("request_id") or data.get("id") or provider_id or local_id
status = normalize_video_status(data.get("status"))
progress = video_progress(data, 5)
direct_url = video_url_from_response(data)
@@ -9171,7 +9286,7 @@ def render_storyboard_video(
deadline = time.time() + VIDEO_POLL_TIMEOUT_SECONDS
while status in {"queued", "in_progress"} and time.time() < deadline:
time.sleep(8)
- poll = client.get(f"{base}{video_path(VIDEO_STATUS_PATH, id=video_api_id)}", headers=headers)
+ poll = client.get(f"{base}{video_path(video_status_path(model), id=video_api_id)}", headers=headers)
poll.raise_for_status()
pdata = poll.json()
status = normalize_video_status(pdata.get("status"))
@@ -9200,7 +9315,7 @@ def render_storyboard_video(
update_generated_video(job_id, local_id, status="failed", error=_video_public_error(raw_error or f"video status: {status}"), progress=progress, queue_message="")
return
- download_generated_video(client, base, headers, video_api_id, direct_url, out_mp4)
+ download_generated_video(client, base, headers, video_api_id, direct_url, out_mp4, model)
update_generated_video(
job_id,
local_id,
@@ -9286,7 +9401,6 @@ def refine_storyboard(job_id: str, idx: int, req: RefineStoryboardReq) -> dict:
def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboardVideoReq, bg: BackgroundTasks | None = None) -> list[str]:
- ensure_video_api_configured()
prompt = _ensure_english(req.prompt.strip())
if not prompt and frame.storyboard:
prompt = _storyboard_video_prompt(frame.storyboard, req.seed)
@@ -9295,7 +9409,7 @@ def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboar
count = max(1, min(12, int(req.count or 1)))
ref = req.first_image or req.subject_image or req.product_image or req.scene_image or req.action_image
- primary_role = "first_frame" if req.first_image else "reference_image"
+ primary_role = "first_frame" if req.first_image else ("reference_image" if ref else "")
ref_path = storyboard_ref_path(job.id, ref) or (job_dir(job.id) / "frames" / f"{frame.index:03d}.jpg")
if not ref_path.exists():
raise HTTPException(404, "reference image missing")
@@ -9315,13 +9429,23 @@ def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboar
seen_ref_paths.add(key)
model = resolve_video_model(req.model)
- seconds = video_seconds(float(req.duration or 4))
- video_size = _normalize_video_size(req.size)
+ ensure_video_api_configured(model)
+ seconds = video_seconds(float(req.duration or 4), model)
+ video_size = _normalize_video_size(req.size, model)
video_resolution = _normalize_video_resolution(req.resolution, model)
source_ref = req.source_ref
if source_ref and source_ref.kind == "source_video" and not source_ref.url:
source_ref = None
- has_visual_reference = bool(ref_path.exists() or last_ref_path or reference_ref_paths)
+ has_visual_reference = bool(
+ req.first_image
+ or req.subject_image
+ or req.product_image
+ or req.scene_image
+ or req.action_image
+ or req.last_image
+ or raw_product_refs
+ or req.subject_images
+ )
items: list[GeneratedVideo] = []
ids: list[str] = []
queued_tasks: list[tuple[str, tuple]] = []
diff --git a/deploy/.env.local.example b/deploy/.env.local.example
index 14d9415..e1512f6 100644
--- a/deploy/.env.local.example
+++ b/deploy/.env.local.example
@@ -75,6 +75,7 @@ VIDEO_API_BASE_URL=https://ai.skg.com/doubao
VIDEO_API_KEY=
VIDEO_MODEL=seedance
VIDEO_MODEL_SEEDANCE=doubao-seedance-2-0-fast-260128
+VIDEO_MODEL_XAI=grok-imagine-video
VIDEO_MODEL_KLING=kling-omni
VIDEO_MODEL_VEO3=veo-3.1-fast
VIDEO_CREATE_PATHS=/api/v3/contents/generations/tasks
@@ -82,6 +83,11 @@ VIDEO_STATUS_PATH=/api/v3/contents/generations/tasks/{id}
VIDEO_CONTENT_PATH=/api/v3/contents/generations/tasks/{id}/content
VIDEO_DURATION_FIELD=seconds
VIDEO_POLL_TIMEOUT_SECONDS=900
+XAI_VIDEO_API_BASE_URL=https://ai.skg.com/ezlink/xai
+XAI_VIDEO_API_KEY=
+XAI_VIDEO_CREATE_PATH=/v1/videos/generations
+XAI_VIDEO_STATUS_PATH=/v1/videos/{id}
+XAI_VIDEO_CONTENT_PATH=
# Azure OpenAI TTS. Leave blank unless testing voice locally.
AUDIO_REWRITE_MODEL=gemini-2.5-pro
diff --git a/deploy/.env.production.example b/deploy/.env.production.example
index 817ae93..f71c3b8 100644
--- a/deploy/.env.production.example
+++ b/deploy/.env.production.example
@@ -107,6 +107,7 @@ VIDEO_API_BASE_URL=https://ai.skg.com/doubao
VIDEO_API_KEY=
VIDEO_MODEL=seedance
VIDEO_MODEL_SEEDANCE=doubao-seedance-2-0-fast-260128
+VIDEO_MODEL_XAI=grok-imagine-video
VIDEO_MODEL_KLING=kling-omni
VIDEO_MODEL_VEO3=veo-3.1-fast
VIDEO_CREATE_PATHS=/api/v3/contents/generations/tasks
@@ -114,3 +115,8 @@ VIDEO_STATUS_PATH=/api/v3/contents/generations/tasks/{id}
VIDEO_CONTENT_PATH=/api/v3/contents/generations/tasks/{id}/content
VIDEO_DURATION_FIELD=seconds
VIDEO_POLL_TIMEOUT_SECONDS=900
+XAI_VIDEO_API_BASE_URL=https://ai.skg.com/ezlink/xai
+XAI_VIDEO_API_KEY=
+XAI_VIDEO_CREATE_PATH=/v1/videos/generations
+XAI_VIDEO_STATUS_PATH=/v1/videos/{id}
+XAI_VIDEO_CONTENT_PATH=
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index 3946871..194b72d 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -618,7 +618,7 @@
2026-05-25 三模式版:默认首页再收敛为一个中央对话框,首页和画布底部输入框只让用户选文生图、文生视频、图生视频,然后手写提示词生成。图生视频只显示“上传图片”,不再把首帧 / 首尾帧这类模型实现概念作为主入口;营销图文不再作为首页默认入口。后端 /health 返回可选图片 / 视频模型、图片尺寸、视频画幅和真实可用视频时长,首页按返回值显示模型和规格选择;当前 Doubao / Seedance 生产链路单条最长 15 秒,不向用户暴露 30 秒按钮。
2026-05-25 根域名画布版:https://marketing.skg.com 登录后直接进入个人生成画布,不再先进入 React 单对话框首页再点画布;/canvas/ 只保留为旧链接兼容跳转。后续优先少改成熟画布结构,只在必要时改模式文案、生成接入和结果/队列显示。
2026-05-25 上游能力恢复版:用户明确要求“API 没关系,其他恢复,别削弱”。因此根域名画布恢复 chatfire-AI/huobao-canvas 的成熟节点和工作流结构:推荐词、AI 润色、自动执行、工作流模板、首帧/尾帧/参考图节点、图片/视频/LLM 配置、多角度分镜、故事板、绘本和批量下载都保留;只继续替换品牌、路由和 API 接入。生成请求仍走 SKG 后端 /api 与登录 Cookie,员工不需要个人 API Key。
- 2026-05-25 媒体模型接入收口:图片和视频模型选择只暴露当前后端真实可用项:图片为 auto、gpt-image-2、gemini-3-pro-image-preview;视频当前只接通 Seedance 2.0 Fast(真实模型 doubao-seedance-2-0-fast-260128)。旧上游的 Nano Banana、Seedream、Kling、Veo 或浏览器本地自定义媒体模型不能进入生成下拉,避免同事选到实际不可用的模型。
+ 2026-05-25 媒体模型接入收口:图片和视频模型选择只暴露当前后端真实可用项:图片为 auto、gpt-image-2、gemini-3-pro-image-preview;视频已接通 Seedance 2.0 Fast(真实模型 doubao-seedance-2-0-fast-260128)和按独立 key 配置的 Grok Imagine Video(真实模型 grok-imagine-video)。旧上游的 Nano Banana、Seedream、Kling、Veo 或浏览器本地自定义媒体模型不能进入生成下拉,避免同事选到实际不可用的模型。
2026-05-26 公司沉淀版:画布项目从浏览器本地存储升级为服务端 Postgres 持久化;localStorage 只作为离线缓存和首次导入来源。后端同时建立用户、任务、资源索引和审计表,保留原有 state.json 文件作为任务详情真源,避免一次迁移动到大文件资产结构。
2026-05-26 AI 润色中性化:画布 AI 润色 不再复用 SKG 广告文案接口 /creative/copy。后端新增 POST /prompt/polish,前端 useChat、根画布输入框、文本节点和自动执行意图分析改走中性提示词/通用生成接口:只优化用户已经给出的主体、风格、镜头和细节,不主动添加 SKG、按摩产品、TikTok 广告话术或用户没有提到的品牌。当前润色链路会先清理上一次润色遗留的模板尾巴,再判断人物/无人/物体/场景/动物/未知主体;原文明确有人时才声明虚构 AI 角色,原文明确无人时才保留无人物约束,原文没写人时不主动造人但也不追加“必须无人物”的模板尾巴;当输入或参考图已经有人物时,按 AI 生成的虚拟角色继续描述,而不是把人物参考图判定为不可用。
2026-05-26 我的工作流云端版:工作流面板从只有公共模板扩展为“公共工作流 / 我的工作流”两类。当前画布可以保存成当前登录用户自己的云端工作流模板,后续在同一账号的其他电脑或浏览器打开后可插回画布;保存时只沉淀节点结构、连线、配置和提示词,主动清掉已生成图片、视频、任务进度、错误和运行态字段,避免把一次性生成结果误当模板复用。
@@ -657,7 +657,7 @@
web/canvas-app/src/stores/workflows.js | 我的工作流 store:调用 GET/POST/DELETE /canvas-workflows 读取、保存和删除当前登录用户自己的云端工作流模板。保存前会清理节点里的 base64、生成 URL、任务进度、错误、视频结果和 LLM 输出等运行态字段,只保留可复用的节点结构、连线、配置和提示词。 |
web/canvas-app/src/views/Canvas.vue | 画布主交互:恢复上游底部 prompt composer、AI 润色、自动执行、推荐词、节点菜单、工作流面板、API/模型设置入口和批量下载入口。自动执行会调用 useWorkflowOrchestrator 分析提示词,创建文生图、图转视频、故事板、多角度分镜或绘本节点组;手动模式只创建文本节点,用户自行连接节点。工作流面板支持公共模板和我的工作流:公共模板走本地 createNodes(),我的工作流从云端 workflow_data 插回当前画布,并重新生成节点 ID、按视口中心重排、按映射重连边。Vue Flow 开启可见节点渲染,大画布不再把所有节点同时挂载到 DOM;节点数超过 120 时隐藏 MiniMap,减少点击后的同步重绘压力。底部推荐词来自共享短词池,4 个一组单行展示,刷新按钮在 30 组内轮换,不改变输入面板高度。 |
web/canvas-app/src/config/suggestions.js | 首页和画布共用的推荐词配置:维护 QUICK_SUGGESTION_GROUPS,当前为 30 组 / 120 个短词,每组 4 个,控制刷新按钮的轮换范围;词条保持短小,避免推荐栏换行或顶起 composer。 |
- web/canvas-app/src/config/models.js | 画布媒体模型和规格的前端白名单:图片只内置 auto、gpt-image-2、gemini-3-pro-image-preview,尺寸只内置 auto、1024x1536、1024x1024、1536x1024;视频只内置 seedance / Seedance 2.0 Fast,画幅和时长对齐后端 /health 能力边界。useModelConfig.js 和 Pinia 模型 store 会忽略浏览器本地自定义图片/视频模型,防止旧缓存把不可用模型带回生成下拉。 |
+ web/canvas-app/src/config/models.js | 画布媒体模型和规格的前端白名单:图片只内置 auto、gpt-image-2、gemini-3-pro-image-preview,尺寸只内置 auto、1024x1536、1024x1024、1536x1024;视频内置 seedance / Seedance 2.0 Fast 和默认不可用的 xai / Grok Imagine Video,后者只有后端 /health 回传 available=true 时才进入生成下拉。画幅和时长对齐后端 /health 能力边界。useModelConfig.js 和 Pinia 模型 store 会忽略浏览器本地自定义图片/视频模型,防止旧缓存把不可用模型带回生成下拉。 |
web/canvas-app/src/hooks/useCachedMediaUrl.js | 画布媒体本地缓存 Hook:只缓存同源、登录保护下的 /api/jobs/... 和 /api/agent-runs/... 图片 / 视频 / 音频。图片节点和视频节点先用原始 URL 保证首屏可见,再后台写入浏览器 Cache Storage;下次打开同一素材时返回本机 blob: URL,减少反复从 VPS 下载。 |
web/canvas-app/src/hooks/useApi.js | 画布到本项目后端的适配层:不再读取浏览器 API Key,而是使用当前登录会话 Cookie 调用 /api。文生图 / 图生图先创建轻量 creative job,再调用 /frames/0/generate;本地上传到图片节点的参考图也会先通过 /creative/jobs/image 写成后端资产,再把 /api/jobs/... URL 保存到节点,避免刷新后丢失。文生视频 / 图生视频调用 /storyboard/video 并轮询 /jobs/{id},完成后把图片或 mp4 URL 写回画布节点。useChat 已从 SKG 广告文案接口切到 /prompt/polish:AI 润色显式使用 image/video prompt 模式,LLM 节点使用通用 chat 模式,避免自动注入用户没有提到的 SKG、产品、平台或营销语境;后端会清理旧润色模板尾巴、判断人物/无人/物体/场景意图,并在输出后检查“有人却禁止人物、无人却新增人物、未写 SKG 却出现 SKG”等冲突。图生视频实际提交到后端后,后端会对参考图追加 AI 虚拟角色条件说明,不要求前端判断图片里是否有人脸。 |
web/scripts/sync-canvas-root.mjs | 构建桥接脚本:在 next build 静态导出完成后,把 Vite 画布产物 web/canvas-app/dist 覆盖到 web/out 根目录,使 https://marketing.skg.com 登录后直接进入画布;旧 web/scripts/sync-canvas-dist.mjs 保留但不再由生产构建调用。 |
@@ -692,7 +692,7 @@
api/main.py | FastAPI 单文件后端:登录会话、状态模型、任务恢复、下载、抽帧、Vision、清洗、元素、分镜、原音频转写/翻译、声音与背景音分析、后续口播改写/TTS、文件返回;同时承载全局 prompt_library 和 asset_library 的磁盘索引、CRUD、删除保护和复制到 job API。启动时会初始化 Postgres schema、扫描现有 state.json / 资源库并写入索引;/canvas-projects 系列接口把画布项目按当前登录用户持久化,/canvas-workflows 系列接口把我的工作流按当前登录用户持久化为可复用模板。轻量创作入口 POST /creative/jobs/image 把上传图片或空白底图写成一个只有 0 号关键帧的 Job,让首页直接复用生图/生视频接口;该接口兼容无 body / JSON 空对象 / 正常 multipart 上传,避免无首帧文生图或文生视频时空 multipart 被 FastAPI 在业务前置解析阶段拒绝;POST /prompt/polish 用于中性 AI 润色和通用 LLM 文本生成,只保留用户明确给出的主体、品牌、产品、地点、风格和意图,不默认加入 SKG、按摩产品、平台或短视频广告话术。润色链路会先用 _strip_previous_polish_boilerplate 去掉旧模板尾巴,再用 _classify_prompt_intent 判断人物、无人、物体、场景、动物或未知主体,最后用 _repair_polished_prompt 修掉有人/无人矛盾、未写人却新增人物、未写 SKG 却出现 SKG 等冲突;_append_reference_image_person_guard 会在视频任务最终入队前给参考图请求追加条件提示,声明参考图里若有人物则按 AI 生成的虚拟角色处理;/health 返回 database、image_options、image_size_options、video_options、video_size_options、video_duration_options 和 video_max_duration_seconds;/frames/{idx}/generate 的 model 字段用于图片模型偏好,size 字段用于图片输出尺寸;/storyboard/video 继续使用 model 字段选择视频别名,并先校验画幅与时长能力边界,然后把 GeneratedVideo 写成 queued 占位并进入进程内视频队列。队列默认 VIDEO_QUEUE_MAX_CONCURRENT=2、VIDEO_QUEUE_MAX_CONCURRENT_PER_USER=1,同一用户连续提交不会占满全局并发;排队任务会回写 queue_position、queue_size、queue_message。旧 AgentRun 一键出片状态机、TK 复刻接口和 POST /creative/copy 作为明确的 SKG 营销文案接口继续保留。 |
api/db.py | Postgres 适配层:在 DATABASE_URL 存在且 psycopg 可用时启用;负责建表、健康检查、用户 upsert、审计日志、画布项目 CRUD、我的工作流 CRUD,以及把 Job、AgentRun、提示词库和素材库写入索引表。数据库不可用时本地开发会降级为 disabled,生产 verify-prod-docker.sh 会要求 database.connected=true。 |
- video_model_options() | 视频模型能力出口:如果 seedance、kling、veo3、veo 等业务别名实际都映射到同一个真实模型,会按真实模型去重,只给前端返回一个可用选项;当前生产真实模型为 doubao-seedance-2-0-fast-260128,前端显示为 Seedance 2.0 Fast。后续只有在服务器真的配置了不同可用视频模型时,才应把新的模型重新暴露给画布。 |
+ video_model_options() | 视频模型能力出口:如果 seedance、kling、veo3、veo 等业务别名实际都映射到同一个真实模型,会按真实模型去重,只给前端返回一个可用选项;当前 Seedance 真实模型为 doubao-seedance-2-0-fast-260128,前端显示为 Seedance 2.0 Fast。新增 xai / grok-imagine-video 独立走 XAI_VIDEO_API_BASE_URL=https://ai.skg.com/ezlink/xai、XAI_VIDEO_API_KEY、/v1/videos/generations 和 /v1/videos/{id},创建返回 request_id、轮询完成返回 video.url;未配置 xAI key 时 /health 会标记不可用,前端不显示。 |
api/product_library/skg-products | 内置 SKG 白底产品图库:manifest.json 记录从桌面产品图筛出的 gallery 白底图和桌面 4 张产品角度图,images/ 存 45 张参考图。 |
api/character_library/skg-characters | 内置相似主体形象库:从桌面 5 套策划形象导入,manifest.json 记录运动阳光男、都市型男、优雅白领女、运动辣妹、绅士大叔,每套含 7 张透明骨架参考图和一段 prompt_brief。相似主体生成时优先使用文字 brief 作为创意方向,避免把内置图作为强参考图复制。 |
asset_library/ | 全局素材库目录,和 jobs/ 平级,不写入任何 job state。四类目录为 subjects、products、scenes、videos;每个素材自带 manifest.json 和图片/视频文件,index.json 只是启动扫描重建出来的缓存。库素材选用到 job 时必须复制文件到 jobs/<jobId>/assets 或 storyboard-videos,禁止直接保存 library 引用。 |
@@ -1266,7 +1266,7 @@ ProductRefStateItem {
ASR:优先走当前 OpenAI-compatible 音频转写入口;如果该网关没有 /audio/transcriptions,自动 fallback 到 ASR_FALLBACK_MODEL(默认 gemini-2.5-flash)的多模态音频识别。
Voice:当前语音通道固定是 VOICE_PROVIDER=azure_openai,通过 AZURE_OPENAI_BASE_URL=https://ai.skg.com/azure 的 OpenAI 协议生成 TTS;后端按 AZURE_TTS_PATHS 依次尝试路径。第一步暂不默认调用。
Audio Product Brief:默认是通用 SKG 放松产品卖点;当前第一步只保留配置,后续分镜/新配音阶段再使用。
- Video Gen:当前视频通道固定优先 Seedance;VIDEO_API_BASE_URL=https://ai.skg.com/doubao 走 content JSON 异步任务,提交后写入候选片段并轮询到完成。
+ Video Gen:当前视频通道默认 Seedance;VIDEO_API_BASE_URL=https://ai.skg.com/doubao 走 content JSON 异步任务。新增 xai / Grok Imagine Video 时,后端按模型分流到 XAI_VIDEO_API_BASE_URL=https://ai.skg.com/ezlink/xai 的 /v1/videos/generations,使用 request_id 轮询 /v1/videos/{id},完成后下载 video.url 写入候选片段。
Compose:还没做本地 ffmpeg 字幕/TTS 合成。
@@ -1310,6 +1310,19 @@ ProductRefStateItem {
变更记录
这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。
+
+
+ 2026-06-03 · 接入 xAI Grok Imagine Video
+ API
+ Model
+ Canvas
+
+
+
问题:SKG xAI 网关 https://ai.skg.com/ezlink/xai 已确认可用 grok-imagine-video 文生视频,但项目只把 Seedance 暴露给画布,后端也按单一视频网关处理,无法同时保留 Seedance 并新增 xAI。
+
改动:api/main.py 新增 xai / grok-imagine-video 视频模型别名、XAI_VIDEO_API_BASE_URL / XAI_VIDEO_API_KEY / XAI_VIDEO_CREATE_PATH / XAI_VIDEO_STATUS_PATH 配置,按模型分流到 /v1/videos/generations 和 /v1/videos/{id};创建时识别 xAI 的 request_id,轮询完成时读取 video.url 并下载 MP4。纯文生视频不会把系统空白帧误传为参考图;图生视频会把用户上传首帧作为 image 传入。
+
前端 / 配置:web/canvas-app/src/config/models.js 新增默认不可用的 xai 模型,web/canvas-app/src/stores/pinia/models.js 改为接受后端 /health 返回的可用视频模型,不再硬编码只保留 Seedance。api/.env.example、deploy/.env.local.example 和 deploy/.env.production.example 增加 xAI 私有 key 配置位,真实 key 只填本地或服务器私有 env。
+
+
2026-05-30 · 稳定性 / 安全加固(子进程超时、SSRF、并发锁、上传持久化、轮询容错)
diff --git a/web/canvas-app/src/config/models.js b/web/canvas-app/src/config/models.js
index 1a59ea7..dc7adb9 100644
--- a/web/canvas-app/src/config/models.js
+++ b/web/canvas-app/src/config/models.js
@@ -127,6 +127,24 @@ export const VIDEO_MODELS = [
defaultResolution: '720p',
defaultParams: { ratio: '720x1280', duration: 10, resolution: '720p' }
},
+ {
+ label: 'Grok Imagine Video',
+ key: 'xai',
+ provider: ['chatfire'],
+ type: 't2v+i2v',
+ ratios: ['720x1280', '1280x720', '1024x1024'],
+ durs: [
+ { label: '5 秒', key: 5 },
+ { label: '8 秒', key: 8 },
+ { label: '10 秒', key: 10 },
+ { label: '12 秒', key: 12 },
+ { label: '15 秒', key: 15 }
+ ],
+ resolutions: ['480p', '720p'],
+ defaultResolution: '720p',
+ defaultParams: { ratio: '720x1280', duration: 8, resolution: '720p' },
+ available: false
+ },
{
label: 'Seedance 2.0 高清',
key: 'seedance_hd',
diff --git a/web/canvas-app/src/stores/pinia/models.js b/web/canvas-app/src/stores/pinia/models.js
index d24aa2e..2319b7c 100644
--- a/web/canvas-app/src/stores/pinia/models.js
+++ b/web/canvas-app/src/stores/pinia/models.js
@@ -460,11 +460,7 @@ export const useModelStore = defineStore('model', () => {
.filter(Boolean)
const videoOptions = data?.models?.video_options || []
runtimeVideoModels.value = videoOptions
- .filter(item => {
- const id = String(item?.id || '').toLowerCase()
- const model = String(item?.model || '').toLowerCase()
- return id.includes('seedance') || model.includes('seedance')
- })
+ .filter(item => item?.id && item.available !== false)
.map(normalizeRuntimeVideoModel)
.filter(Boolean)
return true