feat: expose generation model choices
This commit is contained in:
2
RULES.md
2
RULES.md
@@ -11,7 +11,7 @@
|
||||
- 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解
|
||||
- 风格:`04-Dark-Gallery-Ambient`(路径:`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`)
|
||||
- 第一冲刺:步骤 1-4(下载 / 拆轨 / 关键帧 / ASR+翻译)
|
||||
- 当前产品方向(2026-05-25 单对话框版):默认首页彻底从“信息流广告复刻管线”切换为多人通用的 SKG 营销内容生成入口,服务约 6 名公司成员同时使用。首页默认只保留一个中央对话框,不再显示侧栏、灵感区、任务列表或大结果面板;用户先选择四种生成方式之一:文生视频、文生图、首帧生视频、首尾帧生视频,然后手写提示词并点击生成。首帧 / 首尾帧模式只露必要图片上传位,视频模式只保留时长选择。用户登录后仍只看到自己的任务、结果和详情页,继续沿用后端 owner 隔离;结果生成后从对话框下方进入 `/detail/?job=<id>` 沉淀参考图、生成图、视频候选和提示词。旧 TK 复刻工作台、Agent Cut 一键出片和营销图文方案保留为高级/详情页能力,不再作为默认首页入口或默认理解框架。
|
||||
- 当前产品方向(2026-05-25 单对话框版):默认首页彻底从“信息流广告复刻管线”切换为多人通用的 SKG 营销内容生成入口,服务约 6 名公司成员同时使用。首页默认只保留一个中央对话框,不再显示侧栏、灵感区、任务列表或大结果面板;用户先选择四种生成方式之一:文生视频、文生图、首帧生视频、首尾帧生视频,然后手写提示词并点击生成。首帧 / 首尾帧模式只露必要图片上传位,视频模式只保留时长选择。后端 `/health` 向前端返回可选图片 / 视频模型,首页允许用户选择图片模型(自动、GPT Image 2、Gemini 图片兜底)和视频模型(Seedance、Kling、Veo 3 等别名;实际可用模型以环境变量映射为准)。用户登录后仍只看到自己的任务、结果和详情页,继续沿用后端 owner 隔离;结果生成后从对话框下方进入 `/detail/?job=<id>` 沉淀参考图、生成图、视频候选和提示词。旧 TK 复刻工作台、Agent Cut 一键出片和营销图文方案保留为高级/详情页能力,不再作为默认首页入口或默认理解框架。
|
||||
|
||||
## 部署事实
|
||||
- 平台:VPS `76.13.31.179`(Ubuntu 24.04 / Docker Compose / Coolify Traefik)
|
||||
|
||||
73
api/main.py
73
api/main.py
@@ -4106,6 +4106,71 @@ def _image_model_candidates(force_fallback: bool = False, preference: str | None
|
||||
return [GPT_IMAGE_MODEL, *fallbacks]
|
||||
|
||||
|
||||
def image_model_options() -> list[dict]:
    """Build the list of image-model choices exposed to the frontend.

    The list always contains an "auto" entry and an explicit entry for
    ``GPT_IMAGE_MODEL``. An entry for ``IMAGE_FALLBACK_MODEL`` is appended
    only when fallback is enabled, a fallback model is configured, and it
    differs from the primary model.

    Returns:
        A list of dicts with the keys ``id``, ``label``, ``model``,
        ``description`` and ``available``. ``available`` reflects whether
        ``IMAGE_API_KEY`` is set and is the same for every entry.
    """
    key_present = bool(IMAGE_API_KEY)

    def describe(option_id: str, label: str, model: str, description: str) -> dict:
        # Single place that fixes the key order and the shared availability flag.
        return {
            "id": option_id,
            "label": label,
            "model": model,
            "description": description,
            "available": key_present,
        }

    choices = [
        describe(
            "auto",
            "自动",
            GPT_IMAGE_MODEL,
            "优先 GPT Image 2,必要时按后端熔断和兜底策略切到备用图片模型",
        ),
        describe(
            GPT_IMAGE_MODEL,
            "GPT Image 2",
            GPT_IMAGE_MODEL,
            "主生图模型,适合营销图和参考图重绘",
        ),
    ]

    has_distinct_fallback = (
        IMAGE_FALLBACK_ENABLED
        and IMAGE_FALLBACK_MODEL
        and IMAGE_FALLBACK_MODEL != GPT_IMAGE_MODEL
    )
    if not has_distinct_fallback:
        return choices

    choices.append(
        describe(
            IMAGE_FALLBACK_MODEL,
            "Gemini 图片",
            IMAGE_FALLBACK_MODEL,
            "备用图片模型,适合主模型慢或失败时手动选择",
        )
    )
    return choices
|
||||
|
||||
|
||||
def video_model_options() -> list[dict]:
    """Build the list of video-model choices exposed to the frontend.

    One entry is produced for each of the well-known alias keys
    (``seedance``, ``kling``, ``veo3``, ``veo``) that is present in
    ``VIDEO_MODEL_ALIASES``, in that fixed order. If none of those entries
    matches the configured default (either by alias id ``VIDEO_MODEL`` or by
    the model it resolves to), a "default" entry is inserted at the front.

    Returns:
        A list of dicts with the keys ``id``, ``label``, ``model``,
        ``description`` and ``available``. ``available`` reflects whether
        ``video_api_key()`` returned a truthy value.
    """
    label_map = {
        "seedance": "Seedance",
        "kling": "Kling",
        "veo3": "Veo 3",
        "veo": "Veo",
        # NOTE(review): "voe" is never listed below; it is only reachable as a
        # label for VIDEO_MODEL. Presumably a tolerated misspelling - confirm.
        "voe": "Veo",
    }

    # The availability flag is the same for every entry, so evaluate it once
    # instead of once per option.
    available = bool(video_api_key())

    # The alias keys are distinct, so no de-duplication is needed here: the
    # previous "key:model" seen-set could never hit. Two aliases that map to
    # the same model are still listed separately, as before.
    options: list[dict] = [
        {
            "id": key,
            "label": label_map.get(key, key),
            "model": VIDEO_MODEL_ALIASES[key],
            "description": "当前视频网关可选模型",
            "available": available,
        }
        for key in ("seedance", "kling", "veo3", "veo")
        if key in VIDEO_MODEL_ALIASES
    ]

    default_model = resolve_video_model(VIDEO_MODEL)
    default_listed = any(
        item["id"] == VIDEO_MODEL or item["model"] == default_model
        for item in options
    )
    if not default_listed:
        # Make sure the configured default is always selectable, and first.
        options.insert(0, {
            "id": VIDEO_MODEL,
            "label": label_map.get(VIDEO_MODEL, VIDEO_MODEL),
            "model": default_model,
            "description": "默认视频模型",
            "available": available,
        })
    return options
|
||||
|
||||
|
||||
def _image_failure_can_fallback(status_code: int, body: str, last_err: str) -> bool:
|
||||
if status_code in (400, 401, 403, 404):
|
||||
return False
|
||||
@@ -5132,6 +5197,7 @@ def health() -> dict:
|
||||
"image": IMAGE_MODEL,
|
||||
"image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
|
||||
"image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS,
|
||||
"image_options": image_model_options(),
|
||||
"ai_proxy_configured": bool(AI_HTTP_PROXY),
|
||||
"image_fallbacks": _image_fallback_models(),
|
||||
"image_circuit": _image_circuit_snapshot(),
|
||||
@@ -5146,6 +5212,7 @@ def health() -> dict:
|
||||
"voice_configured": bool(AZURE_OPENAI_API_KEY),
|
||||
"video": VIDEO_MODEL,
|
||||
"video_aliases": VIDEO_MODEL_ALIASES,
|
||||
"video_options": video_model_options(),
|
||||
"video_provider": video_provider_name(),
|
||||
"video_base_url": video_api_base(),
|
||||
"video_configured": bool(video_api_key()),
|
||||
@@ -5598,7 +5665,7 @@ class GenerateReq(BaseModel):
|
||||
prompt: str
|
||||
extra_prompt: str = "" # ✓ 需要的元素(正向)
|
||||
negative_prompt: str = "" # ✗ 不需要的元素(负向)
|
||||
model: str = "" # 兼容旧前端字段;服务端强制使用 gpt-image-2
|
||||
model: str = "auto" # auto / gpt-image-2 / gemini-3-pro-image-preview
|
||||
mode: str = "edit" # "edit" 带参考图,"text" 纯文字
|
||||
from_selected: bool = False # True 时优先用 frame.selected 的生成图作 reference(迭代),否则原关键帧
|
||||
|
||||
@@ -5649,8 +5716,8 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
|
||||
if req.mode == "edit":
|
||||
img_bytes_in = reference_path.read_bytes()
|
||||
|
||||
# 尝试 i2i;主模型上游异常时允许 Gemini 兜底。无兜底时保留旧的多次重试。
|
||||
model_candidates = _image_model_candidates()
|
||||
# 尝试 i2i;auto 允许按熔断策略兜底,显式模型只走用户所选模型。
|
||||
model_candidates = _image_model_candidates(preference=req.model)
|
||||
plan: list[str] = ([req.mode] if model_candidates != [GPT_IMAGE_MODEL] else [req.mode] * 3) if req.mode == "edit" else [req.mode]
|
||||
if req.mode == "edit":
|
||||
plan.append("text") # i2i 都失败时自动降级
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -22,11 +22,13 @@ import {
|
||||
deleteGeneratedVideo,
|
||||
generateImage,
|
||||
generateStoryboardVideo,
|
||||
getRuntimeHealth,
|
||||
getJob,
|
||||
uploadReferenceFrame,
|
||||
type GeneratedImage,
|
||||
type GeneratedVideo,
|
||||
type Job,
|
||||
type RuntimeModelOption,
|
||||
} from "@/lib/api"
|
||||
|
||||
type CreationMode = "text-video" | "text-image" | "first-frame-video" | "first-last-frame-video"
|
||||
@@ -105,6 +107,14 @@ export default function Home() {
|
||||
const [lastFrameFile, setLastFrameFile] = useState<File | null>(null)
|
||||
const [firstFramePreview, setFirstFramePreview] = useState("")
|
||||
const [lastFramePreview, setLastFramePreview] = useState("")
|
||||
const [imageModel, setImageModel] = useState("auto")
|
||||
const [videoModel, setVideoModel] = useState("seedance")
|
||||
const [imageOptions, setImageOptions] = useState<RuntimeModelOption[]>([
|
||||
{ id: "auto", label: "自动", model: "gpt-image-2", available: true },
|
||||
])
|
||||
const [videoOptions, setVideoOptions] = useState<RuntimeModelOption[]>([
|
||||
{ id: "seedance", label: "Seedance", model: "seedance", available: true },
|
||||
])
|
||||
const [job, setJob] = useState<Job | null>(null)
|
||||
const [busy, setBusy] = useState<BusyTask>(null)
|
||||
const [error, setError] = useState("")
|
||||
@@ -117,6 +127,30 @@ export default function Home() {
|
||||
const runningVideo = (job?.generated_videos ?? []).some((item) => item.status === "queued" || item.status === "in_progress")
|
||||
const submitting = busy === mode || busy === "job"
|
||||
|
||||
useEffect(() => {
|
||||
getRuntimeHealth()
|
||||
.then((health) => {
|
||||
const models = health.models
|
||||
const nextImageOptions = models?.image_options?.length
|
||||
? models.image_options
|
||||
: [
|
||||
{ id: "auto", label: "自动", model: models?.image || "gpt-image-2", available: true },
|
||||
{ id: models?.image || "gpt-image-2", label: "GPT Image 2", model: models?.image || "gpt-image-2", available: true },
|
||||
]
|
||||
const nextVideoOptions = models?.video_options?.length
|
||||
? models.video_options
|
||||
: [{ id: models?.video || "seedance", label: "Seedance", model: models?.video || "seedance", available: !!models?.video_configured }]
|
||||
setImageOptions(nextImageOptions)
|
||||
setVideoOptions(nextVideoOptions)
|
||||
if (!nextImageOptions.some((item) => item.id === imageModel)) setImageModel(nextImageOptions[0]?.id || "auto")
|
||||
if (!nextVideoOptions.some((item) => item.id === videoModel)) setVideoModel(nextVideoOptions[0]?.id || "seedance")
|
||||
})
|
||||
.catch(() => {
|
||||
setImageOptions([{ id: "auto", label: "自动", model: "gpt-image-2", available: true }])
|
||||
setVideoOptions([{ id: "seedance", label: "Seedance", model: "seedance", available: true }])
|
||||
})
|
||||
}, [])
|
||||
|
||||
useEffect(() => {
|
||||
if (!firstFrameFile) {
|
||||
setFirstFramePreview("")
|
||||
@@ -211,6 +245,7 @@ export default function Home() {
|
||||
const updated = await generateImage(target.id, 0, {
|
||||
prompt: promptWithGuardrails(),
|
||||
mode: "text",
|
||||
model: imageModel,
|
||||
})
|
||||
setJob(updated)
|
||||
toast.success("图片已生成")
|
||||
@@ -237,6 +272,7 @@ export default function Home() {
|
||||
first_image: activeMode.needsFirstFrame ? { kind: "keyframe", frame_idx: 0 } : null,
|
||||
last_image: activeMode.needsLastFrame && lastFrame ? { kind: "keyframe", frame_idx: lastFrame.index } : null,
|
||||
size: "720x1280",
|
||||
model: videoModel,
|
||||
})
|
||||
setJob(updated)
|
||||
toast.success("视频已提交")
|
||||
@@ -368,7 +404,24 @@ export default function Home() {
|
||||
/>
|
||||
|
||||
<div className="mt-3 flex flex-wrap items-center justify-between gap-3">
|
||||
<div className="flex items-center gap-2 text-xs text-white/38">
|
||||
<div className="flex flex-wrap items-center gap-2 text-xs text-white/38">
|
||||
<label className="inline-flex h-9 items-center gap-2 rounded-xl border border-white/7 bg-black/14 px-3">
|
||||
模型
|
||||
<select
|
||||
value={isVideoMode(mode) ? videoModel : imageModel}
|
||||
onChange={(event) => {
|
||||
if (isVideoMode(mode)) setVideoModel(event.target.value)
|
||||
else setImageModel(event.target.value)
|
||||
}}
|
||||
className="max-w-36 bg-transparent text-white/76 outline-none"
|
||||
>
|
||||
{(isVideoMode(mode) ? videoOptions : imageOptions).map((item) => (
|
||||
<option key={item.id} value={item.id} disabled={item.available === false}>
|
||||
{item.label}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
</label>
|
||||
{isVideoMode(mode) ? (
|
||||
<label className="inline-flex h-9 items-center gap-2 rounded-xl border border-white/7 bg-black/14 px-3">
|
||||
时长
|
||||
|
||||
@@ -254,6 +254,14 @@ export interface GeneratedVideo {
|
||||
created_at: number
|
||||
}
|
||||
|
||||
/**
 * One selectable generation model, as listed in the runtime health payload
 * (`image_options` / `video_options`).
 */
export interface RuntimeModelOption {
|
||||
// Identifier of the choice; the model picker uses it as the option value.
id: string
|
||||
// Human-readable name shown in the model picker.
label: string
|
||||
// Model name this choice maps to on the backend.
model: string
|
||||
// Optional short explanation of the choice.
description?: string
|
||||
// The picker disables the choice only when this is explicitly `false`.
available?: boolean
|
||||
}
|
||||
|
||||
export interface RuntimeModels {
|
||||
asr?: string
|
||||
asr_language?: string
|
||||
@@ -271,6 +279,7 @@ export interface RuntimeModels {
|
||||
product_view?: string
|
||||
image?: string
|
||||
image_base_url?: string
|
||||
image_options?: RuntimeModelOption[]
|
||||
image_fallbacks?: string[]
|
||||
image_circuit?: {
|
||||
primary?: string
|
||||
@@ -293,6 +302,7 @@ export interface RuntimeModels {
|
||||
voice_tts_paths?: string[]
|
||||
video?: string
|
||||
video_aliases?: Record<string, string>
|
||||
video_options?: RuntimeModelOption[]
|
||||
video_provider?: string
|
||||
video_base_url?: string
|
||||
video_configured?: boolean
|
||||
|
||||
Reference in New Issue
Block a user