feat: expose generation model choices

This commit is contained in:
2026-05-25 11:02:13 +08:00
parent 6ba84a7603
commit dcc8abc812
5 changed files with 159 additions and 15 deletions

View File

@@ -11,7 +11,7 @@
- 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解 - 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解
- 风格:`04-Dark-Gallery-Ambient`(路径:`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`) - 风格:`04-Dark-Gallery-Ambient`(路径:`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`)
- 第一冲刺:步骤 1-4(下载 / 拆轨 / 关键帧 / ASR+翻译) - 第一冲刺:步骤 1-4(下载 / 拆轨 / 关键帧 / ASR+翻译)
- 当前产品方向(2026-05-25 单对话框版):默认首页彻底从“信息流广告复刻管线”切换为多人通用的 SKG 营销内容生成入口,服务约 6 名公司成员同时使用。首页默认只保留一个中央对话框,不再显示侧栏、灵感区、任务列表或大结果面板;用户先选择四种生成方式之一:文生视频、文生图、首帧生视频、首尾帧生视频,然后手写提示词并点击生成。首帧 / 首尾帧模式只露必要图片上传位,视频模式只保留时长选择。用户登录后仍只看到自己的任务、结果和详情页,继续沿用后端 owner 隔离;结果生成后从对话框下方进入 `/detail/?job=<id>` 沉淀参考图、生成图、视频候选和提示词。旧 TK 复刻工作台、Agent Cut 一键出片和营销图文方案保留为高级/详情页能力,不再作为默认首页入口或默认理解框架。 - 当前产品方向(2026-05-25 单对话框版):默认首页彻底从“信息流广告复刻管线”切换为多人通用的 SKG 营销内容生成入口,服务约 6 名公司成员同时使用。首页默认只保留一个中央对话框,不再显示侧栏、灵感区、任务列表或大结果面板;用户先选择四种生成方式之一:文生视频、文生图、首帧生视频、首尾帧生视频,然后手写提示词并点击生成。首帧 / 首尾帧模式只露必要图片上传位,视频模式只保留时长选择。后端 `/health` 向前端返回可选图片 / 视频模型,首页允许用户选择图片模型(自动、GPT Image 2、Gemini 图片兜底)和视频模型(Seedance、Kling、Veo 3 等别名;实际可用模型以环境变量映射为准)。用户登录后仍只看到自己的任务、结果和详情页,继续沿用后端 owner 隔离;结果生成后从对话框下方进入 `/detail/?job=<id>` 沉淀参考图、生成图、视频候选和提示词。旧 TK 复刻工作台、Agent Cut 一键出片和营销图文方案保留为高级/详情页能力,不再作为默认首页入口或默认理解框架。
## 部署事实 ## 部署事实
- 平台VPS `76.13.31.179`Ubuntu 24.04 / Docker Compose / Coolify Traefik - 平台VPS `76.13.31.179`Ubuntu 24.04 / Docker Compose / Coolify Traefik

View File

@@ -4106,6 +4106,71 @@ def _image_model_candidates(force_fallback: bool = False, preference: str | None
return [GPT_IMAGE_MODEL, *fallbacks] return [GPT_IMAGE_MODEL, *fallbacks]
def image_model_options() -> list[dict]:
    """Return the image-model choices advertised to clients.

    The list always starts with an ``auto`` entry and an explicit entry for
    ``GPT_IMAGE_MODEL``. A third entry for ``IMAGE_FALLBACK_MODEL`` is added
    only when the fallback is enabled, configured, and different from the
    primary model.

    Returns:
        A list of dicts with the keys ``id``, ``label``, ``model``,
        ``description`` and ``available``. ``available`` is the truthiness of
        ``IMAGE_API_KEY`` for every entry.
    """
    # Every entry shares the same availability flag, so compute it once.
    key_present = bool(IMAGE_API_KEY)

    def _entry(option_id: str, label: str, model: str, description: str) -> dict:
        # Key order is kept stable so the serialized payload does not change.
        return {
            "id": option_id,
            "label": label,
            "model": model,
            "description": description,
            "available": key_present,
        }

    choices = [
        _entry(
            "auto",
            "自动",
            GPT_IMAGE_MODEL,
            "优先 GPT Image 2必要时按后端熔断和兜底策略切到备用图片模型",
        ),
        _entry(
            GPT_IMAGE_MODEL,
            "GPT Image 2",
            GPT_IMAGE_MODEL,
            "主生图模型,适合营销图和参考图重绘",
        ),
    ]

    # No usable fallback: either disabled, unset, or identical to the primary.
    if not IMAGE_FALLBACK_ENABLED or not IMAGE_FALLBACK_MODEL:
        return choices
    if IMAGE_FALLBACK_MODEL == GPT_IMAGE_MODEL:
        return choices

    choices.append(
        _entry(
            IMAGE_FALLBACK_MODEL,
            "Gemini 图片",
            IMAGE_FALLBACK_MODEL,
            "备用图片模型,适合主模型慢或失败时手动选择",
        )
    )
    return choices
def video_model_options() -> list[dict]:
    """Return the video-model choices advertised to clients.

    One entry is produced for each of the aliases ``seedance``, ``kling``,
    ``veo3`` and ``veo`` (in that order) that is present in
    ``VIDEO_MODEL_ALIASES``. Aliases that resolve to the same model are listed
    separately. If no entry matches the configured default, either by alias
    (``VIDEO_MODEL``) or by resolved model name, the default is inserted at
    the front of the list.

    Returns:
        A list of dicts with the keys ``id``, ``label``, ``model``,
        ``description`` and ``available``. ``available`` is the truthiness of
        ``video_api_key()`` for every entry.
    """
    label_map = {
        "seedance": "Seedance",
        "kling": "Kling",
        "veo3": "Veo 3",
        "veo": "Veo",
        # Not offered as a choice below; only used to label the configured
        # default. Presumably a tolerated misspelling of "veo" - TODO confirm.
        "voe": "Veo",
    }
    # Availability is the same for every entry, so look the key up once.
    key_present = bool(video_api_key())

    # The alias names below are distinct, so no de-duplication is required;
    # the former "key:model" guard could never fire.
    options: list[dict] = [
        {
            "id": key,
            "label": label_map.get(key, key),
            "model": VIDEO_MODEL_ALIASES[key],
            "description": "当前视频网关可选模型",
            "available": key_present,
        }
        for key in ("seedance", "kling", "veo3", "veo")
        if key in VIDEO_MODEL_ALIASES
    ]

    default_model = resolve_video_model(VIDEO_MODEL)
    default_listed = any(
        item["id"] == VIDEO_MODEL or item["model"] == default_model
        for item in options
    )
    if not default_listed:
        # Make sure the configured default is always selectable, and first.
        options.insert(0, {
            "id": VIDEO_MODEL,
            "label": label_map.get(VIDEO_MODEL, VIDEO_MODEL),
            "model": default_model,
            "description": "默认视频模型",
            "available": key_present,
        })
    return options
def _image_failure_can_fallback(status_code: int, body: str, last_err: str) -> bool: def _image_failure_can_fallback(status_code: int, body: str, last_err: str) -> bool:
if status_code in (400, 401, 403, 404): if status_code in (400, 401, 403, 404):
return False return False
@@ -5132,6 +5197,7 @@ def health() -> dict:
"image": IMAGE_MODEL, "image": IMAGE_MODEL,
"image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default", "image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
"image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS, "image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS,
"image_options": image_model_options(),
"ai_proxy_configured": bool(AI_HTTP_PROXY), "ai_proxy_configured": bool(AI_HTTP_PROXY),
"image_fallbacks": _image_fallback_models(), "image_fallbacks": _image_fallback_models(),
"image_circuit": _image_circuit_snapshot(), "image_circuit": _image_circuit_snapshot(),
@@ -5146,6 +5212,7 @@ def health() -> dict:
"voice_configured": bool(AZURE_OPENAI_API_KEY), "voice_configured": bool(AZURE_OPENAI_API_KEY),
"video": VIDEO_MODEL, "video": VIDEO_MODEL,
"video_aliases": VIDEO_MODEL_ALIASES, "video_aliases": VIDEO_MODEL_ALIASES,
"video_options": video_model_options(),
"video_provider": video_provider_name(), "video_provider": video_provider_name(),
"video_base_url": video_api_base(), "video_base_url": video_api_base(),
"video_configured": bool(video_api_key()), "video_configured": bool(video_api_key()),
@@ -5598,7 +5665,7 @@ class GenerateReq(BaseModel):
prompt: str prompt: str
extra_prompt: str = "" # ✓ 需要的元素(正向) extra_prompt: str = "" # ✓ 需要的元素(正向)
negative_prompt: str = "" # ✗ 不需要的元素(负向) negative_prompt: str = "" # ✗ 不需要的元素(负向)
model: str = "" # 兼容旧前端字段;服务端强制使用 gpt-image-2 model: str = "auto" # auto / gpt-image-2 / gemini-3-pro-image-preview
mode: str = "edit" # "edit" 带参考图,"text" 纯文字 mode: str = "edit" # "edit" 带参考图,"text" 纯文字
from_selected: bool = False # True 时优先用 frame.selected 的生成图作 reference迭代否则原关键帧 from_selected: bool = False # True 时优先用 frame.selected 的生成图作 reference迭代否则原关键帧
@@ -5649,8 +5716,8 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
if req.mode == "edit": if req.mode == "edit":
img_bytes_in = reference_path.read_bytes() img_bytes_in = reference_path.read_bytes()
# 尝试 i2i主模型上游异常时允许 Gemini 兜底。无兜底时保留旧的多次重试 # 尝试 i2iauto 允许按熔断策略兜底,显式模型只走用户所选模型
model_candidates = _image_model_candidates() model_candidates = _image_model_candidates(preference=req.model)
plan: list[str] = ([req.mode] if model_candidates != [GPT_IMAGE_MODEL] else [req.mode] * 3) if req.mode == "edit" else [req.mode] plan: list[str] = ([req.mode] if model_candidates != [GPT_IMAGE_MODEL] else [req.mode] * 3) if req.mode == "edit" else [req.mode]
if req.mode == "edit": if req.mode == "edit":
plan.append("text") # i2i 都失败时自动降级 plan.append("text") # i2i 都失败时自动降级

File diff suppressed because one or more lines are too long

View File

@@ -22,11 +22,13 @@ import {
deleteGeneratedVideo, deleteGeneratedVideo,
generateImage, generateImage,
generateStoryboardVideo, generateStoryboardVideo,
getRuntimeHealth,
getJob, getJob,
uploadReferenceFrame, uploadReferenceFrame,
type GeneratedImage, type GeneratedImage,
type GeneratedVideo, type GeneratedVideo,
type Job, type Job,
type RuntimeModelOption,
} from "@/lib/api" } from "@/lib/api"
type CreationMode = "text-video" | "text-image" | "first-frame-video" | "first-last-frame-video" type CreationMode = "text-video" | "text-image" | "first-frame-video" | "first-last-frame-video"
@@ -105,6 +107,14 @@ export default function Home() {
const [lastFrameFile, setLastFrameFile] = useState<File | null>(null) const [lastFrameFile, setLastFrameFile] = useState<File | null>(null)
const [firstFramePreview, setFirstFramePreview] = useState("") const [firstFramePreview, setFirstFramePreview] = useState("")
const [lastFramePreview, setLastFramePreview] = useState("") const [lastFramePreview, setLastFramePreview] = useState("")
const [imageModel, setImageModel] = useState("auto")
const [videoModel, setVideoModel] = useState("seedance")
const [imageOptions, setImageOptions] = useState<RuntimeModelOption[]>([
{ id: "auto", label: "自动", model: "gpt-image-2", available: true },
])
const [videoOptions, setVideoOptions] = useState<RuntimeModelOption[]>([
{ id: "seedance", label: "Seedance", model: "seedance", available: true },
])
const [job, setJob] = useState<Job | null>(null) const [job, setJob] = useState<Job | null>(null)
const [busy, setBusy] = useState<BusyTask>(null) const [busy, setBusy] = useState<BusyTask>(null)
const [error, setError] = useState("") const [error, setError] = useState("")
@@ -117,6 +127,30 @@ export default function Home() {
const runningVideo = (job?.generated_videos ?? []).some((item) => item.status === "queued" || item.status === "in_progress") const runningVideo = (job?.generated_videos ?? []).some((item) => item.status === "queued" || item.status === "in_progress")
const submitting = busy === mode || busy === "job" const submitting = busy === mode || busy === "job"
useEffect(() => {
getRuntimeHealth()
.then((health) => {
const models = health.models
const nextImageOptions = models?.image_options?.length
? models.image_options
: [
{ id: "auto", label: "自动", model: models?.image || "gpt-image-2", available: true },
{ id: models?.image || "gpt-image-2", label: "GPT Image 2", model: models?.image || "gpt-image-2", available: true },
]
const nextVideoOptions = models?.video_options?.length
? models.video_options
: [{ id: models?.video || "seedance", label: "Seedance", model: models?.video || "seedance", available: !!models?.video_configured }]
setImageOptions(nextImageOptions)
setVideoOptions(nextVideoOptions)
if (!nextImageOptions.some((item) => item.id === imageModel)) setImageModel(nextImageOptions[0]?.id || "auto")
if (!nextVideoOptions.some((item) => item.id === videoModel)) setVideoModel(nextVideoOptions[0]?.id || "seedance")
})
.catch(() => {
setImageOptions([{ id: "auto", label: "自动", model: "gpt-image-2", available: true }])
setVideoOptions([{ id: "seedance", label: "Seedance", model: "seedance", available: true }])
})
}, [])
useEffect(() => { useEffect(() => {
if (!firstFrameFile) { if (!firstFrameFile) {
setFirstFramePreview("") setFirstFramePreview("")
@@ -211,6 +245,7 @@ export default function Home() {
const updated = await generateImage(target.id, 0, { const updated = await generateImage(target.id, 0, {
prompt: promptWithGuardrails(), prompt: promptWithGuardrails(),
mode: "text", mode: "text",
model: imageModel,
}) })
setJob(updated) setJob(updated)
toast.success("图片已生成") toast.success("图片已生成")
@@ -237,6 +272,7 @@ export default function Home() {
first_image: activeMode.needsFirstFrame ? { kind: "keyframe", frame_idx: 0 } : null, first_image: activeMode.needsFirstFrame ? { kind: "keyframe", frame_idx: 0 } : null,
last_image: activeMode.needsLastFrame && lastFrame ? { kind: "keyframe", frame_idx: lastFrame.index } : null, last_image: activeMode.needsLastFrame && lastFrame ? { kind: "keyframe", frame_idx: lastFrame.index } : null,
size: "720x1280", size: "720x1280",
model: videoModel,
}) })
setJob(updated) setJob(updated)
toast.success("视频已提交") toast.success("视频已提交")
@@ -368,7 +404,24 @@ export default function Home() {
/> />
<div className="mt-3 flex flex-wrap items-center justify-between gap-3"> <div className="mt-3 flex flex-wrap items-center justify-between gap-3">
<div className="flex items-center gap-2 text-xs text-white/38"> <div className="flex flex-wrap items-center gap-2 text-xs text-white/38">
<label className="inline-flex h-9 items-center gap-2 rounded-xl border border-white/7 bg-black/14 px-3">
<select
value={isVideoMode(mode) ? videoModel : imageModel}
onChange={(event) => {
if (isVideoMode(mode)) setVideoModel(event.target.value)
else setImageModel(event.target.value)
}}
className="max-w-36 bg-transparent text-white/76 outline-none"
>
{(isVideoMode(mode) ? videoOptions : imageOptions).map((item) => (
<option key={item.id} value={item.id} disabled={item.available === false}>
{item.label}
</option>
))}
</select>
</label>
{isVideoMode(mode) ? ( {isVideoMode(mode) ? (
<label className="inline-flex h-9 items-center gap-2 rounded-xl border border-white/7 bg-black/14 px-3"> <label className="inline-flex h-9 items-center gap-2 rounded-xl border border-white/7 bg-black/14 px-3">

View File

@@ -254,6 +254,14 @@ export interface GeneratedVideo {
created_at: number created_at: number
} }
/**
 * One selectable generation model, as listed in the runtime health payload
 * under `image_options` / `video_options`.
 */
export interface RuntimeModelOption {
// Value used for the <option> element and sent as `model` in generate requests.
id: string
// Text shown to the user in the model dropdown.
label: string
// Model name the backend associates with this option.
model: string
// Optional explanatory text supplied by the backend.
description?: string
// The dropdown disables the option only when this is exactly `false`.
available?: boolean
}
export interface RuntimeModels { export interface RuntimeModels {
asr?: string asr?: string
asr_language?: string asr_language?: string
@@ -271,6 +279,7 @@ export interface RuntimeModels {
product_view?: string product_view?: string
image?: string image?: string
image_base_url?: string image_base_url?: string
image_options?: RuntimeModelOption[]
image_fallbacks?: string[] image_fallbacks?: string[]
image_circuit?: { image_circuit?: {
primary?: string primary?: string
@@ -293,6 +302,7 @@ export interface RuntimeModels {
voice_tts_paths?: string[] voice_tts_paths?: string[]
video?: string video?: string
video_aliases?: Record<string, string> video_aliases?: Record<string, string>
video_options?: RuntimeModelOption[]
video_provider?: string video_provider?: string
video_base_url?: string video_base_url?: string
video_configured?: boolean video_configured?: boolean