From 970bc56cc27c2ed83d0ab0927ce09c9575efd78f Mon Sep 17 00:00:00 2001 From: kang Date: Sun, 17 May 2026 23:35:20 +0800 Subject: [PATCH] auto-save 2026-05-17 23:35 (~4) --- .memory/worklog.json | 40 ++--- api/main.py | 4 + web/components/ad-recreation-board.tsx | 212 ++++++++++++++++++++++++- web/lib/api.ts | 39 +++++ 4 files changed, 268 insertions(+), 27 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index cec0720..217210e 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1,25 +1,5 @@ { "entries": [ - { - "files_changed": 1, - "hash": "8090674", - "message": "auto-save 2026-05-15 14:33 (~1)", - "ts": "2026-05-15T14:36:57+08:00", - "type": "commit" - }, - { - "files_changed": 1, - "hash": "ce04cf4", - "message": "auto-save 2026-05-15 14:42 (~1)", - "ts": "2026-05-15T14:42:29+08:00", - "type": "commit" - }, - { - "files_changed": 5, - "message": "Codex 会话活跃 · 最近命令:codex · 5 项未提交变更 · 最近提交:auto-save 2026-05-15 14:42 (~1)", - "ts": "2026-05-15T06:44:46Z", - "type": "session-heartbeat" - }, { "files_changed": 12, "hash": "27e2002", @@ -3259,6 +3239,26 @@ "type": "session-heartbeat", "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 2 项未提交变更 · 最近提交:fix: clarify source frame workflow copy", "files_changed": 2 + }, + { + "ts": "2026-05-17T23:19:14+08:00", + "type": "commit", + "message": "auto-save 2026-05-17 23:19 (~2)", + "hash": "cbe7a1b", + "files_changed": 2 + }, + { + "ts": "2026-05-17T23:23:56+08:00", + "type": "commit", + "message": "fix: enable unified similar subject views", + "hash": "44136f5", + "files_changed": 3 + }, + { + "ts": "2026-05-17T15:28:31Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:fix: enable unified similar subject views", + "files_changed": 1 } ] } diff --git a/api/main.py b/api/main.py index 6cc5636..e48479d 100644 --- a/api/main.py +++ b/api/main.py @@ -2745,10 +2745,14 @@ def health() -> dict: "base_url": LLM_BASE_URL or "openai-default", "models": { "asr": ASR_MODEL, + "local_asr": LOCAL_ASR_MODEL, "asr_fallback": ASR_FALLBACK_MODEL, "translate": TRANSLATE_MODEL, "rewrite": REWRITE_MODEL, "audio_rewrite": AUDIO_REWRITE_MODEL, + "vision": VISION_MODEL, + "image": IMAGE_MODEL, + "image_fallbacks": [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"], "minimax_tts": MINIMAX_TTS_MODEL, "minimax_voice": MINIMAX_TTS_VOICE_ID, "minimax_voice_pool": MINIMAX_TTS_VOICE_POOL or [MINIMAX_TTS_VOICE_ID], diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx index 2581239..c2ddca3 100644 --- a/web/components/ad-recreation-board.tsx +++ b/web/components/ad-recreation-board.tsx @@ -3,7 +3,7 @@ import { type MouseEvent as ReactMouseEvent, type ReactNode, type RefObject, useEffect, useMemo, useRef, useState } from "react" import { createPortal } from "react-dom" import { - AlertTriangle, Check, ChevronDown, Circle, Film, FileText, Image as ImageIcon, Link2, Loader2, + AlertTriangle, Check, ChevronDown, Circle, Film, FileText, Image as ImageIcon, Info, Link2, Loader2, Mic, Package, PanelRight, Play, Plus, Scissors, Sparkles, Trash2, Upload, Wand2, } from "lucide-react" import { toast } from "sonner" @@ -18,6 +18,7 @@ import { type KeyFrame, type ProductViewAnalysisItem, type ProductRefStateItem, + type RuntimeModels, type StoryboardScriptRewriteSegment, type StoryboardScene, type SubjectAsset, @@ -31,6 +32,7 @@ import { generateProductAngleAsset, generateSubjectAssets, generatedImageUrl, + getRuntimeHealth, hasCutout, representativeCutoutUrl, resolveImageRefUrl, @@ -95,6 +97,13 @@ type AudioStoryboardRow = { type ProductRefItem = ProductRefStateItem type SubjectStyleMode = "transparent_human" | "source_actor" +type ModelTraceSpec = { + title: string + model: string + chain: string[] + note?: string +} + const PRODUCT_VIEW_SLOTS = [ { value: "front", label: "正面/外侧", hint: "整体 U 形轮廓、开口宽度、外壳主外观" }, { value: "left_45", label: "佩戴者左 45", hint: "戴在脖子上时佩戴者左肩一侧的弧度、按钮/结构差异" }, @@ -285,6 +294,89 @@ function subjectAssetUrl(job: Job, asset: SubjectAsset) { return apiAssetUrl(asset.url) || resolveImageRefUrl(job.id, { kind: "asset", frame_idx: 0, element_id: asset.id }) } +function modelValue(value?: string) { + return value?.trim() || "待配置" +} + +function modelList(values: Array) { + return values.map(modelValue).filter((value, index, list) => value && list.indexOf(value) === index).join(" / ") +} + +function imageModelChain(models?: RuntimeModels) { + return modelList(models?.image_fallbacks?.length ? models.image_fallbacks : [models?.image, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]) +} + +function resolveVideoModelLabel(models: RuntimeModels | undefined, model: string) { + const concrete = models?.video_aliases?.[model] || (model === models?.video ? models.video : "") + return concrete && concrete !== model ? `${model} -> ${concrete}` : modelValue(concrete || model) +} + +function audioModelTrace(models?: RuntimeModels): ModelTraceSpec { + return { + title: "音频解析", + model: modelList([models?.asr, models?.translate, models?.asr_fallback]), + chain: [ + `ASR 转写:优先 ${modelValue(models?.asr)},失败后尝试本机 ${modelValue(models?.local_asr)},再回退 ${modelValue(models?.asr_fallback)}`, + `字幕翻译:${modelValue(models?.translate)} 输出中文逐句时间轴`, + `讲话人 / 节奏 / 背景音:${modelValue(models?.asr_fallback)} 读取 audio.wav 做多模态音频分析`, + ], + note: "点击“解析音频”后触发;开始任务下载完成后也会自动走这条链路。", + } +} + +function productModelTrace(models?: RuntimeModels): ModelTraceSpec { + return { + title: "产品视角识别 / 补图", + model: modelList([models?.vision, models?.image]), + chain: [ + `批量视角识别:${modelValue(models?.vision)} 一次读取同一产品多张图,标注视角、左右、上下、用途和风险`, + `缺角度补图:${imageModelChain(models)} 按同一肩颈按摩仪结构补齐缺失视角`, + "前端只保存标注和 AI 补图结果;后续生成视频时每条最多挑 6 张相关产品图", + ], + note: "上传产品图、重新识别、缺视角重试都会使用这组模型链路。", + } +} + +function similarSubjectModelTrace(models: RuntimeModels | undefined, subjectStyle: SubjectStyleMode): ModelTraceSpec { + return { + title: subjectStyle === "transparent_human" ? "相似透明骨架主体" : "相似普通真人主体", + model: imageModelChain(models), + chain: [ + "参考帧策略:未勾选时使用全部关键帧,勾选后只使用已选关键帧", + `主体类型:${subjectStyle === "transparent_human" ? "透明/半透明皮肤包裹可见白色骨架" : "普通商业广告真人"}`, + `图像生成:${imageModelChain(models)} 逐张生成正、背、左、右、左前 45、右前 45`, + "身份锁定:六张必须是同一个主体,性别表现、年龄段、体型、材质和风格保持一致", + ], + note: "这是生成类似主体,不是复制、抠出或复刻源视频人物身份。", + } +} + +function scriptRewriteModelTrace(models?: RuntimeModels): ModelTraceSpec { + return { + title: "新口播文案改写", + model: modelList([models?.audio_rewrite, models?.asr_fallback, models?.translate]), + chain: [ + `主改写:${modelValue(models?.audio_rewrite)} 根据原文案、当前分镜、作者想法生成新口播`, + `失败回退:依次尝试 ${modelValue(models?.asr_fallback)} 和 ${modelValue(models?.translate)}`, + "返回结果只写入当前分镜文案编辑框;生成视频时再把当前文案写入分镜 action", + ], + } +} + +function videoModelTrace(models: RuntimeModels | undefined, model: string): ModelTraceSpec { + return { + title: "视频生成", + model: resolveVideoModelLabel(models, model), + chain: [ + `前端选择:${model}`, + `后端解析:${resolveVideoModelLabel(models, model)}`, + `服务商:${modelValue(models?.video_provider)} · ${modelValue(models?.video_base_url)}`, + "输入:当前分镜文案、参考帧、产品素材、产品方向标注和画面规划", + "输出:异步候选视频,完成后回填到对应分镜行", + ], + } +} + function buildFallbackScene(job: Job, frame: KeyFrame, order: number): StoryboardScene { const frames = [...job.frames].sort((a, b) => a.timestamp - b.timestamp) const nextFrame = frames.find((item) => item.timestamp > frame.timestamp) ?? null @@ -632,6 +724,7 @@ export function AdRecreationBoard({ const [elementBusyFrame, setElementBusyFrame] = useState(null) const [sixViewBusyKey, setSixViewBusyKey] = useState(null) const [generatingAll, setGeneratingAll] = useState(false) + const [runtimeModels, setRuntimeModels] = useState() const fileRef = useRef(null) const selectedFrames = job ? job.frames.filter((frame) => data.selectedFrames.has(frame.index)).sort((a, b) => a.timestamp - b.timestamp) @@ -648,6 +741,20 @@ export function AdRecreationBoard({ setSelectedVideoIds(new Set()) }, [activeJobId]) + useEffect(() => { + let cancelled = false + getRuntimeHealth() + .then((health) => { + if (!cancelled) setRuntimeModels(health.models) + }) + .catch((error) => { + console.warn("模型配置读取失败", error) + }) + return () => { + cancelled = true + } + }, []) + const submitUrl = () => { const trimmed = url.trim() if (!trimmed) return @@ -847,10 +954,13 @@ export function AdRecreationBoard({ {job?.message || "下载源视频后解析音频,再抽参考帧并生成相似主体。"} - data.onTranscribeAudio?.(job?.id)}> - - 解析音频 - +
+ + data.onTranscribeAudio?.(job?.id)}> + + 解析音频 + +
@@ -890,6 +1000,7 @@ export function AdRecreationBoard({ selectedFrames={data.selectedFrames} onJobUpdate={data.onJobUpdate} onGenerateVideo={onGenerateVideo} + runtimeModels={runtimeModels} />
@@ -1258,6 +1369,7 @@ function AudioIntakePanel({ onToggleFrame={onToggleFrame} onJobUpdate={onJobUpdate} onDeleteFrame={onDeleteFrame} + runtimeModels={runtimeModels} /> @@ -1271,12 +1383,14 @@ function SourceReferenceBuildPanel({ onToggleFrame, onJobUpdate, onDeleteFrame, + runtimeModels, }: { job: Job selectedFrames: Set onToggleFrame: (idx: number) => void onJobUpdate: (job: Job) => void onDeleteFrame?: (jobId: string, idx: number) => Promise | void + runtimeModels?: RuntimeModels }) { const [extracting, setExtracting] = useState(false) const [subjectBusy, setSubjectBusy] = useState(false) @@ -1498,7 +1612,10 @@ function SourceReferenceBuildPanel({
- 相似主体白底视图 +
+ 相似主体白底视图 + +
{[ @@ -1566,11 +1683,13 @@ function AudioStoryboardPlanPanel({ selectedFrames, onJobUpdate, onGenerateVideo, + runtimeModels, }: { job: Job | null selectedFrames: Set onJobUpdate?: (job: Job) => void onGenerateVideo?: (frameIdx: number, scene: StoryboardScene, model: string) => Promise | void + runtimeModels?: RuntimeModels }) { const [videoBusyRow, setVideoBusyRow] = useState(null) const [productItems, setProductItems] = useState([]) @@ -1881,6 +2000,7 @@ function AudioStoryboardPlanPanel({
} title="同一产品素材池 / 视角标注" /> + {productItems.length ? `${productItems.length} 张素材` : "素材池不限量"} {(productAnalyzing || productAngleBusy) && ( @@ -1959,6 +2079,7 @@ function AudioStoryboardPlanPanel({ className="min-h-[42px] resize-y rounded-md border border-white/10 bg-black/35 px-2.5 py-2 text-[11px] leading-snug text-white outline-none placeholder:text-white/25 focus:border-cyan-300/50" />
+ @@ -2366,6 +2491,79 @@ function ProfileTile({ label, value, running }: { label: string; value?: string; ) } +function ModelTrace({ trace, compact = false }: { trace: ModelTraceSpec; compact?: boolean }) { + const [position, setPosition] = useState<{ left: number; top: number } | null>(null) + const buttonRef = useRef(null) + + const toggle = () => { + if (position) { + setPosition(null) + return + } + const rect = buttonRef.current?.getBoundingClientRect() + if (!rect) return + const width = Math.min(380, window.innerWidth - 32) + const height = 260 + let left = rect.right - width + let top = rect.bottom + 8 + if (left < 16) left = 16 + if (left + width > window.innerWidth - 16) left = window.innerWidth - width - 16 + if (top + height > window.innerHeight - 16) top = Math.max(16, rect.top - height - 8) + setPosition({ left, top }) + } + + const popover = position && typeof document !== "undefined" + ? createPortal( +
+
+
+
{trace.title}
+
{trace.model}
+
+ +
+
    + {trace.chain.map((item, index) => ( +
  1. + {index + 1} + {item} +
  2. + ))} +
+ {trace.note ?
{trace.note}
: null} +
, + document.body, + ) + : null + + return ( + <> + + {popover} + + ) +} + function FrameExtractControls({ job, data, diff --git a/web/lib/api.ts b/web/lib/api.ts index 3453c19..0020985 100644 --- a/web/lib/api.ts +++ b/web/lib/api.ts @@ -134,6 +134,45 @@ export interface GeneratedVideo { created_at: number } +export interface RuntimeModels { + asr?: string + local_asr?: string + asr_fallback?: string + translate?: string + rewrite?: string + audio_rewrite?: string + vision?: string + image?: string + image_fallbacks?: string[] + minimax_tts?: string + minimax_voice?: string + minimax_voice_pool?: string[] + minimax_configured?: boolean + video?: string + video_aliases?: Record + video_provider?: string + video_base_url?: string + video_configured?: boolean + video_create_paths?: string[] +} + +export interface RuntimeHealth { + ok: boolean + llm_configured?: boolean + auth_configured?: boolean + base_url?: string + models?: RuntimeModels +} + +export async function getRuntimeHealth(): Promise { + const res = await fetch(`${API_BASE}/health`, { cache: "no-store" }) + if (!res.ok) { + const txt = await res.text().catch(() => "") + throw new Error(`health ${res.status} ${txt.slice(0, 300)}`) + } + return res.json() +} + // 把 ImageRef 解析成可显示的 src URL export function resolveImageRefUrl(jobId: string, ref: ImageRef): string { if (ref.kind === "keyframe") {