fix: improve filmstrip picking and audio retry

This commit is contained in:
2026-05-19 20:01:45 +08:00
parent fe60d5dc99
commit aabddef486
5 changed files with 74 additions and 18 deletions

View File

@@ -11,11 +11,11 @@
- 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解
- 风格:`04-Dark-Gallery-Ambient`(路径:`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`)
- 第一冲刺:步骤 1-4(下载 / 拆轨 / 关键帧 / ASR+翻译)
- 当前产品方向2026-05-19 再确认):信息流广告快速复刻默认进入“三字段候选生成”工作流。主界面为“左侧素材输入列 + 右侧信息流复刻工作表”。用户粘贴 TK 链接或上传视频后点击“开始分析”,系统自动下载源视频;下载完成后并行启动两条路:音频文案路提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效;视频视觉路自动抽取参考帧。源视频工作区右侧主体链路是“参考帧池 → 转换层 → 主体元素”:参考帧池竖向排列,用户拖 1-2 张关键帧到转换层,转换层按参考创新生成新的主体套图,主体元素区展示后续分镜可用的主体图;旧下方“相似主体 / 主体模板库”不再作为主路径。波形下方的画面胶片只是临时预览,点击只跳转原视频时间点,拖进参考帧池才正式加入关键帧。产品图上传后独立形成产品资产包,自动识别视角/结构/比例并补缺角度。分镜工作台按逐句时间轴默认只露“文案 / 场景一句话 / 人物+产品+动作”,产品素材池、批量控制、三字段、视频候选和高级区都必须可折叠;视频候选无内容时默认不占大面积,有候选时默认只显示迷你缩略条,展开后才显示 4-grid。单条默认生成 4 个视频候选,顶部支持整片批量生成候选;首尾帧、视觉规划、产品出现方式和旧 6 字段保留在“高级”抽屉与后端 quick-plan 自动展开中,不能再作为客户默认闸门。
- 当前产品方向(2026-05-19 再确认):信息流广告快速复刻默认进入“三字段候选生成”工作流。主界面为“左侧素材输入列 + 右侧信息流复刻工作表”。用户粘贴 TK 链接或上传视频后点击“开始分析”,系统自动下载源视频;下载完成后并行启动两条路:音频文案路提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效;视频视觉路自动抽取参考帧。源视频工作区右侧主体链路是“参考帧池 → 转换层 → 主体元素”:参考帧池竖向排列,用户拖 1-2 张关键帧到转换层,转换层按参考创新生成新的主体套图,主体元素区展示后续分镜可用的主体图;旧下方“相似主体 / 主体模板库”不再作为主路径。波形下方的画面胶片只是临时预览,点击只跳转原视频时间点,双击或拖进参考帧池才正式加入关键帧,已加入的胶片直接显示“已添加”。产品图上传后独立形成产品资产包,自动识别视角/结构/比例并补缺角度。分镜工作台按逐句时间轴默认只露“文案 / 场景一句话 / 人物+产品+动作”,产品素材池、批量控制、三字段、视频候选和高级区都必须可折叠;视频候选无内容时默认不占大面积,有候选时默认只显示迷你缩略条,展开后才显示 4-grid。单条默认生成 4 个视频候选,顶部支持整片批量生成候选;首尾帧、视觉规划、产品出现方式和旧 6 字段保留在“高级”抽屉与后端 quick-plan 自动展开中,不能再作为客户默认闸门。
## 部署事实
- 平台:VPS `76.13.31.179`(Ubuntu 24.04 / Docker Compose / Coolify Traefik)
- 发布状态已部署并验证2026-05-19右侧三栏主体管线竖向参考帧池 + 转换层 + 主体元素,参考帧缩略图保持小尺寸 9:16 比例 + hover 左侧紧凑预览 + 转换层多参考滚动,旧主体模板区移出主路径 + 逐句时间轴移到原版视频下方并支持双行文案 + 波形同框时间对齐画面胶片 + 胶片密度按钮上移波形顶部 + 去分隔线 + 胶片上下错落 + body 顶层原位大放大 + 隐藏源视频工作区音频解析摘要卡 + 隐藏工作区顶部状态提示条 + 三字段候选生成工作流 + 折叠紧凑候选区);`https://marketing.skg.com` 已启用应用内登录页,未登录 API 返回 401认证后首页 200容器内 `/health` 返回 `ok:true`
- 发布状态:已部署并验证(2026-05-19,胶片双击/拖拽加入参考帧池 + 胶片缓存复用 + 音频解析失败可重试,右侧三栏主体管线:竖向参考帧池 + 转换层 + 主体元素,参考帧缩略图保持小尺寸 9:16 比例 + hover 左侧紧凑预览 + 转换层多参考滚动,旧主体模板区移出主路径 + 逐句时间轴移到原版视频下方并支持双行文案 + 波形同框时间对齐画面胶片 + 胶片密度按钮上移波形顶部 + 去分隔线 + 胶片上下错落 + body 顶层原位大放大 + 隐藏源视频工作区音频解析摘要卡 + 隐藏工作区顶部状态提示条 + 三字段候选生成工作流 + 折叠紧凑候选区);`https://marketing.skg.com` 已启用应用内登录页,未登录 API 返回 401,认证后首页 200,容器内 `/health` 返回 `ok:true`
- 主站 / 前端:`https://marketing.skg.com`
- API / 后端:`https://marketing.skg.com/api`
- 代码仓库 / Gitea:`https://git.kang-kang.com/kangwan/20260512-skg-tk`

View File

@@ -4252,7 +4252,7 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job:
mp4 = job_dir(job_id) / "source.mp4"
if job.status in {"created", "downloading"} or not mp4.exists():
raise HTTPException(409, f"video not ready, got {job.status}")
if job.status == "transcribing" or job.audio_script.status == "rewriting" or job_id in AUDIO_WORKERS_RUNNING:
if job.audio_script.status == "rewriting" or job_id in AUDIO_WORKERS_RUNNING:
raise HTTPException(409, f"job is busy, got {job.status}")
manage_job_status = job.status != "splitting"
audio_payload = AudioScript(

File diff suppressed because one or more lines are too long

View File

@@ -60,6 +60,11 @@ const DEFAULT_PRODUCT_LIBRARY_IDS = [
]
const VIDEO_READY_STATUSES: Job["status"][] = ["downloaded", "frames_extracted", "transcribed", "failed"]
/**
 * Tells whether a job's audio work should be treated as still in progress.
 *
 * A job counts as processing when its audio script is being rewritten, or when
 * the job itself is transcribing and the audio script has not been marked as
 * failed. A missing job is never processing.
 */
function isAudioProcessing(job?: Job | null) {
  if (!job) {
    return false
  }
  const scriptStatus = job.audio_script?.status
  if (scriptStatus === "rewriting") {
    return true
  }
  // A failed audio script overrides a lingering "transcribing" job status.
  return job.status === "transcribing" && scriptStatus !== "failed"
}
const PRODUCT_FUSION_WEARING_PROMPT = [
"Product placement must be physically correct:",
"The SKG device is a rigid opaque white U-shaped neck massager, not a soft scarf, necklace, cable, collar, sticker, implant, or transparent body part.",
@@ -448,7 +453,7 @@ export default function Home() {
if (!options?.silent) toast.info("视频导入完成后,可在音频卡片点击提取音频")
return
}
if (target.status === "transcribing" || target.audio_script?.status === "rewriting") {
if (isAudioProcessing(target)) {
if (!options?.silent) toast.info("音频正在处理中")
return
}
@@ -466,8 +471,9 @@ export default function Home() {
if (!videoReady) return
const audioKey = `${target.id}:audio`
const hasAudioResult = !!target.audio_script?.source_text || target.transcript.length > 0
const audioRunning = target.status === "transcribing" || target.audio_script?.status === "rewriting"
const audioFailed = target.audio_script?.status === "failed"
const hasAudioResult = !audioFailed && (!!target.audio_script?.source_text || target.transcript.length > 0)
const audioRunning = isAudioProcessing(target)
if (!hasAudioResult && !audioRunning && !autoTriggeredRef.current.has(audioKey)) {
autoTriggeredRef.current.add(audioKey)
try {

View File

@@ -139,6 +139,27 @@ const FILMSTRIP_DENSITIES: Array<{ value: FilmstripDensitySeconds; label: string
const FILMSTRIP_TILT_CLASSES = ["-rotate-[8deg]", "-rotate-[6deg]", "-rotate-[9deg]"]
const FILMSTRIP_VERTICAL_OFFSET_CLASSES = ["translate-y-0", "translate-y-2", "-translate-y-1.5", "translate-y-1", "-translate-y-2"]
const FILMSTRIP_HOVER_SCALE = 4.8
// Upper bound on the number of entries rememberFilmstripPreview keeps in filmstripPreviewCache.
const FILMSTRIP_CACHE_LIMIT = 8
// Module-level store of captured filmstrip frames, looked up with keys built by filmstripCacheKey.
// rememberFilmstripPreview re-inserts a key on every write, so the Map's first key is the oldest entry.
const filmstripPreviewCache = new Map<string, FilmstripPreviewFrame[]>()
/**
 * Builds the key under which a set of filmstrip preview frames is cached.
 *
 * The key joins the job id, video URL, density and duration with ":".
 * The duration is rounded to one decimal place before it is embedded.
 */
function filmstripCacheKey(jobId: string, videoUrl: string, density: FilmstripDensitySeconds, duration: number) {
  const roundedDuration = Math.round(duration * 10) / 10
  const source = `${jobId}:${videoUrl}`
  return `${source}:${density}:${roundedDuration}`
}
/**
 * Stores a captured frame list under `key` and trims the cache back to
 * FILMSTRIP_CACHE_LIMIT entries.
 *
 * The key is deleted before it is set so that an existing entry moves to the
 * end of the Map's insertion order; the first key yielded by the iterator is
 * therefore the least recently stored entry and is the one evicted.
 *
 * @param key Cache key for this preview set.
 * @param frames Frames to remember; the array is stored by reference, including when it is empty.
 */
function rememberFilmstripPreview(key: string, frames: FilmstripPreviewFrame[]) {
  filmstripPreviewCache.delete(key)
  filmstripPreviewCache.set(key, frames)
  while (filmstripPreviewCache.size > FILMSTRIP_CACHE_LIMIT) {
    const oldest = filmstripPreviewCache.keys().next()
    // Stop only when the iterator is exhausted. Testing the key's truthiness
    // would also stop on an empty-string key and leave the cache over its limit.
    if (oldest.done) break
    filmstripPreviewCache.delete(oldest.value)
  }
}
/**
 * Reports whether the audio pipeline of `job` is currently busy.
 *
 * Returns true while the audio script status is "rewriting", or while the job
 * status is "transcribing" and the audio script is not "failed". Returns false
 * when no job is given.
 */
function isAudioProcessing(job?: Job | null) {
  if (!job) return false
  const audioStatus = job.audio_script?.status
  const rewriting = audioStatus === "rewriting"
  // A failed audio script means the transcribing state is no longer in progress.
  const transcribing = job.status === "transcribing" && audioStatus !== "failed"
  return rewriting || transcribing
}
type AudioStoryboardRow = {
index: number
@@ -1910,7 +1931,7 @@ export function AdRecreationBoard({
const readySegments = countReadySegments(job, draftSegments)
const transcriptCount = job?.transcript.length ?? 0
const backgroundReady = !!job?.audio_script?.background_audio_profile?.trim()
const audioRunning = job?.status === "transcribing" || job?.audio_script?.status === "rewriting"
const audioRunning = isAudioProcessing(job)
const visualRunning = job?.status === "splitting"
const visualReady = (job?.frames.length ?? 0) > 0
const subjectAssetCount = countSubjectAssetViews(job)
@@ -2393,7 +2414,7 @@ function AudioIntakePanel({
const syncFrameRef = useRef<number | null>(null)
const audioSrcUrl = job ? apiAssetUrl(job.source_audio_url) || sourceAudioUrl(job.id) : ""
const videoSrcUrl = job ? apiAssetUrl(job.video_url) || videoUrl(job.id) : ""
const processing = !!job && (job.status === "transcribing" || job.audio_script?.status === "rewriting")
const processing = isAudioProcessing(job)
const timelineDuration = useMemo(() => {
if (!job) return 1
const lastTranscriptEnd = job.transcript.reduce((max, segment) => Math.max(max, segment.end || 0), 0)
@@ -2449,12 +2470,20 @@ function AudioIntakePanel({
setFilmstripStatus("idle")
return
}
const cacheKey = filmstripCacheKey(job.id, videoSrcUrl, filmstripDensity, timelineDuration)
const cached = filmstripPreviewCache.get(cacheKey)
if (cached) {
setFilmstripPreviews(cached)
setFilmstripStatus(cached.length ? "ready" : "idle")
return
}
let cancelled = false
setFilmstripPreviews([])
setFilmstripStatus("loading")
captureVideoFilmstrip(videoSrcUrl, timelineDuration, filmstripDensity, () => cancelled)
.then((frames) => {
if (!cancelled) {
rememberFilmstripPreview(cacheKey, frames)
setFilmstripPreviews(frames)
setFilmstripStatus(frames.length ? "ready" : "idle")
}
@@ -2655,6 +2684,7 @@ function AudioIntakePanel({
selectedTimes={frames.map((frame) => frame.timestamp)}
busyTime={filmstripBusyTime}
onSeek={seekTo}
onAddFrame={(time) => void addFilmstripFrame(time)}
onDragStart={setFilmstripDragTime}
onDragEnd={() => setFilmstripDragTime(null)}
/>
@@ -2750,6 +2780,7 @@ function TimelineFilmstrip({
selectedTimes,
busyTime,
onSeek,
onAddFrame,
onDragStart,
onDragEnd,
}: {
@@ -2762,6 +2793,7 @@ function TimelineFilmstrip({
selectedTimes: number[]
busyTime: number | null
onSeek: (time: number) => void
onAddFrame: (time: number) => void
onDragStart: (time: number) => void
onDragEnd: () => void
}) {
@@ -2838,6 +2870,10 @@ function TimelineFilmstrip({
onMouseEnter={(event) => showHoverPreview(event, frame, active, selected, busy)}
onMouseMove={(event) => showHoverPreview(event, frame, active, selected, busy)}
onMouseLeave={() => setHoverPreview(null)}
onDoubleClick={(event) => {
event.preventDefault()
if (!busy) onAddFrame(frame.time)
}}
onDragStart={(event) => {
setHoverPreview(null)
event.dataTransfer.setData(FILMSTRIP_DRAG_TYPE, frame.time.toFixed(2))
@@ -2864,9 +2900,10 @@ function TimelineFilmstrip({
disablePreview
selected={selected}
onClick={() => onSeek(frame.time)}
title="击跳到该时间点,拖入关键帧库才正式选取"
title="击跳到该时间点,双击或拖入参考帧池才正式选取"
topLeft={selected ? <span className="rounded bg-emerald-500/85 px-1 text-[8.5px] font-semibold text-black"></span> : undefined}
topRight={busy ? <Loader2 className="h-3 w-3 animate-spin text-cyan-100" /> : selected ? <Check className="h-3 w-3 text-emerald-200" /> : undefined}
bottom={<span className="block rounded bg-black/74 px-1 py-0.5 text-center font-mono text-[9px] text-white/68">{frame.time.toFixed(1)}s</span>}
bottom={<span className={`block rounded px-1 py-0.5 text-center font-mono text-[9px] ${selected ? "bg-emerald-400/82 text-black" : "bg-black/74 text-white/68"}`}>{selected ? "已添加" : `${frame.time.toFixed(1)}s`}</span>}
/>
</div>
</div>
@@ -2906,8 +2943,9 @@ function TimelineFilmstrip({
objectFit="contain"
disablePreview
selected={hoverPreview.selected}
topLeft={hoverPreview.selected ? <span className="rounded-md bg-emerald-500/88 px-2 py-1 text-[22px] font-semibold leading-none text-black"></span> : undefined}
topRight={hoverPreview.busy ? <Loader2 className="h-6 w-6 animate-spin text-cyan-100" /> : hoverPreview.selected ? <Check className="h-6 w-6 text-emerald-200" /> : undefined}
bottom={<span className="block rounded-md bg-black/74 px-2 py-1 text-center font-mono text-[42px] leading-none text-white/68">{hoverPreview.time.toFixed(1)}s</span>}
bottom={<span className={`block rounded-md px-2 py-1 text-center font-mono text-[42px] leading-none ${hoverPreview.selected ? "bg-emerald-400/86 text-black" : "bg-black/74 text-white/68"}`}>{hoverPreview.selected ? `已添加 · ${hoverPreview.time.toFixed(1)}s` : `${hoverPreview.time.toFixed(1)}s`}</span>}
/>
</div>,
document.body,