refactor: narrow intake to audio-first workflow

This commit is contained in:
2026-05-17 12:55:45 +08:00
parent b02bc3f583
commit 3b9e74f0a2
9 changed files with 395 additions and 220 deletions

View File

@@ -226,7 +226,9 @@ export default function Home() {
toast.info(`上传中:${file.name} (${(file.size / 1024 / 1024).toFixed(1)} MB)`)
const created = await uploadJob(file)
addJob(created)
toast.success(`已上传 ${created.id.slice(0, 8)}`)
setProductionJobIds((prev) => new Set(prev).add(created.id))
setAudioStripJobId(created.id)
toast.success(`已上传 ${created.id.slice(0, 8)},下载完成后自动解析音频`)
} catch (e) {
toast.error("上传失败:" + (e instanceof Error ? e.message : String(e)))
} finally {
@@ -537,13 +539,11 @@ export default function Home() {
}
setProductionJobIds((prev) => new Set(prev).add(target.id))
setAudioStripJobId(target.id)
toast.success("已进入自动生产:下载完成后会抽帧、解析音频并生成分镜初稿")
toast.success("已进入第一步:下载完成后自动解析音频文案、讲话人和背景音")
if (target.video_url && ["downloaded", "frames_extracted", "transcribed", "failed"].includes(target.status)) {
if (!target.frames.length) void handleAnalyzeJob(target.id, { mode: "replace" })
void handleTranscribeAudio(target.id, { silent: true })
if (target.frames.length) void handlePlanStoryboardJob(target.id)
}
}, [handleAnalyzeJob, handlePlanStoryboardJob, handleSubmit, handleTranscribeAudio, job])
}, [handleSubmit, handleTranscribeAudio, job])
useEffect(() => {
if (productionJobIds.size === 0) return
@@ -552,22 +552,13 @@ export default function Home() {
const videoReady = !!item.video_url && ["downloaded", "frames_extracted", "transcribed", "failed"].includes(item.status)
if (!videoReady) continue
const audioKey = `${item.id}:audio`
if (!autoTriggeredRef.current.has(audioKey) && item.audio_script?.status !== "rewriting" && !item.audio_script?.rewritten_text) {
const hasAudioResult = !!item.audio_script?.source_text || item.transcript.length > 0
if (!autoTriggeredRef.current.has(audioKey) && item.audio_script?.status !== "rewriting" && !hasAudioResult) {
autoTriggeredRef.current.add(audioKey)
void handleTranscribeAudio(item.id, { silent: true })
}
const analyzeKey = `${item.id}:analyze`
if (!autoTriggeredRef.current.has(analyzeKey) && item.frames.length === 0 && item.status !== "splitting") {
autoTriggeredRef.current.add(analyzeKey)
void handleAnalyzeJob(item.id, { mode: "replace" })
}
const planKey = `${item.id}:plan:${item.frames.length}`
if (item.frames.length > 0 && !autoTriggeredRef.current.has(planKey)) {
autoTriggeredRef.current.add(planKey)
void handlePlanStoryboardJob(item.id)
}
}
}, [handleAnalyzeJob, handlePlanStoryboardJob, handleTranscribeAudio, jobs, productionJobIds])
}, [handleTranscribeAudio, jobs, productionJobIds])
const handleQuickGenerateVideo = useCallback(async (frameIdx: number, scene: StoryboardScene, model: string) => {
if (!job) return
@@ -812,7 +803,7 @@ export default function Home() {
if (jobs.length === 0) return
// 状态切到 downloaded 时提示用户点解析(仅一次)
if (job?.status === "downloaded" && prevStatusRef.current !== "downloaded") {
toast.info("视频已就绪,请在左侧看板开始抽帧", { duration: 6000 })
toast.info("视频已下载,音频解析会自动开始;也可以在右侧手动重试", { duration: 6000 })
}
prevStatusRef.current = job?.status ?? null

View File

@@ -105,11 +105,11 @@ function videoSrc(video: GeneratedVideo) {
}
function audioPreview(job: Job | null) {
if (!job) return "导入素材后,先解析音频,再把产品内容改写成新的分镜文字。"
const rewritten = job.audio_script?.rewritten_text?.trim()
if (rewritten) return rewritten
if (!job) return "粘贴 TK 链接或上传视频后,系统会先下载视频;下载完成后自动提取音频文案。"
const source = job.audio_script?.source_text?.trim() || job.audio_script?.source_zh?.trim()
if (source) return source
if (job.transcript?.length) return job.transcript.slice(0, 5).map((item) => item.en || item.zh).join(" ")
return "暂无音频文案。解析后这里会作为新剧情和分镜文字的依据。"
return "暂无音频文案。下载完成后会自动提取原音频文案、讲话人和背景音。"
}
function orderedFrames(job: Job | null, selectedFrames: KeyFrame[]) {
@@ -172,8 +172,10 @@ export function AdRecreationBoard({
: []
const framesForSegments = orderedFrames(job, selectedFrames)
const generatedVideos = job?.generated_videos ?? []
const audioReady = !!job?.audio_script?.rewritten_text?.trim()
const audioReady = !!job?.audio_script?.source_text?.trim() || !!job?.transcript?.length
const readySegments = countReadySegments(job, draftSegments)
const transcriptCount = job?.transcript.length ?? 0
const backgroundReady = !!job?.audio_script?.background_audio_profile?.trim()
useEffect(() => {
setDraftSegments([])
@@ -341,15 +343,15 @@ export function AdRecreationBoard({
<div className="relative flex h-full flex-col px-4 py-4">
<header className="mb-3 flex items-center justify-between gap-4 rounded-lg border border-white/10 bg-white/[0.04] px-4 py-3">
<div className="min-w-0">
<div className="text-[11px] font-medium uppercase tracking-[0.18em] text-white/40">feed ad storyboard board</div>
<h1 className="mt-1 text-[22px] font-semibold leading-tight text-white">广</h1>
<div className="text-[11px] font-medium uppercase tracking-[0.18em] text-white/40">feed ad audio intake board</div>
<h1 className="mt-1 text-[22px] font-semibold leading-tight text-white">广</h1>
</div>
<div className="grid min-w-[520px] grid-cols-5 gap-2 text-[11px] text-white/48">
<Metric label="素材" value={`${jobs.length}`} />
<Metric label="当前" value={shortId(activeJobId)} />
<Metric label="抽帧" value={`${job?.frames.length ?? 0}`} />
<Metric label="分镜" value={`${readySegments}`} />
<Metric label="片段" value={`${generatedVideos.length}`} />
<Metric label="视频" value={job?.video_url ? "ready" : "-"} />
<Metric label="文案段" value={`${transcriptCount}`} />
<Metric label="背景音" value={backgroundReady ? "ready" : "-"} />
</div>
</header>
@@ -371,11 +373,11 @@ export function AdRecreationBoard({
<div className="flex items-start justify-between gap-4">
<div>
<div className="flex items-center gap-2">
<span className="inline-flex h-8 w-8 items-center justify-center rounded-md bg-rose-500/12 text-rose-100"><Wand2 className="h-4 w-4" /></span>
<span className="inline-flex h-8 w-8 items-center justify-center rounded-md bg-rose-500/12 text-rose-100"><Mic className="h-4 w-4" /></span>
<span className="font-mono text-[12px] text-white/36">02</span>
</div>
<h2 className="mt-2 text-[17px] font-semibold leading-tight text-white"></h2>
<p className="mt-1 text-[12px] text-white/42"></p>
<h2 className="mt-2 text-[17px] font-semibold leading-tight text-white"></h2>
<p className="mt-1 text-[12px] text-white/42"></p>
</div>
<div className="flex shrink-0 flex-wrap justify-end gap-2">
<ActionButton disabled={!job?.video_url || job.status === "transcribing"} onClick={() => data.onTranscribeAudio?.(job?.id)}>
@@ -385,14 +387,6 @@ export function AdRecreationBoard({
<ActionButton disabled={!job?.source_audio_url && !job?.audio_script?.voice_url} variant="ghost" onClick={() => data.onOpenAudioStrip?.(job?.id)}>
</ActionButton>
<ActionButton variant="ghost" onClick={addDraftSegment}>
<Plus className="h-3.5 w-3.5" />
</ActionButton>
<ActionButton disabled={!framesForSegments.length || generatingAll} onClick={generateAllVideos}>
{generatingAll ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Play className="h-3.5 w-3.5" />}
</ActionButton>
</div>
</div>
@@ -409,69 +403,21 @@ export function AdRecreationBoard({
<div className="mt-2 grid gap-1 text-[11px] leading-relaxed text-white/42">
{job.audio_script.speaker_profile && <div>{job.audio_script.speaker_profile}</div>}
{job.audio_script.rhythm_profile && <div>{job.audio_script.rhythm_profile}</div>}
{job.audio_script.background_audio_profile && <div>{job.audio_script.background_audio_profile}</div>}
</div>
)}
</div>
<FrameExtractControls
job={job}
data={data}
selectedFramesCount={selectedFrames.length}
onSelectAllFrames={selectAllFrames}
onClearFrameSelection={clearFrameSelection}
/>
<AudioIntakeStatus job={job} audioReady={audioReady} />
</div>
</header>
<div className="min-h-0 flex-1 overflow-y-auto p-4">
<div className="space-y-3">
{job && framesForSegments.length > 0 ? framesForSegments.map((frame, order) => (
<StoryboardSegmentCard
key={`${job.id}:${frame.index}`}
job={job}
frame={frame}
order={order}
selected={data.selectedFrames.has(frame.index)}
selectedVideoIds={selectedVideoIds}
videos={generatedVideos.filter((video) => video.frame_idx === frame.index)}
busy={elementBusyFrame === frame.index}
sixViewBusyKey={sixViewBusyKey}
onToggleFrame={() => data.onToggleFrame(frame.index)}
onJobUpdate={data.onJobUpdate}
onGenerateElement={(candidate) => generateElementForFrame(frame, candidate)}
onGenerateSixViews={(element) => generateSixViewsForElement(frame, element)}
onGenerateVideo={onGenerateVideo}
onToggleVideo={toggleVideo}
onDeleteVideo={(videoId) => data.onDeleteVideo?.(videoId)}
/>
)) : null}
{draftSegments.map((draft, index) => (
<DraftSegmentCard
key={draft.id}
draft={draft}
order={framesForSegments.length + index}
job={job}
onPatch={(patch) => updateDraftSegment(draft.id, patch)}
onRemove={() => removeDraftSegment(draft.id)}
onJobUpdate={data.onJobUpdate}
onGenerateVideo={onGenerateVideo}
/>
))}
{!job && <EmptyState text="先在左侧导入素材,再从音频分镜开始追加或编辑分镜。" />}
{job && framesForSegments.length === 0 && draftSegments.length === 0 && (
<EmptyState text="可以先解析音频并追加分镜;抽帧后,每张分镜卡会显示对应关键元素和视频生成区。" />
)}
</div>
<AudioIntakePanel job={job} />
</div>
<footer className="shrink-0 border-t border-white/10 p-3">
<ComposeSummary
audioReady={audioReady}
selectedVideoCount={selectedVideoIds.size}
generatedVideoCount={generatedVideos.length}
/>
<AudioStepSummary job={job} audioReady={audioReady} />
</footer>
</section>
</div>
@@ -577,6 +523,123 @@ function MaterialColumn({
)
}
/**
 * Compact status card for the audio-intake step.
 *
 * Shows a status pill, four requirement rows (material, video, audio,
 * transcript) and the job's latest message. When there is no job, or the job
 * has no message, a usage hint is shown instead.
 *
 * @param job - The active job, or `null` when nothing has been imported yet.
 * @param audioReady - Whether the caller already considers the transcript available.
 */
function AudioIntakeStatus({ job, audioReady }: { job: Job | null; audioReady: boolean }) {
  // Both flags are false when there is no job.
  const isDownloading = job ? ["created", "downloading"].includes(job.status) : false
  const isAnalyzing = job ? job.status === "transcribing" || job.audio_script?.status === "rewriting" : false
  const hasVideo = !!job?.video_url
  const hasSourceAudio = !!job?.source_audio_url

  // An in-flight stage takes precedence over the "ready" label.
  let videoDetail = "待下载"
  if (isDownloading) videoDetail = "下载中"
  else if (hasVideo) videoDetail = "已就绪"

  let audioDetail = "待提取"
  if (isAnalyzing) audioDetail = "解析中"
  else if (hasSourceAudio) audioDetail = "已提取"

  // The segment count is only read once the transcript is reported ready.
  let scriptDetail = "待解析"
  if (audioReady) scriptDetail = String(job?.transcript.length ?? 0)

  const materialDetail = job ? shortId(job.id) : "待输入"
  const footerText = job?.message || "粘贴 TK 链接或上传视频后,点击开始进入下载和音频解析。"

  return (
    <div className="rounded-lg border border-white/10 bg-black/32 p-3">
      <div className="mb-2 flex items-center justify-between gap-2">
        <SectionTitle icon={<PanelRight className="h-4 w-4" />} title="当前步骤" />
        <StatusPill ready={audioReady} running={isDownloading || isAnalyzing} />
      </div>
      <div className="grid grid-cols-2 gap-2 text-[11px] text-white/52">
        <Requirement label="素材" ready={!!job} detail={materialDetail} />
        <Requirement label="视频" ready={hasVideo} detail={videoDetail} />
        <Requirement label="音频" ready={hasSourceAudio} detail={audioDetail} />
        <Requirement label="文案" ready={audioReady} detail={scriptDetail} />
      </div>
      <div className="mt-3 rounded-md border border-white/10 bg-black/28 px-3 py-2 text-[11px] leading-relaxed text-white/42">
        {footerText}
      </div>
    </div>
  )
}
/**
 * Main panel of the audio-intake step.
 *
 * Without a job it renders a single empty-state hint. With a job it renders
 * three sections: the extracted original script next to its Chinese
 * translation, the speaker / rhythm / background-audio profile tiles, and a
 * per-segment timeline of the transcript.
 *
 * The script texts prefer the trimmed `audio_script` fields and fall back to
 * joining the non-empty `en` / `zh` values of the transcript with spaces.
 *
 * NOTE(review): the three timeline header cells and the placeholder shown for
 * a missing `zh` value contain no text as written — confirm the intended labels.
 *
 * @param job - The active job, or `null` when nothing has been imported yet.
 */
function AudioIntakePanel({ job }: { job: Job | null }) {
  if (!job) {
    return <EmptyState text="先在左侧粘贴 TK 链接或上传本地视频。点击开始后,会先下载视频,再自动解析原音频文案、讲话人节奏和背景音。" />
  }

  const audio = job.audio_script
  const busy = job.status === "transcribing" || audio?.status === "rewriting"

  // Prefer the model-provided script; only fall back to the transcript when it is blank.
  const sourceText = audio?.source_text?.trim()
  const originalText = sourceText
    ? sourceText
    : job.transcript.map((line) => line.en).filter(Boolean).join(" ")
  const sourceZh = audio?.source_zh?.trim()
  const translatedText = sourceZh
    ? sourceZh
    : job.transcript.map((line) => line.zh).filter(Boolean).join(" ")

  const segmentCount = job.transcript.length
  const scriptReady = Boolean(originalText) || segmentCount > 0

  // [label, value] pairs; the label doubles as the React key.
  const profileTiles: Array<[string, string | undefined]> = [
    ["讲话人", audio?.speaker_profile],
    ["节奏", audio?.rhythm_profile],
    ["背景音", audio?.background_audio_profile],
  ]

  const timelineHint = busy
    ? "音频解析中,完成后这里会按时间列出原文案和中文翻译。"
    : "下载完成后会自动解析音频;也可以点击右上角“解析音频”手动重试。"

  const timeline = segmentCount ? (
    <div className="overflow-hidden rounded-lg border border-white/10">
      <div className="grid grid-cols-[88px_minmax(0,1fr)_minmax(0,1fr)] border-b border-white/10 bg-white/[0.04] px-3 py-2 text-[11px] font-semibold text-white/50">
        <div></div>
        <div></div>
        <div></div>
      </div>
      <div className="max-h-[36vh] overflow-y-auto">
        {job.transcript.map((line) => (
          <div key={line.index} className="grid grid-cols-[88px_minmax(0,1fr)_minmax(0,1fr)] gap-3 border-b border-white/8 px-3 py-2 text-[12px] leading-relaxed text-white/64 last:border-b-0">
            <div className="font-mono text-[11px] text-white/38">{line.start.toFixed(1)}-{line.end.toFixed(1)}s</div>
            <div>{line.en || <span className="text-white/30">-</span>}</div>
            <div>{line.zh || <span className="text-white/30"></span>}</div>
          </div>
        ))}
      </div>
    </div>
  ) : (
    <EmptyState text={timelineHint} />
  )

  return (
    <div className="grid gap-3">
      <section className="rounded-lg border border-white/10 bg-black/28 p-3">
        <div className="mb-3 flex items-center justify-between gap-3">
          <SectionTitle icon={<FileText className="h-4 w-4" />} title="原文案提取" />
          <StatusPill ready={scriptReady} running={busy} />
        </div>
        <div className="grid gap-3 xl:grid-cols-2">
          <TextBlock title="原始文案" value={originalText} empty={busy ? "正在提取原音频文案..." : "还没有提取到原文案。"} />
          <TextBlock title="中文翻译" value={translatedText} empty={busy ? "正在翻译..." : "还没有中文翻译。"} />
        </div>
      </section>

      <section className="rounded-lg border border-white/10 bg-black/28 p-3">
        <div className="mb-3 flex items-center justify-between gap-3">
          <SectionTitle icon={<Mic className="h-4 w-4" />} title="声音与背景音分析" />
          <span className="font-mono text-[11px] text-white/38">{formatSeconds(job.duration)}</span>
        </div>
        <div className="grid gap-2 lg:grid-cols-3">
          {profileTiles.map(([label, value]) => (
            <ProfileTile key={label} label={label} value={value} running={busy} />
          ))}
        </div>
      </section>

      <section className="rounded-lg border border-white/10 bg-black/28 p-3">
        <div className="mb-3 flex items-center justify-between gap-3">
          <SectionTitle icon={<Film className="h-4 w-4" />} title="逐句时间轴" />
          <span className="rounded-md border border-white/10 bg-black/35 px-2 py-1 text-[11px] text-white/45">{segmentCount} </span>
        </div>
        {timeline}
      </section>
    </div>
  )
}
/**
 * Titled, scrollable text box.
 *
 * @param title - Caption rendered above the text.
 * @param value - Text to display; when missing or empty, `empty` is shown instead.
 * @param empty - Dimmed placeholder used when there is no `value`.
 */
function TextBlock({ title, value, empty }: { title: string; value?: string; empty: string }) {
  // An empty string counts as "no value", same as undefined.
  const content = value ? value : <span className="text-white/32">{empty}</span>

  return (
    <div className="min-h-[156px] rounded-lg border border-white/10 bg-black/35 p-3">
      <div className="mb-2 text-[11px] font-semibold text-white/48">{title}</div>
      <div className="max-h-[220px] overflow-y-auto whitespace-pre-wrap text-[12.5px] leading-relaxed text-white/72">
        {content}
      </div>
    </div>
  )
}
/**
 * Tile showing one audio-analysis attribute with a status icon.
 *
 * Icon: spinner while `running`, otherwise a check mark when `value` is
 * present, otherwise a hollow circle. Body: `value` when present, otherwise a
 * "running" or "waiting" placeholder.
 *
 * @param label - Attribute name shown in the tile header.
 * @param value - Analysis text; missing or empty means no result yet.
 * @param running - Whether analysis is currently in progress.
 */
function ProfileTile({ label, value, running }: { label: string; value?: string; running?: boolean }) {
  // The spinner wins over the check mark even when a value already exists.
  let statusIcon = <Circle className="h-3.5 w-3.5 text-white/32" />
  if (running) {
    statusIcon = <Loader2 className="h-3.5 w-3.5 animate-spin text-cyan-200" />
  } else if (value) {
    statusIcon = <Check className="h-3.5 w-3.5 text-emerald-200" />
  }

  // For the body text an existing value wins over the running placeholder.
  let bodyText = "等待音频分析结果。"
  if (value) {
    bodyText = value
  } else if (running) {
    bodyText = "模型分析中..."
  }

  return (
    <div className="min-h-[112px] rounded-lg border border-white/10 bg-black/35 p-3">
      <div className="mb-2 flex items-center justify-between gap-2">
        <span className="text-[11px] font-semibold text-white/48">{label}</span>
        {statusIcon}
      </div>
      <p className="text-[12px] leading-relaxed text-white/62">
        {bodyText}
      </p>
    </div>
  )
}
function FrameExtractControls({
job,
data,
@@ -1006,6 +1069,29 @@ function SegmentBand({ icon, title, children }: { icon: ReactNode; title: string
)
}
/**
 * Footer summary bar for the audio-intake step.
 *
 * Left side: the job's latest message, or a waiting hint when there is no job
 * or no message. Right side: download / audio / transcript requirement chips
 * whose detail is `running`, `ready`, `wait`, or the transcript segment count.
 *
 * NOTE(review): the title element above the message contains no text as
 * written — confirm the intended heading.
 *
 * @param job - The active job, or `null` when nothing has been imported yet.
 * @param audioReady - Whether the caller already considers the transcript available.
 */
function AudioStepSummary({ job, audioReady }: { job: Job | null; audioReady: boolean }) {
  // Both flags are false when there is no job.
  const isDownloading = job ? ["created", "downloading"].includes(job.status) : false
  const isAnalyzing = job ? job.status === "transcribing" || job.audio_script?.status === "rewriting" : false
  const hasVideo = !!job?.video_url
  const hasSourceAudio = !!job?.source_audio_url

  // An in-flight stage is reported before a finished one.
  const stageLabel = (inFlight: boolean, done: boolean) => {
    if (inFlight) return "running"
    return done ? "ready" : "wait"
  }

  // The segment count is only read once the transcript is reported ready.
  let scriptDetail = "wait"
  if (audioReady) scriptDetail = String(job?.transcript.length ?? 0)

  const summaryText = job?.message || "等待素材输入;完成后再进入分镜规划和素材生成。"

  return (
    <div className="flex items-center justify-between gap-3 rounded-lg border border-white/10 bg-black/35 px-3 py-2">
      <div className="flex min-w-0 items-center gap-2">
        <PanelRight className="h-4 w-4 shrink-0 text-rose-200" />
        <div className="min-w-0">
          <div className="text-[13px] font-semibold text-white"></div>
          <div className="truncate text-[11px] text-white/40">
            {summaryText}
          </div>
        </div>
      </div>
      <div className="flex shrink-0 items-center gap-2 text-[11px] text-white/52">
        <Requirement label="下载" ready={hasVideo} detail={stageLabel(isDownloading, hasVideo)} />
        <Requirement label="音频" ready={hasSourceAudio} detail={stageLabel(isAnalyzing, hasSourceAudio)} />
        <Requirement label="文案" ready={audioReady} detail={scriptDetail} />
      </div>
    </div>
  )
}
function ComposeSummary({
audioReady,
selectedVideoCount,
@@ -1068,9 +1154,9 @@ function MaterialCard({
<span className={`shrink-0 rounded-md border px-2 py-1 text-[11px] ${tone.className}`}>{tone.label}</span>
</div>
<div className="mt-3 grid grid-cols-3 gap-2 text-[11px] text-white/44">
<Metric label="" value={`${job.frames.length}`} compact />
<Metric label="音频" value={job.audio_script?.rewritten_text ? "ready" : "-"} compact />
<Metric label="段" value={`${job.generated_videos?.length ?? 0}`} compact />
<Metric label="视频" value={job.video_url ? "ready" : "-"} compact />
<Metric label="文案" value={job.audio_script?.source_text || job.transcript.length ? "ready" : "-"} compact />
<Metric label="段" value={`${job.transcript.length}`} compact />
</div>
{onDelete && (
<span

View File

@@ -1,7 +1,7 @@
"use client"
import { useEffect, useMemo, useRef, useState, type PointerEvent as ReactPointerEvent } from "react"
import { ChevronDown, ChevronUp, GripHorizontal, Mic2, Volume2, X } from "lucide-react"
import { ChevronDown, ChevronUp, GripHorizontal, Mic2, X } from "lucide-react"
import { apiAssetUrl, sourceAudioUrl, type Job, type TranscriptSegment } from "@/lib/api"
const STORAGE_KEY = "skg.audio-strip.height"
@@ -151,7 +151,6 @@ export function AudioStrip({ job, open, onClose }: { job: Job | null; open: bool
const audioRef = useRef<HTMLAudioElement>(null)
const transcript = job?.transcript ?? []
const audioScript = job?.audio_script
const voiceUrl = apiAssetUrl(audioScript?.voice_url)
const sourceUrl = job ? apiAssetUrl(job.source_audio_url || sourceAudioUrl(job.id)) : ""
const processing = !!job && (job.status === "transcribing" || audioScript?.status === "rewriting")
const activeSegment = transcript.find((segment) => currentTime >= segment.start && currentTime <= Math.max(segment.end, segment.start + 0.2))
@@ -248,12 +247,6 @@ export function AudioStrip({ job, open, onClose }: { job: Job | null; open: bool
<span className="rounded-full border border-white/10 px-2 py-0.5 text-[10px] text-white/45">{transcript.length || 0} </span>
</div>
<div className="flex items-center gap-2">
{voiceUrl && (
<div className="hidden items-center gap-1.5 text-[10px] text-emerald-200/80 sm:flex">
<Volume2 className="h-3.5 w-3.5" />
English VO ready
</div>
)}
<button
type="button"
onClick={() => setCollapsed((v) => !v)}
@@ -332,18 +325,24 @@ export function AudioStrip({ job, open, onClose }: { job: Job | null; open: bool
</div>
</div>
<div className="min-h-0 overflow-y-auto rounded-lg border border-emerald-300/20 bg-emerald-300/[0.07] p-3 max-lg:hidden">
<div className="mb-2 text-[10px] uppercase tracking-widest text-emerald-100/70">English product VO · SKG intro</div>
<p className="text-[12.5px] leading-relaxed text-white/90">
{audioScript?.rewritten_text || "Waiting for the source audio length to become a natural English SKG product voice-over."}
</p>
{voiceUrl && (
<audio controls src={voiceUrl} className="mt-3 h-8 w-full" />
)}
{audioScript?.product_brief && (
<div className="mt-3 border-t border-white/10 pt-2 text-[11px] leading-relaxed text-white/55">
{audioScript.product_brief}
<div className="mb-2 text-[10px] uppercase tracking-widest text-emerald-100/70">Original audio analysis</div>
<div className="space-y-3 text-[12px] leading-relaxed text-white/86">
<div>
<div className="mb-1 text-[10px] font-semibold uppercase tracking-widest text-white/38"></div>
<p>{audioScript?.source_text || "Waiting for transcript extraction."}</p>
</div>
)}
{audioScript?.source_zh && (
<div>
<div className="mb-1 text-[10px] font-semibold uppercase tracking-widest text-white/38"></div>
<p>{audioScript.source_zh}</p>
</div>
)}
<div className="border-t border-white/10 pt-3 text-[11px] text-white/60">
{audioScript?.speaker_profile && <p><span className="text-white/36"></span>{audioScript.speaker_profile}</p>}
{audioScript?.rhythm_profile && <p className="mt-1"><span className="text-white/36"></span>{audioScript.rhythm_profile}</p>}
{audioScript?.background_audio_profile && <p className="mt-1"><span className="text-white/36"></span>{audioScript.background_audio_profile}</p>}
</div>
</div>
</div>
</div>
)}

View File

@@ -358,6 +358,7 @@ export interface AudioScript {
rewritten_text: string
speaker_profile: string
rhythm_profile: string
background_audio_profile: string
product_brief: string
rewrite_model: string
voice_provider: string