auto-save 2026-05-14 11:47 (~7)

This commit is contained in:
2026-05-14 11:47:40 +08:00
parent b474d804c8
commit ba491c0c5a
7 changed files with 187 additions and 70 deletions

View File

@@ -1,19 +1,5 @@
{ {
"entries": [ "entries": [
{
"files_changed": 1,
"hash": "c8fd985",
"message": "auto-save 2026-05-13 03:54 (~1)",
"ts": "2026-05-13T03:54:21+08:00",
"type": "commit"
},
{
"files_changed": 1,
"hash": "ffc7437",
"message": "auto-save 2026-05-13 04:00 (~1)",
"ts": "2026-05-13T04:00:13+08:00",
"type": "commit"
},
{ {
"files_changed": 1, "files_changed": 1,
"hash": "7a5b09a", "hash": "7a5b09a",
@@ -3299,6 +3285,19 @@
"type": "session-heartbeat", "type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 1 项未提交变更 · 最近提交auto-save 2026-05-14 11:36 (~3)", "message": "Codex 会话活跃 · 最近命令codex · 1 项未提交变更 · 最近提交auto-save 2026-05-14 11:36 (~3)",
"files_changed": 1 "files_changed": 1
},
{
"ts": "2026-05-14T11:42:06+08:00",
"type": "commit",
"message": "auto-save 2026-05-14 11:41 (~1)",
"hash": "b474d80",
"files_changed": 1
},
{
"ts": "2026-05-14T03:46:10Z",
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 6 项未提交变更 · 最近提交auto-save 2026-05-14 11:41 (~1)",
"files_changed": 6
} }
] ]
} }

View File

@@ -19,7 +19,7 @@ AUDIO_PRODUCT_BRIEF="SKG 智能按摩产品,主打日常肩颈、腰背、眼
MINIMAX_API_KEY= MINIMAX_API_KEY=
MINIMAX_TTS_BASE_URL=https://api.minimax.io MINIMAX_TTS_BASE_URL=https://api.minimax.io
MINIMAX_TTS_MODEL=speech-2.8-turbo MINIMAX_TTS_MODEL=speech-2.8-turbo
MINIMAX_TTS_VOICE_ID="Chinese (Mandarin)_Reliable_Executive" MINIMAX_TTS_VOICE_ID=English_expressive_narrator
# Poe 视频 API优先用于 Seedance / Kling / Veo # Poe 视频 API优先用于 Seedance / Kling / Veo
POE_API_BASE_URL=https://api.poe.com/v1 POE_API_BASE_URL=https://api.poe.com/v1

View File

@@ -49,8 +49,8 @@ MINIMAX_TTS_BASE_URL = os.getenv("MINIMAX_TTS_BASE_URL", "https://api.minimax.io
MINIMAX_TTS_MODEL = os.getenv("MINIMAX_TTS_MODEL", "speech-2.8-turbo").strip() or "speech-2.8-turbo" MINIMAX_TTS_MODEL = os.getenv("MINIMAX_TTS_MODEL", "speech-2.8-turbo").strip() or "speech-2.8-turbo"
MINIMAX_TTS_VOICE_ID = os.getenv( MINIMAX_TTS_VOICE_ID = os.getenv(
"MINIMAX_TTS_VOICE_ID", "MINIMAX_TTS_VOICE_ID",
"Chinese (Mandarin)_Reliable_Executive", "English_expressive_narrator",
).strip() or "Chinese (Mandarin)_Reliable_Executive" ).strip() or "English_expressive_narrator"
POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1" POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1"
POE_API_KEY = os.getenv("POE_API_KEY", "").strip() POE_API_KEY = os.getenv("POE_API_KEY", "").strip()
@@ -379,6 +379,7 @@ class Job(BaseModel):
duration: float = 0.0 duration: float = 0.0
width: int = 0 width: int = 0
height: int = 0 height: int = 0
source_audio_url: str = ""
frames: list[KeyFrame] = Field(default_factory=list) frames: list[KeyFrame] = Field(default_factory=list)
transcript: list[TranscriptSegment] = Field(default_factory=list) transcript: list[TranscriptSegment] = Field(default_factory=list)
audio_script: AudioScript = Field(default_factory=AudioScript) audio_script: AudioScript = Field(default_factory=AudioScript)
@@ -400,6 +401,14 @@ def job_dir(job_id: str) -> Path:
return d return d
def source_audio_url_for(job_id: str) -> str:
return f"/jobs/{job_id}/audio.wav" if (JOBS_DIR / job_id / "audio.wav").exists() else ""
def job_with_artifacts(job: Job) -> Job:
return job.model_copy(update={"source_audio_url": source_audio_url_for(job.id)})
def save_state(job: Job) -> None: def save_state(job: Job) -> None:
(job_dir(job.id) / "state.json").write_text(job.model_dump_json(indent=2)) (job_dir(job.id) / "state.json").write_text(job.model_dump_json(indent=2))
@@ -1224,7 +1233,7 @@ def pipeline_analyze(
wav = d / "audio.wav" wav = d / "audio.wav"
if wav.exists(): if wav.exists():
update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35) update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35, source_audio_url=f"/jobs/{job_id}/audio.wav")
else: else:
update(job, status="splitting", message="ffmpeg 拆分音轨…", progress=35) update(job, status="splitting", message="ffmpeg 拆分音轨…", progress=35)
run([ run([
@@ -1232,6 +1241,7 @@ def pipeline_analyze(
"-vn", "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le", "-vn", "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le",
str(wav), str(wav),
]) ])
update(job, source_audio_url=f"/jobs/{job_id}/audio.wav")
n = max(1, min(int(frame_count), 20)) n = max(1, min(int(frame_count), 20))
target_label = FRAME_TARGET_LABELS.get(target, FRAME_TARGET_LABELS["balanced"]) target_label = FRAME_TARGET_LABELS.get(target, FRAME_TARGET_LABELS["balanced"])
duration = max(float(job.duration or 1.0), 0.1) duration = max(float(job.duration or 1.0), 0.1)
@@ -1497,12 +1507,12 @@ def _transcript_join(segments: list[TranscriptSegment], field: Literal["en", "zh
def _fallback_audio_script(segments: list[TranscriptSegment]) -> str: def _fallback_audio_script(segments: list[TranscriptSegment]) -> str:
joined = " ".join((s.zh or s.en).strip() for s in segments if (s.zh or s.en).strip()) joined = " ".join((s.en or s.zh).strip() for s in segments if (s.en or s.zh).strip())
if not joined: if not joined:
return "日常疲惫不用硬扛。戴上 SKG让肩颈慢慢放松跟着呼吸找回轻松状态。" return "Ease into the moment with SKG. Gentle warmth and rhythmic massage help everyday tension feel lighter, cleaner, and easier to leave behind."
return ( return (
"把日常紧绷交给 SKG。贴合身体需要放松的位置热敷与按摩节奏自然陪伴" "Let SKG turn a short break into real relief. With soothing warmth and steady massage rhythm, "
"让每一次短暂休息都更轻松、更有质感。" "everyday tension feels lighter, calmer, and easier to leave behind."
) )
@@ -1513,24 +1523,24 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment]) -> tuple[str,
source_text = _transcript_join(segments, "en") source_text = _transcript_join(segments, "en")
source_zh = _transcript_join(segments, "zh") source_zh = _transcript_join(segments, "zh")
prompt = ( prompt = (
"你是 SKG 短视频口播编导。根据参考视频音频转写,抽取它的表达结构、情绪节奏和可复用卖点," "You are an English short-video voice-over writer for SKG wellness massagers. "
"改写成适合 SKG 按摩/放松产品二创视频的中文口播文案。\n" "Use the source transcript only for structure, pacing, and emotional hook, then rewrite it into a clean English VO for SKG.\n"
"要求:\n" "Rules:\n"
"1. 输出 35-90 个中文字,适合 8-18 秒短视频配音。\n" "1. Output 28-55 English words, suitable for an 8-18 second TTS voice-over.\n"
"2. 口语化、干净、高级,能直接给 TTS 朗读。\n" "2. Make it natural, premium, concise, and ready to read aloud.\n"
"3. 不承诺治疗、治愈、医学疗效,不夸大。\n" "3. Do not claim medical treatment, cure, pain elimination, or clinical effects.\n"
"4. 不复刻原视频品牌/人物/价格/平台话术,只保留表达结构。\n" "4. Do not copy the original brand, creator, price, platform language, or exact claims.\n"
"5. 如果参考转写信息不足,按产品信息生成通用 SKG 放松口播。\n" "5. If the source transcript is too thin, write a general SKG relaxation VO.\n"
'严格返回 JSON{"rewritten_text":"..."}\n\n' 'Return strict JSON only: {"rewritten_text":"..."}.\n\n'
f"SKG 产品信息:{AUDIO_PRODUCT_BRIEF}\n\n" f"SKG product context: {AUDIO_PRODUCT_BRIEF}\n\n"
f"英文转写:\n{source_text or ''}\n\n" f"English transcript:\n{source_text or 'None'}\n\n"
f"中文翻译:\n{source_zh or ''}" f"Chinese translation for reference:\n{source_zh or 'None'}"
) )
try: try:
resp = llm().chat.completions.create( resp = llm().chat.completions.create(
model=AUDIO_REWRITE_MODEL, model=AUDIO_REWRITE_MODEL,
messages=[ messages=[
{"role": "system", "content": "只输出合法 JSON不要解释不要 markdown"}, {"role": "system", "content": "Return valid JSON only. No explanation. No markdown."},
{"role": "user", "content": prompt}, {"role": "user", "content": prompt},
], ],
response_format={"type": "json_object"}, response_format={"type": "json_object"},
@@ -1564,7 +1574,7 @@ def _minimax_tts_sync(job_id: str, text: str) -> str:
"model": MINIMAX_TTS_MODEL, "model": MINIMAX_TTS_MODEL,
"text": text.strip()[:9500], "text": text.strip()[:9500],
"stream": False, "stream": False,
"language_boost": "Chinese", "language_boost": "English",
"output_format": "hex", "output_format": "hex",
"voice_setting": { "voice_setting": {
"voice_id": MINIMAX_TTS_VOICE_ID, "voice_id": MINIMAX_TTS_VOICE_ID,
@@ -1651,6 +1661,7 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
]) ])
if not wav.exists(): if not wav.exists():
raise RuntimeError("音频提取完成但找不到 audio.wav") raise RuntimeError("音频提取完成但找不到 audio.wav")
update(job, source_audio_url=f"/jobs/{job_id}/audio.wav")
if not LLM_API_KEY: if not LLM_API_KEY:
# 无 key 模式mock 数据 # 无 key 模式mock 数据
@@ -2112,7 +2123,7 @@ def get_job(job_id: str) -> Job:
job = JOBS.get(job_id) job = JOBS.get(job_id)
if not job: if not job:
raise HTTPException(404, "job not found") raise HTTPException(404, "job not found")
return job return job_with_artifacts(job)
@app.delete("/jobs/{job_id}") @app.delete("/jobs/{job_id}")
@@ -2153,7 +2164,7 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job:
update(job, error="", audio_script=audio_payload) update(job, error="", audio_script=audio_payload)
if not start_audio_processing(job_id, manage_job_status=manage_job_status): if not start_audio_processing(job_id, manage_job_status=manage_job_status):
update(job, message="音频已在处理中") update(job, message="音频已在处理中")
return job return job_with_artifacts(job)
@app.get("/jobs/{job_id}/video.mp4") @app.get("/jobs/{job_id}/video.mp4")

View File

@@ -100,6 +100,8 @@ export default function Home() {
const [jobs, setJobs] = useState<Job[]>([]) const [jobs, setJobs] = useState<Job[]>([])
const [activeJobId, setActiveJobId] = useState<string | null>(null) const [activeJobId, setActiveJobId] = useState<string | null>(null)
const job = useMemo(() => jobs.find((j) => j.id === activeJobId) ?? null, [jobs, activeJobId]) const job = useMemo(() => jobs.find((j) => j.id === activeJobId) ?? null, [jobs, activeJobId])
const [audioStripJobId, setAudioStripJobId] = useState<string | null>(null)
const audioStripJob = useMemo(() => jobs.find((j) => j.id === audioStripJobId) ?? null, [jobs, audioStripJobId])
const [submitting, setSubmitting] = useState(false) const [submitting, setSubmitting] = useState(false)
const [analyzing, setAnalyzing] = useState(false) const [analyzing, setAnalyzing] = useState(false)
const [frameTargets, setFrameTargets] = useState<Record<string, FrameExtractTarget>>({}) const [frameTargets, setFrameTargets] = useState<Record<string, FrameExtractTarget>>({})
@@ -159,6 +161,10 @@ export default function Home() {
const handleSwitchJob = useCallback((id: string) => { const handleSwitchJob = useCallback((id: string) => {
setActiveJobId(id) setActiveJobId(id)
}, []) }, [])
const handleOpenAudioStrip = useCallback((jobId?: string) => {
const targetId = jobId ?? activeJobId
if (targetId) setAudioStripJobId(targetId)
}, [activeJobId])
const pollRef = useRef<ReturnType<typeof setInterval> | null>(null) const pollRef = useRef<ReturnType<typeof setInterval> | null>(null)
const handleSubmit = useCallback(async (url: string) => { const handleSubmit = useCallback(async (url: string) => {
@@ -393,16 +399,13 @@ export default function Home() {
const handleTranscribeAudio = useCallback(async (jobId?: string, options?: { silent?: boolean }) => { const handleTranscribeAudio = useCallback(async (jobId?: string, options?: { silent?: boolean }) => {
const targetId = jobId ?? activeJobId const targetId = jobId ?? activeJobId
if (!targetId) return if (!targetId) return
setAudioStripJobId(targetId)
const target = jobs.find((item) => item.id === targetId) const target = jobs.find((item) => item.id === targetId)
if (!target) return if (!target) return
if (!target.video_url) { if (!target.video_url) {
if (!options?.silent) toast.info("视频导入完成后,可在音频卡片点击提取音频") if (!options?.silent) toast.info("视频导入完成后,可在音频卡片点击提取音频")
return return
} }
if (target.status === "splitting") {
if (!options?.silent) toast.info("当前正在抽帧,结束后可重新点击提取音频")
return
}
if (target.status === "transcribing" || target.audio_script?.status === "rewriting") { if (target.status === "transcribing" || target.audio_script?.status === "rewriting") {
if (!options?.silent) toast.info("音频正在处理中") if (!options?.silent) toast.info("音频正在处理中")
return return
@@ -728,9 +731,10 @@ export default function Home() {
onCopyImage: handleCopyImage, onCopyImage: handleCopyImage,
onGenerateProductFusionVideo: handleGenerateProductFusionVideo, onGenerateProductFusionVideo: handleGenerateProductFusionVideo,
onTranscribeAudio: handleTranscribeAudio, onTranscribeAudio: handleTranscribeAudio,
onOpenAudioStrip: handleOpenAudioStrip,
pinnedNodes, pinnedNodes,
onToggleNodePin: handleToggleNodePin, onToggleNodePin: handleToggleNodePin,
}), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, handleTranscribeAudio, pinnedNodes, handleToggleNodePin]) }), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, handleTranscribeAudio, handleOpenAudioStrip, pinnedNodes, handleToggleNodePin])
// 用 useNodesState 让 ReactFlow 自己管位置(避免轮询时重置 drag // 用 useNodesState 让 ReactFlow 自己管位置(避免轮询时重置 drag
const savedSizes = useMemo(() => loadNodeSizes(), []) const savedSizes = useMemo(() => loadNodeSizes(), [])
@@ -1013,7 +1017,7 @@ export default function Home() {
<div className="h-full w-full" suppressHydrationWarning /> <div className="h-full w-full" suppressHydrationWarning />
)} )}
</div> </div>
{clientReady && <AudioStrip job={job} />} {clientReady && <AudioStrip job={audioStripJob} open={!!audioStripJob} onClose={() => setAudioStripJobId(null)} />}
</section> </section>
<Toaster theme="system" position="top-center" /> <Toaster theme="system" position="top-center" />

View File

@@ -1,7 +1,7 @@
"use client" "use client"
import { useEffect, useMemo, useRef, useState, type PointerEvent as ReactPointerEvent } from "react" import { useEffect, useMemo, useRef, useState, type PointerEvent as ReactPointerEvent } from "react"
import { ChevronDown, ChevronUp, GripHorizontal, Mic2, Volume2 } from "lucide-react" import { ChevronDown, ChevronUp, GripHorizontal, Mic2, Volume2, X } from "lucide-react"
import { apiAssetUrl, sourceAudioUrl, type Job, type TranscriptSegment } from "@/lib/api" import { apiAssetUrl, sourceAudioUrl, type Job, type TranscriptSegment } from "@/lib/api"
const STORAGE_KEY = "skg.audio-strip.height" const STORAGE_KEY = "skg.audio-strip.height"
@@ -58,20 +58,34 @@ function SegmentCard({
segment, segment,
peaks, peaks,
duration, duration,
currentTime,
}: { }: {
segment: TranscriptSegment segment: TranscriptSegment
peaks: number[] peaks: number[]
duration: number duration: number
currentTime: number
}) { }) {
const segDuration = Math.max(1.2, segment.end - segment.start) const segDuration = Math.max(1.2, segment.end - segment.start)
const width = clamp(180 + segDuration * 42, 220, 520) const width = clamp(180 + segDuration * 42, 220, 520)
const segPeaks = slicePeaks(peaks, segment.start, segment.end, duration) const segPeaks = slicePeaks(peaks, segment.start, segment.end, duration)
const active = currentTime >= segment.start && currentTime <= Math.max(segment.end, segment.start + 0.2)
const pointerPct = active ? clamp(((currentTime - segment.start) / Math.max(0.2, segment.end - segment.start)) * 100, 0, 100) : 0
return ( return (
<article <article
className="shrink-0 rounded-lg border border-white/10 bg-white/[0.045] p-3 shadow-[0_12px_30px_-22px_rgba(0,0,0,0.8)]" className={`relative shrink-0 overflow-hidden rounded-lg border p-3 shadow-[0_12px_30px_-22px_rgba(0,0,0,0.8)] transition ${
active
? "border-emerald-300/55 bg-emerald-300/[0.105]"
: "border-white/10 bg-white/[0.045]"
}`}
style={{ width }} style={{ width }}
> >
{active && (
<div
className="pointer-events-none absolute inset-y-0 z-10 w-[2px] bg-emerald-200 shadow-[0_0_18px_rgba(110,231,183,0.9)]"
style={{ left: `${pointerPct}%` }}
/>
)}
<div className="mb-2 flex items-center justify-between gap-3"> <div className="mb-2 flex items-center justify-between gap-3">
<span className="font-mono text-[10px] text-[var(--text-faint)]"> <span className="font-mono text-[10px] text-[var(--text-faint)]">
{segment.start.toFixed(1)}s to {segment.end.toFixed(1)}s {segment.start.toFixed(1)}s to {segment.end.toFixed(1)}s
@@ -93,7 +107,7 @@ function SegmentCard({
{segment.zh || <span className="text-[var(--text-faint)] italic">...</span>} {segment.zh || <span className="text-[var(--text-faint)] italic">...</span>}
</p> </p>
</div> </div>
<Waveform peaks={segPeaks} /> <Waveform peaks={segPeaks} active={active} />
</div> </div>
</article> </article>
) )
@@ -126,19 +140,33 @@ async function decodeWaveform(url: string, targetPeaks = 1800) {
} }
} }
export function AudioStrip({ job }: { job: Job | null }) { export function AudioStrip({ job, open, onClose }: { job: Job | null; open: boolean; onClose?: () => void }) {
const [collapsed, setCollapsed] = useState(false) const [collapsed, setCollapsed] = useState(false)
const [height, setHeight] = useState(DEFAULT_HEIGHT) const [height, setHeight] = useState(DEFAULT_HEIGHT)
const [peaks, setPeaks] = useState<number[]>([]) const [peaks, setPeaks] = useState<number[]>([])
const [sourceReady, setSourceReady] = useState(false)
const [audioKey, setAudioKey] = useState(0)
const [currentTime, setCurrentTime] = useState(0)
const dragRef = useRef<{ startY: number; startHeight: number } | null>(null) const dragRef = useRef<{ startY: number; startHeight: number } | null>(null)
const audioRef = useRef<HTMLAudioElement>(null)
const transcript = job?.transcript ?? [] const transcript = job?.transcript ?? []
const audioScript = job?.audio_script const audioScript = job?.audio_script
const voiceUrl = apiAssetUrl(audioScript?.voice_url) const voiceUrl = apiAssetUrl(audioScript?.voice_url)
const hasAudio = !!job && (transcript.length > 0 || !!audioScript?.rewritten_text || job.status === "transcribing") const sourceUrl = job ? apiAssetUrl(job.source_audio_url || sourceAudioUrl(job.id)) : ""
const processing = !!job && (job.status === "transcribing" || audioScript?.status === "rewriting")
const activeSegment = transcript.find((segment) => currentTime >= segment.start && currentTime <= Math.max(segment.end, segment.start + 0.2))
const duration = useMemo(() => { const duration = useMemo(() => {
const lastTranscriptEnd = transcript.reduce((max, s) => Math.max(max, s.end || 0), 0) const lastTranscriptEnd = transcript.reduce((max, s) => Math.max(max, s.end || 0), 0)
return Math.max(job?.duration ?? 0, lastTranscriptEnd, 1) const audioDuration = audioRef.current?.duration
return Math.max(
Number.isFinite(audioDuration) ? Number(audioDuration) : 0,
job?.duration ?? 0,
lastTranscriptEnd,
1,
)
}, [job?.duration, transcript]) }, [job?.duration, transcript])
const timelinePeaks = useMemo(() => slicePeaks(peaks, 0, duration, duration, 160), [duration, peaks])
const pointerPct = clamp((currentTime / duration) * 100, 0, 100)
useEffect(() => { useEffect(() => {
if (typeof window === "undefined") return if (typeof window === "undefined") return
@@ -148,21 +176,38 @@ export function AudioStrip({ job }: { job: Job | null }) {
useEffect(() => { useEffect(() => {
let cancelled = false let cancelled = false
let timer: ReturnType<typeof setTimeout> | null = null
let attempts = 0
setPeaks([]) setPeaks([])
if (!job?.id || !hasAudio) return setSourceReady(false)
decodeWaveform(sourceAudioUrl(job.id)) setCurrentTime(0)
if (!job?.id || !open) return
setPeaks(fallbackPeaks(1800, `${job.id}-loading`))
const load = () => {
attempts += 1
decodeWaveform(sourceUrl)
.then((next) => { .then((next) => {
if (!cancelled) setPeaks(next) if (cancelled) return
setPeaks(next)
setSourceReady(true)
setAudioKey((key) => key + 1)
}) })
.catch(() => { .catch(() => {
if (!cancelled) setPeaks(fallbackPeaks(1800, `${job.id}-${transcript.length}`)) if (cancelled) return
setSourceReady(false)
if (attempts < (processing ? 45 : 6)) {
timer = setTimeout(load, 1000)
}
}) })
}
load()
return () => { return () => {
cancelled = true cancelled = true
if (timer) clearTimeout(timer)
} }
}, [job?.id, hasAudio, transcript.length]) }, [job?.id, open, processing, sourceUrl, transcript.length])
if (!hasAudio || !job) return null if (!open || !job) return null
const startDrag = (e: ReactPointerEvent<HTMLDivElement>) => { const startDrag = (e: ReactPointerEvent<HTMLDivElement>) => {
e.preventDefault() e.preventDefault()
@@ -206,7 +251,7 @@ export function AudioStrip({ job }: { job: Job | null }) {
{voiceUrl && ( {voiceUrl && (
<div className="hidden items-center gap-1.5 text-[10px] text-emerald-200/80 sm:flex"> <div className="hidden items-center gap-1.5 text-[10px] text-emerald-200/80 sm:flex">
<Volume2 className="h-3.5 w-3.5" /> <Volume2 className="h-3.5 w-3.5" />
MiniMax ready English VO ready
</div> </div>
)} )}
<button <button
@@ -217,27 +262,79 @@ export function AudioStrip({ job }: { job: Job | null }) {
> >
{collapsed ? <ChevronUp className="h-3.5 w-3.5" /> : <ChevronDown className="h-3.5 w-3.5" />} {collapsed ? <ChevronUp className="h-3.5 w-3.5" /> : <ChevronDown className="h-3.5 w-3.5" />}
</button> </button>
{onClose && (
<button
type="button"
onClick={onClose}
className="inline-flex h-6 w-6 items-center justify-center rounded-md border border-white/10 text-white/65 transition hover:bg-white/10 hover:text-white"
title="关闭音频条"
>
<X className="h-3.5 w-3.5" />
</button>
)}
</div> </div>
</div> </div>
{!collapsed && ( {!collapsed && (
<div className="grid h-[calc(100%-48px)] grid-cols-[minmax(0,1fr)_300px] gap-3 p-3 max-lg:grid-cols-1"> <div className="grid h-[calc(100%-48px)] grid-cols-[minmax(0,1fr)_300px] gap-3 p-3 max-lg:grid-cols-1">
<div className="min-w-0 overflow-x-auto overflow-y-hidden pb-1"> <div className="flex min-w-0 min-h-0 flex-col gap-3 overflow-hidden">
{transcript.length > 0 ? ( <div className="rounded-lg border border-white/10 bg-black/20 p-2">
<div className="flex h-full items-stretch gap-3"> <div className="mb-2 flex items-center justify-between gap-3">
{transcript.map((segment) => ( <div className="min-w-0 text-[10px] uppercase tracking-widest text-white/45">
<SegmentCard key={segment.index} segment={segment} peaks={peaks} duration={duration} /> Source audio playback
))} {activeSegment ? <span className="ml-2 text-emerald-200/80">#{activeSegment.index + 1}</span> : null}
</div>
<div className="shrink-0 font-mono text-[10px] text-white/45">
{currentTime.toFixed(1)}s / {duration.toFixed(1)}s
</div>
</div> </div>
{sourceReady ? (
<audio
key={audioKey}
ref={audioRef}
controls
src={sourceUrl}
className="h-8 w-full"
onTimeUpdate={(event) => setCurrentTime(event.currentTarget.currentTime)}
onSeeked={(event) => setCurrentTime(event.currentTarget.currentTime)}
onLoadedMetadata={(event) => setCurrentTime(event.currentTarget.currentTime)}
/>
) : (
<div className="flex h-8 items-center rounded-md border border-dashed border-white/12 px-3 text-[11px] text-white/45">
{processing ? "正在提取原音频并准备波形..." : "等待原音频波形..."}
</div>
)}
<div className="relative mt-2">
<Waveform peaks={timelinePeaks} active={sourceReady} />
<div
className="pointer-events-none absolute inset-y-0 w-[2px] bg-emerald-200 shadow-[0_0_18px_rgba(110,231,183,0.9)]"
style={{ left: `${pointerPct}%` }}
/>
</div>
</div>
<div className="min-h-0 overflow-x-auto overflow-y-hidden pb-1">
{transcript.length > 0 ? (
<div className="flex h-full items-stretch gap-3">
{transcript.map((segment) => (
<SegmentCard
key={segment.index}
segment={segment}
peaks={peaks}
duration={duration}
currentTime={currentTime}
/>
))}
</div>
) : ( ) : (
<div className="flex h-full items-center justify-center rounded-lg border border-dashed border-white/12 text-[12px] text-white/45"> <div className="flex h-full items-center justify-center rounded-lg border border-dashed border-white/12 text-[12px] text-white/45">
</div> </div>
)} )}
</div>
</div> </div>
<div className="min-h-0 overflow-y-auto rounded-lg border border-emerald-300/20 bg-emerald-300/[0.07] p-3 max-lg:hidden"> <div className="min-h-0 overflow-y-auto rounded-lg border border-emerald-300/20 bg-emerald-300/[0.07] p-3 max-lg:hidden">
<div className="mb-2 text-[10px] uppercase tracking-widest text-emerald-100/70"> · SKG </div> <div className="mb-2 text-[10px] uppercase tracking-widest text-emerald-100/70">English VO · SKG rewrite</div>
<p className="text-[12.5px] leading-relaxed text-white/90"> <p className="text-[12.5px] leading-relaxed text-white/90">
{audioScript?.rewritten_text || "等待转录完成后生成适合 SKG 产品视频的口播文案。"} {audioScript?.rewritten_text || "Waiting for the parsed transcript to become an English SKG voice-over."}
</p> </p>
{voiceUrl && ( {voiceUrl && (
<audio controls src={voiceUrl} className="mt-3 h-8 w-full" /> <audio controls src={voiceUrl} className="mt-3 h-8 w-full" />

View File

@@ -76,6 +76,7 @@ export interface NodeData {
onCopyImage?: (ref: ImageRef) => void // 复制图片到全局剪贴板(粘贴到分镜头编排插槽) onCopyImage?: (ref: ImageRef) => void // 复制图片到全局剪贴板(粘贴到分镜头编排插槽)
onGenerateProductFusionVideo?: (frameIdx: number, shot: ProductFusionShot) => Promise<void> | void onGenerateProductFusionVideo?: (frameIdx: number, shot: ProductFusionShot) => Promise<void> | void
onTranscribeAudio?: (jobId?: string) => Promise<void> | void onTranscribeAudio?: (jobId?: string) => Promise<void> | void
onOpenAudioStrip?: (jobId?: string) => void
pinnedNodes?: Set<string> // 已钉住的节点 id 集合 — 钉住后位置 + 尺寸锁定 pinnedNodes?: Set<string> // 已钉住的节点 id 集合 — 钉住后位置 + 尺寸锁定
onToggleNodePin?: (id: string) => void onToggleNodePin?: (id: string) => void
} }
@@ -2141,7 +2142,12 @@ export function AudioNode({ data, selected }: any) {
pinned={d.pinnedNodes?.has("audio")} pinned={d.pinnedNodes?.has("audio")}
onTogglePin={() => d.onToggleNodePin?.("audio")} onTogglePin={() => d.onToggleNodePin?.("audio")}
> >
<div className="space-y-2 text-[11px] text-[var(--text-soft)] leading-snug"> <div
className="space-y-2 text-[11px] text-[var(--text-soft)] leading-snug"
onClick={() => {
if (job?.video_url) d.onOpenAudioStrip?.(job.id)
}}
>
<div> <div>
ASR SKG MiniMax <br /> ASR SKG MiniMax <br />
<span className="text-[var(--text-faint)] font-mono"> <span className="text-[var(--text-faint)] font-mono">
@@ -2154,6 +2160,7 @@ export function AudioNode({ data, selected }: any) {
disabled={audioButtonDisabled} disabled={audioButtonDisabled}
onClick={(e) => { onClick={(e) => {
e.stopPropagation() e.stopPropagation()
d.onOpenAudioStrip?.(job.id)
if (audioButtonDisabled) return if (audioButtonDisabled) return
void d.onTranscribeAudio?.(job.id) void d.onTranscribeAudio?.(job.id)
}} }}
@@ -2185,9 +2192,7 @@ export function AudioNode({ data, selected }: any) {
)} )}
</div> </div>
)} )}
{voiceUrl && ( {voiceUrl && <div className="text-[10.5px] text-emerald-200/85">MiniMax English voice ready · </div>}
<audio controls src={voiceUrl} className="h-7 w-full" />
)}
{isRewriting && ( {isRewriting && (
<div className="text-[10.5px] text-[var(--text-faint)]"></div> <div className="text-[10.5px] text-[var(--text-faint)]"></div>
)} )}

View File

@@ -331,6 +331,7 @@ export interface Job {
duration?: number duration?: number
width?: number width?: number
height?: number height?: number
source_audio_url?: string
frames: KeyFrame[] frames: KeyFrame[]
transcript: TranscriptSegment[] transcript: TranscriptSegment[]
audio_script?: AudioScript audio_script?: AudioScript