Files
20260512-skg-tk/web/components/audio-strip.tsx

352 lines
15 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"use client"
import { useEffect, useMemo, useRef, useState, type PointerEvent as ReactPointerEvent } from "react"
import { ChevronDown, ChevronUp, GripHorizontal, Mic2, X } from "lucide-react"
import { apiAssetUrl, sourceAudioUrl, type Job, type TranscriptSegment } from "@/lib/api"
/** localStorage key under which the strip's resized height is saved and restored. */
const STORAGE_KEY = "skg.audio-strip.height"
/** Lower bound applied to the strip height when restoring a stored value or dragging. */
const MIN_HEIGHT = 132
/** Upper bound applied to the strip height when restoring a stored value or dragging. */
const MAX_HEIGHT = 420
/** Initial strip height, used until a stored value is restored or the handle is dragged. */
const DEFAULT_HEIGHT = 236
/**
 * Restricts `value` to the `[min, max]` interval.
 *
 * The lower bound is applied first and the upper bound second, so when the
 * bounds are inverted (`min > max`) the result is `max`.
 */
function clamp(value: number, min: number, max: number) {
  const raised = Math.max(min, value)
  return Math.min(max, raised)
}
/**
 * Builds a deterministic placeholder waveform for use when no decoded peaks exist.
 *
 * @param count    Number of peak values to generate.
 * @param seedText Text hashed into a seed, so the same text always yields the same shape.
 * @returns `count` values, each clamped to the range 0.18 to 1.
 */
function fallbackPeaks(count: number, seedText: string) {
  // Fold the UTF-16 code units of the seed text into a small rolling hash.
  let hash = 0
  let pos = 0
  while (pos < seedText.length) {
    hash = (hash * 31 + seedText.charCodeAt(pos)) % 9973
    pos += 1
  }

  const peakAt = (index: number) => {
    const phase = index + hash
    // Two sine waves of different frequency plus a sawtooth keep the bars uneven.
    const fast = Math.sin(phase * 0.43) * 0.35
    const slow = Math.sin(phase * 0.11) * 0.25
    const wave = fast + slow
    const pulse = (phase % 9) / 18
    return clamp(0.22 + Math.abs(wave) + pulse, 0.18, 1)
  }

  return Array.from({ length: count }, (_unused, index) => peakAt(index))
}
/**
 * Resamples the part of `peaks` covering the time window `[start, end]` into
 * `count` display buckets.
 *
 * `peaks` is treated as evenly spanning `duration`, so times are mapped to
 * indices proportionally. Each bucket holds the maximum of the peaks it covers,
 * with a floor of 0.12. If there are no peaks, or the duration or window is
 * empty or invalid, a deterministic placeholder waveform is returned instead.
 *
 * @param peaks    Peak values for the whole recording.
 * @param start    Window start, in the same unit as `duration`.
 * @param end      Window end, in the same unit as `duration`.
 * @param duration Total length represented by `peaks`.
 * @param count    Number of buckets to produce (default 56).
 */
function slicePeaks(peaks: number[], start: number, end: number, duration: number, count = 56) {
  const unusable = peaks.length === 0 || duration <= 0 || end <= start
  if (unusable) {
    return fallbackPeaks(count, `${start}-${end}`)
  }

  const total = peaks.length
  const firstIndex = clamp(Math.floor((start / duration) * total), 0, total - 1)
  // The window always contains at least one peak.
  const lastIndex = clamp(Math.ceil((end / duration) * total), firstIndex + 1, total)
  const windowed = peaks.slice(firstIndex, lastIndex)
  const span = windowed.length

  const bucketMax = (bucket: number) => {
    const lo = Math.floor((bucket / count) * span)
    const hi = Math.max(lo + 1, Math.floor(((bucket + 1) / count) * span))
    return windowed.slice(lo, hi).reduce((best, level) => Math.max(best, level), 0.12)
  }

  return Array.from({ length: count }, (_unused, bucket) => bucketMax(bucket))
}
/**
 * Renders a row of vertical bars, one per peak value.
 *
 * Bar height and opacity grow with the peak value; `active` switches the bar
 * colour from violet to emerald.
 */
function Waveform({ peaks, active = false }: { peaks: number[]; active?: boolean }) {
  const barColor = active ? "bg-emerald-300/80" : "bg-violet-300/65"

  const bars = peaks.map((level, index) => {
    const barStyle = {
      width: 3,
      height: `${Math.round(8 + level * 28)}px`,
      borderRadius: 999,
      opacity: 0.42 + level * 0.45,
    }
    return <div key={index} className={barColor} style={barStyle} />
  })

  return (
    <div className="flex h-10 items-center gap-[2px] rounded-md border border-white/10 bg-black/20 px-2">
      {bars}
    </div>
  )
}
/**
 * Card for one transcript segment: its time range, index, `en` / `zh` text and a
 * waveform excerpt. While `currentTime` falls inside the segment the card is
 * highlighted and a vertical playhead line is drawn at the proportional position.
 *
 * @param segment     Transcript segment to render.
 * @param peaks       Peak values for the whole recording (may be empty).
 * @param duration    Total length represented by `peaks`.
 * @param currentTime Current playback position.
 */
function SegmentCard({
  segment,
  peaks,
  duration,
  currentTime,
}: {
  segment: TranscriptSegment
  peaks: number[]
  duration: number
  currentTime: number
}) {
  // Card width scales with segment length, within fixed bounds.
  const segDuration = Math.max(1.2, segment.end - segment.start)
  const width = clamp(180 + segDuration * 42, 220, 520)
  // Every card re-renders on each playback time update, but the excerpt only
  // depends on the peaks and the segment window, so it is computed once per change.
  const segPeaks = useMemo(
    () => slicePeaks(peaks, segment.start, segment.end, duration),
    [peaks, segment.start, segment.end, duration],
  )
  // Very short segments are treated as at least 0.2 long so they can still become active.
  const active = currentTime >= segment.start && currentTime <= Math.max(segment.end, segment.start + 0.2)
  const pointerPct = active ? clamp(((currentTime - segment.start) / Math.max(0.2, segment.end - segment.start)) * 100, 0, 100) : 0
  return (
    <article
      className={`relative shrink-0 overflow-hidden rounded-lg border p-3 shadow-[0_12px_30px_-22px_rgba(0,0,0,0.8)] transition ${
        active
          ? "border-emerald-300/55 bg-emerald-300/[0.105]"
          : "border-white/10 bg-white/[0.045]"
      }`}
      style={{ width }}
    >
      {active && (
        <div
          className="pointer-events-none absolute inset-y-0 z-10 w-[2px] bg-emerald-200 shadow-[0_0_18px_rgba(110,231,183,0.9)]"
          style={{ left: `${pointerPct}%` }}
        />
      )}
      <div className="mb-2 flex items-center justify-between gap-3">
        <span className="font-mono text-[10px] text-[var(--text-faint)]">
          {segment.start.toFixed(1)}s to {segment.end.toFixed(1)}s
        </span>
        <span className="rounded-full border border-white/10 px-2 py-0.5 text-[9.5px] uppercase tracking-widest text-[var(--text-faint)]">
          #{segment.index + 1}
        </span>
      </div>
      <div className="space-y-2">
        {segment.en && (
          <div>
            <div className="mb-1 text-[9.5px] uppercase tracking-widest text-violet-200/70">English</div>
            <p className="line-clamp-3 text-[12px] leading-relaxed text-[var(--text-strong)]">{segment.en}</p>
          </div>
        )}
        <div>
          {/* NOTE(review): this caption renders empty here; confirm the intended label text is present. */}
          <div className="mb-1 text-[9.5px] uppercase tracking-widest text-emerald-200/75"></div>
          <p className="line-clamp-3 text-[12.5px] leading-relaxed text-[var(--text-strong)]">
            {segment.zh || <span className="text-[var(--text-faint)] italic">...</span>}
          </p>
        </div>
        <Waveform peaks={segPeaks} active={active} />
      </div>
    </article>
  )
}
/**
 * Downloads an audio file, decodes it, and reduces its first channel to
 * `targetPeaks` normalized peak values for waveform display.
 *
 * Buckets are laid out proportionally over the whole buffer, so the result
 * always spans the full recording: no trailing samples are dropped, and a buffer
 * shorter than `targetPeaks` samples is stretched rather than padded with silence.
 *
 * @param url         Location of the audio file to fetch.
 * @param targetPeaks Number of peak values to produce (default 1800).
 * @returns Peaks divided by the loudest peak and clamped to the range 0.08 to 1.
 * @throws Error if the response is not OK, no AudioContext implementation
 *         exists, or the audio data cannot be decoded.
 */
async function decodeWaveform(url: string, targetPeaks = 1800): Promise<number[]> {
  const res = await fetch(url)
  if (!res.ok) throw new Error(`audio ${res.status}`)
  const arrayBuffer = await res.arrayBuffer()
  const AudioContextClass = window.AudioContext || (window as typeof window & { webkitAudioContext?: typeof AudioContext }).webkitAudioContext
  if (!AudioContextClass) throw new Error("AudioContext unavailable")
  const ctx = new AudioContextClass()
  try {
    // Legacy prefixed implementations only support the callback form and return
    // nothing, so pass callbacks AND chain the promise when one is returned.
    // The buffer is not reused afterwards, so no defensive copy is needed.
    const buffer = await new Promise<AudioBuffer>((resolve, reject) => {
      const pending: Promise<AudioBuffer> | undefined = ctx.decodeAudioData(arrayBuffer, resolve, reject)
      pending?.then(resolve, reject)
    })
    const data = buffer.getChannelData(0)
    const total = data.length
    // Floor of 0.01 avoids dividing by zero for a silent recording.
    let maxPeak = 0.01
    const raw: number[] = []
    for (let i = 0; i < targetPeaks; i++) {
      // Proportional boundaries; each bucket covers at least one sample when any exist.
      const start = Math.min(total, Math.floor((i * total) / targetPeaks))
      const end = Math.min(total, Math.max(start + 1, Math.floor(((i + 1) * total) / targetPeaks)))
      let peak = 0
      for (let j = start; j < end; j++) peak = Math.max(peak, Math.abs(data[j] || 0))
      raw.push(peak)
      maxPeak = Math.max(maxPeak, peak)
    }
    return raw.map((p) => clamp(p / maxPeak, 0.08, 1))
  } finally {
    // Best-effort release of the audio context; a failure to close is not actionable.
    void ctx.close().catch(() => {})
  }
}
/**
 * Bottom-docked, resizable panel showing the source audio of a job: an audio
 * player, a whole-file waveform with playhead, one card per transcript segment,
 * and a side column with the audio script analysis.
 *
 * Renders nothing when `open` is false or `job` is null.
 *
 * @param job     Job whose audio and transcript are shown.
 * @param open    Whether the strip is visible.
 * @param onClose Optional handler; when provided a close button is rendered.
 */
export function AudioStrip({ job, open, onClose }: { job: Job | null; open: boolean; onClose?: () => void }) {
  const [collapsed, setCollapsed] = useState(false)
  const [height, setHeight] = useState(DEFAULT_HEIGHT)
  const [peaks, setPeaks] = useState<number[]>([])
  const [sourceReady, setSourceReady] = useState(false)
  const [audioKey, setAudioKey] = useState(0)
  const [currentTime, setCurrentTime] = useState(0)
  // Duration reported by the <audio> element. Kept in state because reading a
  // ref inside a memo does not trigger recomputation when metadata arrives.
  const [audioDuration, setAudioDuration] = useState(0)
  const dragRef = useRef<{ startY: number; startHeight: number } | null>(null)
  const audioRef = useRef<HTMLAudioElement>(null)
  const jobTranscript = job?.transcript
  // Stable fallback so dependents are not invalidated by a fresh [] on every render.
  const transcript = useMemo(() => jobTranscript ?? [], [jobTranscript])
  const audioScript = job?.audio_script
  const sourceUrl = job ? apiAssetUrl(job.source_audio_url || sourceAudioUrl(job.id)) : ""
  const processing = !!job && (job.status === "transcribing" || audioScript?.status === "rewriting")
  const activeSegment = transcript.find((segment) => currentTime >= segment.start && currentTime <= Math.max(segment.end, segment.start + 0.2))
  // Longest of the known durations, never below 1 so divisions stay safe.
  const duration = useMemo(() => {
    const lastTranscriptEnd = transcript.reduce((max, s) => Math.max(max, s.end || 0), 0)
    return Math.max(audioDuration, job?.duration ?? 0, lastTranscriptEnd, 1)
  }, [audioDuration, job?.duration, transcript])
  const timelinePeaks = useMemo(() => slicePeaks(peaks, 0, duration, duration, 160), [duration, peaks])
  const pointerPct = clamp((currentTime / duration) * 100, 0, 100)

  // Restore the previously saved height once on mount.
  useEffect(() => {
    if (typeof window === "undefined") return
    try {
      const stored = Number(window.localStorage.getItem(STORAGE_KEY) || "")
      if (Number.isFinite(stored) && stored > 0) setHeight(clamp(stored, MIN_HEIGHT, MAX_HEIGHT))
    } catch {
      // Storage can be unavailable (e.g. blocked by the browser); keep the default height.
    }
  }, [])

  // Fetch and decode the waveform, retrying once per second while it is unavailable.
  // NOTE(review): `transcript.length` is a dependency, so each new segment resets
  // playback and restarts decoding - confirm this is intended.
  useEffect(() => {
    let cancelled = false
    let timer: ReturnType<typeof setTimeout> | null = null
    let attempts = 0
    setPeaks([])
    setSourceReady(false)
    setCurrentTime(0)
    setAudioDuration(0)
    if (!job?.id || !open) return
    // Show a placeholder waveform while the real one loads.
    setPeaks(fallbackPeaks(1800, `${job.id}-loading`))
    const load = () => {
      attempts += 1
      decodeWaveform(sourceUrl)
        .then((next) => {
          if (cancelled) return
          setPeaks(next)
          setSourceReady(true)
          // Remount the <audio> element so it loads the now-available source.
          setAudioKey((key) => key + 1)
        })
        .catch(() => {
          if (cancelled) return
          setSourceReady(false)
          // Retry longer while the job is still being processed.
          if (attempts < (processing ? 45 : 6)) {
            timer = setTimeout(load, 1000)
          }
        })
    }
    load()
    return () => {
      cancelled = true
      if (timer) clearTimeout(timer)
    }
  }, [job?.id, open, processing, sourceUrl, transcript.length])

  if (!open || !job) return null

  // Copies playback position and (finite) duration from the media element into state.
  const syncAudioClock = (el: HTMLAudioElement) => {
    setCurrentTime(el.currentTime)
    setAudioDuration(Number.isFinite(el.duration) ? el.duration : 0)
  }

  // Drag the top handle to resize: moving the pointer up makes the strip taller.
  const startDrag = (e: ReactPointerEvent<HTMLDivElement>) => {
    e.preventDefault()
    dragRef.current = { startY: e.clientY, startHeight: height }
    // Latest height seen during this drag. The `height` state captured by this
    // closure is the value from when the drag started, so it must not be persisted.
    let latest = height
    const onMove = (ev: PointerEvent) => {
      if (!dragRef.current) return
      latest = clamp(dragRef.current.startHeight + (dragRef.current.startY - ev.clientY), MIN_HEIGHT, MAX_HEIGHT)
      setHeight(latest)
    }
    const onUp = () => {
      if (dragRef.current) {
        try { window.localStorage.setItem(STORAGE_KEY, String(latest)) } catch {}
      }
      dragRef.current = null
      window.removeEventListener("pointermove", onMove)
      window.removeEventListener("pointerup", onUp)
      window.removeEventListener("pointercancel", onUp)
    }
    window.addEventListener("pointermove", onMove)
    window.addEventListener("pointerup", onUp)
    // A cancelled pointer never fires pointerup; end the drag then as well.
    window.addEventListener("pointercancel", onUp)
  }

  return (
    <aside
      className="pointer-events-auto absolute inset-x-4 bottom-4 z-40 overflow-hidden rounded-xl border border-white/12 bg-[rgba(10,13,28,0.88)] shadow-[0_24px_80px_-28px_rgba(0,0,0,0.85)] backdrop-blur-xl"
      style={{ height: collapsed ? 48 : height }}
    >
      <div
        className="flex h-4 cursor-ns-resize items-center justify-center border-b border-white/8 bg-white/[0.035]"
        onPointerDown={startDrag}
        title="拖拽调整音频条高度"
      >
        <GripHorizontal className="h-3.5 w-3.5 text-white/45" />
      </div>
      <div className="flex h-8 items-center justify-between gap-3 border-b border-white/8 px-3">
        <div className="flex min-w-0 items-center gap-2">
          <Mic2 className="h-3.5 w-3.5 shrink-0 text-violet-200" />
          <span className="truncate text-[12px] font-semibold text-white/90"> · / / </span>
          <span className="rounded-full border border-white/10 px-2 py-0.5 text-[10px] text-white/45">{transcript.length || 0} </span>
        </div>
        <div className="flex items-center gap-2">
          <button
            type="button"
            onClick={() => setCollapsed((v) => !v)}
            className="inline-flex h-6 w-6 items-center justify-center rounded-md border border-white/10 text-white/65 transition hover:bg-white/10 hover:text-white"
            title={collapsed ? "展开音频条" : "收起音频条"}
          >
            {collapsed ? <ChevronUp className="h-3.5 w-3.5" /> : <ChevronDown className="h-3.5 w-3.5" />}
          </button>
          {onClose && (
            <button
              type="button"
              onClick={onClose}
              className="inline-flex h-6 w-6 items-center justify-center rounded-md border border-white/10 text-white/65 transition hover:bg-white/10 hover:text-white"
              title="关闭音频条"
            >
              <X className="h-3.5 w-3.5" />
            </button>
          )}
        </div>
      </div>
      {!collapsed && (
        <div className="grid h-[calc(100%-48px)] grid-cols-[minmax(0,1fr)_300px] gap-3 p-3 max-lg:grid-cols-1">
          <div className="flex min-w-0 min-h-0 flex-col gap-3 overflow-hidden">
            <div className="rounded-lg border border-white/10 bg-black/20 p-2">
              <div className="mb-2 flex items-center justify-between gap-3">
                <div className="min-w-0 text-[10px] uppercase tracking-widest text-white/45">
                  Source audio playback
                  {activeSegment ? <span className="ml-2 text-emerald-200/80">#{activeSegment.index + 1}</span> : null}
                </div>
                <div className="shrink-0 font-mono text-[10px] text-white/45">
                  {currentTime.toFixed(1)}s / {duration.toFixed(1)}s
                </div>
              </div>
              {sourceReady ? (
                <audio
                  key={audioKey}
                  ref={audioRef}
                  controls
                  src={sourceUrl}
                  className="h-8 w-full"
                  onTimeUpdate={(event) => setCurrentTime(event.currentTarget.currentTime)}
                  onSeeked={(event) => setCurrentTime(event.currentTarget.currentTime)}
                  onLoadedMetadata={(event) => syncAudioClock(event.currentTarget)}
                  onDurationChange={(event) => syncAudioClock(event.currentTarget)}
                />
              ) : (
                <div className="flex h-8 items-center rounded-md border border-dashed border-white/12 px-3 text-[11px] text-white/45">
                  {processing ? "正在提取原音频并准备波形..." : "等待原音频波形..."}
                </div>
              )}
              <div className="relative mt-2">
                <Waveform peaks={timelinePeaks} active={sourceReady} />
                <div
                  className="pointer-events-none absolute inset-y-0 w-[2px] bg-emerald-200 shadow-[0_0_18px_rgba(110,231,183,0.9)]"
                  style={{ left: `${pointerPct}%` }}
                />
              </div>
            </div>
            <div className="min-h-0 overflow-x-auto overflow-y-hidden pb-1">
              {transcript.length > 0 ? (
                <div className="flex h-full items-stretch gap-3">
                  {transcript.map((segment) => (
                    <SegmentCard
                      key={segment.index}
                      segment={segment}
                      peaks={peaks}
                      duration={duration}
                      currentTime={currentTime}
                    />
                  ))}
                </div>
              ) : (
                <div className="flex h-full items-center justify-center rounded-lg border border-dashed border-white/12 text-[12px] text-white/45">
                </div>
              )}
            </div>
          </div>
          <div className="min-h-0 overflow-y-auto rounded-lg border border-emerald-300/20 bg-emerald-300/[0.07] p-3 max-lg:hidden">
            <div className="mb-2 text-[10px] uppercase tracking-widest text-emerald-100/70">Original audio analysis</div>
            <div className="space-y-3 text-[12px] leading-relaxed text-white/86">
              <div>
                <div className="mb-1 text-[10px] font-semibold uppercase tracking-widest text-white/38"></div>
                <p>{audioScript?.source_text || "Waiting for transcript extraction."}</p>
              </div>
              {audioScript?.source_zh && (
                <div>
                  <div className="mb-1 text-[10px] font-semibold uppercase tracking-widest text-white/38"></div>
                  <p>{audioScript.source_zh}</p>
                </div>
              )}
              <div className="border-t border-white/10 pt-3 text-[11px] text-white/60">
                {audioScript?.speaker_profile && <p><span className="text-white/36"></span>{audioScript.speaker_profile}</p>}
                {audioScript?.rhythm_profile && <p className="mt-1"><span className="text-white/36"></span>{audioScript.rhythm_profile}</p>}
                {audioScript?.background_audio_profile && <p className="mt-1"><span className="text-white/36"></span>{audioScript.background_audio_profile}</p>}
              </div>
            </div>
          </div>
        </div>
      )}
    </aside>
  )
}