diff --git a/.memory/worklog.json b/.memory/worklog.json
index 2ae3f44..0c5319d 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,19 +1,5 @@
 {
   "entries": [
-    {
-      "files_changed": 1,
-      "hash": "c8fd985",
-      "message": "auto-save 2026-05-13 03:54 (~1)",
-      "ts": "2026-05-13T03:54:21+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "hash": "ffc7437",
-      "message": "auto-save 2026-05-13 04:00 (~1)",
-      "ts": "2026-05-13T04:00:13+08:00",
-      "type": "commit"
-    },
     {
       "files_changed": 1,
       "hash": "7a5b09a",
@@ -3299,6 +3285,19 @@
       "type": "session-heartbeat",
       "message": "Codex 会话活跃 · 最近命令：codex · 1 项未提交变更 · 最近提交：auto-save 2026-05-14 11:36 (~3)",
       "files_changed": 1
+    },
+    {
+      "ts": "2026-05-14T11:42:06+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-14 11:41 (~1)",
+      "hash": "b474d80",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-14T03:46:10Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 6 项未提交变更 · 最近提交：auto-save 2026-05-14 11:41 (~1)",
+      "files_changed": 6
     }
   ]
 }
diff --git a/api/.env.example b/api/.env.example
index 5b6a159..1ed17f6 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -19,7 +19,7 @@ AUDIO_PRODUCT_BRIEF="SKG 智能按摩产品，主打日常肩颈、腰背、眼
 MINIMAX_API_KEY=
 MINIMAX_TTS_BASE_URL=https://api.minimax.io
 MINIMAX_TTS_MODEL=speech-2.8-turbo
-MINIMAX_TTS_VOICE_ID="Chinese (Mandarin)_Reliable_Executive"
+MINIMAX_TTS_VOICE_ID=English_expressive_narrator
 
 # Poe 视频 API（优先用于 Seedance / Kling / Veo）
 POE_API_BASE_URL=https://api.poe.com/v1
diff --git a/api/main.py b/api/main.py
index 3abc6ac..84805b6 100644
--- a/api/main.py
+++ b/api/main.py
@@ -49,8 +49,8 @@ MINIMAX_TTS_BASE_URL = os.getenv("MINIMAX_TTS_BASE_URL", "https://api.minimax.io
 MINIMAX_TTS_MODEL = os.getenv("MINIMAX_TTS_MODEL", "speech-2.8-turbo").strip() or "speech-2.8-turbo"
 MINIMAX_TTS_VOICE_ID = os.getenv(
     "MINIMAX_TTS_VOICE_ID",
-    "Chinese (Mandarin)_Reliable_Executive",
-).strip() or "Chinese (Mandarin)_Reliable_Executive"
+    "English_expressive_narrator",
+).strip() or "English_expressive_narrator"
 
 POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1"
 POE_API_KEY = os.getenv("POE_API_KEY", "").strip()
@@ -379,6 +379,7 @@ class Job(BaseModel):
     duration: float = 0.0
     width: int = 0
     height: int = 0
+    source_audio_url: str = ""
     frames: list[KeyFrame] = Field(default_factory=list)
     transcript: list[TranscriptSegment] = Field(default_factory=list)
     audio_script: AudioScript = Field(default_factory=AudioScript)
@@ -400,6 +401,14 @@ def job_dir(job_id: str) -> Path:
     return d
 
 
+def source_audio_url_for(job_id: str) -> str:  # "/jobs/<job_id>/audio.wav" when JOBS_DIR/<job_id>/audio.wav exists, otherwise ""
+    return f"/jobs/{job_id}/audio.wav" if (JOBS_DIR / job_id / "audio.wav").exists() else ""
+
+
+def job_with_artifacts(job: Job) -> Job:  # returns a copy of `job` whose source_audio_url is recomputed by source_audio_url_for(job.id)
+    return job.model_copy(update={"source_audio_url": source_audio_url_for(job.id)})
+
+
 def save_state(job: Job) -> None:
     (job_dir(job.id) / "state.json").write_text(job.model_dump_json(indent=2))
 
@@ -1224,7 +1233,7 @@ def pipeline_analyze(
 
         wav = d / "audio.wav"
         if wav.exists():
-            update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35)
+            update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35, source_audio_url=f"/jobs/{job_id}/audio.wav")
         else:
             update(job, status="splitting", message="ffmpeg 拆分音轨…", progress=35)
             run([
@@ -1232,6 +1241,7 @@ def pipeline_analyze(
                 "-vn", "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le",
                 str(wav),
             ])
+            update(job, source_audio_url=f"/jobs/{job_id}/audio.wav")
         n = max(1, min(int(frame_count), 20))
         target_label = FRAME_TARGET_LABELS.get(target, FRAME_TARGET_LABELS["balanced"])
         duration = max(float(job.duration or 1.0), 0.1)
@@ -1497,12 +1507,12 @@ def _transcript_join(segments: list[TranscriptSegment], field: Literal["en", "zh
 
 
 def _fallback_audio_script(segments: list[TranscriptSegment]) -> str:
-    joined = " ".join((s.zh or s.en).strip() for s in segments if (s.zh or s.en).strip())
+    joined = " ".join((s.en or s.zh).strip() for s in segments if (s.en or s.zh).strip())  # NOTE(review): only selects which canned line below is returned; the joined text itself is never returned
     if not joined:
-        return "日常疲惫不用硬扛。戴上 SKG，让肩颈慢慢放松，跟着呼吸找回轻松状态。"
+        return "Ease into the moment with SKG. Gentle warmth and rhythmic massage help everyday tension feel lighter, cleaner, and easier to leave behind."
     return (
-        "把日常紧绷交给 SKG。贴合身体需要放松的位置，热敷与按摩节奏自然陪伴，"
-        "让每一次短暂休息都更轻松、更有质感。"
+        "Let SKG turn a short break into real relief. With soothing warmth and steady massage rhythm, "
+        "everyday tension feels lighter, calmer, and easier to leave behind."
     )
 
 
@@ -1513,24 +1523,24 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment]) -> tuple[str,
     source_text = _transcript_join(segments, "en")
     source_zh = _transcript_join(segments, "zh")
     prompt = (
-        "你是 SKG 短视频口播编导。根据参考视频音频转写，抽取它的表达结构、情绪节奏和可复用卖点，"
-        "改写成适合 SKG 按摩/放松产品二创视频的中文口播文案。\n"
-        "要求：\n"
-        "1. 输出 35-90 个中文字，适合 8-18 秒短视频配音。\n"
-        "2. 口语化、干净、高级，能直接给 TTS 朗读。\n"
-        "3. 不承诺治疗、治愈、医学疗效，不夸大。\n"
-        "4. 不复刻原视频品牌/人物/价格/平台话术，只保留表达结构。\n"
-        "5. 如果参考转写信息不足，按产品信息生成通用 SKG 放松口播。\n"
-        '严格返回 JSON：{"rewritten_text":"..."}。\n\n'
-        f"SKG 产品信息：{AUDIO_PRODUCT_BRIEF}\n\n"
-        f"英文转写：\n{source_text or '无'}\n\n"
-        f"中文翻译：\n{source_zh or '无'}"
+        "You are an English short-video voice-over writer for SKG wellness massagers. "
+        "Use the source transcript only for structure, pacing, and emotional hook, then rewrite it into a clean English VO for SKG.\n"
+        "Rules:\n"
+        "1. Output 28-55 English words, suitable for an 8-18 second TTS voice-over.\n"
+        "2. Make it natural, premium, concise, and ready to read aloud.\n"
+        "3. Do not claim medical treatment, cure, pain elimination, or clinical effects.\n"
+        "4. Do not copy the original brand, creator, price, platform language, or exact claims.\n"
+        "5. If the source transcript is too thin, write a general SKG relaxation VO.\n"
+        'Return strict JSON only: {"rewritten_text":"..."}.\n\n'
+        f"SKG product context: {AUDIO_PRODUCT_BRIEF}\n\n"
+        f"English transcript:\n{source_text or 'None'}\n\n"
+        f"Chinese translation for reference:\n{source_zh or 'None'}"
     )
     try:
         resp = llm().chat.completions.create(
             model=AUDIO_REWRITE_MODEL,
             messages=[
-                {"role": "system", "content": "只输出合法 JSON，不要解释，不要 markdown。"},
+                {"role": "system", "content": "Return valid JSON only. No explanation. No markdown."},
                 {"role": "user", "content": prompt},
             ],
             response_format={"type": "json_object"},
@@ -1564,7 +1574,7 @@ def _minimax_tts_sync(job_id: str, text: str) -> str:
         "model": MINIMAX_TTS_MODEL,
         "text": text.strip()[:9500],
         "stream": False,
-        "language_boost": "Chinese",
+        "language_boost": "English",
         "output_format": "hex",
         "voice_setting": {
             "voice_id": MINIMAX_TTS_VOICE_ID,
@@ -1651,6 +1661,7 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
             ])
             if not wav.exists():
                 raise RuntimeError("音频提取完成但找不到 audio.wav")
+        update(job, source_audio_url=f"/jobs/{job_id}/audio.wav")
 
         if not LLM_API_KEY:
             # 无 key 模式：mock 数据
@@ -2112,7 +2123,7 @@ def get_job(job_id: str) -> Job:
     job = JOBS.get(job_id)
     if not job:
         raise HTTPException(404, "job not found")
-    return job
+    return job_with_artifacts(job)
 
 
 @app.delete("/jobs/{job_id}")
@@ -2153,7 +2164,7 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job:
         update(job, error="", audio_script=audio_payload)
     if not start_audio_processing(job_id, manage_job_status=manage_job_status):
         update(job, message="音频已在处理中")
-    return job
+    return job_with_artifacts(job)
 
 
 @app.get("/jobs/{job_id}/video.mp4")
diff --git a/web/app/page.tsx b/web/app/page.tsx
index 5940cce..1bec5be 100644
--- a/web/app/page.tsx
+++ b/web/app/page.tsx
@@ -100,6 +100,8 @@ export default function Home() {
   const [jobs, setJobs] = useState<Job[]>([])
   const [activeJobId, setActiveJobId] = useState<string | null>(null)
   const job = useMemo(() => jobs.find((j) => j.id === activeJobId) ?? null, [jobs, activeJobId])
+  const [audioStripJobId, setAudioStripJobId] = useState<string | null>(null)
+  const audioStripJob = useMemo(() => jobs.find((j) => j.id === audioStripJobId) ?? null, [jobs, audioStripJobId])
   const [submitting, setSubmitting] = useState(false)
   const [analyzing, setAnalyzing] = useState(false)
   const [frameTargets, setFrameTargets] = useState<Record<string, FrameExtractTarget>>({})
@@ -159,6 +161,10 @@ export default function Home() {
   const handleSwitchJob = useCallback((id: string) => {
     setActiveJobId(id)
   }, [])
+  const handleOpenAudioStrip = useCallback((jobId?: string) => {  // show the audio strip for jobId, falling back to the active job; does nothing when neither is set
+    const targetId = jobId ?? activeJobId
+    if (targetId) setAudioStripJobId(targetId)
+  }, [activeJobId])
   const pollRef = useRef<ReturnType<typeof setInterval> | null>(null)
 
   const handleSubmit = useCallback(async (url: string) => {
@@ -393,16 +399,13 @@ export default function Home() {
   const handleTranscribeAudio = useCallback(async (jobId?: string, options?: { silent?: boolean }) => {
     const targetId = jobId ?? activeJobId
     if (!targetId) return
+    setAudioStripJobId(targetId)
     const target = jobs.find((item) => item.id === targetId)
     if (!target) return
     if (!target.video_url) {
       if (!options?.silent) toast.info("视频导入完成后，可在音频卡片点击提取音频")
       return
     }
-    if (target.status === "splitting") {
-      if (!options?.silent) toast.info("当前正在抽帧，结束后可重新点击提取音频")
-      return
-    }
     if (target.status === "transcribing" || target.audio_script?.status === "rewriting") {
       if (!options?.silent) toast.info("音频正在处理中")
       return
@@ -728,9 +731,10 @@ export default function Home() {
     onCopyImage: handleCopyImage,
     onGenerateProductFusionVideo: handleGenerateProductFusionVideo,
     onTranscribeAudio: handleTranscribeAudio,
+    onOpenAudioStrip: handleOpenAudioStrip,
     pinnedNodes,
     onToggleNodePin: handleToggleNodePin,
-  }), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, handleTranscribeAudio, pinnedNodes, handleToggleNodePin])
+  }), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, handleTranscribeAudio, handleOpenAudioStrip, pinnedNodes, handleToggleNodePin])
 
   // 用 useNodesState 让 ReactFlow 自己管位置（避免轮询时重置 drag）
   const savedSizes = useMemo(() => loadNodeSizes(), [])
@@ -1013,7 +1017,7 @@ export default function Home() {
             <div className="h-full w-full" suppressHydrationWarning />
           )}
           </div>
-          {clientReady && <AudioStrip job={job} />}
+          {clientReady && <AudioStrip job={audioStripJob} open={!!audioStripJob} onClose={() => setAudioStripJobId(null)} />}
         </section>
 
         <Toaster theme="system" position="top-center" />
diff --git a/web/components/audio-strip.tsx b/web/components/audio-strip.tsx
index 0b2d16f..ab479fd 100644
--- a/web/components/audio-strip.tsx
+++ b/web/components/audio-strip.tsx
@@ -1,7 +1,7 @@
 "use client"
 
 import { useEffect, useMemo, useRef, useState, type PointerEvent as ReactPointerEvent } from "react"
-import { ChevronDown, ChevronUp, GripHorizontal, Mic2, Volume2 } from "lucide-react"
+import { ChevronDown, ChevronUp, GripHorizontal, Mic2, Volume2, X } from "lucide-react"
 import { apiAssetUrl, sourceAudioUrl, type Job, type TranscriptSegment } from "@/lib/api"
 
 const STORAGE_KEY = "skg.audio-strip.height"
@@ -58,20 +58,34 @@ function SegmentCard({
   segment,
   peaks,
   duration,
+  currentTime,
 }: {
   segment: TranscriptSegment
   peaks: number[]
   duration: number
+  currentTime: number
 }) {
   const segDuration = Math.max(1.2, segment.end - segment.start)
   const width = clamp(180 + segDuration * 42, 220, 520)
   const segPeaks = slicePeaks(peaks, segment.start, segment.end, duration)
+  const active = currentTime >= segment.start && currentTime <= Math.max(segment.end, segment.start + 0.2)
+  const pointerPct = active ? clamp(((currentTime - segment.start) / Math.max(0.2, segment.end - segment.start)) * 100, 0, 100) : 0
 
   return (
     <article
-      className="shrink-0 rounded-lg border border-white/10 bg-white/[0.045] p-3 shadow-[0_12px_30px_-22px_rgba(0,0,0,0.8)]"
+      className={`relative shrink-0 overflow-hidden rounded-lg border p-3 shadow-[0_12px_30px_-22px_rgba(0,0,0,0.8)] transition ${
+        active
+          ? "border-emerald-300/55 bg-emerald-300/[0.105]"
+          : "border-white/10 bg-white/[0.045]"
+      }`}
       style={{ width }}
     >
+      {active && (
+        <div
+          className="pointer-events-none absolute inset-y-0 z-10 w-[2px] bg-emerald-200 shadow-[0_0_18px_rgba(110,231,183,0.9)]"
+          style={{ left: `${pointerPct}%` }}
+        />
+      )}
       <div className="mb-2 flex items-center justify-between gap-3">
         <span className="font-mono text-[10px] text-[var(--text-faint)]">
           {segment.start.toFixed(1)}s to {segment.end.toFixed(1)}s
@@ -93,7 +107,7 @@ function SegmentCard({
             {segment.zh || <span className="text-[var(--text-faint)] italic">翻译中...</span>}
           </p>
         </div>
-        <Waveform peaks={segPeaks} />
+        <Waveform peaks={segPeaks} active={active} />
       </div>
     </article>
   )
@@ -126,19 +140,33 @@ async function decodeWaveform(url: string, targetPeaks = 1800) {
   }
 }
 
-export function AudioStrip({ job }: { job: Job | null }) {
+export function AudioStrip({ job, open, onClose }: { job: Job | null; open: boolean; onClose?: () => void }) {
   const [collapsed, setCollapsed] = useState(false)
   const [height, setHeight] = useState(DEFAULT_HEIGHT)
   const [peaks, setPeaks] = useState<number[]>([])
+  const [sourceReady, setSourceReady] = useState(false)
+  const [audioKey, setAudioKey] = useState(0)
+  const [currentTime, setCurrentTime] = useState(0)
   const dragRef = useRef<{ startY: number; startHeight: number } | null>(null)
+  const audioRef = useRef<HTMLAudioElement>(null)
   const transcript = job?.transcript ?? []
   const audioScript = job?.audio_script
   const voiceUrl = apiAssetUrl(audioScript?.voice_url)
-  const hasAudio = !!job && (transcript.length > 0 || !!audioScript?.rewritten_text || job.status === "transcribing")
+  const sourceUrl = job ? apiAssetUrl(job.source_audio_url || sourceAudioUrl(job.id)) : ""
+  const processing = !!job && (job.status === "transcribing" || audioScript?.status === "rewriting")
+  const activeSegment = transcript.find((segment) => currentTime >= segment.start && currentTime <= Math.max(segment.end, segment.start + 0.2))
   const duration = useMemo(() => {
     const lastTranscriptEnd = transcript.reduce((max, s) => Math.max(max, s.end || 0), 0)
-    return Math.max(job?.duration ?? 0, lastTranscriptEnd, 1)
+    const audioDuration = audioRef.current?.duration
+    return Math.max(
+      Number.isFinite(audioDuration) ? Number(audioDuration) : 0,
+      job?.duration ?? 0,
+      lastTranscriptEnd,
+      1,
+    )
   }, [job?.duration, transcript])
+  const timelinePeaks = useMemo(() => slicePeaks(peaks, 0, duration, duration, 160), [duration, peaks])
+  const pointerPct = clamp((currentTime / duration) * 100, 0, 100)
 
   useEffect(() => {
     if (typeof window === "undefined") return
@@ -148,21 +176,38 @@ export function AudioStrip({ job }: { job: Job | null }) {
 
   useEffect(() => {
     let cancelled = false
+    let timer: ReturnType<typeof setTimeout> | null = null
+    let attempts = 0
     setPeaks([])
-    if (!job?.id || !hasAudio) return
-    decodeWaveform(sourceAudioUrl(job.id))
+    setSourceReady(false)
+    setCurrentTime(0)
+    if (!job?.id || !open) return
+    setPeaks(fallbackPeaks(1800, `${job.id}-loading`))
+    const load = () => {
+      attempts += 1
+      decodeWaveform(sourceUrl)
       .then((next) => {
-        if (!cancelled) setPeaks(next)
+        if (cancelled) return
+        setPeaks(next)
+        setSourceReady(true)
+        setAudioKey((key) => key + 1)
       })
       .catch(() => {
-        if (!cancelled) setPeaks(fallbackPeaks(1800, `${job.id}-${transcript.length}`))
+        if (cancelled) return
+        setSourceReady(false)
+        if (attempts < (processing ? 45 : 6)) {
+          timer = setTimeout(load, 1000)
+        }
       })
+    }
+    load()
     return () => {
       cancelled = true
+      if (timer) clearTimeout(timer)
     }
-  }, [job?.id, hasAudio, transcript.length])
+  }, [job?.id, open, processing, sourceUrl, transcript.length])
 
-  if (!hasAudio || !job) return null
+  if (!open || !job) return null
 
   const startDrag = (e: ReactPointerEvent<HTMLDivElement>) => {
     e.preventDefault()
@@ -206,7 +251,7 @@ export function AudioStrip({ job }: { job: Job | null }) {
           {voiceUrl && (
             <div className="hidden items-center gap-1.5 text-[10px] text-emerald-200/80 sm:flex">
               <Volume2 className="h-3.5 w-3.5" />
-              MiniMax ready
+              English VO ready
             </div>
           )}
           <button
@@ -217,27 +262,79 @@ export function AudioStrip({ job }: { job: Job | null }) {
           >
             {collapsed ? <ChevronUp className="h-3.5 w-3.5" /> : <ChevronDown className="h-3.5 w-3.5" />}
           </button>
+          {onClose && (
+            <button
+              type="button"
+              onClick={onClose}
+              className="inline-flex h-6 w-6 items-center justify-center rounded-md border border-white/10 text-white/65 transition hover:bg-white/10 hover:text-white"
+              title="关闭音频条"
+            >
+              <X className="h-3.5 w-3.5" />
+            </button>
+          )}
         </div>
       </div>
       {!collapsed && (
         <div className="grid h-[calc(100%-48px)] grid-cols-[minmax(0,1fr)_300px] gap-3 p-3 max-lg:grid-cols-1">
-          <div className="min-w-0 overflow-x-auto overflow-y-hidden pb-1">
-            {transcript.length > 0 ? (
-              <div className="flex h-full items-stretch gap-3">
-                {transcript.map((segment) => (
-                  <SegmentCard key={segment.index} segment={segment} peaks={peaks} duration={duration} />
-                ))}
+          <div className="flex min-w-0 min-h-0 flex-col gap-3 overflow-hidden">
+            <div className="rounded-lg border border-white/10 bg-black/20 p-2">
+              <div className="mb-2 flex items-center justify-between gap-3">
+                <div className="min-w-0 text-[10px] uppercase tracking-widest text-white/45">
+                  Source audio playback
+                  {activeSegment ? <span className="ml-2 text-emerald-200/80">#{activeSegment.index + 1}</span> : null}
+                </div>
+                <div className="shrink-0 font-mono text-[10px] text-white/45">
+                  {currentTime.toFixed(1)}s / {duration.toFixed(1)}s
+                </div>
               </div>
+              {sourceReady ? (
+                <audio
+                  key={audioKey}
+                  ref={audioRef}
+                  controls
+                  src={sourceUrl}
+                  className="h-8 w-full"
+                  onTimeUpdate={(event) => setCurrentTime(event.currentTarget.currentTime)}
+                  onSeeked={(event) => setCurrentTime(event.currentTarget.currentTime)}
+                  onLoadedMetadata={(event) => setCurrentTime(event.currentTarget.currentTime)}
+                />
+              ) : (
+                <div className="flex h-8 items-center rounded-md border border-dashed border-white/12 px-3 text-[11px] text-white/45">
+                  {processing ? "正在提取原音频并准备波形..." : "等待原音频波形..."}
+                </div>
+              )}
+              <div className="relative mt-2">
+                <Waveform peaks={timelinePeaks} active={sourceReady} />
+                <div
+                  className="pointer-events-none absolute inset-y-0 w-[2px] bg-emerald-200 shadow-[0_0_18px_rgba(110,231,183,0.9)]"
+                  style={{ left: `${pointerPct}%` }}
+                />
+              </div>
+            </div>
+            <div className="min-h-0 overflow-x-auto overflow-y-hidden pb-1">
+              {transcript.length > 0 ? (
+                <div className="flex h-full items-stretch gap-3">
+                  {transcript.map((segment) => (
+                    <SegmentCard
+                      key={segment.index}
+                      segment={segment}
+                      peaks={peaks}
+                      duration={duration}
+                      currentTime={currentTime}
+                    />
+                  ))}
+                </div>
             ) : (
               <div className="flex h-full items-center justify-center rounded-lg border border-dashed border-white/12 text-[12px] text-white/45">
-                音频识别完成后，这里会按时间显示英文、中文翻译和对应波形。
+                点击音频卡片后开始解析；完成后这里会按时间显示英文、中文翻译和对应波形。
               </div>
             )}
+            </div>
           </div>
           <div className="min-h-0 overflow-y-auto rounded-lg border border-emerald-300/20 bg-emerald-300/[0.07] p-3 max-lg:hidden">
-            <div className="mb-2 text-[10px] uppercase tracking-widest text-emerald-100/70">改后 · SKG 口播</div>
+            <div className="mb-2 text-[10px] uppercase tracking-widest text-emerald-100/70">English VO · SKG rewrite</div>
             <p className="text-[12.5px] leading-relaxed text-white/90">
-              {audioScript?.rewritten_text || "等待转录完成后生成适合 SKG 产品视频的口播文案。"}
+              {audioScript?.rewritten_text || "Waiting for the parsed transcript to become an English SKG voice-over."}
             </p>
             {voiceUrl && (
               <audio controls src={voiceUrl} className="mt-3 h-8 w-full" />
diff --git a/web/components/nodes/index.tsx b/web/components/nodes/index.tsx
index 2478fdc..32d626e 100644
--- a/web/components/nodes/index.tsx
+++ b/web/components/nodes/index.tsx
@@ -76,6 +76,7 @@ export interface NodeData {
   onCopyImage?: (ref: ImageRef) => void  // 复制图片到全局剪贴板（粘贴到分镜头编排插槽）
   onGenerateProductFusionVideo?: (frameIdx: number, shot: ProductFusionShot) => Promise<void> | void
   onTranscribeAudio?: (jobId?: string) => Promise<void> | void
+  onOpenAudioStrip?: (jobId?: string) => void
   pinnedNodes?: Set<string>              // 已钉住的节点 id 集合 — 钉住后位置 + 尺寸锁定
   onToggleNodePin?: (id: string) => void
 }
@@ -2141,7 +2142,12 @@ export function AudioNode({ data, selected }: any) {
       pinned={d.pinnedNodes?.has("audio")}
       onTogglePin={() => d.onToggleNodePin?.("audio")}
     >
-      <div className="space-y-2 text-[11px] text-[var(--text-soft)] leading-snug">
+      <div
+        className="space-y-2 text-[11px] text-[var(--text-soft)] leading-snug"
+        onClick={() => {
+          if (job?.video_url) d.onOpenAudioStrip?.(job.id)
+        }}
+      >
         <div>
           音轨 → ASR 转录 → 英中翻译 → SKG 口播改写 → MiniMax 配音<br />
           <span className="text-[var(--text-faint)] font-mono">
@@ -2154,6 +2160,7 @@ export function AudioNode({ data, selected }: any) {
             disabled={audioButtonDisabled}
             onClick={(e) => {
               e.stopPropagation()
+              d.onOpenAudioStrip?.(job.id)
               if (audioButtonDisabled) return
               void d.onTranscribeAudio?.(job.id)
             }}
@@ -2185,9 +2192,7 @@ export function AudioNode({ data, selected }: any) {
             )}
           </div>
         )}
-        {voiceUrl && (
-          <audio controls src={voiceUrl} className="h-7 w-full" />
-        )}
+        {voiceUrl && <div className="text-[10.5px] text-emerald-200/85">MiniMax English voice ready · 底部音频条播放</div>}
         {isRewriting && (
           <div className="text-[10.5px] text-[var(--text-faint)]">正在生成改写文案和配音…</div>
         )}
diff --git a/web/lib/api.ts b/web/lib/api.ts
index a2541da..7ddbd43 100644
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -331,6 +331,7 @@ export interface Job {
   duration?: number
   width?: number
   height?: number
+  source_audio_url?: string
   frames: KeyFrame[]
   transcript: TranscriptSegment[]
   audio_script?: AudioScript