auto-save 2026-05-14 11:47 (~7)
This commit is contained in:
@@ -1,19 +1,5 @@
|
|||||||
{
|
{
|
||||||
"entries": [
|
"entries": [
|
||||||
{
|
|
||||||
"files_changed": 1,
|
|
||||||
"hash": "c8fd985",
|
|
||||||
"message": "auto-save 2026-05-13 03:54 (~1)",
|
|
||||||
"ts": "2026-05-13T03:54:21+08:00",
|
|
||||||
"type": "commit"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"files_changed": 1,
|
|
||||||
"hash": "ffc7437",
|
|
||||||
"message": "auto-save 2026-05-13 04:00 (~1)",
|
|
||||||
"ts": "2026-05-13T04:00:13+08:00",
|
|
||||||
"type": "commit"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"files_changed": 1,
|
"files_changed": 1,
|
||||||
"hash": "7a5b09a",
|
"hash": "7a5b09a",
|
||||||
@@ -3299,6 +3285,19 @@
|
|||||||
"type": "session-heartbeat",
|
"type": "session-heartbeat",
|
||||||
"message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-14 11:36 (~3)",
|
"message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-14 11:36 (~3)",
|
||||||
"files_changed": 1
|
"files_changed": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ts": "2026-05-14T11:42:06+08:00",
|
||||||
|
"type": "commit",
|
||||||
|
"message": "auto-save 2026-05-14 11:41 (~1)",
|
||||||
|
"hash": "b474d80",
|
||||||
|
"files_changed": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ts": "2026-05-14T03:46:10Z",
|
||||||
|
"type": "session-heartbeat",
|
||||||
|
"message": "Codex 会话活跃 · 最近命令:codex · 6 项未提交变更 · 最近提交:auto-save 2026-05-14 11:41 (~1)",
|
||||||
|
"files_changed": 6
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ AUDIO_PRODUCT_BRIEF="SKG 智能按摩产品,主打日常肩颈、腰背、眼
|
|||||||
MINIMAX_API_KEY=
|
MINIMAX_API_KEY=
|
||||||
MINIMAX_TTS_BASE_URL=https://api.minimax.io
|
MINIMAX_TTS_BASE_URL=https://api.minimax.io
|
||||||
MINIMAX_TTS_MODEL=speech-2.8-turbo
|
MINIMAX_TTS_MODEL=speech-2.8-turbo
|
||||||
MINIMAX_TTS_VOICE_ID="Chinese (Mandarin)_Reliable_Executive"
|
MINIMAX_TTS_VOICE_ID=English_expressive_narrator
|
||||||
|
|
||||||
# Poe 视频 API(优先用于 Seedance / Kling / Veo)
|
# Poe 视频 API(优先用于 Seedance / Kling / Veo)
|
||||||
POE_API_BASE_URL=https://api.poe.com/v1
|
POE_API_BASE_URL=https://api.poe.com/v1
|
||||||
|
|||||||
57
api/main.py
57
api/main.py
@@ -49,8 +49,8 @@ MINIMAX_TTS_BASE_URL = os.getenv("MINIMAX_TTS_BASE_URL", "https://api.minimax.io
|
|||||||
MINIMAX_TTS_MODEL = os.getenv("MINIMAX_TTS_MODEL", "speech-2.8-turbo").strip() or "speech-2.8-turbo"
|
MINIMAX_TTS_MODEL = os.getenv("MINIMAX_TTS_MODEL", "speech-2.8-turbo").strip() or "speech-2.8-turbo"
|
||||||
MINIMAX_TTS_VOICE_ID = os.getenv(
|
MINIMAX_TTS_VOICE_ID = os.getenv(
|
||||||
"MINIMAX_TTS_VOICE_ID",
|
"MINIMAX_TTS_VOICE_ID",
|
||||||
"Chinese (Mandarin)_Reliable_Executive",
|
"English_expressive_narrator",
|
||||||
).strip() or "Chinese (Mandarin)_Reliable_Executive"
|
).strip() or "English_expressive_narrator"
|
||||||
|
|
||||||
POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1"
|
POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1"
|
||||||
POE_API_KEY = os.getenv("POE_API_KEY", "").strip()
|
POE_API_KEY = os.getenv("POE_API_KEY", "").strip()
|
||||||
@@ -379,6 +379,7 @@ class Job(BaseModel):
|
|||||||
duration: float = 0.0
|
duration: float = 0.0
|
||||||
width: int = 0
|
width: int = 0
|
||||||
height: int = 0
|
height: int = 0
|
||||||
|
source_audio_url: str = ""
|
||||||
frames: list[KeyFrame] = Field(default_factory=list)
|
frames: list[KeyFrame] = Field(default_factory=list)
|
||||||
transcript: list[TranscriptSegment] = Field(default_factory=list)
|
transcript: list[TranscriptSegment] = Field(default_factory=list)
|
||||||
audio_script: AudioScript = Field(default_factory=AudioScript)
|
audio_script: AudioScript = Field(default_factory=AudioScript)
|
||||||
@@ -400,6 +401,14 @@ def job_dir(job_id: str) -> Path:
|
|||||||
return d
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
def source_audio_url_for(job_id: str) -> str:
|
||||||
|
return f"/jobs/{job_id}/audio.wav" if (JOBS_DIR / job_id / "audio.wav").exists() else ""
|
||||||
|
|
||||||
|
|
||||||
|
def job_with_artifacts(job: Job) -> Job:
|
||||||
|
return job.model_copy(update={"source_audio_url": source_audio_url_for(job.id)})
|
||||||
|
|
||||||
|
|
||||||
def save_state(job: Job) -> None:
|
def save_state(job: Job) -> None:
|
||||||
(job_dir(job.id) / "state.json").write_text(job.model_dump_json(indent=2))
|
(job_dir(job.id) / "state.json").write_text(job.model_dump_json(indent=2))
|
||||||
|
|
||||||
@@ -1224,7 +1233,7 @@ def pipeline_analyze(
|
|||||||
|
|
||||||
wav = d / "audio.wav"
|
wav = d / "audio.wav"
|
||||||
if wav.exists():
|
if wav.exists():
|
||||||
update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35)
|
update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35, source_audio_url=f"/jobs/{job_id}/audio.wav")
|
||||||
else:
|
else:
|
||||||
update(job, status="splitting", message="ffmpeg 拆分音轨…", progress=35)
|
update(job, status="splitting", message="ffmpeg 拆分音轨…", progress=35)
|
||||||
run([
|
run([
|
||||||
@@ -1232,6 +1241,7 @@ def pipeline_analyze(
|
|||||||
"-vn", "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le",
|
"-vn", "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le",
|
||||||
str(wav),
|
str(wav),
|
||||||
])
|
])
|
||||||
|
update(job, source_audio_url=f"/jobs/{job_id}/audio.wav")
|
||||||
n = max(1, min(int(frame_count), 20))
|
n = max(1, min(int(frame_count), 20))
|
||||||
target_label = FRAME_TARGET_LABELS.get(target, FRAME_TARGET_LABELS["balanced"])
|
target_label = FRAME_TARGET_LABELS.get(target, FRAME_TARGET_LABELS["balanced"])
|
||||||
duration = max(float(job.duration or 1.0), 0.1)
|
duration = max(float(job.duration or 1.0), 0.1)
|
||||||
@@ -1497,12 +1507,12 @@ def _transcript_join(segments: list[TranscriptSegment], field: Literal["en", "zh
|
|||||||
|
|
||||||
|
|
||||||
def _fallback_audio_script(segments: list[TranscriptSegment]) -> str:
|
def _fallback_audio_script(segments: list[TranscriptSegment]) -> str:
|
||||||
joined = " ".join((s.zh or s.en).strip() for s in segments if (s.zh or s.en).strip())
|
joined = " ".join((s.en or s.zh).strip() for s in segments if (s.en or s.zh).strip())
|
||||||
if not joined:
|
if not joined:
|
||||||
return "日常疲惫不用硬扛。戴上 SKG,让肩颈慢慢放松,跟着呼吸找回轻松状态。"
|
return "Ease into the moment with SKG. Gentle warmth and rhythmic massage help everyday tension feel lighter, cleaner, and easier to leave behind."
|
||||||
return (
|
return (
|
||||||
"把日常紧绷交给 SKG。贴合身体需要放松的位置,热敷与按摩节奏自然陪伴,"
|
"Let SKG turn a short break into real relief. With soothing warmth and steady massage rhythm, "
|
||||||
"让每一次短暂休息都更轻松、更有质感。"
|
"everyday tension feels lighter, calmer, and easier to leave behind."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -1513,24 +1523,24 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment]) -> tuple[str,
|
|||||||
source_text = _transcript_join(segments, "en")
|
source_text = _transcript_join(segments, "en")
|
||||||
source_zh = _transcript_join(segments, "zh")
|
source_zh = _transcript_join(segments, "zh")
|
||||||
prompt = (
|
prompt = (
|
||||||
"你是 SKG 短视频口播编导。根据参考视频音频转写,抽取它的表达结构、情绪节奏和可复用卖点,"
|
"You are an English short-video voice-over writer for SKG wellness massagers. "
|
||||||
"改写成适合 SKG 按摩/放松产品二创视频的中文口播文案。\n"
|
"Use the source transcript only for structure, pacing, and emotional hook, then rewrite it into a clean English VO for SKG.\n"
|
||||||
"要求:\n"
|
"Rules:\n"
|
||||||
"1. 输出 35-90 个中文字,适合 8-18 秒短视频配音。\n"
|
"1. Output 28-55 English words, suitable for an 8-18 second TTS voice-over.\n"
|
||||||
"2. 口语化、干净、高级,能直接给 TTS 朗读。\n"
|
"2. Make it natural, premium, concise, and ready to read aloud.\n"
|
||||||
"3. 不承诺治疗、治愈、医学疗效,不夸大。\n"
|
"3. Do not claim medical treatment, cure, pain elimination, or clinical effects.\n"
|
||||||
"4. 不复刻原视频品牌/人物/价格/平台话术,只保留表达结构。\n"
|
"4. Do not copy the original brand, creator, price, platform language, or exact claims.\n"
|
||||||
"5. 如果参考转写信息不足,按产品信息生成通用 SKG 放松口播。\n"
|
"5. If the source transcript is too thin, write a general SKG relaxation VO.\n"
|
||||||
'严格返回 JSON:{"rewritten_text":"..."}。\n\n'
|
'Return strict JSON only: {"rewritten_text":"..."}.\n\n'
|
||||||
f"SKG 产品信息:{AUDIO_PRODUCT_BRIEF}\n\n"
|
f"SKG product context: {AUDIO_PRODUCT_BRIEF}\n\n"
|
||||||
f"英文转写:\n{source_text or '无'}\n\n"
|
f"English transcript:\n{source_text or 'None'}\n\n"
|
||||||
f"中文翻译:\n{source_zh or '无'}"
|
f"Chinese translation for reference:\n{source_zh or 'None'}"
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
resp = llm().chat.completions.create(
|
resp = llm().chat.completions.create(
|
||||||
model=AUDIO_REWRITE_MODEL,
|
model=AUDIO_REWRITE_MODEL,
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": "只输出合法 JSON,不要解释,不要 markdown。"},
|
{"role": "system", "content": "Return valid JSON only. No explanation. No markdown."},
|
||||||
{"role": "user", "content": prompt},
|
{"role": "user", "content": prompt},
|
||||||
],
|
],
|
||||||
response_format={"type": "json_object"},
|
response_format={"type": "json_object"},
|
||||||
@@ -1564,7 +1574,7 @@ def _minimax_tts_sync(job_id: str, text: str) -> str:
|
|||||||
"model": MINIMAX_TTS_MODEL,
|
"model": MINIMAX_TTS_MODEL,
|
||||||
"text": text.strip()[:9500],
|
"text": text.strip()[:9500],
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"language_boost": "Chinese",
|
"language_boost": "English",
|
||||||
"output_format": "hex",
|
"output_format": "hex",
|
||||||
"voice_setting": {
|
"voice_setting": {
|
||||||
"voice_id": MINIMAX_TTS_VOICE_ID,
|
"voice_id": MINIMAX_TTS_VOICE_ID,
|
||||||
@@ -1651,6 +1661,7 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
|
|||||||
])
|
])
|
||||||
if not wav.exists():
|
if not wav.exists():
|
||||||
raise RuntimeError("音频提取完成但找不到 audio.wav")
|
raise RuntimeError("音频提取完成但找不到 audio.wav")
|
||||||
|
update(job, source_audio_url=f"/jobs/{job_id}/audio.wav")
|
||||||
|
|
||||||
if not LLM_API_KEY:
|
if not LLM_API_KEY:
|
||||||
# 无 key 模式:mock 数据
|
# 无 key 模式:mock 数据
|
||||||
@@ -2112,7 +2123,7 @@ def get_job(job_id: str) -> Job:
|
|||||||
job = JOBS.get(job_id)
|
job = JOBS.get(job_id)
|
||||||
if not job:
|
if not job:
|
||||||
raise HTTPException(404, "job not found")
|
raise HTTPException(404, "job not found")
|
||||||
return job
|
return job_with_artifacts(job)
|
||||||
|
|
||||||
|
|
||||||
@app.delete("/jobs/{job_id}")
|
@app.delete("/jobs/{job_id}")
|
||||||
@@ -2153,7 +2164,7 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job:
|
|||||||
update(job, error="", audio_script=audio_payload)
|
update(job, error="", audio_script=audio_payload)
|
||||||
if not start_audio_processing(job_id, manage_job_status=manage_job_status):
|
if not start_audio_processing(job_id, manage_job_status=manage_job_status):
|
||||||
update(job, message="音频已在处理中")
|
update(job, message="音频已在处理中")
|
||||||
return job
|
return job_with_artifacts(job)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/jobs/{job_id}/video.mp4")
|
@app.get("/jobs/{job_id}/video.mp4")
|
||||||
|
|||||||
@@ -100,6 +100,8 @@ export default function Home() {
|
|||||||
const [jobs, setJobs] = useState<Job[]>([])
|
const [jobs, setJobs] = useState<Job[]>([])
|
||||||
const [activeJobId, setActiveJobId] = useState<string | null>(null)
|
const [activeJobId, setActiveJobId] = useState<string | null>(null)
|
||||||
const job = useMemo(() => jobs.find((j) => j.id === activeJobId) ?? null, [jobs, activeJobId])
|
const job = useMemo(() => jobs.find((j) => j.id === activeJobId) ?? null, [jobs, activeJobId])
|
||||||
|
const [audioStripJobId, setAudioStripJobId] = useState<string | null>(null)
|
||||||
|
const audioStripJob = useMemo(() => jobs.find((j) => j.id === audioStripJobId) ?? null, [jobs, audioStripJobId])
|
||||||
const [submitting, setSubmitting] = useState(false)
|
const [submitting, setSubmitting] = useState(false)
|
||||||
const [analyzing, setAnalyzing] = useState(false)
|
const [analyzing, setAnalyzing] = useState(false)
|
||||||
const [frameTargets, setFrameTargets] = useState<Record<string, FrameExtractTarget>>({})
|
const [frameTargets, setFrameTargets] = useState<Record<string, FrameExtractTarget>>({})
|
||||||
@@ -159,6 +161,10 @@ export default function Home() {
|
|||||||
const handleSwitchJob = useCallback((id: string) => {
|
const handleSwitchJob = useCallback((id: string) => {
|
||||||
setActiveJobId(id)
|
setActiveJobId(id)
|
||||||
}, [])
|
}, [])
|
||||||
|
const handleOpenAudioStrip = useCallback((jobId?: string) => {
|
||||||
|
const targetId = jobId ?? activeJobId
|
||||||
|
if (targetId) setAudioStripJobId(targetId)
|
||||||
|
}, [activeJobId])
|
||||||
const pollRef = useRef<ReturnType<typeof setInterval> | null>(null)
|
const pollRef = useRef<ReturnType<typeof setInterval> | null>(null)
|
||||||
|
|
||||||
const handleSubmit = useCallback(async (url: string) => {
|
const handleSubmit = useCallback(async (url: string) => {
|
||||||
@@ -393,16 +399,13 @@ export default function Home() {
|
|||||||
const handleTranscribeAudio = useCallback(async (jobId?: string, options?: { silent?: boolean }) => {
|
const handleTranscribeAudio = useCallback(async (jobId?: string, options?: { silent?: boolean }) => {
|
||||||
const targetId = jobId ?? activeJobId
|
const targetId = jobId ?? activeJobId
|
||||||
if (!targetId) return
|
if (!targetId) return
|
||||||
|
setAudioStripJobId(targetId)
|
||||||
const target = jobs.find((item) => item.id === targetId)
|
const target = jobs.find((item) => item.id === targetId)
|
||||||
if (!target) return
|
if (!target) return
|
||||||
if (!target.video_url) {
|
if (!target.video_url) {
|
||||||
if (!options?.silent) toast.info("视频导入完成后,可在音频卡片点击提取音频")
|
if (!options?.silent) toast.info("视频导入完成后,可在音频卡片点击提取音频")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if (target.status === "splitting") {
|
|
||||||
if (!options?.silent) toast.info("当前正在抽帧,结束后可重新点击提取音频")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if (target.status === "transcribing" || target.audio_script?.status === "rewriting") {
|
if (target.status === "transcribing" || target.audio_script?.status === "rewriting") {
|
||||||
if (!options?.silent) toast.info("音频正在处理中")
|
if (!options?.silent) toast.info("音频正在处理中")
|
||||||
return
|
return
|
||||||
@@ -728,9 +731,10 @@ export default function Home() {
|
|||||||
onCopyImage: handleCopyImage,
|
onCopyImage: handleCopyImage,
|
||||||
onGenerateProductFusionVideo: handleGenerateProductFusionVideo,
|
onGenerateProductFusionVideo: handleGenerateProductFusionVideo,
|
||||||
onTranscribeAudio: handleTranscribeAudio,
|
onTranscribeAudio: handleTranscribeAudio,
|
||||||
|
onOpenAudioStrip: handleOpenAudioStrip,
|
||||||
pinnedNodes,
|
pinnedNodes,
|
||||||
onToggleNodePin: handleToggleNodePin,
|
onToggleNodePin: handleToggleNodePin,
|
||||||
}), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, handleTranscribeAudio, pinnedNodes, handleToggleNodePin])
|
}), [job, jobs, activeJobId, submitting, analyzing, frameTargets, frameCounts, frameQualities, selectedFrames, expandedFrame, framePanelScale, framePanelPinned, framePanelDock, videoPanelJobId, videoPanelScale, videoPanelDock, handleSubmit, handleUpload, handleAnalyze, handleAnalyzeJob, handleFrameTargetChange, handleFrameCountChange, handleFrameQualityChange, handleToggleFrame, handleOpenFramePanel, handleFramePanelScaleChange, handleCloseExpandedFrame, handleAddManualFrame, handleAddManualFrameForJob, handleOpenVideoPanel, handleVideoPanelScaleChange, handleSwitchJob, updateJobInList, handleDeleteJob, handleDeleteFrame, handleDeleteFrameForJob, handleDeleteGenerated, handleDeleteVideo, handleDeleteCutout, handleOpenStoryboard, handleOpenWorkbench, clipboard, handleCopyImage, handleGenerateProductFusionVideo, handleTranscribeAudio, handleOpenAudioStrip, pinnedNodes, handleToggleNodePin])
|
||||||
|
|
||||||
// 用 useNodesState 让 ReactFlow 自己管位置(避免轮询时重置 drag)
|
// 用 useNodesState 让 ReactFlow 自己管位置(避免轮询时重置 drag)
|
||||||
const savedSizes = useMemo(() => loadNodeSizes(), [])
|
const savedSizes = useMemo(() => loadNodeSizes(), [])
|
||||||
@@ -1013,7 +1017,7 @@ export default function Home() {
|
|||||||
<div className="h-full w-full" suppressHydrationWarning />
|
<div className="h-full w-full" suppressHydrationWarning />
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
{clientReady && <AudioStrip job={job} />}
|
{clientReady && <AudioStrip job={audioStripJob} open={!!audioStripJob} onClose={() => setAudioStripJobId(null)} />}
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<Toaster theme="system" position="top-center" />
|
<Toaster theme="system" position="top-center" />
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"use client"
|
"use client"
|
||||||
|
|
||||||
import { useEffect, useMemo, useRef, useState, type PointerEvent as ReactPointerEvent } from "react"
|
import { useEffect, useMemo, useRef, useState, type PointerEvent as ReactPointerEvent } from "react"
|
||||||
import { ChevronDown, ChevronUp, GripHorizontal, Mic2, Volume2 } from "lucide-react"
|
import { ChevronDown, ChevronUp, GripHorizontal, Mic2, Volume2, X } from "lucide-react"
|
||||||
import { apiAssetUrl, sourceAudioUrl, type Job, type TranscriptSegment } from "@/lib/api"
|
import { apiAssetUrl, sourceAudioUrl, type Job, type TranscriptSegment } from "@/lib/api"
|
||||||
|
|
||||||
const STORAGE_KEY = "skg.audio-strip.height"
|
const STORAGE_KEY = "skg.audio-strip.height"
|
||||||
@@ -58,20 +58,34 @@ function SegmentCard({
|
|||||||
segment,
|
segment,
|
||||||
peaks,
|
peaks,
|
||||||
duration,
|
duration,
|
||||||
|
currentTime,
|
||||||
}: {
|
}: {
|
||||||
segment: TranscriptSegment
|
segment: TranscriptSegment
|
||||||
peaks: number[]
|
peaks: number[]
|
||||||
duration: number
|
duration: number
|
||||||
|
currentTime: number
|
||||||
}) {
|
}) {
|
||||||
const segDuration = Math.max(1.2, segment.end - segment.start)
|
const segDuration = Math.max(1.2, segment.end - segment.start)
|
||||||
const width = clamp(180 + segDuration * 42, 220, 520)
|
const width = clamp(180 + segDuration * 42, 220, 520)
|
||||||
const segPeaks = slicePeaks(peaks, segment.start, segment.end, duration)
|
const segPeaks = slicePeaks(peaks, segment.start, segment.end, duration)
|
||||||
|
const active = currentTime >= segment.start && currentTime <= Math.max(segment.end, segment.start + 0.2)
|
||||||
|
const pointerPct = active ? clamp(((currentTime - segment.start) / Math.max(0.2, segment.end - segment.start)) * 100, 0, 100) : 0
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<article
|
<article
|
||||||
className="shrink-0 rounded-lg border border-white/10 bg-white/[0.045] p-3 shadow-[0_12px_30px_-22px_rgba(0,0,0,0.8)]"
|
className={`relative shrink-0 overflow-hidden rounded-lg border p-3 shadow-[0_12px_30px_-22px_rgba(0,0,0,0.8)] transition ${
|
||||||
|
active
|
||||||
|
? "border-emerald-300/55 bg-emerald-300/[0.105]"
|
||||||
|
: "border-white/10 bg-white/[0.045]"
|
||||||
|
}`}
|
||||||
style={{ width }}
|
style={{ width }}
|
||||||
>
|
>
|
||||||
|
{active && (
|
||||||
|
<div
|
||||||
|
className="pointer-events-none absolute inset-y-0 z-10 w-[2px] bg-emerald-200 shadow-[0_0_18px_rgba(110,231,183,0.9)]"
|
||||||
|
style={{ left: `${pointerPct}%` }}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
<div className="mb-2 flex items-center justify-between gap-3">
|
<div className="mb-2 flex items-center justify-between gap-3">
|
||||||
<span className="font-mono text-[10px] text-[var(--text-faint)]">
|
<span className="font-mono text-[10px] text-[var(--text-faint)]">
|
||||||
{segment.start.toFixed(1)}s to {segment.end.toFixed(1)}s
|
{segment.start.toFixed(1)}s to {segment.end.toFixed(1)}s
|
||||||
@@ -93,7 +107,7 @@ function SegmentCard({
|
|||||||
{segment.zh || <span className="text-[var(--text-faint)] italic">翻译中...</span>}
|
{segment.zh || <span className="text-[var(--text-faint)] italic">翻译中...</span>}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<Waveform peaks={segPeaks} />
|
<Waveform peaks={segPeaks} active={active} />
|
||||||
</div>
|
</div>
|
||||||
</article>
|
</article>
|
||||||
)
|
)
|
||||||
@@ -126,19 +140,33 @@ async function decodeWaveform(url: string, targetPeaks = 1800) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function AudioStrip({ job }: { job: Job | null }) {
|
export function AudioStrip({ job, open, onClose }: { job: Job | null; open: boolean; onClose?: () => void }) {
|
||||||
const [collapsed, setCollapsed] = useState(false)
|
const [collapsed, setCollapsed] = useState(false)
|
||||||
const [height, setHeight] = useState(DEFAULT_HEIGHT)
|
const [height, setHeight] = useState(DEFAULT_HEIGHT)
|
||||||
const [peaks, setPeaks] = useState<number[]>([])
|
const [peaks, setPeaks] = useState<number[]>([])
|
||||||
|
const [sourceReady, setSourceReady] = useState(false)
|
||||||
|
const [audioKey, setAudioKey] = useState(0)
|
||||||
|
const [currentTime, setCurrentTime] = useState(0)
|
||||||
const dragRef = useRef<{ startY: number; startHeight: number } | null>(null)
|
const dragRef = useRef<{ startY: number; startHeight: number } | null>(null)
|
||||||
|
const audioRef = useRef<HTMLAudioElement>(null)
|
||||||
const transcript = job?.transcript ?? []
|
const transcript = job?.transcript ?? []
|
||||||
const audioScript = job?.audio_script
|
const audioScript = job?.audio_script
|
||||||
const voiceUrl = apiAssetUrl(audioScript?.voice_url)
|
const voiceUrl = apiAssetUrl(audioScript?.voice_url)
|
||||||
const hasAudio = !!job && (transcript.length > 0 || !!audioScript?.rewritten_text || job.status === "transcribing")
|
const sourceUrl = job ? apiAssetUrl(job.source_audio_url || sourceAudioUrl(job.id)) : ""
|
||||||
|
const processing = !!job && (job.status === "transcribing" || audioScript?.status === "rewriting")
|
||||||
|
const activeSegment = transcript.find((segment) => currentTime >= segment.start && currentTime <= Math.max(segment.end, segment.start + 0.2))
|
||||||
const duration = useMemo(() => {
|
const duration = useMemo(() => {
|
||||||
const lastTranscriptEnd = transcript.reduce((max, s) => Math.max(max, s.end || 0), 0)
|
const lastTranscriptEnd = transcript.reduce((max, s) => Math.max(max, s.end || 0), 0)
|
||||||
return Math.max(job?.duration ?? 0, lastTranscriptEnd, 1)
|
const audioDuration = audioRef.current?.duration
|
||||||
|
return Math.max(
|
||||||
|
Number.isFinite(audioDuration) ? Number(audioDuration) : 0,
|
||||||
|
job?.duration ?? 0,
|
||||||
|
lastTranscriptEnd,
|
||||||
|
1,
|
||||||
|
)
|
||||||
}, [job?.duration, transcript])
|
}, [job?.duration, transcript])
|
||||||
|
const timelinePeaks = useMemo(() => slicePeaks(peaks, 0, duration, duration, 160), [duration, peaks])
|
||||||
|
const pointerPct = clamp((currentTime / duration) * 100, 0, 100)
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (typeof window === "undefined") return
|
if (typeof window === "undefined") return
|
||||||
@@ -148,21 +176,38 @@ export function AudioStrip({ job }: { job: Job | null }) {
|
|||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
let cancelled = false
|
let cancelled = false
|
||||||
|
let timer: ReturnType<typeof setTimeout> | null = null
|
||||||
|
let attempts = 0
|
||||||
setPeaks([])
|
setPeaks([])
|
||||||
if (!job?.id || !hasAudio) return
|
setSourceReady(false)
|
||||||
decodeWaveform(sourceAudioUrl(job.id))
|
setCurrentTime(0)
|
||||||
|
if (!job?.id || !open) return
|
||||||
|
setPeaks(fallbackPeaks(1800, `${job.id}-loading`))
|
||||||
|
const load = () => {
|
||||||
|
attempts += 1
|
||||||
|
decodeWaveform(sourceUrl)
|
||||||
.then((next) => {
|
.then((next) => {
|
||||||
if (!cancelled) setPeaks(next)
|
if (cancelled) return
|
||||||
|
setPeaks(next)
|
||||||
|
setSourceReady(true)
|
||||||
|
setAudioKey((key) => key + 1)
|
||||||
})
|
})
|
||||||
.catch(() => {
|
.catch(() => {
|
||||||
if (!cancelled) setPeaks(fallbackPeaks(1800, `${job.id}-${transcript.length}`))
|
if (cancelled) return
|
||||||
|
setSourceReady(false)
|
||||||
|
if (attempts < (processing ? 45 : 6)) {
|
||||||
|
timer = setTimeout(load, 1000)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
}
|
||||||
|
load()
|
||||||
return () => {
|
return () => {
|
||||||
cancelled = true
|
cancelled = true
|
||||||
|
if (timer) clearTimeout(timer)
|
||||||
}
|
}
|
||||||
}, [job?.id, hasAudio, transcript.length])
|
}, [job?.id, open, processing, sourceUrl, transcript.length])
|
||||||
|
|
||||||
if (!hasAudio || !job) return null
|
if (!open || !job) return null
|
||||||
|
|
||||||
const startDrag = (e: ReactPointerEvent<HTMLDivElement>) => {
|
const startDrag = (e: ReactPointerEvent<HTMLDivElement>) => {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
@@ -206,7 +251,7 @@ export function AudioStrip({ job }: { job: Job | null }) {
|
|||||||
{voiceUrl && (
|
{voiceUrl && (
|
||||||
<div className="hidden items-center gap-1.5 text-[10px] text-emerald-200/80 sm:flex">
|
<div className="hidden items-center gap-1.5 text-[10px] text-emerald-200/80 sm:flex">
|
||||||
<Volume2 className="h-3.5 w-3.5" />
|
<Volume2 className="h-3.5 w-3.5" />
|
||||||
MiniMax ready
|
English VO ready
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
<button
|
<button
|
||||||
@@ -217,27 +262,79 @@ export function AudioStrip({ job }: { job: Job | null }) {
|
|||||||
>
|
>
|
||||||
{collapsed ? <ChevronUp className="h-3.5 w-3.5" /> : <ChevronDown className="h-3.5 w-3.5" />}
|
{collapsed ? <ChevronUp className="h-3.5 w-3.5" /> : <ChevronDown className="h-3.5 w-3.5" />}
|
||||||
</button>
|
</button>
|
||||||
|
{onClose && (
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
onClick={onClose}
|
||||||
|
className="inline-flex h-6 w-6 items-center justify-center rounded-md border border-white/10 text-white/65 transition hover:bg-white/10 hover:text-white"
|
||||||
|
title="关闭音频条"
|
||||||
|
>
|
||||||
|
<X className="h-3.5 w-3.5" />
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{!collapsed && (
|
{!collapsed && (
|
||||||
<div className="grid h-[calc(100%-48px)] grid-cols-[minmax(0,1fr)_300px] gap-3 p-3 max-lg:grid-cols-1">
|
<div className="grid h-[calc(100%-48px)] grid-cols-[minmax(0,1fr)_300px] gap-3 p-3 max-lg:grid-cols-1">
|
||||||
<div className="min-w-0 overflow-x-auto overflow-y-hidden pb-1">
|
<div className="flex min-w-0 min-h-0 flex-col gap-3 overflow-hidden">
|
||||||
{transcript.length > 0 ? (
|
<div className="rounded-lg border border-white/10 bg-black/20 p-2">
|
||||||
<div className="flex h-full items-stretch gap-3">
|
<div className="mb-2 flex items-center justify-between gap-3">
|
||||||
{transcript.map((segment) => (
|
<div className="min-w-0 text-[10px] uppercase tracking-widest text-white/45">
|
||||||
<SegmentCard key={segment.index} segment={segment} peaks={peaks} duration={duration} />
|
Source audio playback
|
||||||
))}
|
{activeSegment ? <span className="ml-2 text-emerald-200/80">#{activeSegment.index + 1}</span> : null}
|
||||||
|
</div>
|
||||||
|
<div className="shrink-0 font-mono text-[10px] text-white/45">
|
||||||
|
{currentTime.toFixed(1)}s / {duration.toFixed(1)}s
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{sourceReady ? (
|
||||||
|
<audio
|
||||||
|
key={audioKey}
|
||||||
|
ref={audioRef}
|
||||||
|
controls
|
||||||
|
src={sourceUrl}
|
||||||
|
className="h-8 w-full"
|
||||||
|
onTimeUpdate={(event) => setCurrentTime(event.currentTarget.currentTime)}
|
||||||
|
onSeeked={(event) => setCurrentTime(event.currentTarget.currentTime)}
|
||||||
|
onLoadedMetadata={(event) => setCurrentTime(event.currentTarget.currentTime)}
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<div className="flex h-8 items-center rounded-md border border-dashed border-white/12 px-3 text-[11px] text-white/45">
|
||||||
|
{processing ? "正在提取原音频并准备波形..." : "等待原音频波形..."}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
<div className="relative mt-2">
|
||||||
|
<Waveform peaks={timelinePeaks} active={sourceReady} />
|
||||||
|
<div
|
||||||
|
className="pointer-events-none absolute inset-y-0 w-[2px] bg-emerald-200 shadow-[0_0_18px_rgba(110,231,183,0.9)]"
|
||||||
|
style={{ left: `${pointerPct}%` }}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="min-h-0 overflow-x-auto overflow-y-hidden pb-1">
|
||||||
|
{transcript.length > 0 ? (
|
||||||
|
<div className="flex h-full items-stretch gap-3">
|
||||||
|
{transcript.map((segment) => (
|
||||||
|
<SegmentCard
|
||||||
|
key={segment.index}
|
||||||
|
segment={segment}
|
||||||
|
peaks={peaks}
|
||||||
|
duration={duration}
|
||||||
|
currentTime={currentTime}
|
||||||
|
/>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<div className="flex h-full items-center justify-center rounded-lg border border-dashed border-white/12 text-[12px] text-white/45">
|
<div className="flex h-full items-center justify-center rounded-lg border border-dashed border-white/12 text-[12px] text-white/45">
|
||||||
音频识别完成后,这里会按时间显示英文、中文翻译和对应波形。
|
点击音频卡片后开始解析;完成后这里会按时间显示英文、中文翻译和对应波形。
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="min-h-0 overflow-y-auto rounded-lg border border-emerald-300/20 bg-emerald-300/[0.07] p-3 max-lg:hidden">
|
<div className="min-h-0 overflow-y-auto rounded-lg border border-emerald-300/20 bg-emerald-300/[0.07] p-3 max-lg:hidden">
|
||||||
<div className="mb-2 text-[10px] uppercase tracking-widest text-emerald-100/70">改后 · SKG 口播</div>
|
<div className="mb-2 text-[10px] uppercase tracking-widest text-emerald-100/70">English VO · SKG rewrite</div>
|
||||||
<p className="text-[12.5px] leading-relaxed text-white/90">
|
<p className="text-[12.5px] leading-relaxed text-white/90">
|
||||||
{audioScript?.rewritten_text || "等待转录完成后生成适合 SKG 产品视频的口播文案。"}
|
{audioScript?.rewritten_text || "Waiting for the parsed transcript to become an English SKG voice-over."}
|
||||||
</p>
|
</p>
|
||||||
{voiceUrl && (
|
{voiceUrl && (
|
||||||
<audio controls src={voiceUrl} className="mt-3 h-8 w-full" />
|
<audio controls src={voiceUrl} className="mt-3 h-8 w-full" />
|
||||||
|
|||||||
@@ -76,6 +76,7 @@ export interface NodeData {
|
|||||||
onCopyImage?: (ref: ImageRef) => void // 复制图片到全局剪贴板(粘贴到分镜头编排插槽)
|
onCopyImage?: (ref: ImageRef) => void // 复制图片到全局剪贴板(粘贴到分镜头编排插槽)
|
||||||
onGenerateProductFusionVideo?: (frameIdx: number, shot: ProductFusionShot) => Promise<void> | void
|
onGenerateProductFusionVideo?: (frameIdx: number, shot: ProductFusionShot) => Promise<void> | void
|
||||||
onTranscribeAudio?: (jobId?: string) => Promise<void> | void
|
onTranscribeAudio?: (jobId?: string) => Promise<void> | void
|
||||||
|
onOpenAudioStrip?: (jobId?: string) => void
|
||||||
pinnedNodes?: Set<string> // 已钉住的节点 id 集合 — 钉住后位置 + 尺寸锁定
|
pinnedNodes?: Set<string> // 已钉住的节点 id 集合 — 钉住后位置 + 尺寸锁定
|
||||||
onToggleNodePin?: (id: string) => void
|
onToggleNodePin?: (id: string) => void
|
||||||
}
|
}
|
||||||
@@ -2141,7 +2142,12 @@ export function AudioNode({ data, selected }: any) {
|
|||||||
pinned={d.pinnedNodes?.has("audio")}
|
pinned={d.pinnedNodes?.has("audio")}
|
||||||
onTogglePin={() => d.onToggleNodePin?.("audio")}
|
onTogglePin={() => d.onToggleNodePin?.("audio")}
|
||||||
>
|
>
|
||||||
<div className="space-y-2 text-[11px] text-[var(--text-soft)] leading-snug">
|
<div
|
||||||
|
className="space-y-2 text-[11px] text-[var(--text-soft)] leading-snug"
|
||||||
|
onClick={() => {
|
||||||
|
if (job?.video_url) d.onOpenAudioStrip?.(job.id)
|
||||||
|
}}
|
||||||
|
>
|
||||||
<div>
|
<div>
|
||||||
音轨 → ASR 转录 → 英中翻译 → SKG 口播改写 → MiniMax 配音<br />
|
音轨 → ASR 转录 → 英中翻译 → SKG 口播改写 → MiniMax 配音<br />
|
||||||
<span className="text-[var(--text-faint)] font-mono">
|
<span className="text-[var(--text-faint)] font-mono">
|
||||||
@@ -2154,6 +2160,7 @@ export function AudioNode({ data, selected }: any) {
|
|||||||
disabled={audioButtonDisabled}
|
disabled={audioButtonDisabled}
|
||||||
onClick={(e) => {
|
onClick={(e) => {
|
||||||
e.stopPropagation()
|
e.stopPropagation()
|
||||||
|
d.onOpenAudioStrip?.(job.id)
|
||||||
if (audioButtonDisabled) return
|
if (audioButtonDisabled) return
|
||||||
void d.onTranscribeAudio?.(job.id)
|
void d.onTranscribeAudio?.(job.id)
|
||||||
}}
|
}}
|
||||||
@@ -2185,9 +2192,7 @@ export function AudioNode({ data, selected }: any) {
|
|||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
{voiceUrl && (
|
{voiceUrl && <div className="text-[10.5px] text-emerald-200/85">MiniMax English voice ready · 底部音频条播放</div>}
|
||||||
<audio controls src={voiceUrl} className="h-7 w-full" />
|
|
||||||
)}
|
|
||||||
{isRewriting && (
|
{isRewriting && (
|
||||||
<div className="text-[10.5px] text-[var(--text-faint)]">正在生成改写文案和配音…</div>
|
<div className="text-[10.5px] text-[var(--text-faint)]">正在生成改写文案和配音…</div>
|
||||||
)}
|
)}
|
||||||
|
|||||||
@@ -331,6 +331,7 @@ export interface Job {
|
|||||||
duration?: number
|
duration?: number
|
||||||
width?: number
|
width?: number
|
||||||
height?: number
|
height?: number
|
||||||
|
source_audio_url?: string
|
||||||
frames: KeyFrame[]
|
frames: KeyFrame[]
|
||||||
transcript: TranscriptSegment[]
|
transcript: TranscriptSegment[]
|
||||||
audio_script?: AudioScript
|
audio_script?: AudioScript
|
||||||
|
|||||||
Reference in New Issue
Block a user