auto-save 2026-05-14 11:47 (~7)

This commit is contained in:
2026-05-14 11:47:40 +08:00
parent b474d804c8
commit ba491c0c5a
7 changed files with 187 additions and 70 deletions

View File

@@ -19,7 +19,7 @@ AUDIO_PRODUCT_BRIEF="SKG 智能按摩产品,主打日常肩颈、腰背、眼
MINIMAX_API_KEY=
MINIMAX_TTS_BASE_URL=https://api.minimax.io
MINIMAX_TTS_MODEL=speech-2.8-turbo
MINIMAX_TTS_VOICE_ID="Chinese (Mandarin)_Reliable_Executive"
MINIMAX_TTS_VOICE_ID=English_expressive_narrator
# Poe 视频 API,优先用于 Seedance / Kling / Veo
POE_API_BASE_URL=https://api.poe.com/v1

View File

@@ -49,8 +49,8 @@ MINIMAX_TTS_BASE_URL = os.getenv("MINIMAX_TTS_BASE_URL", "https://api.minimax.io
MINIMAX_TTS_MODEL = os.getenv("MINIMAX_TTS_MODEL", "speech-2.8-turbo").strip() or "speech-2.8-turbo"
MINIMAX_TTS_VOICE_ID = os.getenv(
"MINIMAX_TTS_VOICE_ID",
"Chinese (Mandarin)_Reliable_Executive",
).strip() or "Chinese (Mandarin)_Reliable_Executive"
"English_expressive_narrator",
).strip() or "English_expressive_narrator"
POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1"
POE_API_KEY = os.getenv("POE_API_KEY", "").strip()
@@ -379,6 +379,7 @@ class Job(BaseModel):
duration: float = 0.0
width: int = 0
height: int = 0
source_audio_url: str = ""
frames: list[KeyFrame] = Field(default_factory=list)
transcript: list[TranscriptSegment] = Field(default_factory=list)
audio_script: AudioScript = Field(default_factory=AudioScript)
@@ -400,6 +401,14 @@ def job_dir(job_id: str) -> Path:
return d
def source_audio_url_for(job_id: str) -> str:
    """Return the URL path of a job's ``audio.wav``, or ``""`` when it is absent.

    The URL is reported only if ``audio.wav`` already exists in the job's
    folder under ``JOBS_DIR``.
    """
    wav_path = JOBS_DIR / job_id / "audio.wav"
    if not wav_path.exists():
        return ""
    return f"/jobs/{job_id}/audio.wav"
def job_with_artifacts(job: Job) -> Job:
    """Return a copy of *job* with ``source_audio_url`` recomputed from disk.

    The URL comes from ``source_audio_url_for(job.id)``; the job passed in is
    not modified, only the returned copy carries the refreshed value.
    """
    audio_url = source_audio_url_for(job.id)
    return job.model_copy(update={"source_audio_url": audio_url})
def save_state(job: Job) -> None:
    """Persist *job* as indented JSON in ``state.json`` inside its job directory.

    The file is written as UTF-8 explicitly. Job fields carry non-ASCII text
    (for example Chinese status messages), and ``Path.write_text`` without an
    ``encoding`` falls back to the platform locale encoding, which can raise
    ``UnicodeEncodeError`` or yield files that are unreadable on another host.
    """
    # NOTE(review): the code that loads state.json should decode it as UTF-8
    # too; it is not visible here, so confirm before relying on this.
    state_path = job_dir(job.id) / "state.json"
    state_path.write_text(job.model_dump_json(indent=2), encoding="utf-8")
@@ -1224,7 +1233,7 @@ def pipeline_analyze(
wav = d / "audio.wav"
if wav.exists():
update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35)
update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35, source_audio_url=f"/jobs/{job_id}/audio.wav")
else:
update(job, status="splitting", message="ffmpeg 拆分音轨…", progress=35)
run([
@@ -1232,6 +1241,7 @@ def pipeline_analyze(
"-vn", "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le",
str(wav),
])
update(job, source_audio_url=f"/jobs/{job_id}/audio.wav")
n = max(1, min(int(frame_count), 20))
target_label = FRAME_TARGET_LABELS.get(target, FRAME_TARGET_LABELS["balanced"])
duration = max(float(job.duration or 1.0), 0.1)
@@ -1497,12 +1507,12 @@ def _transcript_join(segments: list[TranscriptSegment], field: Literal["en", "zh
def _fallback_audio_script(segments: list[TranscriptSegment]) -> str:
joined = " ".join((s.zh or s.en).strip() for s in segments if (s.zh or s.en).strip())
joined = " ".join((s.en or s.zh).strip() for s in segments if (s.en or s.zh).strip())
if not joined:
return "日常疲惫不用硬扛。戴上 SKG让肩颈慢慢放松跟着呼吸找回轻松状态。"
return "Ease into the moment with SKG. Gentle warmth and rhythmic massage help everyday tension feel lighter, cleaner, and easier to leave behind."
return (
"把日常紧绷交给 SKG。贴合身体需要放松的位置热敷与按摩节奏自然陪伴"
"让每一次短暂休息都更轻松、更有质感。"
"Let SKG turn a short break into real relief. With soothing warmth and steady massage rhythm, "
"everyday tension feels lighter, calmer, and easier to leave behind."
)
@@ -1513,24 +1523,24 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment]) -> tuple[str,
source_text = _transcript_join(segments, "en")
source_zh = _transcript_join(segments, "zh")
prompt = (
"你是 SKG 短视频口播编导。根据参考视频音频转写,抽取它的表达结构、情绪节奏和可复用卖点,"
"改写成适合 SKG 按摩/放松产品二创视频的中文口播文案。\n"
"要求:\n"
"1. 输出 35-90 个中文字,适合 8-18 秒短视频配音。\n"
"2. 口语化、干净、高级,能直接给 TTS 朗读。\n"
"3. 不承诺治疗、治愈、医学疗效,不夸大。\n"
"4. 不复刻原视频品牌/人物/价格/平台话术,只保留表达结构。\n"
"5. 如果参考转写信息不足,按产品信息生成通用 SKG 放松口播。\n"
'严格返回 JSON{"rewritten_text":"..."}\n\n'
f"SKG 产品信息:{AUDIO_PRODUCT_BRIEF}\n\n"
f"英文转写:\n{source_text or ''}\n\n"
f"中文翻译:\n{source_zh or ''}"
"You are an English short-video voice-over writer for SKG wellness massagers. "
"Use the source transcript only for structure, pacing, and emotional hook, then rewrite it into a clean English VO for SKG.\n"
"Rules:\n"
"1. Output 28-55 English words, suitable for an 8-18 second TTS voice-over.\n"
"2. Make it natural, premium, concise, and ready to read aloud.\n"
"3. Do not claim medical treatment, cure, pain elimination, or clinical effects.\n"
"4. Do not copy the original brand, creator, price, platform language, or exact claims.\n"
"5. If the source transcript is too thin, write a general SKG relaxation VO.\n"
'Return strict JSON only: {"rewritten_text":"..."}.\n\n'
f"SKG product context: {AUDIO_PRODUCT_BRIEF}\n\n"
f"English transcript:\n{source_text or 'None'}\n\n"
f"Chinese translation for reference:\n{source_zh or 'None'}"
)
try:
resp = llm().chat.completions.create(
model=AUDIO_REWRITE_MODEL,
messages=[
{"role": "system", "content": "只输出合法 JSON不要解释不要 markdown"},
{"role": "system", "content": "Return valid JSON only. No explanation. No markdown."},
{"role": "user", "content": prompt},
],
response_format={"type": "json_object"},
@@ -1564,7 +1574,7 @@ def _minimax_tts_sync(job_id: str, text: str) -> str:
"model": MINIMAX_TTS_MODEL,
"text": text.strip()[:9500],
"stream": False,
"language_boost": "Chinese",
"language_boost": "English",
"output_format": "hex",
"voice_setting": {
"voice_id": MINIMAX_TTS_VOICE_ID,
@@ -1651,6 +1661,7 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
])
if not wav.exists():
raise RuntimeError("音频提取完成但找不到 audio.wav")
update(job, source_audio_url=f"/jobs/{job_id}/audio.wav")
if not LLM_API_KEY:
# 无 key 模式mock 数据
@@ -2112,7 +2123,7 @@ def get_job(job_id: str) -> Job:
job = JOBS.get(job_id)
if not job:
raise HTTPException(404, "job not found")
return job
return job_with_artifacts(job)
@app.delete("/jobs/{job_id}")
@@ -2153,7 +2164,7 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job:
update(job, error="", audio_script=audio_payload)
if not start_audio_processing(job_id, manage_job_status=manage_job_status):
update(job, message="音频已在处理中")
return job
return job_with_artifacts(job)
@app.get("/jobs/{job_id}/video.mp4")