auto-save 2026-05-14 11:47 (~7)
This commit is contained in:
@@ -19,7 +19,7 @@ AUDIO_PRODUCT_BRIEF="SKG 智能按摩产品,主打日常肩颈、腰背、眼
|
||||
MINIMAX_API_KEY=
|
||||
MINIMAX_TTS_BASE_URL=https://api.minimax.io
|
||||
MINIMAX_TTS_MODEL=speech-2.8-turbo
|
||||
MINIMAX_TTS_VOICE_ID="Chinese (Mandarin)_Reliable_Executive"
|
||||
MINIMAX_TTS_VOICE_ID=English_expressive_narrator
|
||||
|
||||
# Poe 视频 API(优先用于 Seedance / Kling / Veo)
|
||||
POE_API_BASE_URL=https://api.poe.com/v1
|
||||
|
||||
57
api/main.py
57
api/main.py
@@ -49,8 +49,8 @@ MINIMAX_TTS_BASE_URL = os.getenv("MINIMAX_TTS_BASE_URL", "https://api.minimax.io
|
||||
MINIMAX_TTS_MODEL = os.getenv("MINIMAX_TTS_MODEL", "speech-2.8-turbo").strip() or "speech-2.8-turbo"
|
||||
# Voice used for MiniMax TTS requests. An unset or blank MINIMAX_TTS_VOICE_ID
# environment variable falls back to the English narrator voice.
MINIMAX_TTS_VOICE_ID = os.getenv(
    "MINIMAX_TTS_VOICE_ID",
    "English_expressive_narrator",
).strip() or "English_expressive_narrator"
|
||||
|
||||
POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1"
|
||||
POE_API_KEY = os.getenv("POE_API_KEY", "").strip()
|
||||
@@ -379,6 +379,7 @@ class Job(BaseModel):
|
||||
duration: float = 0.0
|
||||
width: int = 0
|
||||
height: int = 0
|
||||
source_audio_url: str = ""
|
||||
frames: list[KeyFrame] = Field(default_factory=list)
|
||||
transcript: list[TranscriptSegment] = Field(default_factory=list)
|
||||
audio_script: AudioScript = Field(default_factory=AudioScript)
|
||||
@@ -400,6 +401,14 @@ def job_dir(job_id: str) -> Path:
|
||||
return d
|
||||
|
||||
|
||||
def source_audio_url_for(job_id: str) -> str:
    """Return the URL path of a job's extracted audio track, or "" if absent.

    The URL is reported only while ``audio.wav`` exists under
    ``JOBS_DIR / job_id``, so the result reflects what is on disk at call time.
    """
    wav_path = JOBS_DIR / job_id / "audio.wav"
    return f"/jobs/{job_id}/audio.wav" if wav_path.exists() else ""
|
||||
|
||||
|
||||
def job_with_artifacts(job: Job) -> Job:
    """Return a copy of ``job`` with ``source_audio_url`` derived from disk.

    The copy's ``source_audio_url`` is replaced by whatever
    ``source_audio_url_for(job.id)`` reports, i.e. the audio URL when the
    job's ``audio.wav`` exists and an empty string otherwise.
    """
    audio_url = source_audio_url_for(job.id)
    return job.model_copy(update={"source_audio_url": audio_url})
|
||||
|
||||
|
||||
def save_state(job: Job) -> None:
    """Write the job as indented JSON to ``state.json`` in its job directory."""
    # NOTE(review): write_text() is called without an explicit encoding, so the
    # platform default applies. Status messages set elsewhere in this file are
    # non-ASCII; confirm the code that reads state.json uses the same encoding.
    (job_dir(job.id) / "state.json").write_text(job.model_dump_json(indent=2))
|
||||
|
||||
@@ -1224,7 +1233,7 @@ def pipeline_analyze(
|
||||
|
||||
wav = d / "audio.wav"
|
||||
if wav.exists():
|
||||
update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35)
|
||||
update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35, source_audio_url=f"/jobs/{job_id}/audio.wav")
|
||||
else:
|
||||
update(job, status="splitting", message="ffmpeg 拆分音轨…", progress=35)
|
||||
run([
|
||||
@@ -1232,6 +1241,7 @@ def pipeline_analyze(
|
||||
"-vn", "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le",
|
||||
str(wav),
|
||||
])
|
||||
update(job, source_audio_url=f"/jobs/{job_id}/audio.wav")
|
||||
n = max(1, min(int(frame_count), 20))
|
||||
target_label = FRAME_TARGET_LABELS.get(target, FRAME_TARGET_LABELS["balanced"])
|
||||
duration = max(float(job.duration or 1.0), 0.1)
|
||||
@@ -1497,12 +1507,12 @@ def _transcript_join(segments: list[TranscriptSegment], field: Literal["en", "zh
|
||||
|
||||
|
||||
def _fallback_audio_script(segments: list[TranscriptSegment]) -> str:
|
||||
joined = " ".join((s.zh or s.en).strip() for s in segments if (s.zh or s.en).strip())
|
||||
joined = " ".join((s.en or s.zh).strip() for s in segments if (s.en or s.zh).strip())
|
||||
if not joined:
|
||||
return "日常疲惫不用硬扛。戴上 SKG,让肩颈慢慢放松,跟着呼吸找回轻松状态。"
|
||||
return "Ease into the moment with SKG. Gentle warmth and rhythmic massage help everyday tension feel lighter, cleaner, and easier to leave behind."
|
||||
return (
|
||||
"把日常紧绷交给 SKG。贴合身体需要放松的位置,热敷与按摩节奏自然陪伴,"
|
||||
"让每一次短暂休息都更轻松、更有质感。"
|
||||
"Let SKG turn a short break into real relief. With soothing warmth and steady massage rhythm, "
|
||||
"everyday tension feels lighter, calmer, and easier to leave behind."
|
||||
)
|
||||
|
||||
|
||||
@@ -1513,24 +1523,24 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment]) -> tuple[str,
|
||||
source_text = _transcript_join(segments, "en")
|
||||
source_zh = _transcript_join(segments, "zh")
|
||||
prompt = (
|
||||
"你是 SKG 短视频口播编导。根据参考视频音频转写,抽取它的表达结构、情绪节奏和可复用卖点,"
|
||||
"改写成适合 SKG 按摩/放松产品二创视频的中文口播文案。\n"
|
||||
"要求:\n"
|
||||
"1. 输出 35-90 个中文字,适合 8-18 秒短视频配音。\n"
|
||||
"2. 口语化、干净、高级,能直接给 TTS 朗读。\n"
|
||||
"3. 不承诺治疗、治愈、医学疗效,不夸大。\n"
|
||||
"4. 不复刻原视频品牌/人物/价格/平台话术,只保留表达结构。\n"
|
||||
"5. 如果参考转写信息不足,按产品信息生成通用 SKG 放松口播。\n"
|
||||
'严格返回 JSON:{"rewritten_text":"..."}。\n\n'
|
||||
f"SKG 产品信息:{AUDIO_PRODUCT_BRIEF}\n\n"
|
||||
f"英文转写:\n{source_text or '无'}\n\n"
|
||||
f"中文翻译:\n{source_zh or '无'}"
|
||||
"You are an English short-video voice-over writer for SKG wellness massagers. "
|
||||
"Use the source transcript only for structure, pacing, and emotional hook, then rewrite it into a clean English VO for SKG.\n"
|
||||
"Rules:\n"
|
||||
"1. Output 28-55 English words, suitable for an 8-18 second TTS voice-over.\n"
|
||||
"2. Make it natural, premium, concise, and ready to read aloud.\n"
|
||||
"3. Do not claim medical treatment, cure, pain elimination, or clinical effects.\n"
|
||||
"4. Do not copy the original brand, creator, price, platform language, or exact claims.\n"
|
||||
"5. If the source transcript is too thin, write a general SKG relaxation VO.\n"
|
||||
'Return strict JSON only: {"rewritten_text":"..."}.\n\n'
|
||||
f"SKG product context: {AUDIO_PRODUCT_BRIEF}\n\n"
|
||||
f"English transcript:\n{source_text or 'None'}\n\n"
|
||||
f"Chinese translation for reference:\n{source_zh or 'None'}"
|
||||
)
|
||||
try:
|
||||
resp = llm().chat.completions.create(
|
||||
model=AUDIO_REWRITE_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": "只输出合法 JSON,不要解释,不要 markdown。"},
|
||||
{"role": "system", "content": "Return valid JSON only. No explanation. No markdown."},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
@@ -1564,7 +1574,7 @@ def _minimax_tts_sync(job_id: str, text: str) -> str:
|
||||
"model": MINIMAX_TTS_MODEL,
|
||||
"text": text.strip()[:9500],
|
||||
"stream": False,
|
||||
"language_boost": "Chinese",
|
||||
"language_boost": "English",
|
||||
"output_format": "hex",
|
||||
"voice_setting": {
|
||||
"voice_id": MINIMAX_TTS_VOICE_ID,
|
||||
@@ -1651,6 +1661,7 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
|
||||
])
|
||||
if not wav.exists():
|
||||
raise RuntimeError("音频提取完成但找不到 audio.wav")
|
||||
update(job, source_audio_url=f"/jobs/{job_id}/audio.wav")
|
||||
|
||||
if not LLM_API_KEY:
|
||||
# 无 key 模式:mock 数据
|
||||
@@ -2112,7 +2123,7 @@ def get_job(job_id: str) -> Job:
|
||||
job = JOBS.get(job_id)
|
||||
if not job:
|
||||
raise HTTPException(404, "job not found")
|
||||
return job
|
||||
return job_with_artifacts(job)
|
||||
|
||||
|
||||
@app.delete("/jobs/{job_id}")
|
||||
@@ -2153,7 +2164,7 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job:
|
||||
update(job, error="", audio_script=audio_payload)
|
||||
if not start_audio_processing(job_id, manage_job_status=manage_job_status):
|
||||
update(job, message="音频已在处理中")
|
||||
return job
|
||||
return job_with_artifacts(job)
|
||||
|
||||
|
||||
@app.get("/jobs/{job_id}/video.mp4")
|
||||
|
||||
Reference in New Issue
Block a user