feat: add automatic production start workflow
This commit is contained in:
27
api/main.py
27
api/main.py
@@ -435,6 +435,8 @@ class AudioScript(BaseModel):
|
||||
source_text: str = ""
|
||||
source_zh: str = ""
|
||||
rewritten_text: str = ""
|
||||
speaker_profile: str = ""
|
||||
rhythm_profile: str = ""
|
||||
product_brief: str = ""
|
||||
rewrite_model: str = ""
|
||||
voice_provider: str = ""
|
||||
@@ -1763,6 +1765,26 @@ def _fallback_audio_script(segments: list[TranscriptSegment], target_seconds: fl
|
||||
)
|
||||
|
||||
|
||||
def _audio_delivery_profile(segments: list[TranscriptSegment], target_seconds: float, voice_id: str) -> tuple[str, str]:
    """Build the speaker and rhythm descriptions attached to an audio script.

    Args:
        segments: Transcript segments; each one's ``en`` text is used for the
            word count, and ``en`` (falling back to ``zh``) decides whether the
            segment counts as spoken.
        target_seconds: Requested duration; a falsy value is treated as 0.
        voice_id: Identifier of the selected TTS voice; may be empty.

    Returns:
        A ``(speaker_profile, rhythm_profile)`` pair of user-facing strings.
    """
    # Use the larger of the requested duration and whatever
    # _segment_duration reports for the segments, never going below zero.
    duration = max(float(target_seconds or 0), _segment_duration(segments), 0.0)

    # Count space-separated, non-blank tokens across every segment's `en` text
    # (newlines are treated as spaces first).
    words = sum(
        1
        for segment in segments
        for token in segment.en.replace("\n", " ").split(" ")
        if token.strip()
    )

    # A segment counts when its `en` text (or `zh` when `en` is empty) is non-blank.
    sentence_count = sum(1 for segment in segments if (segment.en or segment.zh).strip())

    # Words per minute; the divisor is clamped to at least one second.
    if words:
        wpm = int(round(words / max(duration, 1.0) * 60))
    else:
        wpm = 0

    if sentence_count:
        avg_sentence = duration / sentence_count
    else:
        avg_sentence = 0.0

    if voice_id:
        speaker = f"按原素材的短视频单人旁白处理;当前近似音色为 {voice_id},用于保持商业口播的亲近感和节奏。"
    else:
        speaker = "按原素材的短视频单人旁白处理;等待选择 TTS 音色。"

    if duration > 0 and sentence_count:
        rhythm = (
            f"源音频约 {duration:.1f}s,{sentence_count} 个语义段,语速约 {wpm} wpm,平均每段 {avg_sentence:.1f}s;"
            "新配音按相同时长、短句停顿和信息密度改写。"
        )
    else:
        # Not enough timing information to describe the source pacing.
        rhythm = "源音频节奏信息不足;新配音按 8-12 秒信息流广告口播节奏生成。"

    return speaker, rhythm
|
||||
|
||||
|
||||
def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds: float = 12.0) -> tuple[str, str]:
|
||||
fallback = _fallback_audio_script(segments, target_seconds)
|
||||
if not LLM_API_KEY:
|
||||
@@ -1889,6 +1911,7 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
|
||||
duration = max(float(target_seconds or 0), _segment_duration(segments), 4.0)
|
||||
rewritten, rewrite_error = _rewrite_audio_script_sync(segments, duration)
|
||||
selected_voice_id = _choose_minimax_voice_id()
|
||||
speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id)
|
||||
voice_url = ""
|
||||
voice_error = ""
|
||||
try:
|
||||
@@ -1902,6 +1925,8 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
|
||||
source_text=source_text,
|
||||
source_zh=source_zh,
|
||||
rewritten_text=rewritten,
|
||||
speaker_profile=speaker_profile,
|
||||
rhythm_profile=rhythm_profile,
|
||||
product_brief=AUDIO_PRODUCT_BRIEF,
|
||||
rewrite_model=AUDIO_REWRITE_MODEL,
|
||||
voice_provider="minimax",
|
||||
@@ -2472,6 +2497,8 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job:
|
||||
manage_job_status = job.status != "splitting"
|
||||
audio_payload = AudioScript(
|
||||
status="rewriting",
|
||||
speaker_profile="正在分析原音频讲话人和口播节奏…",
|
||||
rhythm_profile="正在按原音频时长、语速和停顿生成 SKG 产品配音脚本…",
|
||||
product_brief=AUDIO_PRODUCT_BRIEF,
|
||||
rewrite_model=AUDIO_REWRITE_MODEL,
|
||||
voice_provider="minimax",
|
||||
|
||||
Reference in New Issue
Block a user