feat: add automatic production start workflow

This commit is contained in:
2026-05-17 12:33:13 +08:00
parent 08f18373b9
commit b02bc3f583
7 changed files with 76 additions and 25 deletions

View File

@@ -435,6 +435,8 @@ class AudioScript(BaseModel):
source_text: str = ""
source_zh: str = ""
rewritten_text: str = ""
speaker_profile: str = ""
rhythm_profile: str = ""
product_brief: str = ""
rewrite_model: str = ""
voice_provider: str = ""
@@ -1763,6 +1765,26 @@ def _fallback_audio_script(segments: list[TranscriptSegment], target_seconds: fl
)
def _audio_delivery_profile(segments: list[TranscriptSegment], target_seconds: float, voice_id: str) -> tuple[str, str]:
    """Build the speaker and rhythm descriptions shown alongside an audio script.

    Args:
        segments: Transcript segments of the source audio. Each segment's
            ``en`` text feeds the word count; ``en`` or ``zh`` decides
            whether the segment counts as a non-empty semantic segment.
        target_seconds: Requested duration of the new voice-over. The larger
            of this value and ``_segment_duration(segments)`` is used.
        voice_id: Selected TTS voice id; a falsy value yields the
            "waiting for a voice" speaker text.

    Returns:
        A ``(speaker_profile, rhythm_profile)`` tuple of user-facing strings.
    """
    duration = max(float(target_seconds or 0), _segment_duration(segments), 0.0)

    # ``en`` may be empty/None (the segment check below already falls back to
    # ``zh``), so guard before splitting. ``str.split()`` with no argument
    # splits on any whitespace run and never yields empty tokens.
    words = sum(len((s.en or "").split()) for s in segments)
    sentence_count = sum(1 for s in segments if (s.en or s.zh or "").strip())

    # Clamp the divisor to one second so very short clips do not produce an
    # absurdly large words-per-minute figure.
    wpm = round(words / max(duration, 1.0) * 60) if words else 0

    if voice_id:
        speaker = f"按原素材的短视频单人旁白处理;当前近似音色为 {voice_id},用于保持商业口播的亲近感和节奏。"
    else:
        speaker = "按原素材的短视频单人旁白处理;等待选择 TTS 音色。"

    if duration > 0 and sentence_count:
        avg_sentence = duration / sentence_count
        facts = [f"源音频约 {duration:.1f}s", f"{sentence_count} 个语义段"]
        # Only report a speaking rate when English words were actually
        # counted; "0 wpm" would be misleading for non-English transcripts.
        if wpm:
            facts.append(f"语速约 {wpm} wpm")
        facts.append(f"平均每段 {avg_sentence:.1f}s")
        # Separate each fact explicitly; without separators the duration and
        # segment count run together (e.g. "12.0s3").
        rhythm = ",".join(facts) + ";新配音按相同时长、短句停顿和信息密度改写。"
    else:
        rhythm = "源音频节奏信息不足;新配音按 8-12 秒信息流广告口播节奏生成。"

    return speaker, rhythm
def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds: float = 12.0) -> tuple[str, str]:
fallback = _fallback_audio_script(segments, target_seconds)
if not LLM_API_KEY:
@@ -1889,6 +1911,7 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
duration = max(float(target_seconds or 0), _segment_duration(segments), 4.0)
rewritten, rewrite_error = _rewrite_audio_script_sync(segments, duration)
selected_voice_id = _choose_minimax_voice_id()
speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id)
voice_url = ""
voice_error = ""
try:
@@ -1902,6 +1925,8 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
source_text=source_text,
source_zh=source_zh,
rewritten_text=rewritten,
speaker_profile=speaker_profile,
rhythm_profile=rhythm_profile,
product_brief=AUDIO_PRODUCT_BRIEF,
rewrite_model=AUDIO_REWRITE_MODEL,
voice_provider="minimax",
@@ -2472,6 +2497,8 @@ async def trigger_transcribe(job_id: str, bg: BackgroundTasks) -> Job:
manage_job_status = job.status != "splitting"
audio_payload = AudioScript(
status="rewriting",
speaker_profile="正在分析原音频讲话人和口播节奏…",
rhythm_profile="正在按原音频时长、语速和停顿生成 SKG 产品配音脚本…",
product_brief=AUDIO_PRODUCT_BRIEF,
rewrite_model=AUDIO_REWRITE_MODEL,
voice_provider="minimax",