auto-save 2026-05-18 14:46 (~7)

This commit is contained in:
2026-05-18 14:46:24 +08:00
parent fde94f4698
commit e6387cf7af
7 changed files with 74 additions and 65 deletions

View File

@@ -105,7 +105,9 @@ MINIMAX_TTS_VOICE_POOL = [
for v in os.getenv("MINIMAX_TTS_VOICE_POOL", ",".join(DEFAULT_MINIMAX_TTS_VOICE_POOL)).split(",")
if v.strip()
]
# Voice is intentionally fixed to Azure OpenAI. Older envs may still contain
# VOICE_PROVIDER=minimax, but the runtime must not fall back to MiniMax, so the
# VOICE_PROVIDER environment variable is deliberately not read here.
VOICE_PROVIDER = "azure_openai"
# Base URL of the Azure OpenAI endpoint; surrounding whitespace and any
# trailing "/" characters are removed so paths can be appended directly.
AZURE_OPENAI_BASE_URL = os.getenv("AZURE_OPENAI_BASE_URL", "https://ai.skg.com/azure").strip().rstrip("/")
# API key for the endpoint; LLM_API_KEY is used when AZURE_OPENAI_API_KEY is unset.
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY", LLM_API_KEY).strip()
# TTS model name; an unset or blank AZURE_TTS_MODEL falls back to "gpt-4o-mini-tts".
AZURE_TTS_MODEL = os.getenv("AZURE_TTS_MODEL", "gpt-4o-mini-tts").strip() or "gpt-4o-mini-tts"
@@ -117,6 +119,11 @@ AZURE_TTS_VOICE_POOL = [
if v.strip()
]
AZURE_TTS_PATH = os.getenv("AZURE_TTS_PATH", "/audio/speech").strip() or "/audio/speech"
# Candidate TTS endpoint paths, comma-separated in AZURE_TTS_PATHS. The default
# value repeats AZURE_TTS_PATH when it is left at "/audio/speech", so repeated
# entries are dropped here (first occurrence wins, order preserved) to keep the
# list free of duplicates wherever it is reported or iterated.
AZURE_TTS_PATHS = list(
    dict.fromkeys(
        p.strip()
        for p in os.getenv(
            "AZURE_TTS_PATHS", f"{AZURE_TTS_PATH},/audio/speech,/v1/audio/speech"
        ).split(",")
        if p.strip()
    )
)
# Poe API base URL; an unset or blank POE_API_BASE_URL falls back to the default.
POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1"
# Poe API key; empty string when POE_API_KEY is unset.
POE_API_KEY = os.getenv("POE_API_KEY", "").strip()
@@ -2334,9 +2341,7 @@ def _choose_azure_voice_id() -> str:
def _choose_tts_voice_id() -> str:
    """Return the voice id to use for text-to-speech.

    VOICE_PROVIDER is fixed to "azure_openai", so the choice is always
    delegated to ``_choose_azure_voice_id``; the MiniMax branch could never
    run and has been removed along with the unreachable duplicate return.
    """
    return _choose_azure_voice_id()
def _voice_speed_for(voice_id: str, target_seconds: float, text: str) -> float:
@@ -2400,13 +2405,22 @@ def _minimax_tts_sync(job_id: str, text: str, voice_id: str, target_seconds: flo
return f"/jobs/{job_id}/audio-script.mp3"
def _azure_tts_url() -> str:
path = AZURE_TTS_PATH if AZURE_TTS_PATH.startswith("/") else f"/{AZURE_TTS_PATH}"
def _azure_tts_url_for(path_value: str) -> str:
    """Build the full TTS request URL for one endpoint path.

    A leading "/" is added to ``path_value`` when missing. If
    AZURE_OPENAI_BASE_URL already ends with that path, the base URL is
    returned unchanged; otherwise the path is appended to it.
    """
    suffix = path_value
    if not suffix.startswith("/"):
        suffix = f"/{suffix}"
    base = AZURE_OPENAI_BASE_URL
    # Avoid doubling the path when the configured base URL already includes it.
    already_included = base.endswith(suffix)
    return base if already_included else f"{base}{suffix}"
def _azure_tts_urls() -> list[str]:
    """Return the distinct TTS URLs to try, in configuration order.

    Paths come from AZURE_TTS_PATHS, or from the single AZURE_TTS_PATH when
    that list is empty. Different paths can resolve to the same URL, so
    repeated URLs are dropped, keeping the first occurrence.
    """
    candidate_paths = AZURE_TTS_PATHS or [AZURE_TTS_PATH]
    # dict.fromkeys keeps first-seen order while discarding repeated URLs.
    return list(dict.fromkeys(_azure_tts_url_for(p) for p in candidate_paths))
def _azure_openai_tts_sync(job_id: str, text: str, voice_id: str, target_seconds: float = 12.0) -> str:
if not AZURE_OPENAI_API_KEY:
raise RuntimeError("AZURE_OPENAI_API_KEY 或 LLM_API_KEY 未配置,未生成配音")
@@ -2419,18 +2433,32 @@ def _azure_openai_tts_sync(job_id: str, text: str, voice_id: str, target_seconds
"response_format": "mp3",
"speed": _voice_speed_for(voice_id, target_seconds, text),
}
resp = httpx.post(
_azure_tts_url(),
headers={
"Authorization": f"Bearer {AZURE_OPENAI_API_KEY}",
"api-key": AZURE_OPENAI_API_KEY,
"Content-Type": "application/json",
},
json=payload,
timeout=120,
)
headers = {
"Authorization": f"Bearer {AZURE_OPENAI_API_KEY}",
"api-key": AZURE_OPENAI_API_KEY,
"Content-Type": "application/json",
}
resp: httpx.Response | None = None
errors: list[str] = []
with ai_http_client(timeout=120) as client:
for url in _azure_tts_urls():
try:
current = client.post(url, headers=headers, json=payload)
except Exception as e:
errors.append(f"{url}: {type(e).__name__}: {e}")
continue
if current.status_code < 400:
resp = current
break
errors.append(f"{url}: HTTP {current.status_code}: {current.text[:180]}")
if current.status_code not in {404, 405}:
resp = current
break
if resp is None:
raise RuntimeError("Azure OpenAI TTS 不可用;已尝试 " + " | ".join(errors))
if resp.status_code >= 400:
raise RuntimeError(f"Azure OpenAI TTS HTTP {resp.status_code}: {resp.text[:300]}")
detail = " | ".join(errors) or resp.text[:300]
raise RuntimeError(f"Azure OpenAI TTS HTTP {resp.status_code}: {detail[:600]}")
audio_bytes = resp.content
if not audio_bytes:
raise RuntimeError("Azure OpenAI TTS 未返回音频内容")
@@ -2447,9 +2475,7 @@ def _azure_openai_tts_sync(job_id: str, text: str, voice_id: str, target_seconds
def _tts_sync(job_id: str, text: str, voice_id: str, target_seconds: float = 12.0) -> tuple[str, str, str]:
    """Synthesize speech for ``text`` and report which backend produced it.

    VOICE_PROVIDER is fixed to "azure_openai", so synthesis always goes through
    ``_azure_openai_tts_sync``; the MiniMax branch could never run and has been
    removed along with the unreachable duplicate return.

    Returns:
        A ``(voice_url, provider, model)`` tuple: the string returned by
        ``_azure_openai_tts_sync``, the literal "azure_openai", and
        AZURE_TTS_MODEL.

    Raises:
        Whatever ``_azure_openai_tts_sync`` raises; nothing is caught here.
    """
    return _azure_openai_tts_sync(job_id, text, voice_id, target_seconds), "azure_openai", AZURE_TTS_MODEL
def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], target_seconds: float = 12.0) -> AudioScript:
@@ -2461,8 +2487,8 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id)
voice_url = ""
voice_error = ""
voice_provider = "azure_openai" if VOICE_PROVIDER == "azure_openai" else "minimax"
voice_model = AZURE_TTS_MODEL if voice_provider == "azure_openai" else MINIMAX_TTS_MODEL
voice_provider = "azure_openai"
voice_model = AZURE_TTS_MODEL
try:
voice_url, voice_provider, voice_model = _tts_sync(job_id, rewritten, selected_voice_id, duration)
except Exception as e:
@@ -3060,7 +3086,7 @@ def health() -> dict:
"auth_configured": WEB_AUTH_CONFIGURED,
"base_url": LLM_BASE_URL or "openai-default",
"image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
"voice_base_url": AZURE_OPENAI_BASE_URL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_BASE_URL,
"voice_base_url": AZURE_OPENAI_BASE_URL,
"models": {
"asr": ASR_MODEL,
"local_asr": LOCAL_ASR_MODEL,
@@ -3077,15 +3103,13 @@ def health() -> dict:
"subject_image": SUBJECT_ASSET_IMAGE_MODEL,
"subject_image_fallbacks": SUBJECT_ASSET_IMAGE_MODELS,
"voice_provider": VOICE_PROVIDER,
"voice_base_url": AZURE_OPENAI_BASE_URL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_BASE_URL,
"voice_tts": AZURE_TTS_MODEL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_MODEL,
"voice_id": AZURE_TTS_VOICE_ID if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_VOICE_ID,
"voice_pool": AZURE_TTS_VOICE_POOL if VOICE_PROVIDER == "azure_openai" else (MINIMAX_TTS_VOICE_POOL or [MINIMAX_TTS_VOICE_ID]),
"voice_configured": bool(AZURE_OPENAI_API_KEY) if VOICE_PROVIDER == "azure_openai" else bool(MINIMAX_API_KEY),
"minimax_tts": MINIMAX_TTS_MODEL,
"minimax_voice": MINIMAX_TTS_VOICE_ID,
"minimax_voice_pool": MINIMAX_TTS_VOICE_POOL or [MINIMAX_TTS_VOICE_ID],
"minimax_configured": bool(MINIMAX_API_KEY),
"voice_base_url": AZURE_OPENAI_BASE_URL,
"voice_tts": AZURE_TTS_MODEL,
"voice_tts_paths": AZURE_TTS_PATHS,
"voice_id": AZURE_TTS_VOICE_ID,
"voice_pool": AZURE_TTS_VOICE_POOL,
"voice_configured": bool(AZURE_OPENAI_API_KEY),
"minimax_disabled": True,
"video": VIDEO_MODEL,
"video_aliases": VIDEO_MODEL_ALIASES,
"video_provider": video_provider_name(),