auto-save 2026-05-18 14:46 (~7)
This commit is contained in:
88
api/main.py
88
api/main.py
@@ -105,7 +105,9 @@ MINIMAX_TTS_VOICE_POOL = [
|
||||
for v in os.getenv("MINIMAX_TTS_VOICE_POOL", ",".join(DEFAULT_MINIMAX_TTS_VOICE_POOL)).split(",")
|
||||
if v.strip()
|
||||
]
|
||||
# Voice is intentionally fixed to Azure OpenAI. Older envs may still contain
# VOICE_PROVIDER=minimax, but the runtime must not fall back to MiniMax, so the
# environment variable is deliberately not read at all.
VOICE_PROVIDER = "azure_openai"
|
||||
# Azure OpenAI endpoint settings, each overridable through the environment.
# The base URL is kept without surrounding whitespace or a trailing slash.
AZURE_OPENAI_BASE_URL = (
    os.environ.get("AZURE_OPENAI_BASE_URL", "https://ai.skg.com/azure").strip().rstrip("/")
)
# The general LLM key is used when no Azure-specific key is set.
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", LLM_API_KEY).strip()
# An unset or blank override resolves to the default TTS model name.
AZURE_TTS_MODEL = os.environ.get("AZURE_TTS_MODEL", "").strip() or "gpt-4o-mini-tts"
|
||||
@@ -117,6 +119,11 @@ AZURE_TTS_VOICE_POOL = [
|
||||
if v.strip()
|
||||
]
|
||||
# Primary request path for Azure TTS; a blank override falls back to the default.
AZURE_TTS_PATH = os.getenv("AZURE_TTS_PATH", "/audio/speech").strip() or "/audio/speech"
# Candidate request paths, comma-separated in the environment. The default puts
# the primary path first, followed by two fixed alternatives. dict.fromkeys
# drops repeated entries while keeping first-seen order: with the stock
# defaults the primary path equals the first alternative and would otherwise
# be listed twice.
AZURE_TTS_PATHS = list(
    dict.fromkeys(
        p.strip()
        for p in os.getenv("AZURE_TTS_PATHS", f"{AZURE_TTS_PATH},/audio/speech,/v1/audio/speech").split(",")
        if p.strip()
    )
)
|
||||
|
||||
# Poe API settings. An unset or blank base-URL override resolves to the
# default; the key is an empty string when unset.
POE_API_BASE_URL = os.environ.get("POE_API_BASE_URL", "").strip() or "https://api.poe.com/v1"
POE_API_KEY = os.environ.get("POE_API_KEY", "").strip()
|
||||
@@ -2334,9 +2341,7 @@ def _choose_azure_voice_id() -> str:
|
||||
|
||||
|
||||
def _choose_tts_voice_id() -> str:
    """Return the voice id to use for text-to-speech.

    The voice provider is fixed to Azure OpenAI, so selection is always
    delegated to ``_choose_azure_voice_id``; there is no MiniMax fallback.
    """
    return _choose_azure_voice_id()
|
||||
|
||||
|
||||
def _voice_speed_for(voice_id: str, target_seconds: float, text: str) -> float:
|
||||
@@ -2400,13 +2405,22 @@ def _minimax_tts_sync(job_id: str, text: str, voice_id: str, target_seconds: flo
|
||||
return f"/jobs/{job_id}/audio-script.mp3"
|
||||
|
||||
|
||||
def _azure_tts_url() -> str:
|
||||
path = AZURE_TTS_PATH if AZURE_TTS_PATH.startswith("/") else f"/{AZURE_TTS_PATH}"
|
||||
def _azure_tts_url_for(path_value: str) -> str:
    """Build the full Azure TTS request URL for one candidate path.

    Args:
        path_value: Request path, with or without a leading slash.

    Returns:
        ``AZURE_OPENAI_BASE_URL`` unchanged when it already ends with the
        slash-prefixed path, otherwise the base URL with that path appended.
    """
    suffix = path_value if path_value[:1] == "/" else "/" + path_value
    base = AZURE_OPENAI_BASE_URL
    # Do not double the path when the configured base URL already contains it.
    return base if base.endswith(suffix) else base + suffix
|
||||
|
||||
|
||||
def _azure_tts_urls() -> list[str]:
    """Return the distinct candidate Azure TTS URLs in configuration order.

    Paths come from ``AZURE_TTS_PATHS``, or from the single ``AZURE_TTS_PATH``
    when that list is empty. Each path is expanded with
    ``_azure_tts_url_for``; paths that resolve to an already-seen URL are
    dropped, keeping the first occurrence.
    """
    candidate_paths = AZURE_TTS_PATHS or [AZURE_TTS_PATH]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(_azure_tts_url_for(p) for p in candidate_paths))
|
||||
|
||||
|
||||
def _azure_openai_tts_sync(job_id: str, text: str, voice_id: str, target_seconds: float = 12.0) -> str:
|
||||
if not AZURE_OPENAI_API_KEY:
|
||||
raise RuntimeError("AZURE_OPENAI_API_KEY 或 LLM_API_KEY 未配置,未生成配音")
|
||||
@@ -2419,18 +2433,32 @@ def _azure_openai_tts_sync(job_id: str, text: str, voice_id: str, target_seconds
|
||||
"response_format": "mp3",
|
||||
"speed": _voice_speed_for(voice_id, target_seconds, text),
|
||||
}
|
||||
resp = httpx.post(
|
||||
_azure_tts_url(),
|
||||
headers={
|
||||
"Authorization": f"Bearer {AZURE_OPENAI_API_KEY}",
|
||||
"api-key": AZURE_OPENAI_API_KEY,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json=payload,
|
||||
timeout=120,
|
||||
)
|
||||
headers = {
|
||||
"Authorization": f"Bearer {AZURE_OPENAI_API_KEY}",
|
||||
"api-key": AZURE_OPENAI_API_KEY,
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
resp: httpx.Response | None = None
|
||||
errors: list[str] = []
|
||||
with ai_http_client(timeout=120) as client:
|
||||
for url in _azure_tts_urls():
|
||||
try:
|
||||
current = client.post(url, headers=headers, json=payload)
|
||||
except Exception as e:
|
||||
errors.append(f"{url}: {type(e).__name__}: {e}")
|
||||
continue
|
||||
if current.status_code < 400:
|
||||
resp = current
|
||||
break
|
||||
errors.append(f"{url}: HTTP {current.status_code}: {current.text[:180]}")
|
||||
if current.status_code not in {404, 405}:
|
||||
resp = current
|
||||
break
|
||||
if resp is None:
|
||||
raise RuntimeError("Azure OpenAI TTS 不可用;已尝试 " + " | ".join(errors))
|
||||
if resp.status_code >= 400:
|
||||
raise RuntimeError(f"Azure OpenAI TTS HTTP {resp.status_code}: {resp.text[:300]}")
|
||||
detail = " | ".join(errors) or resp.text[:300]
|
||||
raise RuntimeError(f"Azure OpenAI TTS HTTP {resp.status_code}: {detail[:600]}")
|
||||
audio_bytes = resp.content
|
||||
if not audio_bytes:
|
||||
raise RuntimeError("Azure OpenAI TTS 未返回音频内容")
|
||||
@@ -2447,9 +2475,7 @@ def _azure_openai_tts_sync(job_id: str, text: str, voice_id: str, target_seconds
|
||||
|
||||
|
||||
def _tts_sync(job_id: str, text: str, voice_id: str, target_seconds: float = 12.0) -> tuple[str, str, str]:
    """Synthesize speech for ``text`` and report which backend produced it.

    The voice provider is fixed to Azure OpenAI, so every call is delegated to
    ``_azure_openai_tts_sync``; there is no MiniMax fallback.

    Args:
        job_id: Forwarded unchanged to ``_azure_openai_tts_sync``.
        text: Forwarded unchanged to ``_azure_openai_tts_sync``.
        voice_id: Forwarded unchanged to ``_azure_openai_tts_sync``.
        target_seconds: Forwarded unchanged to ``_azure_openai_tts_sync``.

    Returns:
        A 3-tuple of the string returned by ``_azure_openai_tts_sync``, the
        provider name ``"azure_openai"``, and ``AZURE_TTS_MODEL``.

    Raises:
        Any exception raised by ``_azure_openai_tts_sync`` propagates
        unchanged.
    """
    audio_url = _azure_openai_tts_sync(job_id, text, voice_id, target_seconds)
    return audio_url, "azure_openai", AZURE_TTS_MODEL
|
||||
|
||||
|
||||
def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], target_seconds: float = 12.0) -> AudioScript:
|
||||
@@ -2461,8 +2487,8 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
|
||||
speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id)
|
||||
voice_url = ""
|
||||
voice_error = ""
|
||||
voice_provider = "azure_openai" if VOICE_PROVIDER == "azure_openai" else "minimax"
|
||||
voice_model = AZURE_TTS_MODEL if voice_provider == "azure_openai" else MINIMAX_TTS_MODEL
|
||||
voice_provider = "azure_openai"
|
||||
voice_model = AZURE_TTS_MODEL
|
||||
try:
|
||||
voice_url, voice_provider, voice_model = _tts_sync(job_id, rewritten, selected_voice_id, duration)
|
||||
except Exception as e:
|
||||
@@ -3060,7 +3086,7 @@ def health() -> dict:
|
||||
"auth_configured": WEB_AUTH_CONFIGURED,
|
||||
"base_url": LLM_BASE_URL or "openai-default",
|
||||
"image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
|
||||
"voice_base_url": AZURE_OPENAI_BASE_URL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_BASE_URL,
|
||||
"voice_base_url": AZURE_OPENAI_BASE_URL,
|
||||
"models": {
|
||||
"asr": ASR_MODEL,
|
||||
"local_asr": LOCAL_ASR_MODEL,
|
||||
@@ -3077,15 +3103,13 @@ def health() -> dict:
|
||||
"subject_image": SUBJECT_ASSET_IMAGE_MODEL,
|
||||
"subject_image_fallbacks": SUBJECT_ASSET_IMAGE_MODELS,
|
||||
"voice_provider": VOICE_PROVIDER,
|
||||
"voice_base_url": AZURE_OPENAI_BASE_URL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_BASE_URL,
|
||||
"voice_tts": AZURE_TTS_MODEL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_MODEL,
|
||||
"voice_id": AZURE_TTS_VOICE_ID if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_VOICE_ID,
|
||||
"voice_pool": AZURE_TTS_VOICE_POOL if VOICE_PROVIDER == "azure_openai" else (MINIMAX_TTS_VOICE_POOL or [MINIMAX_TTS_VOICE_ID]),
|
||||
"voice_configured": bool(AZURE_OPENAI_API_KEY) if VOICE_PROVIDER == "azure_openai" else bool(MINIMAX_API_KEY),
|
||||
"minimax_tts": MINIMAX_TTS_MODEL,
|
||||
"minimax_voice": MINIMAX_TTS_VOICE_ID,
|
||||
"minimax_voice_pool": MINIMAX_TTS_VOICE_POOL or [MINIMAX_TTS_VOICE_ID],
|
||||
"minimax_configured": bool(MINIMAX_API_KEY),
|
||||
"voice_base_url": AZURE_OPENAI_BASE_URL,
|
||||
"voice_tts": AZURE_TTS_MODEL,
|
||||
"voice_tts_paths": AZURE_TTS_PATHS,
|
||||
"voice_id": AZURE_TTS_VOICE_ID,
|
||||
"voice_pool": AZURE_TTS_VOICE_POOL,
|
||||
"voice_configured": bool(AZURE_OPENAI_API_KEY),
|
||||
"minimax_disabled": True,
|
||||
"video": VIDEO_MODEL,
|
||||
"video_aliases": VIDEO_MODEL_ALIASES,
|
||||
"video_provider": video_provider_name(),
|
||||
|
||||
Reference in New Issue
Block a user