auto-save 2026-05-18 00:23 (~2)
This commit is contained in:
@@ -1,25 +1,5 @@
|
|||||||
{
|
{
|
||||||
"entries": [
|
"entries": [
|
||||||
{
|
|
||||||
"files_changed": 10,
|
|
||||||
"hash": "f7cc49a",
|
|
||||||
"message": "auto-save 2026-05-15 15:21 (+1, ~9)",
|
|
||||||
"ts": "2026-05-15T15:21:20+08:00",
|
|
||||||
"type": "commit"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"files_changed": 2,
|
|
||||||
"message": "Codex 会话活跃 · 最近命令:codex · 2 项未提交变更 · 最近提交:auto-save 2026-05-15 15:21 (+1, ~9)",
|
|
||||||
"ts": "2026-05-15T07:24:47Z",
|
|
||||||
"type": "session-heartbeat"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"files_changed": 3,
|
|
||||||
"hash": "caa28e2",
|
|
||||||
"message": "auto-save 2026-05-15 15:26 (~3)",
|
|
||||||
"ts": "2026-05-15T15:26:51+08:00",
|
|
||||||
"type": "commit"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"files_changed": 1,
|
"files_changed": 1,
|
||||||
"hash": "45e7401",
|
"hash": "45e7401",
|
||||||
@@ -3258,6 +3238,26 @@
|
|||||||
"type": "session-heartbeat",
|
"type": "session-heartbeat",
|
||||||
"message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 2 项未提交变更 · 最近提交:auto-save 2026-05-18 00:07 (~3)",
|
"message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 2 项未提交变更 · 最近提交:auto-save 2026-05-18 00:07 (~3)",
|
||||||
"files_changed": 2
|
"files_changed": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ts": "2026-05-18T00:12:58+08:00",
|
||||||
|
"type": "commit",
|
||||||
|
"message": "auto-save 2026-05-18 00:12 (~3)",
|
||||||
|
"hash": "ba202e4",
|
||||||
|
"files_changed": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ts": "2026-05-18T00:16:10+08:00",
|
||||||
|
"type": "commit",
|
||||||
|
"message": "fix: show generated subject views",
|
||||||
|
"hash": "eeff64c",
|
||||||
|
"files_changed": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ts": "2026-05-17T16:18:31Z",
|
||||||
|
"type": "session-heartbeat",
|
||||||
|
"message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:fix: show generated subject views",
|
||||||
|
"files_changed": 1
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
134
api/main.py
134
api/main.py
@@ -50,8 +50,10 @@ LOCAL_ASR_TIMEOUT_SECONDS = max(30, int(os.getenv("LOCAL_ASR_TIMEOUT_SECONDS", "
|
|||||||
TRANSLATE_MODEL = os.getenv("TRANSLATE_MODEL", "gemini-2.5-flash")
|
TRANSLATE_MODEL = os.getenv("TRANSLATE_MODEL", "gemini-2.5-flash")
|
||||||
REWRITE_MODEL = os.getenv("REWRITE_MODEL", "gemini-2.5-pro")
|
REWRITE_MODEL = os.getenv("REWRITE_MODEL", "gemini-2.5-pro")
|
||||||
VISION_MODEL = os.getenv("VISION_MODEL", "gemini-2.5-flash")
|
VISION_MODEL = os.getenv("VISION_MODEL", "gemini-2.5-flash")
|
||||||
IMAGE_MODEL = os.getenv("IMAGE_MODEL", "gemini-3-pro-image-preview")
|
|
||||||
GPT_IMAGE_MODEL = os.getenv("GPT_IMAGE_MODEL", "gpt-image-2").strip() or "gpt-image-2"
|
GPT_IMAGE_MODEL = os.getenv("GPT_IMAGE_MODEL", "gpt-image-2").strip() or "gpt-image-2"
|
||||||
|
# Image-generation gateway settings.
# The base URL and API key fall back to the shared LLM values when their own
# variable is unset OR blank: a blank entry in an env file would otherwise
# suppress the fallback that the "not configured" errors (which name both
# variables) and the health endpoint assume.
IMAGE_BASE_URL = (os.getenv("IMAGE_BASE_URL", "").strip() or LLM_BASE_URL).strip()
IMAGE_API_KEY = (os.getenv("IMAGE_API_KEY", "").strip() or LLM_API_KEY).strip()
# Default image model; a blank override falls back to GPT_IMAGE_MODEL.
IMAGE_MODEL = os.getenv("IMAGE_MODEL", GPT_IMAGE_MODEL).strip() or GPT_IMAGE_MODEL
|
||||||
SUBJECT_ASSET_IMAGE_MODEL = os.getenv("SUBJECT_ASSET_IMAGE_MODEL", GPT_IMAGE_MODEL).strip() or GPT_IMAGE_MODEL
|
SUBJECT_ASSET_IMAGE_MODEL = os.getenv("SUBJECT_ASSET_IMAGE_MODEL", GPT_IMAGE_MODEL).strip() or GPT_IMAGE_MODEL
|
||||||
SUBJECT_ASSET_IMAGE_MODELS = [
|
SUBJECT_ASSET_IMAGE_MODELS = [
|
||||||
m.strip()
|
m.strip()
|
||||||
@@ -87,6 +89,18 @@ MINIMAX_TTS_VOICE_POOL = [
|
|||||||
for v in os.getenv("MINIMAX_TTS_VOICE_POOL", ",".join(DEFAULT_MINIMAX_TTS_VOICE_POOL)).split(",")
|
for v in os.getenv("MINIMAX_TTS_VOICE_POOL", ",".join(DEFAULT_MINIMAX_TTS_VOICE_POOL)).split(",")
|
||||||
if v.strip()
|
if v.strip()
|
||||||
]
|
]
|
||||||
|
# Text-to-speech provider selection. Any value other than "azure_openai" is
# routed to the MiniMax code path by the helpers that read this constant.
VOICE_PROVIDER = os.getenv("VOICE_PROVIDER", "azure_openai").strip().lower() or "azure_openai"
# Gateway root for the Azure OpenAI-compatible speech API. A blank override
# falls back to the default (like the neighbouring settings) instead of
# producing a scheme-less request URL; the trailing slash is removed so a
# path can be appended directly.
AZURE_OPENAI_BASE_URL = (
    os.getenv("AZURE_OPENAI_BASE_URL", "").strip() or "https://ai.skg.com/azure"
).rstrip("/")
# Falls back to the shared LLM key when unset OR blank, matching the
# "AZURE_OPENAI_API_KEY or LLM_API_KEY" wording of the runtime error.
AZURE_OPENAI_API_KEY = (os.getenv("AZURE_OPENAI_API_KEY", "").strip() or LLM_API_KEY).strip()
AZURE_TTS_MODEL = os.getenv("AZURE_TTS_MODEL", "gpt-4o-mini-tts").strip() or "gpt-4o-mini-tts"
# Single voice used when the pool below ends up empty.
AZURE_TTS_VOICE_ID = os.getenv("AZURE_TTS_VOICE_ID", "alloy").strip() or "alloy"
DEFAULT_AZURE_TTS_VOICE_POOL = ["alloy", "verse", "shimmer"]
# Comma-separated list of voices; blank items are dropped.
AZURE_TTS_VOICE_POOL = [
    v.strip()
    for v in os.getenv("AZURE_TTS_VOICE_POOL", ",".join(DEFAULT_AZURE_TTS_VOICE_POOL)).split(",")
    if v.strip()
]
# Path appended to AZURE_OPENAI_BASE_URL to reach the speech endpoint.
AZURE_TTS_PATH = os.getenv("AZURE_TTS_PATH", "/audio/speech").strip() or "/audio/speech"
|
||||||
|
|
||||||
POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1"
|
POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1"
|
||||||
POE_API_KEY = os.getenv("POE_API_KEY", "").strip()
|
POE_API_KEY = os.getenv("POE_API_KEY", "").strip()
|
||||||
@@ -163,6 +177,7 @@ _MEDIA_BIN_CACHE: dict[str, str] = {}
|
|||||||
# OpenAI 客户端(OpenAI 兼容网关,含 SKG ezlink)
|
# OpenAI 客户端(OpenAI 兼容网关,含 SKG ezlink)
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
_llm_client: OpenAI | None = None
|
_llm_client: OpenAI | None = None
|
||||||
|
_image_client: OpenAI | None = None
|
||||||
def llm() -> OpenAI:
|
def llm() -> OpenAI:
|
||||||
global _llm_client
|
global _llm_client
|
||||||
if _llm_client is None:
|
if _llm_client is None:
|
||||||
@@ -171,6 +186,14 @@ def llm() -> OpenAI:
|
|||||||
_llm_client = OpenAI(base_url=LLM_BASE_URL or None, api_key=LLM_API_KEY)
|
_llm_client = OpenAI(base_url=LLM_BASE_URL or None, api_key=LLM_API_KEY)
|
||||||
return _llm_client
|
return _llm_client
|
||||||
|
|
||||||
|
def image_llm() -> OpenAI:
    """Return the cached OpenAI-compatible client used for image generation.

    The client is created on first use from IMAGE_BASE_URL / IMAGE_API_KEY
    and stored in the module-level ``_image_client`` for later calls.

    Raises:
        RuntimeError: if IMAGE_API_KEY is empty when the client must be built.
    """
    global _image_client
    if _image_client is not None:
        return _image_client
    if not IMAGE_API_KEY:
        raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
    # An empty base URL is passed as None so the SDK uses its own default.
    _image_client = OpenAI(base_url=IMAGE_BASE_URL or None, api_key=IMAGE_API_KEY)
    return _image_client
|
||||||
|
|
||||||
# Pipeline 状态:
|
# Pipeline 状态:
|
||||||
# created → downloading → downloaded(前端“开始”会继续触发音频解析)
|
# created → downloading → downloaded(前端“开始”会继续触发音频解析)
|
||||||
# → splitting → frames_extracted
|
# → splitting → frames_extracted
|
||||||
@@ -2180,6 +2203,18 @@ def _choose_minimax_voice_id() -> str:
|
|||||||
return MINIMAX_TTS_VOICE_ID
|
return MINIMAX_TTS_VOICE_ID
|
||||||
|
|
||||||
|
|
||||||
|
def _choose_azure_voice_id() -> str:
    """Pick a random voice from AZURE_TTS_VOICE_POOL, or AZURE_TTS_VOICE_ID if the pool is empty."""
    if not AZURE_TTS_VOICE_POOL:
        return AZURE_TTS_VOICE_ID
    return random.choice(AZURE_TTS_VOICE_POOL)
|
||||||
|
|
||||||
|
|
||||||
|
def _choose_tts_voice_id() -> str:
    """Choose a voice id for the configured provider.

    Uses the Azure chooser when VOICE_PROVIDER is "azure_openai"; every other
    value goes to the MiniMax chooser.
    """
    chooser = _choose_azure_voice_id if VOICE_PROVIDER == "azure_openai" else _choose_minimax_voice_id
    return chooser()
|
||||||
|
|
||||||
|
|
||||||
def _voice_speed_for(voice_id: str, target_seconds: float, text: str) -> float:
|
def _voice_speed_for(voice_id: str, target_seconds: float, text: str) -> float:
|
||||||
words = len([w for w in text.replace("\n", " ").split(" ") if w.strip()])
|
words = len([w for w in text.replace("\n", " ").split(" ") if w.strip()])
|
||||||
estimated_seconds = words / 2.35 if words else target_seconds
|
estimated_seconds = words / 2.35 if words else target_seconds
|
||||||
@@ -2241,17 +2276,71 @@ def _minimax_tts_sync(job_id: str, text: str, voice_id: str, target_seconds: flo
|
|||||||
return f"/jobs/{job_id}/audio-script.mp3"
|
return f"/jobs/{job_id}/audio-script.mp3"
|
||||||
|
|
||||||
|
|
||||||
|
def _azure_tts_url() -> str:
    """Build the full speech-endpoint URL from AZURE_OPENAI_BASE_URL and AZURE_TTS_PATH.

    The path gets a leading slash if it lacks one. If the base URL already
    ends with that path it is returned unchanged, so the path is not
    appended twice.
    """
    suffix = AZURE_TTS_PATH
    if not suffix.startswith("/"):
        suffix = f"/{suffix}"
    base = AZURE_OPENAI_BASE_URL
    return base if base.endswith(suffix) else f"{base}{suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
def _azure_openai_tts_sync(job_id: str, text: str, voice_id: str, target_seconds: float = 12.0) -> str:
    """Synthesize ``text`` via the Azure OpenAI speech endpoint and save it as the job's MP3.

    Args:
        job_id: Job whose directory receives ``audio_script.mp3``.
        text: Script to speak; surrounding whitespace is ignored.
        voice_id: Voice name sent to the endpoint.
        target_seconds: Desired duration, forwarded to ``_voice_speed_for``
            to derive the ``speed`` value.

    Returns:
        The relative URL ``/jobs/{job_id}/audio-script.mp3``.

    Raises:
        RuntimeError: if no API key is configured, the script is empty, the
            endpoint answers with HTTP >= 400, the body is empty, or the body
            is JSON rather than audio.
    """
    # Local import, as _image_edit_call does; harmless if httpx is also
    # imported at module level (not visible from this part of the file).
    import httpx

    # The OpenAI speech API documents a 4096-character maximum for `input`;
    # a longer payload would be rejected rather than spoken.
    max_input_chars = 4096

    if not AZURE_OPENAI_API_KEY:
        raise RuntimeError("AZURE_OPENAI_API_KEY 或 LLM_API_KEY 未配置,未生成配音")
    script = text.strip()
    if not script:
        raise RuntimeError("改写文案为空,未生成配音")

    payload = {
        "model": AZURE_TTS_MODEL,
        "voice": voice_id,
        "input": script[:max_input_chars],
        "response_format": "mp3",
        "speed": _voice_speed_for(voice_id, target_seconds, text),
    }
    resp = httpx.post(
        _azure_tts_url(),
        headers={
            # Both header styles are sent: Bearer token and Azure-style api-key.
            "Authorization": f"Bearer {AZURE_OPENAI_API_KEY}",
            "api-key": AZURE_OPENAI_API_KEY,
            "Content-Type": "application/json",
        },
        json=payload,
        timeout=120,
    )
    if resp.status_code >= 400:
        raise RuntimeError(f"Azure OpenAI TTS HTTP {resp.status_code}: {resp.text[:300]}")

    audio_bytes = resp.content
    if not audio_bytes:
        raise RuntimeError("Azure OpenAI TTS 未返回音频内容")

    # A 2xx response carrying JSON is an error payload, not audio.
    content_type = resp.headers.get("content-type", "")
    if "application/json" in content_type.lower():
        try:
            data = resp.json()
        except ValueError:  # JSON decode errors are ValueError subclasses
            data = {"error": resp.text[:300]}
        raise RuntimeError(f"Azure OpenAI TTS 返回 JSON 而不是音频:{str(data)[:300]}")

    out = job_dir(job_id) / "audio_script.mp3"
    out.write_bytes(audio_bytes)
    return f"/jobs/{job_id}/audio-script.mp3"
|
||||||
|
|
||||||
|
|
||||||
|
def _tts_sync(job_id: str, text: str, voice_id: str, target_seconds: float = 12.0) -> tuple[str, str, str]:
    """Run text-to-speech with the configured provider.

    Returns:
        A ``(voice_url, provider_name, model_name)`` tuple, where the provider
        name is "azure_openai" or "minimax".
    """
    if VOICE_PROVIDER != "azure_openai":
        voice_url = _minimax_tts_sync(job_id, text, voice_id, target_seconds)
        return voice_url, "minimax", MINIMAX_TTS_MODEL
    voice_url = _azure_openai_tts_sync(job_id, text, voice_id, target_seconds)
    return voice_url, "azure_openai", AZURE_TTS_MODEL
|
||||||
|
|
||||||
|
|
||||||
def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], target_seconds: float = 12.0) -> AudioScript:
|
def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], target_seconds: float = 12.0) -> AudioScript:
|
||||||
source_text = _transcript_join(segments, "en")
|
source_text = _transcript_join(segments, "en")
|
||||||
source_zh = _transcript_join(segments, "zh")
|
source_zh = _transcript_join(segments, "zh")
|
||||||
duration = max(float(target_seconds or 0), _segment_duration(segments), 4.0)
|
duration = max(float(target_seconds or 0), _segment_duration(segments), 4.0)
|
||||||
rewritten, rewrite_error = _rewrite_audio_script_sync(segments, duration)
|
rewritten, rewrite_error = _rewrite_audio_script_sync(segments, duration)
|
||||||
selected_voice_id = _choose_minimax_voice_id()
|
selected_voice_id = _choose_tts_voice_id()
|
||||||
speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id)
|
speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id)
|
||||||
voice_url = ""
|
voice_url = ""
|
||||||
voice_error = ""
|
voice_error = ""
|
||||||
|
voice_provider = "azure_openai" if VOICE_PROVIDER == "azure_openai" else "minimax"
|
||||||
|
voice_model = AZURE_TTS_MODEL if voice_provider == "azure_openai" else MINIMAX_TTS_MODEL
|
||||||
try:
|
try:
|
||||||
voice_url = _minimax_tts_sync(job_id, rewritten, selected_voice_id, duration)
|
voice_url, voice_provider, voice_model = _tts_sync(job_id, rewritten, selected_voice_id, duration)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
voice_error = str(e)
|
voice_error = str(e)
|
||||||
# 改写失败时已有本地 SKG 模板兜底,不把它标成用户可见错误;配音失败才需要提示。
|
# 改写失败时已有本地 SKG 模板兜底,不把它标成用户可见错误;配音失败才需要提示。
|
||||||
@@ -2265,8 +2354,8 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
|
|||||||
rhythm_profile=rhythm_profile,
|
rhythm_profile=rhythm_profile,
|
||||||
product_brief=AUDIO_PRODUCT_BRIEF,
|
product_brief=AUDIO_PRODUCT_BRIEF,
|
||||||
rewrite_model=AUDIO_REWRITE_MODEL,
|
rewrite_model=AUDIO_REWRITE_MODEL,
|
||||||
voice_provider="minimax",
|
voice_provider=voice_provider,
|
||||||
voice_model=MINIMAX_TTS_MODEL,
|
voice_model=voice_model,
|
||||||
voice_id=selected_voice_id,
|
voice_id=selected_voice_id,
|
||||||
voice_url=voice_url,
|
voice_url=voice_url,
|
||||||
error=errors,
|
error=errors,
|
||||||
@@ -2453,8 +2542,8 @@ def _image_edit_call(
|
|||||||
import time as _time
|
import time as _time
|
||||||
import httpx
|
import httpx
|
||||||
from PIL import Image as _PILImage
|
from PIL import Image as _PILImage
|
||||||
if not LLM_API_KEY:
|
if not IMAGE_API_KEY:
|
||||||
raise RuntimeError("LLM_API_KEY 未配置")
|
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
||||||
# model 优先级:models 列表 > 单个 model 参数 > IMAGE_MODEL
|
# model 优先级:models 列表 > 单个 model 参数 > IMAGE_MODEL
|
||||||
if models and len(models) > 0:
|
if models and len(models) > 0:
|
||||||
models_cycle = list(models)
|
models_cycle = list(models)
|
||||||
@@ -2489,9 +2578,9 @@ def _image_edit_call(
|
|||||||
if current_mode == "edit":
|
if current_mode == "edit":
|
||||||
with httpx.Client(timeout=120) as client:
|
with httpx.Client(timeout=120) as client:
|
||||||
r = client.post(
|
r = client.post(
|
||||||
f"{LLM_BASE_URL}/images/generations",
|
f"{IMAGE_BASE_URL}/images/generations",
|
||||||
headers={
|
headers={
|
||||||
"Authorization": f"Bearer {LLM_API_KEY}",
|
"Authorization": f"Bearer {IMAGE_API_KEY}",
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
},
|
},
|
||||||
json={"model": current_model, "prompt": prompt, "image": data_uri, "n": 1},
|
json={"model": current_model, "prompt": prompt, "image": data_uri, "n": 1},
|
||||||
@@ -2499,7 +2588,7 @@ def _image_edit_call(
|
|||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
resp_data = r.json()
|
resp_data = r.json()
|
||||||
else:
|
else:
|
||||||
resp = llm().images.generate(model=current_model, prompt=prompt, n=1)
|
resp = image_llm().images.generate(model=current_model, prompt=prompt, n=1)
|
||||||
resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]}
|
resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]}
|
||||||
if resp_data.get("data"):
|
if resp_data.get("data"):
|
||||||
effective_mode = current_mode
|
effective_mode = current_mode
|
||||||
@@ -2542,15 +2631,15 @@ def _image_text_call(
|
|||||||
"""Text-only image generation with light model rotation."""
|
"""Text-only image generation with light model rotation."""
|
||||||
import base64 as b64lib
|
import base64 as b64lib
|
||||||
import time as _time
|
import time as _time
|
||||||
if not LLM_API_KEY:
|
if not IMAGE_API_KEY:
|
||||||
raise RuntimeError("LLM_API_KEY 未配置")
|
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
||||||
models_cycle = list(models) if models else [model or IMAGE_MODEL]
|
models_cycle = list(models) if models else [model or IMAGE_MODEL]
|
||||||
last_err = ""
|
last_err = ""
|
||||||
resp_data: dict = {}
|
resp_data: dict = {}
|
||||||
for attempt in range(max_attempts):
|
for attempt in range(max_attempts):
|
||||||
current_model = models_cycle[min(attempt, len(models_cycle) - 1)]
|
current_model = models_cycle[min(attempt, len(models_cycle) - 1)]
|
||||||
try:
|
try:
|
||||||
resp = llm().images.generate(model=current_model, prompt=prompt, n=1)
|
resp = image_llm().images.generate(model=current_model, prompt=prompt, n=1)
|
||||||
resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]}
|
resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]}
|
||||||
if resp_data.get("data"):
|
if resp_data.get("data"):
|
||||||
b64 = resp_data["data"][0].get("b64_json")
|
b64 = resp_data["data"][0].get("b64_json")
|
||||||
@@ -2752,6 +2841,8 @@ def health() -> dict:
|
|||||||
"llm_configured": bool(LLM_API_KEY),
|
"llm_configured": bool(LLM_API_KEY),
|
||||||
"auth_configured": WEB_AUTH_CONFIGURED,
|
"auth_configured": WEB_AUTH_CONFIGURED,
|
||||||
"base_url": LLM_BASE_URL or "openai-default",
|
"base_url": LLM_BASE_URL or "openai-default",
|
||||||
|
"image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
|
||||||
|
"voice_base_url": AZURE_OPENAI_BASE_URL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_BASE_URL,
|
||||||
"models": {
|
"models": {
|
||||||
"asr": ASR_MODEL,
|
"asr": ASR_MODEL,
|
||||||
"local_asr": LOCAL_ASR_MODEL,
|
"local_asr": LOCAL_ASR_MODEL,
|
||||||
@@ -2761,9 +2852,16 @@ def health() -> dict:
|
|||||||
"audio_rewrite": AUDIO_REWRITE_MODEL,
|
"audio_rewrite": AUDIO_REWRITE_MODEL,
|
||||||
"vision": VISION_MODEL,
|
"vision": VISION_MODEL,
|
||||||
"image": IMAGE_MODEL,
|
"image": IMAGE_MODEL,
|
||||||
"image_fallbacks": [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"],
|
"image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
|
||||||
|
"image_fallbacks": [IMAGE_MODEL, GPT_IMAGE_MODEL, "gpt-image-1.5"],
|
||||||
"subject_image": SUBJECT_ASSET_IMAGE_MODEL,
|
"subject_image": SUBJECT_ASSET_IMAGE_MODEL,
|
||||||
"subject_image_fallbacks": SUBJECT_ASSET_IMAGE_MODELS,
|
"subject_image_fallbacks": SUBJECT_ASSET_IMAGE_MODELS,
|
||||||
|
"voice_provider": VOICE_PROVIDER,
|
||||||
|
"voice_base_url": AZURE_OPENAI_BASE_URL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_BASE_URL,
|
||||||
|
"voice_tts": AZURE_TTS_MODEL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_MODEL,
|
||||||
|
"voice_id": AZURE_TTS_VOICE_ID if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_VOICE_ID,
|
||||||
|
"voice_pool": AZURE_TTS_VOICE_POOL if VOICE_PROVIDER == "azure_openai" else (MINIMAX_TTS_VOICE_POOL or [MINIMAX_TTS_VOICE_ID]),
|
||||||
|
"voice_configured": bool(AZURE_OPENAI_API_KEY) if VOICE_PROVIDER == "azure_openai" else bool(MINIMAX_API_KEY),
|
||||||
"minimax_tts": MINIMAX_TTS_MODEL,
|
"minimax_tts": MINIMAX_TTS_MODEL,
|
||||||
"minimax_voice": MINIMAX_TTS_VOICE_ID,
|
"minimax_voice": MINIMAX_TTS_VOICE_ID,
|
||||||
"minimax_voice_pool": MINIMAX_TTS_VOICE_POOL or [MINIMAX_TTS_VOICE_ID],
|
"minimax_voice_pool": MINIMAX_TTS_VOICE_POOL or [MINIMAX_TTS_VOICE_ID],
|
||||||
@@ -3049,6 +3147,8 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
|
|||||||
full_prompt = f"{full_prompt}. Avoid: {req.negative_prompt.strip()}"
|
full_prompt = f"{full_prompt}. Avoid: {req.negative_prompt.strip()}"
|
||||||
if not full_prompt:
|
if not full_prompt:
|
||||||
raise HTTPException(400, "prompt required")
|
raise HTTPException(400, "prompt required")
|
||||||
|
if not IMAGE_API_KEY:
|
||||||
|
raise HTTPException(503, "IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
||||||
|
|
||||||
model = req.model or IMAGE_MODEL
|
model = req.model or IMAGE_MODEL
|
||||||
gen_id = uuid.uuid4().hex[:12]
|
gen_id = uuid.uuid4().hex[:12]
|
||||||
@@ -3075,9 +3175,9 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
|
|||||||
# OpenAI SDK 不直接支持 image 参数,用底层 httpx
|
# OpenAI SDK 不直接支持 image 参数,用底层 httpx
|
||||||
with httpx.Client(timeout=120) as client:
|
with httpx.Client(timeout=120) as client:
|
||||||
r = client.post(
|
r = client.post(
|
||||||
f"{LLM_BASE_URL}/images/generations",
|
f"{IMAGE_BASE_URL}/images/generations",
|
||||||
headers={
|
headers={
|
||||||
"Authorization": f"Bearer {LLM_API_KEY}",
|
"Authorization": f"Bearer {IMAGE_API_KEY}",
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
},
|
},
|
||||||
json={
|
json={
|
||||||
@@ -3091,7 +3191,7 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
|
|||||||
resp_data = r.json()
|
resp_data = r.json()
|
||||||
else:
|
else:
|
||||||
# text-only
|
# text-only
|
||||||
resp = llm().images.generate(model=model, prompt=full_prompt, n=1)
|
resp = image_llm().images.generate(model=model, prompt=full_prompt, n=1)
|
||||||
resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]}
|
resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]}
|
||||||
|
|
||||||
if resp_data.get("data"):
|
if resp_data.get("data"):
|
||||||
|
|||||||
Reference in New Issue
Block a user