fix: route ai polish through available models
This commit is contained in:
3
RULES.md
3
RULES.md
@@ -150,7 +150,8 @@
|
|||||||
- `LOCAL_ASR_BIN` / `LOCAL_ASR_MODEL` / `LOCAL_ASR_TIMEOUT_SECONDS`:本机 ASR 兜底,默认使用 `/opt/homebrew/bin/mlx_whisper` + `mlx-community/whisper-tiny`,用于当前 SKG 网关 `/audio/transcriptions` 不可用时生成真实逐句时间轴
|
- `LOCAL_ASR_BIN` / `LOCAL_ASR_MODEL` / `LOCAL_ASR_TIMEOUT_SECONDS`:本机 ASR 兜底,默认使用 `/opt/homebrew/bin/mlx_whisper` + `mlx-community/whisper-tiny`,用于当前 SKG 网关 `/audio/transcriptions` 不可用时生成真实逐句时间轴
|
||||||
- `TRANSLATE_MODEL`:字幕翻译模型,默认 `gemini-2.5-flash`
|
- `TRANSLATE_MODEL`:字幕翻译模型,默认 `gemini-2.5-flash`
|
||||||
- `GPT_TEXT_MODEL`:GPT 文本 / 视觉默认模型,默认 `gpt-4o`;用于兜底修正旧 Gemini 覆盖值
|
- `GPT_TEXT_MODEL`:GPT 文本 / 视觉默认模型,默认 `gpt-4o`;用于兜底修正旧 Gemini 覆盖值
|
||||||
- `REWRITE_MODEL`:通用改写/分镜描述模型,默认 `gpt-4o`;如果旧环境仍写 `gemini-*`,后端会自动改用 `GPT_TEXT_MODEL`
|
- `REWRITE_MODEL`:通用改写/分镜描述主模型,同时也是 AI 润色(`/prompt/polish`)的主模型,当前默认 `gpt-4o-mini`;`/prompt/polish` 会先尝试请求里指定的 `model`(如果有),再尝试 `REWRITE_MODEL`,主模型不可用时继续依次尝试 `REWRITE_MODEL_FALLBACKS`
|
||||||
|
- `REWRITE_MODEL_FALLBACKS`:AI 润色备用模型列表,逗号分隔,默认 `gpt-4o-mini,gemini-2.5-flash`;空项会被忽略,与请求模型或 `REWRITE_MODEL` 重复的项(不区分大小写)只会尝试一次;只有全部模型都失败时才允许返回本地模板 fallback
|
||||||
- `VISION_MODEL`:关键帧画面理解模型,默认 `gpt-4o`;如果旧环境仍写 `gemini-*`,后端会自动改用 `GPT_TEXT_MODEL`
|
- `VISION_MODEL`:关键帧画面理解模型,默认 `gpt-4o`;如果旧环境仍写 `gemini-*`,后端会自动改用 `GPT_TEXT_MODEL`
|
||||||
- `AUDIO_REWRITE_MODEL`:后续音频口播改写模型,默认跟随 `REWRITE_MODEL`;如果旧环境仍写 `gemini-*`,后端会自动改用 `REWRITE_MODEL`
|
- `AUDIO_REWRITE_MODEL`:后续音频口播改写模型,默认跟随 `REWRITE_MODEL`;如果旧环境仍写 `gemini-*`,后端会自动改用 `REWRITE_MODEL`
|
||||||
- `AUDIO_PRODUCT_BRIEF`:音频口播改写时注入的 SKG 产品卖点
|
- `AUDIO_PRODUCT_BRIEF`:音频口播改写时注入的 SKG 产品卖点
|
||||||
|
|||||||
@@ -30,7 +30,8 @@ LOCAL_ASR_MODEL=mlx-community/whisper-tiny
|
|||||||
LOCAL_ASR_TIMEOUT_SECONDS=180
|
LOCAL_ASR_TIMEOUT_SECONDS=180
|
||||||
TRANSLATE_MODEL=gemini-2.5-flash
|
TRANSLATE_MODEL=gemini-2.5-flash
|
||||||
GPT_TEXT_MODEL=gpt-4o
|
GPT_TEXT_MODEL=gpt-4o
|
||||||
REWRITE_MODEL=gpt-4o
|
REWRITE_MODEL=gpt-4o-mini
|
||||||
|
REWRITE_MODEL_FALLBACKS=gemini-2.5-flash
|
||||||
VISION_MODEL=gpt-4o
|
VISION_MODEL=gpt-4o
|
||||||
PRODUCT_VIEW_MODEL=gpt-image-2
|
PRODUCT_VIEW_MODEL=gpt-image-2
|
||||||
IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
|
IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
|
||||||
|
|||||||
64
api/main.py
64
api/main.py
@@ -107,6 +107,11 @@ def gpt_model_env(name: str, default: str | None = None) -> str:
|
|||||||
|
|
||||||
|
|
||||||
REWRITE_MODEL = gpt_model_env("REWRITE_MODEL")
|
REWRITE_MODEL = gpt_model_env("REWRITE_MODEL")
|
||||||
|
# Backup model names, parsed from the comma-separated REWRITE_MODEL_FALLBACKS
# environment variable. Each entry is whitespace-trimmed and blank entries
# (e.g. from a trailing comma) are dropped; order is preserved.
REWRITE_MODEL_FALLBACKS = [
    name
    for name in (
        part.strip()
        for part in os.getenv("REWRITE_MODEL_FALLBACKS", "gpt-4o-mini,gemini-2.5-flash").split(",")
    )
    if name
]
|
||||||
VISION_MODEL = gpt_model_env("VISION_MODEL")
|
VISION_MODEL = gpt_model_env("VISION_MODEL")
|
||||||
IMAGE_BASE_URL = os.getenv("IMAGE_BASE_URL", LLM_BASE_URL).strip()
|
IMAGE_BASE_URL = os.getenv("IMAGE_BASE_URL", LLM_BASE_URL).strip()
|
||||||
IMAGE_API_KEY = os.getenv("IMAGE_API_KEY", LLM_API_KEY).strip()
|
IMAGE_API_KEY = os.getenv("IMAGE_API_KEY", LLM_API_KEY).strip()
|
||||||
@@ -5914,6 +5919,7 @@ class CreativeCopyResp(BaseModel):
|
|||||||
class PromptPolishReq(BaseModel):
|
class PromptPolishReq(BaseModel):
|
||||||
text: str
|
text: str
|
||||||
system_prompt: str = ""
|
system_prompt: str = ""
|
||||||
|
model: str = ""
|
||||||
mode: Literal["image", "video", "general", "chat"] = "image"
|
mode: Literal["image", "video", "general", "chat"] = "image"
|
||||||
target_language: Literal["en", "zh", "keep"] = "en"
|
target_language: Literal["en", "zh", "keep"] = "en"
|
||||||
|
|
||||||
@@ -6320,11 +6326,26 @@ def _prompt_polish_fallback(req: PromptPolishReq) -> PromptPolishResp:
|
|||||||
return PromptPolishResp(model="fallback", text=_sanitize_polished_prompt(req, intent, _basic_polished_prompt(req, intent)))
|
return PromptPolishResp(model="fallback", text=_sanitize_polished_prompt(req, intent, _basic_polished_prompt(req, intent)))
|
||||||
|
|
||||||
|
|
||||||
def _repair_polished_prompt(req: PromptPolishReq, intent: PromptIntent, output: str, *, allow_llm: bool = False) -> str:
|
def _prompt_polish_model_candidates(req: PromptPolishReq) -> list[str]:
    """Return the ordered, de-duplicated model names to try for a polish request.

    Priority is: the model named on the request (if any), then
    ``REWRITE_MODEL``, then every entry of ``REWRITE_MODEL_FALLBACKS``.
    Names are whitespace-trimmed, blank names are dropped, and names that
    differ only by letter case are treated as the same model (the first
    spelling encountered is the one returned).
    """
    ordered: dict[str, str] = {}
    for raw in ((req.model or ""), REWRITE_MODEL, *REWRITE_MODEL_FALLBACKS):
        name = raw.strip()
        if not name:
            continue
        # dicts keep insertion order, and setdefault never overwrites, so the
        # earliest spelling wins for each lowercase key.
        ordered.setdefault(name.lower(), name)
    return list(ordered.values())
|
||||||
|
|
||||||
|
|
||||||
|
def _repair_polished_prompt(req: PromptPolishReq, intent: PromptIntent, output: str, *, allow_llm: bool = False, model: str | None = None) -> str:
|
||||||
out = _sanitize_polished_prompt(req, intent, output)
|
out = _sanitize_polished_prompt(req, intent, output)
|
||||||
issue = _polished_prompt_issue(intent, out)
|
issue = _polished_prompt_issue(intent, out)
|
||||||
if not issue or not allow_llm or not LLM_API_KEY:
|
if not issue or not allow_llm or not LLM_API_KEY:
|
||||||
return out
|
return out
|
||||||
|
repair_model = (model or REWRITE_MODEL).strip() or REWRITE_MODEL
|
||||||
repair_prompt = (
|
repair_prompt = (
|
||||||
"Repair the rewritten generation prompt so it follows the source input exactly.\n"
|
"Repair the rewritten generation prompt so it follows the source input exactly.\n"
|
||||||
f"Issue to fix: {issue}.\n"
|
f"Issue to fix: {issue}.\n"
|
||||||
@@ -6340,7 +6361,7 @@ def _repair_polished_prompt(req: PromptPolishReq, intent: PromptIntent, output:
|
|||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
resp = llm().chat.completions.create(
|
resp = llm().chat.completions.create(
|
||||||
model=REWRITE_MODEL,
|
model=repair_model,
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": "You repair generation prompts by removing contradictions and preserving only source intent."},
|
{"role": "system", "content": "You repair generation prompts by removing contradictions and preserving only source intent."},
|
||||||
{"role": "user", "content": repair_prompt},
|
{"role": "user", "content": repair_prompt},
|
||||||
@@ -6410,23 +6431,28 @@ def polish_prompt(req: PromptPolishReq) -> PromptPolishResp:
|
|||||||
prompt += f"\nUser-selected polishing guidance:\n{user_system[:1000]}\n"
|
prompt += f"\nUser-selected polishing guidance:\n{user_system[:1000]}\n"
|
||||||
prompt += f"\nSource input:\n{intent.cleaned_text[:2500]}"
|
prompt += f"\nSource input:\n{intent.cleaned_text[:2500]}"
|
||||||
|
|
||||||
try:
|
model_errors: list[str] = []
|
||||||
resp = llm().chat.completions.create(
|
for model in _prompt_polish_model_candidates(req):
|
||||||
model=REWRITE_MODEL,
|
try:
|
||||||
messages=[
|
resp = llm().chat.completions.create(
|
||||||
{"role": "system", "content": "You are a neutral professional prompt editor. Preserve source intent exactly and never inject SKG or unrelated brands, products, platforms, people, or marketing context."},
|
model=model,
|
||||||
{"role": "user", "content": prompt},
|
messages=[
|
||||||
],
|
{"role": "system", "content": "You are a neutral professional prompt editor. Preserve source intent exactly and never inject SKG or unrelated brands, products, platforms, people, or marketing context."},
|
||||||
temperature=0.45,
|
{"role": "user", "content": prompt},
|
||||||
max_tokens=900,
|
],
|
||||||
)
|
temperature=0.45,
|
||||||
out = _clean_prompt_output(resp.choices[0].message.content or "")
|
max_tokens=900,
|
||||||
if not out:
|
)
|
||||||
out = _prompt_polish_fallback(req).text
|
out = _clean_prompt_output(resp.choices[0].message.content or "")
|
||||||
return PromptPolishResp(model=REWRITE_MODEL, text=_repair_polished_prompt(req, intent, out, allow_llm=True))
|
if not out:
|
||||||
except Exception as e:
|
raise RuntimeError("empty prompt polish response")
|
||||||
print(f"[prompt polish fallback] {e}", flush=True)
|
return PromptPolishResp(model=model, text=_repair_polished_prompt(req, intent, out, allow_llm=True, model=model))
|
||||||
return _prompt_polish_fallback(req)
|
except Exception as e:
|
||||||
|
message = str(e).replace("\n", " ")[:400]
|
||||||
|
model_errors.append(f"{model}: {message}")
|
||||||
|
print(f"[prompt polish model fallback] model={model} error={message}", flush=True)
|
||||||
|
print(f"[prompt polish fallback] {' | '.join(model_errors)}", flush=True)
|
||||||
|
return _prompt_polish_fallback(req)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/translate")
|
@app.post("/translate")
|
||||||
|
|||||||
@@ -54,7 +54,8 @@ AI_HTTP_PROXY=
|
|||||||
|
|
||||||
# Text/vision/audio model names
|
# Text/vision/audio model names
|
||||||
GPT_TEXT_MODEL=gpt-4o
|
GPT_TEXT_MODEL=gpt-4o
|
||||||
REWRITE_MODEL=gpt-4o
|
REWRITE_MODEL=gpt-4o-mini
|
||||||
|
REWRITE_MODEL_FALLBACKS=gemini-2.5-flash
|
||||||
VISION_MODEL=gpt-4o
|
VISION_MODEL=gpt-4o
|
||||||
TRANSLATE_MODEL=gemini-2.5-flash
|
TRANSLATE_MODEL=gemini-2.5-flash
|
||||||
ASR_BASE_URL=https://ai.skg.com/azure/v1
|
ASR_BASE_URL=https://ai.skg.com/azure/v1
|
||||||
|
|||||||
@@ -56,7 +56,8 @@ FASTER_WHISPER_DEVICE=cpu
|
|||||||
FASTER_WHISPER_COMPUTE_TYPE=int8
|
FASTER_WHISPER_COMPUTE_TYPE=int8
|
||||||
TRANSLATE_MODEL=gemini-2.5-flash
|
TRANSLATE_MODEL=gemini-2.5-flash
|
||||||
GPT_TEXT_MODEL=gpt-4o
|
GPT_TEXT_MODEL=gpt-4o
|
||||||
REWRITE_MODEL=gpt-4o
|
REWRITE_MODEL=gpt-4o-mini
|
||||||
|
REWRITE_MODEL_FALLBACKS=gemini-2.5-flash
|
||||||
VISION_MODEL=gpt-4o
|
VISION_MODEL=gpt-4o
|
||||||
PRODUCT_VIEW_MODEL=gpt-image-2
|
PRODUCT_VIEW_MODEL=gpt-image-2
|
||||||
IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
|
IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
|
||||||
|
|||||||
@@ -1417,6 +1417,19 @@ ProductRefStateItem {
|
|||||||
<p><strong>影响:</strong>Postgres 里的 <code>canvas_projects</code> 重新成为主存储;刷新、换浏览器或本地缓存异常时,不应再把服务端项目缩小或清空。旧项目首次迁移仍可用,但迁移动作变为非破坏性。</p>
|
<p><strong>影响:</strong>Postgres 里的 <code>canvas_projects</code> 重新成为主存储;刷新、换浏览器或本地缓存异常时,不应再把服务端项目缩小或清空。旧项目首次迁移仍可用,但迁移动作变为非破坏性。</p>
|
||||||
</div>
|
</div>
|
||||||
</article>
|
</article>
|
||||||
|
<article class="change">
|
||||||
|
<header>
|
||||||
|
<h3>2026-06-04 · AI 润色不再静默套模板</h3>
|
||||||
|
<span class="tag amber">API</span>
|
||||||
|
<span class="tag violet">Canvas</span>
|
||||||
|
<span class="tag cyan">Model</span>
|
||||||
|
</header>
|
||||||
|
<div class="body">
|
||||||
|
<p><strong>问题:</strong>当前网关分组对 <code>gpt-4o</code> 返回“无可用渠道”,而 <code>/prompt/polish</code> 捕获异常后直接返回本地 <code>fallback</code>,用户看到的是固定尾巴模板,不是真正的模型润色;同时前端 <code>useChat({ model: 'gpt-4o-mini' })</code> 没有把 <code>model</code> 发给后端,配置实际上未生效。</p>
|
||||||
|
<p><strong>改动:</strong><code>PromptPolishReq</code> 新增 <code>model</code> 字段,<code>web/canvas-app/src/hooks/useApi.js</code> 会把前端选择的模型传到 <code>/prompt/polish</code>;后端按“请求模型 → <code>REWRITE_MODEL</code> → <code>REWRITE_MODEL_FALLBACKS</code>”依次尝试,当前本地默认 <code>REWRITE_MODEL=gpt-4o-mini</code>、备用 <code>gemini-2.5-flash</code>。只有全部模型失败时才返回本地模板 <code>model=fallback</code>,并在日志里记录每个失败模型。</p>
|
||||||
|
<p><strong>影响:</strong>画布底部和文本节点的 AI 润色会优先走真实模型输出,不再把固定 “Clear main subject...” 或 “Cinematic motion...” 当作正常润色结果;如果未来网关主模型不可用,接口会自动降级到备用模型,而不是立刻套模板。</p>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
<article class="change">
|
<article class="change">
|
||||||
<header>
|
<header>
|
||||||
<h3>2026-05-26 · AI 润色改为意图分类和冲突校验</h3>
|
<h3>2026-05-26 · AI 润色改为意图分类和冲突校验</h3>
|
||||||
|
|||||||
@@ -200,6 +200,7 @@ export const useChat = (options = {}) => {
|
|||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
text: content,
|
text: content,
|
||||||
system_prompt: options.systemPrompt || '',
|
system_prompt: options.systemPrompt || '',
|
||||||
|
model: options.model || '',
|
||||||
mode,
|
mode,
|
||||||
target_language: options.targetLanguage || (mode === 'chat' ? 'keep' : 'en')
|
target_language: options.targetLanguage || (mode === 'chat' ? 'keep' : 'en')
|
||||||
})
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user