From 88d598303c9440536d9e9a5e35f57f6a046d3cae Mon Sep 17 00:00:00 2001 From: kang Date: Thu, 4 Jun 2026 10:13:31 +0800 Subject: [PATCH] fix: route ai polish through available models --- RULES.md | 3 +- api/.env.example | 3 +- api/main.py | 64 +++++++++++++++++++++--------- deploy/.env.local.example | 3 +- deploy/.env.production.example | 3 +- docs/source-analysis.html | 13 ++++++ web/canvas-app/src/hooks/useApi.js | 1 + 7 files changed, 67 insertions(+), 23 deletions(-) diff --git a/RULES.md b/RULES.md index 9eb7bdb..b6ff9b8 100644 --- a/RULES.md +++ b/RULES.md @@ -150,7 +150,8 @@ - `LOCAL_ASR_BIN` / `LOCAL_ASR_MODEL` / `LOCAL_ASR_TIMEOUT_SECONDS`:本机 ASR 兜底,默认使用 `/opt/homebrew/bin/mlx_whisper` + `mlx-community/whisper-tiny`,用于当前 SKG 网关 `/audio/transcriptions` 不可用时生成真实逐句时间轴 - `TRANSLATE_MODEL`:字幕翻译模型,默认 `gemini-2.5-flash` - `GPT_TEXT_MODEL`:GPT 文本 / 视觉默认模型,默认 `gpt-4o`;用于兜底修正旧 Gemini 覆盖值 -- `REWRITE_MODEL`:通用改写/分镜描述模型,默认 `gpt-4o`;如果旧环境仍写 `gemini-*`,后端会自动改用 `GPT_TEXT_MODEL` +- `REWRITE_MODEL`:通用改写/分镜描述主模型,当前用于 AI 润色时默认 `gpt-4o-mini`;如果主模型不可用,`/prompt/polish` 会继续尝试 `REWRITE_MODEL_FALLBACKS` +- `REWRITE_MODEL_FALLBACKS`:AI 润色备用模型列表,逗号分隔,默认 `gpt-4o-mini,gemini-2.5-flash`;只有全部失败时才允许返回本地模板 fallback - `VISION_MODEL`:关键帧画面理解模型,默认 `gpt-4o`;如果旧环境仍写 `gemini-*`,后端会自动改用 `GPT_TEXT_MODEL` - `AUDIO_REWRITE_MODEL`:后续音频口播改写模型,默认跟随 `REWRITE_MODEL`;如果旧环境仍写 `gemini-*`,后端会自动改用 `REWRITE_MODEL` - `AUDIO_PRODUCT_BRIEF`:音频口播改写时注入的 SKG 产品卖点 diff --git a/api/.env.example b/api/.env.example index 4392222..91654b4 100644 --- a/api/.env.example +++ b/api/.env.example @@ -30,7 +30,8 @@ LOCAL_ASR_MODEL=mlx-community/whisper-tiny LOCAL_ASR_TIMEOUT_SECONDS=180 TRANSLATE_MODEL=gemini-2.5-flash GPT_TEXT_MODEL=gpt-4o -REWRITE_MODEL=gpt-4o +REWRITE_MODEL=gpt-4o-mini +REWRITE_MODEL_FALLBACKS=gemini-2.5-flash VISION_MODEL=gpt-4o PRODUCT_VIEW_MODEL=gpt-image-2 IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1 diff --git a/api/main.py b/api/main.py index 2d5952a..3831680 100644 --- a/api/main.py +++ b/api/main.py 
@@ -107,6 +107,11 @@ def gpt_model_env(name: str, default: str | None = None) -> str: REWRITE_MODEL = gpt_model_env("REWRITE_MODEL") +REWRITE_MODEL_FALLBACKS = [ + model.strip() + for model in os.getenv("REWRITE_MODEL_FALLBACKS", "gpt-4o-mini,gemini-2.5-flash").split(",") + if model.strip() +] VISION_MODEL = gpt_model_env("VISION_MODEL") IMAGE_BASE_URL = os.getenv("IMAGE_BASE_URL", LLM_BASE_URL).strip() IMAGE_API_KEY = os.getenv("IMAGE_API_KEY", LLM_API_KEY).strip() @@ -5914,6 +5919,7 @@ class CreativeCopyResp(BaseModel): class PromptPolishReq(BaseModel): text: str system_prompt: str = "" + model: str = "" mode: Literal["image", "video", "general", "chat"] = "image" target_language: Literal["en", "zh", "keep"] = "en" @@ -6320,11 +6326,26 @@ def _prompt_polish_fallback(req: PromptPolishReq) -> PromptPolishResp: return PromptPolishResp(model="fallback", text=_sanitize_polished_prompt(req, intent, _basic_polished_prompt(req, intent))) -def _repair_polished_prompt(req: PromptPolishReq, intent: PromptIntent, output: str, *, allow_llm: bool = False) -> str: +def _prompt_polish_model_candidates(req: PromptPolishReq) -> list[str]: + requested = (req.model or "").strip() + candidates = [requested, REWRITE_MODEL, *REWRITE_MODEL_FALLBACKS] + seen: set[str] = set() + out: list[str] = [] + for model in candidates: + model = model.strip() + key = model.lower() + if model and key not in seen: + out.append(model) + seen.add(key) + return out + + +def _repair_polished_prompt(req: PromptPolishReq, intent: PromptIntent, output: str, *, allow_llm: bool = False, model: str | None = None) -> str: out = _sanitize_polished_prompt(req, intent, output) issue = _polished_prompt_issue(intent, out) if not issue or not allow_llm or not LLM_API_KEY: return out + repair_model = (model or REWRITE_MODEL).strip() or REWRITE_MODEL repair_prompt = ( "Repair the rewritten generation prompt so it follows the source input exactly.\n" f"Issue to fix: {issue}.\n" @@ -6340,7 +6361,7 @@ def 
_repair_polished_prompt(req: PromptPolishReq, intent: PromptIntent, output: ) try: resp = llm().chat.completions.create( - model=REWRITE_MODEL, + model=repair_model, messages=[ {"role": "system", "content": "You repair generation prompts by removing contradictions and preserving only source intent."}, {"role": "user", "content": repair_prompt}, @@ -6410,23 +6431,28 @@ def polish_prompt(req: PromptPolishReq) -> PromptPolishResp: prompt += f"\nUser-selected polishing guidance:\n{user_system[:1000]}\n" prompt += f"\nSource input:\n{intent.cleaned_text[:2500]}" - try: - resp = llm().chat.completions.create( - model=REWRITE_MODEL, - messages=[ - {"role": "system", "content": "You are a neutral professional prompt editor. Preserve source intent exactly and never inject SKG or unrelated brands, products, platforms, people, or marketing context."}, - {"role": "user", "content": prompt}, - ], - temperature=0.45, - max_tokens=900, - ) - out = _clean_prompt_output(resp.choices[0].message.content or "") - if not out: - out = _prompt_polish_fallback(req).text - return PromptPolishResp(model=REWRITE_MODEL, text=_repair_polished_prompt(req, intent, out, allow_llm=True)) - except Exception as e: - print(f"[prompt polish fallback] {e}", flush=True) - return _prompt_polish_fallback(req) + model_errors: list[str] = [] + for model in _prompt_polish_model_candidates(req): + try: + resp = llm().chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": "You are a neutral professional prompt editor. 
Preserve source intent exactly and never inject SKG or unrelated brands, products, platforms, people, or marketing context."}, + {"role": "user", "content": prompt}, + ], + temperature=0.45, + max_tokens=900, + ) + out = _clean_prompt_output(resp.choices[0].message.content or "") + if not out: + raise RuntimeError("empty prompt polish response") + return PromptPolishResp(model=model, text=_repair_polished_prompt(req, intent, out, allow_llm=True, model=model)) + except Exception as e: + message = str(e).replace("\n", " ")[:400] + model_errors.append(f"{model}: {message}") + print(f"[prompt polish model fallback] model={model} error={message}", flush=True) + print(f"[prompt polish fallback] {' | '.join(model_errors)}", flush=True) + return _prompt_polish_fallback(req) @app.post("/translate") diff --git a/deploy/.env.local.example b/deploy/.env.local.example index 626dfc9..6a77223 100644 --- a/deploy/.env.local.example +++ b/deploy/.env.local.example @@ -54,7 +54,8 @@ AI_HTTP_PROXY= # Text/vision/audio model names GPT_TEXT_MODEL=gpt-4o -REWRITE_MODEL=gpt-4o +REWRITE_MODEL=gpt-4o-mini +REWRITE_MODEL_FALLBACKS=gemini-2.5-flash VISION_MODEL=gpt-4o TRANSLATE_MODEL=gemini-2.5-flash ASR_BASE_URL=https://ai.skg.com/azure/v1 diff --git a/deploy/.env.production.example b/deploy/.env.production.example index 412329d..70e8b8c 100644 --- a/deploy/.env.production.example +++ b/deploy/.env.production.example @@ -56,7 +56,8 @@ FASTER_WHISPER_DEVICE=cpu FASTER_WHISPER_COMPUTE_TYPE=int8 TRANSLATE_MODEL=gemini-2.5-flash GPT_TEXT_MODEL=gpt-4o -REWRITE_MODEL=gpt-4o +REWRITE_MODEL=gpt-4o-mini +REWRITE_MODEL_FALLBACKS=gemini-2.5-flash VISION_MODEL=gpt-4o PRODUCT_VIEW_MODEL=gpt-image-2 IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1 diff --git a/docs/source-analysis.html b/docs/source-analysis.html index f0ddd6c..b52ee15 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -1417,6 +1417,19 @@ ProductRefStateItem {

影响:Postgres 里的 canvas_projects 重新成为主存储;刷新、换浏览器或本地缓存异常时,不应再把服务端项目缩小或清空。旧项目首次迁移仍可用,但迁移动作变为非破坏性。

+
+
+

2026-06-04 · AI 润色不再静默套模板

+ API + Canvas + Model +
+
+

问题:当前网关分组对 gpt-4o 返回“无可用渠道”,而 /prompt/polish 捕获异常后直接返回本地 fallback,用户看到的是固定尾巴模板,不是真正的模型润色;同时前端 useChat({ model: 'gpt-4o-mini' }) 没有把 model 发给后端,配置实际上未生效。

+

改动:PromptPolishReq 新增 model 字段,web/canvas-app/src/hooks/useApi.js 会把前端选择的模型传到 /prompt/polish;后端按“请求模型 → REWRITE_MODEL → REWRITE_MODEL_FALLBACKS”依次尝试,当前本地默认 REWRITE_MODEL=gpt-4o-mini、备用 gemini-2.5-flash。只有全部模型失败时才返回本地模板 model=fallback,并在日志里记录每个失败模型。

+

影响:画布底部和文本节点的 AI 润色会优先走真实模型输出,不再把固定 “Clear main subject...” 或 “Cinematic motion...” 当作正常润色结果;如果未来网关主模型不可用,接口会自动降级到备用模型,而不是立刻套模板。

+
+

2026-05-26 · AI 润色改为意图分类和冲突校验

diff --git a/web/canvas-app/src/hooks/useApi.js b/web/canvas-app/src/hooks/useApi.js index dab3cd0..acec881 100644 --- a/web/canvas-app/src/hooks/useApi.js +++ b/web/canvas-app/src/hooks/useApi.js @@ -200,6 +200,7 @@ export const useChat = (options = {}) => { body: JSON.stringify({ text: content, system_prompt: options.systemPrompt || '', + model: options.model || '', mode, target_language: options.targetLanguage || (mode === 'chat' ? 'keep' : 'en') })