fix: make AI polish intent-aware
This commit is contained in:
2
RULES.md
2
RULES.md
@@ -12,7 +12,7 @@
|
||||
- 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解
|
||||
- 风格:`04-Dark-Gallery-Ambient`(路径:`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`)
|
||||
- 第一冲刺:步骤 1-4(下载 / 拆轨 / 关键帧 / ASR+翻译)
|
||||
- 当前产品方向(2026-05-26 Postgres 持久化版):默认入口是多人通用的 SKG 营销内容生产平台,`https://marketing.skg.com` 登录后直接进入个人生成画布,`/canvas/` 只作为旧链接兼容跳转到根域名。终端可见品牌位只放 SKG logo,不在主界面展示“生图生视频”“SKG 生成画布”或长系统名。画布本体尽量恢复 `chatfire-AI/huobao-canvas` 的成熟交互,不再削成三模式单输入框:保留首页推荐词、画布底部推荐词、AI 润色、自动执行、公共工作流、我的工作流、首帧/尾帧/参考图节点、图片/视频/LLM 配置节点、模型配置和批量下载等上游能力;多角度分镜、故事板、图转视频、绘本等工作流按上游结构创建节点。API 接入是例外:生成调用继续走本项目后端 `/api` 和当前登录 Cookie,不要求员工在浏览器配置个人 API Key;图片/视频模型选择只显示后端已经接通的媒体模型,不能让浏览器本地自定义或旧缓存模型进入生成下拉。API 设置弹窗只保留模型/端点配置外观,不能出现上游注册链接或外部品牌。用户登录后仍只看到自己的任务、结果、详情页、画布项目和个人工作流模板,继续沿用后端 owner 隔离;画布项目和我的工作流以服务端 Postgres 为主持久化,浏览器 `localStorage` 只作为项目缓存和首次导入来源,图片/视频资产按登录用户写入后端 job。旧 TK 复刻工作台、Agent Cut 一键出片和营销图文方案保留为高级/详情页能力,不再作为默认首页入口或默认理解框架。
|
||||
- 当前产品方向(2026-05-26 Postgres 持久化版):默认入口是多人通用的 SKG 营销内容生产平台,`https://marketing.skg.com` 登录后直接进入个人生成画布,`/canvas/` 只作为旧链接兼容跳转到根域名。终端可见品牌位只放 SKG logo,不在主界面展示“生图生视频”“SKG 生成画布”或长系统名。画布本体尽量恢复 `chatfire-AI/huobao-canvas` 的成熟交互,不再削成三模式单输入框:保留首页推荐词、画布底部推荐词、AI 润色、自动执行、公共工作流、我的工作流、首帧/尾帧/参考图节点、图片/视频/LLM 配置节点、模型配置和批量下载等上游能力;多角度分镜、故事板、图转视频、绘本等工作流按上游结构创建节点。API 接入是例外:生成调用继续走本项目后端 `/api` 和当前登录 Cookie,不要求员工在浏览器配置个人 API Key;AI 润色只扩写用户明确写出的主体、品牌、产品、平台、动作和镜头,用户没写 `SKG` 时绝不主动加入 SKG,也不能把未知主体润成人物或强行润成无人物;上传/生成的参考图如果本来就有人物,应在视频提示词里按 AI 生成的虚拟角色、非真人、非公众人物处理,继续允许 AI 人像素材参与图生视频;图片/视频模型选择只显示后端已经接通的媒体模型,不能让浏览器本地自定义或旧缓存模型进入生成下拉。API 设置弹窗只保留模型/端点配置外观,不能出现上游注册链接或外部品牌。用户登录后仍只看到自己的任务、结果、详情页、画布项目和个人工作流模板,继续沿用后端 owner 隔离;画布项目和我的工作流以服务端 Postgres 为主持久化,浏览器 `localStorage` 只作为项目缓存和首次导入来源,图片/视频资产按登录用户写入后端 job。旧 TK 复刻工作台、Agent Cut 一键出片和营销图文方案保留为高级/详情页能力,不再作为默认首页入口或默认理解框架。
|
||||
|
||||
## 部署事实
|
||||
- 平台:VPS `76.13.31.179`(Ubuntu 24.04 / Docker Compose / Coolify Traefik)
|
||||
|
||||
352
api/main.py
352
api/main.py
@@ -5517,55 +5517,282 @@ _PERSON_INTENT_RE = re.compile(
|
||||
re.I,
|
||||
)
|
||||
|
||||
_OBJECT_INTENT_RE = re.compile(
|
||||
r"("
|
||||
r"产品|商品|物体|物件|道具|设备|机器|仪器|建筑|房子|屋顶|椅子|桌子|汽车|飞船|"
|
||||
r"\b(?:product|object|item|device|machine|gadget|building|house|roof|chair|table|car|vehicle|spaceship)\b"
|
||||
r")",
|
||||
re.I,
|
||||
)
|
||||
|
||||
def _prompt_has_person_intent(*parts: str) -> bool:
|
||||
text = "\n".join(part for part in parts if part).strip()
|
||||
if not text or _NO_PERSON_INTENT_RE.search(text):
|
||||
return False
|
||||
return bool(_PERSON_INTENT_RE.search(text))
|
||||
_SCENE_INTENT_RE = re.compile(
|
||||
r"("
|
||||
r"场景|街道|房间|室内|室外|空间|城市|森林|海边|天空|太空|夜景|摊位|"
|
||||
r"\b(?:scene|street|room|interior|exterior|space|city|forest|beach|sky|night|stall|booth)\b"
|
||||
r")",
|
||||
re.I,
|
||||
)
|
||||
|
||||
_ANIMAL_INTENT_RE = re.compile(
|
||||
r"("
|
||||
r"动物|猫|狗|鸟|马|鱼|龙|"
|
||||
r"\b(?:animal|cat|dog|bird|horse|fish|dragon)\b"
|
||||
r")",
|
||||
re.I,
|
||||
)
|
||||
|
||||
_SKG_RE = re.compile(r"\bskg\b", re.I)
|
||||
|
||||
_NO_PERSON_CLAUSE_RE = re.compile(
|
||||
r"\b(?:no|without)\s+people[^.。!?!?]*(?:[.。!?!?]|$)|"
|
||||
r"\bdo\s+not\s+(?:introduce|add|include)\s+people[^.。!?!?]*(?:[.。!?!?]|$)",
|
||||
re.I,
|
||||
)
|
||||
|
||||
_PREVIOUS_POLISH_BOILERPLATE_PATTERNS = [
|
||||
re.compile(
|
||||
r"\bDetailed visual prompt,\s*clear main subject,\s*coherent composition,\s*"
|
||||
r"natural lighting,\s*refined color palette,\s*high-quality details\.?",
|
||||
re.I,
|
||||
),
|
||||
re.compile(
|
||||
r"\bSmooth camera movement,\s*clear subject continuity,\s*stable composition,\s*"
|
||||
r"natural motion,\s*coherent lighting,\s*no subtitles,\s*no watermark\.?",
|
||||
re.I,
|
||||
),
|
||||
re.compile(
|
||||
r"\bPreserve the original object-only,\s*scene-only,\s*or product-only composition;?\s*"
|
||||
r"do not introduce people,\s*faces,\s*bodies,\s*hands,\s*avatars,\s*characters,\s*"
|
||||
r"crowds,\s*bystanders,\s*or human silhouettes\.?",
|
||||
re.I,
|
||||
),
|
||||
re.compile(
|
||||
r"\bUse a fully fictional synthetic AI character,\s*not based on any real person,\s*"
|
||||
r"celebrity,\s*public figure,\s*or identifiable private individual\.?",
|
||||
re.I,
|
||||
),
|
||||
re.compile(
|
||||
r"\bNot based on any real person,\s*celebrity,\s*public figure,\s*or identifiable private individual\.?",
|
||||
re.I,
|
||||
),
|
||||
re.compile(
|
||||
r"\bThe subject is a fictional synthetic AI character,\s*not based on any real person\.?",
|
||||
re.I,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def _prompt_person_guard(req: PromptPolishReq) -> str:
|
||||
if req.mode not in {"image", "video", "general"}:
|
||||
@dataclass(frozen=True)
class PromptIntent:
    """Immutable result of classifying one user prompt before polishing."""

    # Prompt text exactly as it was handed to the classifier.
    raw_text: str
    # Prompt text after boilerplate left by an earlier polish pass was stripped.
    cleaned_text: str
    # True when the cleaned text matches the person-intent pattern.
    person_requested: bool
    # True when the text matches the no-person pattern and no person was requested.
    no_person_requested: bool
    # One of "person", "animal", "object", "scene" or "unknown".
    subject_kind: str
    # True when the cleaned text literally mentions SKG.
    skg_requested: bool
|
||||
|
||||
|
||||
def _strip_previous_polish_boilerplate(text: str) -> str:
|
||||
raw = (text or "").strip()
|
||||
if not raw:
|
||||
return ""
|
||||
if _prompt_has_person_intent(req.text, req.system_prompt):
|
||||
return (
|
||||
"The user requested a person, portrait, model, or character subject. "
|
||||
"Describe any such subject as a fully fictional synthetic AI character or virtual avatar, "
|
||||
"not based on any real person, celebrity, public figure, or identifiable private individual. "
|
||||
"Avoid real-person likeness, biometric identity, endorsement, or impersonation.\n"
|
||||
)
|
||||
return (
|
||||
"The user did not request a person or character subject. Preserve the original object-only, "
|
||||
"scene-only, or product-only composition. Do not introduce people, faces, bodies, hands, "
|
||||
"avatars, characters, crowds, bystanders, or human silhouettes.\n"
|
||||
cleaned = raw
|
||||
for pattern in _PREVIOUS_POLISH_BOILERPLATE_PATTERNS:
|
||||
cleaned = pattern.sub(" ", cleaned)
|
||||
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
||||
cleaned = re.sub(r"\s+([,.;:!?])", r"\1", cleaned)
|
||||
cleaned = re.sub(r"(?:[.。]\s*){2,}", ". ", cleaned)
|
||||
cleaned = re.sub(r"^[,.;:!?,。!?\s]+", "", cleaned)
|
||||
cleaned = re.sub(r"[,;:,;:\s]+$", "", cleaned)
|
||||
return cleaned or raw
|
||||
|
||||
|
||||
def _classify_prompt_intent(text: str) -> PromptIntent:
    """Classify the subject and person constraints expressed by a prompt.

    Boilerplate left by a previous polish pass is stripped first, then the
    cleaned text is matched against the person, no-person, animal, object,
    scene and SKG patterns.

    Args:
        text: Raw prompt text as supplied by the caller.

    Returns:
        A PromptIntent holding the raw text, the cleaned text, the person and
        no-person flags, the subject kind ("person", "animal", "object",
        "scene" or "unknown") and whether SKG is literally mentioned.
    """
    cleaned = _strip_previous_polish_boilerplate(text)
    # Search for person words only outside explicit no-person phrases, the same
    # way _output_mentions_person_subject treats model output. Otherwise a
    # prompt such as "empty street, no people" could be classified as a person
    # prompt and never as a no-person prompt.
    # NOTE(review): assumes _PERSON_INTENT_RE also matches person words inside
    # negations such as "no people"; its definition is outside this block.
    person_requested = bool(_PERSON_INTENT_RE.search(_remove_no_person_phrases(cleaned)))
    no_person_requested = bool(_NO_PERSON_INTENT_RE.search(cleaned)) and not person_requested
    if person_requested:
        subject_kind = "person"
    elif _ANIMAL_INTENT_RE.search(cleaned):
        subject_kind = "animal"
    elif _OBJECT_INTENT_RE.search(cleaned):
        subject_kind = "object"
    elif _SCENE_INTENT_RE.search(cleaned) or no_person_requested:
        # An explicit no-person request with no other subject counts as a scene.
        subject_kind = "scene"
    else:
        subject_kind = "unknown"
    return PromptIntent(
        raw_text=text,
        cleaned_text=cleaned,
        person_requested=person_requested,
        no_person_requested=no_person_requested,
        subject_kind=subject_kind,
        skg_requested=bool(_SKG_RE.search(cleaned)),
    )
|
||||
|
||||
|
||||
def _prompt_polish_fallback(req: PromptPolishReq) -> PromptPolishResp:
|
||||
text = req.text.strip()
|
||||
base = _ensure_english(text) if req.target_language == "en" else text
|
||||
def _remove_no_person_phrases(text: str) -> str:
    """Replace no-person clauses, then no-person phrases, with a single space."""
    without_clauses = _NO_PERSON_CLAUSE_RE.sub(" ", text or "")
    return _NO_PERSON_INTENT_RE.sub(" ", without_clauses)
|
||||
|
||||
|
||||
def _output_mentions_person_subject(text: str) -> bool:
    """Return True when text matches the person pattern outside no-person phrases."""
    remaining = _remove_no_person_phrases(text or "")
    return _PERSON_INTENT_RE.search(remaining) is not None
|
||||
|
||||
|
||||
def _clean_prompt_output(text: str) -> str:
    """Strip code fences, wrapping quotes and old polish boilerplate from model output."""
    # Each step removes one kind of wrapper; the result is re-trimmed after every step.
    unwrap_steps = (
        (r"^```(?:text)?\s*", re.I),            # leading code fence, optional "text" tag
        (r"\s*```$", 0),                        # trailing code fence
        (r'^[\'"「『]+|[\'"」』]+$', 0),          # leading/trailing ASCII or CJK quotes
    )
    candidate = (text or "").strip()
    for pattern, flags in unwrap_steps:
        candidate = re.sub(pattern, "", candidate, flags=flags).strip()
    return _strip_previous_polish_boilerplate(candidate)
|
||||
|
||||
|
||||
def _ensure_fictional_person_subject(text: str) -> str:
|
||||
out = (text or "").strip()
|
||||
if not out:
|
||||
return out
|
||||
if re.search(r"\b(?:fictional|synthetic|virtual avatar|AI character|not based on any real person)\b", out, re.I):
|
||||
return out
|
||||
out = re.sub(
|
||||
r"\b(?:a|an|the)?\s*(?:person|human|model|woman|man|girl|boy|actor|actress|character|avatar)\b",
|
||||
"a fully fictional synthetic AI character",
|
||||
out,
|
||||
count=1,
|
||||
flags=re.I,
|
||||
)
|
||||
if not re.search(r"\b(?:fictional|synthetic|virtual avatar|AI character)\b", out, re.I):
|
||||
out = f"{out}. The subject is a fictional synthetic AI character, not based on any real person."
|
||||
return out
|
||||
|
||||
|
||||
def _basic_polished_prompt(req: PromptPolishReq, intent: PromptIntent) -> str:
|
||||
base = intent.cleaned_text or req.text.strip()
|
||||
base = _ensure_english(base) if req.target_language == "en" else base
|
||||
base = re.sub(r"\s+", " ", base).strip()
|
||||
base = re.sub(r"[。.!!??]+$", "", base).strip()
|
||||
person_intent = _prompt_has_person_intent(req.text, req.system_prompt)
|
||||
person_guard = (
|
||||
" Use a fully fictional synthetic AI character, not based on any real person, celebrity, public figure, or identifiable private individual."
|
||||
if person_intent
|
||||
else " Preserve the original object-only, scene-only, or product-only composition; do not introduce people, faces, bodies, hands, avatars, characters, crowds, bystanders, or human silhouettes."
|
||||
)
|
||||
if intent.person_requested:
|
||||
base = _ensure_fictional_person_subject(base)
|
||||
if req.mode == "video":
|
||||
polished = (
|
||||
f"{base}. Smooth camera movement, clear subject continuity, stable composition, "
|
||||
f"natural motion, coherent lighting, no subtitles, no watermark.{person_guard}"
|
||||
f"{base}. Cinematic motion, clear subject continuity, coherent camera movement, "
|
||||
"natural lighting transition, stable composition, detailed environmental interaction, "
|
||||
"no subtitles, no watermark."
|
||||
)
|
||||
elif req.mode in {"general", "chat"}:
|
||||
polished = base
|
||||
else:
|
||||
polished = (
|
||||
f"{base}. Detailed visual prompt, clear main subject, coherent composition, "
|
||||
f"natural lighting, refined color palette, high-quality details.{person_guard}"
|
||||
f"{base}. Clear main subject, coherent composition, natural lighting, refined color palette, "
|
||||
"detailed textures, cinematic camera framing, high-quality visual detail."
|
||||
)
|
||||
return PromptPolishResp(model="fallback", text=polished[:1800])
|
||||
if intent.no_person_requested:
|
||||
polished = f"{polished} No people, faces, bodies, hands, crowds, or human silhouettes."
|
||||
return re.sub(r"\s+", " ", polished).strip()
|
||||
|
||||
|
||||
def _polished_prompt_issue(intent: PromptIntent, output: str) -> str:
    """Return a short description of the first intent conflict in output, or ""."""
    candidate = output or ""
    if not intent.skg_requested and _SKG_RE.search(candidate):
        return "introduced SKG without user input"
    if _NO_PERSON_INTENT_RE.search(candidate):
        # A no-person phrase is only acceptable when the user asked for one.
        if intent.person_requested:
            return "person prompt contains a no-person prohibition"
        if not intent.no_person_requested:
            return "added a no-person prohibition that the user did not request"
    if intent.person_requested or not _output_mentions_person_subject(candidate):
        return ""
    return "introduced a person or character subject that the user did not request"
|
||||
|
||||
|
||||
def _sanitize_polished_prompt(req: PromptPolishReq, intent: PromptIntent, output: str) -> str:
    """Apply deterministic intent fixes to a polished prompt.

    Steps: clean wrappers and boilerplate, fall back to the basic prompt when
    nothing is left, drop SKG mentions the user did not write, reconcile
    person / no-person wording with the classified intent, normalise
    whitespace, capitalise a leading ASCII lowercase letter, and cap the
    length at 1800 characters.

    Args:
        req: The polish request; passed through to the basic fallback prompt.
        intent: Classification of the user's source prompt.
        output: Candidate polished prompt (for example, LLM output).

    Returns:
        The sanitised prompt, at most 1800 characters long.
    """
    out = _clean_prompt_output(output)
    if not out:
        out = _basic_polished_prompt(req, intent)
    if not intent.skg_requested:
        # The trailing \b keeps the optional suffix from matching a word prefix.
        # Without it "SKG advanced" lost "ad" and left "vanced", and
        # "SKG products" left a stray "s".
        out = re.sub(
            r"\bSKG\b[-\s]*(?:(?:branded|brand|product|device|campaign|ad)\b)?",
            "",
            out,
            flags=re.I,
        )
    if intent.person_requested:
        # A person prompt must not carry a no-person prohibition.
        out = _remove_no_person_phrases(out)
        out = _ensure_fictional_person_subject(out)
    elif intent.no_person_requested:
        if _output_mentions_person_subject(out):
            out = _basic_polished_prompt(req, intent)
    else:
        # No explicit person or no-person request: drop any added prohibition,
        # and fall back if a person subject was introduced.
        out = _remove_no_person_phrases(out)
        if _output_mentions_person_subject(out):
            out = _basic_polished_prompt(req, intent)
    out = re.sub(r"\s+", " ", out).strip()
    out = re.sub(r"\s+([,.;:!?])", r"\1", out)
    if out and "a" <= out[0] <= "z":
        out = out[0].upper() + out[1:]
    return out[:1800]
|
||||
|
||||
|
||||
def _prompt_has_person_intent(*parts: str) -> bool:
    """Return whether the non-empty parts, joined by newlines, classify as a person prompt."""
    combined = "\n".join(filter(None, parts)).strip()
    return _classify_prompt_intent(combined).person_requested
|
||||
|
||||
|
||||
def _prompt_person_guard(req: PromptPolishReq) -> str:
    """Build the person-handling instruction added to the polish prompt.

    Returns an empty string for modes other than image, video and general.
    Otherwise the wording depends on whether the input asks for a person,
    asks for no people, or says neither.
    """
    if req.mode in {"image", "video", "general"}:
        intent = _classify_prompt_intent(req.text)
        if intent.person_requested:
            guard = (
                "The input explicitly requests a person, portrait, model, or character subject. "
                "Keep that subject, but describe them as a fully fictional synthetic AI character or virtual avatar. "
                "Do not imply a real person, celebrity, public figure, private individual, endorsement, or copied likeness.\n"
            )
        elif intent.no_person_requested:
            guard = (
                "The input explicitly requests a no-person composition. Keep the output free of people, faces, bodies, "
                "hands, avatars, characters, crowds, bystanders, and human silhouettes.\n"
            )
        else:
            guard = (
                "The input does not explicitly request a person or character. Do not force a person into the rewrite, "
                "and do not add a no-person prohibition. If the source input or an attached/reference image already contains "
                "a person or character, preserve that visible subject conditionally as a fictional AI-generated synthetic "
                "character; otherwise use neutral wording such as 'main subject' when the subject is unclear.\n"
            )
        return guard
    return ""
|
||||
|
||||
|
||||
def _prompt_polish_fallback(req: PromptPolishReq) -> PromptPolishResp:
    """Produce a polish response without the LLM: basic template, then sanitising."""
    intent = _classify_prompt_intent(req.text)
    draft = _basic_polished_prompt(req, intent)
    return PromptPolishResp(model="fallback", text=_sanitize_polished_prompt(req, intent, draft))
|
||||
|
||||
|
||||
def _repair_polished_prompt(req: PromptPolishReq, intent: PromptIntent, output: str, *, allow_llm: bool = False) -> str:
    """Sanitise a polished prompt and optionally ask the LLM to repair a remaining conflict.

    The sanitised prompt is returned directly when it has no detected issue,
    when allow_llm is false, or when no LLM API key is configured. Otherwise
    one repair request is sent; its sanitised answer is used only if it has
    no detected issue. Any exception during the repair call is printed and
    the sanitised prompt is returned.
    """
    out = _sanitize_polished_prompt(req, intent, output)
    issue = _polished_prompt_issue(intent, out)
    if not (issue and allow_llm and LLM_API_KEY):
        return out

    # Joined with newlines; the empty entries produce the blank lines between sections.
    repair_prompt = "\n".join([
        "Repair the rewritten generation prompt so it follows the source input exactly.",
        f"Issue to fix: {issue}.",
        "Hard rules:",
        "- Do not introduce SKG or any brand unless it appears literally in the source input.",
        "- Do not introduce products, platforms, ad framing, sales language, slogans, hashtags, or claims unless present in the source input.",
        "- If the source requests a person, keep the person only as a fully fictional synthetic AI character.",
        "- If the source mentions a reference, uploaded, first-frame, last-frame, or current image that may already contain a person, preserve that visible subject conditionally as a fictional AI-generated synthetic character.",
        "- If neither the source nor a referenced image requests or shows a person, do not add a person or character.",
        "- Return one clean prompt only, no explanation.",
        "",
        f"Source input:\n{intent.cleaned_text[:1800]}",
        "",
        f"Current rewritten prompt:\n{out[:1800]}",
    ])
    system_message = {
        "role": "system",
        "content": "You repair generation prompts by removing contradictions and preserving only source intent.",
    }
    user_message = {"role": "user", "content": repair_prompt}
    try:
        completion = llm().chat.completions.create(
            model=REWRITE_MODEL,
            messages=[system_message, user_message],
            temperature=0.15,
            max_tokens=700,
        )
        candidate = _sanitize_polished_prompt(req, intent, completion.choices[0].message.content or "")
        # Keep the repair only when it no longer has a detected issue.
        if _polished_prompt_issue(intent, candidate):
            return out
        return candidate
    except Exception as e:
        # Best-effort repair: log the failure and keep the sanitised prompt.
        print(f"[prompt polish repair fallback] {e}", flush=True)
        return out
|
||||
|
||||
|
||||
@app.post("/prompt/polish", response_model=PromptPolishResp)
|
||||
@@ -5573,6 +5800,7 @@ def polish_prompt(req: PromptPolishReq) -> PromptPolishResp:
|
||||
text = req.text.strip()
|
||||
if not text:
|
||||
raise HTTPException(400, "text required")
|
||||
intent = _classify_prompt_intent(text)
|
||||
if not LLM_API_KEY:
|
||||
return _prompt_polish_fallback(req)
|
||||
|
||||
@@ -5588,47 +5816,54 @@ def polish_prompt(req: PromptPolishReq) -> PromptPolishResp:
|
||||
"chat": "a professional response to the user's request",
|
||||
}.get(req.mode, "an image-generation prompt")
|
||||
user_system = req.system_prompt.strip()
|
||||
structure_hint = (
|
||||
"For image prompts, write one polished paragraph covering subject, action/state, setting, composition, camera framing, lighting, material/detail, color mood, and production quality. "
|
||||
if req.mode != "video"
|
||||
else "For video prompts, write one polished paragraph covering opening state, subject motion, camera movement, continuity, environmental interaction, lighting transition, and ending state. "
|
||||
)
|
||||
prompt = (
|
||||
f"Rewrite the user's input into {mode_hint} in {target_label}.\n"
|
||||
"Preserve the user's actual subject, brand, product, place, style, and intent.\n"
|
||||
"Do not add SKG, health-tech, massage products, TikTok ad framing, product sales language, hashtags, captions, or any brand/product not explicitly present in the input or user-selected guidance.\n"
|
||||
"Do not add medical, wellness, or advertising claims unless the user asked for them.\n"
|
||||
"Improve concrete visual details, composition, lighting, camera language, materials, mood, and quality.\n"
|
||||
"Return only the rewritten prompt. No markdown, labels, JSON, quotes, explanation, or alternatives.\n"
|
||||
"Preserve only the subject, brand, product, place, platform, style, action, and intent explicitly present in the source input.\n"
|
||||
"Do not introduce SKG or any other brand unless the source input literally includes it.\n"
|
||||
"Do not introduce products, platforms, ad framing, sales language, slogans, hashtags, captions, or marketing claims unless explicitly present in the source input.\n"
|
||||
"Improve visual specificity, composition, lighting, camera language, materials, motion, mood, and production quality without changing the subject.\n"
|
||||
"Do not add a no-person prohibition unless the source input explicitly asks for no people.\n"
|
||||
"If the source input mentions uploaded images, reference images, first frames, last frames, or current images, keep any existing visible person in those references as a fictional AI-generated synthetic character; do not invent people for references that have none.\n"
|
||||
f"{structure_hint}"
|
||||
"Return only the rewritten prompt. No markdown, labels, JSON, quotes, explanation, alternatives, or meta-instructions.\n"
|
||||
f"{_prompt_person_guard(req)}"
|
||||
)
|
||||
if req.mode == "chat":
|
||||
prompt = (
|
||||
f"Answer or rewrite the user's request professionally in {target_label}.\n"
|
||||
"Follow the user-selected guidance when provided.\n"
|
||||
"Do not add SKG, health-tech, massage products, TikTok ad framing, product sales language, hashtags, captions, or any brand/product not explicitly present in the input or user-selected guidance.\n"
|
||||
"Do not add SKG or any brand/product/platform not explicitly present in the source input or user-selected guidance.\n"
|
||||
"Do not add medical, wellness, or advertising claims unless the user asked for them.\n"
|
||||
"Return only the final content in the format requested by the guidance. No markdown fences, labels, explanation, or alternatives unless explicitly requested.\n"
|
||||
)
|
||||
if req.mode == "video":
|
||||
prompt += (
|
||||
"For video, describe motion, timing, camera movement, continuity, and what changes over time. "
|
||||
"Do not add people for scale, atmosphere, lifestyle context, or background decoration unless the input explicitly asked for people.\n"
|
||||
"Do not add people for scale, atmosphere, lifestyle context, or background decoration unless the input or reference image already contains or requests people.\n"
|
||||
)
|
||||
if user_system:
|
||||
prompt += f"\nUser-selected polishing guidance:\n{user_system[:1000]}\n"
|
||||
prompt += f"\nInput:\n{text[:2500]}"
|
||||
prompt += f"\nSource input:\n{intent.cleaned_text[:2500]}"
|
||||
|
||||
try:
|
||||
resp = llm().chat.completions.create(
|
||||
model=REWRITE_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a neutral professional prompt editor. You preserve intent and never inject unrelated brands or products."},
|
||||
{"role": "system", "content": "You are a neutral professional prompt editor. Preserve source intent exactly and never inject SKG or unrelated brands, products, platforms, people, or marketing context."},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
temperature=0.45,
|
||||
max_tokens=900,
|
||||
)
|
||||
out = (resp.choices[0].message.content or "").strip()
|
||||
out = re.sub(r"^```(?:text)?\s*", "", out, flags=re.I).strip()
|
||||
out = re.sub(r"\s*```$", "", out).strip()
|
||||
out = re.sub(r'^[\'"「『]+|[\'"」』]+$', "", out).strip()
|
||||
return PromptPolishResp(model=REWRITE_MODEL, text=(out or _prompt_polish_fallback(req).text)[:1800])
|
||||
out = _clean_prompt_output(resp.choices[0].message.content or "")
|
||||
if not out:
|
||||
out = _prompt_polish_fallback(req).text
|
||||
return PromptPolishResp(model=REWRITE_MODEL, text=_repair_polished_prompt(req, intent, out, allow_llm=True))
|
||||
except Exception as e:
|
||||
print(f"[prompt polish fallback] {e}", flush=True)
|
||||
return _prompt_polish_fallback(req)
|
||||
@@ -8197,6 +8432,24 @@ def _storyboard_video_prompt(scene: StoryboardScene, seed: int | None = None) ->
|
||||
return "\n".join([p for p in parts if p.strip()])
|
||||
|
||||
|
||||
_REFERENCE_IMAGE_SYNTHETIC_PERSON_GUARD = (
|
||||
"Reference images may be AI-generated visual assets. If any person, face, body, hand, avatar, or character "
|
||||
"appears in the provided reference image(s), treat the visible subject as a fully fictional AI-generated "
|
||||
"synthetic character, not a real person, celebrity, public figure, private individual, or copied likeness. "
|
||||
"Preserve the reference subject and composition without identifying, impersonating, or implying endorsement "
|
||||
"by any real person."
|
||||
)
|
||||
|
||||
|
||||
def _append_reference_image_person_guard(prompt: str, has_reference: bool) -> str:
    """Append the reference-image synthetic-person guard to a prompt when it applies.

    The trimmed prompt is returned unchanged when there is no reference, when
    the prompt is empty, or when it already contains one of the guard's marker
    phrases. Otherwise the guard follows the prompt after a blank line.
    """
    base = (prompt or "").strip()
    markers = (
        "Reference images may be AI-generated visual assets",
        "fully fictional AI-generated synthetic character",
    )
    needs_guard = has_reference and bool(base) and not any(marker in base for marker in markers)
    if not needs_guard:
        return base
    return "\n\n".join((base, _REFERENCE_IMAGE_SYNTHETIC_PERSON_GUARD))
|
||||
|
||||
|
||||
class ProductFusionDescriptionReq(BaseModel):
|
||||
shots: list[ProductFusionShot] = Field(default_factory=list)
|
||||
|
||||
@@ -8287,8 +8540,9 @@ def _video_public_error(raw: object) -> str:
|
||||
"人脸",
|
||||
)):
|
||||
return (
|
||||
"视频生成失败:参考图里有清晰人物或疑似真实人脸,视频模型出于肖像/隐私风控拒绝生成。"
|
||||
"请换成无可识别人脸的首帧,或先裁掉/模糊人物脸,再重新生成视频。"
|
||||
"视频生成失败:参考图被视频模型判定为疑似真实人脸或肖像隐私。"
|
||||
"系统会按 AI 生成的虚拟角色提交,但上游仍可能误判;请尝试换更低识别度的首帧,"
|
||||
"或裁掉/弱化脸部后重新生成视频。"
|
||||
)
|
||||
|
||||
if any(token in lower for token in (
|
||||
@@ -8687,6 +8941,7 @@ def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboar
|
||||
source_ref = req.source_ref
|
||||
if source_ref and source_ref.kind == "source_video" and not source_ref.url:
|
||||
source_ref = None
|
||||
has_visual_reference = bool(ref_path.exists() or last_ref_path or reference_ref_paths)
|
||||
items: list[GeneratedVideo] = []
|
||||
ids: list[str] = []
|
||||
queued_tasks: list[tuple[str, tuple]] = []
|
||||
@@ -8695,6 +8950,7 @@ def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboar
|
||||
ids.append(local_id)
|
||||
variant_seed = (req.seed + i) if req.seed is not None else random.randint(100000, 999999)
|
||||
variant_prompt = _ensure_english(f"{prompt}\n\nCreate variation {i + 1} of {count}. Variation seed: {variant_seed}. Keep the same compact row meaning but vary camera motion, gesture timing, and composition.")
|
||||
variant_prompt = _append_reference_image_person_guard(variant_prompt, has_visual_reference)
|
||||
items.append(GeneratedVideo(
|
||||
id=local_id,
|
||||
provider_id="",
|
||||
|
||||
@@ -582,10 +582,10 @@
|
||||
<p><strong>2026-05-25 上游能力恢复版:</strong>用户明确要求“API 没关系,其他恢复,别削弱”。因此根域名画布恢复 <code>chatfire-AI/huobao-canvas</code> 的成熟节点和工作流结构:推荐词、AI 润色、自动执行、工作流模板、首帧/尾帧/参考图节点、图片/视频/LLM 配置、多角度分镜、故事板、绘本和批量下载都保留;只继续替换品牌、路由和 API 接入。生成请求仍走 SKG 后端 <code>/api</code> 与登录 Cookie,员工不需要个人 API Key。</p>
|
||||
<p><strong>2026-05-25 媒体模型接入收口:</strong>图片和视频模型选择只暴露当前后端真实可用项:图片为 <code>auto</code>、<code>gpt-image-2</code>、<code>gemini-3-pro-image-preview</code>;视频当前只接通 <code>Seedance 2.0 Fast</code>(真实模型 <code>doubao-seedance-2-0-fast-260128</code>)。旧上游的 Nano Banana、Seedream、Kling、Veo 或浏览器本地自定义媒体模型不能进入生成下拉,避免同事选到实际不可用的模型。</p>
|
||||
<p><strong>2026-05-26 公司沉淀版:</strong>画布项目从浏览器本地存储升级为服务端 Postgres 持久化;<code>localStorage</code> 只作为离线缓存和首次导入来源。后端同时建立用户、任务、资源索引和审计表,保留原有 <code>state.json</code> 文件作为任务详情真源,避免一次迁移动到大文件资产结构。</p>
|
||||
<p><strong>2026-05-26 AI 润色中性化:</strong>画布 <code>AI 润色</code> 不再复用 SKG 广告文案接口 <code>/creative/copy</code>。后端新增 <code>POST /prompt/polish</code>,前端 <code>useChat</code>、根画布输入框、文本节点和自动执行意图分析改走中性提示词/通用生成接口:只优化用户已经给出的主体、风格、镜头和细节,不主动添加 SKG、按摩产品、TikTok 广告话术或用户没有提到的品牌。润色会按人物意图分流:原提示词没有人物时明确保持物体/场景/产品构图且不新增人物;原提示词明确有人物、人像、模特或角色时,才补充“虚构 AI 角色、非真人、非公众人物”的合规描述。</p>
|
||||
<p><strong>2026-05-26 AI 润色中性化:</strong>画布 <code>AI 润色</code> 不再复用 SKG 广告文案接口 <code>/creative/copy</code>。后端新增 <code>POST /prompt/polish</code>,前端 <code>useChat</code>、根画布输入框、文本节点和自动执行意图分析改走中性提示词/通用生成接口:只优化用户已经给出的主体、风格、镜头和细节,不主动添加 SKG、按摩产品、TikTok 广告话术或用户没有提到的品牌。当前润色链路会先清理上一次润色遗留的模板尾巴,再判断人物/无人/物体/场景/动物/未知主体;原文明确有人时才声明虚构 AI 角色,原文明确无人时才保留无人物约束,原文没写人时不主动造人但也不追加“必须无人物”的模板尾巴;当输入或参考图已经有人物时,按 AI 生成的虚拟角色继续描述,而不是把人物参考图判定为不可用。</p>
|
||||
<p><strong>2026-05-26 我的工作流云端版:</strong>工作流面板从只有公共模板扩展为“公共工作流 / 我的工作流”两类。当前画布可以保存成当前登录用户自己的云端工作流模板,后续在同一账号的其他电脑或浏览器打开后可插回画布;保存时只沉淀节点结构、连线、配置和提示词,主动清掉已生成图片、视频、任务进度、错误和运行态字段,避免把一次性生成结果误当模板复用。</p>
|
||||
</div>
|
||||
<p>当前默认业务管线是“个人隔离任务 → 根域名进入个人画布 → 画布项目同步到服务端 Postgres → 用提示词、推荐词、AI 润色、公共工作流或我的工作流创建节点 → 画布自动执行或手动连接图片/视频/文本节点 → 生成结果沉淀在当前个人画布 → 可把当前节点结构保存为我的工作流 → 需要时进入详情页继续编辑”。画布不再被削成三模式入口;首帧、尾帧、参考图、图生视频、多角度分镜、故事板和绘本等上游概念按节点能力保留。底层生成仍由 <code>web/canvas-app/src/hooks/useApi.js</code> 适配到本项目 <code>/creative/jobs/image</code>、<code>/jobs/{id}/frames/{idx}/generate</code>、<code>/jobs/{id}/frames/{idx}/storyboard/video</code>,AI 润色和通用 LLM 文本生成走 <code>/prompt/polish</code> 并保持中性专业,不再默认套入 SKG 广告语境。生成资产按当前登录用户写入个人 job。图片尺寸只显示 <code>auto</code>、<code>1024x1536</code>、<code>1024x1024</code>、<code>1536x1024</code>;视频画幅只显示 <code>720x1280</code>、<code>1280x720</code>、<code>1024x1024</code>、<code>960x1280</code>;视频时长只显示 <code>5/8/10/12/15</code> 秒。多人互不影响依赖后端 <code>owner_id</code>、画布项目 owner、我的工作流 owner 和飞书 / 备用登录会话隔离。旧 React 单对话框首页、信息流复刻链路仍保留在源码里作为回滚/高级能力,但不作为生产默认入口。</p>
|
||||
<p>当前默认业务管线是“个人隔离任务 → 根域名进入个人画布 → 画布项目同步到服务端 Postgres → 用提示词、推荐词、AI 润色、公共工作流或我的工作流创建节点 → 画布自动执行或手动连接图片/视频/文本节点 → 生成结果沉淀在当前个人画布 → 可把当前节点结构保存为我的工作流 → 需要时进入详情页继续编辑”。画布不再被削成三模式入口;首帧、尾帧、参考图、图生视频、多角度分镜、故事板和绘本等上游概念按节点能力保留。底层生成仍由 <code>web/canvas-app/src/hooks/useApi.js</code> 适配到本项目 <code>/creative/jobs/image</code>、<code>/jobs/{id}/frames/{idx}/generate</code>、<code>/jobs/{id}/frames/{idx}/storyboard/video</code>,AI 润色和通用 LLM 文本生成走 <code>/prompt/polish</code> 并保持中性专业:不主动套入 SKG,不主动补产品、平台、广告语境或人物,只扩写用户明确写出的主体、动作、场景、镜头、光线和质量细节;视频提交若带参考图,会在最终提示词中条件声明“参考图里若有人物,应按 AI 生成的虚拟角色处理”,避免把 AI 人像素材误当成真实肖像。生成资产按当前登录用户写入个人 job。图片尺寸只显示 <code>auto</code>、<code>1024x1536</code>、<code>1024x1024</code>、<code>1536x1024</code>;视频画幅只显示 <code>720x1280</code>、<code>1280x720</code>、<code>1024x1024</code>、<code>960x1280</code>;视频时长只显示 <code>5/8/10/12/15</code> 秒。多人互不影响依赖后端 <code>owner_id</code>、画布项目 owner、我的工作流 owner 和飞书 / 备用登录会话隔离。旧 React 单对话框首页、信息流复刻链路仍保留在源码里作为回滚/高级能力,但不作为生产默认入口。</p>
|
||||
<div class="pipeline">
|
||||
<div class="step"><div class="num">01</div><h3>个人任务</h3><p><code>GET /jobs</code> 按当前登录用户过滤;旧无 owner 任务只对备用账号可见。</p></div>
|
||||
<div class="step"><div class="num">02</div><h3>进入画布</h3><p>用户直接在根域名个人画布里操作;项目列表优先读取服务端 <code>/canvas-projects</code>,本地旧项目会首次导入。</p></div>
|
||||
@@ -615,7 +615,7 @@
|
||||
<tr><td><code>web/canvas-app/src/views/Canvas.vue</code></td><td>画布主交互:恢复上游底部 prompt composer、<code>AI 润色</code>、<code>自动执行</code>、推荐词、节点菜单、工作流面板、API/模型设置入口和批量下载入口。自动执行会调用 <code>useWorkflowOrchestrator</code> 分析提示词,创建文生图、图转视频、故事板、多角度分镜或绘本节点组;手动模式只创建文本节点,用户自行连接节点。工作流面板支持公共模板和我的工作流:公共模板走本地 <code>createNodes()</code>,我的工作流从云端 <code>workflow_data</code> 插回当前画布,并重新生成节点 ID、按视口中心重排、按映射重连边。底部推荐词来自共享短词池,4 个一组单行展示,刷新按钮在 30 组内轮换,不改变输入面板高度。</td></tr>
|
||||
<tr><td><code>web/canvas-app/src/config/suggestions.js</code></td><td>首页和画布共用的推荐词配置:维护 <code>QUICK_SUGGESTION_GROUPS</code>,当前为 30 组 / 120 个短词,每组 4 个,控制刷新按钮的轮换范围;词条保持短小,避免推荐栏换行或顶起 composer。</td></tr>
|
||||
<tr><td><code>web/canvas-app/src/config/models.js</code></td><td>画布媒体模型和规格的前端白名单:图片只内置 <code>auto</code>、<code>gpt-image-2</code>、<code>gemini-3-pro-image-preview</code>,尺寸只内置 <code>auto</code>、<code>1024x1536</code>、<code>1024x1024</code>、<code>1536x1024</code>;视频只内置 <code>seedance</code> / <code>Seedance 2.0 Fast</code>,画幅和时长对齐后端 <code>/health</code> 能力边界。<code>useModelConfig.js</code> 和 Pinia 模型 store 会忽略浏览器本地自定义图片/视频模型,防止旧缓存把不可用模型带回生成下拉。</td></tr>
|
||||
<tr><td><code>web/canvas-app/src/hooks/useApi.js</code></td><td>画布到本项目后端的适配层:不再读取浏览器 API Key,而是使用当前登录会话 Cookie 调用 <code>/api</code>。文生图 / 图生图先创建轻量 creative job,再调用 <code>/frames/0/generate</code>;文生视频 / 图生视频调用 <code>/storyboard/video</code> 并轮询 <code>/jobs/{id}</code>,完成后把图片或 mp4 URL 写回画布节点。<code>useChat</code> 已从 SKG 广告文案接口切到 <code>/prompt/polish</code>:AI 润色显式使用 image/video prompt 模式,LLM 节点使用通用 chat 模式,避免自动注入用户没有提到的 SKG 或营销语境;后端会判断原提示词是否有人物意图,无人物时禁止新增人物,有人物时才声明虚构 AI 角色。</td></tr>
|
||||
<tr><td><code>web/canvas-app/src/hooks/useApi.js</code></td><td>画布到本项目后端的适配层:不再读取浏览器 API Key,而是使用当前登录会话 Cookie 调用 <code>/api</code>。文生图 / 图生图先创建轻量 creative job,再调用 <code>/frames/0/generate</code>;文生视频 / 图生视频调用 <code>/storyboard/video</code> 并轮询 <code>/jobs/{id}</code>,完成后把图片或 mp4 URL 写回画布节点。<code>useChat</code> 已从 SKG 广告文案接口切到 <code>/prompt/polish</code>:AI 润色显式使用 image/video prompt 模式,LLM 节点使用通用 chat 模式,避免自动注入用户没有提到的 SKG、产品、平台或营销语境;后端会清理旧润色模板尾巴、判断人物/无人/物体/场景意图,并在输出后检查“有人却禁止人物、无人却新增人物、未写 SKG 却出现 SKG”等冲突。图生视频实际提交到后端后,后端会对参考图追加 AI 虚拟角色条件说明,不要求前端判断图片里是否有人脸。</td></tr>
|
||||
<tr><td><code>web/scripts/sync-canvas-root.mjs</code></td><td>构建桥接脚本:在 <code>next build</code> 静态导出完成后,把 Vite 画布产物 <code>web/canvas-app/dist</code> 覆盖到 <code>web/out</code> 根目录,使 <code>https://marketing.skg.com</code> 登录后直接进入画布;旧 <code>web/scripts/sync-canvas-dist.mjs</code> 保留但不再由生产构建调用。</td></tr>
|
||||
<tr><td><code>web/app/detail/page.tsx</code></td><td>任务详情页:静态导出路由 <code>/detail/?job=<id></code>,通过 query 读取 job id,调用 <code>getJob</code> 恢复同一任务。页面展示参考图、全部生成图、视频候选、营销图文方案和历史提示词,可继续调用 <code>generateImage</code>、<code>generateStoryboardVideo</code>、<code>generateCreativeCopy</code>,并支持删除图片/视频。该页继续依赖后端 owner 过滤,用户不能通过切换 URL 读取别人的任务。</td></tr>
|
||||
<tr><td><code>web/app/agent/page.tsx</code></td><td>新增一键出片终端页:只保留 TikTok 链接、产品图上传、实时 <code>Agent Terminal</code> 和最终成片播放器;通过 <code>POST /agent-runs</code> 创建受限后台状态机任务,通过 <code>GET /agent-runs/{id}</code> 轮询日志、进度、审片图和最终 mp4。该页不替代旧工作台深度编辑能力,只承接“用户只看成品”的快速出片主路径。</td></tr>
|
||||
@@ -646,7 +646,7 @@
|
||||
<h3>后端核心</h3>
|
||||
<table>
|
||||
<tbody>
|
||||
<tr><td><code>api/main.py</code></td><td>FastAPI 单文件后端:登录会话、状态模型、任务恢复、下载、抽帧、Vision、清洗、元素、分镜、原音频转写/翻译、声音与背景音分析、后续口播改写/TTS、文件返回;同时承载全局 <code>prompt_library</code> 和 <code>asset_library</code> 的磁盘索引、CRUD、删除保护和复制到 job API。启动时会初始化 Postgres schema、扫描现有 <code>state.json</code> / 资源库并写入索引;<code>/canvas-projects</code> 系列接口把画布项目按当前登录用户持久化,<code>/canvas-workflows</code> 系列接口把我的工作流按当前登录用户持久化为可复用模板。轻量创作入口 <code>POST /creative/jobs/image</code> 把上传图片或空白底图写成一个只有 0 号关键帧的 <code>Job</code>,让首页直接复用生图/生视频接口;该接口兼容无 body / JSON 空对象 / 正常 multipart 上传,避免无首帧文生图或文生视频时空 multipart 被 FastAPI 在业务前置解析阶段拒绝;<code>POST /prompt/polish</code> 用于中性 AI 润色和通用 LLM 文本生成,只保留用户明确给出的主体、品牌、产品、地点、风格和意图,不默认加入 SKG、按摩产品或短视频广告话术,并通过 <code>_prompt_has_person_intent</code> / <code>_prompt_person_guard</code> 区分“无人画面不新增人物”和“有人物画面声明虚构 AI 角色”;<code>/health</code> 返回 <code>database</code>、<code>image_options</code>、<code>image_size_options</code>、<code>video_options</code>、<code>video_size_options</code>、<code>video_duration_options</code> 和 <code>video_max_duration_seconds</code>;<code>/frames/{idx}/generate</code> 的 <code>model</code> 字段用于图片模型偏好,<code>size</code> 字段用于图片输出尺寸;<code>/storyboard/video</code> 继续使用 <code>model</code> 字段选择视频别名,并先校验画幅与时长能力边界,然后把 <code>GeneratedVideo</code> 写成 <code>queued</code> 占位并进入进程内视频队列。队列默认 <code>VIDEO_QUEUE_MAX_CONCURRENT=2</code>、<code>VIDEO_QUEUE_MAX_CONCURRENT_PER_USER=1</code>,同一用户连续提交不会占满全局并发;排队任务会回写 <code>queue_position</code>、<code>queue_size</code>、<code>queue_message</code>。旧 <code>AgentRun</code> 一键出片状态机、TK 复刻接口和 <code>POST /creative/copy</code> 作为明确的 SKG 营销文案接口继续保留。</td></tr>
|
||||
<tr><td><code>api/main.py</code></td><td>FastAPI 单文件后端:登录会话、状态模型、任务恢复、下载、抽帧、Vision、清洗、元素、分镜、原音频转写/翻译、声音与背景音分析、后续口播改写/TTS、文件返回;同时承载全局 <code>prompt_library</code> 和 <code>asset_library</code> 的磁盘索引、CRUD、删除保护和复制到 job API。启动时会初始化 Postgres schema、扫描现有 <code>state.json</code> / 资源库并写入索引;<code>/canvas-projects</code> 系列接口把画布项目按当前登录用户持久化,<code>/canvas-workflows</code> 系列接口把我的工作流按当前登录用户持久化为可复用模板。轻量创作入口 <code>POST /creative/jobs/image</code> 把上传图片或空白底图写成一个只有 0 号关键帧的 <code>Job</code>,让首页直接复用生图/生视频接口;该接口兼容无 body / JSON 空对象 / 正常 multipart 上传,避免无首帧文生图或文生视频时空 multipart 被 FastAPI 在业务前置解析阶段拒绝;<code>POST /prompt/polish</code> 用于中性 AI 润色和通用 LLM 文本生成,只保留用户明确给出的主体、品牌、产品、地点、风格和意图,不默认加入 SKG、按摩产品、平台或短视频广告话术。润色链路会先用 <code>_strip_previous_polish_boilerplate</code> 去掉旧模板尾巴,再用 <code>_classify_prompt_intent</code> 判断人物、无人、物体、场景、动物或未知主体,最后用 <code>_repair_polished_prompt</code> 修掉有人/无人矛盾、未写人却新增人物、未写 SKG 却出现 SKG 等冲突;<code>_append_reference_image_person_guard</code> 会在视频任务最终入队前给参考图请求追加条件提示,声明参考图里若有人物则按 AI 生成的虚拟角色处理;<code>/health</code> 返回 <code>database</code>、<code>image_options</code>、<code>image_size_options</code>、<code>video_options</code>、<code>video_size_options</code>、<code>video_duration_options</code> 和 <code>video_max_duration_seconds</code>;<code>/frames/{idx}/generate</code> 的 <code>model</code> 字段用于图片模型偏好,<code>size</code> 字段用于图片输出尺寸;<code>/storyboard/video</code> 继续使用 <code>model</code> 字段选择视频别名,并先校验画幅与时长能力边界,然后把 <code>GeneratedVideo</code> 写成 <code>queued</code> 占位并进入进程内视频队列。队列默认 <code>VIDEO_QUEUE_MAX_CONCURRENT=2</code>、<code>VIDEO_QUEUE_MAX_CONCURRENT_PER_USER=1</code>,同一用户连续提交不会占满全局并发;排队任务会回写 <code>queue_position</code>、<code>queue_size</code>、<code>queue_message</code>。旧 <code>AgentRun</code> 一键出片状态机、TK 复刻接口和 <code>POST /creative/copy</code> 作为明确的 SKG 营销文案接口继续保留。</td></tr>
|
||||
<tr><td><code>api/db.py</code></td><td>Postgres 适配层:在 <code>DATABASE_URL</code> 存在且 <code>psycopg</code> 可用时启用;负责建表、健康检查、用户 upsert、审计日志、画布项目 CRUD、我的工作流 CRUD,以及把 <code>Job</code>、<code>AgentRun</code>、提示词库和素材库写入索引表。数据库不可用时本地开发会降级为 disabled,生产 <code>verify-prod-docker.sh</code> 会要求 <code>database.connected=true</code>。</td></tr>
|
||||
<tr><td><code>video_model_options()</code></td><td>视频模型能力出口:如果 <code>seedance</code>、<code>kling</code>、<code>veo3</code>、<code>veo</code> 等业务别名实际都映射到同一个真实模型,会按真实模型去重,只给前端返回一个可用选项;当前生产真实模型为 <code>doubao-seedance-2-0-fast-260128</code>,前端显示为 <code>Seedance 2.0 Fast</code>。后续只有在服务器真的配置了不同可用视频模型时,才应把新的模型重新暴露给画布。</td></tr>
|
||||
<tr><td><code>api/product_library/skg-products</code></td><td>内置 SKG 白底产品图库:<code>manifest.json</code> 记录从桌面产品图筛出的 gallery 白底图和桌面 4 张产品角度图,<code>images/</code> 存 45 张参考图。</td></tr>
|
||||
@@ -1107,8 +1107,8 @@ ProductRefStateItem {
|
||||
<tr><td>创建任务</td><td><code>POST /jobs</code></td><td><code>createJob</code></td><td>提交 TK 链接,后台开始下载;后端会把当前登录用户写入 <code>Job.owner_*</code>,后续详情、素材文件、删除和生成接口都通过统一中间件校验归属。下载阶段默认不带 cookies;生产环境必须显式保持 <code>YTDLP_COOKIES_FILE=</code> 和 <code>YTDLP_COOKIES_FROM_BROWSER=</code> 为空,避免容器内误读被打进镜像的开发 <code>api/.env</code>。</td></tr>
|
||||
<tr><td>画布项目</td><td><code>GET /canvas-projects</code><br><code>POST /canvas-projects</code><br><code>PUT /canvas-projects/{id}</code><br><code>GET /canvas-projects/{id}</code><br><code>DELETE /canvas-projects/{id}</code><br><code>POST /canvas-projects/import</code></td><td><code>web/canvas-app/src/stores/projects.js</code></td><td>根域名画布项目的服务端持久化接口。列表和详情按当前登录用户过滤;写入时保存画布 JSON、缩略图、可见性、版本和更新时间;删除为软删除。首次上线后本地 <code>localStorage</code> 旧项目会通过 import 导入到当前用户,之后服务端 Postgres 是主存储。</td></tr>
|
||||
<tr><td>我的工作流</td><td><code>GET /canvas-workflows</code><br><code>POST /canvas-workflows</code><br><code>PUT /canvas-workflows/{id}</code><br><code>DELETE /canvas-workflows/{id}</code></td><td><code>web/canvas-app/src/stores/workflows.js</code><br><code>WorkflowPanel.vue</code></td><td>工作流面板“我的工作流”页的个人模板接口。列表、保存、更新和删除都按当前登录用户过滤;保存的是清理过运行态的 <code>workflow_data.nodes/edges/viewport</code>,用于跨设备复用画布结构。插回画布时前端会按当前视口中心重排节点、重建节点 ID,并用旧 ID 到新 ID 的映射重连边,避免和现有画布节点冲突。</td></tr>
|
||||
<tr><td>画布生成</td><td><code>POST /creative/jobs/image</code><br><code>POST /jobs/{id}/frames/upload</code><br><code>POST /jobs/{id}/frames/{idx}/generate</code><br><code>POST /jobs/{id}/frames/{idx}/storyboard/video</code><br><code>GET /jobs/{id}</code></td><td><code>web/canvas-app/src/hooks/useApi.js</code></td><td>画布项目结构保存在 <code>/canvas-projects</code>;一旦生成图片或视频,就通过同一套 creative job / frame / storyboard video 接口写入当前登录用户自己的 job 目录。文生图会创建空白 creative job 后生成图片;图生视频会把上传图转成 frame 并作为视频参考图提交,提交视频后用 <code>skg:{jobId}:{videoId}</code> 作为画布侧任务 id 轮询 <code>/jobs/{id}</code>,直到视频状态完成或失败。</td></tr>
|
||||
<tr><td>AI 润色 / LLM 节点</td><td><code>POST /prompt/polish</code></td><td><code>web/canvas-app/src/hooks/useApi.js</code><br><code>web/canvas-app/src/api/chat.js</code></td><td>中性的提示词润色和通用文本生成接口。根画布和文本节点传 <code>mode=image</code>、默认输出英文提示词;LLM 节点和自动执行意图分析传 <code>mode=chat</code>、保持输入语言。接口会遵守 system prompt,但明确禁止自动添加用户没有提到的 SKG、按摩产品、短视频广告 framing、营销标题或 hashtag。人物安全词按输入条件加入:原文无人物语义时追加“不新增人物/脸/身体/人群”;原文有人像、模特、角色、数字人等语义时才追加“虚构 AI 角色、非真人、非公众人物、不可识别私人个体”。</td></tr>
|
||||
<tr><td>画布生成</td><td><code>POST /creative/jobs/image</code><br><code>POST /jobs/{id}/frames/upload</code><br><code>POST /jobs/{id}/frames/{idx}/generate</code><br><code>POST /jobs/{id}/frames/{idx}/storyboard/video</code><br><code>GET /jobs/{id}</code></td><td><code>web/canvas-app/src/hooks/useApi.js</code></td><td>画布项目结构保存在 <code>/canvas-projects</code>;一旦生成图片或视频,就通过同一套 creative job / frame / storyboard video 接口写入当前登录用户自己的 job 目录。文生图会创建空白 creative job 后生成图片;图生视频会把上传图转成 frame 并作为视频参考图提交,提交视频后用 <code>skg:{jobId}:{videoId}</code> 作为画布侧任务 id 轮询 <code>/jobs/{id}</code>,直到视频状态完成或失败。视频任务最终 prompt 会条件说明参考图人物是 AI 生成的虚拟角色,避免员工上传的 AI 人像素材被当成真实肖像处理。</td></tr>
|
||||
<tr><td>AI 润色 / LLM 节点</td><td><code>POST /prompt/polish</code></td><td><code>web/canvas-app/src/hooks/useApi.js</code><br><code>web/canvas-app/src/api/chat.js</code></td><td>中性的提示词润色和通用文本生成接口。根画布和文本节点传 <code>mode=image</code>、默认输出英文提示词;LLM 节点和自动执行意图分析传 <code>mode=chat</code>、保持输入语言。接口会清掉上一次润色遗留的模板尾巴,只保留用户明确写出的主体、品牌、产品、地点、平台、风格和意图;用户没写 <code>SKG</code> 时绝不主动加入 SKG,也不主动补产品、平台、广告语境、slogan 或 hashtag。人物安全词按输入条件加入:原文明确有人像、模特、角色、数字人等语义时才声明“虚构 AI 角色、非真人、非公众人物”;原文明确无人时才保留无人物约束;原文没写人时不主动造人,也不主动追加无人物禁令;输入提到参考图、首帧或尾帧时,提示词只条件保留已有可见人物,不凭空新增人物。</td></tr>
|
||||
<tr><td>一键出片终端</td><td><code>POST /agent-runs</code><br><code>GET /agent-runs</code><br><code>GET /agent-runs/{id}</code><br><code>GET /agent-runs/{id}/final.mp4</code><br><code>GET /agent-runs/{id}/contact.jpg</code></td><td><code>web/app/agent/page.tsx</code></td><td>快速出片页的唯一主接口。前端提交 TikTok 链接和最多 6 张产品图;后端创建同 owner 的 <code>Job</code> 与 <code>AgentRun</code>,后台执行下载、产品图归一化、透明骨架主体参考复制、12 段镜头计划、视频生成、失败镜头自动重跑一次、审片接触表和 ffmpeg 最终合成。列表、详情、最终 mp4 和接触表同样按 owner 隔离。</td></tr>
|
||||
<tr><td>重试下载</td><td><code>POST /jobs/{id}/download/retry</code></td><td><code>retryJobDownload</code></td><td>用于 TK 链接下载失败且没有 <code>video_url</code> 的素材;清空错误、重新进入下载状态,并在后台再次执行 <code>pipeline_download</code>。上传视频不能重下载,需要重新上传文件。</td></tr>
|
||||
<tr><td>上传视频</td><td><code>POST /jobs/upload</code></td><td><code>uploadJob</code></td><td>保存 source.mp4,然后同样进入下载完成状态;当前上传后也加入第一步队列,下载完成后自动解析音频。</td></tr>
|
||||
@@ -1147,7 +1147,7 @@ ProductRefStateItem {
|
||||
<tr><td>分镜保存</td><td><code>PUT /frames/{idx}/storyboard</code></td><td><code>updateStoryboard</code></td><td>保存三字段中英镜像、选中视频 ID、4 图槽、时长、改造说明,以及高级抽屉里的镜头类型、人物描述、人物/产品开关、首帧规划、尾帧规划和产品出现方式。当前音频分镜行会额外写 <code>storyboard_row_idx</code>,避免多条分镜共用同一参考帧时互相覆盖。</td></tr>
|
||||
<tr><td>三字段自动展开</td><td><code>POST /jobs/{job_id}/frames/{idx}/storyboard/quick-plan</code></td><td><code>quickPlanStoryboard</code></td><td>输入 <code>skg_copy_*</code>、<code>scene_one_line_*</code>、<code>action_one_line_*</code> 和 <code>subject_brief</code>,用 <code>REWRITE_MODEL</code> 展开为完整 <code>StoryboardScene</code>,只作为视频 prompt 来源,不直接持久化。</td></tr>
|
||||
<tr><td>AI 改文案</td><td><code>POST /jobs/{job_id}/frames/{idx}/storyboard/refine</code></td><td><code>refineStoryboard</code></td><td>输入当前三字段和中文反馈,返回新的三字段中英镜像。前端必须先弹改前/改后预览,用户点应用后才写入行状态。</td></tr>
|
||||
<tr><td>单条视频候选生成</td><td><code>POST /jobs/{job_id}/frames/{idx}/storyboard/video</code></td><td><code>generateStoryboardVideo</code></td><td>新增 <code>count</code>、<code>seed</code> 和 <code>storyboard_row_idx</code>,默认一次创建 4 个 <code>GeneratedVideo</code> 任务并立即返回 job;每个候选独立排队、生成、失败或成功。前端提交 prompt 前用 quick-plan 展开,高级首尾帧存在时继续带上,不存在时后端用参考帧/主体图/产品图透明兜底。视频候选显示必须优先按 <code>storyboard_row_idx</code> 归属到音频分镜行,而不是只按 <code>frame_idx</code>。</td></tr>
|
||||
<tr><td>单条视频候选生成</td><td><code>POST /jobs/{job_id}/frames/{idx}/storyboard/video</code></td><td><code>generateStoryboardVideo</code></td><td>新增 <code>count</code>、<code>seed</code> 和 <code>storyboard_row_idx</code>,默认一次创建 4 个 <code>GeneratedVideo</code> 任务并立即返回 job;每个候选独立排队、生成、失败或成功。前端提交 prompt 前用 quick-plan 展开,高级首尾帧存在时继续带上,不存在时后端用参考帧/主体图/产品图透明兜底。最终提交给视频模型前,后端会为参考图追加 AI 虚拟角色条件提示:参考图若包含人物、脸、身体、手、头像或角色,就按虚构 AI 角色处理,不按真人或公众人物处理。视频候选显示必须优先按 <code>storyboard_row_idx</code> 归属到音频分镜行,而不是只按 <code>frame_idx</code>。</td></tr>
|
||||
<tr><td>整片一键生成候选</td><td><code>POST /jobs/{job_id}/storyboard/batch-generate-all</code></td><td>当前主路径改为逐行调用 <code>generateStoryboardVideo</code></td><td>用户选择“每行 N 条”后,前端按音频分镜逐行提交,确保每个候选都带 <code>storyboard_row_idx</code>。后端批量接口保留为兼容能力,默认 <code>concurrency=1</code>,但当前 UI 不再用它做主路径。</td></tr>
|
||||
<tr><td>生图</td><td><code>POST /frames/{idx}/generate</code></td><td><code>generateImage</code></td><td>基于关键帧或已选生成图做 image-to-image,目前可用。</td></tr>
|
||||
</tbody>
|
||||
@@ -1266,6 +1266,18 @@ ProductRefStateItem {
|
||||
<h2>变更记录</h2>
|
||||
<p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
|
||||
<div class="changelog">
|
||||
<article class="change">
|
||||
<header>
|
||||
<h3>2026-05-26 · AI 润色改为意图分类和冲突校验</h3>
|
||||
<span class="tag amber">API</span>
|
||||
<span class="tag violet">Canvas</span>
|
||||
</header>
|
||||
<div class="body">
|
||||
<p><strong>问题:</strong>旧润色会把人物/无人物安全词作为模板尾巴直接拼到可见 prompt。用户二次润色时,这些尾巴会污染意图判断,出现 <code>A person...</code> 后面又接 <code>do not introduce people</code> 这类自相矛盾结果;同时,润色也不应因为系统背景是 SKG 平台,就替用户主动加入 SKG。</p>
|
||||
<p><strong>改动:</strong><code>api/main.py</code> 的 <code>/prompt/polish</code> 改成“清理旧模板尾巴 → 分类人物/无人/物体/场景/动物/未知主体 → 按图片或视频结构改写 → 输出冲突校验/修复”。用户没写 <code>SKG</code> 时明确禁止主动加入 SKG;用户没写人时不把未知主体润成 person,也不追加“必须无人物”的兜底禁令;用户明确有人时才把人物描述成虚构 AI 角色;用户明确无人时才保留无人物约束。</p>
|
||||
<p><strong>影响:</strong>AI 润色继续保持中性专业,但不会再把公司背景、SKG、产品、平台或人物安全尾巴硬塞给所有提示词。员工要 SKG 或具体产品时,需要自己写进输入;写了就保留并润色。视频生成的参考图链路会额外声明图中人物是 AI 生成的虚拟角色,以便员工继续使用 AI 人像素材做图生视频。</p>
|
||||
</div>
|
||||
</article>
|
||||
<article class="change">
|
||||
<header>
|
||||
<h3>2026-05-26 · 我的工作流接入云端个人模板</h3>
|
||||
@@ -1335,7 +1347,7 @@ ProductRefStateItem {
|
||||
</header>
|
||||
<div class="body">
|
||||
<p><strong>问题:</strong>Seedance / Doubao 视频上游返回 <code>InputImageSensitiveContentDetected.PrivacyInformation</code>、HTTP 400、429、timeout 等机器错误时,画布错误框原样展示会让员工误以为账号、模型或网关坏了,需要人工解释。</p>
|
||||
<p><strong>改动:</strong><code>api/main.py</code> 新增视频错误归一化逻辑,提交失败、轮询失败和后台任务异常都会先转换成可读中文,再写入 <code>GeneratedVideo.error</code>。例如含疑似真实人脸的参考图会提示“参考图里有清晰人物或疑似真实人脸,视频模型出于肖像/隐私风控拒绝生成”,并给出换无脸首帧、裁掉或模糊人物脸的下一步。</p>
|
||||
<p><strong>改动:</strong><code>api/main.py</code> 新增视频错误归一化逻辑,提交失败、轮询失败和后台任务异常都会先转换成可读中文,再写入 <code>GeneratedVideo.error</code>。例如参考图被上游判成疑似真实人脸时,会提示系统已按 AI 虚拟角色提交但上游仍可能误判,并建议换低识别度首帧、裁掉或弱化脸部后重试。</p>
|
||||
<p><strong>影响:</strong>前端现有视频失败卡、画布轮询错误框和详情里的 <code>video.error</code> 会自动显示中文解释;原始上游错误只写入 API 日志,方便管理员排查,不再要求用户把英文错误码发给开发者翻译。</p>
|
||||
</div>
|
||||
</article>
|
||||
|
||||
Reference in New Issue
Block a user