fix: normalize media prompts and patent views
This commit is contained in:
53
api/main.py
53
api/main.py
@@ -2664,6 +2664,15 @@ SUBJECT_VIEW_LABELS: dict[str, str] = {
|
||||
"back_detail": "背部特写",
|
||||
}
|
||||
|
||||
OBJECT_PATENT_VIEW_LABELS: dict[str, str] = {
|
||||
"front": "正投影主视图",
|
||||
"back": "正投影后视图",
|
||||
"left": "正投影左视图",
|
||||
"right": "正投影右视图",
|
||||
"top": "正投影俯视图",
|
||||
"bottom": "正投影仰视图",
|
||||
}
|
||||
|
||||
|
||||
def _subject_view_labels(kind: SubjectKind, requested: list[str] | None = None) -> list[tuple[SubjectView, str]]:
|
||||
if requested:
|
||||
@@ -2672,7 +2681,8 @@ def _subject_view_labels(kind: SubjectKind, requested: list[str] | None = None)
|
||||
key = "".join(ch for ch in str(raw).strip().lower() if ch.isalnum() or ch == "_")
|
||||
if key and key not in normalized:
|
||||
normalized.append(key)
|
||||
return [(key, SUBJECT_VIEW_LABELS.get(key, key.replace("_", " "))) for key in normalized[:10]]
|
||||
labels = OBJECT_PATENT_VIEW_LABELS if kind == "object" else SUBJECT_VIEW_LABELS
|
||||
return [(key, labels.get(key, SUBJECT_VIEW_LABELS.get(key, key.replace("_", " ")))) for key in normalized[:10]]
|
||||
if kind == "living":
|
||||
return [
|
||||
("front", "正面站立"),
|
||||
@@ -2687,16 +2697,36 @@ def _subject_view_labels(kind: SubjectKind, requested: list[str] | None = None)
|
||||
("back_neck_detail", "后颈/肩背特写"),
|
||||
]
|
||||
return [
|
||||
("front", "正面"),
|
||||
("back", "背面"),
|
||||
("left", "左侧"),
|
||||
("right", "右侧"),
|
||||
("top", "正投影俯视图"),
|
||||
("bottom", "正投影仰视图"),
|
||||
("front", OBJECT_PATENT_VIEW_LABELS["front"]),
|
||||
("back", OBJECT_PATENT_VIEW_LABELS["back"]),
|
||||
("left", OBJECT_PATENT_VIEW_LABELS["left"]),
|
||||
("right", OBJECT_PATENT_VIEW_LABELS["right"]),
|
||||
("top", OBJECT_PATENT_VIEW_LABELS["top"]),
|
||||
("bottom", OBJECT_PATENT_VIEW_LABELS["bottom"]),
|
||||
]
|
||||
|
||||
|
||||
def _subject_view_projection_clause(view: str) -> str:
|
||||
if view == "front":
|
||||
return (
|
||||
"Patent-style orthographic main/front elevation view: look straight at the designated main face, "
|
||||
"with the viewing direction perpendicular to that face. No perspective, no tilt, no 3/4 angle, no isometric view. "
|
||||
)
|
||||
if view == "back":
|
||||
return (
|
||||
"Patent-style orthographic rear elevation view: look straight at the rear face, "
|
||||
"with the viewing direction perpendicular to that face. No perspective, no tilt, no 3/4 angle, no isometric view. "
|
||||
)
|
||||
if view == "left":
|
||||
return (
|
||||
"Patent-style orthographic left side elevation view: look straight at the product's left side, "
|
||||
"with the viewing direction perpendicular to that side face. No perspective, no tilt, no 3/4 angle, no isometric view. "
|
||||
)
|
||||
if view == "right":
|
||||
return (
|
||||
"Patent-style orthographic right side elevation view: look straight at the product's right side, "
|
||||
"with the viewing direction perpendicular to that side face. No perspective, no tilt, no 3/4 angle, no isometric view. "
|
||||
)
|
||||
if view == "top":
|
||||
return (
|
||||
"Patent-style orthographic top view: look straight down from directly above the product, "
|
||||
@@ -5597,13 +5627,14 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
|
||||
reference_path = sel_path
|
||||
reference_source = f"gen:{sel.id[:6]}"
|
||||
|
||||
full_prompt = req.prompt.strip()
|
||||
raw_prompt = req.prompt.strip()
|
||||
if req.extra_prompt.strip():
|
||||
full_prompt = f"{full_prompt}. Include: {req.extra_prompt.strip()}"
|
||||
raw_prompt = f"{raw_prompt}. Include: {req.extra_prompt.strip()}"
|
||||
if req.negative_prompt.strip():
|
||||
full_prompt = f"{full_prompt}. Avoid: {req.negative_prompt.strip()}"
|
||||
if not full_prompt:
|
||||
raw_prompt = f"{raw_prompt}. Avoid: {req.negative_prompt.strip()}"
|
||||
if not raw_prompt:
|
||||
raise HTTPException(400, "prompt required")
|
||||
full_prompt = _ensure_english(raw_prompt)
|
||||
if not IMAGE_API_KEY:
|
||||
raise HTTPException(503, "IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
||||
|
||||
|
||||
@@ -1181,6 +1181,19 @@ ProductRefStateItem {
|
||||
<h2>变更记录</h2>
|
||||
<p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
|
||||
<div class="changelog">
|
||||
<article class="change">
|
||||
<header>
|
||||
<h3>2026-05-25 · 生图生视频统一英文提示词和完整正投影视图</h3>
|
||||
<span class="tag amber">API</span>
|
||||
<span class="tag violet">UI</span>
|
||||
<span class="tag blue">Docs</span>
|
||||
</header>
|
||||
<div class="body">
|
||||
<p><strong>问题:</strong>生视频链路已经会把中文要求转成英文再提交外部视频 API,但首页直接生图仍可能把中文混合 prompt 直接传给图片 API;物体六视图也只对俯视/仰视加了正投影约束,前后左右仍有“正面/背面/左侧/右侧”的口语化表述。</p>
|
||||
<p><strong>改动:</strong><code>/jobs/{job_id}/frames/{idx}/generate</code> 在调用图片 API 前统一对完整 prompt 执行 <code>_ensure_english()</code>,和生视频 <code>_enqueue_storyboard_videos</code> 的英文提交规则保持一致。物体主体套图的默认六面视图改为 <code>正投影主视图 / 正投影后视图 / 正投影左视图 / 正投影右视图 / 正投影俯视图 / 正投影仰视图</code>,并对前后左右也加入 <code>orthographic elevation</code>、<code>no perspective</code>、<code>no tilt</code>、<code>no 3/4 angle</code>、<code>no isometric view</code> 约束。</p>
|
||||
<p><strong>影响:</strong>用户仍可以用中文描述生图或生视频需求;后端实际提交给图片/视频模型的提示词会优先归一为英文。以后“专利六视图”应明确理解为六面正投影视图,不等同于摄影角度图;真人/短视频主体参考包仍保留口播、45 度、肩颈近景等非专利视角。</p>
|
||||
</div>
|
||||
</article>
|
||||
<article class="change">
|
||||
<header>
|
||||
<h3>2026-05-25 · 主体套图俯视和仰视改为正投影口径</h3>
|
||||
|
||||
@@ -32,10 +32,10 @@ interface Props {
|
||||
}
|
||||
|
||||
const OBJECT_VIEW_OPTIONS = [
|
||||
["front", "正面"],
|
||||
["back", "背面"],
|
||||
["left", "左侧"],
|
||||
["right", "右侧"],
|
||||
["front", "正投影主视图"],
|
||||
["back", "正投影后视图"],
|
||||
["left", "正投影左视图"],
|
||||
["right", "正投影右视图"],
|
||||
["top", "正投影俯视图"],
|
||||
["bottom", "正投影仰视图"],
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user