fix: tolerate product view model output
This commit is contained in:
50
api/main.py
50
api/main.py
@@ -4284,16 +4284,50 @@ def fallback_product_view(index: int) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def parse_product_view_response(raw: str, index: int) -> dict:
    """Parse a model reply describing a product view into a normalized dict.

    The reply is first treated as JSON (after removing an optional Markdown
    code fence and isolating the outermost ``{...}`` span). If that fails, or
    if the decoded value is not a JSON object, the ``view``, ``note`` and
    ``confidence`` fields are extracted from the raw text with tolerant
    regular expressions that accept missing quotes and full-width colons.

    Args:
        raw: Raw text returned by the model; ``None`` or empty is tolerated.
        index: Forwarded to ``fallback_product_view`` when no valid view can
            be recovered.

    Returns:
        ``{"view": str, "note": str, "confidence": float}`` with the
        confidence clamped to ``[0.0, 1.0]``, or whatever
        ``fallback_product_view(index)`` returns when the view is not a
        member of ``PRODUCT_VIEW_VALUES``.
    """
    text = (raw or "").strip()
    # Strip a leading ```/```json fence and a trailing ``` fence, if present.
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.I).strip()
    text = re.sub(r"\s*```$", "", text).strip()
    match = re.search(r"\{[\s\S]*\}", text)
    json_text = match.group(0) if match else text
    try:
        data = json.loads(json_text)
    except Exception:
        # Deliberately broad: any decoding problem falls through to the
        # tolerant extraction below instead of failing the whole analysis.
        data = None
    if not isinstance(data, dict):
        # Either the text was not valid JSON, or it decoded to a list/string/
        # number; ``.get`` would raise on those, so recover fields by regex.
        view_match = re.search(r'["\']?view["\']?\s*[::]\s*["\']?([a-z0-9_]+)', text, flags=re.I)
        note_match = re.search(
            r'["\']?note["\']?\s*[::]\s*["\']?([\s\S]*?)(?:["\']?\s*,\s*["\']?confidence|["\']?\s*[,}]\s*$)',
            text,
            flags=re.I,
        )
        # Capture one well-formed decimal so a trailing sentence period
        # ("0.8.") does not make the later float() conversion fail.
        confidence_match = re.search(
            r'["\']?confidence["\']?\s*[::]\s*["\']?([0-9]+(?:\.[0-9]+)?|\.[0-9]+)',
            text,
            flags=re.I,
        )
        data = {
            "view": view_match.group(1) if view_match else "",
            "note": note_match.group(1) if note_match else "",
            "confidence": confidence_match.group(1) if confidence_match else 0.45,
        }
    view = str(data.get("view") or "").strip().strip('"\' ,。')
    if view not in PRODUCT_VIEW_VALUES and view.lower() in PRODUCT_VIEW_VALUES:
        # The fallback regex matches case-insensitively, so accept a reply
        # such as "Front" when its lowercase form is a known view.
        view = view.lower()
    if view not in PRODUCT_VIEW_VALUES:
        return fallback_product_view(index)
    note = str(data.get("note") or "").strip().strip('"\' ,,。')
    # Collapse whitespace runs, cap the length, and fall back to a label-based
    # note when nothing usable remains.
    note = re.sub(r"\s+", " ", note)[:220] or f"{PRODUCT_VIEW_LABELS.get(view, view)}参考"
    try:
        confidence = float(data.get("confidence", 0.5))
    except Exception:
        confidence = 0.5
    if confidence != confidence:
        # NaN (json.loads accepts the literal) would otherwise be clamped to
        # 1.0 by the min/max below; treat it as "unknown" instead.
        confidence = 0.5
    confidence = max(0.0, min(1.0, confidence))
    return {"view": view, "note": note, "confidence": confidence}
|
||||
|
||||
|
||||
def analyze_product_view(ref_path: Path, index: int) -> dict:
|
||||
if not LLM_API_KEY:
|
||||
return fallback_product_view(index)
|
||||
img_b64 = base64.b64encode(ref_path.read_bytes()).decode("ascii")
|
||||
prompt = (
|
||||
"You are inspecting a clean white-background product reference image for a SKG neck-and-shoulder wearable massage device. "
|
||||
"You are inspecting a product reference image for a SKG neck-and-shoulder wearable massage device. The background may be white, black, or simple studio color. "
|
||||
"Classify the camera/view angle into exactly one enum: front, left_45, right_45, side_thickness, inner_contacts, back_bottom. "
|
||||
"Also write a concise Chinese note for video generation, focused on visible structure, asymmetry, thickness, inner massage contacts, buttons, opening width, and shoulder-neck wearing scale. "
|
||||
"If uncertain, choose the closest useful view; do not ask the user. "
|
||||
"Output strict JSON only: {\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\", \"note\":\"中文备注\", \"confidence\":0.0}."
|
||||
"Output one-line strict JSON only. Do not use markdown or line breaks. "
|
||||
"{\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\", \"note\":\"中文备注\", \"confidence\":0.0}."
|
||||
)
|
||||
try:
|
||||
resp = llm().chat.completions.create(
|
||||
@@ -4309,17 +4343,7 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
|
||||
raw = (resp.choices[0].message.content or "").strip()
|
||||
if not raw:
|
||||
raw = (getattr(resp.choices[0].message, "reasoning_content", "") or "").strip()
|
||||
match = re.search(r"\{[\s\S]*\}", raw)
|
||||
data = json.loads(match.group(0) if match else raw)
|
||||
view = str(data.get("view") or "").strip()
|
||||
if view not in PRODUCT_VIEW_VALUES:
|
||||
return fallback_product_view(index)
|
||||
note = str(data.get("note") or "").strip() or f"{PRODUCT_VIEW_LABELS.get(view, view)}参考"
|
||||
try:
|
||||
confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5))))
|
||||
except Exception:
|
||||
confidence = 0.5
|
||||
return {"view": view, "note": note, "confidence": confidence}
|
||||
return parse_product_view_response(raw, index)
|
||||
except Exception as e:
|
||||
fallback = fallback_product_view(index)
|
||||
fallback["note"] = f"{fallback['note']} 识别失败:{str(e)[:80]}"
|
||||
|
||||
@@ -950,6 +950,18 @@ SubjectAsset {
|
||||
<h2>变更记录</h2>
|
||||
<p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
|
||||
<div class="changelog">
|
||||
<article class="change">
|
||||
<header>
|
||||
<h3>2026-05-17 · 产品视角识别容错解析</h3>
|
||||
<span class="tag blue">API</span>
|
||||
<span class="tag cyan">Workflow</span>
|
||||
</header>
|
||||
<div class="body">
|
||||
<p><strong>问题:</strong>视觉模型有时能判断视角,但返回的 JSON 含换行、引号或尾部格式问题,后端直接 <code>json.loads</code> 会失败,导致整张图被标成“识别失败”。</p>
|
||||
<p><strong>改动:</strong><code>api/main.py</code> 新增 <code>parse_product_view_response</code>:先按严格 JSON 解析,失败后从原始输出里容错提取 <code>view</code>、<code>note</code> 和 <code>confidence</code>。同时收紧产品视角识别 prompt,要求模型输出单行 JSON。</p>
|
||||
<p><strong>影响:</strong><code>POST /jobs/{id}/assets/product-views/analyze</code> 在模型输出不完全规范时也能保留视角结果,减少无意义 fallback;真正无法识别时才按上传顺序兜底。</p>
|
||||
</div>
|
||||
</article>
|
||||
<article class="change">
|
||||
<header>
|
||||
<h3>2026-05-17 · 产品素材池取消数量上限,单条生成自动选图</h3>
|
||||
|
||||
Reference in New Issue
Block a user