fix: harden product view parsing
This commit is contained in:
39
api/main.py
39
api/main.py
@@ -4480,6 +4480,8 @@ def normalize_product_view_data(data: dict, index: int) -> dict:
|
||||
confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5))))
|
||||
except Exception:
|
||||
confidence = 0.5
|
||||
+if confidence <= 0 and not risk and landmarks:
|
||||
+confidence = 0.65
|
||||
return {
|
||||
"view": view,
|
||||
"background": background,
|
||||
@@ -4510,15 +4512,27 @@ def parse_product_view_response(raw: str, index: int) -> dict:
|
||||
confidence_match = re.search(r'["\']?confidence["\']?\s*[::]\s*["\']?([0-9.]+)', text, flags=re.I)
|
||||
background_match = re.search(r'["\']?background["\']?\s*[::]\s*["\']?([a-z0-9_]+)', text, flags=re.I)
|
||||
tags_match = re.search(r'["\']?use_tags["\']?\s*[::]\s*\[([\s\S]*?)\]', text, flags=re.I)
|
||||
landmarks_match = re.search(r'["\']?landmarks["\']?\s*[::]\s*\[([\s\S]*?)(?:\]|\}\s*$)', text, flags=re.I)
|
||||
risk_match = re.search(
|
||||
r'["\']?risk["\']?\s*[::]\s*["\']?([\s\S]*?)(?:["\']?\s*[,}]\s*$)',
|
||||
text,
|
||||
flags=re.I,
|
||||
)
|
||||
orientation = {}
|
||||
for key in PRODUCT_ORIENTATION_KEYS:
|
||||
orientation_match = re.search(
|
||||
rf'["\']?{key}["\']?\s*[::]\s*["\']?([^"\',,}}\]]+)',
|
||||
text,
|
||||
flags=re.I,
|
||||
)
|
||||
if orientation_match:
|
||||
orientation[key] = orientation_match.group(1)
|
||||
data = {
|
||||
"view": view_match.group(1) if view_match else "",
|
||||
"background": background_match.group(1) if background_match else "unknown",
|
||||
"use_tags": re.findall(r"[a-z_]+", tags_match.group(1)) if tags_match else [],
|
||||
"orientation": orientation,
|
||||
"landmarks": re.findall(r"[\u4e00-\u9fffA-Za-z0-9/_-]+", landmarks_match.group(1)) if landmarks_match else [],
|
||||
"note": note_match.group(1) if note_match else "",
|
||||
"risk": risk_match.group(1) if risk_match else "",
|
||||
"confidence": confidence_match.group(1) if confidence_match else 0.45,
|
||||
@@ -4532,7 +4546,22 @@ def parse_product_view_batch_response(raw: str, indices: list[int]) -> dict[int,
|
||||
text = re.sub(r"\s*```$", "", text).strip()
|
||||
match = re.search(r"\{[\s\S]*\}", text)
|
||||
json_text = match.group(0) if match else text
|
||||
-data = json.loads(json_text)
|
||||
+try:
|
||||
+data = json.loads(json_text)
|
||||
+except Exception:
|
||||
+starts: list[tuple[int, int]] = []
|
||||
+for index in indices:
|
||||
+found = re.search(rf'["\']?index["\']?\s*[::]\s*["\']?{index}["\']?', text)
|
||||
+if found:
|
||||
+starts.append((index, found.start()))
|
||||
+if not starts and len(indices) == 1:
|
||||
+return {indices[0]: parse_product_view_response(text, indices[0])}
|
||||
+starts.sort(key=lambda item: item[1])
|
||||
+tolerant: dict[int, dict] = {}
|
||||
+for offset, (index, start_pos) in enumerate(starts):
|
||||
+end_pos = starts[offset + 1][1] if offset + 1 < len(starts) else len(text)
|
||||
+tolerant[index] = parse_product_view_response(text[start_pos:end_pos], index)
|
||||
+return tolerant
|
||||
raw_items = data.get("items") if isinstance(data, dict) else data
|
||||
if not isinstance(raw_items, list):
|
||||
raise ValueError("product view batch response missing items[]")
|
||||
@@ -4560,7 +4589,7 @@ def product_view_batch_prompt(indices: list[int]) -> str:
|
||||
"background enum:white, black, simple, complex, unknown。use_tags 只能从 enum 选:hero_packshot, wearing_scale, inner_contact, side_thickness, asymmetry, button_detail, back_bottom, material_texture。\n"
|
||||
"landmarks 用中文短词列出可见结构,例如:佩戴者左侧臂、佩戴者右侧臂、U形开口、贴颈内侧、按摩触点、侧边厚度、按键、充电口、底部、外壳材质、局部细节。note 必须用中文写给生视频模型,重点说明这张图适合约束什么,尤其要写清楚左/右/上/下、内/外侧、触点或局部细节。risk 只在可能误导生视频时写中文,如局部裁切、无法判断产品左右、上下颠倒风险、反光、遮挡、分辨率低、背景干扰;否则为空。\n"
|
||||
f"本次共有 {count} 张图片,图片前的 Image index 就是输出 index。必须输出同样数量的 items,且 index 不要改。只输出一行严格 JSON,不要 markdown,不要换行。\n"
|
||||
-"{\"items\":[{\"index\":0,\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.0}]}"
|
||||
+"{\"items\":[{\"index\":0,\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.86}]}"
|
||||
)
|
||||
|
||||
|
||||
@@ -4575,7 +4604,7 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
|
||||
"background 从 enum 选:white, black, simple, complex, unknown。use_tags 只能从 enum 选:hero_packshot, wearing_scale, inner_contact, side_thickness, asymmetry, button_detail, back_bottom, material_texture。 "
|
||||
"landmarks 用中文短词列出可见结构,例如佩戴者左侧臂、佩戴者右侧臂、U形开口、贴颈内侧、按摩触点、侧边厚度、按键、充电口、底部、外壳材质、局部细节。note 用中文写给生视频模型,重点说明左/右/上/下、内/外侧、触点或局部细节。risk 只在可能误导生视频时写中文,否则为空。 "
|
||||
"Output one-line strict JSON only. Do not use markdown or line breaks. "
|
||||
-"{\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.0}."
|
||||
+"{\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.86}."
|
||||
)
|
||||
try:
|
||||
resp = llm().chat.completions.create(
|
||||
@@ -4586,7 +4615,7 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
|
||||
]}],
|
||||
response_format={"type": "json_object"},
|
||||
temperature=0.1,
|
||||
-max_tokens=700,
|
||||
+max_tokens=1600,
|
||||
)
|
||||
raw = (resp.choices[0].message.content or "").strip()
|
||||
if not raw:
|
||||
@@ -4616,7 +4645,7 @@ def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[
|
||||
messages=[{"role": "user", "content": content}],
|
||||
response_format={"type": "json_object"},
|
||||
temperature=0.05,
|
||||
-max_tokens=1600,
|
||||
+max_tokens=max(2400, min(7000, 1200 * len(chunk))),
|
||||
)
|
||||
raw = (resp.choices[0].message.content or "").strip()
|
||||
if not raw:
|
||||
|
||||
Reference in New Issue
Block a user