diff --git a/api/main.py b/api/main.py index d6b9be1..9d90511 100644 --- a/api/main.py +++ b/api/main.py @@ -4480,6 +4480,8 @@ def normalize_product_view_data(data: dict, index: int) -> dict: confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5)))) except Exception: confidence = 0.5 + if confidence <= 0 and not risk and landmarks: + confidence = 0.65 return { "view": view, "background": background, @@ -4510,15 +4512,27 @@ def parse_product_view_response(raw: str, index: int) -> dict: confidence_match = re.search(r'["\']?confidence["\']?\s*[::]\s*["\']?([0-9.]+)', text, flags=re.I) background_match = re.search(r'["\']?background["\']?\s*[::]\s*["\']?([a-z0-9_]+)', text, flags=re.I) tags_match = re.search(r'["\']?use_tags["\']?\s*[::]\s*\[([\s\S]*?)\]', text, flags=re.I) + landmarks_match = re.search(r'["\']?landmarks["\']?\s*[::]\s*\[([\s\S]*?)(?:\]|\}\s*$)', text, flags=re.I) risk_match = re.search( r'["\']?risk["\']?\s*[::]\s*["\']?([\s\S]*?)(?:["\']?\s*[,}]\s*$)', text, flags=re.I, ) + orientation = {} + for key in PRODUCT_ORIENTATION_KEYS: + orientation_match = re.search( + rf'["\']?{key}["\']?\s*[::]\s*["\']?([^"\',,}}\]]+)', + text, + flags=re.I, + ) + if orientation_match: + orientation[key] = orientation_match.group(1) data = { "view": view_match.group(1) if view_match else "", "background": background_match.group(1) if background_match else "unknown", "use_tags": re.findall(r"[a-z_]+", tags_match.group(1)) if tags_match else [], + "orientation": orientation, + "landmarks": re.findall(r"[\u4e00-\u9fffA-Za-z0-9/_-]+", landmarks_match.group(1)) if landmarks_match else [], "note": note_match.group(1) if note_match else "", "risk": risk_match.group(1) if risk_match else "", "confidence": confidence_match.group(1) if confidence_match else 0.45, @@ -4532,7 +4546,22 @@ def parse_product_view_batch_response(raw: str, indices: list[int]) -> dict[int, text = re.sub(r"\s*```$", "", text).strip() match = re.search(r"\{[\s\S]*\}", text) json_text = match.group(0) if match else text - data = json.loads(json_text) + try: + data = json.loads(json_text) + except Exception: + starts: list[tuple[int, int]] = [] + for index in indices: + found = re.search(rf'["\']?index["\']?\s*[::]\s*["\']?{index}["\']?', text) + if found: + starts.append((index, found.start())) + if not starts and len(indices) == 1: + return {indices[0]: parse_product_view_response(text, indices[0])} + starts.sort(key=lambda item: item[1]) + tolerant: dict[int, dict] = {} + for offset, (index, start_pos) in enumerate(starts): + end_pos = starts[offset + 1][1] if offset + 1 < len(starts) else len(text) + tolerant[index] = parse_product_view_response(text[start_pos:end_pos], index) + return tolerant raw_items = data.get("items") if isinstance(data, dict) else data if not isinstance(raw_items, list): raise ValueError("product view batch response missing items[]") @@ -4560,7 +4589,7 @@ def product_view_batch_prompt(indices: list[int]) -> str: "background enum:white, black, simple, complex, unknown。use_tags 只能从 enum 选:hero_packshot, wearing_scale, inner_contact, side_thickness, asymmetry, button_detail, back_bottom, material_texture。\n" "landmarks 用中文短词列出可见结构,例如:佩戴者左侧臂、佩戴者右侧臂、U形开口、贴颈内侧、按摩触点、侧边厚度、按键、充电口、底部、外壳材质、局部细节。note 必须用中文写给生视频模型,重点说明这张图适合约束什么,尤其要写清楚左/右/上/下、内/外侧、触点或局部细节。risk 只在可能误导生视频时写中文,如局部裁切、无法判断产品左右、上下颠倒风险、反光、遮挡、分辨率低、背景干扰;否则为空。\n" f"本次共有 {count} 张图片,图片前的 Image index 就是输出 index。必须输出同样数量的 items,且 index 不要改。只输出一行严格 JSON,不要 markdown,不要换行。\n" - "{\"items\":[{\"index\":0,\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.0}]}" + "{\"items\":[{\"index\":0,\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.86}]}" ) @@ -4575,7 +4604,7 @@ def analyze_product_view(ref_path: Path, index: int) -> dict: "background 从 enum 选:white, black, simple, complex, unknown。use_tags 只能从 enum 选:hero_packshot, wearing_scale, inner_contact, side_thickness, asymmetry, button_detail, back_bottom, material_texture。 " "landmarks 用中文短词列出可见结构,例如佩戴者左侧臂、佩戴者右侧臂、U形开口、贴颈内侧、按摩触点、侧边厚度、按键、充电口、底部、外壳材质、局部细节。note 用中文写给生视频模型,重点说明左/右/上/下、内/外侧、触点或局部细节。risk 只在可能误导生视频时写中文,否则为空。 " "Output one-line strict JSON only. Do not use markdown or line breaks. " - "{\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.0}." + "{\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.86}." ) try: resp = llm().chat.completions.create( @@ -4586,7 +4615,7 @@ def analyze_product_view(ref_path: Path, index: int) -> dict: ]}], response_format={"type": "json_object"}, temperature=0.1, - max_tokens=700, + max_tokens=1600, ) raw = (resp.choices[0].message.content or "").strip() if not raw: @@ -4616,7 +4645,7 @@ def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[ messages=[{"role": "user", "content": content}], response_format={"type": "json_object"}, temperature=0.05, - max_tokens=1600, + max_tokens=max(2400, min(7000, 1200 * len(chunk))), ) raw = (resp.choices[0].message.content or "").strip() if not raw: