auto-save 2026-05-17 20:15 (~4)

2026-05-17 20:15:13 +08:00
parent d32e87a376
commit 72aef99592
4 changed files with 272 additions and 44 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -4327,12 +4327,14 @@ async def upload_storyboard_asset(job_id: str, file: UploadFile = File(...)) ->


 PRODUCT_VIEW_VALUES = ["front", "left_45", "right_45", "side_thickness", "inner_contacts", "back_bottom"]
+PRODUCT_VIEW_BATCH_SIZE = max(1, min(12, int(os.getenv("PRODUCT_VIEW_BATCH_SIZE", "8"))))  # images sent per vision request; env override clamped to 1..12 (non-integer env text raises ValueError at import)
+
 PRODUCT_VIEW_LABELS = {
-    "front": "正面",
-    "left_45": "左 45",
-    "right_45": "右 45",
+    "front": "正面/外侧主外观",
+    "left_45": "佩戴者左 45",
+    "right_45": "佩戴者右 45",
    "side_thickness": "侧面厚度",
-    "inner_contacts": "内侧触点",
+    "inner_contacts": "贴颈内侧/触点",
    "back_bottom": "背面/底部",
 }

@@ -4382,12 +4384,114 @@ def fallback_product_view(index: int) -> dict:
        "view": view,
        "background": "unknown",
        "use_tags": default_product_use_tags(view),
-        "note": f"{PRODUCT_VIEW_LABELS.get(view, view)}参考；模型识别不可用时按上传顺序自动标注，请人工只检查备注。",
+        "orientation": default_product_orientation(view),
+        "landmarks": default_product_landmarks(view),
+        "note": f"{PRODUCT_VIEW_LABELS.get(view, view)}参考；模型识别不可用时按上传顺序自动标注，请重点复核佩戴者左/右、上/下和贴颈内侧。",
        "risk": "模型识别不可用，按上传顺序兜底",
        "confidence": 0.25,
    }


+PRODUCT_ORIENTATION_KEYS = [  # orientation fields normalize_product_orientation will accept from model output
+    "product_left",  # side on the wearer's left shoulder when worn
+    "product_right",  # side on the wearer's right shoulder when worn
+    "top",  # edge nearest the chin / upper neck
+    "bottom",  # edge nearest the collarbone / shoulders
+    "inner_side",  # surface against the neck skin (massage contacts)
+    "outer_side",  # outward shell (buttons / logo / material)
+    "opening_direction",  # which way the U-shaped opening points
+]
+
+
+def default_product_orientation(view: str) -> dict:
+    """Return the stock orientation notes, with the one entry that matters most for ``view`` swapped in."""
+    if view in {"left_45", "right_45"}:
+        opening = "注意不要把图片左右直接当成产品佩戴者左右"
+    else:
+        opening = "U 形开口方向需结合图片复核"
+    inner = "本图重点：贴颈内侧/按摩触点" if view == "inner_contacts" else "贴近脖子皮肤的一侧，通常可见按摩触点"
+    outer = "本图重点：侧厚、边缘和机身厚度" if view == "side_thickness" else "外壳展示面，通常可见按键/Logo/材质"
+    return {
+        "product_left": "佩戴者左侧；需人工复核图中位置",
+        "product_right": "佩戴者右侧；需人工复核图中位置",
+        "top": "靠近下巴/脸/颈部上沿",
+        "bottom": "靠近锁骨/肩部下沿",
+        "inner_side": inner,
+        "outer_side": outer,
+        "opening_direction": opening,
+    }
+
+
+def default_product_landmarks(view: str) -> list[str]:
+    """Return the stock landmark labels for ``view``; unknown views get one generic U-shape label."""
+    return {
+        "front": ["U形开口", "外壳主轮廓", "左右臂"],
+        "left_45": ["佩戴者左侧臂", "侧边弧度", "按键/结构差异"],
+        "right_45": ["佩戴者右侧臂", "侧边弧度", "按键/结构差异"],
+        "side_thickness": ["机身厚度", "侧边轮廓", "佩戴比例"],
+        "inner_contacts": ["贴颈内侧", "按摩触点", "皮肤接触面"],
+        "back_bottom": ["背面/底部", "接口/底面", "材质细节"],
+    }.get(view, ["U形挂脖轮廓"])
+
+
+def normalize_product_orientation(value: object, view: str) -> dict:
+    """Overlay cleaned, 80-char-capped model orientation fields on the defaults for ``view``."""
+    merged = default_product_orientation(view)
+    if not isinstance(value, dict):
+        return merged
+    for field in PRODUCT_ORIENTATION_KEYS:
+        supplied = value.get(field)
+        cleaned = "" if supplied is None else re.sub(r"\s+", " ", str(supplied)).strip().strip('"\' ,，。')
+        if cleaned:
+            merged[field] = cleaned[:80]
+    return merged
+
+
+def normalize_product_landmarks(value: object, view: str) -> list[str]:
+    """Return up to 8 unique landmark labels (each <= 24 chars): model items first, then the view defaults."""
+    if isinstance(value, str):
+        raw_items = re.split(r"[,，/、\n]+", value)
+    else:
+        raw_items = [str(item) for item in value] if isinstance(value, list) else []
+    result: list[str] = []
+    for item in raw_items + default_product_landmarks(view):
+        # Truncate BEFORE the membership test; otherwise labels longer than 24 chars are stored twice.
+        text = re.sub(r"\s+", " ", str(item)).strip().strip('"\' ,，。')[:24]
+        if text and text not in result:
+            result.append(text)
+    return result[:8]
+
+
+def normalize_product_view_data(data: dict, index: int) -> dict:
+    """Validate one model-produced view record; an unknown ``view`` yields the by-upload-order fallback."""
+    trim_short, trim_full = '"\' ,。', '"\' ,，。'
+    view = str(data.get("view") or "").strip().strip(trim_short)
+    if view not in PRODUCT_VIEW_VALUES:
+        return fallback_product_view(index)
+    # Unlike the view, an unrecognised background only degrades to "unknown".
+    background = str(data.get("background") or "unknown").strip().strip(trim_short)
+    use_tags = normalize_product_use_tags(data.get("use_tags"), view)
+    orientation = normalize_product_orientation(data.get("orientation"), view)
+    landmarks = normalize_product_landmarks(data.get("landmarks"), view)
+    # Free text: trim wrapping quotes/punctuation, collapse whitespace, then cap the length.
+    note = re.sub(r"\s+", " ", str(data.get("note") or "").strip().strip(trim_full))[:320]
+    risk = re.sub(r"\s+", " ", str(data.get("risk") or "").strip().strip(trim_full))[:160]
+    try:
+        confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5))))
+    except Exception:
+        confidence = 0.5
+    return {
+        "view": view,
+        "background": background if background in PRODUCT_BACKGROUND_VALUES else "unknown",
+        "use_tags": use_tags,
+        "orientation": orientation,
+        "landmarks": landmarks,
+        "note": note or f"{PRODUCT_VIEW_LABELS.get(view, view)}参考",
+        "risk": risk,
+        "confidence": confidence,
+    }
+
+
 def parse_product_view_response(raw: str, index: int) -> dict:
    text = (raw or "").strip()
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.I).strip()
@@ -4419,22 +4523,45 @@ def parse_product_view_response(raw: str, index: int) -> dict:
            "risk": risk_match.group(1) if risk_match else "",
            "confidence": confidence_match.group(1) if confidence_match else 0.45,
        }
-    view = str(data.get("view") or "").strip().strip('"\' ,。')
-    if view not in PRODUCT_VIEW_VALUES:
-        return fallback_product_view(index)
-    background = str(data.get("background") or "unknown").strip().strip('"\' ,。')
-    if background not in PRODUCT_BACKGROUND_VALUES:
-        background = "unknown"
-    use_tags = normalize_product_use_tags(data.get("use_tags"), view)
-    note = str(data.get("note") or "").strip().strip('"\' ,，。')
-    note = re.sub(r"\s+", " ", note)[:220] or f"{PRODUCT_VIEW_LABELS.get(view, view)}参考"
-    risk = str(data.get("risk") or "").strip().strip('"\' ,，。')
-    risk = re.sub(r"\s+", " ", risk)[:120]
-    try:
-        confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5))))
-    except Exception:
-        confidence = 0.5
-    return {"view": view, "background": background, "use_tags": use_tags, "note": note, "risk": risk, "confidence": confidence}
+    return normalize_product_view_data(data, index)
+
+
+def parse_product_view_batch_response(raw: str, indices: list[int]) -> dict[int, dict]:
+    """Parse a batch reply into ``{index: normalized view}``; raise ValueError if no items list is found."""
+    text = re.sub(r"^```(?:json)?\s*", "", (raw or "").strip(), flags=re.I).strip()
+    text = re.sub(r"\s*```$", "", text).strip()
+    try:
+        data = json.loads(text)  # whole reply first, so a top-level JSON array survives intact
+    except ValueError:
+        data = json.loads(text[text.find("{"):text.rfind("}") + 1])  # prose-wrapped: outermost {...} span
+    raw_items = data.get("items") if isinstance(data, dict) else data
+    if not isinstance(raw_items, list):
+        raise ValueError("product view batch response missing items[]")
+    results: dict[int, dict] = {}
+    for offset, item in enumerate(raw_items):
+        if not isinstance(item, dict):
+            continue
+        positional = indices[offset] if offset < len(indices) else -1  # used when "index" is absent or unusable
+        try:
+            item_index = int(item.get("index", positional))
+        except (TypeError, ValueError, OverflowError):
+            item_index = positional
+        if item_index in indices:  # ignore indices that were never sent in this batch
+            results[item_index] = normalize_product_view_data(item, item_index)
+    return results
+
+
+def product_view_batch_prompt(indices: list[int]) -> str:  # builds the Chinese instruction text that opens each batch request
+    count = len(indices)  # only the image count is interpolated; the index values themselves are not
+    return (
+        "你在识别同一款 SKG 挂脖肩颈按摩仪的产品参考图。所有图片都是同一产品，不要判断是不是不同产品，也不要把它当耳机、头戴设备或护颈枕；它是套在脖子上、外置佩戴在肩颈位置的 U 形/围脖式按摩仪，可能有内侧按摩触点、外壳按键、厚度、底部接口和左右不对称结构。\n"
+        "先建立产品坐标系，再逐图识别：product_left=产品戴在真人脖子上时佩戴者左肩那一侧；product_right=佩戴者右肩那一侧；top=靠近下巴/脸/颈部上沿；bottom=靠近锁骨/肩部下沿；inner_side=贴近脖子皮肤/按摩触点的一侧；outer_side=外壳/按键/Logo/材质展示面。不要把图片左侧直接等同于产品左侧，必须在 orientation 里说明产品左/右/上/下分别对应图中的哪一边；不确定就写不确定并在 risk 里提醒。\n"
+        "每张图的 view 必须从 enum 选一个：front（正面/外侧主外观）, left_45（佩戴者左侧45度）, right_45（佩戴者右侧45度）, side_thickness（侧面厚度）, inner_contacts（贴颈内侧/按摩触点）, back_bottom（背面/底部/接口）。left_45/right_45 指佩戴者身体左右，不是画面左右。\n"
+        "background enum：white, black, simple, complex, unknown。use_tags 只能从 enum 选：hero_packshot, wearing_scale, inner_contact, side_thickness, asymmetry, button_detail, back_bottom, material_texture。\n"
+        "landmarks 用中文短词列出可见结构，例如：佩戴者左侧臂、佩戴者右侧臂、U形开口、贴颈内侧、按摩触点、侧边厚度、按键、充电口、底部、外壳材质、局部细节。note 必须用中文写给生视频模型，重点说明这张图适合约束什么，尤其要写清楚左/右/上/下、内/外侧、触点或局部细节。risk 只在可能误导生视频时写中文，如局部裁切、无法判断产品左右、上下颠倒风险、反光、遮挡、分辨率低、背景干扰；否则为空。\n"
+        f"本次共有 {count} 张图片，图片前的 Image index 就是输出 index。必须输出同样数量的 items，且 index 不要改。只输出一行严格 JSON，不要 markdown，不要换行。\n"
+        "{\"items\":[{\"index\":0,\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.0}]}"
+    )


 def analyze_product_view(ref_path: Path, index: int) -> dict:
@@ -4473,22 +4600,69 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
        return fallback


+def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[int, dict]:  # classify images chunk by chunk; returns {index: view record}
+    if not LLM_API_KEY:  # no key configured: skip the model and use the by-upload-order fallback for every image
+        return {index: fallback_product_view(index) for index, _path in paths_by_index}
+    results: dict[int, dict] = {}
+    for start in range(0, len(paths_by_index), PRODUCT_VIEW_BATCH_SIZE):  # one model request per chunk
+        chunk = paths_by_index[start:start + PRODUCT_VIEW_BATCH_SIZE]
+        indices = [index for index, _path in chunk]
+        content: list[dict] = [{"type": "text", "text": product_view_batch_prompt(indices)}]
+        for index, path in chunk:
+            img_b64 = base64.b64encode(path.read_bytes()).decode("ascii")  # runs outside the try below, so a read error propagates out of this function
+            content.append({"type": "text", "text": f"Image index {index}"})  # label the prompt tells the model to echo back as "index"
+            content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}})  # NOTE(review): MIME is always image/jpeg whatever the file's real format — confirm uploads are JPEG
+        try:
+            resp = llm().chat.completions.create(
+                model=VISION_MODEL,
+                messages=[{"role": "user", "content": content}],
+                response_format={"type": "json_object"},
+                temperature=0.05,
+                max_tokens=1600,  # NOTE(review): fixed budget although a chunk may hold up to 12 images — confirm replies are not truncated
+            )
+            raw = (resp.choices[0].message.content or "").strip()
+            if not raw:  # empty content: try the message's reasoning_content attribute, if it has one
+                raw = (getattr(resp.choices[0].message, "reasoning_content", "") or "").strip()
+            parsed = parse_product_view_batch_response(raw, indices)
+            for index in indices:  # images missing from the reply are analysed one at a time
+                results[index] = parsed.get(index) or analyze_product_view(chunk[indices.index(index)][1], index)
+        except Exception as e:  # any request/parse failure: redo the whole chunk image by image
+            for index, path in chunk:
+                try:
+                    result = analyze_product_view(path, index)
+                except Exception:  # single-image analysis failed too: use the by-upload-order fallback
+                    result = fallback_product_view(index)
+                if result.get("risk"):  # keep an existing risk note and append the batch-failure marker
+                    result["risk"] = f"{result['risk']}；批量识别失败后单图兜底"
+                else:  # no risk yet: record the marker plus the first 60 chars of the batch error
+                    result["risk"] = f"批量识别失败后单图兜底：{str(e)[:60]}"
+                results[index] = result
+    return results
+
+
@app.post("/jobs/{job_id}/assets/product-views/analyze")
 def analyze_product_views(job_id: str, req: AnalyzeProductViewsReq) -> dict:
    if job_id not in JOBS:
        raise HTTPException(404, "job not found")
-    items = []
+    path_items: list[tuple[int, Path]] = []
+    missing_results: dict[int, dict] = {}
    for index, ref in enumerate(req.refs):
        ref_path = storyboard_ref_path(job_id, ref)
        if not ref_path or not ref_path.exists():
-            result = fallback_product_view(index)
+            missing_results[index] = fallback_product_view(index)
        else:
-            result = analyze_product_view(ref_path, index)
+            path_items.append((index, ref_path))
+    batch_results = analyze_product_views_batch(path_items) if path_items else {}
+    items = []
+    for index, _ref in enumerate(req.refs):
+        result = batch_results.get(index) or missing_results.get(index) or fallback_product_view(index)
        items.append({
            "index": index,
            "view": result["view"],
            "background": result.get("background", "unknown"),
            "use_tags": result.get("use_tags", default_product_use_tags(result["view"])),
+            "orientation": result.get("orientation", default_product_orientation(result["view"])),
+            "landmarks": result.get("landmarks", default_product_landmarks(result["view"])),
            "note": result["note"],
            "risk": result.get("risk", ""),
            "confidence": result["confidence"],
@@ -4510,7 +4684,8 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
    prompt = (
        "Use the reference image as the same SKG neck-and-shoulder wearable massage product. "
        f"Generate a clean product-only white-background reference image in this missing view: {target_view}. "
-        "Preserve the exact product identity: white U-shaped shoulder/neck device, asymmetric left and right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and scale. "
+        "Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. "
+        "Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. "
        "Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. "
        "The product should be complete, centered, isolated on pure white, large enough to inspect, with no hands, people, packaging, text, UI, watermark, extra accessories, or scene background. "
        "If the target view is not fully visible in the source, infer the missing surfaces conservatively from the same product design without inventing a new model. "