diff --git a/.memory/worklog.json b/.memory/worklog.json index 72de503..ae38ad0 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1,18 +1,5 @@ { "entries": [ - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-15 12:02 (~1)", - "ts": "2026-05-15T04:04:44Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "hash": "46a5d76", - "message": "auto-save 2026-05-15 12:07 (~1)", - "ts": "2026-05-15T12:08:04+08:00", - "type": "commit" - }, { "files_changed": 1, "hash": "b4d31f6", @@ -3263,6 +3250,19 @@ "type": "session-heartbeat", "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 3 项未提交变更 · 最近提交:feat: standardize product asset inputs", "files_changed": 3 + }, + { + "ts": "2026-05-17T19:59:06+08:00", + "type": "commit", + "message": "auto-save 2026-05-17 19:59 (~3)", + "hash": "d32e87a", + "files_changed": 3 + }, + { + "ts": "2026-05-17T12:08:29Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:auto-save 2026-05-17 19:59 (~3)", + "files_changed": 1 } ] } diff --git a/api/main.py b/api/main.py index e58f6cb..9e6df4c 100644 --- a/api/main.py +++ b/api/main.py @@ -4327,12 +4327,14 @@ async def upload_storyboard_asset(job_id: str, file: UploadFile = File(...)) -> PRODUCT_VIEW_VALUES = ["front", "left_45", "right_45", "side_thickness", "inner_contacts", "back_bottom"] +PRODUCT_VIEW_BATCH_SIZE = max(1, min(12, int(os.getenv("PRODUCT_VIEW_BATCH_SIZE", "8")))) + PRODUCT_VIEW_LABELS = { - "front": "正面", - "left_45": "左 45", - "right_45": "右 45", + "front": "正面/外侧主外观", + "left_45": "佩戴者左 45", + "right_45": "佩戴者右 45", "side_thickness": "侧面厚度", - "inner_contacts": "内侧触点", + "inner_contacts": "贴颈内侧/触点", "back_bottom": "背面/底部", } @@ -4382,12 +4384,114 @@ def fallback_product_view(index: int) -> dict: "view": view, "background": "unknown", "use_tags": default_product_use_tags(view), - "note": f"{PRODUCT_VIEW_LABELS.get(view, view)}参考;模型识别不可用时按上传顺序自动标注,请人工只检查备注。", + "orientation": default_product_orientation(view), + "landmarks": default_product_landmarks(view), + "note": f"{PRODUCT_VIEW_LABELS.get(view, view)}参考;模型识别不可用时按上传顺序自动标注,请重点复核佩戴者左/右、上/下和贴颈内侧。", "risk": "模型识别不可用,按上传顺序兜底", "confidence": 0.25, } +PRODUCT_ORIENTATION_KEYS = [ + "product_left", + "product_right", + "top", + "bottom", + "inner_side", + "outer_side", + "opening_direction", +] + + +def default_product_orientation(view: str) -> dict: + base = { + "product_left": "佩戴者左侧;需人工复核图中位置", + "product_right": "佩戴者右侧;需人工复核图中位置", + "top": "靠近下巴/脸/颈部上沿", + "bottom": "靠近锁骨/肩部下沿", + "inner_side": "贴近脖子皮肤的一侧,通常可见按摩触点", + "outer_side": "外壳展示面,通常可见按键/Logo/材质", + "opening_direction": "U 形开口方向需结合图片复核", + } + if view == "inner_contacts": + base["inner_side"] = "本图重点:贴颈内侧/按摩触点" + elif view == "side_thickness": + base["outer_side"] = "本图重点:侧厚、边缘和机身厚度" + elif view in {"left_45", "right_45"}: + base["opening_direction"] = "注意不要把图片左右直接当成产品佩戴者左右" + return base + + +def default_product_landmarks(view: str) -> list[str]: + defaults = { + "front": ["U形开口", "外壳主轮廓", "左右臂"], + "left_45": ["佩戴者左侧臂", "侧边弧度", "按键/结构差异"], + "right_45": ["佩戴者右侧臂", "侧边弧度", "按键/结构差异"], + "side_thickness": ["机身厚度", "侧边轮廓", "佩戴比例"], + "inner_contacts": ["贴颈内侧", "按摩触点", "皮肤接触面"], + "back_bottom": ["背面/底部", "接口/底面", "材质细节"], + } + return defaults.get(view, ["U形挂脖轮廓"]) + + +def normalize_product_orientation(value: object, view: str) -> dict: + base = default_product_orientation(view) + if isinstance(value, dict): + for key in PRODUCT_ORIENTATION_KEYS: + raw = value.get(key) + if raw is None: + continue + text = re.sub(r"\s+", " ", str(raw)).strip().strip('"\' ,,。') + if text: + base[key] = text[:80] + return base + + +def normalize_product_landmarks(value: object, view: str) -> list[str]: + if isinstance(value, str): + raw_items = re.split(r"[,,/、\n]+", value) + elif isinstance(value, list): + raw_items = [str(item) for item in value] + else: + raw_items = [] + result = [] + for item in raw_items + default_product_landmarks(view): + text = re.sub(r"\s+", " ", str(item)).strip().strip('"\' ,,。') + if text and text not in result: + result.append(text[:24]) + return result[:8] + + +def normalize_product_view_data(data: dict, index: int) -> dict: + view = str(data.get("view") or "").strip().strip('"\' ,。') + if view not in PRODUCT_VIEW_VALUES: + return fallback_product_view(index) + background = str(data.get("background") or "unknown").strip().strip('"\' ,。') + if background not in PRODUCT_BACKGROUND_VALUES: + background = "unknown" + use_tags = normalize_product_use_tags(data.get("use_tags"), view) + orientation = normalize_product_orientation(data.get("orientation"), view) + landmarks = normalize_product_landmarks(data.get("landmarks"), view) + note = str(data.get("note") or "").strip().strip('"\' ,,。') + note = re.sub(r"\s+", " ", note)[:320] or f"{PRODUCT_VIEW_LABELS.get(view, view)}参考" + risk = str(data.get("risk") or "").strip().strip('"\' ,,。') + risk = re.sub(r"\s+", " ", risk)[:160] + try: + confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5)))) + except Exception: + confidence = 0.5 + return { + "view": view, + "background": background, + "use_tags": use_tags, + "orientation": orientation, + "landmarks": landmarks, + "note": note, + "risk": risk, + "confidence": confidence, + } + + def parse_product_view_response(raw: str, index: int) -> dict: text = (raw or "").strip() text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.I).strip() @@ -4419,22 +4523,45 @@ def parse_product_view_response(raw: str, index: int) -> dict: "risk": risk_match.group(1) if risk_match else "", "confidence": confidence_match.group(1) if confidence_match else 0.45, } - view = str(data.get("view") or "").strip().strip('"\' ,。') - if view not in PRODUCT_VIEW_VALUES: - return fallback_product_view(index) - background = str(data.get("background") or "unknown").strip().strip('"\' ,。') - if background not in PRODUCT_BACKGROUND_VALUES: - background = "unknown" - use_tags = normalize_product_use_tags(data.get("use_tags"), view) - note = str(data.get("note") or "").strip().strip('"\' ,,。') - note = re.sub(r"\s+", " ", note)[:220] or f"{PRODUCT_VIEW_LABELS.get(view, view)}参考" - risk = str(data.get("risk") or "").strip().strip('"\' ,,。') - risk = re.sub(r"\s+", " ", risk)[:120] - try: - confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5)))) - except Exception: - confidence = 0.5 - return {"view": view, "background": background, "use_tags": use_tags, "note": note, "risk": risk, "confidence": confidence} + return normalize_product_view_data(data, index) + + +def parse_product_view_batch_response(raw: str, indices: list[int]) -> dict[int, dict]: + text = (raw or "").strip() + text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.I).strip() + text = re.sub(r"\s*```$", "", text).strip() + match = re.search(r"\{[\s\S]*\}", text) + json_text = match.group(0) if match else text + data = json.loads(json_text) + raw_items = data.get("items") if isinstance(data, dict) else data + if not isinstance(raw_items, list): + raise ValueError("product view batch response missing items[]") + allowed = set(indices) + results: dict[int, dict] = {} + for offset, item in enumerate(raw_items): + if not isinstance(item, dict): + continue + try: + item_index = int(item.get("index", indices[offset] if offset < len(indices) else -1)) + except Exception: + item_index = indices[offset] if offset < len(indices) else -1 + if item_index not in allowed: + continue + results[item_index] = normalize_product_view_data(item, item_index) + return results + + +def product_view_batch_prompt(indices: list[int]) -> str: + count = len(indices) + return ( + "你在识别同一款 SKG 挂脖肩颈按摩仪的产品参考图。所有图片都是同一产品,不要判断是不是不同产品,也不要把它当耳机、头戴设备或护颈枕;它是套在脖子上、外置佩戴在肩颈位置的 U 形/围脖式按摩仪,可能有内侧按摩触点、外壳按键、厚度、底部接口和左右不对称结构。\n" + "先建立产品坐标系,再逐图识别:product_left=产品戴在真人脖子上时佩戴者左肩那一侧;product_right=佩戴者右肩那一侧;top=靠近下巴/脸/颈部上沿;bottom=靠近锁骨/肩部下沿;inner_side=贴近脖子皮肤/按摩触点的一侧;outer_side=外壳/按键/Logo/材质展示面。不要把图片左侧直接等同于产品左侧,必须在 orientation 里说明产品左/右/上/下分别对应图中的哪一边;不确定就写不确定并在 risk 里提醒。\n" + "每张图的 view 必须从 enum 选一个:front(正面/外侧主外观), left_45(佩戴者左侧45度), right_45(佩戴者右侧45度), side_thickness(侧面厚度), inner_contacts(贴颈内侧/按摩触点), back_bottom(背面/底部/接口)。left_45/right_45 指佩戴者身体左右,不是画面左右。\n" + "background enum:white, black, simple, complex, unknown。use_tags 只能从 enum 选:hero_packshot, wearing_scale, inner_contact, side_thickness, asymmetry, button_detail, back_bottom, material_texture。\n" + "landmarks 用中文短词列出可见结构,例如:佩戴者左侧臂、佩戴者右侧臂、U形开口、贴颈内侧、按摩触点、侧边厚度、按键、充电口、底部、外壳材质、局部细节。note 必须用中文写给生视频模型,重点说明这张图适合约束什么,尤其要写清楚左/右/上/下、内/外侧、触点或局部细节。risk 只在可能误导生视频时写中文,如局部裁切、无法判断产品左右、上下颠倒风险、反光、遮挡、分辨率低、背景干扰;否则为空。\n" + f"本次共有 {count} 张图片,图片前的 Image index 就是输出 index。必须输出同样数量的 items,且 index 不要改。只输出一行严格 JSON,不要 markdown,不要换行。\n" + "{\"items\":[{\"index\":0,\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.0}]}" + ) def analyze_product_view(ref_path: Path, index: int) -> dict: @@ -4473,22 +4600,69 @@ def analyze_product_view(ref_path: Path, index: int) -> dict: return fallback +def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[int, dict]: + if not LLM_API_KEY: + return {index: fallback_product_view(index) for index, _path in paths_by_index} + results: dict[int, dict] = {} + for start in range(0, len(paths_by_index), PRODUCT_VIEW_BATCH_SIZE): + chunk = paths_by_index[start:start + PRODUCT_VIEW_BATCH_SIZE] + indices = [index for index, _path in chunk] + content: list[dict] = [{"type": "text", "text": product_view_batch_prompt(indices)}] + for index, path in chunk: + img_b64 = base64.b64encode(path.read_bytes()).decode("ascii") + content.append({"type": "text", "text": f"Image index {index}"}) + content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}) + try: + resp = llm().chat.completions.create( + model=VISION_MODEL, + messages=[{"role": "user", "content": content}], + response_format={"type": "json_object"}, + temperature=0.05, + max_tokens=1600, + ) + raw = (resp.choices[0].message.content or "").strip() + if not raw: + raw = (getattr(resp.choices[0].message, "reasoning_content", "") or "").strip() + parsed = parse_product_view_batch_response(raw, indices) + for index in indices: + results[index] = parsed.get(index) or analyze_product_view(chunk[indices.index(index)][1], index) + except Exception as e: + for index, path in chunk: + try: + result = analyze_product_view(path, index) + except Exception: + result = fallback_product_view(index) + if result.get("risk"): + result["risk"] = f"{result['risk']};批量识别失败后单图兜底" + else: + result["risk"] = f"批量识别失败后单图兜底:{str(e)[:60]}" + results[index] = result + return results + + @app.post("/jobs/{job_id}/assets/product-views/analyze") def analyze_product_views(job_id: str, req: AnalyzeProductViewsReq) -> dict: if job_id not in JOBS: raise HTTPException(404, "job not found") - items = [] + path_items: list[tuple[int, Path]] = [] + missing_results: dict[int, dict] = {} for index, ref in enumerate(req.refs): ref_path = storyboard_ref_path(job_id, ref) if not ref_path or not ref_path.exists(): - result = fallback_product_view(index) + missing_results[index] = fallback_product_view(index) else: - result = analyze_product_view(ref_path, index) + path_items.append((index, ref_path)) + batch_results = analyze_product_views_batch(path_items) if path_items else {} + items = [] + for index, _ref in enumerate(req.refs): + result = batch_results.get(index) or missing_results.get(index) or fallback_product_view(index) items.append({ "index": index, "view": result["view"], "background": result.get("background", "unknown"), "use_tags": result.get("use_tags", default_product_use_tags(result["view"])), + "orientation": result.get("orientation", default_product_orientation(result["view"])), + "landmarks": result.get("landmarks", default_product_landmarks(result["view"])), "note": result["note"], "risk": result.get("risk", ""), "confidence": result["confidence"], @@ -4510,7 +4684,8 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) prompt = ( "Use the reference image as the same SKG neck-and-shoulder wearable massage product. " f"Generate a clean product-only white-background reference image in this missing view: {target_view}. " - "Preserve the exact product identity: white U-shaped shoulder/neck device, asymmetric left and right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and scale. " + "Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. " + "Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. " "Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. " "The product should be complete, centered, isolated on pure white, large enough to inspect, with no hands, people, packaging, text, UI, watermark, extra accessories, or scene background. " "If the target view is not fully visible in the source, infer the missing surfaces conservatively from the same product design without inventing a new model. " diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx index e7330f1..04137dc 100644 --- a/web/components/ad-recreation-board.tsx +++ b/web/components/ad-recreation-board.tsx @@ -92,6 +92,8 @@ type ProductRefItem = { view: string background: string useTags: string[] + orientation?: ProductViewAnalysisItem["orientation"] + landmarks: string[] note: string risk: string source: "upload" | "ai" @@ -100,11 +102,11 @@ type ProductRefItem = { } const PRODUCT_VIEW_SLOTS = [ - { value: "front", label: "正面", hint: "整体 U 形轮廓、开口宽度、主外观" }, - { value: "left_45", label: "左 45", hint: "左侧弧度、按钮/结构差异" }, - { value: "right_45", label: "右 45", hint: "右侧弧度、另一侧非对称细节" }, + { value: "front", label: "正面/外侧", hint: "整体 U 形轮廓、开口宽度、外壳主外观" }, + { value: "left_45", label: "佩戴者左 45", hint: "戴在脖子上时佩戴者左肩一侧的弧度、按钮/结构差异" }, + { value: "right_45", label: "佩戴者右 45", hint: "戴在脖子上时佩戴者右肩一侧的弧度、非对称细节" }, { value: "side_thickness", label: "侧面厚度", hint: "机身厚度、后颈包裹体积" }, - { value: "inner_contacts", label: "内侧触点", hint: "按摩触点、贴颈面、佩戴比例" }, + { value: "inner_contacts", label: "贴颈内侧/触点", hint: "按摩触点、贴颈面、内侧皮肤接触位置" }, { value: "back_bottom", label: "背面/底部", hint: "底面、背部闭合结构、补缺" }, ] as const @@ -383,6 +385,40 @@ function normalizeProductUseTags(tags: string[] | undefined, view: string) { return result.slice(0, 4) } +function defaultProductLandmarks(view: string) { + const defaults: Record = { + front: ["U形开口", "外壳主轮廓", "左右臂"], + left_45: ["佩戴者左侧臂", "侧边弧度", "按键/结构差异"], + right_45: ["佩戴者右侧臂", "侧边弧度", "按键/结构差异"], + side_thickness: ["机身厚度", "侧边轮廓", "佩戴比例"], + inner_contacts: ["贴颈内侧", "按摩触点", "皮肤接触面"], + back_bottom: ["背面/底部", "接口/底面", "材质细节"], + } + return defaults[view] ?? ["U形挂脖轮廓"] +} + +function normalizeProductLandmarks(landmarks: string[] | undefined, view: string) { + const result: string[] = [] + for (const item of [...(landmarks ?? []), ...defaultProductLandmarks(view)]) { + const text = item.trim() + if (text && !result.includes(text)) result.push(text) + } + return result.slice(0, 8) +} + +function formatProductOrientation(orientation?: ProductViewAnalysisItem["orientation"]) { + if (!orientation) return "" + const parts = [ + orientation.product_left ? `左=${orientation.product_left}` : "", + orientation.product_right ? `右=${orientation.product_right}` : "", + orientation.top ? `上=${orientation.top}` : "", + orientation.bottom ? `下=${orientation.bottom}` : "", + orientation.inner_side ? `内=${orientation.inner_side}` : "", + orientation.opening_direction ? `开口=${orientation.opening_direction}` : "", + ].filter(Boolean) + return parts.join(";") +} + function createProductRefItem( ref: ImageRef, index: number, @@ -391,6 +427,8 @@ function createProductRefItem( note?: string, background = "unknown", useTags?: string[], + orientation?: ProductViewAnalysisItem["orientation"], + landmarks?: string[], risk = "", confidence?: number, ): ProductRefItem { @@ -402,6 +440,8 @@ function createProductRefItem( view: view ?? targetSlot.value, background, useTags: normalizeProductUseTags(useTags, view ?? targetSlot.value), + orientation, + landmarks: normalizeProductLandmarks(landmarks, view ?? targetSlot.value), note: note ?? targetSlot.hint, risk, source, @@ -415,8 +455,11 @@ function productReferenceNotes(items: ProductRefItem[]) { return items .map((item, index) => { const tags = item.useTags.map((tag) => PRODUCT_USE_TAG_LABELS[tag]).filter(Boolean).join("/") + const orientation = formatProductOrientation(item.orientation) + const direction = orientation ? `;方向:${orientation}` : "" + const landmarks = item.landmarks.length ? `;结构:${item.landmarks.join("/")}` : "" const risk = item.risk ? `;风险:${item.risk}` : "" - return `${index + 1}. ${productViewLabel(item.view)}|${productBackgroundLabel(item.background)}|${tags}:${item.note || "无补充备注"}${risk}` + return `${index + 1}. ${productViewLabel(item.view)}|${productBackgroundLabel(item.background)}|${tags}:${item.note || "无补充备注"}${direction}${landmarks}${risk}` }) .join(";") } @@ -500,7 +543,7 @@ function buildStoryboardSceneFromAudioRow(row: AudioStoryboardRow, frame: KeyFra const productRefs = selectedProductItems.map((item) => item.ref) const notes = productReferenceNotes(selectedProductItems) const productGuidance = productItems.length - ? `产品素材池共有 ${productItems.length} 张,本条只选用 ${selectedProductItems.length} 张最相关参考图,不要把未选素材混入本条画面。所选图片只作为产品结构、角度、比例和细节参考,不要照搬参考图的白底/黑底/棚拍背景。视角标注:${notes}。保留左右非对称细节,不要把两边做成镜像对称;肩颈产品大小必须贴近真实佩戴比例,不能缩成耳机,也不能放大成护颈枕。` + ? `产品素材池共有 ${productItems.length} 张,本条只选用 ${selectedProductItems.length} 张最相关参考图,不要把未选素材混入本条画面。产品硬定义:这是套在脖子上的 U 形肩颈按摩仪,不是耳机、头戴设备或护颈枕。坐标系硬规则:左/右按佩戴者身体左右,不能按图片左右;上=靠近下巴/脸/颈部上沿,下=靠近锁骨/肩部下沿;内侧=贴颈皮肤/按摩触点,外侧=外壳/按键/Logo。所选图片只作为产品结构、角度、比例和细节参考,不要照搬参考图的白底/黑底/棚拍背景。视角标注:${notes}。保留左右非对称细节,不要把两边做成镜像对称;肩颈产品大小必须贴近真实佩戴比例,不能缩成耳机,也不能放大成护颈枕。` : "未上传产品图时使用默认 SKG 产品图;生成前建议先建立同一产品素材池,锁定左右差异、厚度和佩戴比例。" return { duration: Number(Math.max(3.2, Math.min(6.5, row.end - row.start || 4.5)).toFixed(1)), diff --git a/web/lib/api.ts b/web/lib/api.ts index ce4b416..fb8bcee 100644 --- a/web/lib/api.ts +++ b/web/lib/api.ts @@ -184,6 +184,16 @@ export interface ProductViewAnalysisItem { view: string background?: string use_tags?: string[] + orientation?: { + product_left?: string + product_right?: string + top?: string + bottom?: string + inner_side?: string + outer_side?: string + opening_direction?: string + } + landmarks?: string[] note: string risk?: string confidence: number