auto-save 2026-05-17 20:15 (~4)
This commit is contained in:
225
api/main.py
225
api/main.py
@@ -4327,12 +4327,14 @@ async def upload_storyboard_asset(job_id: str, file: UploadFile = File(...)) ->
|
||||
|
||||
|
||||
# Closed set of view identifiers a product reference image may be classified as.
PRODUCT_VIEW_VALUES = ["front", "left_45", "right_45", "side_thickness", "inner_contacts", "back_bottom"]
# Number of images sent per vision request; read from the environment and
# clamped to the inclusive range 1..12 (default 8).
PRODUCT_VIEW_BATCH_SIZE = max(1, min(12, int(os.getenv("PRODUCT_VIEW_BATCH_SIZE", "8"))))

# Human-readable (Chinese) label for each view, used when composing notes.
# Each key appears exactly once; "left"/"right" refer to the wearer's sides.
PRODUCT_VIEW_LABELS = {
    "front": "正面/外侧主外观",
    "left_45": "佩戴者左 45",
    "right_45": "佩戴者右 45",
    "side_thickness": "侧面厚度",
    "inner_contacts": "贴颈内侧/触点",
    "back_bottom": "背面/底部",
}
|
||||
|
||||
@@ -4382,12 +4384,114 @@ def fallback_product_view(index: int) -> dict:
|
||||
"view": view,
|
||||
"background": "unknown",
|
||||
"use_tags": default_product_use_tags(view),
|
||||
"note": f"{PRODUCT_VIEW_LABELS.get(view, view)}参考;模型识别不可用时按上传顺序自动标注,请人工只检查备注。",
|
||||
"orientation": default_product_orientation(view),
|
||||
"landmarks": default_product_landmarks(view),
|
||||
"note": f"{PRODUCT_VIEW_LABELS.get(view, view)}参考;模型识别不可用时按上传顺序自动标注,请重点复核佩戴者左/右、上/下和贴颈内侧。",
|
||||
"risk": "模型识别不可用,按上传顺序兜底",
|
||||
"confidence": 0.25,
|
||||
}
|
||||
|
||||
|
||||
# Keys of the per-image orientation mapping, in the order they are normalized.
PRODUCT_ORIENTATION_KEYS = [
    "product_left", "product_right",
    "top", "bottom",
    "inner_side", "outer_side",
    "opening_direction",
]
|
||||
|
||||
|
||||
def default_product_orientation(view: str) -> dict:
    """Build the default orientation hints for a product view.

    A fresh dict with the seven orientation keys is created on every call.
    For ``inner_contacts``, ``side_thickness`` and the two 45-degree views,
    exactly one entry is replaced by a view-specific emphasis text; any
    other view gets the generic hints unchanged.
    """
    # view -> (orientation key to replace, replacement text)
    mirror_warning = "注意不要把图片左右直接当成产品佩戴者左右"
    emphasis_by_view = {
        "inner_contacts": ("inner_side", "本图重点:贴颈内侧/按摩触点"),
        "side_thickness": ("outer_side", "本图重点:侧厚、边缘和机身厚度"),
        "left_45": ("opening_direction", mirror_warning),
        "right_45": ("opening_direction", mirror_warning),
    }
    orientation = {
        "product_left": "佩戴者左侧;需人工复核图中位置",
        "product_right": "佩戴者右侧;需人工复核图中位置",
        "top": "靠近下巴/脸/颈部上沿",
        "bottom": "靠近锁骨/肩部下沿",
        "inner_side": "贴近脖子皮肤的一侧,通常可见按摩触点",
        "outer_side": "外壳展示面,通常可见按键/Logo/材质",
        "opening_direction": "U 形开口方向需结合图片复核",
    }
    emphasis = emphasis_by_view.get(view)
    if emphasis is not None:
        target_key, emphasis_text = emphasis
        orientation[target_key] = emphasis_text
    return orientation
|
||||
|
||||
|
||||
def default_product_landmarks(view: str) -> list[str]:
    """Return the default landmark labels for ``view``.

    Each known view maps to three short Chinese labels; an unrecognized view
    yields the single generic label. The table is rebuilt on every call, so
    the returned list is never shared between callers.
    """
    landmarks_by_view = dict(
        front=["U形开口", "外壳主轮廓", "左右臂"],
        left_45=["佩戴者左侧臂", "侧边弧度", "按键/结构差异"],
        right_45=["佩戴者右侧臂", "侧边弧度", "按键/结构差异"],
        side_thickness=["机身厚度", "侧边轮廓", "佩戴比例"],
        inner_contacts=["贴颈内侧", "按摩触点", "皮肤接触面"],
        back_bottom=["背面/底部", "接口/底面", "材质细节"],
    )
    try:
        return landmarks_by_view[view]
    except KeyError:
        return ["U形挂脖轮廓"]
|
||||
|
||||
|
||||
def normalize_product_orientation(value: object, view: str) -> dict:
    """Overlay model-supplied orientation text onto the defaults for ``view``.

    Starts from ``default_product_orientation(view)``. When ``value`` is a
    dict, each key listed in ``PRODUCT_ORIENTATION_KEYS`` whose value is not
    ``None`` is stringified, whitespace-collapsed, stripped of surrounding
    quotes/punctuation and, if anything remains, stored truncated to 80
    characters. Any non-dict ``value`` leaves the defaults untouched.
    """
    orientation = default_product_orientation(view)
    if not isinstance(value, dict):
        return orientation
    for field in PRODUCT_ORIENTATION_KEYS:
        candidate = value.get(field)
        if candidate is not None:
            collapsed = re.sub(r"\s+", " ", str(candidate))
            cleaned = collapsed.strip().strip('"\' ,,。')
            if cleaned:
                orientation[field] = cleaned[:80]
    return orientation
|
||||
|
||||
|
||||
def normalize_product_landmarks(value: object, view: str) -> list[str]:
    """Clean model-supplied landmark labels and pad them with the defaults.

    Args:
        value: A delimiter-separated string, a list of items (each is
            stringified), or anything else (treated as no input).
        view: View identifier used to look up the default landmarks, which
            are appended after the supplied ones.

    Returns:
        Up to 8 unique, non-empty labels of at most 24 characters each, with
        supplied labels ahead of the defaults.
    """
    if isinstance(value, str):
        raw_items = re.split(r"[,,/、\n]+", value)
    elif isinstance(value, list):
        raw_items = [str(item) for item in value]
    else:
        raw_items = []
    result: list[str] = []
    for item in raw_items + default_product_landmarks(view):
        # Truncate BEFORE the uniqueness check: two long labels that share
        # their first 24 characters would otherwise both be kept and show up
        # as identical entries in the output.
        text = re.sub(r"\s+", " ", str(item)).strip().strip('"\' ,,。')[:24]
        if text and text not in result:
            result.append(text)
    return result[:8]
|
||||
|
||||
|
||||
def normalize_product_view_data(data: dict, index: int) -> dict:
    """Validate and normalize one model-produced product-view record.

    Args:
        data: Parsed record for a single image (keys read: ``view``,
            ``background``, ``use_tags``, ``orientation``, ``landmarks``,
            ``note``, ``risk``, ``confidence``).
        index: Position of the image; passed to ``fallback_product_view``
            when ``view`` is not one of ``PRODUCT_VIEW_VALUES``.

    Returns:
        A dict with the keys ``view``, ``background``, ``use_tags``,
        ``orientation``, ``landmarks``, ``note``, ``risk`` and
        ``confidence``; or the fallback record when the view is invalid.
    """
    import math  # local import: the file's import block is outside this block

    view = str(data.get("view") or "").strip().strip('"\' ,。')
    if view not in PRODUCT_VIEW_VALUES:
        return fallback_product_view(index)

    background = str(data.get("background") or "unknown").strip().strip('"\' ,。')
    if background not in PRODUCT_BACKGROUND_VALUES:
        background = "unknown"

    use_tags = normalize_product_use_tags(data.get("use_tags"), view)
    orientation = normalize_product_orientation(data.get("orientation"), view)
    landmarks = normalize_product_landmarks(data.get("landmarks"), view)

    # Note: collapse whitespace, cap at 320 chars, default to the view label.
    note = str(data.get("note") or "").strip().strip('"\' ,,。')
    note = re.sub(r"\s+", " ", note)[:320] or f"{PRODUCT_VIEW_LABELS.get(view, view)}参考"

    # Risk: same cleanup, capped at 160 chars; may legitimately be empty.
    risk = str(data.get("risk") or "").strip().strip('"\' ,,。')
    risk = re.sub(r"\s+", " ", risk)[:160]

    try:
        confidence = float(data.get("confidence", 0.5))
    except (TypeError, ValueError, OverflowError):
        confidence = 0.5
    if math.isnan(confidence):
        # min(1.0, nan) evaluates to 1.0, so an unchecked NaN would be
        # reported as full confidence; treat it as "unknown" instead.
        confidence = 0.5
    else:
        confidence = max(0.0, min(1.0, confidence))

    return {
        "view": view,
        "background": background,
        "use_tags": use_tags,
        "orientation": orientation,
        "landmarks": landmarks,
        "note": note,
        "risk": risk,
        "confidence": confidence,
    }
|
||||
|
||||
|
||||
def parse_product_view_response(raw: str, index: int) -> dict:
|
||||
text = (raw or "").strip()
|
||||
text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.I).strip()
|
||||
@@ -4419,22 +4523,45 @@ def parse_product_view_response(raw: str, index: int) -> dict:
|
||||
"risk": risk_match.group(1) if risk_match else "",
|
||||
"confidence": confidence_match.group(1) if confidence_match else 0.45,
|
||||
}
|
||||
view = str(data.get("view") or "").strip().strip('"\' ,。')
|
||||
if view not in PRODUCT_VIEW_VALUES:
|
||||
return fallback_product_view(index)
|
||||
background = str(data.get("background") or "unknown").strip().strip('"\' ,。')
|
||||
if background not in PRODUCT_BACKGROUND_VALUES:
|
||||
background = "unknown"
|
||||
use_tags = normalize_product_use_tags(data.get("use_tags"), view)
|
||||
note = str(data.get("note") or "").strip().strip('"\' ,,。')
|
||||
note = re.sub(r"\s+", " ", note)[:220] or f"{PRODUCT_VIEW_LABELS.get(view, view)}参考"
|
||||
risk = str(data.get("risk") or "").strip().strip('"\' ,,。')
|
||||
risk = re.sub(r"\s+", " ", risk)[:120]
|
||||
try:
|
||||
confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5))))
|
||||
except Exception:
|
||||
confidence = 0.5
|
||||
return {"view": view, "background": background, "use_tags": use_tags, "note": note, "risk": risk, "confidence": confidence}
|
||||
return normalize_product_view_data(data, index)
|
||||
|
||||
|
||||
def parse_product_view_batch_response(raw: str, indices: list[int]) -> dict[int, dict]:
    """Parse a batched vision-model reply into normalized per-image records.

    Args:
        raw: Model output; may be wrapped in a Markdown code fence or
            surrounded by stray text.
        indices: Image indices sent in this batch, in request order. Items
            without a usable ``index`` field are matched by position.

    Returns:
        Mapping of image index to the normalized record. Non-dict items and
        indices that were not requested are dropped, so the mapping may
        contain fewer entries than ``indices``.

    Raises:
        ValueError: If the text is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``) or carries no list of items.
    """
    text = (raw or "").strip()
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.I).strip()
    text = re.sub(r"\s*```$", "", text).strip()
    try:
        # Parse the whole payload first so a top-level JSON array survives;
        # brace extraction alone would cut "[{...},{...}]" down to
        # "{...},{...}", which is not valid JSON.
        data = json.loads(text)
    except ValueError:
        # Fall back to the outermost {...} span to tolerate surrounding text.
        match = re.search(r"\{[\s\S]*\}", text)
        data = json.loads(match.group(0) if match else text)

    raw_items = data.get("items") if isinstance(data, dict) else data
    if not isinstance(raw_items, list):
        raise ValueError("product view batch response missing items[]")

    allowed = set(indices)
    results: dict[int, dict] = {}
    for offset, item in enumerate(raw_items):
        if not isinstance(item, dict):
            continue
        # Positional fallback when the item omits or garbles its index.
        positional = indices[offset] if offset < len(indices) else -1
        try:
            item_index = int(item.get("index", positional))
        except (TypeError, ValueError, OverflowError):
            item_index = positional
        if item_index not in allowed:
            continue
        results[item_index] = normalize_product_view_data(item, item_index)
    return results
|
||||
|
||||
|
||||
def product_view_batch_prompt(indices: list[int]) -> str:
    """Compose the instruction text sent ahead of a batch of product images.

    Only ``len(indices)`` is interpolated (as the expected image count); the
    rest of the prompt is fixed. The result is six newline-terminated
    instruction paragraphs followed by a one-line JSON output template.
    """
    image_total = len(indices)
    product_intro = "你在识别同一款 SKG 挂脖肩颈按摩仪的产品参考图。所有图片都是同一产品,不要判断是不是不同产品,也不要把它当耳机、头戴设备或护颈枕;它是套在脖子上、外置佩戴在肩颈位置的 U 形/围脖式按摩仪,可能有内侧按摩触点、外壳按键、厚度、底部接口和左右不对称结构。"
    coordinate_rules = "先建立产品坐标系,再逐图识别:product_left=产品戴在真人脖子上时佩戴者左肩那一侧;product_right=佩戴者右肩那一侧;top=靠近下巴/脸/颈部上沿;bottom=靠近锁骨/肩部下沿;inner_side=贴近脖子皮肤/按摩触点的一侧;outer_side=外壳/按键/Logo/材质展示面。不要把图片左侧直接等同于产品左侧,必须在 orientation 里说明产品左/右/上/下分别对应图中的哪一边;不确定就写不确定并在 risk 里提醒。"
    view_rules = "每张图的 view 必须从 enum 选一个:front(正面/外侧主外观), left_45(佩戴者左侧45度), right_45(佩戴者右侧45度), side_thickness(侧面厚度), inner_contacts(贴颈内侧/按摩触点), back_bottom(背面/底部/接口)。left_45/right_45 指佩戴者身体左右,不是画面左右。"
    enum_rules = "background enum:white, black, simple, complex, unknown。use_tags 只能从 enum 选:hero_packshot, wearing_scale, inner_contact, side_thickness, asymmetry, button_detail, back_bottom, material_texture。"
    text_field_rules = "landmarks 用中文短词列出可见结构,例如:佩戴者左侧臂、佩戴者右侧臂、U形开口、贴颈内侧、按摩触点、侧边厚度、按键、充电口、底部、外壳材质、局部细节。note 必须用中文写给生视频模型,重点说明这张图适合约束什么,尤其要写清楚左/右/上/下、内/外侧、触点或局部细节。risk 只在可能误导生视频时写中文,如局部裁切、无法判断产品左右、上下颠倒风险、反光、遮挡、分辨率低、背景干扰;否则为空。"
    output_rules = f"本次共有 {image_total} 张图片,图片前的 Image index 就是输出 index。必须输出同样数量的 items,且 index 不要改。只输出一行严格 JSON,不要 markdown,不要换行。"
    json_template = '{"items":[{"index":0,"view":"front|left_45|right_45|side_thickness|inner_contacts|back_bottom","background":"white|black|simple|complex|unknown","use_tags":["hero_packshot"],"orientation":{"product_left":"图中哪一侧/不可见/不确定","product_right":"图中哪一侧/不可见/不确定","top":"图中哪一侧/不可见/不确定","bottom":"图中哪一侧/不可见/不确定","inner_side":"图中哪一侧/是否可见","outer_side":"图中哪一侧/是否可见","opening_direction":"U形开口朝图中哪一侧/不可见/不确定"},"landmarks":["U形开口"],"note":"中文备注","risk":"","confidence":0.0}]}'
    sections = [
        product_intro,
        coordinate_rules,
        view_rules,
        enum_rules,
        text_field_rules,
        output_rules,
        json_template,
    ]
    # Joining with "\n" terminates every paragraph but the template, which
    # stays last with no trailing newline.
    return "\n".join(sections)
|
||||
|
||||
|
||||
def analyze_product_view(ref_path: Path, index: int) -> dict:
|
||||
@@ -4473,22 +4600,69 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
|
||||
return fallback
|
||||
|
||||
|
||||
def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[int, dict]:
    """Classify product reference images with batched vision-model requests.

    Images are sent in chunks of ``PRODUCT_VIEW_BATCH_SIZE``. When a chunk
    request or its parsing fails, every image of that chunk is analyzed one
    by one and the failure is recorded in the record's ``risk`` text. When
    the batch reply merely omits some images, only those are analyzed
    individually; records already returned by the batch are kept.

    Args:
        paths_by_index: ``(index, image path)`` pairs to analyze.

    Returns:
        Mapping of image index to its view record. Without ``LLM_API_KEY``
        every entry is the order-based fallback record.
    """
    if not LLM_API_KEY:
        return {index: fallback_product_view(index) for index, _path in paths_by_index}

    def analyze_single(path: Path, index: int) -> dict:
        # Per-image analysis that degrades to the order-based fallback
        # instead of raising.
        try:
            return analyze_product_view(path, index)
        except Exception:
            return fallback_product_view(index)

    results: dict[int, dict] = {}
    for start in range(0, len(paths_by_index), PRODUCT_VIEW_BATCH_SIZE):
        chunk = paths_by_index[start:start + PRODUCT_VIEW_BATCH_SIZE]
        indices = [index for index, _path in chunk]
        try:
            # Image loading is inside the try so that an unreadable file
            # falls back to per-image handling rather than failing the call.
            content: list[dict] = [{"type": "text", "text": product_view_batch_prompt(indices)}]
            for index, path in chunk:
                img_b64 = base64.b64encode(path.read_bytes()).decode("ascii")
                content.append({"type": "text", "text": f"Image index {index}"})
                # NOTE(review): the data URL always declares image/jpeg,
                # whatever the file's real format - confirm the provider
                # tolerates this for PNG/WebP uploads.
                content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}})
            # NOTE(review): max_tokens is fixed while the batch may hold up
            # to 12 images - confirm full-size replies are not truncated.
            resp = llm().chat.completions.create(
                model=VISION_MODEL,
                messages=[{"role": "user", "content": content}],
                response_format={"type": "json_object"},
                temperature=0.05,
                max_tokens=1600,
            )
            raw = (resp.choices[0].message.content or "").strip()
            if not raw:
                raw = (getattr(resp.choices[0].message, "reasoning_content", "") or "").strip()
            parsed = parse_product_view_batch_response(raw, indices)
        except Exception as e:
            # Whole-chunk failure: analyze each image alone and flag it.
            for index, path in chunk:
                result = analyze_single(path, index)
                if result.get("risk"):
                    result["risk"] = f"{result['risk']};批量识别失败后单图兜底"
                else:
                    result["risk"] = f"批量识别失败后单图兜底:{str(e)[:60]}"
                results[index] = result
            continue

        # Batch succeeded: backfill only the images the reply left out. This
        # runs outside the try so one failing backfill cannot throw away the
        # batch results or trigger a re-query of every image.
        for index, path in chunk:
            results[index] = parsed.get(index) or analyze_single(path, index)
    return results
|
||||
|
||||
|
||||
@app.post("/jobs/{job_id}/assets/product-views/analyze")
|
||||
def analyze_product_views(job_id: str, req: AnalyzeProductViewsReq) -> dict:
|
||||
if job_id not in JOBS:
|
||||
raise HTTPException(404, "job not found")
|
||||
items = []
|
||||
path_items: list[tuple[int, Path]] = []
|
||||
missing_results: dict[int, dict] = {}
|
||||
for index, ref in enumerate(req.refs):
|
||||
ref_path = storyboard_ref_path(job_id, ref)
|
||||
if not ref_path or not ref_path.exists():
|
||||
result = fallback_product_view(index)
|
||||
missing_results[index] = fallback_product_view(index)
|
||||
else:
|
||||
result = analyze_product_view(ref_path, index)
|
||||
path_items.append((index, ref_path))
|
||||
batch_results = analyze_product_views_batch(path_items) if path_items else {}
|
||||
items = []
|
||||
for index, _ref in enumerate(req.refs):
|
||||
result = batch_results.get(index) or missing_results.get(index) or fallback_product_view(index)
|
||||
items.append({
|
||||
"index": index,
|
||||
"view": result["view"],
|
||||
"background": result.get("background", "unknown"),
|
||||
"use_tags": result.get("use_tags", default_product_use_tags(result["view"])),
|
||||
"orientation": result.get("orientation", default_product_orientation(result["view"])),
|
||||
"landmarks": result.get("landmarks", default_product_landmarks(result["view"])),
|
||||
"note": result["note"],
|
||||
"risk": result.get("risk", ""),
|
||||
"confidence": result["confidence"],
|
||||
@@ -4510,7 +4684,8 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
|
||||
prompt = (
|
||||
"Use the reference image as the same SKG neck-and-shoulder wearable massage product. "
|
||||
f"Generate a clean product-only white-background reference image in this missing view: {target_view}. "
|
||||
"Preserve the exact product identity: white U-shaped shoulder/neck device, asymmetric left and right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and scale. "
|
||||
"Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. "
|
||||
"Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. "
|
||||
"Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. "
|
||||
"The product should be complete, centered, isolated on pure white, large enough to inspect, with no hands, people, packaging, text, UI, watermark, extra accessories, or scene background. "
|
||||
"If the target view is not fully visible in the source, infer the missing surfaces conservatively from the same product design without inventing a new model. "
|
||||
|
||||
Reference in New Issue
Block a user