auto-save 2026-05-17 20:15 (~4)

This commit is contained in:
2026-05-17 20:15:13 +08:00
parent d32e87a376
commit 72aef99592
4 changed files with 272 additions and 44 deletions

View File

@@ -4327,12 +4327,14 @@ async def upload_storyboard_asset(job_id: str, file: UploadFile = File(...)) ->
# Closed set of view identifiers a product reference image can be classified as.
PRODUCT_VIEW_VALUES = ["front", "left_45", "right_45", "side_thickness", "inner_contacts", "back_bottom"]
# Number of images sent per batched request: read from the environment
# (default "8") and clamped to the range 1..12. A non-integer value raises
# ValueError when this module is imported.
PRODUCT_VIEW_BATCH_SIZE = max(1, min(12, int(os.getenv("PRODUCT_VIEW_BATCH_SIZE", "8"))))
# Display label for each view identifier. Every key is listed exactly once:
# repeated keys in a dict literal are legal Python, but only the last value
# survives, which hides the superseded labels from the reader.
PRODUCT_VIEW_LABELS = {
    "front": "正面/外侧主外观",
    "left_45": "佩戴者左 45",
    "right_45": "佩戴者右 45",
    "side_thickness": "侧面厚度",
    "inner_contacts": "贴颈内侧/触点",
    "back_bottom": "背面/底部",
}
@@ -4382,12 +4384,114 @@ def fallback_product_view(index: int) -> dict:
"view": view,
"background": "unknown",
"use_tags": default_product_use_tags(view),
"note": f"{PRODUCT_VIEW_LABELS.get(view, view)}参考;模型识别不可用时按上传顺序自动标注,请人工只检查备注。",
"orientation": default_product_orientation(view),
"landmarks": default_product_landmarks(view),
"note": f"{PRODUCT_VIEW_LABELS.get(view, view)}参考;模型识别不可用时按上传顺序自动标注,请重点复核佩戴者左/右、上/下和贴颈内侧。",
"risk": "模型识别不可用,按上传顺序兜底",
"confidence": 0.25,
}
# Keys of the per-image "orientation" mapping. normalize_product_orientation
# copies only these keys from a caller-supplied dict; any other key is ignored.
PRODUCT_ORIENTATION_KEYS = [
    "product_left",
    "product_right",
    "top",
    "bottom",
    "inner_side",
    "outer_side",
    "opening_direction",
]
def default_product_orientation(view: str) -> dict:
    """Return the default orientation notes for a product view.

    A fresh dict with the seven orientation keys is built on every call, so
    callers may mutate the result. For a few views one entry is replaced by a
    view-specific hint; any other view gets the generic notes unchanged.

    Args:
        view: View identifier such as "front" or "inner_contacts".

    Returns:
        Mapping from orientation key to a short descriptive note.
    """
    orientation = {
        "product_left": "佩戴者左侧;需人工复核图中位置",
        "product_right": "佩戴者右侧;需人工复核图中位置",
        "top": "靠近下巴/脸/颈部上沿",
        "bottom": "靠近锁骨/肩部下沿",
        "inner_side": "贴近脖子皮肤的一侧,通常可见按摩触点",
        "outer_side": "外壳展示面,通常可见按键/Logo/材质",
        "opening_direction": "U 形开口方向需结合图片复核",
    }
    # view -> (key to override, replacement note); both 45-degree views share
    # the same warning about image left/right versus wearer left/right.
    side_warning = ("opening_direction", "注意不要把图片左右直接当成产品佩戴者左右")
    view_overrides = {
        "inner_contacts": ("inner_side", "本图重点:贴颈内侧/按摩触点"),
        "side_thickness": ("outer_side", "本图重点:侧厚、边缘和机身厚度"),
        "left_45": side_warning,
        "right_45": side_warning,
    }
    override = view_overrides.get(view)
    if override is not None:
        target_key, replacement = override
        orientation[target_key] = replacement
    return orientation
def default_product_landmarks(view: str) -> list[str]:
    """Return the default landmark labels for a product view.

    Args:
        view: View identifier such as "front" or "back_bottom".

    Returns:
        A new list of landmark labels for the view; a single generic label is
        returned when the view is not one of the known identifiers.
    """
    landmarks_by_view = {
        "front": ("U形开口", "外壳主轮廓", "左右臂"),
        "left_45": ("佩戴者左侧臂", "侧边弧度", "按键/结构差异"),
        "right_45": ("佩戴者右侧臂", "侧边弧度", "按键/结构差异"),
        "side_thickness": ("机身厚度", "侧边轮廓", "佩戴比例"),
        "inner_contacts": ("贴颈内侧", "按摩触点", "皮肤接触面"),
        "back_bottom": ("背面/底部", "接口/底面", "材质细节"),
    }
    chosen = landmarks_by_view.get(view, ("U形挂脖轮廓",))
    # Hand back a list (not the tuple) so callers can concatenate or mutate it.
    return list(chosen)
def normalize_product_orientation(value: object, view: str) -> dict:
    """Merge caller-supplied orientation notes over the defaults for a view.

    Args:
        value: Candidate orientation data; anything that is not a dict is
            ignored and the defaults are returned unchanged.
        view: View identifier used to select the default notes.

    Returns:
        Orientation mapping where each recognised key holds either the cleaned
        supplied text (whitespace collapsed, surrounding quotes/punctuation
        stripped, at most 80 characters) or the default note.
    """
    merged = default_product_orientation(view)
    if not isinstance(value, dict):
        return merged
    for field in PRODUCT_ORIENTATION_KEYS:
        candidate = value.get(field)
        if candidate is None:
            continue
        collapsed = re.sub(r"\s+", " ", str(candidate))
        cleaned = collapsed.strip().strip('"\' ,,。')
        # An entry that is empty after cleaning keeps the default note.
        if cleaned:
            merged[field] = cleaned[:80]
    return merged
def normalize_product_landmarks(value: object, view: str) -> list[str]:
    """Clean a landmark list and pad it with the defaults for the view.

    Args:
        value: Either a delimited string or a list of items; any other type
            contributes no landmarks of its own.
        view: View identifier used to select the default landmarks, which are
            appended after the supplied ones.

    Returns:
        Up to 8 unique, non-empty labels, each at most 24 characters, with
        supplied labels ahead of the defaults.
    """
    if isinstance(value, str):
        # Accept ASCII commas as well as full-width ones, since a
        # comma-separated string may use either form.
        raw_items = re.split(r"[,,/、\n]+", value)
    elif isinstance(value, list):
        # Drop null entries instead of turning them into the label "None".
        raw_items = [str(item) for item in value if item is not None]
    else:
        raw_items = []
    result: list[str] = []
    for item in raw_items + default_product_landmarks(view):
        text = re.sub(r"\s+", " ", str(item)).strip().strip('"\' ,,。')
        # Truncate before the duplicate check: stored entries are truncated, so
        # comparing the untruncated text would let long repeats through.
        label = text[:24]
        if label and label not in result:
            result.append(label)
    return result[:8]
def normalize_product_view_data(data: dict, index: int) -> dict:
    """Validate and clean one product-view record.

    Args:
        data: Raw record with optional "view", "background", "use_tags",
            "orientation", "landmarks", "note", "risk" and "confidence" keys.
        index: Position of the image, used to build the fallback record.

    Returns:
        The cleaned record, or ``fallback_product_view(index)`` when "view" is
        not one of PRODUCT_VIEW_VALUES.
    """
    import math

    view = str(data.get("view") or "").strip().strip('"\' ,。')
    if view not in PRODUCT_VIEW_VALUES:
        return fallback_product_view(index)
    background = str(data.get("background") or "unknown").strip().strip('"\' ,。')
    if background not in PRODUCT_BACKGROUND_VALUES:
        background = "unknown"
    use_tags = normalize_product_use_tags(data.get("use_tags"), view)
    orientation = normalize_product_orientation(data.get("orientation"), view)
    landmarks = normalize_product_landmarks(data.get("landmarks"), view)
    note = str(data.get("note") or "").strip().strip('"\' ,,。')
    # An empty note falls back to the view label so the field is never blank.
    note = re.sub(r"\s+", " ", note)[:320] or f"{PRODUCT_VIEW_LABELS.get(view, view)}参考"
    risk = str(data.get("risk") or "").strip().strip('"\' ,,。')
    risk = re.sub(r"\s+", " ", risk)[:160]
    try:
        confidence = float(data.get("confidence", 0.5))
    except (TypeError, ValueError, OverflowError):
        confidence = 0.5
    if math.isnan(confidence):
        # min(1.0, nan) evaluates to 1.0, so without this guard a NaN value
        # would be reported as full confidence.
        confidence = 0.5
    confidence = max(0.0, min(1.0, confidence))
    return {
        "view": view,
        "background": background,
        "use_tags": use_tags,
        "orientation": orientation,
        "landmarks": landmarks,
        "note": note,
        "risk": risk,
        "confidence": confidence,
    }
def parse_product_view_response(raw: str, index: int) -> dict:
text = (raw or "").strip()
text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.I).strip()
@@ -4419,22 +4523,45 @@ def parse_product_view_response(raw: str, index: int) -> dict:
"risk": risk_match.group(1) if risk_match else "",
"confidence": confidence_match.group(1) if confidence_match else 0.45,
}
view = str(data.get("view") or "").strip().strip('"\' ,。')
if view not in PRODUCT_VIEW_VALUES:
return fallback_product_view(index)
background = str(data.get("background") or "unknown").strip().strip('"\' ,。')
if background not in PRODUCT_BACKGROUND_VALUES:
background = "unknown"
use_tags = normalize_product_use_tags(data.get("use_tags"), view)
note = str(data.get("note") or "").strip().strip('"\' ,,。')
note = re.sub(r"\s+", " ", note)[:220] or f"{PRODUCT_VIEW_LABELS.get(view, view)}参考"
risk = str(data.get("risk") or "").strip().strip('"\' ,,。')
risk = re.sub(r"\s+", " ", risk)[:120]
try:
confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5))))
except Exception:
confidence = 0.5
return {"view": view, "background": background, "use_tags": use_tags, "note": note, "risk": risk, "confidence": confidence}
return normalize_product_view_data(data, index)
def parse_product_view_batch_response(raw: str, indices: list[int]) -> dict[int, dict]:
    """Parse a batched product-view reply into per-index records.

    The reply may be a JSON object with an "items" list or a bare JSON list,
    optionally wrapped in a Markdown code fence or surrounded by other text.

    Args:
        raw: Raw reply text.
        indices: Image indices that were sent in this batch. Items whose index
            is not in this list are dropped; an item without a usable "index"
            is matched to ``indices`` by its position in the reply.

    Returns:
        Mapping from image index to its normalized record. Indices missing
        from the reply are simply absent.

    Raises:
        ValueError: If no JSON can be decoded (json.JSONDecodeError, a
            ValueError subclass) or the decoded value has no list of items.
    """
    text = (raw or "").strip()
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.I).strip()
    text = re.sub(r"\s*```$", "", text).strip()

    # Try the whole text first, then the outermost {...} span, then the
    # outermost [...] span. Extracting only {...} would turn a top-level array
    # of several objects into invalid JSON even though a list is accepted below.
    candidates = [text]
    for pattern in (r"\{[\s\S]*\}", r"\[[\s\S]*\]"):
        match = re.search(pattern, text)
        if match and match.group(0) not in candidates:
            candidates.append(match.group(0))

    raw_items = None
    decode_error = None
    decoded_any = False
    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except ValueError as exc:
            decode_error = exc
            continue
        decoded_any = True
        found = data.get("items") if isinstance(data, dict) else data
        if isinstance(found, list):
            raw_items = found
            break
    if raw_items is None:
        if not decoded_any and decode_error is not None:
            raise decode_error
        raise ValueError("product view batch response missing items[]")

    allowed = set(indices)
    results: dict[int, dict] = {}
    for offset, item in enumerate(raw_items):
        if not isinstance(item, dict):
            continue
        positional = indices[offset] if offset < len(indices) else -1
        try:
            item_index = int(item.get("index", positional))
        except (TypeError, ValueError, OverflowError):
            item_index = positional
        if item_index not in allowed:
            continue
        results[item_index] = normalize_product_view_data(item, item_index)
    return results
def product_view_batch_prompt(indices: list[int]) -> str:
    """Build the instruction text for a batched product-view request.

    Args:
        indices: Image indices included in the batch. Only their count is
            interpolated into the text.

    Returns:
        One string made of the concatenated instruction paragraphs, ending
        with an example of the expected ``{"items": [...]}`` JSON shape.
    """
    count = len(indices)
    # Adjacent literals are concatenated by the parser; only the paragraph that
    # mentions the image count is an f-string.
    return (
        "你在识别同一款 SKG 挂脖肩颈按摩仪的产品参考图。所有图片都是同一产品,不要判断是不是不同产品,也不要把它当耳机、头戴设备或护颈枕;它是套在脖子上、外置佩戴在肩颈位置的 U 形/围脖式按摩仪,可能有内侧按摩触点、外壳按键、厚度、底部接口和左右不对称结构。\n"
        "先建立产品坐标系再逐图识别product_left=产品戴在真人脖子上时佩戴者左肩那一侧product_right=佩戴者右肩那一侧top=靠近下巴/脸/颈部上沿bottom=靠近锁骨/肩部下沿inner_side=贴近脖子皮肤/按摩触点的一侧outer_side=外壳/按键/Logo/材质展示面。不要把图片左侧直接等同于产品左侧,必须在 orientation 里说明产品左/右/上/下分别对应图中的哪一边;不确定就写不确定并在 risk 里提醒。\n"
        "每张图的 view 必须从 enum 选一个front正面/外侧主外观), left_45佩戴者左侧45度, right_45佩戴者右侧45度, side_thickness侧面厚度, inner_contacts贴颈内侧/按摩触点), back_bottom背面/底部/接口。left_45/right_45 指佩戴者身体左右,不是画面左右。\n"
        "background enumwhite, black, simple, complex, unknown。use_tags 只能从 enum 选hero_packshot, wearing_scale, inner_contact, side_thickness, asymmetry, button_detail, back_bottom, material_texture。\n"
        "landmarks 用中文短词列出可见结构例如佩戴者左侧臂、佩戴者右侧臂、U形开口、贴颈内侧、按摩触点、侧边厚度、按键、充电口、底部、外壳材质、局部细节。note 必须用中文写给生视频模型,重点说明这张图适合约束什么,尤其要写清楚左/右/上/下、内/外侧、触点或局部细节。risk 只在可能误导生视频时写中文,如局部裁切、无法判断产品左右、上下颠倒风险、反光、遮挡、分辨率低、背景干扰;否则为空。\n"
        f"本次共有 {count} 张图片,图片前的 Image index 就是输出 index。必须输出同样数量的 items且 index 不要改。只输出一行严格 JSON不要 markdown不要换行。\n"
        "{\"items\":[{\"index\":0,\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.0}]}"
    )
def analyze_product_view(ref_path: Path, index: int) -> dict:
@@ -4473,22 +4600,69 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
return fallback
def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[int, dict]:
    """Classify product reference images in batches, with per-image fallback.

    Images are sent in chunks of PRODUCT_VIEW_BATCH_SIZE. Any index missing
    from a chunk's parsed reply is analysed individually. If the chunk fails
    as a whole, every image in it is analysed individually and its "risk"
    field is annotated to say the batch step failed.

    Args:
        paths_by_index: ``(index, path)`` pairs for the images to analyse.

    Returns:
        Mapping from image index to its product-view record; every supplied
        index gets an entry.
    """
    import mimetypes

    if not LLM_API_KEY:
        return {index: fallback_product_view(index) for index, _path in paths_by_index}

    # Declare the real image type where it is a commonly accepted one; anything
    # else keeps the previous "image/jpeg" label.
    known_image_types = {"image/jpeg", "image/png", "image/webp", "image/gif"}
    results: dict[int, dict] = {}
    for start in range(0, len(paths_by_index), PRODUCT_VIEW_BATCH_SIZE):
        chunk = paths_by_index[start:start + PRODUCT_VIEW_BATCH_SIZE]
        indices = [index for index, _path in chunk]
        try:
            # Reading the files happens inside the try so one unreadable image
            # triggers the per-image fallback instead of aborting the request.
            content: list[dict] = [{"type": "text", "text": product_view_batch_prompt(indices)}]
            for index, path in chunk:
                mime_type, _encoding = mimetypes.guess_type(str(path))
                if mime_type not in known_image_types:
                    mime_type = "image/jpeg"
                img_b64 = base64.b64encode(path.read_bytes()).decode("ascii")
                content.append({"type": "text", "text": f"Image index {index}"})
                content.append({"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{img_b64}"}})
            resp = llm().chat.completions.create(
                model=VISION_MODEL,
                messages=[{"role": "user", "content": content}],
                response_format={"type": "json_object"},
                temperature=0.05,
                # Scale the reply budget with the batch size, never below the
                # previous fixed 1600. NOTE(review): 400 tokens per image is an
                # estimate for the requested fields; tune against real replies.
                max_tokens=max(1600, 400 * len(chunk)),
            )
            raw = (resp.choices[0].message.content or "").strip()
            if not raw:
                raw = (getattr(resp.choices[0].message, "reasoning_content", "") or "").strip()
            parsed = parse_product_view_batch_response(raw, indices)
            for index, path in chunk:
                # Iterate the pairs directly rather than looking the path up
                # again with indices.index(index).
                results[index] = parsed.get(index) or analyze_product_view(path, index)
        except Exception as e:
            # Deliberate best-effort boundary: any batch failure degrades to
            # single-image analysis, and then to the static fallback record.
            for index, path in chunk:
                try:
                    result = analyze_product_view(path, index)
                except Exception:
                    result = fallback_product_view(index)
                if result.get("risk"):
                    result["risk"] = f"{result['risk']};批量识别失败后单图兜底"
                else:
                    result["risk"] = f"批量识别失败后单图兜底:{str(e)[:60]}"
                results[index] = result
    return results
@app.post("/jobs/{job_id}/assets/product-views/analyze")
def analyze_product_views(job_id: str, req: AnalyzeProductViewsReq) -> dict:
if job_id not in JOBS:
raise HTTPException(404, "job not found")
items = []
path_items: list[tuple[int, Path]] = []
missing_results: dict[int, dict] = {}
for index, ref in enumerate(req.refs):
ref_path = storyboard_ref_path(job_id, ref)
if not ref_path or not ref_path.exists():
result = fallback_product_view(index)
missing_results[index] = fallback_product_view(index)
else:
result = analyze_product_view(ref_path, index)
path_items.append((index, ref_path))
batch_results = analyze_product_views_batch(path_items) if path_items else {}
items = []
for index, _ref in enumerate(req.refs):
result = batch_results.get(index) or missing_results.get(index) or fallback_product_view(index)
items.append({
"index": index,
"view": result["view"],
"background": result.get("background", "unknown"),
"use_tags": result.get("use_tags", default_product_use_tags(result["view"])),
"orientation": result.get("orientation", default_product_orientation(result["view"])),
"landmarks": result.get("landmarks", default_product_landmarks(result["view"])),
"note": result["note"],
"risk": result.get("risk", ""),
"confidence": result["confidence"],
@@ -4510,7 +4684,8 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
prompt = (
"Use the reference image as the same SKG neck-and-shoulder wearable massage product. "
f"Generate a clean product-only white-background reference image in this missing view: {target_view}. "
"Preserve the exact product identity: white U-shaped shoulder/neck device, asymmetric left and right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and scale. "
"Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. "
"Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. "
"Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. "
"The product should be complete, centered, isolated on pure white, large enough to inspect, with no hands, people, packaging, text, UI, watermark, extra accessories, or scene background. "
"If the target view is not fully visible in the source, infer the missing surfaces conservatively from the same product design without inventing a new model. "