fix: send product angle refs as image inputs

This commit is contained in:
2026-05-18 07:10:27 +08:00
parent 5fde9f3e22
commit 05283aed52
2 changed files with 35 additions and 56 deletions

View File

@@ -2580,8 +2580,22 @@ def _image_error_status(error: Exception) -> int:
return 503 if ("上游负载饱和" in msg or "HTTP 429" in msg or "saturated" in msg.lower()) else 500 return 503 if ("上游负载饱和" in msg or "HTTP 429" in msg or "saturated" in msg.lower()) else 500
def _prepare_image_edit_bytes(image_path: Path, max_side: int) -> bytes:
    """Return upload-ready bytes for one image-edit reference image.

    The image is opened with Pillow, shrunk in place via ``thumbnail`` when
    its longest side exceeds ``max_side``, converted to RGB and re-encoded as
    JPEG (quality 88). If any step of that pipeline raises, the raw bytes of
    ``image_path`` are returned unchanged as a best-effort fallback.

    Args:
        image_path: Location of the source image on disk.
        max_side: Maximum allowed length, in pixels, of the longest side.

    Returns:
        JPEG-encoded bytes, or the untouched file bytes when re-encoding fails.

    Raises:
        OSError: If the fallback read of ``image_path`` itself fails.
    """
    import io as _io

    from PIL import Image as _PILImage

    try:
        # The context manager guarantees the file handle Pillow opens is
        # released, on success as well as when decoding/encoding raises.
        with _PILImage.open(image_path) as im:
            if max(im.size) > max_side:
                im.thumbnail((max_side, max_side), _PILImage.LANCZOS)
            buf = _io.BytesIO()
            im.convert("RGB").save(buf, format="JPEG", quality=88)
        return buf.getvalue()
    except Exception:
        # Deliberate best-effort: an undecodable file is sent as-is rather
        # than aborting the whole image-edit request.
        return image_path.read_bytes()
def _image_edit_call( def _image_edit_call(
image_path: Path, image_path: Path | list[Path],
prompt: str, prompt: str,
model: str | None = None, model: str | None = None,
models: list[str] | None = None, models: list[str] | None = None,
@@ -2592,28 +2606,20 @@ def _image_edit_call(
"""通用 image edit 调用 · 失败重试 + 可选 text fallback。 """通用 image edit 调用 · 失败重试 + 可选 text fallback。
返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。 返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
失败 raise RuntimeError。 失败 raise RuntimeError。
输入图自动 resize 到 max_side默认 1024边长后再 base64 输入图自动 resize 到 max_side默认 1024边长后再用 multipart 上传;多参考图使用 image[]
生图模型按产品规则强制使用 gpt-image-2model/models 参数只保留兼容旧调用。""" 生图模型按产品规则强制使用 gpt-image-2model/models 参数只保留兼容旧调用。"""
import base64 as b64lib import base64 as b64lib
import io as _io
import time as _time import time as _time
import httpx import httpx
from PIL import Image as _PILImage
if not IMAGE_API_KEY: if not IMAGE_API_KEY:
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置") raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
models_cycle = [GPT_IMAGE_MODEL] models_cycle = [GPT_IMAGE_MODEL]
model = GPT_IMAGE_MODEL model = GPT_IMAGE_MODEL
# 缩到 max_side 内 image_paths = image_path if isinstance(image_path, list) else [image_path]
try: image_paths = [path for path in image_paths if path and path.exists()][:6]
im = _PILImage.open(image_path) if not image_paths:
if max(im.size) > max_side: raise RuntimeError("image edit reference image missing")
im.thumbnail((max_side, max_side), _PILImage.LANCZOS) img_bytes_list = [_prepare_image_edit_bytes(path, max_side) for path in image_paths]
buf = _io.BytesIO()
im.convert("RGB").save(buf, format="JPEG", quality=88)
img_bytes_in = buf.getvalue()
except Exception:
# PIL 失败兜底走原文件
img_bytes_in = image_path.read_bytes()
plan: list[str] = ["edit"] * max_attempts plan: list[str] = ["edit"] * max_attempts
if fallback_text: if fallback_text:
plan.append("text") plan.append("text")
@@ -2636,7 +2642,14 @@ def _image_edit_call(
"Authorization": f"Bearer {IMAGE_API_KEY}", "Authorization": f"Bearer {IMAGE_API_KEY}",
}, },
data={"model": current_model, "prompt": prompt, "n": "1"}, data={"model": current_model, "prompt": prompt, "n": "1"},
files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")}, files=(
{"image": ("reference.jpg", img_bytes_list[0], "image/jpeg")}
if len(img_bytes_list) == 1
else [
("image[]", (f"reference_{idx + 1}.jpg", img_bytes, "image/jpeg"))
for idx, img_bytes in enumerate(img_bytes_list)
]
),
) )
r.raise_for_status() r.raise_for_status()
resp_data = r.json() resp_data = r.json()
@@ -5123,29 +5136,6 @@ def analyze_product_views(job_id: str, req: AnalyzeProductViewsReq) -> dict:
return {"items": items, "missing_views": missing} return {"items": items, "missing_views": missing}
def _make_product_angle_reference_sheet(paths: list[Path], out_path: Path) -> Path:
    """Compose up to six product photos into a single JPEG reference sheet.

    Each of the first six ``paths`` is EXIF-transposed, converted to RGB,
    shrunk to fit 520x520 and centred on a white 560x560 cell. Images that
    fail to load or process are skipped. The cells are laid out row by row
    on a light-grey sheet (three columns when there are more than two cells,
    otherwise one column per cell) and saved as JPEG at quality 94.

    Args:
        paths: Candidate source images; only the first six are considered.
        out_path: Destination file; its parent directory is created if needed.

    Returns:
        ``out_path``, after the sheet has been written.

    Raises:
        RuntimeError: If none of the candidate images could be processed.
    """
    thumbs: list[Image.Image] = []
    for path in paths[:6]:
        try:
            # Close the source file as soon as an independent RGB copy
            # exists, so handles are not left open across the loop.
            with Image.open(path) as src:
                img = ImageOps.exif_transpose(src).convert("RGB")
            img.thumbnail((520, 520), Image.Resampling.LANCZOS)
            cell = Image.new("RGB", (560, 560), (255, 255, 255))
            # Centre the thumbnail inside its white cell.
            cell.paste(img, ((560 - img.width) // 2, (560 - img.height) // 2))
            thumbs.append(cell)
        except Exception:
            # Best-effort: an unreadable reference is dropped, not fatal.
            continue
    if not thumbs:
        raise RuntimeError("no usable product reference images")
    cols = 3 if len(thumbs) > 2 else len(thumbs)
    rows = (len(thumbs) + cols - 1) // cols  # ceiling division
    sheet = Image.new("RGB", (cols * 560, rows * 560), (245, 245, 245))
    for i, thumb in enumerate(thumbs):
        sheet.paste(thumb, ((i % cols) * 560, (i // cols) * 560))
    out_path.parent.mkdir(parents=True, exist_ok=True)
    sheet.save(out_path, "JPEG", quality=94)
    return out_path
@app.post("/jobs/{job_id}/assets/product-angle") @app.post("/jobs/{job_id}/assets/product-angle")
def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) -> dict: def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) -> dict:
if job_id not in JOBS: if job_id not in JOBS:
@@ -5165,11 +5155,6 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
if not source_paths: if not source_paths:
raise HTTPException(404, "source product image not found") raise HTTPException(404, "source product image not found")
source_path = source_paths[0] source_path = source_paths[0]
model_src = source_path
sheet_tmp: Path | None = None
if len(source_paths) > 1:
sheet_tmp = job_dir(job_id) / "tmp" / f"product_angle_refs_{uuid.uuid4().hex[:8]}.jpg"
model_src = _make_product_angle_reference_sheet(source_paths, sheet_tmp)
target_view = (req.target_view or "目标视角").strip() target_view = (req.target_view or "目标视角").strip()
note = (req.note or "").strip() note = (req.note or "").strip()
source_notes = [re.sub(r"\s+", " ", str(item)).strip()[:180] for item in (req.source_notes or []) if str(item).strip()] source_notes = [re.sub(r"\s+", " ", str(item)).strip()[:180] for item in (req.source_notes or []) if str(item).strip()]
@@ -5181,11 +5166,11 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
else "" else ""
) )
prompt = ( prompt = (
"Use the reference image or reference board as evidence for the same SKG neck-and-shoulder wearable massage product. " "Use all provided reference images as evidence for the same SKG neck-and-shoulder wearable massage product. "
"If a reference board is provided, all panels are the same product from uploaded views; do not output a board, collage, or multiple products. " "Each input image is one uploaded view of the same product; do not output a board, collage, or multiple products. "
f"Generate a clean product-only white-background reference image in this missing view: {target_view}. " f"Generate a clean product-only white-background reference image in this missing view: {target_view}. "
+ source_note_clause + source_note_clause
"Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. " + "Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. "
"Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. " "Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. "
"Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. " "Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. "
"The product should be complete, centered, isolated on pure white, large enough to inspect, with no hands, people, packaging, text, UI, watermark, extra accessories, or scene background. " "The product should be complete, centered, isolated on pure white, large enough to inspect, with no hands, people, packaging, text, UI, watermark, extra accessories, or scene background. "
@@ -5194,15 +5179,9 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
) )
models = [GPT_IMAGE_MODEL] models = [GPT_IMAGE_MODEL]
try: try:
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1600) img_bytes, _mode = _image_edit_call(source_paths, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1600)
except RuntimeError as e: except RuntimeError as e:
raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}") raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}")
finally:
if sheet_tmp and sheet_tmp.exists():
try:
sheet_tmp.unlink()
except OSError:
pass
asset_id = f"product_angle_{uuid.uuid4().hex[:10]}" asset_id = f"product_angle_{uuid.uuid4().hex[:10]}"
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg" out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
_normalize_asset_image(img_bytes, out_path, source_path, "1024", "white", square=True, fill_subject=True) _normalize_asset_image(img_bytes, out_path, source_path, "1024", "white", square=True, fill_subject=True)

View File

@@ -894,7 +894,7 @@ ProductRefStateItem {
<tr><td>产品图入库到 job</td><td><code>POST /jobs/{id}/assets</code><code>POST /jobs/{id}/assets/product-library</code></td><td><code>uploadStoryboardAsset</code><code>copyProductLibraryAsset</code></td><td>上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本,透明底铺白,过大/过小图片会在 <code>ImageRef.asset_meta</code> 里返回转换动作和风险;黑底/白底背景本身不强行转换。注意该接口只写图片文件,产品素材池列表另由 <code>PUT /jobs/{id}/product-refs</code> 持久化。</td></tr> <tr><td>产品图入库到 job</td><td><code>POST /jobs/{id}/assets</code><code>POST /jobs/{id}/assets/product-library</code></td><td><code>uploadStoryboardAsset</code><code>copyProductLibraryAsset</code></td><td>上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本,透明底铺白,过大/过小图片会在 <code>ImageRef.asset_meta</code> 里返回转换动作和风险;黑底/白底背景本身不强行转换。注意该接口只写图片文件,产品素材池列表另由 <code>PUT /jobs/{id}/product-refs</code> 持久化。</td></tr>
<tr><td>产品素材池保存</td><td><code>PUT /jobs/{id}/product-refs</code></td><td><code>saveProductRefs</code></td><td>把当前 job 的产品素材池列表、识别视角、用途标签、方向、结构点、备注、AI 补图和删除结果保存到 <code>Job.product_refs</code> / <code>state.json</code>。前端上传、识别完成、补角度、编辑备注和删除时都会同步保存;刷新页面或热更新后从 job 恢复,不再要求重新上传和重新识别。</td></tr> <tr><td>产品素材池保存</td><td><code>PUT /jobs/{id}/product-refs</code></td><td><code>saveProductRefs</code></td><td>把当前 job 的产品素材池列表、识别视角、用途标签、方向、结构点、备注、AI 补图和删除结果保存到 <code>Job.product_refs</code> / <code>state.json</code>。前端上传、识别完成、补角度、编辑备注和删除时都会同步保存;刷新页面或热更新后从 job 恢复,不再要求重新上传和重新识别。</td></tr>
<tr><td>产品视角识别</td><td><code>POST /jobs/{id}/assets/product-views/analyze</code></td><td><code>analyzeProductViews</code></td><td>读取同一产品素材池,按批次把多张图一次性提交给 <code>PRODUCT_VIEW_MODEL=gpt-image-2</code> 做视角标注,不限制只看前 6 张;识别对象被固定为套在脖子上的 U 形肩颈按摩仪。返回 <code>view</code><code>background</code><code>use_tags</code><code>orientation</code><code>landmarks</code>、中文备注、生成风险和置信度;<code>orientation</code> 明确佩戴者左/右、上/下、内外侧和开口方向对应图中哪边,避免把图片左右误当产品左右。前端不再要求用户手动选择视角,也不做不同产品身份判断。</td></tr> <tr><td>产品视角识别</td><td><code>POST /jobs/{id}/assets/product-views/analyze</code></td><td><code>analyzeProductViews</code></td><td>读取同一产品素材池,按批次把多张图一次性提交给 <code>PRODUCT_VIEW_MODEL=gpt-image-2</code> 做视角标注,不限制只看前 6 张;识别对象被固定为套在脖子上的 U 形肩颈按摩仪。返回 <code>view</code><code>background</code><code>use_tags</code><code>orientation</code><code>landmarks</code>、中文备注、生成风险和置信度;<code>orientation</code> 明确佩戴者左/右、上/下、内外侧和开口方向对应图中哪边,避免把图片左右误当产品左右。前端不再要求用户手动选择视角,也不做不同产品身份判断。</td></tr>
<tr><td>产品缺角度补图</td><td><code>POST /jobs/{id}/assets/product-angle</code></td><td><code>generateProductAngleAsset</code></td><td>用当前同一产品素材池作为参考,通过 <code>gpt-image-2</code> 自动补全缺失视角,输出新的 <code>ImageRef(kind="asset")</code>。前端不再固定传第一张图,而是按目标视角给已上传/已标注参考图打分,优先选择真实上传图、目标相邻视角、侧厚/触点/底部对应用途标签和低风险高置信图,最多传 6 张;后端把这些参考图拼成同产品参考板,再通过 <code>/images/edits</code> multipart 提交给 <code>gpt-image-2</code>。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例,并禁止输出拼图/多产品;遇到上游 429 / saturated 会按退避节奏重试,最终仍失败时返回 503 和可读提示。</td></tr> <tr><td>产品缺角度补图</td><td><code>POST /jobs/{id}/assets/product-angle</code></td><td><code>generateProductAngleAsset</code></td><td>用当前同一产品素材池作为参考,通过 <code>gpt-image-2</code> 自动补全缺失视角,输出新的 <code>ImageRef(kind="asset")</code>。前端不再固定传第一张图,而是按目标视角给已上传/已标注参考图打分,优先选择真实上传图、目标相邻视角、侧厚/触点/底部对应用途标签和低风险高置信图,最多传 6 张;后端通过 <code>/images/edits</code> multipart 的多张 <code>image[]</code> 直接提交给 <code>gpt-image-2</code>,不再把参考图拼成一张板,降低模型误解成拼图/多产品的概率。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例,并禁止输出拼图/多产品;遇到上游 429 / saturated 会按退避节奏重试,最终仍失败时返回 503 和可读提示。</td></tr>
<tr><td>角色库</td><td><code>GET /character-library/skg</code></td><td><code>listCharacterLibrary</code></td><td>读取内置 5 个透明骨架人角色 manifest每个角色含正面、左右 45 度、侧面、背面、半身近景和背部特写 7 张参考图。</td></tr> <tr><td>角色库</td><td><code>GET /character-library/skg</code></td><td><code>listCharacterLibrary</code></td><td>读取内置 5 个透明骨架人角色 manifest每个角色含正面、左右 45 度、侧面、背面、半身近景和背部特写 7 张参考图。</td></tr>
<tr><td>角色图入库到 job</td><td><code>POST /jobs/{id}/assets/character-library</code></td><td><code>copyCharacterLibraryAssets</code></td><td>把所选角色的 7 张参考图复制为当前 job asset返回 <code>subject_images</code>,产品融合生成视频时作为人物身份参考图提交。</td></tr> <tr><td>角色图入库到 job</td><td><code>POST /jobs/{id}/assets/character-library</code></td><td><code>copyCharacterLibraryAssets</code></td><td>把所选角色的 7 张参考图复制为当前 job asset返回 <code>subject_images</code>,产品融合生成视频时作为人物身份参考图提交。</td></tr>
<tr><td>产品融合引导图</td><td><code>POST /jobs/{id}/product-fusion/guide</code></td><td><code>createProductFusionGuide</code></td><td>旧流程兼容接口:读取产品图和白底人物图,按 <code>product_region</code> 合成位置引导图。当前内置角色 + 产品 + 描述流程不再主动调用它。</td></tr> <tr><td>产品融合引导图</td><td><code>POST /jobs/{id}/product-fusion/guide</code></td><td><code>createProductFusionGuide</code></td><td>旧流程兼容接口:读取产品图和白底人物图,按 <code>product_region</code> 合成位置引导图。当前内置角色 + 产品 + 描述流程不再主动调用它。</td></tr>
@@ -1013,7 +1013,7 @@ ProductRefStateItem {
</header> </header>
<div class="body"> <div class="body">
<p><strong>问题:</strong>同一产品素材池的视角标注仍显示通用 Vision 模型;缺角度补图固定拿第一张产品图作为参考,少侧面或内侧时容易用错误视角硬推,生成结果偏离产品真实结构。</p> <p><strong>问题:</strong>同一产品素材池的视角标注仍显示通用 Vision 模型;缺角度补图固定拿第一张产品图作为参考,少侧面或内侧时容易用错误视角硬推,生成结果偏离产品真实结构。</p>
<p><strong>改动:</strong><code>api/main.py</code> 新增 <code>PRODUCT_VIEW_MODEL=gpt-image-2</code><code>analyze_product_view</code> / <code>analyze_product_views_batch</code> 改用该模型并在 <code>/health</code> 返回 <code>models.product_view</code><code>generateProductAngleAsset</code> 前端请求新增 <code>source_refs</code><code>source_notes</code><code>AudioStoryboardPlanPanel</code> 按目标视角给产品图打分,优先真实上传图、相邻视角、用途标签、置信度和低风险图,最多传 6 张。后端把多张参考图拼成同产品参考板,再用 <code>gpt-image-2</code> 生成目标角度,避免只照抄第一张。</p> <p><strong>改动:</strong><code>api/main.py</code> 新增 <code>PRODUCT_VIEW_MODEL=gpt-image-2</code><code>analyze_product_view</code> / <code>analyze_product_views_batch</code> 改用该模型并在 <code>/health</code> 返回 <code>models.product_view</code><code>generateProductAngleAsset</code> 前端请求新增 <code>source_refs</code><code>source_notes</code><code>AudioStoryboardPlanPanel</code> 按目标视角给产品图打分,优先真实上传图、相邻视角、用途标签、置信度和低风险图,最多传 6 张。后端把多张参考图作为独立 <code>image[]</code> 提交给 <code>gpt-image-2</code> 生成目标角度,避免只照抄第一张,也避免参考板被模型当成拼图格式。</p>
<p><strong>影响:</strong><code>api/main.py</code><code>web/lib/api.ts</code><code>web/components/ad-recreation-board.tsx</code><code>RULES.md</code><code>api/.env.example</code><code>deploy/.env.production.example</code><code>docs/source-analysis.html</code>。后续补产品角度必须从同一产品素材池里挑多张证据图,不要再默认第一张。</p> <p><strong>影响:</strong><code>api/main.py</code><code>web/lib/api.ts</code><code>web/components/ad-recreation-board.tsx</code><code>RULES.md</code><code>api/.env.example</code><code>deploy/.env.production.example</code><code>docs/source-analysis.html</code>。后续补产品角度必须从同一产品素材池里挑多张证据图,不要再默认第一张。</p>
</div> </div>
</article> </article>