fix: send product angle refs as image inputs
This commit is contained in:
87
api/main.py
87
api/main.py
@@ -2580,8 +2580,22 @@ def _image_error_status(error: Exception) -> int:
|
|||||||
return 503 if ("上游负载饱和" in msg or "HTTP 429" in msg or "saturated" in msg.lower()) else 500
|
return 503 if ("上游负载饱和" in msg or "HTTP 429" in msg or "saturated" in msg.lower()) else 500
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_image_edit_bytes(image_path: Path, max_side: int) -> bytes:
|
||||||
|
import io as _io
|
||||||
|
from PIL import Image as _PILImage
|
||||||
|
try:
|
||||||
|
im = _PILImage.open(image_path)
|
||||||
|
if max(im.size) > max_side:
|
||||||
|
im.thumbnail((max_side, max_side), _PILImage.LANCZOS)
|
||||||
|
buf = _io.BytesIO()
|
||||||
|
im.convert("RGB").save(buf, format="JPEG", quality=88)
|
||||||
|
return buf.getvalue()
|
||||||
|
except Exception:
|
||||||
|
return image_path.read_bytes()
|
||||||
|
|
||||||
|
|
||||||
def _image_edit_call(
|
def _image_edit_call(
|
||||||
image_path: Path,
|
image_path: Path | list[Path],
|
||||||
prompt: str,
|
prompt: str,
|
||||||
model: str | None = None,
|
model: str | None = None,
|
||||||
models: list[str] | None = None,
|
models: list[str] | None = None,
|
||||||
@@ -2592,28 +2606,20 @@ def _image_edit_call(
|
|||||||
"""通用 image edit 调用 · 失败重试 + 可选 text fallback。
|
"""通用 image edit 调用 · 失败重试 + 可选 text fallback。
|
||||||
返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
|
返回 (image_bytes, effective_mode) where effective_mode in {"edit","text"}。
|
||||||
失败 raise RuntimeError。
|
失败 raise RuntimeError。
|
||||||
输入图自动 resize 到 max_side(默认 1024)边长后再 base64。
|
输入图自动 resize 到 max_side(默认 1024)边长后再用 multipart 上传;多参考图使用 image[]。
|
||||||
生图模型按产品规则强制使用 gpt-image-2;model/models 参数只保留兼容旧调用。"""
|
生图模型按产品规则强制使用 gpt-image-2;model/models 参数只保留兼容旧调用。"""
|
||||||
import base64 as b64lib
|
import base64 as b64lib
|
||||||
import io as _io
|
|
||||||
import time as _time
|
import time as _time
|
||||||
import httpx
|
import httpx
|
||||||
from PIL import Image as _PILImage
|
|
||||||
if not IMAGE_API_KEY:
|
if not IMAGE_API_KEY:
|
||||||
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
||||||
models_cycle = [GPT_IMAGE_MODEL]
|
models_cycle = [GPT_IMAGE_MODEL]
|
||||||
model = GPT_IMAGE_MODEL
|
model = GPT_IMAGE_MODEL
|
||||||
# 缩到 max_side 内
|
image_paths = image_path if isinstance(image_path, list) else [image_path]
|
||||||
try:
|
image_paths = [path for path in image_paths if path and path.exists()][:6]
|
||||||
im = _PILImage.open(image_path)
|
if not image_paths:
|
||||||
if max(im.size) > max_side:
|
raise RuntimeError("image edit reference image missing")
|
||||||
im.thumbnail((max_side, max_side), _PILImage.LANCZOS)
|
img_bytes_list = [_prepare_image_edit_bytes(path, max_side) for path in image_paths]
|
||||||
buf = _io.BytesIO()
|
|
||||||
im.convert("RGB").save(buf, format="JPEG", quality=88)
|
|
||||||
img_bytes_in = buf.getvalue()
|
|
||||||
except Exception:
|
|
||||||
# PIL 失败兜底走原文件
|
|
||||||
img_bytes_in = image_path.read_bytes()
|
|
||||||
plan: list[str] = ["edit"] * max_attempts
|
plan: list[str] = ["edit"] * max_attempts
|
||||||
if fallback_text:
|
if fallback_text:
|
||||||
plan.append("text")
|
plan.append("text")
|
||||||
@@ -2636,7 +2642,14 @@ def _image_edit_call(
|
|||||||
"Authorization": f"Bearer {IMAGE_API_KEY}",
|
"Authorization": f"Bearer {IMAGE_API_KEY}",
|
||||||
},
|
},
|
||||||
data={"model": current_model, "prompt": prompt, "n": "1"},
|
data={"model": current_model, "prompt": prompt, "n": "1"},
|
||||||
files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")},
|
files=(
|
||||||
|
{"image": ("reference.jpg", img_bytes_list[0], "image/jpeg")}
|
||||||
|
if len(img_bytes_list) == 1
|
||||||
|
else [
|
||||||
|
("image[]", (f"reference_{idx + 1}.jpg", img_bytes, "image/jpeg"))
|
||||||
|
for idx, img_bytes in enumerate(img_bytes_list)
|
||||||
|
]
|
||||||
|
),
|
||||||
)
|
)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
resp_data = r.json()
|
resp_data = r.json()
|
||||||
@@ -5123,29 +5136,6 @@ def analyze_product_views(job_id: str, req: AnalyzeProductViewsReq) -> dict:
|
|||||||
return {"items": items, "missing_views": missing}
|
return {"items": items, "missing_views": missing}
|
||||||
|
|
||||||
|
|
||||||
def _make_product_angle_reference_sheet(paths: list[Path], out_path: Path) -> Path:
    """Tile up to six product photos into a single JPEG reference sheet.

    Only the first six entries of ``paths`` are considered. Each one is
    EXIF-rotated, converted to RGB, shrunk to fit 520x520 and centred on a
    white 560x560 cell; entries that raise while being processed are skipped.
    Cells are laid out left to right, top to bottom, at most three per row,
    on a light-grey canvas that is saved to ``out_path`` as JPEG (quality 94).
    The parent directory of ``out_path`` is created if needed.

    Args:
        paths: Candidate source images.
        out_path: Destination file for the sheet.

    Returns:
        ``out_path``.

    Raises:
        RuntimeError: If none of the images could be processed.
    """
    cell_px = 560
    cells: list[Image.Image] = []
    for src in paths[:6]:
        try:
            photo = ImageOps.exif_transpose(Image.open(src)).convert("RGB")
            photo.thumbnail((520, 520), Image.Resampling.LANCZOS)
            tile = Image.new("RGB", (cell_px, cell_px), (255, 255, 255))
            # Centre the shrunken photo inside its white cell.
            offset = ((cell_px - photo.width) // 2, (cell_px - photo.height) // 2)
            tile.paste(photo, offset)
            cells.append(tile)
        except Exception:
            continue

    if not cells:
        raise RuntimeError("no usable product reference images")

    # One or two images sit side by side; three or more wrap at three per row.
    cols = min(len(cells), 3)
    rows = -(-len(cells) // cols)
    sheet = Image.new("RGB", (cols * cell_px, rows * cell_px), (245, 245, 245))
    for index, tile in enumerate(cells):
        row, col = divmod(index, cols)
        sheet.paste(tile, (col * cell_px, row * cell_px))

    out_path.parent.mkdir(parents=True, exist_ok=True)
    sheet.save(out_path, format="JPEG", quality=94)
    return out_path
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/jobs/{job_id}/assets/product-angle")
|
@app.post("/jobs/{job_id}/assets/product-angle")
|
||||||
def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) -> dict:
|
def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) -> dict:
|
||||||
if job_id not in JOBS:
|
if job_id not in JOBS:
|
||||||
@@ -5165,11 +5155,6 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
|
|||||||
if not source_paths:
|
if not source_paths:
|
||||||
raise HTTPException(404, "source product image not found")
|
raise HTTPException(404, "source product image not found")
|
||||||
source_path = source_paths[0]
|
source_path = source_paths[0]
|
||||||
model_src = source_path
|
|
||||||
sheet_tmp: Path | None = None
|
|
||||||
if len(source_paths) > 1:
|
|
||||||
sheet_tmp = job_dir(job_id) / "tmp" / f"product_angle_refs_{uuid.uuid4().hex[:8]}.jpg"
|
|
||||||
model_src = _make_product_angle_reference_sheet(source_paths, sheet_tmp)
|
|
||||||
target_view = (req.target_view or "目标视角").strip()
|
target_view = (req.target_view or "目标视角").strip()
|
||||||
note = (req.note or "").strip()
|
note = (req.note or "").strip()
|
||||||
source_notes = [re.sub(r"\s+", " ", str(item)).strip()[:180] for item in (req.source_notes or []) if str(item).strip()]
|
source_notes = [re.sub(r"\s+", " ", str(item)).strip()[:180] for item in (req.source_notes or []) if str(item).strip()]
|
||||||
@@ -5181,11 +5166,11 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
|
|||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
prompt = (
|
prompt = (
|
||||||
"Use the reference image or reference board as evidence for the same SKG neck-and-shoulder wearable massage product. "
|
"Use all provided reference images as evidence for the same SKG neck-and-shoulder wearable massage product. "
|
||||||
"If a reference board is provided, all panels are the same product from uploaded views; do not output a board, collage, or multiple products. "
|
"Each input image is one uploaded view of the same product; do not output a board, collage, or multiple products. "
|
||||||
f"Generate a clean product-only white-background reference image in this missing view: {target_view}. "
|
f"Generate a clean product-only white-background reference image in this missing view: {target_view}. "
|
||||||
+ source_note_clause
|
+ source_note_clause
|
||||||
"Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. "
|
+ "Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. "
|
||||||
"Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. "
|
"Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. "
|
||||||
"Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. "
|
"Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. "
|
||||||
"The product should be complete, centered, isolated on pure white, large enough to inspect, with no hands, people, packaging, text, UI, watermark, extra accessories, or scene background. "
|
"The product should be complete, centered, isolated on pure white, large enough to inspect, with no hands, people, packaging, text, UI, watermark, extra accessories, or scene background. "
|
||||||
@@ -5194,15 +5179,9 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
|
|||||||
)
|
)
|
||||||
models = [GPT_IMAGE_MODEL]
|
models = [GPT_IMAGE_MODEL]
|
||||||
try:
|
try:
|
||||||
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1600)
|
img_bytes, _mode = _image_edit_call(source_paths, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1600)
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}")
|
raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}")
|
||||||
finally:
|
|
||||||
if sheet_tmp and sheet_tmp.exists():
|
|
||||||
try:
|
|
||||||
sheet_tmp.unlink()
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
asset_id = f"product_angle_{uuid.uuid4().hex[:10]}"
|
asset_id = f"product_angle_{uuid.uuid4().hex[:10]}"
|
||||||
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
|
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
|
||||||
_normalize_asset_image(img_bytes, out_path, source_path, "1024", "white", square=True, fill_subject=True)
|
_normalize_asset_image(img_bytes, out_path, source_path, "1024", "white", square=True, fill_subject=True)
|
||||||
|
|||||||
@@ -894,7 +894,7 @@ ProductRefStateItem {
|
|||||||
<tr><td>产品图入库到 job</td><td><code>POST /jobs/{id}/assets</code>、<code>POST /jobs/{id}/assets/product-library</code></td><td><code>uploadStoryboardAsset</code>、<code>copyProductLibraryAsset</code></td><td>上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本,透明底铺白,过大/过小图片会在 <code>ImageRef.asset_meta</code> 里返回转换动作和风险;黑底/白底背景本身不强行转换。注意该接口只写图片文件,产品素材池列表另由 <code>PUT /jobs/{id}/product-refs</code> 持久化。</td></tr>
|
<tr><td>产品图入库到 job</td><td><code>POST /jobs/{id}/assets</code>、<code>POST /jobs/{id}/assets/product-library</code></td><td><code>uploadStoryboardAsset</code>、<code>copyProductLibraryAsset</code></td><td>上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本,透明底铺白,过大/过小图片会在 <code>ImageRef.asset_meta</code> 里返回转换动作和风险;黑底/白底背景本身不强行转换。注意该接口只写图片文件,产品素材池列表另由 <code>PUT /jobs/{id}/product-refs</code> 持久化。</td></tr>
|
||||||
<tr><td>产品素材池保存</td><td><code>PUT /jobs/{id}/product-refs</code></td><td><code>saveProductRefs</code></td><td>把当前 job 的产品素材池列表、识别视角、用途标签、方向、结构点、备注、AI 补图和删除结果保存到 <code>Job.product_refs</code> / <code>state.json</code>。前端上传、识别完成、补角度、编辑备注和删除时都会同步保存;刷新页面或热更新后从 job 恢复,不再要求重新上传和重新识别。</td></tr>
|
<tr><td>产品素材池保存</td><td><code>PUT /jobs/{id}/product-refs</code></td><td><code>saveProductRefs</code></td><td>把当前 job 的产品素材池列表、识别视角、用途标签、方向、结构点、备注、AI 补图和删除结果保存到 <code>Job.product_refs</code> / <code>state.json</code>。前端上传、识别完成、补角度、编辑备注和删除时都会同步保存;刷新页面或热更新后从 job 恢复,不再要求重新上传和重新识别。</td></tr>
|
||||||
<tr><td>产品视角识别</td><td><code>POST /jobs/{id}/assets/product-views/analyze</code></td><td><code>analyzeProductViews</code></td><td>读取同一产品素材池,按批次把多张图一次性提交给 <code>PRODUCT_VIEW_MODEL=gpt-image-2</code> 做视角标注,不限制只看前 6 张;识别对象被固定为套在脖子上的 U 形肩颈按摩仪。返回 <code>view</code>、<code>background</code>、<code>use_tags</code>、<code>orientation</code>、<code>landmarks</code>、中文备注、生成风险和置信度;<code>orientation</code> 明确佩戴者左/右、上/下、内外侧和开口方向对应图中哪边,避免把图片左右误当产品左右。前端不再要求用户手动选择视角,也不做不同产品身份判断。</td></tr>
|
<tr><td>产品视角识别</td><td><code>POST /jobs/{id}/assets/product-views/analyze</code></td><td><code>analyzeProductViews</code></td><td>读取同一产品素材池,按批次把多张图一次性提交给 <code>PRODUCT_VIEW_MODEL=gpt-image-2</code> 做视角标注,不限制只看前 6 张;识别对象被固定为套在脖子上的 U 形肩颈按摩仪。返回 <code>view</code>、<code>background</code>、<code>use_tags</code>、<code>orientation</code>、<code>landmarks</code>、中文备注、生成风险和置信度;<code>orientation</code> 明确佩戴者左/右、上/下、内外侧和开口方向对应图中哪边,避免把图片左右误当产品左右。前端不再要求用户手动选择视角,也不做不同产品身份判断。</td></tr>
|
||||||
<tr><td>产品缺角度补图</td><td><code>POST /jobs/{id}/assets/product-angle</code></td><td><code>generateProductAngleAsset</code></td><td>用当前同一产品素材池作为参考,通过 <code>gpt-image-2</code> 自动补全缺失视角,输出新的 <code>ImageRef(kind="asset")</code>。前端不再固定传第一张图,而是按目标视角给已上传/已标注参考图打分,优先选择真实上传图、目标相邻视角、侧厚/触点/底部对应用途标签和低风险高置信图,最多传 6 张;后端把这些参考图拼成同产品参考板,再通过 <code>/images/edits</code> multipart 提交给 <code>gpt-image-2</code>。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例,并禁止输出拼图/多产品;遇到上游 429 / saturated 会按退避节奏重试,最终仍失败时返回 503 和可读提示。</td></tr>
|
<tr><td>产品缺角度补图</td><td><code>POST /jobs/{id}/assets/product-angle</code></td><td><code>generateProductAngleAsset</code></td><td>用当前同一产品素材池作为参考,通过 <code>gpt-image-2</code> 自动补全缺失视角,输出新的 <code>ImageRef(kind="asset")</code>。前端不再固定传第一张图,而是按目标视角给已上传/已标注参考图打分,优先选择真实上传图、目标相邻视角、侧厚/触点/底部对应用途标签和低风险高置信图,最多传 6 张;后端通过 <code>/images/edits</code> multipart 的多张 <code>image[]</code> 直接提交给 <code>gpt-image-2</code>,不再把参考图拼成一张板,降低模型误解成拼图/多产品的概率。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例,并禁止输出拼图/多产品;遇到上游 429 / saturated 会按退避节奏重试,最终仍失败时返回 503 和可读提示。</td></tr>
|
||||||
<tr><td>角色库</td><td><code>GET /character-library/skg</code></td><td><code>listCharacterLibrary</code></td><td>读取内置 5 个透明骨架人角色 manifest,每个角色含正面、左右 45 度、侧面、背面、半身近景和背部特写 7 张参考图。</td></tr>
|
<tr><td>角色库</td><td><code>GET /character-library/skg</code></td><td><code>listCharacterLibrary</code></td><td>读取内置 5 个透明骨架人角色 manifest,每个角色含正面、左右 45 度、侧面、背面、半身近景和背部特写 7 张参考图。</td></tr>
|
||||||
<tr><td>角色图入库到 job</td><td><code>POST /jobs/{id}/assets/character-library</code></td><td><code>copyCharacterLibraryAssets</code></td><td>把所选角色的 7 张参考图复制为当前 job asset,返回 <code>subject_images</code>,产品融合生成视频时作为人物身份参考图提交。</td></tr>
|
<tr><td>角色图入库到 job</td><td><code>POST /jobs/{id}/assets/character-library</code></td><td><code>copyCharacterLibraryAssets</code></td><td>把所选角色的 7 张参考图复制为当前 job asset,返回 <code>subject_images</code>,产品融合生成视频时作为人物身份参考图提交。</td></tr>
|
||||||
<tr><td>产品融合引导图</td><td><code>POST /jobs/{id}/product-fusion/guide</code></td><td><code>createProductFusionGuide</code></td><td>旧流程兼容接口:读取产品图和白底人物图,按 <code>product_region</code> 合成位置引导图。当前内置角色 + 产品 + 描述流程不再主动调用它。</td></tr>
|
<tr><td>产品融合引导图</td><td><code>POST /jobs/{id}/product-fusion/guide</code></td><td><code>createProductFusionGuide</code></td><td>旧流程兼容接口:读取产品图和白底人物图,按 <code>product_region</code> 合成位置引导图。当前内置角色 + 产品 + 描述流程不再主动调用它。</td></tr>
|
||||||
@@ -1013,7 +1013,7 @@ ProductRefStateItem {
|
|||||||
</header>
|
</header>
|
||||||
<div class="body">
|
<div class="body">
|
||||||
<p><strong>问题:</strong>同一产品素材池的视角标注仍显示通用 Vision 模型;缺角度补图固定拿第一张产品图作为参考,少侧面或内侧时容易用错误视角硬推,生成结果偏离产品真实结构。</p>
|
<p><strong>问题:</strong>同一产品素材池的视角标注仍显示通用 Vision 模型;缺角度补图固定拿第一张产品图作为参考,少侧面或内侧时容易用错误视角硬推,生成结果偏离产品真实结构。</p>
|
||||||
<p><strong>改动:</strong><code>api/main.py</code> 新增 <code>PRODUCT_VIEW_MODEL=gpt-image-2</code>,<code>analyze_product_view</code> / <code>analyze_product_views_batch</code> 改用该模型并在 <code>/health</code> 返回 <code>models.product_view</code>。<code>generateProductAngleAsset</code> 前端请求新增 <code>source_refs</code> 和 <code>source_notes</code>;<code>AudioStoryboardPlanPanel</code> 按目标视角给产品图打分,优先真实上传图、相邻视角、用途标签、置信度和低风险图,最多传 6 张。后端把多张参考图拼成同产品参考板,再用 <code>gpt-image-2</code> 生成目标角度,避免只照抄第一张。</p>
|
<p><strong>改动:</strong><code>api/main.py</code> 新增 <code>PRODUCT_VIEW_MODEL=gpt-image-2</code>,<code>analyze_product_view</code> / <code>analyze_product_views_batch</code> 改用该模型并在 <code>/health</code> 返回 <code>models.product_view</code>。<code>generateProductAngleAsset</code> 前端请求新增 <code>source_refs</code> 和 <code>source_notes</code>;<code>AudioStoryboardPlanPanel</code> 按目标视角给产品图打分,优先真实上传图、相邻视角、用途标签、置信度和低风险图,最多传 6 张。后端把多张参考图作为独立 <code>image[]</code> 提交给 <code>gpt-image-2</code> 生成目标角度,避免只照抄第一张,也避免参考板被模型当成拼图格式。</p>
|
||||||
<p><strong>影响:</strong><code>api/main.py</code>、<code>web/lib/api.ts</code>、<code>web/components/ad-recreation-board.tsx</code>、<code>RULES.md</code>、<code>api/.env.example</code>、<code>deploy/.env.production.example</code>、<code>docs/source-analysis.html</code>。后续补产品角度必须从同一产品素材池里挑多张证据图,不要再默认第一张。</p>
|
<p><strong>影响:</strong><code>api/main.py</code>、<code>web/lib/api.ts</code>、<code>web/components/ad-recreation-board.tsx</code>、<code>RULES.md</code>、<code>api/.env.example</code>、<code>deploy/.env.production.example</code>、<code>docs/source-analysis.html</code>。后续补产品角度必须从同一产品素材池里挑多张证据图,不要再默认第一张。</p>
|
||||||
</div>
|
</div>
|
||||||
</article>
|
</article>
|
||||||
|
|||||||
Reference in New Issue
Block a user