diff --git a/.memory/worklog.json b/.memory/worklog.json
index b171c21..a235186 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,18 +1,5 @@
{
"entries": [
- {
- "files_changed": 1,
- "message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-15 16:49 (~4)",
- "ts": "2026-05-15T08:54:48Z",
- "type": "session-heartbeat"
- },
- {
- "files_changed": 1,
- "hash": "0d57081",
- "message": "auto-save 2026-05-15 16:55 (~1)",
- "ts": "2026-05-15T16:55:21+08:00",
- "type": "commit"
- },
{
"files_changed": 3,
"hash": "c53d27d",
@@ -3254,6 +3241,19 @@
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:fix: use image edits for gpt references",
"files_changed": 1
+ },
+ {
+ "ts": "2026-05-18T07:00:37+08:00",
+ "type": "commit",
+ "message": "auto-save 2026-05-18 07:00 (~2)",
+ "hash": "d72bf62",
+ "files_changed": 2
+ },
+ {
+ "ts": "2026-05-17T23:03:44Z",
+ "type": "session-heartbeat",
+ "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 7 项未提交变更 · 最近提交:auto-save 2026-05-18 07:00 (~2)",
+ "files_changed": 7
}
]
}
diff --git a/RULES.md b/RULES.md
index 098165b..cb81ee3 100644
--- a/RULES.md
+++ b/RULES.md
@@ -59,6 +59,7 @@
- `REWRITE_MODEL`:通用改写/分镜描述模型,默认 `gemini-2.5-pro`
- `AUDIO_REWRITE_MODEL`:后续音频口播改写模型,默认跟随 `REWRITE_MODEL`;当前第一步不默认调用口播改写,只保留原文案和声音分析
- `AUDIO_PRODUCT_BRIEF`:音频口播改写时注入的 SKG 产品卖点
+- `PRODUCT_VIEW_MODEL`:同一产品素材池的视角标注/自动识别模型;当前按项目要求强制使用 `gpt-image-2`
- `IMAGE_BASE_URL` / `IMAGE_API_KEY` / `IMAGE_MODEL`:OpenAI 兼容生图网关;当前所有生图入口一律强制使用 `gpt-image-2`,不做其他图片模型 fallback
- `GPT_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODELS`:保留兼容旧环境变量名,但服务端会强制主体 6 视图和所有其他生图入口都只使用 `gpt-image-2`
- `VOICE_PROVIDER`:配音通道,当前固定使用 `azure_openai`
diff --git a/api/.env.example b/api/.env.example
index d0e647d..ea284b2 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -18,6 +18,7 @@ LOCAL_ASR_MODEL=mlx-community/whisper-tiny
LOCAL_ASR_TIMEOUT_SECONDS=180
TRANSLATE_MODEL=gemini-2.5-flash
REWRITE_MODEL=gemini-2.5-pro
+PRODUCT_VIEW_MODEL=gpt-image-2
IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
IMAGE_API_KEY=
IMAGE_MODEL=gpt-image-2
diff --git a/api/main.py b/api/main.py
index a700420..89c9c7d 100644
--- a/api/main.py
+++ b/api/main.py
@@ -4624,6 +4624,8 @@ class CopyCharacterLibraryAssetReq(BaseModel):
class GenerateProductAngleAssetReq(BaseModel):
source_ref: dict
+ source_refs: list[dict] = Field(default_factory=list)
+ source_notes: list[str] = Field(default_factory=list)
target_view: str
note: str = ""
@@ -5016,7 +5018,7 @@ def product_view_batch_prompt(indices: list[int]) -> str:
def analyze_product_view(ref_path: Path, index: int) -> dict:
- if not LLM_API_KEY:
+ if not (IMAGE_API_KEY if PRODUCT_VIEW_MODEL == GPT_IMAGE_MODEL else LLM_API_KEY):
return fallback_product_view(index)
img_b64 = base64.b64encode(ref_path.read_bytes()).decode("ascii")
prompt = (
@@ -5029,8 +5031,8 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
"{\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.86}."
)
try:
- resp = llm().chat.completions.create(
- model=VISION_MODEL,
+ resp = product_view_llm().chat.completions.create(
+ model=PRODUCT_VIEW_MODEL,
messages=[{"role": "user", "content": [
{"type": "text", "text": prompt},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
@@ -5050,7 +5052,7 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[int, dict]:
- if not LLM_API_KEY:
+ if not (IMAGE_API_KEY if PRODUCT_VIEW_MODEL == GPT_IMAGE_MODEL else LLM_API_KEY):
return {index: fallback_product_view(index) for index, _path in paths_by_index}
results: dict[int, dict] = {}
for start in range(0, len(paths_by_index), PRODUCT_VIEW_BATCH_SIZE):
@@ -5062,8 +5064,8 @@ def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[
content.append({"type": "text", "text": f"Image index {index}"})
content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}})
try:
- resp = llm().chat.completions.create(
- model=VISION_MODEL,
+ resp = product_view_llm().chat.completions.create(
+ model=PRODUCT_VIEW_MODEL,
messages=[{"role": "user", "content": content}],
response_format={"type": "json_object"},
temperature=0.05,
@@ -5121,18 +5123,68 @@ def analyze_product_views(job_id: str, req: AnalyzeProductViewsReq) -> dict:
return {"items": items, "missing_views": missing}
+def _make_product_angle_reference_sheet(paths: list[Path], out_path: Path) -> Path:
+    """Tile up to six product reference images into a single JPEG board.
+
+    Every readable image is EXIF-rotated, converted to RGB, shrunk to fit
+    520x520 and centred on a white 560x560 cell. Cells fill a light-grey
+    canvas row by row, at most three per row, which is saved to ``out_path``
+    (parent directories are created as needed).
+
+    Args:
+        paths: Candidate image files; only the first six are considered.
+        out_path: Destination of the generated JPEG.
+
+    Returns:
+        ``out_path``.
+
+    Raises:
+        RuntimeError: If none of the candidate images could be loaded.
+    """
+    thumbs: list[Image.Image] = []
+    for path in paths[:6]:
+        try:
+            # Image.open is lazy and keeps the file open; the context manager
+            # releases the handle even when decoding fails part-way through.
+            with Image.open(path) as opened:
+                img = ImageOps.exif_transpose(opened).convert("RGB")
+            img.thumbnail((520, 520), Image.Resampling.LANCZOS)
+            cell = Image.new("RGB", (560, 560), (255, 255, 255))
+            cell.paste(img, ((560 - img.width) // 2, (560 - img.height) // 2))
+            thumbs.append(cell)
+        except Exception:
+            # Best effort: an unreadable reference is skipped, not fatal.
+            continue
+    if not thumbs:
+        raise RuntimeError("no usable product reference images")
+    cols = 3 if len(thumbs) > 2 else len(thumbs)
+    rows = (len(thumbs) + cols - 1) // cols
+    sheet = Image.new("RGB", (cols * 560, rows * 560), (245, 245, 245))
+    for i, thumb in enumerate(thumbs):
+        sheet.paste(thumb, ((i % cols) * 560, (i // cols) * 560))
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    sheet.save(out_path, "JPEG", quality=94)
+    return out_path
+
+
@app.post("/jobs/{job_id}/assets/product-angle")
def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) -> dict:
if job_id not in JOBS:
raise HTTPException(404, "job not found")
- source_path = storyboard_ref_path(job_id, req.source_ref)
- if not source_path or not source_path.exists():
+ raw_refs = [req.source_ref] + list(req.source_refs or [])
+ source_paths: list[Path] = []
+ seen_paths: set[str] = set()
+ for ref in raw_refs:
+ ref_path = storyboard_ref_path(job_id, ref)
+ if ref_path and ref_path.exists():
+ key = str(ref_path)
+ if key not in seen_paths:
+ seen_paths.add(key)
+ source_paths.append(ref_path)
+ if len(source_paths) >= 6:
+ break
+ if not source_paths:
raise HTTPException(404, "source product image not found")
+ source_path = source_paths[0]
+ model_src = source_path
+ sheet_tmp: Path | None = None
+ if len(source_paths) > 1:
+ sheet_tmp = job_dir(job_id) / "tmp" / f"product_angle_refs_{uuid.uuid4().hex[:8]}.jpg"
+ model_src = _make_product_angle_reference_sheet(source_paths, sheet_tmp)
target_view = (req.target_view or "目标视角").strip()
note = (req.note or "").strip()
+ source_notes = [re.sub(r"\s+", " ", str(item)).strip()[:180] for item in (req.source_notes or []) if str(item).strip()]
+ source_note_clause = (
+ "Uploaded reference notes from the operator/view recognizer: "
+ + " | ".join(source_notes[:6])
+ + ". "
+ if source_notes
+ else ""
+ )
prompt = (
- "Use the reference image as the same SKG neck-and-shoulder wearable massage product. "
+ "Use the reference image or reference board as evidence for the same SKG neck-and-shoulder wearable massage product. "
+ "If a reference board is provided, all panels are the same product from uploaded views; do not output a board, collage, or multiple products. "
f"Generate a clean product-only white-background reference image in this missing view: {target_view}. "
+ # Explicit "+" on both sides: only adjacent string literals concatenate implicitly; a name followed by a literal is a SyntaxError.
+ + source_note_clause +
"Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. "
"Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. "
"Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. "
@@ -5142,9 +5194,15 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
)
models = [GPT_IMAGE_MODEL]
try:
- img_bytes, _mode = _image_edit_call(source_path, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1280)
+ img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1600)
except RuntimeError as e:
raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}")
+ finally:
+ if sheet_tmp and sheet_tmp.exists():
+ try:
+ sheet_tmp.unlink()
+ except OSError:
+ pass
asset_id = f"product_angle_{uuid.uuid4().hex[:10]}"
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
_normalize_asset_image(img_bytes, out_path, source_path, "1024", "white", square=True, fill_subject=True)
diff --git a/deploy/.env.production.example b/deploy/.env.production.example
index b1d7d20..2e86f1a 100644
--- a/deploy/.env.production.example
+++ b/deploy/.env.production.example
@@ -23,6 +23,7 @@ ASR_MODEL=whisper-1
ASR_FALLBACK_MODEL=gemini-2.5-flash
TRANSLATE_MODEL=gemini-2.5-flash
REWRITE_MODEL=gemini-2.5-pro
+PRODUCT_VIEW_MODEL=gpt-image-2
IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
IMAGE_API_KEY=
IMAGE_MODEL=gpt-image-2
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index 5455c64..7d94104 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -870,7 +870,7 @@ ProductRefStateItem {
| 网页登录 | POST /auth/login、GET /auth/check、POST /auth/logout | web/app/login/page.tsx、Nginx auth_request | 登录页提交账号密码到 /api/auth/login,后端设置 HttpOnly 会话 Cookie;生产 Nginx 对工作台和 /api/ 调 /auth/check 做统一校验,未登录页面跳 /login/,API 返回 JSON 401。 |
- | 运行配置 / 模型标注 | GET /health | getRuntimeHealth、ModelTrace | 返回 models:ASR、本机 ASR、ASR fallback、翻译、改写、Vision、GPT 图像模型、主体 6 视图 GPT 图像模型、Azure OpenAI TTS、视频别名和 Seedance 服务商。前端所有当前主路径里会调用模型的按钮旁显示模型名,点击弹出小窗口查看模型链路和输入输出逻辑;不返回 API Key 或敏感凭证。 |
+ | 运行配置 / 模型标注 | GET /health | getRuntimeHealth、ModelTrace | 返回 models:ASR、本机 ASR、ASR fallback、翻译、改写、通用 Vision、产品视角识别 product_view、GPT 图像模型、主体 6 视图 GPT 图像模型、Azure OpenAI TTS、视频别名和 Seedance 服务商。前端所有当前主路径里会调用模型的按钮旁显示模型名,点击弹出小窗口查看模型链路和输入输出逻辑;不返回 API Key 或敏感凭证。 |
| 历史列表 | GET /jobs | listJobs | 所有 job 精简列表(id/url/status/thumbnail/mtime…),按 state.json mtime 倒序。前端 URL 无 ?job= 时拉它回填全部历史;带 limit 可截断。 |
| 创建任务 | POST /jobs | createJob | 提交 TK 链接,后台开始下载;前端“开始”队列会在 downloaded 后自动触发音频解析。 |
| 上传视频 | POST /jobs/upload | uploadJob | 保存 source.mp4,然后同样进入下载完成状态;当前上传后也加入第一步队列,下载完成后自动解析音频。 |
@@ -893,8 +893,8 @@ ProductRefStateItem {
| 产品图库 | GET /product-library/skg | listProductLibrary | 读取内置 SKG 白底图库 manifest,返回产品标题、品类、尺寸、白底评分和预览图 URL。 |
| 产品图入库到 job | POST /jobs/{id}/assets、POST /jobs/{id}/assets/product-library | uploadStoryboardAsset、copyProductLibraryAsset | 上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本,透明底铺白,过大/过小图片会在 ImageRef.asset_meta 里返回转换动作和风险;黑底/白底背景本身不强行转换。注意该接口只写图片文件,产品素材池列表另由 PUT /jobs/{id}/product-refs 持久化。 |
| 产品素材池保存 | PUT /jobs/{id}/product-refs | saveProductRefs | 把当前 job 的产品素材池列表、识别视角、用途标签、方向、结构点、备注、AI 补图和删除结果保存到 Job.product_refs / state.json。前端上传、识别完成、补角度、编辑备注和删除时都会同步保存;刷新页面或热更新后从 job 恢复,不再要求重新上传和重新识别。 |
- | 产品视角识别 | POST /jobs/{id}/assets/product-views/analyze | analyzeProductViews | 读取同一产品素材池,按批次把多张图一次性提交给视觉模型,不限制只看前 6 张;识别对象被固定为套在脖子上的 U 形肩颈按摩仪。返回 view、background、use_tags、orientation、landmarks、中文备注、生成风险和置信度;orientation 明确佩戴者左/右、上/下、内外侧和开口方向对应图中哪边,避免把图片左右误当产品左右。前端不再要求用户手动选择视角,也不做不同产品身份判断。 |
- | 产品缺角度补图 | POST /jobs/{id}/assets/product-angle | generateProductAngleAsset | 用当前产品白底图作为参考,通过图像模型自动补全缺失视角,输出新的 ImageRef(kind="asset")。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例;图生图通过 /images/edits multipart 提交参考图,不再把 image 当 JSON 参数塞进 /images/generations;遇到 gpt-image-2 上游 429 / saturated 会按退避节奏重试,最终仍失败时返回 503 和可读提示。前端只在自动补图失败时暴露重试入口。 |
+ | 产品视角识别 | POST /jobs/{id}/assets/product-views/analyze | analyzeProductViews | 读取同一产品素材池,按批次把多张图一次性提交给 PRODUCT_VIEW_MODEL=gpt-image-2 做视角标注,不限制只看前 6 张;识别对象被固定为套在脖子上的 U 形肩颈按摩仪。返回 view、background、use_tags、orientation、landmarks、中文备注、生成风险和置信度;orientation 明确佩戴者左/右、上/下、内外侧和开口方向对应图中哪边,避免把图片左右误当产品左右。前端不再要求用户手动选择视角,也不做不同产品身份判断。 |
+ | 产品缺角度补图 | POST /jobs/{id}/assets/product-angle | generateProductAngleAsset | 用当前同一产品素材池作为参考,通过 gpt-image-2 自动补全缺失视角,输出新的 ImageRef(kind="asset")。前端不再固定传第一张图,而是按目标视角给已上传/已标注参考图打分,优先选择真实上传图、目标相邻视角、侧厚/触点/底部对应用途标签和低风险高置信图,最多传 6 张;后端把这些参考图拼成同产品参考板,再通过 /images/edits multipart 提交给 gpt-image-2。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例,并禁止输出拼图/多产品;遇到上游 429 / saturated 会按退避节奏重试,最终仍失败时返回 503 和可读提示。 |
| 角色库 | GET /character-library/skg | listCharacterLibrary | 读取内置 5 个透明骨架人角色 manifest,每个角色含正面、左右 45 度、侧面、背面、半身近景和背部特写 7 张参考图。 |
| 角色图入库到 job | POST /jobs/{id}/assets/character-library | copyCharacterLibraryAssets | 把所选角色的 7 张参考图复制为当前 job asset,返回 subject_images,产品融合生成视频时作为人物身份参考图提交。 |
| 产品融合引导图 | POST /jobs/{id}/product-fusion/guide | createProductFusionGuide | 旧流程兼容接口:读取产品图和白底人物图,按 product_region 合成位置引导图。当前内置角色 + 产品 + 描述流程不再主动调用它。 |
@@ -1004,6 +1004,19 @@ ProductRefStateItem {
变更记录
这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。
+
+
+ 2026-05-18 · 产品视角识别切到 gpt-image-2 并重做补图参考选择
+ API
+ UI
+ Workflow
+
+
+
问题:同一产品素材池的视角标注仍显示通用 Vision 模型;缺角度补图固定拿第一张产品图作为参考,少侧面或内侧时容易用错误视角硬推,生成结果偏离产品真实结构。
+
改动:api/main.py 新增 PRODUCT_VIEW_MODEL=gpt-image-2,analyze_product_view / analyze_product_views_batch 改用该模型并在 /health 返回 models.product_view。generateProductAngleAsset 前端请求新增 source_refs 和 source_notes;AudioStoryboardPlanPanel 按目标视角给产品图打分,优先真实上传图、相邻视角、用途标签、置信度和低风险图,最多传 6 张。后端把多张参考图拼成同产品参考板,再用 gpt-image-2 生成目标角度,避免只照抄第一张。
+
影响:api/main.py、web/lib/api.ts、web/components/ad-recreation-board.tsx、RULES.md、api/.env.example、deploy/.env.production.example、docs/source-analysis.html。后续补产品角度必须从同一产品素材池里挑多张证据图,不要再默认第一张。
+
+
2026-05-18 · gpt-image-2 图生图改用 edits 并处理上游饱和
diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx
index 6e1d928..83d4f22 100644
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -368,10 +368,10 @@ function audioModelTrace(models?: RuntimeModels): ModelTraceSpec {
function productModelTrace(models?: RuntimeModels): ModelTraceSpec {
return {
title: "产品视角识别 / 补图",
- model: modelList([models?.vision, models?.image]),
+ model: modelList([models?.product_view, models?.image]),
chain: [
- `批量视角识别:${modelValue(models?.vision)} 一次读取同一产品多张图,标注视角、左右、上下、用途和风险`,
- `缺角度补图:${imageModelChain(models)} 按同一肩颈按摩仪结构补齐缺失视角`,
+ `批量视角识别:${modelValue(models?.product_view)} 一次读取同一产品多张图,标注视角、左右、上下、用途和风险`,
+ `缺角度补图:${imageModelChain(models)} 读取最相关的多张已上传参考图,按同一肩颈按摩仪结构补齐缺失视角`,
"前端只保存标注和 AI 补图结果;后续生成视频时每条最多挑 6 张相关产品图",
],
note: "上传产品图、重新识别、缺视角重试都会使用这组模型链路。",
@@ -620,6 +620,55 @@ function createProductRefItem(
}
}
+/**
+ * Preferred reference views for each missing target view, best first.
+ * Keys are target view ids; the position of a candidate's view in the list
+ * determines its base score in productAngleReferenceScore.
+ */
+const PRODUCT_ANGLE_REFERENCE_PRIORITY: Record<string, string[]> = {
+  front: ["front", "left_45", "right_45", "side_thickness", "inner_contacts", "back_bottom"],
+  left_45: ["left_45", "front", "side_thickness", "right_45", "inner_contacts", "back_bottom"],
+  right_45: ["right_45", "front", "side_thickness", "left_45", "inner_contacts", "back_bottom"],
+  side_thickness: ["side_thickness", "left_45", "right_45", "front", "inner_contacts", "back_bottom"],
+  inner_contacts: ["inner_contacts", "side_thickness", "front", "left_45", "right_45", "back_bottom"],
+  back_bottom: ["back_bottom", "side_thickness", "inner_contacts", "left_45", "right_45", "front"],
+}
+
+/**
+ * Score how useful `item` is as a reference when generating `targetView`.
+ * Higher is better. The score is the sum of: a base value from the item's
+ * position in the target's priority list, a bonus for uploaded/library images
+ * (penalty for AI-generated ones), a confidence bonus, an asymmetry-tag bonus,
+ * a bonus when the item carries the use tag matching the target, minus a
+ * penalty when the item has a risk note.
+ */
+function productAngleReferenceScore(item: ProductRefItem, targetView: string) {
+  const order = PRODUCT_ANGLE_REFERENCE_PRIORITY[targetView] ?? PRODUCT_VIEW_SLOTS.map((slot) => slot.value)
+  const position = order.indexOf(item.view)
+  // Views missing from the priority list contribute no base score.
+  const viewBonus = position < 0 ? 0 : 90 - position * 12
+  const sourceBonus =
+    item.source === "upload" || item.source === "library" ? 28 : item.source === "ai" ? -18 : 0
+  const confidenceBonus = item.confidence ? Math.round(item.confidence * 14) : 0
+  const asymmetryBonus = item.useTags.includes("asymmetry") ? 8 : 0
+  // At most one of these can hold, because targetView has a single value.
+  const matchesTargetTag =
+    (targetView === "side_thickness" && item.useTags.includes("side_thickness")) ||
+    (targetView === "inner_contacts" && item.useTags.includes("inner_contact")) ||
+    (targetView === "back_bottom" && item.useTags.includes("back_bottom"))
+  const targetTagBonus = matchesTargetTag ? 16 : 0
+  const riskPenalty = item.risk ? 10 : 0
+  return viewBonus + sourceBonus + confidenceBonus + asymmetryBonus + targetTagBonus - riskPenalty
+}
+
+/**
+ * Pick up to six reference items for generating `targetView`, best match first.
+ * Items are de-duplicated by id (first occurrence wins) and ranked by
+ * productAngleReferenceScore; equal scores keep their input order because
+ * Array.prototype.sort is stable.
+ */
+function selectProductAngleReferenceItems(items: ProductRefItem[], targetView: string): ProductRefItem[] {
+  // Typed Map: a bare `new Map()` is Map<any, any> and would leak `any[]` to callers.
+  const unique = new Map<ProductRefItem["id"], ProductRefItem>()
+  for (const item of items) {
+    if (!unique.has(item.id)) unique.set(item.id, item)
+  }
+  // Score every candidate once instead of on each comparator call.
+  return [...unique.values()]
+    .map((item) => ({ item, score: productAngleReferenceScore(item, targetView) }))
+    .sort((a, b) => b.score - a.score)
+    .slice(0, 6)
+    .map(({ item }) => item)
+}
+
+/**
+ * Build one compact description per reference item, in the same order as
+ * `items`. Each description joins the non-empty fields (ref index, view label,
+ * source, note, orientation, landmarks, risk) with a full-width semicolon.
+ * NOTE(review): the product-angle endpoint truncates each note to 180
+ * characters, so a long `note=` may push later fields out — confirm intent.
+ */
+function productAngleSourceNotes(items: ProductRefItem[]) {
+  return items.map((item, index) => {
+    const fields = [`ref${index + 1}`, `view=${productViewLabel(item.view)}`, `source=${item.source}`]
+    if (item.note) fields.push(`note=${item.note}`)
+    const orientation = formatProductOrientation(item.orientation)
+    if (orientation) fields.push(orientation)
+    if (item.landmarks?.length) fields.push(`landmarks=${item.landmarks.join("/")}`)
+    if (item.risk) fields.push(`risk=${item.risk}`)
+    return fields.join(";")
+  })
+}
+
function normalizeStoredProductItem(item: ProductRefItem, index: number): ProductRefItem {
const ref = { ...item.ref, asset_meta: item.ref.asset_meta ?? item.assetMeta }
const restored = createProductRefItem(
@@ -1960,10 +2009,13 @@ function AudioStoryboardPlanPanel({
for (const slot of missing) {
setProductAngleBusy(slot.value)
try {
+ const references = selectProductAngleReferenceItems(working, slot.value)
const ref = await generateProductAngleAsset(job.id, {
- source_ref: working[0].ref,
+ source_ref: references[0].ref,
+ source_refs: references.map((item) => item.ref),
+ source_notes: productAngleSourceNotes(references),
target_view: slot.label,
- note: slot.hint,
+ note: `${slot.hint};请综合这些同产品参考图补目标视角,不要只照抄某一张。`,
})
working = [
...working,
@@ -2117,13 +2169,15 @@ function AudioStoryboardPlanPanel({
const generateMissingProductAngle = async (slot: typeof PRODUCT_VIEW_SLOTS[number]) => {
if (!job || !productItems.length) return
- const source = productItems[0]
+ const references = selectProductAngleReferenceItems(productItems, slot.value)
setProductAngleBusy(slot.value)
try {
const ref = await generateProductAngleAsset(job.id, {
- source_ref: source.ref,
+ source_ref: references[0].ref,
+ source_refs: references.map((item) => item.ref),
+ source_notes: productAngleSourceNotes(references),
target_view: slot.label,
- note: slot.hint,
+ note: `${slot.hint};请综合这些同产品参考图补目标视角,不要只照抄某一张。`,
})
setProductItems((prev) => {
const next = [...prev, createProductRefItem(ref, prev.length, "ai", slot.value, `AI 补齐:${slot.hint}`, "white", undefined, undefined, undefined, "", 1)]
diff --git a/web/lib/api.ts b/web/lib/api.ts
index 3d11287..a506ac8 100644
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -153,6 +153,7 @@ export interface RuntimeModels {
rewrite?: string
audio_rewrite?: string
vision?: string
+ product_view?: string
image?: string
image_base_url?: string
image_fallbacks?: string[]
@@ -224,7 +225,7 @@ export async function uploadStoryboardAsset(jobId: string, file: File): Promise<
export async function generateProductAngleAsset(
jobId: string,
- body: { source_ref: ImageRef; target_view: string; note?: string },
+ body: { source_ref: ImageRef; source_refs?: ImageRef[]; source_notes?: string[]; target_view: string; note?: string },
): Promise {
const res = await fetch(`${API_BASE}/jobs/${jobId}/assets/product-angle`, {
method: "POST",