From 5fde9f3e2258c2f786c8df1e27d15be4105227b1 Mon Sep 17 00:00:00 2001 From: kang Date: Mon, 18 May 2026 07:06:00 +0800 Subject: [PATCH] auto-save 2026-05-18 07:05 (~8) --- .memory/worklog.json | 26 ++++----- RULES.md | 1 + api/.env.example | 1 + api/main.py | 78 ++++++++++++++++++++++---- deploy/.env.production.example | 1 + docs/source-analysis.html | 19 ++++++- web/components/ad-recreation-board.tsx | 70 ++++++++++++++++++++--- web/lib/api.ts | 3 +- 8 files changed, 164 insertions(+), 35 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index b171c21..a235186 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1,18 +1,5 @@ { "entries": [ - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-15 16:49 (~4)", - "ts": "2026-05-15T08:54:48Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "hash": "0d57081", - "message": "auto-save 2026-05-15 16:55 (~1)", - "ts": "2026-05-15T16:55:21+08:00", - "type": "commit" - }, { "files_changed": 3, "hash": "c53d27d", @@ -3254,6 +3241,19 @@ "type": "session-heartbeat", "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:fix: use image edits for gpt references", "files_changed": 1 + }, + { + "ts": "2026-05-18T07:00:37+08:00", + "type": "commit", + "message": "auto-save 2026-05-18 07:00 (~2)", + "hash": "d72bf62", + "files_changed": 2 + }, + { + "ts": "2026-05-17T23:03:44Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 7 项未提交变更 · 最近提交:auto-save 2026-05-18 07:00 (~2)", + "files_changed": 7 } ] } diff --git a/RULES.md b/RULES.md index 098165b..cb81ee3 100644 --- a/RULES.md +++ b/RULES.md @@ -59,6 +59,7 @@ - `REWRITE_MODEL`:通用改写/分镜描述模型,默认 `gemini-2.5-pro` - `AUDIO_REWRITE_MODEL`:后续音频口播改写模型,默认跟随 `REWRITE_MODEL`;当前第一步不默认调用口播改写,只保留原文案和声音分析 - `AUDIO_PRODUCT_BRIEF`:音频口播改写时注入的 SKG 产品卖点 +- `PRODUCT_VIEW_MODEL`:同一产品素材池的视角标注/自动识别模型;当前按项目要求强制使用 `gpt-image-2` - `IMAGE_BASE_URL` / `IMAGE_API_KEY` / `IMAGE_MODEL`:OpenAI 兼容生图网关;当前所有生图入口一律强制使用 `gpt-image-2`,不做其他图片模型 fallback - `GPT_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODELS`:保留兼容旧环境变量名,但服务端会强制主体 6 视图和所有其他生图入口都只使用 `gpt-image-2` - `VOICE_PROVIDER`:配音通道,当前固定使用 `azure_openai` diff --git a/api/.env.example b/api/.env.example index d0e647d..ea284b2 100644 --- a/api/.env.example +++ b/api/.env.example @@ -18,6 +18,7 @@ LOCAL_ASR_MODEL=mlx-community/whisper-tiny LOCAL_ASR_TIMEOUT_SECONDS=180 TRANSLATE_MODEL=gemini-2.5-flash REWRITE_MODEL=gemini-2.5-pro +PRODUCT_VIEW_MODEL=gpt-image-2 IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1 IMAGE_API_KEY= IMAGE_MODEL=gpt-image-2 diff --git a/api/main.py b/api/main.py index a700420..89c9c7d 100644 --- a/api/main.py +++ b/api/main.py @@ -4624,6 +4624,8 @@ class CopyCharacterLibraryAssetReq(BaseModel): class GenerateProductAngleAssetReq(BaseModel): source_ref: dict + source_refs: list[dict] = Field(default_factory=list) + source_notes: list[str] = Field(default_factory=list) target_view: str note: str = "" @@ -5016,7 +5018,7 @@ def product_view_batch_prompt(indices: list[int]) -> str: def analyze_product_view(ref_path: Path, index: int) -> dict: - if not LLM_API_KEY: + if not (IMAGE_API_KEY if PRODUCT_VIEW_MODEL == GPT_IMAGE_MODEL else LLM_API_KEY): return fallback_product_view(index) img_b64 = base64.b64encode(ref_path.read_bytes()).decode("ascii") prompt = ( @@ -5029,8 +5031,8 @@ def analyze_product_view(ref_path: Path, index: int) -> dict: "{\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.86}." ) try: - resp = llm().chat.completions.create( - model=VISION_MODEL, + resp = product_view_llm().chat.completions.create( + model=PRODUCT_VIEW_MODEL, messages=[{"role": "user", "content": [ {"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}, @@ -5050,7 +5052,7 @@ def analyze_product_view(ref_path: Path, index: int) -> dict: def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[int, dict]: - if not LLM_API_KEY: + if not (IMAGE_API_KEY if PRODUCT_VIEW_MODEL == GPT_IMAGE_MODEL else LLM_API_KEY): return {index: fallback_product_view(index) for index, _path in paths_by_index} results: dict[int, dict] = {} for start in range(0, len(paths_by_index), PRODUCT_VIEW_BATCH_SIZE): @@ -5062,8 +5064,8 @@ def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[ content.append({"type": "text", "text": f"Image index {index}"}) content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}) try: - resp = llm().chat.completions.create( - model=VISION_MODEL, + resp = product_view_llm().chat.completions.create( + model=PRODUCT_VIEW_MODEL, messages=[{"role": "user", "content": content}], response_format={"type": "json_object"}, temperature=0.05, @@ -5121,18 +5123,68 @@ def analyze_product_views(job_id: str, req: AnalyzeProductViewsReq) -> dict: return {"items": items, "missing_views": missing} +def _make_product_angle_reference_sheet(paths: list[Path], out_path: Path) -> Path: + thumbs: list[Image.Image] = [] + for path in paths[:6]: + try: + img = ImageOps.exif_transpose(Image.open(path)).convert("RGB") + img.thumbnail((520, 520), Image.Resampling.LANCZOS) + cell = Image.new("RGB", (560, 560), (255, 255, 255)) + cell.paste(img, ((560 - img.width) // 2, (560 - img.height) // 2)) + thumbs.append(cell) + except Exception: + continue + if not thumbs: + raise RuntimeError("no usable product reference images") + cols = 3 if len(thumbs) > 2 else len(thumbs) + rows = (len(thumbs) + cols - 1) // cols + sheet = Image.new("RGB", (cols * 560, rows * 560), (245, 245, 245)) + for i, thumb in enumerate(thumbs): + sheet.paste(thumb, ((i % cols) * 560, (i // cols) * 560)) + out_path.parent.mkdir(parents=True, exist_ok=True) + sheet.save(out_path, "JPEG", quality=94) + return out_path + + @app.post("/jobs/{job_id}/assets/product-angle") def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) -> dict: if job_id not in JOBS: raise HTTPException(404, "job not found") - source_path = storyboard_ref_path(job_id, req.source_ref) - if not source_path or not source_path.exists(): + raw_refs = [req.source_ref] + list(req.source_refs or []) + source_paths: list[Path] = [] + seen_paths: set[str] = set() + for ref in raw_refs: + ref_path = storyboard_ref_path(job_id, ref) + if ref_path and ref_path.exists(): + key = str(ref_path) + if key not in seen_paths: + seen_paths.add(key) + source_paths.append(ref_path) + if len(source_paths) >= 6: + break + if not source_paths: raise HTTPException(404, "source product image not found") + source_path = source_paths[0] + model_src = source_path + sheet_tmp: Path | None = None + if len(source_paths) > 1: + sheet_tmp = job_dir(job_id) / "tmp" / f"product_angle_refs_{uuid.uuid4().hex[:8]}.jpg" + model_src = _make_product_angle_reference_sheet(source_paths, sheet_tmp) target_view = (req.target_view or "目标视角").strip() note = (req.note or "").strip() + source_notes = [re.sub(r"\s+", " ", str(item)).strip()[:180] for item in (req.source_notes or []) if str(item).strip()] + source_note_clause = ( + "Uploaded reference notes from the operator/view recognizer: " + + " | ".join(source_notes[:6]) + + ". " + if source_notes + else "" + ) prompt = ( - "Use the reference image as the same SKG neck-and-shoulder wearable massage product. " + "Use the reference image or reference board as evidence for the same SKG neck-and-shoulder wearable massage product. " + "If a reference board is provided, all panels are the same product from uploaded views; do not output a board, collage, or multiple products. " f"Generate a clean product-only white-background reference image in this missing view: {target_view}. " + + source_note_clause "Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. " "Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. " "Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. " @@ -5142,9 +5194,15 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) ) models = [GPT_IMAGE_MODEL] try: - img_bytes, _mode = _image_edit_call(source_path, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1280) + img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1600) except RuntimeError as e: raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}") + finally: + if sheet_tmp and sheet_tmp.exists(): + try: + sheet_tmp.unlink() + except OSError: + pass asset_id = f"product_angle_{uuid.uuid4().hex[:10]}" out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg" _normalize_asset_image(img_bytes, out_path, source_path, "1024", "white", square=True, fill_subject=True) diff --git a/deploy/.env.production.example b/deploy/.env.production.example index b1d7d20..2e86f1a 100644 --- a/deploy/.env.production.example +++ b/deploy/.env.production.example @@ -23,6 +23,7 @@ ASR_MODEL=whisper-1 ASR_FALLBACK_MODEL=gemini-2.5-flash TRANSLATE_MODEL=gemini-2.5-flash REWRITE_MODEL=gemini-2.5-pro +PRODUCT_VIEW_MODEL=gpt-image-2 IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1 IMAGE_API_KEY= IMAGE_MODEL=gpt-image-2 diff --git a/docs/source-analysis.html b/docs/source-analysis.html index 5455c64..7d94104 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -870,7 +870,7 @@ ProductRefStateItem { 网页登录POST /auth/loginGET /auth/checkPOST /auth/logoutweb/app/login/page.tsx、Nginx auth_request登录页提交账号密码到 /api/auth/login,后端设置 HttpOnly 会话 Cookie;生产 Nginx 对工作台和 /api//auth/check 做统一校验,未登录页面跳 /login/,API 返回 JSON 401。 - 运行配置 / 模型标注GET /healthgetRuntimeHealthModelTrace返回 models:ASR、本机 ASR、ASR fallback、翻译、改写、Vision、GPT 图像模型、主体 6 视图 GPT 图像模型、Azure OpenAI TTS、视频别名和 Seedance 服务商。前端所有当前主路径里会调用模型的按钮旁显示模型名,点击弹出小窗口查看模型链路和输入输出逻辑;不返回 API Key 或敏感凭证。 + 运行配置 / 模型标注GET /healthgetRuntimeHealthModelTrace返回 models:ASR、本机 ASR、ASR fallback、翻译、改写、通用 Vision、产品视角识别 product_view、GPT 图像模型、主体 6 视图 GPT 图像模型、Azure OpenAI TTS、视频别名和 Seedance 服务商。前端所有当前主路径里会调用模型的按钮旁显示模型名,点击弹出小窗口查看模型链路和输入输出逻辑;不返回 API Key 或敏感凭证。 历史列表GET /jobslistJobs所有 job 精简列表(id/url/status/thumbnail/mtime…),按 state.json mtime 倒序。前端 URL 无 ?job= 时拉它回填全部历史;带 limit 可截断。 创建任务POST /jobscreateJob提交 TK 链接,后台开始下载;前端“开始”队列会在 downloaded 后自动触发音频解析。 上传视频POST /jobs/uploaduploadJob保存 source.mp4,然后同样进入下载完成状态;当前上传后也加入第一步队列,下载完成后自动解析音频。 @@ -893,8 +893,8 @@ ProductRefStateItem { 产品图库GET /product-library/skglistProductLibrary读取内置 SKG 白底图库 manifest,返回产品标题、品类、尺寸、白底评分和预览图 URL。 产品图入库到 jobPOST /jobs/{id}/assetsPOST /jobs/{id}/assets/product-libraryuploadStoryboardAssetcopyProductLibraryAsset上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本,透明底铺白,过大/过小图片会在 ImageRef.asset_meta 里返回转换动作和风险;黑底/白底背景本身不强行转换。注意该接口只写图片文件,产品素材池列表另由 PUT /jobs/{id}/product-refs 持久化。 产品素材池保存PUT /jobs/{id}/product-refssaveProductRefs把当前 job 的产品素材池列表、识别视角、用途标签、方向、结构点、备注、AI 补图和删除结果保存到 Job.product_refs / state.json。前端上传、识别完成、补角度、编辑备注和删除时都会同步保存;刷新页面或热更新后从 job 恢复,不再要求重新上传和重新识别。 - 产品视角识别POST /jobs/{id}/assets/product-views/analyzeanalyzeProductViews读取同一产品素材池,按批次把多张图一次性提交给视觉模型,不限制只看前 6 张;识别对象被固定为套在脖子上的 U 形肩颈按摩仪。返回 viewbackgrounduse_tagsorientationlandmarks、中文备注、生成风险和置信度;orientation 明确佩戴者左/右、上/下、内外侧和开口方向对应图中哪边,避免把图片左右误当产品左右。前端不再要求用户手动选择视角,也不做不同产品身份判断。 - 产品缺角度补图POST /jobs/{id}/assets/product-anglegenerateProductAngleAsset用当前产品白底图作为参考,通过图像模型自动补全缺失视角,输出新的 ImageRef(kind="asset")。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例;图生图通过 /images/edits multipart 提交参考图,不再把 image 当 JSON 参数塞进 /images/generations;遇到 gpt-image-2 上游 429 / saturated 会按退避节奏重试,最终仍失败时返回 503 和可读提示。前端只在自动补图失败时暴露重试入口。 + 产品视角识别POST /jobs/{id}/assets/product-views/analyzeanalyzeProductViews读取同一产品素材池,按批次把多张图一次性提交给 PRODUCT_VIEW_MODEL=gpt-image-2 做视角标注,不限制只看前 6 张;识别对象被固定为套在脖子上的 U 形肩颈按摩仪。返回 viewbackgrounduse_tagsorientationlandmarks、中文备注、生成风险和置信度;orientation 明确佩戴者左/右、上/下、内外侧和开口方向对应图中哪边,避免把图片左右误当产品左右。前端不再要求用户手动选择视角,也不做不同产品身份判断。 + 产品缺角度补图POST /jobs/{id}/assets/product-anglegenerateProductAngleAsset用当前同一产品素材池作为参考,通过 gpt-image-2 自动补全缺失视角,输出新的 ImageRef(kind="asset")。前端不再固定传第一张图,而是按目标视角给已上传/已标注参考图打分,优先选择真实上传图、目标相邻视角、侧厚/触点/底部对应用途标签和低风险高置信图,最多传 6 张;后端把这些参考图拼成同产品参考板,再通过 /images/edits multipart 提交给 gpt-image-2。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例,并禁止输出拼图/多产品;遇到上游 429 / saturated 会按退避节奏重试,最终仍失败时返回 503 和可读提示。 角色库GET /character-library/skglistCharacterLibrary读取内置 5 个透明骨架人角色 manifest,每个角色含正面、左右 45 度、侧面、背面、半身近景和背部特写 7 张参考图。 角色图入库到 jobPOST /jobs/{id}/assets/character-librarycopyCharacterLibraryAssets把所选角色的 7 张参考图复制为当前 job asset,返回 subject_images,产品融合生成视频时作为人物身份参考图提交。 产品融合引导图POST /jobs/{id}/product-fusion/guidecreateProductFusionGuide旧流程兼容接口:读取产品图和白底人物图,按 product_region 合成位置引导图。当前内置角色 + 产品 + 描述流程不再主动调用它。 @@ -1004,6 +1004,19 @@ ProductRefStateItem {

变更记录

这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。

+
+
+

2026-05-18 · 产品视角识别切到 gpt-image-2 并重做补图参考选择

+ API + UI + Workflow +
+
+

问题:同一产品素材池的视角标注仍显示通用 Vision 模型;缺角度补图固定拿第一张产品图作为参考,少侧面或内侧时容易用错误视角硬推,生成结果偏离产品真实结构。

+

改动:api/main.py 新增 PRODUCT_VIEW_MODEL=gpt-image-2analyze_product_view / analyze_product_views_batch 改用该模型并在 /health 返回 models.product_viewgenerateProductAngleAsset 前端请求新增 source_refssource_notesAudioStoryboardPlanPanel 按目标视角给产品图打分,优先真实上传图、相邻视角、用途标签、置信度和低风险图,最多传 6 张。后端把多张参考图拼成同产品参考板,再用 gpt-image-2 生成目标角度,避免只照抄第一张。

+

影响:api/main.pyweb/lib/api.tsweb/components/ad-recreation-board.tsxRULES.mdapi/.env.exampledeploy/.env.production.exampledocs/source-analysis.html。后续补产品角度必须从同一产品素材池里挑多张证据图,不要再默认第一张。

+
+

2026-05-18 · gpt-image-2 图生图改用 edits 并处理上游饱和

diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx index 6e1d928..83d4f22 100644 --- a/web/components/ad-recreation-board.tsx +++ b/web/components/ad-recreation-board.tsx @@ -368,10 +368,10 @@ function audioModelTrace(models?: RuntimeModels): ModelTraceSpec { function productModelTrace(models?: RuntimeModels): ModelTraceSpec { return { title: "产品视角识别 / 补图", - model: modelList([models?.vision, models?.image]), + model: modelList([models?.product_view, models?.image]), chain: [ - `批量视角识别:${modelValue(models?.vision)} 一次读取同一产品多张图,标注视角、左右、上下、用途和风险`, - `缺角度补图:${imageModelChain(models)} 按同一肩颈按摩仪结构补齐缺失视角`, + `批量视角识别:${modelValue(models?.product_view)} 一次读取同一产品多张图,标注视角、左右、上下、用途和风险`, + `缺角度补图:${imageModelChain(models)} 读取最相关的多张已上传参考图,按同一肩颈按摩仪结构补齐缺失视角`, "前端只保存标注和 AI 补图结果;后续生成视频时每条最多挑 6 张相关产品图", ], note: "上传产品图、重新识别、缺视角重试都会使用这组模型链路。", @@ -620,6 +620,55 @@ function createProductRefItem( } } +const PRODUCT_ANGLE_REFERENCE_PRIORITY: Record = { + front: ["front", "left_45", "right_45", "side_thickness", "inner_contacts", "back_bottom"], + left_45: ["left_45", "front", "side_thickness", "right_45", "inner_contacts", "back_bottom"], + right_45: ["right_45", "front", "side_thickness", "left_45", "inner_contacts", "back_bottom"], + side_thickness: ["side_thickness", "left_45", "right_45", "front", "inner_contacts", "back_bottom"], + inner_contacts: ["inner_contacts", "side_thickness", "front", "left_45", "right_45", "back_bottom"], + back_bottom: ["back_bottom", "side_thickness", "inner_contacts", "left_45", "right_45", "front"], +} + +function productAngleReferenceScore(item: ProductRefItem, targetView: string) { + const priority = PRODUCT_ANGLE_REFERENCE_PRIORITY[targetView] ?? PRODUCT_VIEW_SLOTS.map((slot) => slot.value) + const rank = priority.indexOf(item.view) + let score = rank === -1 ? 0 : 90 - rank * 12 + if (item.source === "upload" || item.source === "library") score += 28 + if (item.source === "ai") score -= 18 + if (item.confidence) score += Math.round(item.confidence * 14) + if (item.useTags.includes("asymmetry")) score += 8 + if (targetView === "side_thickness" && item.useTags.includes("side_thickness")) score += 16 + if (targetView === "inner_contacts" && item.useTags.includes("inner_contact")) score += 16 + if (targetView === "back_bottom" && item.useTags.includes("back_bottom")) score += 16 + if (item.risk) score -= 10 + return score +} + +function selectProductAngleReferenceItems(items: ProductRefItem[], targetView: string) { + const unique = new Map() + for (const item of items) { + if (!unique.has(item.id)) unique.set(item.id, item) + } + return [...unique.values()] + .sort((a, b) => productAngleReferenceScore(b, targetView) - productAngleReferenceScore(a, targetView)) + .slice(0, 6) +} + +function productAngleSourceNotes(items: ProductRefItem[]) { + return items.map((item, index) => { + const parts = [ + `ref${index + 1}`, + `view=${productViewLabel(item.view)}`, + `source=${item.source}`, + item.note ? `note=${item.note}` : "", + formatProductOrientation(item.orientation), + item.landmarks?.length ? `landmarks=${item.landmarks.join("/")}` : "", + item.risk ? `risk=${item.risk}` : "", + ].filter(Boolean) + return parts.join(";") + }) +} + function normalizeStoredProductItem(item: ProductRefItem, index: number): ProductRefItem { const ref = { ...item.ref, asset_meta: item.ref.asset_meta ?? item.assetMeta } const restored = createProductRefItem( @@ -1960,10 +2009,13 @@ function AudioStoryboardPlanPanel({ for (const slot of missing) { setProductAngleBusy(slot.value) try { + const references = selectProductAngleReferenceItems(working, slot.value) const ref = await generateProductAngleAsset(job.id, { - source_ref: working[0].ref, + source_ref: references[0].ref, + source_refs: references.map((item) => item.ref), + source_notes: productAngleSourceNotes(references), target_view: slot.label, - note: slot.hint, + note: `${slot.hint};请综合这些同产品参考图补目标视角,不要只照抄某一张。`, }) working = [ ...working, @@ -2117,13 +2169,15 @@ function AudioStoryboardPlanPanel({ const generateMissingProductAngle = async (slot: typeof PRODUCT_VIEW_SLOTS[number]) => { if (!job || !productItems.length) return - const source = productItems[0] + const references = selectProductAngleReferenceItems(productItems, slot.value) setProductAngleBusy(slot.value) try { const ref = await generateProductAngleAsset(job.id, { - source_ref: source.ref, + source_ref: references[0].ref, + source_refs: references.map((item) => item.ref), + source_notes: productAngleSourceNotes(references), target_view: slot.label, - note: slot.hint, + note: `${slot.hint};请综合这些同产品参考图补目标视角,不要只照抄某一张。`, }) setProductItems((prev) => { const next = [...prev, createProductRefItem(ref, prev.length, "ai", slot.value, `AI 补齐:${slot.hint}`, "white", undefined, undefined, undefined, "", 1)] diff --git a/web/lib/api.ts b/web/lib/api.ts index 3d11287..a506ac8 100644 --- a/web/lib/api.ts +++ b/web/lib/api.ts @@ -153,6 +153,7 @@ export interface RuntimeModels { rewrite?: string audio_rewrite?: string vision?: string + product_view?: string image?: string image_base_url?: string image_fallbacks?: string[] @@ -224,7 +225,7 @@ export async function uploadStoryboardAsset(jobId: string, file: File): Promise< export async function generateProductAngleAsset( jobId: string, - body: { source_ref: ImageRef; target_view: string; note?: string }, + body: { source_ref: ImageRef; source_refs?: ImageRef[]; source_notes?: string[]; target_view: string; note?: string }, ): Promise { const res = await fetch(`${API_BASE}/jobs/${jobId}/assets/product-angle`, { method: "POST",