From 34ecab42ba316f56ead199079f0f2ebc39a935d4 Mon Sep 17 00:00:00 2001 From: kang Date: Mon, 18 May 2026 00:57:07 +0800 Subject: [PATCH] auto-save 2026-05-18 00:57 (~4) --- .memory/worklog.json | 53 +++++++++++++------------- api/main.py | 12 +++++- docs/source-analysis.html | 15 +++++++- web/components/ad-recreation-board.tsx | 46 ++++++++++++++++++++++ 4 files changed, 96 insertions(+), 30 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index 5a5abbd..66d1034 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1,32 +1,5 @@ { "entries": [ - { - "files_changed": 1, - "hash": "26f5d2e", - "message": "auto-save 2026-05-15 15:37 (~1)", - "ts": "2026-05-15T15:37:54+08:00", - "type": "commit" - }, - { - "files_changed": 1, - "hash": "b42aa15", - "message": "auto-save 2026-05-15 15:43 (~1)", - "ts": "2026-05-15T15:43:28+08:00", - "type": "commit" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-15 15:43 (~1)", - "ts": "2026-05-15T07:44:47Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "hash": "beeed42", - "message": "auto-save 2026-05-15 15:48 (~1)", - "ts": "2026-05-15T15:49:01+08:00", - "type": "commit" - }, { "files_changed": 4, "hash": "eabec39", @@ -3258,6 +3231,32 @@ "type": "session-heartbeat", "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 8 项未提交变更 · 最近提交:auto-save 2026-05-18 00:23 (~2)", "files_changed": 8 + }, + { + "ts": "2026-05-18T00:29:11+08:00", + "type": "commit", + "message": "auto-save 2026-05-18 00:29 (~8)", + "hash": "6f0b54c", + "files_changed": 8 + }, + { + "ts": "2026-05-17T16:38:32Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:auto-save 2026-05-18 00:29 (~8)", + "files_changed": 1 + }, + { + "ts": "2026-05-17T16:48:32Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:auto-save 2026-05-18 00:29 (~8)", + "files_changed": 1 + }, + { + "ts": "2026-05-18T00:49:38+08:00", + "type": "commit", + "message": "feat: route media models by provider", + "hash": "29bfaef", + "files_changed": 0 } ] } diff --git a/api/main.py b/api/main.py index 8bf3a12..1420a53 100644 --- a/api/main.py +++ b/api/main.py @@ -4020,9 +4020,9 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat prompt_extra = req.prompt.strip() prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else "" identity_lock_clause = ( - "Identity lock: this request generates a six-view pack for ONE single subject. " + "Identity lock: these API calls generate a six-view pack for ONE single subject, but each individual output file must show only its one requested view. " "Before rendering, infer one consistent character bible from the reference image(s): gender presentation, age range, body proportions, head shape, face direction cues, material, silhouette, wardrobe/material style, and commercial mood. " - "Keep that same character bible unchanged across every generated view. " + "Keep that same character bible unchanged across every generated view in separate files. " "If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same six-view pack. " "For transparent humanoids, keep the same transparent skin shell, skeleton proportions, visible spine/rib cage/pelvis/limb bones, and non-horror wellness character style in every view. " ) @@ -4040,10 +4040,18 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat view_prompt = f"full-body upright standing character reference, {view_label}" else: view_prompt = f"complete object/product reference, {view_label} view" + view_name = view.replace("_", " ") + single_view_clause = ( + f"Single-image output rule: this output file is ONLY for the {view_label} view ({view_name}). " + "Render exactly one subject, one time, in one pose and one camera angle. " + "Do not create a six-view sheet, contact sheet, grid, storyboard, lineup, comparison layout, before/after layout, mirrored pair, duplicate subjects, thumbnails, labels, captions, arrows, view names, panel borders, or multiple versions in the same image. " + "Do not include any of the other five views in this image. " + ) prompt = ( f"Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. " f"Generate one newly rendered {view_prompt} for {target}. " f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. " + + single_view_clause + identity_clause + identity_lock_clause + prompt_extra_clause diff --git a/docs/source-analysis.html b/docs/source-analysis.html index a742148..b9ddc5f 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -888,7 +888,7 @@ ProductRefStateItem { 应用清洗POST /cleanup/applyapplyCleanedFrame物理覆盖 frames/{idx}.jpg,并备份原图。 元素增改删POST/PATCH/DELETE /elementsaddElement/updateElement/deleteElement让用户修正 Vision 错误,避免候选结果锁死。 元素提取POST /elements/{element_id}/cutoutcutoutElement调用图像模型生成独立白底素材图,每次累积一张 cutout。 - 主体资产包POST /elements/{element_id}/subject-assetsgenerateSubjectAssets根据参考帧重新绘制一个统一主体资产包;前端默认把全部关键帧作为 source_frame_indices,如果用户手动选择了关键帧则只传已选帧,后端拼参考板。当前源视频工作区支持 subject_style=transparent_humansubject_style=source_actor 两种相似主体:透明骨架人会保持透明/半透明皮肤包裹可见白色骨架,普通真人会保持正常广告演员方向。两种模式都使用 reconstruction_mode=similar,最多读取 12 张参考帧,生成 6 张白底视图;后端使用 SUBJECT_ASSET_IMAGE_MODELS,默认 gpt-image-2 / gpt-image-1.5,不再沿用通用 Gemini 图片模型;后端会加身份锁定约束,统一性别表现、年龄段、体型、材质、风格和视觉身份,避免六视图混成不同人物。 + 主体资产包POST /elements/{element_id}/subject-assetsgenerateSubjectAssets根据参考帧重新绘制一个统一主体资产包;前端默认把全部关键帧作为 source_frame_indices,如果用户手动选择了关键帧则只传已选帧,后端拼参考板。当前源视频工作区支持 subject_style=transparent_humansubject_style=source_actor 两种相似主体:透明骨架人会保持透明/半透明皮肤包裹可见白色骨架,普通真人会保持正常广告演员方向。两种模式都使用 reconstruction_mode=similar,最多读取 12 张参考帧,生成 6 张白底视图;后端使用 SUBJECT_ASSET_IMAGE_MODELS,默认 gpt-image-2 / gpt-image-1.5,不再沿用通用 Gemini 图片模型;后端会加身份锁定约束,统一性别表现、年龄段、体型、材质、风格和视觉身份,避免六视图混成不同人物。前端白底视图缩略图和关键帧一样,鼠标停留会用顶层浮层放大预览,点击仍打开原图;后端每个 view 单独调用一次生图,并明确禁止六视图拼图、contact sheet、多主体、多面板、标签或对比排版,保证一个视角一张照片。 首尾帧资产POST /frames/{idx}/scene-assetgenerateSceneAsset同一接口兼容旧场景图和新首尾帧;新流程传 asset_role=first_frame/last_frame,后端走文字生图,参考帧只用于理解透明骨架人形象、比例、机位和光线,生成结果仍保存在 scene_assets 并自动填入产品融合镜头。 产品图库GET /product-library/skglistProductLibrary读取内置 SKG 白底图库 manifest,返回产品标题、品类、尺寸、白底评分和预览图 URL。 产品图入库到 jobPOST /jobs/{id}/assetsPOST /jobs/{id}/assets/product-libraryuploadStoryboardAssetcopyProductLibraryAsset上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本,透明底铺白,过大/过小图片会在 ImageRef.asset_meta 里返回转换动作和风险;黑底/白底背景本身不强行转换。注意该接口只写图片文件,产品素材池列表另由 PUT /jobs/{id}/product-refs 持久化。 @@ -1004,6 +1004,19 @@ ProductRefStateItem {

变更记录

这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。

+
+
+

2026-05-18 · 主体 6 视图改为单图预览和单视角生成

+ UI + API + Workflow +
+
+

问题:相似主体 6 视图生成后只能点击打开原图,不能像参考帧和产品图一样鼠标停留看大图;同时后端单张生成 prompt 里仍提到 six-view pack,容易让图片模型把多个视角拼进同一张图。

+

改动:SourceReferenceBuildPanel 给生成后的白底主体视图增加 createPortal 顶层悬停预览,缩略图保持小尺寸密排,停留时显示大图、视角标签和尺寸。generateSubjectAssets 把身份锁定说明改为“六视图属于同一主体,但每个输出文件只展示当前请求视角”,并为每次 view 生成加入单图规则,禁止 contact sheet、六视图拼图、多主体、多面板、标签或对比排版。

+

影响:web/components/ad-recreation-board.tsxapi/main.pydocs/source-analysis.html。后续用户说“6 视图”,应理解为 6 个独立图片文件,而不是一张六宫格大图。

+
+

2026-05-18 · 模型链路拆分为 GPT 生图、Azure 语音和 Seedance 视频

diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx index 19492d7..d520abf 100644 --- a/web/components/ad-recreation-board.tsx +++ b/web/components/ad-recreation-board.tsx @@ -1437,6 +1437,7 @@ function SourceReferenceBuildPanel({ const [subjectBusy, setSubjectBusy] = useState(false) const [deletingFrame, setDeletingFrame] = useState(null) const [framePreview, setFramePreview] = useState<{ index: number; left: number; top: number } | null>(null) + const [subjectAssetPreview, setSubjectAssetPreview] = useState<{ id: string; left: number; top: number } | null>(null) const [subjectStyle, setSubjectStyle] = useState("transparent_human") const [subjectDirection, setSubjectDirection] = useState("") const frames = useMemo(() => [...job.frames].sort((a, b) => a.timestamp - b.timestamp), [job.frames]) @@ -1453,6 +1454,7 @@ function SourceReferenceBuildPanel({ }, [frames, subjectReferenceFrames]) const actorAssets = actorSource?.element.subject_assets ?? [] const previewFrame = framePreview ? frames.find((frame) => frame.index === framePreview.index) ?? null : null + const previewSubjectAsset = subjectAssetPreview ? actorAssets.find((asset) => asset.id === subjectAssetPreview.id) ?? null : null const referenceCountLabel = selectedReferenceFrames.length ? `使用已选 ${selectedReferenceFrames.length} 张` : frames.length @@ -1549,6 +1551,25 @@ function SourceReferenceBuildPanel({ }) } + const updateSubjectAssetPreviewPosition = (event: ReactMouseEvent, assetId: string) => { + const margin = 16 + const previewWidth = Math.min(420, window.innerWidth - margin * 2) + const previewHeight = Math.min(720, window.innerHeight - margin * 2) + let left = event.clientX + 18 + let top = event.clientY + 18 + if (left + previewWidth > window.innerWidth - margin) { + left = event.clientX - previewWidth - 18 + } + if (top + previewHeight > window.innerHeight - margin) { + top = window.innerHeight - previewHeight - margin + } + setSubjectAssetPreview({ + id: assetId, + left: Math.max(margin, left), + top: Math.max(margin, top), + }) + } + const framePreviewPortal = framePreview && previewFrame && typeof document !== "undefined" ? createPortal(
+
+ +
+
+ {previewSubjectAsset.label || previewSubjectAsset.view || "主体视图预览"} + {previewSubjectAsset.width}x{previewSubjectAsset.height} +
+
, + document.body, + ) + : null return (
{framePreviewPortal} + {subjectAssetPreviewPortal}
} title="关键帧 / 相似主体" />
@@ -1692,6 +1735,9 @@ function SourceReferenceBuildPanel({ href={subjectAssetUrl(job, asset)} target="_blank" rel="noreferrer" + onMouseEnter={(event) => updateSubjectAssetPreviewPosition(event, asset.id)} + onMouseMove={(event) => updateSubjectAssetPreviewPosition(event, asset.id)} + onMouseLeave={() => setSubjectAssetPreview(null)} className="h-20 overflow-hidden rounded border border-white/10 bg-white transition hover:border-cyan-200/70 2xl:h-24" title={asset.label || asset.view} >