auto-save 2026-05-18 00:57 (~4)

2026-05-18 00:57:07 +08:00
parent 29bfaeff4c
commit 34ecab42ba
4 changed files with 96 additions and 30 deletions
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -1,32 +1,5 @@
 {
  "entries": [
-    {
-      "files_changed": 1,
-      "hash": "26f5d2e",
-      "message": "auto-save 2026-05-15 15:37 (~1)",
-      "ts": "2026-05-15T15:37:54+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "hash": "b42aa15",
-      "message": "auto-save 2026-05-15 15:43 (~1)",
-      "ts": "2026-05-15T15:43:28+08:00",
-      "type": "commit"
-    },
-    {
-      "files_changed": 1,
-      "message": "Codex 会话活跃 · 最近命令：codex · 1 项未提交变更 · 最近提交：auto-save 2026-05-15 15:43 (~1)",
-      "ts": "2026-05-15T07:44:47Z",
-      "type": "session-heartbeat"
-    },
-    {
-      "files_changed": 1,
-      "hash": "beeed42",
-      "message": "auto-save 2026-05-15 15:48 (~1)",
-      "ts": "2026-05-15T15:49:01+08:00",
-      "type": "commit"
-    },
    {
      "files_changed": 4,
      "hash": "eabec39",
@@ -3258,6 +3231,32 @@
      "type": "session-heartbeat",
      "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 8 项未提交变更 · 最近提交：auto-save 2026-05-18 00:23 (~2)",
      "files_changed": 8
+    },
+    {
+      "ts": "2026-05-18T00:29:11+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-18 00:29 (~8)",
+      "hash": "6f0b54c",
+      "files_changed": 8
+    },
+    {
+      "ts": "2026-05-17T16:38:32Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 1 项未提交变更 · 最近提交：auto-save 2026-05-18 00:29 (~8)",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-17T16:48:32Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 分支 main · 1 项未提交变更 · 最近提交：auto-save 2026-05-18 00:29 (~8)",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-18T00:49:38+08:00",
+      "type": "commit",
+      "message": "feat: route media models by provider",
+      "hash": "29bfaef",
+      "files_changed": 0
    }
  ]
 }
--- a/api/main.py
+++ b/api/main.py
@@ -4020,9 +4020,9 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
    prompt_extra = req.prompt.strip()
    prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else ""
    identity_lock_clause = (
-        "Identity lock: this request generates a six-view pack for ONE single subject. "
+        "Identity lock: these API calls generate a six-view pack for ONE single subject, but each individual output file must show only its one requested view. "
        "Before rendering, infer one consistent character bible from the reference image(s): gender presentation, age range, body proportions, head shape, face direction cues, material, silhouette, wardrobe/material style, and commercial mood. "
-        "Keep that same character bible unchanged across every generated view. "
+        "Keep that same character bible unchanged across every generated view in separate files. "
        "If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same six-view pack. "
        "For transparent humanoids, keep the same transparent skin shell, skeleton proportions, visible spine/rib cage/pelvis/limb bones, and non-horror wellness character style in every view. "
    )
@@ -4040,10 +4040,18 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
                    view_prompt = f"full-body upright standing character reference, {view_label}"
            else:
                view_prompt = f"complete object/product reference, {view_label} view"
+            view_name = view.replace("_", " ")
+            single_view_clause = (
+                f"Single-image output rule: this output file is ONLY for the {view_label} view ({view_name}). "
+                "Render exactly one subject, one time, in one pose and one camera angle. "
+                "Do not create a six-view sheet, contact sheet, grid, storyboard, lineup, comparison layout, before/after layout, mirrored pair, duplicate subjects, thumbnails, labels, captions, arrows, view names, panel borders, or multiple versions in the same image. "
+                "Do not include any of the other five views in this image. "
+            )
            prompt = (
                f"Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. "
                f"Generate one newly rendered {view_prompt} for {target}. "
                f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. "
+                + single_view_clause
                + identity_clause
                + identity_lock_clause
                + prompt_extra_clause
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -888,7 +888,7 @@ ProductRefStateItem {
            <tr><td>应用清洗</td><td><code>POST /cleanup/apply</code></td><td><code>applyCleanedFrame</code></td><td>物理覆盖 frames/{idx}.jpg，并备份原图。</td></tr>
            <tr><td>元素增改删</td><td><code>POST/PATCH/DELETE /elements</code></td><td><code>addElement/updateElement/deleteElement</code></td><td>让用户修正 Vision 错误，避免候选结果锁死。</td></tr>
            <tr><td>元素提取</td><td><code>POST /elements/{element_id}/cutout</code></td><td><code>cutoutElement</code></td><td>调用图像模型生成独立白底素材图，每次累积一张 cutout。</td></tr>
-            <tr><td>主体资产包</td><td><code>POST /elements/{element_id}/subject-assets</code></td><td><code>generateSubjectAssets</code></td><td>根据参考帧重新绘制一个统一主体资产包；前端默认把全部关键帧作为 <code>source_frame_indices</code>，如果用户手动选择了关键帧则只传已选帧，后端拼参考板。当前源视频工作区支持 <code>subject_style=transparent_human</code> 和 <code>subject_style=source_actor</code> 两种相似主体：透明骨架人会保持透明/半透明皮肤包裹可见白色骨架，普通真人会保持正常广告演员方向。两种模式都使用 <code>reconstruction_mode=similar</code>，最多读取 12 张参考帧，生成 6 张白底视图；后端使用 <code>SUBJECT_ASSET_IMAGE_MODELS</code>，默认 <code>gpt-image-2</code> / <code>gpt-image-1.5</code>，不再沿用通用 Gemini 图片模型；后端会加身份锁定约束，统一性别表现、年龄段、体型、材质、风格和视觉身份，避免六视图混成不同人物。</td></tr>
+            <tr><td>主体资产包</td><td><code>POST /elements/{element_id}/subject-assets</code></td><td><code>generateSubjectAssets</code></td><td>根据参考帧重新绘制一个统一主体资产包；前端默认把全部关键帧作为 <code>source_frame_indices</code>，如果用户手动选择了关键帧则只传已选帧，后端拼参考板。当前源视频工作区支持 <code>subject_style=transparent_human</code> 和 <code>subject_style=source_actor</code> 两种相似主体：透明骨架人会保持透明/半透明皮肤包裹可见白色骨架，普通真人会保持正常广告演员方向。两种模式都使用 <code>reconstruction_mode=similar</code>，最多读取 12 张参考帧，生成 6 张白底视图；后端使用 <code>SUBJECT_ASSET_IMAGE_MODELS</code>，默认 <code>gpt-image-2</code> / <code>gpt-image-1.5</code>，不再沿用通用 Gemini 图片模型；后端会加身份锁定约束，统一性别表现、年龄段、体型、材质、风格和视觉身份，避免六视图混成不同人物。前端白底视图缩略图和关键帧一样，鼠标停留会用顶层浮层放大预览，点击仍打开原图；后端每个 <code>view</code> 单独调用一次生图，并明确禁止六视图拼图、contact sheet、多主体、多面板、标签或对比排版，保证一个视角一张照片。</td></tr>
            <tr><td>首尾帧资产</td><td><code>POST /frames/{idx}/scene-asset</code></td><td><code>generateSceneAsset</code></td><td>同一接口兼容旧场景图和新首尾帧；新流程传 <code>asset_role=first_frame/last_frame</code>，后端走文字生图，参考帧只用于理解透明骨架人形象、比例、机位和光线，生成结果仍保存在 <code>scene_assets</code> 并自动填入产品融合镜头。</td></tr>
            <tr><td>产品图库</td><td><code>GET /product-library/skg</code></td><td><code>listProductLibrary</code></td><td>读取内置 SKG 白底图库 manifest，返回产品标题、品类、尺寸、白底评分和预览图 URL。</td></tr>
            <tr><td>产品图入库到 job</td><td><code>POST /jobs/{id}/assets</code>、<code>POST /jobs/{id}/assets/product-library</code></td><td><code>uploadStoryboardAsset</code>、<code>copyProductLibraryAsset</code></td><td>上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本，透明底铺白，过大/过小图片会在 <code>ImageRef.asset_meta</code> 里返回转换动作和风险；黑底/白底背景本身不强行转换。注意该接口只写图片文件，产品素材池列表另由 <code>PUT /jobs/{id}/product-refs</code> 持久化。</td></tr>
@@ -1004,6 +1004,19 @@ ProductRefStateItem {
        <h2>变更记录</h2>
        <p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
        <div class="changelog">
+          <article class="change">
+            <header>
+              <h3>2026-05-18 · 主体 6 视图改为单图预览和单视角生成</h3>
+              <span class="tag rose">UI</span>
+              <span class="tag violet">API</span>
+              <span class="tag cyan">Workflow</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>相似主体 6 视图生成后只能点击打开原图，不能像参考帧和产品图一样鼠标停留看大图；同时后端单张生成 prompt 里仍提到 six-view pack，容易让图片模型把多个视角拼进同一张图。</p>
+              <p><strong>改动：</strong><code>SourceReferenceBuildPanel</code> 给生成后的白底主体视图增加 <code>createPortal</code> 顶层悬停预览，缩略图保持小尺寸密排，停留时显示大图、视角标签和尺寸。<code>generateSubjectAssets</code> 把身份锁定说明改为“六视图属于同一主体，但每个输出文件只展示当前请求视角”，并为每次 view 生成加入单图规则，禁止 contact sheet、六视图拼图、多主体、多面板、标签或对比排版。</p>
+              <p><strong>影响：</strong><code>web/components/ad-recreation-board.tsx</code>、<code>api/main.py</code>、<code>docs/source-analysis.html</code>。后续用户说“6 视图”，应理解为 6 个独立图片文件，而不是一张六宫格大图。</p>
+            </div>
+          </article>
          <article class="change">
            <header>
              <h3>2026-05-18 · 模型链路拆分为 GPT 生图、Azure 语音和 Seedance 视频</h3>
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -1437,6 +1437,7 @@ function SourceReferenceBuildPanel({
  const [subjectBusy, setSubjectBusy] = useState(false)
  const [deletingFrame, setDeletingFrame] = useState<number | null>(null)
  const [framePreview, setFramePreview] = useState<{ index: number; left: number; top: number } | null>(null)
+  const [subjectAssetPreview, setSubjectAssetPreview] = useState<{ id: string; left: number; top: number } | null>(null)
  const [subjectStyle, setSubjectStyle] = useState<SubjectStyleMode>("transparent_human")
  const [subjectDirection, setSubjectDirection] = useState("")
  const frames = useMemo(() => [...job.frames].sort((a, b) => a.timestamp - b.timestamp), [job.frames])
@@ -1453,6 +1454,7 @@ function SourceReferenceBuildPanel({
  }, [frames, subjectReferenceFrames])
  const actorAssets = actorSource?.element.subject_assets ?? []
  const previewFrame = framePreview ? frames.find((frame) => frame.index === framePreview.index) ?? null : null
+  const previewSubjectAsset = subjectAssetPreview ? actorAssets.find((asset) => asset.id === subjectAssetPreview.id) ?? null : null
  const referenceCountLabel = selectedReferenceFrames.length
    ? `使用已选 ${selectedReferenceFrames.length} 张`
    : frames.length
@@ -1549,6 +1551,25 @@ function SourceReferenceBuildPanel({
    })
  }

+  const updateSubjectAssetPreviewPosition = (event: ReactMouseEvent<HTMLElement>, assetId: string) => {
+    const margin = 16
+    const previewWidth = Math.min(420, window.innerWidth - margin * 2)
+    const previewHeight = Math.min(720, window.innerHeight - margin * 2)
+    let left = event.clientX + 18
+    let top = event.clientY + 18
+    if (left + previewWidth > window.innerWidth - margin) {
+      left = event.clientX - previewWidth - 18
+    }
+    if (top + previewHeight > window.innerHeight - margin) {
+      top = window.innerHeight - previewHeight - margin
+    }
+    setSubjectAssetPreview({
+      id: assetId,
+      left: Math.max(margin, left),
+      top: Math.max(margin, top),
+    })
+  }
+
  const framePreviewPortal = framePreview && previewFrame && typeof document !== "undefined"
    ? createPortal(
        <div
@@ -1564,10 +1585,32 @@ function SourceReferenceBuildPanel({
        document.body,
      )
    : null
+  const subjectAssetPreviewPortal = subjectAssetPreview && previewSubjectAsset && typeof document !== "undefined"
+    ? createPortal(
+        <div
+          className="pointer-events-none fixed z-[10000] w-[min(420px,calc(100vw-32px))] rounded-xl border border-white/15 bg-black/94 p-3 shadow-[0_28px_80px_rgba(0,0,0,0.72)]"
+          style={{ left: subjectAssetPreview.left, top: subjectAssetPreview.top }}
+        >
+          <div className="flex max-h-[min(70vh,620px)] items-center justify-center rounded-lg bg-white p-2">
+            <img
+              src={subjectAssetUrl(job, previewSubjectAsset)}
+              alt=""
+              className="max-h-[min(66vh,580px)] w-full object-contain"
+            />
+          </div>
+          <div className="mt-2 flex items-center justify-between gap-3 text-[11px] text-white/62">
+            <span className="truncate">{previewSubjectAsset.label || previewSubjectAsset.view || "主体视图预览"}</span>
+            <span className="shrink-0 font-mono">{previewSubjectAsset.width}x{previewSubjectAsset.height}</span>
+          </div>
+        </div>,
+        document.body,
+      )
+    : null

  return (
    <div className="min-w-0">
      {framePreviewPortal}
+      {subjectAssetPreviewPortal}
      <div className="mb-2 flex items-center justify-between gap-3">
        <SectionTitle icon={<ImageIcon className="h-4 w-4" />} title="关键帧 / 相似主体" />
        <div className="flex items-center gap-2">
@@ -1692,6 +1735,9 @@ function SourceReferenceBuildPanel({
                  href={subjectAssetUrl(job, asset)}
                  target="_blank"
                  rel="noreferrer"
+                  onMouseEnter={(event) => updateSubjectAssetPreviewPosition(event, asset.id)}
+                  onMouseMove={(event) => updateSubjectAssetPreviewPosition(event, asset.id)}
+                  onMouseLeave={() => setSubjectAssetPreview(null)}
                  className="h-20 overflow-hidden rounded border border-white/10 bg-white transition hover:border-cyan-200/70 2xl:h-24"
                  title={asset.label || asset.view}
                >