fix: enforce orthographic top bottom subject views

auto-save 2026-05-25 09:05 (~2)
2026-05-25 09:27:31 +08:00 · 2026-05-25 09:05:53 +08:00
7 changed files with 2277 additions and 2231 deletions
--- a/.memory/assistant-handoff.md
+++ b/.memory/assistant-handoff.md
@@ -1,6 +1,6 @@
 # 项目接力

- 生成时间：May 23, 2026 at 23:37
+- 生成时间：May 25, 2026 at 09:02
 - 项目：SKG Marketing Studio / SKG 营销内容工作台
 - 路径：/Users/kangwan/Projects/business/20260512-20260512-skg-tk-二创验证
 - 状态：active
@@ -9,7 +9,7 @@
 ## 最近助手会话概览

 - Claude：a9e0449c-d9cb-4a2a-bb16-16596dfb552a · 时间未知
- Codex：019e4d33-ad65-7673-934e-815226984ea6 · 时间未知
+- Codex：019e55d1-ccf1-7a90-805f-3bd4799a49cd · 时间未知
 - Cursor：未找到匹配当前项目的最近会话

 ## Claude 最近会话
@@ -92,39 +92,44 @@

 ## Codex 最近会话

- Session ID：019e4d33-ad65-7673-934e-815226984ea6
- Transcript：/Users/kangwan/.codex/sessions/2026/05/22/rollout-2026-05-22T09-01-32-019e4d33-ad65-7673-934e-815226984ea6.jsonl
+- Session ID：019e55d1-ccf1-7a90-805f-3bd4799a49cd
+- Transcript：/Users/kangwan/.codex/sessions/2026/05/24/rollout-2026-05-24T01-11-13-019e55d1-ccf1-7a90-805f-3bd4799a49cd.jsonl
 - 工作目录：/Users/kangwan/Projects/business/20260512-20260512-skg-tk-二创验证
 - 分支：main
 - 敏感字段：已对 token / key / password / secret 做脱敏

 ### 最近用户要求

- 音频解析 也需需要有中文识别  和多语言识别
- 脚本那边是自动识别语音识别的内容然后生成脚本文案么？
- 分镜那边
+- 还是不够简洁简单，我们再深度研究下怎么样的排列才最简单，别人一看就会用
+- 我看一下样式，你用figma画一下
+- 设计的版面还是太差了   我找一下模版

 ### Codex 最近回复

-> 对，问题聚焦在分镜这边就更清楚了：我已经看了分镜面板当前实现，下面按“现在实际怎么跑”说。
+> 我画了一版简化首页样式稿并打开了：
+> 
+> - HTML 样式稿：`output/design/skg-simple-home-mockup.html`
+> - 截图：`output/design/skg-simple-home-mockup-fullpage.png`
+> 
+> 说明一下：当前会话没有可写入 Figma 云文件的 MCP 工具，所以我先用本地“Figma 风格画板”做了可看的样式稿。这个稿不影响线上代码，也没有提交。
+> 
+> 核心变化是：首屏只保留一个主流程：
+> `选择产物 → 上传素材可选 → 写一句话需求 → 开始生成`
+> 
+> `git status -sb` 当前仍只有 `.memory/worklog.json` 自动日志脏文件。

-> 分镜这边现在是“半自动”，不是完全端到端自动重写整片脚本。
+> 可以，你先找模板。
 > 
-> 当前链路是：
+> 我建议你找这类方向的，不要找复杂 AI 工具后台：
 > 
-> 1. 音频解析完成后，`job.transcript` 里有逐句时间轴：原语言文案 + 中文镜像。
-> 2. 分镜面板会自动按每个 transcript 段生成一行分镜，保留原音频时间点和原文案依据。
-> 3. 每行会先自动套默认三字段：
->    - `文案`
->    - `场景一句话`
->    - `人物 + 产品 + 动作`
-> 4. 这些默认值目前主要是按段落位置和角色分类生成，比如 hook / pain / proof / solution / cta，不是每句都先让大模型深度理解原文后重写。
-> 5. 当你点单行生成视频候选或同步规划时，才会走 `quickPlanStoryboard`，把当前三字段、参考帧、产品信息扩成完整分镜 prompt。
-> 6. 你改中文字段后，系统会自动优化对应英文主字段，英文用于后续生视频。
+> - “AI image generator dashboard”
+> - “content creation workspace”
+> - “simple prompt editor”
+> - “Canva / CapCut style creation home”
+> - “one page SaaS tool dashboard”
+> - “creative brief generator UI”
 > 
-> 所以准确说：分镜会自动读取语音识别结果来切分时间轴、显示原文依据、初始化分镜行；但“根据识别内容自动生成更贴合原视频语义的新脚本文案”这层现在还不够强，偏模板化。
-> 
-> 如果你想要的效果是：音频一识别完，分镜区就自动根据原音频内容生成每句对应的新 SKG 脚本文案，那需要再补一层“transcript -> 三字段候选”的自动脚本生成逻辑。这个方向是合理的。
+> 最适合我们的是：中间一个大输入区，顶部或左侧选择产物，右侧只放最近任务/结果。不要找那种很多统计卡、很多侧边栏菜单、很多模型参数的模板，那类还是会把普通同事绕晕。

 ## Cursor 最近会话

--- a/.memory/worklog.json
+++ b/.memory/worklog.json
--- a/api/main.py
+++ b/api/main.py
@@ -2641,8 +2641,8 @@ SUBJECT_VIEW_LABELS: dict[str, str] = {
    "three_quarter_right": "右前 45°",
    "side": "侧面",
    "side_walk": "侧面走路",
-    "top": "顶部视角",
-    "bottom": "底部视角",
+    "top": "正投影俯视图",
+    "bottom": "正投影仰视图",
    "expression_neutral": "中性表情",
    "expression_smile": "微笑表情",
    "expression_happy": "开心表情",
@@ -2691,11 +2691,27 @@ def _subject_view_labels(kind: SubjectKind, requested: list[str] | None = None)
        ("back", "背面"),
        ("left", "左侧"),
        ("right", "右侧"),
-        ("top", "顶部"),
-        ("bottom", "底部"),
+        ("top", "正投影俯视图"),
+        ("bottom", "正投影仰视图"),
    ]


+def _subject_view_projection_clause(view: str) -> str:
+    if view == "top":
+        return (
+            "Patent-style orthographic top view: look straight down from directly above the product, "
+            "with the viewing direction perpendicular to the top face. No perspective, no tilt, no 3/4 angle, "
+            "no oblique overhead camera, no visible front/side depth unless it is true product thickness in orthographic projection. "
+        )
+    if view == "bottom":
+        return (
+            "Patent-style orthographic bottom view: look straight up at the underside/bottom face, "
+            "with the viewing direction perpendicular to the bottom face. No perspective, no tilt, no 3/4 angle, "
+            "no low-angle perspective camera, no visible front/side depth unless it is true product thickness in orthographic projection. "
+        )
+    return ""
+
+
 def _attach_temporal_metrics(items: list[dict]) -> None:
    """相邻低清帧差异：转场 / 动作目标依赖它，不需要逐帧高分辨率扫描。"""
    for i, it in enumerate(items):
@@ -6910,11 +6926,13 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
            else:
                view_prompt = f"complete object/product reference, {view_label} view"
            view_name = view.replace("_", " ")
+            projection_clause = _subject_view_projection_clause(view)
            single_view_clause = (
                f"Single-image output rule: this output file is ONLY for the {view_label} view ({view_name}). "
                "Render exactly one subject, one time, in one pose and one camera angle. "
                "Do not create a multi-view sheet, contact sheet, grid, storyboard, lineup, comparison layout, before/after layout, mirrored pair, duplicate subjects, thumbnails, labels, captions, arrows, view names, panel borders, or multiple versions in the same image. "
                "Do not include any other views in this image. "
+                + projection_clause
            )
            framing_clause = (
                "For this close-up view, intentionally crop as an upper-body asset from head/neck to chest or upper back; the neck, shoulders, collarbone or upper spine area must be large, clear, and useful for placing a neck-and-shoulder massage device. "
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -1181,6 +1181,19 @@ ProductRefStateItem {
        <h2>变更记录</h2>
        <p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
        <div class="changelog">
+          <article class="change">
+            <header>
+              <h3>2026-05-25 · 主体套图俯视和仰视改为正投影口径</h3>
+              <span class="tag amber">API</span>
+              <span class="tag violet">UI</span>
+              <span class="tag blue">Docs</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>原先主体/产品套图里把 <code>top</code> 和 <code>bottom</code> 简写为“顶部/底部”或“俯视/仰视”，模型容易理解成斜上方、斜下方或 3/4 透视镜头，不符合外观专利六面正投影视图的要求。</p>
+              <p><strong>改动：</strong><code>api/main.py</code> 的 <code>SUBJECT_VIEW_LABELS</code>、默认物体 6 视图和主体生图 prompt 已改为“正投影俯视图 / 正投影仰视图”，并在生成单图时加入 <code>orthographic top/bottom view</code>、<code>no perspective</code>、<code>no tilt</code>、<code>no 3/4 angle</code> 约束。<code>web/components/lightbox.tsx</code>、<code>web/components/ad-recreation-board.tsx</code> 和 <code>web/components/storyboard-editor.tsx</code> 同步更新显示文案和后续分镜提示词。</p>
+              <p><strong>影响：</strong>以后描述专利六面图需求时应说“正投影俯视图 / 正投影仰视图”，不要只说“俯视角度 / 仰视角度”；这些视图是垂直投影，不是带角度的摄影机视角。</p>
+            </div>
+          </article>
          <article class="change">
            <header>
              <h3>2026-05-24 · 默认首页完整重设计为多人创作平台</h3>
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -2019,6 +2019,11 @@ function selectProductItemsForRow(
 }

 function subjectViewLabel(view: string) {
+  const labels: Record<string, string> = {
+    top: "正投影俯视图",
+    bottom: "正投影仰视图",
+  }
+  if (labels[view]) return labels[view]
  return SUBJECT_ASSET_VIEWS.find((item) => item.value === view)?.label ?? view
 }

@@ -2034,6 +2039,8 @@ function subjectViewRoleHint(view: string) {
    bust_left_45: "肩颈左前近景、手部调整、佩戴贴合",
    bust_right_45: "肩颈右前近景、手部调整、佩戴贴合",
    back_neck_detail: "后颈肩背特写、触点位置、产品贴合",
+    top: "正投影俯视、无透视、无倾斜、产品顶部轮廓",
+    bottom: "正投影仰视、无透视、无倾斜、产品底部结构",
  }
  return hints[view] ?? "主体参考视角"
 }
@@ -2050,6 +2057,8 @@ function subjectViewPromptHint(view: string) {
    bust_left_45: "left three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit",
    bust_right_45: "right three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit",
    back_neck_detail: "back-neck and upper-back detail, contact-pad position, product fit",
+    top: "orthographic top view, straight down from above, no perspective, no tilt, no three-quarter angle",
+    bottom: "orthographic bottom view, straight up at underside, no perspective, no tilt, no three-quarter angle",
  }
  return hints[view] ?? "subject reference view"
 }
--- a/web/components/lightbox.tsx
+++ b/web/components/lightbox.tsx
@@ -36,8 +36,8 @@ const OBJECT_VIEW_OPTIONS = [
  ["back", "背面"],
  ["left", "左侧"],
  ["right", "右侧"],
-  ["top", "顶部"],
-  ["bottom", "底部"],
+  ["top", "正投影俯视图"],
+  ["bottom", "正投影仰视图"],
 ]

 const LIVING_VIEW_OPTIONS = [
--- a/web/components/storyboard-editor.tsx
+++ b/web/components/storyboard-editor.tsx
@@ -129,7 +129,7 @@ export function StoryboardEditor({ job, frameIndex, onClose }: Props) {
                    📐 多视角
                  </div>
                  <div className="text-[9.5px] text-white/45 leading-tight">
-                    侧面 / 仰视 / 俯视等同元素的不同角度
+                    侧面 / 正投影俯视 / 正投影仰视等同元素不同视图
                  </div>
                </button>
                <button
Author	SHA1	Message	Date
kang	f8c51b5ef6	fix: enforce orthographic top bottom subject views	2026-05-25 09:27:31 +08:00
kang	8e60c7dff9	auto-save 2026-05-25 09:05 (~2)	2026-05-25 09:05:53 +08:00