2 Commits

Author SHA1 Message Date
f8c51b5ef6 fix: enforce orthographic top bottom subject views 2026-05-25 09:27:31 +08:00
8e60c7dff9 auto-save 2026-05-25 09:05 (~2) 2026-05-25 09:05:53 +08:00
7 changed files with 2277 additions and 2231 deletions

View File

@@ -1,6 +1,6 @@
# 项目接力
- 生成时间May 23, 2026 at 23:37
- 生成时间May 25, 2026 at 09:02
- 项目SKG Marketing Studio / SKG 营销内容工作台
- 路径:/Users/kangwan/Projects/business/20260512-20260512-skg-tk-二创验证
- 状态active
@@ -9,7 +9,7 @@
## 最近助手会话概览
- Claudea9e0449c-d9cb-4a2a-bb16-16596dfb552a · 时间未知
- Codex019e4d33-ad65-7673-934e-815226984ea6 · 时间未知
- Codex019e55d1-ccf1-7a90-805f-3bd4799a49cd · 时间未知
- Cursor未找到匹配当前项目的最近会话
## Claude 最近会话
@@ -92,39 +92,44 @@
## Codex 最近会话
- Session ID019e4d33-ad65-7673-934e-815226984ea6
- Transcript/Users/kangwan/.codex/sessions/2026/05/22/rollout-2026-05-22T09-01-32-019e4d33-ad65-7673-934e-815226984ea6.jsonl
- Session ID019e55d1-ccf1-7a90-805f-3bd4799a49cd
- Transcript/Users/kangwan/.codex/sessions/2026/05/24/rollout-2026-05-24T01-11-13-019e55d1-ccf1-7a90-805f-3bd4799a49cd.jsonl
- 工作目录:/Users/kangwan/Projects/business/20260512-20260512-skg-tk-二创验证
- 分支main
- 敏感字段:已对 token / key / password / secret 做脱敏
### 最近用户要求
- 音频解析 也需需要有中文识别 和多语言识别
- 脚本那边是自动识别语音识别的内容然后生成脚本文案么?
- 分镜那边
- 还是不够简洁简单,我们再深度研究下怎么样的排列才最简单,别人一看就会用
- 我看一下样式你用figma画一下
- 设计的版面还是太差了 我找一下模版
### Codex 最近回复
> 对,问题聚焦在分镜这边就更清楚了:我已经看了分镜面板当前实现,下面按“现在实际怎么跑”说。
> 我画了一版简化首页样式稿并打开了:
>
> - HTML 样式稿:`output/design/skg-simple-home-mockup.html`
> - 截图:`output/design/skg-simple-home-mockup-fullpage.png`
>
> 说明一下:当前会话没有可写入 Figma 云文件的 MCP 工具,所以我先用本地“Figma 风格画板”做了可看的样式稿。这个稿不影响线上代码,也没有提交。
>
> 核心变化是:首屏只保留一个主流程:
> `选择产物 → 上传素材可选 → 写一句话需求 → 开始生成`
>
> `git status -sb` 当前仍只有 `.memory/worklog.json` 自动日志脏文件。
> 分镜这边现在是“半自动”,不是完全端到端自动重写整片脚本
> 可以,你先找模板
>
> 当前链路是
> 我建议你找这类方向的,不要找复杂 AI 工具后台
>
> 1. 音频解析完成后,`job.transcript` 里有逐句时间轴:原语言文案 + 中文镜像。
> 2. 分镜面板会自动按每个 transcript 段生成一行分镜,保留原音频时间点和原文案依据。
> 3. 每行会先自动套默认三字段:
> - `文案`
> - `场景一句话`
> - `人物 + 产品 + 动作`
> 4. 这些默认值目前主要是按段落位置和角色分类生成,比如 hook / pain / proof / solution / cta,不是每句都先让大模型深度理解原文后重写。
> 5. 当你点单行生成视频候选或同步规划时,才会走 `quickPlanStoryboard`,把当前三字段、参考帧、产品信息扩成完整分镜 prompt。
> 6. 你改中文字段后,系统会自动优化对应英文主字段,英文用于后续生视频。
> - “AI image generator dashboard”
> - “content creation workspace”
> - “simple prompt editor”
> - “Canva / CapCut style creation home”
> - “one page SaaS tool dashboard”
> - “creative brief generator UI”
>
> 所以准确说:分镜会自动读取语音识别结果来切分时间轴、显示原文依据、初始化分镜行;但“根据识别内容自动生成更贴合原视频语义的新脚本文案”这层现在还不够强,偏模板化
>
> 如果你想要的效果是:音频一识别完,分镜区就自动根据原音频内容生成每句对应的新 SKG 脚本文案那需要再补一层“transcript -> 三字段候选”的自动脚本生成逻辑。这个方向是合理的。
> 最适合我们的是:中间一个大输入区,顶部或左侧选择产物,右侧只放最近任务/结果。不要找那种很多统计卡、很多侧边栏菜单、很多模型参数的模板,那类还是会把普通同事绕晕
## Cursor 最近会话

File diff suppressed because it is too large Load Diff

View File

@@ -2641,8 +2641,8 @@ SUBJECT_VIEW_LABELS: dict[str, str] = {
"three_quarter_right": "右前 45°",
"side": "侧面",
"side_walk": "侧面走路",
"top": "顶部视角",
"bottom": "底部视角",
"top": "正投影俯视图",
"bottom": "正投影仰视图",
"expression_neutral": "中性表情",
"expression_smile": "微笑表情",
"expression_happy": "开心表情",
@@ -2691,11 +2691,27 @@ def _subject_view_labels(kind: SubjectKind, requested: list[str] | None = None)
("back", "背面"),
("left", "左侧"),
("right", "右侧"),
("top", "顶部"),
("bottom", "底部"),
("top", "正投影俯视图"),
("bottom", "正投影仰视图"),
]
def _subject_view_projection_clause(view: str) -> str:
if view == "top":
return (
"Patent-style orthographic top view: look straight down from directly above the product, "
"with the viewing direction perpendicular to the top face. No perspective, no tilt, no 3/4 angle, "
"no oblique overhead camera, no visible front/side depth unless it is true product thickness in orthographic projection. "
)
if view == "bottom":
return (
"Patent-style orthographic bottom view: look straight up at the underside/bottom face, "
"with the viewing direction perpendicular to the bottom face. No perspective, no tilt, no 3/4 angle, "
"no low-angle perspective camera, no visible front/side depth unless it is true product thickness in orthographic projection. "
)
return ""
def _attach_temporal_metrics(items: list[dict]) -> None:
"""相邻低清帧差异:转场 / 动作目标依赖它,不需要逐帧高分辨率扫描。"""
for i, it in enumerate(items):
@@ -6910,11 +6926,13 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
else:
view_prompt = f"complete object/product reference, {view_label} view"
view_name = view.replace("_", " ")
projection_clause = _subject_view_projection_clause(view)
single_view_clause = (
f"Single-image output rule: this output file is ONLY for the {view_label} view ({view_name}). "
"Render exactly one subject, one time, in one pose and one camera angle. "
"Do not create a multi-view sheet, contact sheet, grid, storyboard, lineup, comparison layout, before/after layout, mirrored pair, duplicate subjects, thumbnails, labels, captions, arrows, view names, panel borders, or multiple versions in the same image. "
"Do not include any other views in this image. "
+ projection_clause
)
framing_clause = (
"For this close-up view, intentionally crop as an upper-body asset from head/neck to chest or upper back; the neck, shoulders, collarbone or upper spine area must be large, clear, and useful for placing a neck-and-shoulder massage device. "

View File

@@ -1181,6 +1181,19 @@ ProductRefStateItem {
<h2>变更记录</h2>
<p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
<div class="changelog">
<article class="change">
<header>
<h3>2026-05-25 · 主体套图俯视和仰视改为正投影口径</h3>
<span class="tag amber">API</span>
<span class="tag violet">UI</span>
<span class="tag blue">Docs</span>
</header>
<div class="body">
<p><strong>问题:</strong>原先主体/产品套图里把 <code>top</code> / <code>bottom</code> 简写为“顶部/底部”或“俯视/仰视”,模型容易理解成斜上方、斜下方或 3/4 透视镜头,不符合外观专利六面正投影视图的要求。</p>
<p><strong>改动:</strong><code>api/main.py</code> 的 <code>SUBJECT_VIEW_LABELS</code>、默认物体 6 视图和主体生图 prompt 已改为“正投影俯视图 / 正投影仰视图”,并在生成单图时加入 <code>orthographic top/bottom view</code>、<code>no perspective</code>、<code>no tilt</code>、<code>no 3/4 angle</code> 约束。<code>web/components/lightbox.tsx</code>、<code>web/components/ad-recreation-board.tsx</code>、<code>web/components/storyboard-editor.tsx</code> 同步更新显示文案和后续分镜提示词。</p>
<p><strong>影响:</strong>以后描述专利六面图需求时应说“正投影俯视图 / 正投影仰视图”,不要只说“俯视角度 / 仰视角度”;这些视图是垂直投影,不是带角度的摄影机视角。</p>
</div>
</article>
<article class="change">
<header>
<h3>2026-05-24 · 默认首页完整重设计为多人创作平台</h3>

View File

@@ -2019,6 +2019,11 @@ function selectProductItemsForRow(
}
/**
 * Resolve the display label for a subject-asset view key.
 *
 * The orthographic `top` / `bottom` labels take precedence; any other key
 * falls back to the matching entry in `SUBJECT_ASSET_VIEWS`, and finally to
 * the raw key itself when no entry matches.
 *
 * @param view - Subject view key, e.g. `"top"`, `"bottom"`, `"front"`.
 * @returns The label to display for that view.
 */
function subjectViewLabel(view: string) {
  // A Map is used instead of indexing a plain object literal: with an object,
  // keys such as "constructor" or "toString" resolve through Object.prototype
  // and the function would return a (truthy) function rather than a label.
  const orthographicLabels = new Map<string, string>([
    ["top", "正投影俯视图"],
    ["bottom", "正投影仰视图"],
  ])
  const override = orthographicLabels.get(view)
  if (override) return override
  return SUBJECT_ASSET_VIEWS.find((item) => item.value === view)?.label ?? view
}
@@ -2034,6 +2039,8 @@ function subjectViewRoleHint(view: string) {
bust_left_45: "肩颈左前近景、手部调整、佩戴贴合",
bust_right_45: "肩颈右前近景、手部调整、佩戴贴合",
back_neck_detail: "后颈肩背特写、触点位置、产品贴合",
top: "正投影俯视、无透视、无倾斜、产品顶部轮廓",
bottom: "正投影仰视、无透视、无倾斜、产品底部结构",
}
return hints[view] ?? "主体参考视角"
}
@@ -2050,6 +2057,8 @@ function subjectViewPromptHint(view: string) {
bust_left_45: "left three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit",
bust_right_45: "right three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit",
back_neck_detail: "back-neck and upper-back detail, contact-pad position, product fit",
top: "orthographic top view, straight down from above, no perspective, no tilt, no three-quarter angle",
bottom: "orthographic bottom view, straight up at underside, no perspective, no tilt, no three-quarter angle",
}
return hints[view] ?? "subject reference view"
}

View File

@@ -36,8 +36,8 @@ const OBJECT_VIEW_OPTIONS = [
["back", "背面"],
["left", "左侧"],
["right", "右侧"],
["top", "顶部"],
["bottom", "底部"],
["top", "正投影俯视图"],
["bottom", "正投影仰视图"],
]
const LIVING_VIEW_OPTIONS = [

View File

@@ -129,7 +129,7 @@ export function StoryboardEditor({ job, frameIndex, onClose }: Props) {
📐
</div>
<div className="text-[9.5px] text-white/45 leading-tight">
/ /
/ /
</div>
</button>
<button