From 15c6f4d2fca6d91e7de7ffe6276c581f59acc565 Mon Sep 17 00:00:00 2001 From: kang Date: Tue, 19 May 2026 20:39:15 +0800 Subject: [PATCH] feat: simplify subject reconstruction layer --- .project.json | 2 +- RULES.md | 2 +- api/main.py | 26 +- docs/source-analysis.html | 27 +- web/components/ad-recreation-board.tsx | 641 +++++++++++++++++-------- web/lib/api.ts | 8 +- 6 files changed, 479 insertions(+), 227 deletions(-) diff --git a/.project.json b/.project.json index d555af2..c2110ab 100644 --- a/.project.json +++ b/.project.json @@ -39,7 +39,7 @@ "type" : "web_login" } ], - "description" : "SKG 信息流广告快速复刻工作台:粘贴 TK 链接或上传视频后点击开始,系统自动下载源视频;下载完成后并行启动音频文案路和视频视觉路。音频路提取原文案\/字幕、中文翻译、讲话人、语速节奏、背景音乐\/环境声\/音效;视觉路自动抽 12 张动作\/节奏参考帧,供生成相似主体、产品素材池、分镜口播和首尾帧审核。当前主流程暂停直接提交视频模型,先保存规划和首尾帧。", + "description" : "SKG 信息流广告快速复刻工作台:粘贴 TK 链接或上传视频后点击开始,系统自动下载源视频;下载完成后并行启动音频文案路和视频视觉路。音频路提取原文案\/字幕、中文翻译、讲话人、语速节奏、背景音乐\/环境声\/音效;视觉路自动抽 12 张动作\/节奏参考帧,转换层按真人重构、卡通重构、元素重构、自主描述四个方向生成全新主体 6 视图,再汇合产品素材池、分镜口播和视频候选生成。", "kind" : "app", "name" : "SKG Marketing Studio \/ SKG 营销内容工作台", "ownership" : "company", diff --git a/RULES.md b/RULES.md index a15f729..f47f457 100644 --- a/RULES.md +++ b/RULES.md @@ -11,7 +11,7 @@ - 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解 - 风格:`04-Dark-Gallery-Ambient`(路径:`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`) - 第一冲刺:步骤 1-4(下载 / 拆轨 / 关键帧 / ASR+翻译) -- 当前产品方向(2026-05-19 再确认):信息流广告快速复刻默认进入“三字段候选生成”工作流。主界面为“左侧素材输入列 + 右侧信息流复刻工作表”。用户粘贴 TK 链接或上传视频后点击“开始分析”,系统自动下载源视频;下载完成后并行启动两条路:音频文案路提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效;视频视觉路自动抽取参考帧。源视频工作区右侧主体链路是“参考帧池 → 转换层 → 主体元素”:参考帧池竖向排列,用户拖 1-2 张关键帧到转换层,转换层按参考创新生成新的主体套图,主体元素区展示后续分镜可用的主体图;旧下方“相似主体 / 主体模板库”不再作为主路径。波形下方的画面胶片只是临时预览,点击只跳转原视频时间点,双击或拖进参考帧池才正式加入关键帧,已加入的胶片直接显示“已添加”。产品图上传后独立形成产品资产包,自动识别视角/结构/比例并补缺角度。分镜工作台按逐句时间轴默认只露“文案 / 场景一句话 / 人物+产品+动作”,产品素材池、批量控制、三字段、视频候选和高级区都必须可折叠;视频候选无内容时默认不占大面积,有候选时默认只显示迷你缩略条,展开后才显示 4-grid。单条默认生成 4 个视频候选,顶部支持整片批量生成候选;首尾帧、视觉规划、产品出现方式和旧 6 字段保留在“高级”抽屉与后端 quick-plan 自动展开中,不能再作为客户默认闸门。 +- 
当前产品方向(2026-05-19 再确认):信息流广告快速复刻默认进入“三字段候选生成”工作流。主界面为“左侧素材输入列 + 右侧信息流复刻工作表”。用户粘贴 TK 链接或上传视频后点击“开始分析”,系统自动下载源视频;下载完成后并行启动两条路:音频文案路提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效;视频视觉路自动抽取参考帧。源视频工作区右侧主体链路是“参考帧池 → 转换层 → 主体元素”:参考帧池竖向排列;转换层只保留真人重构、卡通重构、元素重构、自主描述四个入口,每个入口最多拖入 3 张参考帧,拖入后立即按该方向生成全新 6 视图主体,右侧主体元素区按重构类型分组展示;这四类都属于参考重构,不抠图、不复制原人、不复刻原画面。旧下方“相似主体 / 主体模板库”不再作为主路径。波形下方的画面胶片只是临时预览,点击只跳转原视频时间点,双击或拖进参考帧池才正式加入关键帧,已加入的胶片直接显示“已添加”。产品图上传后独立形成产品资产包,自动识别视角/结构/比例并补缺角度。分镜工作台按逐句时间轴默认只露“文案 / 场景一句话 / 人物+产品+动作”,产品素材池、批量控制、三字段、视频候选和高级区都必须可折叠;视频候选无内容时默认不占大面积,有候选时默认只显示迷你缩略条,展开后才显示 4-grid。单条默认生成 4 个视频候选,顶部支持整片批量生成候选;首尾帧、视觉规划、产品出现方式和旧 6 字段保留在“高级”抽屉与后端 quick-plan 自动展开中,不能再作为客户默认闸门。 ## 部署事实 - 平台:VPS `76.13.31.179`(Ubuntu 24.04 / Docker Compose / Coolify Traefik) diff --git a/api/main.py b/api/main.py index 7470301..f503338 100644 --- a/api/main.py +++ b/api/main.py @@ -532,7 +532,7 @@ class SubjectTemplateItem(BaseModel): source_job_id: str = "" source_frame_idx: int = -1 source_element_id: str = "" - subject_style: Literal["transparent_human", "source_actor"] = "transparent_human" + subject_style: Literal["transparent_human", "source_actor", "cartoon_subject"] = "transparent_human" primary_image: str = "" images: list[SubjectTemplateImage] = Field(default_factory=list) created_at: float = 0.0 @@ -599,7 +599,7 @@ class AssetLibraryItem(BaseModel): is_official: bool = False prompt_brief: str = "" prompt_brief_zh: str = "" - subject_style: Literal["transparent_human", "source_actor"] = "transparent_human" + subject_style: Literal["transparent_human", "source_actor", "cartoon_subject"] = "transparent_human" product_type: str = "" views: list[AssetLibraryImage] = Field(default_factory=list) images: list[AssetLibraryImage] = Field(default_factory=list) @@ -619,7 +619,7 @@ class AssetLibraryPatchReq(BaseModel): source_job_id: str | None = None prompt_brief: str | None = None prompt_brief_zh: str | None = None - subject_style: Literal["transparent_human", "source_actor"] | None = None + subject_style: 
Literal["transparent_human", "source_actor", "cartoon_subject"] | None = None product_type: str | None = None asset_role: str | None = None aspect_ratio: str | None = None @@ -4788,7 +4788,7 @@ class GenerateSubjectAssetsReq(BaseModel): views: list[str] | None = None character_id: str = "" subject_template_id: str = "" - subject_style: Literal["transparent_human", "source_actor"] = "transparent_human" + subject_style: Literal["transparent_human", "source_actor", "cartoon_subject"] = "transparent_human" reconstruction_mode: Literal["same", "similar"] = "same" subject_profile: SubjectProfilePreference | None = None prompt: str = "" @@ -5340,7 +5340,12 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat target = (el.name_en or el.name_zh).strip() bg_phrase = "pure white" if req.background == "white" else "pure black" similar_actor = req.subject_kind == "living" and req.subject_style == "source_actor" and req.reconstruction_mode == "similar" - kind_phrase = "human actor or living character" if req.subject_kind == "living" else "object or product-like subject" + cartoon_subject = req.subject_kind == "living" and req.subject_style == "cartoon_subject" + kind_phrase = ( + "original stylized cartoon or illustrative living character" + if cartoon_subject else + "human actor or living character" if req.subject_kind == "living" else "object or product-like subject" + ) transparent_character_clause = ( TRANSPARENT_HUMAN_POSITIVE_PROMPT + " The generated living character must be a friendly transparent humanoid with transparent or translucent outer body and clean white skeleton visible inside the same body. " @@ -5357,6 +5362,14 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat if similar_actor else "" ) + cartoon_style_clause = ( + "Generate an original stylized cartoon or illustrated advertising character, not a photoreal person and not a copied likeness. 
" + "Use the source brief only for broad role, pose logic, mood, body proportion category, neck-and-shoulder readability, and commercial energy. " + "Change the face, exact silhouette, clothing details, marks, logos, watermarks, captions, and any identifiable source-video features. " + "Keep one consistent cartoon design system, proportions, materials, color language, and character identity across all requested views. " + if cartoon_subject + else "" + ) identity_clause = ( "Create a similar but non-identical original subject: match the performance role, silhouette category, styling direction, camera-readability, and commercial mood, while changing exact identity and unique personal features. " if req.reconstruction_mode == "similar" @@ -5431,6 +5444,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat + prompt_extra_clause + subject_profile_clause + actor_style_clause + + cartoon_style_clause + framing_clause + f"Create a high-definition standalone asset on a solid {bg_phrase} background. " "No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. " @@ -6340,7 +6354,7 @@ class SaveSubjectTemplateReq(BaseModel): frame_idx: int element_id: str asset_ids: list[str] = Field(default_factory=list) - subject_style: Literal["transparent_human", "source_actor"] = "transparent_human" + subject_style: Literal["transparent_human", "source_actor", "cartoon_subject"] = "transparent_human" @app.get("/product-library/skg", response_model=list[ProductLibraryItem]) diff --git a/docs/source-analysis.html b/docs/source-analysis.html index 134db05..ab5090c 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -569,13 +569,13 @@

业务管线

-

当前产品方向已收窄为“信息流广告快速复刻”:主界面左侧是素材输入列,右侧是信息流复刻工作表。后台仍按 01-09 流程顺序计算素材任务、源视频、音频文案、抽帧、主体资产、产品资产、分镜文案、三字段规划和视频候选这些状态,但这些判断不再默认显现在工作区顶部,避免状态提示挤占首屏操作空间。用户粘贴 TK 链接或上传视频后点击“开始分析”,系统自动下载源视频;下载完成后并行启动音频文案路和视频视觉路。音频文案路提取原音频文案/字幕,分析讲话人、语速节奏、背景音乐/环境声/音效,并为后续新口播和分镜文案提供时间轴;视频视觉路同步抽取参考帧。源视频工作区右侧主体链路改为“参考帧池 → 转换层 → 主体元素”:参考帧池只作为竖向原始参考,拖入转换层的 1-2 张关键帧用于参考创新生成新的主体套图,主体元素区承接生成结果供后续分镜使用;旧下方主体模板库不再作为主路径。波形下方的画面胶片由前端临时从源视频截取,密度可调,点击只跳转原视频时间点,双击或拖入参考帧池才调用手动抽帧接口正式写入关键帧;已写入的胶片显示“已添加”,相同素材、相同密度和时长下会复用内存缓存,避免返回页面时重复扫视频。产品图上传后独立形成产品资产包:自动识别视角、左右/上下/内外侧、结构点、比例和风险,并补缺角度。最终分镜规划按逐句时间轴把文案、主体元素和产品资产汇合;每条分镜默认是左侧“文案 / 场景一句话 / 人物+产品+动作”三字段、右侧横向视频候选轨。客户可直接改中文镜像,前端会调用改写/翻译链路自动优化对应英文主值;单条和整片都可选择生成数量,整片按行排队提交。首尾帧、视觉规划、产品出现方式等细节保留在高级抽屉和后端自动展开逻辑里,不再作为客户默认闸门。

+

当前产品方向已收窄为“信息流广告快速复刻”:主界面左侧是素材输入列,右侧是信息流复刻工作表。后台仍按 01-09 流程顺序计算素材任务、源视频、音频文案、抽帧、主体资产、产品资产、分镜文案、三字段规划和视频候选这些状态,但这些判断不再默认显现在工作区顶部,避免状态提示挤占首屏操作空间。用户粘贴 TK 链接或上传视频后点击“开始分析”,系统自动下载源视频;下载完成后并行启动音频文案路和视频视觉路。音频文案路提取原音频文案/字幕,分析讲话人、语速节奏、背景音乐/环境声/音效,并为后续新口播和分镜文案提供时间轴;视频视觉路同步抽取参考帧。源视频工作区右侧主体链路是“参考帧池 → 转换层 → 主体元素”:参考帧池只作为竖向原始参考,转换层收敛为真人重构、卡通重构、元素重构、自主描述四个入口,每个入口最多接收 3 张参考帧,拖入后立即按该方向生成全新 6 视图主体;主体元素区按重构类型分组承接生成结果供后续分镜使用。四种入口都属于参考重构,不抠图、不复制原人、不复刻原画面;旧下方主体模板库不再作为主路径。波形下方的画面胶片由前端临时从源视频截取,密度可调,点击只跳转原视频时间点,双击或拖入参考帧池才调用手动抽帧接口正式写入关键帧;已写入的胶片显示“已添加”,相同素材、相同密度和时长下会复用内存缓存,避免返回页面时重复扫视频。产品图上传后独立形成产品资产包:自动识别视角、左右/上下/内外侧、结构点、比例和风险,并补缺角度。最终分镜规划按逐句时间轴把文案、主体元素和产品资产汇合;每条分镜默认是左侧“文案 / 场景一句话 / 人物+产品+动作”三字段、右侧横向视频候选轨。客户可直接改中文镜像,前端会调用改写/翻译链路自动优化对应英文主值;单条和整片都可选择生成数量,整片按行排队提交。首尾帧、视觉规划、产品出现方式等细节保留在高级抽屉和后端自动展开逻辑里,不再作为客户默认闸门。

01

素材输入

有当前素材任务即通过;输入框只负责创建或切换任务。

02

源视频下载

job.video_url 存在即通过;created/downloading 视为运行中。公开视频默认不带 cookies 下载;只有 TikTok 明确要求登录态时才配置 YTDLP_COOKIES_FILE,生产容器禁止使用 YTDLP_COOKIES_FROM_BROWSER=chrome

03

音频文案

audio_script.source_texttranscript 逐句时间轴有内容即通过。

04

抽帧参考

job.frames.length > 0 即通过;参考帧只做主体重构证据。

-
05

相似主体

关键帧里存在 subject_assets 即通过;生成类似创新主体,不复刻原人。

+
05

主体重构

关键帧里存在 subject_assets 即通过;真人、卡通、元素和自主描述都生成全新 6 视图主体,不复刻原人。

06

产品素材池

product_refs 有记录即通过;不限量上传,后续按分镜最多挑 6 张。

07

分镜文案

逐句时间轴生成后进入分镜;新口播可单段或整片改写。

08

三字段规划

客户默认只编辑文案、场景一句话、人物+产品+动作;高级抽屉保留首尾帧和 6 字段。

@@ -593,8 +593,8 @@ web/next.config.mjsNext.js 构建配置:静态导出、图片不走优化、禁用开发环境左下角 Next Dev Indicator,并移除 Next 16 已不支持的 eslint 顶层配置,避免本地 dev 出现配置 Issue 提示。 web/app/globals.css全局主题变量、登录页视觉样式、信息流工作台同源品牌 token、ReactFlow 样式引用,以及本地开发态 nextjs-portal 遮挡隐藏规则。工作台在 skg-board-theme 内定义 --skg-gold-1--skg-gold-2--skg-cream--skg-bg-*--skg-text-*--skg-radius-* 和按钮阴影等变量,并新增 skg-board-brandskg-stat-cardskg-primary-actionskg-secondary-actionskg-empty-state 等样式。暗色工作台复用登录页金色聚焦、米白主按钮和弱暖光氛围;明亮模式通过 skg-board-theme--light 复用同一套结构,改成暖白底、白色 panel、黑底主 CTA 和深色文本,不另起一套界面。 web/app/page.tsx产品工作台主状态:jobs、activeJobId、生成任务状态;主渲染为全屏素材输入列 + 信息流广告复刻工作表;“开始分析”会把 job 放入并行素材分析队列,下载完成后触发 triggerTranscribe 解析音频,并触发 analyzeJob 自动抽 12 张参考帧,形成“音频文案路 + 视频视觉路”同步推进;音频失败时会忽略失败状态下残留的半成品 transcript,允许再次触发音频解析;底部吸附音频条和旧全局浮动主题按钮不再从主界面渲染,避免和工作台内的明暗模式切换重复。 - web/components/ad-recreation-board.tsx信息流广告复刻工作表:顶部先展示与登录页连续的 SKG brand strip,包含 SKG 字标、“未来健康 · 营销内容工作台”和“营销内容工作台 · TK 二创”;右侧素材/任务/视频/文案统计改为米白 stat 卡片,主动作按钮统一走 skg-primary-action,次动作走 skg-secondary-action,空状态复用 AnimatedLoginCharactersbuildWorkflowSteps 仍统一生成 01-09 流程顺序、状态和判定依据,WorkflowStepBadge / PipelineLane / 分镜列标题也继续共用同一套编号;但完整 WorkflowOrderBar、右侧素材/视频/音频/文案/参考帧需求 chips、文案依据下拉和“音频文案、抽帧参考、相似主体、产品素材池”四个状态条不再默认渲染在工作区顶部。左侧素材输入只负责链接/上传和任务切换,不再重复放横版原视频预览;右侧源视频工作区直接进入核心操作。讲话人、节奏和背景音分析仍写入 AudioScript,但不再作为“音频解析结果”卡片默认渲染;主工作区左侧宽度调整为 430-460px,上方是按 9:16 显示的竖版原视频播放器,播放器内覆盖“当前点抽帧”,按当前播放秒数手动补参考帧,播放器下方是逐句时间轴,英文和中文都最多显示两行;右侧上方是无标题的波形与切点参考框,下方是参考帧池;下一行只保留“相似主体 / 主体模板”。音频波形用参考图式的连续灰色包络显示响度、停顿和密集爆点,顶部把低/中/高密度按钮和当前播放秒数、总时长、鼠标指针停点秒数直接放在波形上方。视频播放时通过 requestAnimationFrame 平滑驱动波形播放线,同时同步高亮并滚动当前句;点击音频波形或字幕行会跳转原视频时间。音频波形下方同框渲染无标题的 TimelineFilmstrip 临时画面胶片,前端按低/中/高密度从源视频 canvas 截取预览缩略图,并按 frame.time / duration 的百分比定位到和波形同一条时间轴上;波形与胶片之间不显示分隔横线,胶片轨道贴近波形,缩略图轻微上下错落并倾斜重叠排列,hover 时用同一张胶片卡在原位置生成固定顶层克隆,约 4.8 倍放大并自动限制在视口内,避免被工作区、滚动容器或相邻面板遮挡;单击胶片只跳转原视频时间,不写入任务数据,双击胶片或拖进参考帧池时才调用手动抽帧并正式加入 job.frames,已加入的胶片显示“已添加”;胶片预览按 job、视频、密度和时长缓存,未切换低/中/高时返回页面不重新扫视频。右侧参考帧池的主入口是“自动抽帧 12 张”,一键按动作峰值目标重新抽取 12 
张源视频参考帧,优先抓手势、表情变化、节奏点和镜头变化;缩略图按竖版完整比例显示不裁切并用更多列紧凑铺开,点选状态直接叠在参考帧池缩略图上,鼠标停留会通过固定浮层放大展示完整帧。“生成 10 张高清图”放在下方相似主体白底视图区,不和抽参考按钮平齐;如果用户没有勾选帧,默认把全部关键帧作为主体参考,勾选后只传已选帧;生成区可在“透明骨架 / 普通真人”之间切换,可选择桌面导入的 5 套内置形象作为创意方向,并可填写统一主体方向,例如年轻女性、更运动、更高级。关键帧和相似主体白底视图都用更小的竖版缩略图密排;白底视图只展示每个 view 的最新一张,缩略图上提供“重新生成这一张”和“删除这一张”,单张重生会用 replace_views=true 替换同一视角。前端调用 generateSubjectAssets 时按主体类型传 subject_style=transparent_humansource_actor,按需传 character_id,并使用 reconstruction_mode=similar;后端会把关键帧和内置形象视为同一个主体的创意证据,并锁定同一性别表现、年龄段、体型、材质、风格和视觉身份,同时生成全身多视角 + 肩颈正/左右近景 + 后颈肩背特写,避免整套图出现男女性别、老少年龄或样式混杂。主体生成完成后会形成 subject_consensus_brief,主体模板保存区可预览/编辑这段 brief。音频结果下方是信息流复刻分镜工作台:顶部产品参考区是“同一产品素材池”,不限量上传产品图,不做不同产品身份判断;上传原图推荐长边 1200-2000px、短边至少 600px,但后端会统一生成最长边 1600px、JPEG 92 的 AI 工作副本,并回显尺寸、自动转换和风险标注;上传后按“套在脖子上的 U 形肩颈按摩仪”进行同一产品批量识别,左/右按佩戴者身体左右、上/下按佩戴方向,额外标注内外侧、开口方向、局部结构点、背景类型、用途标签、生成风险和备注,用户只检查备注,鼠标悬停通过固定浮层显示大图预览,能盖过滚动容器和分镜框架;缺视角补图失败时保留重试入口。脚本区在分镜行上方提供“作者想法”和“整片改写”,每行新口播文案可直接编辑并可单段 AI 改写,分镜时间和原内容列压缩为窄摘要列,新口播列进一步收窄,把横向空间留给画面规划和首尾帧。每条音频分镜纵向排列,行内从左到右串起原内容、新口播文案、画面规划/产品融入和历史候选视频槽;画面规划区先选择镜头类型(人物/情绪、人物+产品、产品特写、场景过渡),再用人物/产品开关、首帧规划、尾帧规划和产品出现方式决定这一条到底需不需要产品图或相似主体参考。当前主流程暂停直接调用视频模型,不再提供“生成本条 · Seedance”或“一键提交全部”视频入口;行内新增“首尾帧闸门”,分别显示/生成首帧和尾帧,旧 keyframe 类型首尾帧会被忽略,只认真正的 asset 首尾帧。生成首尾帧时调用 generateSceneAsset,主体只传 subject_brief,不再传主体图;产品按端点选择最多 1-2 张硬参考图,默认正面,侧面/后颈/厚度/特写等关键词会额外补一张对应视角。关键帧只作为前置主体重构证据和行数据承载位置,不再作为后续视频首尾帧参考。视频候选槽只展示历史候选和待生成占位,按钮改为“保存本条规划 / 保存全部规划”。只有该行勾选“产品”时,首尾帧生成才会从产品素材池按端点视角策略自动挑选最多 1-2 张相关产品图;未勾选产品时不会把产品图提交给首尾帧/后续生视频模型,并走纯文字首尾帧。只有该行勾选“人物”时,才会把主体 brief 注入 prompt;否则 prompt 会明确禁止强行添加主角式透明骨架人,后端也不会再给产品特写强加透明骨架人约束。ModelTrace 会在音频解析、产品识别/补图、相似主体高清视图包、脚本改写等入口旁直接展示模型名;所有生图入口都显示并使用 gpt-image-2,没有其他图片模型 fallback;点击后用固定浮层展示模型链路、输入输出和回退逻辑。旧分镜卡、抽帧控制和视频生成组件仍保留在文件里,但当前主路径不渲染。 - SourceSubjectPipeline源视频工作区右侧主体管线主路径:三栏分别是竖向 参考帧池转换层主体元素。参考帧池保留自动 12 张、胶片拖入正式成帧、点击勾选和删除;参考帧缩略图保持小尺寸固定宽度、aspect-[9/16]object-contain 显示,避免人物比例被裁切或拉伸,也避免缩略图撑满整列,hover 预览通过 MediaAssetTile 的左侧紧凑浮层显示,避免盖住转换层。参考帧本身可拖到转换层。转换层不是抠图区,而是参考创新生成入口:拖入 1-2 
张关键帧后自动调用 generateSubjectAssets,也保留手动重生按钮;转换层参考图列表有滚动上限,拖入多张不会挤掉下方控制;可选择透明骨架/真人、完整 10 张或常用 4 张,并填写统一方向。主体元素区显示生成出的主体套图,缩略图复用 MediaAssetTile,支持 hover 放大、单张重生和删除。旧下方 SourceReferenceBuildPanel 不再主路径渲染。 + web/components/ad-recreation-board.tsx信息流广告复刻工作表:顶部先展示与登录页连续的 SKG brand strip,包含 SKG 字标、“未来健康 · 营销内容工作台”和“营销内容工作台 · TK 二创”;右侧素材/任务/视频/文案统计改为米白 stat 卡片,主动作按钮统一走 skg-primary-action,次动作走 skg-secondary-action,空状态复用 AnimatedLoginCharactersbuildWorkflowSteps 仍统一生成 01-09 流程顺序、状态和判定依据,WorkflowStepBadge / PipelineLane / 分镜列标题也继续共用同一套编号;但完整 WorkflowOrderBar、右侧素材/视频/音频/文案/参考帧需求 chips、文案依据下拉和“音频文案、抽帧参考、主体重构、产品素材池”四个状态条不再默认渲染在工作区顶部。左侧素材输入只负责链接/上传和任务切换,不再重复放横版原视频预览;右侧源视频工作区直接进入核心操作。讲话人、节奏和背景音分析仍写入 AudioScript,但不再作为“音频解析结果”卡片默认渲染;主工作区左侧宽度调整为 430-460px,上方是按 9:16 显示的竖版原视频播放器,播放器内覆盖“当前点抽帧”,按当前播放秒数手动补参考帧,播放器下方是逐句时间轴,英文和中文都最多显示两行;右侧上方是无标题的波形与切点参考框,下方是三栏主体管线。音频波形用参考图式的连续灰色包络显示响度、停顿和密集爆点,顶部把低/中/高密度按钮和当前播放秒数、总时长、鼠标指针停点秒数直接放在波形上方。视频播放时通过 requestAnimationFrame 平滑驱动波形播放线,同时同步高亮并滚动当前句;点击音频波形或字幕行会跳转原视频时间。音频波形下方同框渲染无标题的 TimelineFilmstrip 临时画面胶片,前端按低/中/高密度从源视频 canvas 截取预览缩略图,并按 frame.time / duration 的百分比定位到和波形同一条时间轴上;波形与胶片之间不显示分隔横线,胶片轨道贴近波形,缩略图轻微上下错落并倾斜重叠排列,hover 时用同一张胶片卡在原位置生成固定顶层克隆,约 4.8 倍放大并自动限制在视口内,避免被工作区、滚动容器或相邻面板遮挡;单击胶片只跳转原视频时间,不写入任务数据,双击胶片或拖进参考帧池时才调用手动抽帧并正式加入 job.frames,已加入的胶片显示“已添加”;胶片预览按 job、视频、密度和时长缓存,未切换低/中/高时返回页面不重新扫视频。右侧参考帧池的主入口是“自动抽帧 12 张”,一键按动作峰值目标重新抽取 12 张源视频参考帧,优先抓手势、表情变化、节奏点和镜头变化;缩略图按竖版完整比例显示不裁切,点选状态直接叠在参考帧池缩略图上,鼠标停留会通过固定浮层放大展示完整帧。转换层不再暴露“生成 10 张高清图”、透明骨架/真人或完整/常用视图开关,改成真人重构、卡通重构、元素重构、自主描述四个投放区;每区最多 3 张参考帧,拖入即自动调用 generateSubjectAssets 固定生成 6 视图,卡通重构可选择具体卡通风格,文字方向会进入 prompt。主体元素区按重构类型分组显示结果;只展示每个 view 的最新一张,缩略图上提供“重新生成这一张”和“删除这一张”,单张重生会用 replace_views=true 替换同一视角。前端当前对真人/元素/自主描述传 subject_style=source_actor,对卡通重构传 subject_style=cartoon_subject,并使用 reconstruction_mode=similar;后端会把关键帧反推成非身份化文字 brief,再走 gpt-image-2 文字生图,避免复制原人、原脸和原画面。主体生成完成后会形成 subject_consensus_brief。音频结果下方是信息流复刻分镜工作台:顶部产品参考区是“同一产品素材池”,不限量上传产品图,不做不同产品身份判断;上传原图推荐长边 1200-2000px、短边至少 600px,但后端会统一生成最长边 1600px、JPEG 92 的 
AI 工作副本,并回显尺寸、自动转换和风险标注;上传后按“套在脖子上的 U 形肩颈按摩仪”进行同一产品批量识别,左/右按佩戴者身体左右、上/下按佩戴方向,额外标注内外侧、开口方向、局部结构点、背景类型、用途标签、生成风险和备注,用户只检查备注,鼠标悬停通过固定浮层显示大图预览,能盖过滚动容器和分镜框架;缺视角补图失败时保留重试入口。脚本区在分镜行上方提供“作者想法”和“整片改写”,每行新口播文案可直接编辑并可单段 AI 改写。每条音频分镜默认是左侧三字段、右侧横向视频候选轨;高级区仍保留首尾帧 prompt、产品出现方式和旧 6 字段。ModelTrace 会在音频解析、产品识别/补图、主体重构视图包、脚本改写等入口旁直接展示模型名;所有生图入口都显示并使用 gpt-image-2,没有其他图片模型 fallback;点击后用固定浮层展示模型链路、输入输出和回退逻辑。旧分镜卡、抽帧控制和视频生成组件仍保留在文件里,但当前主路径不渲染。 + SourceSubjectPipeline源视频工作区右侧主体管线主路径:三栏分别是竖向 参考帧池转换层主体元素。参考帧池保留自动 12 张、胶片拖入正式成帧、点击勾选和删除;参考帧缩略图保持小尺寸固定宽度、aspect-[9/16]object-contain 显示,hover 预览通过 MediaAssetTile 的左侧紧凑浮层显示。转换层取消旧的“透明骨架 / 真人”和“完整 10 / 常用 4”开关,改成真人重构、卡通重构、元素重构、自主描述四个投放区;每个区最多保留 3 张参考帧,拖入后立即调用 generateSubjectAssets 生成固定 6 视图,文字输入会参与 prompt,卡通重构额外提供 3D 动画、潮玩公仔、日系清爽、美式插画、黏土玩具、极简扁平等风格。四种模式都强调参考重构:不抠图区、不复制原人原脸、不复刻原画面。主体元素区按重构类型分组显示生成套图,缩略图复用 MediaAssetTile,支持 hover 放大、单张重生和删除。旧下方 SourceReferenceBuildPanel 不再主路径渲染。 AudioStoryboardPlanPanel 三字段候选生成当前分镜主路径:每行是左右双栏,左侧默认显示 skg_copy_*scene_one_line_*action_one_line_* 三组中英字段,右侧直接显示视频候选横向轨。用户改中文镜像后,字段失焦会通过 refineStoryboard 优化对应英文主值,失败时退回 translateText;英文仍是后续 prompt 主值。quickPlanStoryboard 把三字段和主体 brief 展开为完整 StoryboardScenegenerateStoryboardVideocount 可由单行数字控件选择,候选新生成后持续向右追加,不再用 4-grid 撑高每行。整片生成同样可选择每行数量,并以 concurrency=1 按行排队提交。产品素材池、批量控制、每行主体区和高级区都可折叠,高级抽屉仍展示旧 6 字段、首尾帧 prompt 和首尾帧资产槽,但客户默认不用先处理首尾帧。 web/components/resource-library/library-drawer.tsx全局资源中心浮窗:由工作台顶部“资源库”按钮打开,叠加在工作台上方但不阻塞主界面;尺寸、位置和当前 Tab 写入 localStorage["skg-resource-library-drawer"]。提示词 Tab 固定 5 列(场景描述、视频描述、主体描述、SKG 文案、产品角度),每列先显示 use_count 排名前 5 的“常用”,再按月份倒序分组;提示词节点常驻复制按钮,hover 可选英文/中文/双语复制,并调用 use 接口。素材 Tab 固定 4 列(主体、产品、场景、视频),节点不可拖动,按月份倒序硬编码排列;“应用到当前 job”只调用后端复制接口,得到普通 ImageRef(kind="asset") 后再写入产品素材池或复制 ID。浮窗顶部最近 24 小时横条混合显示提示词和素材;新建提示词、上传素材、删除前查引用、详情侧栏都在该组件内完成。 AdRecreationBoard 主题切换顶部指标区左侧有“明亮/暗色”按钮,使用 Sun / Moon 图标切换 skg-board-theme--light 类名,并把选择写入 localStorage["skg-board-theme"]。暗色仍是默认模式;明亮模式只改变工作台外观,不改变任务、素材、分镜、模型调用或接口数据。 @@ -639,7 +639,7 @@ web/app/page.tsx -> 
信息流广告复刻工作表:web/components/ad-recreation-board.tsx -> 开始分析:创建/激活 job → 下载完成后并行触发视频视觉路 analyzeJob 与音频文案路 triggerTranscribe - -> 后台流程判定:01 素材输入 → 02 源视频下载 → 03 音频文案 → 04 抽帧参考 → 05 相似主体 → 06 产品素材池 → 07 分镜文案 → 08 三字段规划 → 09 视频候选;每步从 buildWorkflowSteps 取判定依据和状态,但默认不渲染完整状态条 + -> 后台流程判定:01 素材输入 → 02 源视频下载 → 03 音频文案 → 04 抽帧参考 → 05 主体重构 → 06 产品素材池 → 07 分镜文案 → 08 三字段规划 → 09 视频候选;每步从 buildWorkflowSteps 取判定依据和状态,但默认不渲染完整状态条 -> 左侧素材输入列 + 源视频工作区(竖版 9:16 原视频播放器放大并内置当前点抽帧,逐句时间轴移到原版视频下方,英文/中文最多两行显示;右侧上方连续响度波形显示当前/总时长/指针停点,波形下方是可调低/中/高密度的临时画面胶片,单击仅跳转、双击或拖入参考帧池才正式选帧,并复用同密度胶片缓存;右侧下方是三栏主体管线:参考帧池竖排、转换层参考创新生成套图、主体元素展示生成结果;旧相似主体 / 主体模板区不再主路径渲染;讲话人/节奏/背景音分析写入数据但不默认显示成卡片) -> 信息流复刻分镜工作台:06 同一产品素材池不限量上传 → 自动识别视角 / 背景 / 用途 / 风险 → 人工检查备注 → 07 逐句时间轴 / 原内容 / 新口播文案 → 08 紧凑三字段(文案、场景一句话、人物+产品+动作;可折叠)→ quick-plan 自动展开高级字段 → 单条生成 4 个视频候选 / 收起态迷你缩略条 / 展开态 4-grid / 追加生成 / 选中候选 → 09 整片一键后台批量提交 -> 底部音频条:不再渲染,音频结果集中到右侧工作表 @@ -799,7 +799,7 @@ SubjectAsset { id, name, description, note, prompt_brief, prompt_brief_zh, - subject_style: transparent_human | source_actor, + subject_style: transparent_human | source_actor | cartoon_subject, primary_image, images: SubjectTemplateImage[] } @@ -983,7 +983,7 @@ ProductRefStateItem { 应用清洗POST /cleanup/applyapplyCleanedFrame物理覆盖 frames/{idx}.jpg,并备份原图。 元素增改删POST/PATCH/DELETE /elementsaddElement/updateElement/deleteElement让用户修正 Vision 错误,避免候选结果锁死。 元素提取POST /elements/{element_id}/cutoutcutoutElement调用图像模型生成独立白底素材图,每次累积一张 cutout。 - 主体资产包POST /elements/{element_id}/subject-assets
DELETE /elements/{element_id}/subject-assets/{asset_id}generateSubjectAssets
deleteSubjectAsset根据转换层里的参考帧重新绘制一个统一主体资产包;前端把拖入转换层的 1-6 张关键帧作为 source_frame_indices,不再默认读取全部关键帧,也不在主路径使用内置形象或数据库主体模板。当前源视频工作区支持 subject_style=transparent_humansource_actor 两种相似主体。reconstruction_mode=similar 是创新路径:后端先用 VISION_MODEL 把关键帧反推成非身份化文字 brief,再调用 gpt-image-2/images/generations 文字生图,日志会显示 image_refs=0;这里是参考创新生成套图,不是抠图、复制或 image-edit 复刻。生成完成后,后端会把生成视图反推/写入 KeyElement.subject_consensus_brief,作为后续首尾帧的唯一主体身份文字依据。reconstruction_mode=same 仍保留旧 image-edit 路径,用于确实需要精确复刻且有授权的场景。生成视图可由前端传 views 控制:完整 10 张或常用 4 张;每个 view 单独调用一次生图,明确禁止多视图拼图、contact sheet、多主体、多面板、标签或对比排版。后端不再要求整包全成功才写入:单个视图失败时会保留已成功生成的主体图,返回“部分生成完成”,只有一张都没生成出来才返回错误。replace_views=true 时会替换同一视角旧图;删除接口会移除对应 subject asset 记录并删除本地 jpg 文件。 + 主体资产包POST /elements/{element_id}/subject-assets
DELETE /elements/{element_id}/subject-assets/{asset_id}generateSubjectAssets
deleteSubjectAsset根据转换层里的参考帧重新绘制一个统一主体资产包;前端按真人重构、卡通重构、元素重构、自主描述四个方向分别管理 source_frame_indices,每个方向最多 3 张参考帧,固定请求 frontthree_quarter_leftleftbackrightthree_quarter_right 六个视图,不再暴露完整 10 / 常用 4 选择。当前源视频工作区使用 subject_style=source_actor 承接真人、元素和自主描述,使用 subject_style=cartoon_subject 承接卡通重构;旧 transparent_human 仍为兼容类型但不是当前转换层默认入口。reconstruction_mode=similar 是创新路径:后端先用 VISION_MODEL 把关键帧反推成非身份化文字 brief,再调用 gpt-image-2/images/generations 文字生图,日志会显示 image_refs=0;这里是参考重构生成套图,不是抠图、复制或 image-edit 复刻。卡通重构在后端额外加入原创卡通/插画主体约束,明确不输出真实人物复制 likeness。生成完成后,后端会把生成视图反推/写入 KeyElement.subject_consensus_brief,作为后续首尾帧的唯一主体身份文字依据。reconstruction_mode=same 仍保留旧 image-edit 路径,用于确实需要精确复刻且有授权的场景。每个 view 单独调用一次生图,明确禁止多视图拼图、contact sheet、多主体、多面板、标签或对比排版。后端不再要求整包全成功才写入:单个视图失败时会保留已成功生成的主体图,返回“部分生成完成”,只有一张都没生成出来才返回错误。replace_views=true 时会替换同一视角旧图;删除接口会移除对应 subject asset 记录并删除本地 jpg 文件。 首尾帧资产POST /frames/{idx}/scene-assetgenerateSceneAsset同一接口兼容旧场景图和新首尾帧;当前信息流复刻流程传 asset_role=first_frame/last_framesubject_brief 和最多 1-2 张 product_images。首尾帧不再传主体图、不再把主体图和产品图拼成 contact sheet;主体只走文字 brief,允许新动作、新景别、新表情和新环境。若本条需要产品,后端只把产品参考图作为 gpt-image-2 image-edit 的硬视觉真源;若不需要产品,则走纯文字生图。关键帧只作为行数据承载位置。生成结果保存在 scene_assets,前端再写入 StoryboardScene.first_image/last_image。 产品图库GET /product-library/skglistProductLibrary读取内置 SKG 白底图库 manifest,返回产品标题、品类、尺寸、白底评分和预览图 URL。 产品图入库到 jobPOST /jobs/{id}/assetsPOST /jobs/{id}/assets/product-libraryuploadStoryboardAssetcopyProductLibraryAsset上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本,透明底铺白,过大/过小图片会在 ImageRef.asset_meta 里返回转换动作和风险;黑底/白底背景本身不强行转换。注意该接口只写图片文件,产品素材池列表另由 PUT /jobs/{id}/product-refs 持久化。 @@ -1109,6 +1109,19 @@ ProductRefStateItem {

变更记录

这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。

+
+
+

2026-05-19 · 转换层改为四类主体重构入口

+ UI + Workflow + API +
+
+

问题:旧转换层暴露“透明骨架 / 真人、完整 10 / 常用 4”等生成参数,和当前“参考创新生成套图、避免侵权”的目标不一致;用户更需要把少量参考帧拖到明确方向上,右侧马上看到生成结果。

+

改动:SourceSubjectPipeline 的转换层改成真人重构、卡通重构、元素重构、自主描述四个投放区;每区最多 3 张参考帧,拖入后自动生成固定 6 视图,文字描述会参与 prompt,卡通重构可选择风格。主体元素区按重构类型分组展示结果。web/lib/api.tsapi/main.py 扩展 subject_style=cartoon_subject,后端对卡通主体额外加入原创卡通/插画约束。

+

影响:后续描述这块应说“把参考帧拖到某个重构方向,生成全新主体 6 视图”,不要再说“抠图”“模仿”“透明骨架/真人开关”或“10 张/4 张选择”。

+
+

2026-05-19 · 胶片双击加帧和音频失败重试

diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx index 4ab2287..8b36aba 100644 --- a/web/components/ad-recreation-board.tsx +++ b/web/components/ad-recreation-board.tsx @@ -196,10 +196,12 @@ type AudioStoryboardRow = { type ProductRefItem = ProductRefStateItem type SubjectPlanningRef = ImageRef & { view: string; roleHint: string; consensusBrief?: string } -type SubjectStyleMode = "transparent_human" | "source_actor" +type SubjectStyleMode = "transparent_human" | "source_actor" | "cartoon_subject" type SubjectMode = "template" | "source_similar" type SubjectViewMode = "all" | "common" | "custom" type SubjectPipelineViewMode = "all" | "common" +type SubjectReconstructionMode = "realistic" | "cartoon" | "elements" | "custom" +type CartoonReconstructionStyle = "3d_animation" | "designer_toy" | "japanese_clean" | "american_illustration" | "clay_toy" | "flat_minimal" type SubjectProfileMode = "random" | "manual" type SubjectProfileFieldKey = "gender" | "age" | "wardrobe" | "region_ethnicity" | "skin_tone" | "body" | "hair" | "mood" type SubjectProfileDraft = Record @@ -252,6 +254,54 @@ const SUBJECT_VIEW_ORDER = [ ] const COMMON_SUBJECT_VIEW_VALUES = ["front", "three_quarter_left", "three_quarter_right", "bust_front"] +const RECONSTRUCTION_SUBJECT_VIEW_VALUES = ["front", "three_quarter_left", "left", "back", "right", "three_quarter_right"] +const RECONSTRUCTION_FRAME_LIMIT = 3 +const EMPTY_RECONSTRUCTION_FRAME_MAP: Record = { + realistic: [], + cartoon: [], + elements: [], + custom: [], +} +const DEFAULT_RECONSTRUCTION_DIRECTIONS: Record = { + realistic: "", + cartoon: "", + elements: "", + custom: "", +} +const CARTOON_RECONSTRUCTION_STYLES: Array<{ value: CartoonReconstructionStyle; label: string; prompt: string }> = [ + { value: "3d_animation", label: "3D动画", prompt: "premium 3D animated character, clean commercial toy-like rendering, friendly wellness-ad appeal" }, + { value: "designer_toy", label: "潮玩公仔", prompt: "designer 
art toy character, collectible figurine proportions, polished playful commercial styling" }, + { value: "japanese_clean", label: "日系清爽", prompt: "clean Japanese animation-inspired character, gentle colors, fresh wellness lifestyle advertising feel" }, + { value: "american_illustration", label: "美式插画", prompt: "American editorial advertising illustration character, confident shapes, expressive but polished" }, + { value: "clay_toy", label: "黏土玩具", prompt: "soft clay toy character, tactile handmade material, charming rounded shapes, clean studio look" }, + { value: "flat_minimal", label: "极简扁平", prompt: "minimal flat vector-like character, simple geometric shapes, restrained premium health-tech palette" }, +] +const RECONSTRUCTION_MODES: Array<{ value: SubjectReconstructionMode; label: string; subtitle: string; placeholder: string }> = [ + { + value: "realistic", + label: "真人重构", + subtitle: "参考非身份化人物特点,生成全新真人 6 视图", + placeholder: "如:更年轻、亚洲女性、运动感、不要像原人", + }, + { + value: "cartoon", + label: "卡通重构", + subtitle: "选择风格,把参考转成全新卡通主体 6 视图", + placeholder: "如:更可爱、科技感强、保留肩颈线条", + }, + { + value: "elements", + label: "元素重构", + subtitle: "参考姿态、色块和镜头语言,生成差异化主体", + placeholder: "如:保留运动气质,去掉原服装和原脸", + }, + { + value: "custom", + label: "自主描述", + subtitle: "可不依赖参考帧,直接按描述生成主体 6 视图", + placeholder: "如:30岁亚洲女性,白色运动背心,高级健康科技广告质感", + }, +] const SUBJECT_ASSET_SIZE = "2048" as const @@ -905,12 +955,18 @@ function isSimilarActorElement(element: KeyElement) { const en = (element.name_en || "").toLowerCase() const combined = `${zh} ${en}`.toLowerCase() const zhSimilarSubject = zh.includes("相似") && (zh.includes("主体") || zh.includes("主角") || zh.includes("人物")) + const zhReconstructionSubject = zh.includes("重构") && (zh.includes("主体") || zh.includes("主角") || zh.includes("人物")) const enSimilarSubject = en.includes("similar") && (en.includes("subject") || en.includes("actor") || en.includes("humanoid") || en.includes("character")) + const enReconstructionSubject = en.includes("reconstruction") 
&& (en.includes("subject") || en.includes("actor") || en.includes("character")) return ( zhSimilarSubject + || zhReconstructionSubject || enSimilarSubject + || enReconstructionSubject || combined.includes("相似主角") || combined.includes("相似主体") + || combined.includes("重构主体") + || combined.includes("reconstructed subject") || combined.includes("similar ad actor") || combined.includes("similar actor") || combined.includes("similar subject") @@ -941,6 +997,77 @@ function findSimilarActorSource(preferredFrames: KeyFrame[], allFrames: KeyFrame type SubjectTemplatePromptSource = { name: string; sourceLabel: string } | null +function reconstructionModeConfig(mode: SubjectReconstructionMode) { + return RECONSTRUCTION_MODES.find((item) => item.value === mode) ?? RECONSTRUCTION_MODES[0] +} + +function cartoonStyleConfig(style: CartoonReconstructionStyle) { + return CARTOON_RECONSTRUCTION_STYLES.find((item) => item.value === style) ?? CARTOON_RECONSTRUCTION_STYLES[0] +} + +function reconstructionModeFromElement(element: KeyElement): SubjectReconstructionMode | null { + const text = `${element.name_zh || ""} ${element.name_en || ""}`.toLowerCase() + if (text.includes("真人重构") || text.includes("realistic reconstruction")) return "realistic" + if (text.includes("卡通重构") || text.includes("cartoon reconstruction")) return "cartoon" + if (text.includes("元素重构") || text.includes("element reconstruction")) return "elements" + if (text.includes("自主描述") || text.includes("custom description")) return "custom" + return null +} + +function reconstructionElementName(mode: SubjectReconstructionMode) { + const config = reconstructionModeConfig(mode) + return { + zh: `${config.label}主体`, + en: `${mode} reconstruction subject`, + } +} + +function reconstructionSubjectStyle(mode: SubjectReconstructionMode): SubjectStyleMode { + return mode === "cartoon" ? 
"cartoon_subject" : "source_actor" +} + +function buildReconstructionDirection( + mode: SubjectReconstructionMode, + direction: string, + cartoonStyle: CartoonReconstructionStyle, +) { + const trimmed = direction.trim() + const style = cartoonStyleConfig(cartoonStyle) + const common = [ + "Legal-safe reference reconstruction: use selected reference frames only as non-identifying creative evidence.", + "Do not copy the original person, face, biometric identity, unique likeness, watermark, platform UI, captions, exact outfit, exact background, exact composition, or source pixels.", + `Generate exactly ${RECONSTRUCTION_SUBJECT_VIEW_VALUES.length} separate views of one newly designed subject.`, + "Keep the neck, collarbone, shoulders, upper back, and side neck clean and usable for SKG neck-and-shoulder product placement.", + ] + if (mode === "realistic") { + common.push( + "Direction mode: realistic human reconstruction.", + "Create a new believable commercial ad actor inspired by broad non-identifying traits from the references: role, body-proportion category, gesture vocabulary, wardrobe category, health-ad energy, and camera readability.", + "Change the exact identity and personal features clearly enough that this is a new actor, not the source person.", + ) + } else if (mode === "cartoon") { + common.push( + "Direction mode: cartoon reconstruction.", + `Cartoon style: ${style.label}; ${style.prompt}.`, + "Transform broad pose, emotion, body-readability, and ad energy into a fully original stylized character, not a realistic human and not a traced version of the source.", + ) + } else if (mode === "elements") { + common.push( + "Direction mode: element reconstruction.", + "Extract only abstract visual logic: pose grammar, silhouette category, color-block relationship, camera angle, motion feeling, and wellness-ad atmosphere.", + "Create a clearly different new subject with different identity, wardrobe details, face, styling, and visual design while keeping the 
useful advertising logic.", + ) + } else { + common.push( + "Direction mode: autonomous description.", + "Use the user's written description as the primary subject bible. Reference frames are optional secondary mood evidence only; if they conflict with the text, follow the text.", + "Create a fully original subject from the description without depending on source identity.", + ) + } + if (trimmed) common.push(`User written direction to understand and apply: ${trimmed}`) + return common.join(" ") +} + function buildSimilarSubjectPrompt( subjectStyle: SubjectStyleMode, direction: string, @@ -974,6 +1101,11 @@ function buildSimilarSubjectPrompt( "Keep transparent skin, visible spine, rib cage, pelvis, arm bones, leg bones, and a friendly non-horror wellness advertising look consistent in every view.", "Do not generate a normal opaque human, skeleton-only character, medical anatomy, organs, blood, gore, surgery, hospital, or horror imagery.", ) + } else if (subjectStyle === "cartoon_subject") { + base.push( + "The subject must be an original stylized cartoon or illustrative character, not a photorealistic person and not a transparent skeleton character.", + "Keep the same stylized character identity, proportions, palette, material language, and commercial wellness-ad personality consistent in every view.", + ) } else { base.push( "The subject must be a normal believable commercial ad actor, not a transparent or skeleton character.", @@ -1049,12 +1181,17 @@ function productModelTrace(models?: RuntimeModels): ModelTraceSpec { } function similarSubjectModelTrace(models: RuntimeModels | undefined, subjectStyle: SubjectStyleMode): ModelTraceSpec { + const typeLabel = subjectStyle === "transparent_human" + ? "透明/半透明皮肤包裹可见白色骨架" + : subjectStyle === "cartoon_subject" + ? "原创卡通/插画/潮玩主体" + : "普通商业广告真人" return { - title: subjectStyle === "transparent_human" ? "相似透明骨架主体" : "相似普通真人主体", + title: subjectStyle === "transparent_human" ? "相似透明骨架主体" : subjectStyle === "cartoon_subject" ? 
"卡通重构主体" : "相似普通真人主体", model: modelList([models?.vision, models?.subject_image]), chain: [ `视觉 brief:${modelValue(models?.vision)} 把关键帧/模板图转成非身份化文字 brief;失败时继续用用户方向和模板文字`, - `主体类型:${subjectStyle === "transparent_human" ? "透明/半透明皮肤包裹可见白色骨架" : "普通商业广告真人"}`, + `主体类型:${typeLabel}`, "主体设定:前端把随机组合或手动选择的性别、年龄、着装、地域人种、肤色、体型、发型和气质锁定为结构化 profile", `图像生成:${subjectImageModelChain(models)} 走 /images/generations 逐张文字生图;当前 similar 模式不上传原帧或模板图作为 image-edit 参考`, "身份锁定:整套图必须是同一个主体,性别表现、年龄段、体型、材质和风格保持一致", @@ -3012,50 +3149,77 @@ function SourceSubjectPipeline({ onDropFilmstripFrame?: (time: number) => void }) { const [referenceDropActive, setReferenceDropActive] = useState(false) - const [conversionDropActive, setConversionDropActive] = useState(false) - const [conversionFrameIndices, setConversionFrameIndices] = useState([]) - const [subjectStyle, setSubjectStyle] = useState("transparent_human") - const [subjectViewMode, setSubjectViewMode] = useState("all") - const [subjectDirection, setSubjectDirection] = useState("") - const [subjectBusyFor, setSubjectBusyFor] = useState<{ jobId: string; jobLabel: string; viewCount: number; sourceCount: number; profileLabel: string } | null>(null) + const [activeDropMode, setActiveDropMode] = useState(null) + const [conversionFrameIndicesByMode, setConversionFrameIndicesByMode] = useState>(() => ({ ...EMPTY_RECONSTRUCTION_FRAME_MAP })) + const [reconstructionDirections, setReconstructionDirections] = useState>(() => ({ ...DEFAULT_RECONSTRUCTION_DIRECTIONS })) + const [cartoonStyle, setCartoonStyle] = useState("3d_animation") + const [cartoonStyleOpen, setCartoonStyleOpen] = useState(false) + const [subjectBusyFor, setSubjectBusyFor] = useState<{ jobId: string; jobLabel: string; mode: SubjectReconstructionMode; viewCount: number; sourceCount: number; profileLabel: string } | null>(null) const [subjectAssetBusy, setSubjectAssetBusy] = useState(null) const [lastSubjectProfile, setLastSubjectProfile] = useState(null) const subjectBusy = 
!!subjectBusyFor - const selectedSubjectViews = subjectViewMode === "common" - ? COMMON_SUBJECT_VIEW_VALUES - : SUBJECT_ASSET_VIEWS.map((view) => view.value) - const conversionFrames = useMemo( - () => conversionFrameIndices - .map((index) => frames.find((frame) => frame.index === index)) - .filter((frame): frame is KeyFrame => !!frame), - [conversionFrameIndices, frames], - ) - const actorSource = useMemo( - () => findSimilarActorSource(conversionFrames.length ? conversionFrames : frames, frames), - [conversionFrames, frames], - ) - const visibleActorAssets = useMemo(() => { - const latestByView = new Map() - for (const asset of actorSource?.element.subject_assets ?? []) { - const current = latestByView.get(asset.view) - if (!current || (asset.created_at || 0) >= (current.created_at || 0)) latestByView.set(asset.view, asset) + const selectedSubjectViews = RECONSTRUCTION_SUBJECT_VIEW_VALUES + const conversionFramesByMode = useMemo(() => { + const next = {} as Record + for (const config of RECONSTRUCTION_MODES) { + next[config.value] = conversionFrameIndicesByMode[config.value] + .map((index) => frames.find((frame) => frame.index === index)) + .filter((frame): frame is KeyFrame => !!frame) } - return [...latestByView.values()].sort((a, b) => { - const ai = SUBJECT_VIEW_ORDER.indexOf(a.view) - const bi = SUBJECT_VIEW_ORDER.indexOf(b.view) + return next + }, [conversionFrameIndicesByMode, frames]) + const allConversionFrameIndices = useMemo( + () => new Set(Object.values(conversionFrameIndicesByMode).flat()), + [conversionFrameIndicesByMode], + ) + const actorSources = useMemo(() => { + const items: Array<{ frame: KeyFrame; element: KeyElement; mode: SubjectReconstructionMode }> = [] + for (const frame of frames) { + for (const element of frame.elements || []) { + const mode = reconstructionModeFromElement(element) ?? (isSimilarActorElement(element) ? 
"realistic" : null) + if (mode && element.subject_assets?.length) items.push({ frame, element, mode }) + } + } + return items + }, [frames]) + const visibleActorAssets = useMemo(() => { + const items: Array<{ frame: KeyFrame; element: KeyElement; mode: SubjectReconstructionMode; asset: SubjectAsset }> = [] + for (const source of actorSources) { + const latestByView = new Map() + for (const asset of source.element.subject_assets ?? []) { + const current = latestByView.get(asset.view) + if (!current || (asset.created_at || 0) >= (current.created_at || 0)) latestByView.set(asset.view, asset) + } + for (const asset of latestByView.values()) items.push({ ...source, asset }) + } + return items.sort((a, b) => { + const mi = RECONSTRUCTION_MODES.findIndex((item) => item.value === a.mode) + const mj = RECONSTRUCTION_MODES.findIndex((item) => item.value === b.mode) + if (mi !== mj) return mi - mj + const ai = SUBJECT_VIEW_ORDER.indexOf(a.asset.view) + const bi = SUBJECT_VIEW_ORDER.indexOf(b.asset.view) return (ai === -1 ? 99 : ai) - (bi === -1 ? 
99 : bi) }) - }, [actorSource]) + }, [actorSources]) useEffect(() => { - setConversionFrameIndices([]) + setConversionFrameIndicesByMode({ ...EMPTY_RECONSTRUCTION_FRAME_MAP }) + setReconstructionDirections({ ...DEFAULT_RECONSTRUCTION_DIRECTIONS }) setLastSubjectProfile(null) setSubjectBusyFor(null) setSubjectAssetBusy(null) + setActiveDropMode(null) + setCartoonStyleOpen(false) }, [job.id]) useEffect(() => { - setConversionFrameIndices((current) => current.filter((index) => frames.some((frame) => frame.index === index))) + setConversionFrameIndicesByMode((current) => { + const next = {} as Record + for (const config of RECONSTRUCTION_MODES) { + next[config.value] = current[config.value].filter((index) => frames.some((frame) => frame.index === index)) + } + return next + }) }, [frames]) const buildSubjectProfileForRequest = () => { @@ -3064,7 +3228,7 @@ function SourceSubjectPipeline({ return resolved } - const generateSubjectPack = async (sourceIndices = conversionFrameIndices) => { + const generateSubjectPack = async (mode: SubjectReconstructionMode, sourceIndices = conversionFrameIndicesByMode[mode]) => { if (subjectBusyFor) { toast.warning("主体套图正在生成中,完成后再重生。") return @@ -3072,34 +3236,44 @@ function SourceSubjectPipeline({ const sourceFrames = sourceIndices .map((index) => frames.find((frame) => frame.index === index)) .filter((frame): frame is KeyFrame => !!frame) - if (!sourceFrames.length) { - toast.warning("先把参考帧拖到转换层。") + if (!sourceFrames.length && mode !== "custom") { + toast.warning(`先把参考帧拖到${reconstructionModeConfig(mode).label}。`) + return + } + const baseFrame = sourceFrames[0] ?? frames[0] + if (!baseFrame) { + toast.warning("先完成抽帧,或从胶片加入至少一张参考帧。") return } - const baseFrame = sourceFrames[0] const requestJobId = job.id - const requestProfile = buildSubjectProfileForRequest() + const requestProfile = mode === "custom" && reconstructionDirections.custom.trim() + ? 
null + : buildSubjectProfileForRequest() + const subjectStyle = reconstructionSubjectStyle(mode) + const userDirection = buildReconstructionDirection(mode, reconstructionDirections[mode], cartoonStyle) + const modeName = reconstructionElementName(mode) setSubjectBusyFor({ jobId: requestJobId, jobLabel: shortId(requestJobId), + mode, viewCount: selectedSubjectViews.length, sourceCount: sourceFrames.length, - profileLabel: requestProfile.summary, + profileLabel: requestProfile?.summary ?? "按自主描述", }) try { let workingJob = job let workingFrame = workingJob.frames.find((frame) => frame.index === baseFrame.index) ?? baseFrame - let element = workingFrame.elements?.find(isSimilarActorElement) + let element = workingFrame.elements?.find((item) => reconstructionModeFromElement(item) === mode) if (!element) { workingJob = await addElement(requestJobId, baseFrame.index, { - name_zh: subjectStyle === "transparent_human" ? "参考创新透明骨架主体" : "参考创新广告主角", - name_en: subjectStyle === "transparent_human" ? "reference inspired transparent skeleton humanoid subject" : "reference inspired ad actor", - position: "generated from conversion layer reference frames", + name_zh: modeName.zh, + name_en: modeName.en, + position: `${reconstructionModeConfig(mode).label} · generated from conversion layer references`, source: "manual", }) onJobUpdate(workingJob) workingFrame = workingJob.frames.find((frame) => frame.index === baseFrame.index) ?? workingFrame - element = workingFrame.elements?.find(isSimilarActorElement) + element = workingFrame.elements?.find((item) => reconstructionModeFromElement(item) === mode) ?? 
workingFrame.elements?.[workingFrame.elements.length - 1] } if (!element) throw new Error("subject element missing") @@ -3110,64 +3284,81 @@ function SourceSubjectPipeline({ reconstruction_mode: "similar", background: "white", size: SUBJECT_ASSET_SIZE, - source_frame_indices: sourceFrames.slice(0, 8).map((frame) => frame.index), + source_frame_indices: sourceFrames.slice(0, RECONSTRUCTION_FRAME_LIMIT).map((frame) => frame.index), views: selectedSubjectViews, - subject_profile: requestProfile.payload, - prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection, null, requestProfile), + subject_profile: requestProfile?.payload ?? null, + prompt: buildSimilarSubjectPrompt(subjectStyle, userDirection, null, requestProfile), replace_views: true, }) onJobUpdate(updated) - toast.success(`主体套图已生成:${selectedSubjectViews.length} 张`) + toast.success(`${reconstructionModeConfig(mode).label}已生成:${selectedSubjectViews.length} 张`) } catch (e) { try { onJobUpdate(await getJob(requestJobId)) } catch { /* keep original error visible */ } - toast.error("主体套图生成失败:" + (e instanceof Error ? e.message : String(e))) + toast.error(`${reconstructionModeConfig(mode).label}生成失败:` + (e instanceof Error ? e.message : String(e))) } finally { setSubjectBusyFor(null) } } - const addConversionFrame = (frame: KeyFrame) => { - const existed = conversionFrameIndices.includes(frame.index) + const addConversionFrame = (mode: SubjectReconstructionMode, frame: KeyFrame) => { + const current = conversionFrameIndicesByMode[mode] + const existed = current.includes(frame.index) const next = existed - ? conversionFrameIndices - : [...conversionFrameIndices, frame.index].slice(0, 6) - setConversionFrameIndices(next) + ? current + : current.length >= RECONSTRUCTION_FRAME_LIMIT + ? 
[...current.slice(1), frame.index] + : [...current, frame.index] + setConversionFrameIndicesByMode((state) => ({ ...state, [mode]: next })) if (existed) { - toast.info("这张参考帧已经在转换层。") + toast.info(`这张参考帧已经在${reconstructionModeConfig(mode).label}里。`) return } - toast.info(`已加入转换层:${frame.timestamp.toFixed(1)}s,开始生成主体套图。`) - void generateSubjectPack(next) + if (current.length >= RECONSTRUCTION_FRAME_LIMIT) { + toast.warning(`${reconstructionModeConfig(mode).label}最多保留 ${RECONSTRUCTION_FRAME_LIMIT} 张参考帧,已替换为最近拖入的组合。`) + } + toast.info(`已加入${reconstructionModeConfig(mode).label}:${frame.timestamp.toFixed(1)}s,开始生成 6 视图。`) + void generateSubjectPack(mode, next) } - const removeConversionFrame = (frameIndex: number) => { - setConversionFrameIndices((current) => current.filter((index) => index !== frameIndex)) + const removeConversionFrame = (mode: SubjectReconstructionMode, frameIndex: number) => { + setConversionFrameIndicesByMode((state) => ({ + ...state, + [mode]: state[mode].filter((index) => index !== frameIndex), + })) } - const regenerateSubjectAsset = async (asset: SubjectAsset) => { - if (!actorSource) return + const regenerateSubjectAsset = async (item: { frame: KeyFrame; element: KeyElement; mode: SubjectReconstructionMode; asset: SubjectAsset }) => { + const { frame, element, mode, asset } = item const sourceIndices = asset.source_frame_indices?.length ? asset.source_frame_indices - : conversionFrames.map((frame) => frame.index) - if (!sourceIndices.length) { + : conversionFrameIndicesByMode[mode] + if (!sourceIndices.length && mode !== "custom") { toast.warning("转换层没有参考帧,不能重生。") return } setSubjectAssetBusy(`regen:${asset.id}`) try { - const requestProfile = lastSubjectProfile ?? buildSubjectProfileForRequest() - const updated = await generateSubjectAssets(job.id, actorSource.frame.index, actorSource.element.id, { + const requestProfile = mode === "custom" && reconstructionDirections.custom.trim() + ? null + : lastSubjectProfile ?? 
buildSubjectProfileForRequest() + const subjectStyle = reconstructionSubjectStyle(mode) + const updated = await generateSubjectAssets(job.id, frame.index, element.id, { subject_kind: "living", subject_style: subjectStyle, reconstruction_mode: "similar", background: asset.background || "white", size: SUBJECT_ASSET_SIZE, - source_frame_indices: sourceIndices, + source_frame_indices: sourceIndices.slice(0, RECONSTRUCTION_FRAME_LIMIT), views: [asset.view], - subject_profile: requestProfile.payload, - prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection, null, requestProfile), + subject_profile: requestProfile?.payload ?? null, + prompt: buildSimilarSubjectPrompt( + subjectStyle, + buildReconstructionDirection(mode, reconstructionDirections[mode], cartoonStyle), + null, + requestProfile, + ), replace_views: true, }) onJobUpdate(updated) @@ -3179,11 +3370,11 @@ function SourceSubjectPipeline({ } } - const deleteActorAsset = async (asset: SubjectAsset) => { - if (!actorSource) return + const deleteActorAsset = async (item: { frame: KeyFrame; element: KeyElement; asset: SubjectAsset }) => { + const { frame, element, asset } = item setSubjectAssetBusy(`delete:${asset.id}`) try { - const updated = await deleteSubjectAsset(job.id, actorSource.frame.index, actorSource.element.id, asset.id) + const updated = await deleteSubjectAsset(job.id, frame.index, element.id, asset.id) onJobUpdate(updated) toast.success("主体元素已删除") } catch (e) { @@ -3269,11 +3460,11 @@ function SourceSubjectPipeline({ previewPlacement="left" previewMaxWidth={320} previewClassName="p-2" - selected={selected || conversionFrameIndices.includes(frame.index)} + selected={selected || allConversionFrameIndices.has(frame.index)} title={`${selected ? "已选 · 点击取消" : "点击选择"} · 拖到转换层生成主体套图`} onClick={() => onToggleFrame(frame.index)} topLeft={{String(index + 1).padStart(2, "0")}} - topRight={{conversionFrameIndices.includes(frame.index) ? : selected ? 
: }} + topRight={{allConversionFrameIndices.has(frame.index) ? : selected ? : }} onDelete={onDeleteFrame ? () => onDeleteFrame(frame.index) : undefined} deleting={deletingFrame === frame.index} deleteLabel={`删除参考帧 ${index + 1}`} @@ -3293,119 +3484,138 @@ function SourceSubjectPipeline({
} title="转换层" /> - +
-
{ - if (!Array.from(event.dataTransfer.types).includes(SOURCE_KEYFRAME_DRAG_TYPE)) return - event.preventDefault() - setConversionDropActive(true) - }} - onDragOver={(event) => { - if (!Array.from(event.dataTransfer.types).includes(SOURCE_KEYFRAME_DRAG_TYPE)) return - event.preventDefault() - event.dataTransfer.dropEffect = "copy" - }} - onDragLeave={(event) => { - const next = event.relatedTarget as Node | null - if (next && event.currentTarget.contains(next)) return - setConversionDropActive(false) - }} - onDrop={(event) => { - event.preventDefault() - setConversionDropActive(false) - const frameIndex = Number(event.dataTransfer.getData(SOURCE_KEYFRAME_DRAG_TYPE)) - const frame = frames.find((item) => item.index === frameIndex) - if (frame) addConversionFrame(frame) - }} - > +
- 拖入 1-2 张参考帧后自动生成主体套图;这里做参考创新,不抠原图。 + 拖入 1-3 张参考帧到对应方向;系统只做参考重构,不复制原人、原脸或原画面。
-
- {conversionFrames.map((frame, index) => ( -
- {String(index + 1).padStart(2, "0")}} - /> - -
- ))} - {!conversionFrames.length ? ( -
- 把左侧参考帧拖到这里。 -
- ) : null} -
-
-
- {[ - { value: "transparent_human" as const, label: "透明骨架" }, - { value: "source_actor" as const, label: "真人" }, - ].map((item) => ( - - ))} -
-
- {[ - { value: "all" as const, label: `完整 ${SUBJECT_ASSET_VIEWS.length}` }, - { value: "common" as const, label: `常用 ${COMMON_SUBJECT_VIEW_VALUES.length}` }, - ].map((item) => ( - - ))} -
- setSubjectDirection(event.target.value)} - placeholder="统一方向:更年轻 / 更高级 / 运动感" - className="h-9 w-full rounded-md border border-white/10 bg-black/35 px-2.5 text-[11px] text-white outline-none placeholder:text-white/28 focus:border-cyan-300/50" - /> - +
+
+
{modeConfig.label}
+
{modeConfig.subtitle}
+
+ + {modeFrames.length}/{RECONSTRUCTION_FRAME_LIMIT} + +
+
+ {modeFrames.map((frame, index) => ( +
+ {String(index + 1).padStart(2, "0")}} + /> + +
+ ))} + {!modeFrames.length ? ( +
+ {mode === "custom" ? "可只写描述,也可拖入参考。" : "把参考帧拖到这里。"} +
+ ) : null} +
+ {mode === "cartoon" ? ( +
+ + {cartoonStyleOpen ? ( +
+ {CARTOON_RECONSTRUCTION_STYLES.map((style) => ( + + ))} +
+ ) : null} +
+ ) : null} +