From 9790e5bedb08f6ece6f48db97f4953e3535b90ce Mon Sep 17 00:00:00 2001 From: kang Date: Mon, 18 May 2026 07:27:45 +0800 Subject: [PATCH] auto-save 2026-05-18 07:27 (~6) --- .memory/worklog.json | 26 ++-- RULES.md | 2 +- api/main.py | 67 +++++++--- docs/source-analysis.html | 4 +- web/components/ad-recreation-board.tsx | 172 +++++++++++++++++++++---- web/lib/api.ts | 2 + 6 files changed, 213 insertions(+), 60 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index 55cf794..167d26d 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1,18 +1,5 @@ { "entries": [ - { - "files_changed": 6, - "hash": "6c9806c", - "message": "auto-save 2026-05-15 17:11 (~6)", - "ts": "2026-05-15T17:11:52+08:00", - "type": "commit" - }, - { - "files_changed": 2, - "message": "Codex 会话活跃 · 最近命令:codex · 2 项未提交变更 · 最近提交:auto-save 2026-05-15 17:11 (~6)", - "ts": "2026-05-15T09:14:48Z", - "type": "session-heartbeat" - }, { "files_changed": 2, "hash": "f590d51", @@ -3254,6 +3241,19 @@ "type": "session-heartbeat", "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:fix: send product angle refs as image inputs", "files_changed": 1 + }, + { + "ts": "2026-05-18T07:22:23+08:00", + "type": "commit", + "message": "auto-save 2026-05-18 07:22 (~2)", + "hash": "4653108", + "files_changed": 2 + }, + { + "ts": "2026-05-17T23:23:45Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 4 项未提交变更 · 最近提交:auto-save 2026-05-18 07:22 (~2)", + "files_changed": 4 } ] } diff --git a/RULES.md b/RULES.md index cb81ee3..92addc4 100644 --- a/RULES.md +++ b/RULES.md @@ -11,7 +11,7 @@ - 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解 - 风格:`04-Dark-Gallery-Ambient`(路径:`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`) - 第一冲刺:步骤 1-4(下载 / 拆轨 / 关键帧 / ASR+翻译) -- 当前产品方向(2026-05-17 再确认):先解决信息流广告快速复刻的第一步,不再沿用“开始后自动抽帧、分镜、元素生成、合成”的默认做法。主界面为“左侧素材输入列 + 右侧音频解析工作表”。用户粘贴 TK 链接或上传视频后点击“开始”,系统自动下载源视频;下载完成后优先提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效。抽帧、分镜规划、产品融入、元素 6 视图和视频合成暂作为后续能力保留,不在当前第一步自动触发。 +- 当前产品方向(2026-05-17 再确认):先解决信息流广告快速复刻的第一步,不再沿用“开始后自动抽帧、分镜、元素生成、合成”的默认做法。主界面为“左侧素材输入列 + 右侧音频解析工作表”。用户粘贴 TK 链接或上传视频后点击“开始”,系统自动下载源视频;下载完成后优先提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效。抽帧、分镜规划、产品融入、相似主体高清视图包(最多 10 张,含肩颈/后背特写)和视频合成暂作为后续能力保留,不在当前第一步自动触发。 ## 部署事实 - 平台:VPS `76.13.31.179`(Ubuntu 24.04 / Docker Compose / Coolify Traefik) diff --git a/api/main.py b/api/main.py index d34e336..d19efa4 100644 --- a/api/main.py +++ b/api/main.py @@ -4056,7 +4056,7 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job: @app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/subject-assets", response_model=Job) def generate_subject_assets(job_id: str, idx: int, element_id: str, req: GenerateSubjectAssetsReq) -> Job: """为一个主体生成多视角资产包。 - 如果传入 source_frame_indices,则把多张已选关键帧拼成参考板,表示这些帧都在服务同一个主体。""" + 如果传入 source_frame_indices 或内置 character_id,则把多张参考图作为独立 image[] 证据提交。""" import time as _time job = JOBS.get(job_id) if not job: @@ -4071,13 +4071,30 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat source_indices = [idx] + source_indices source_indices = list(dict.fromkeys(source_indices))[:12] + character_reference_paths: list[Path] = [] + character_reference_clause = "" + character_label = "" + character_id = (req.character_id or "").strip() + if character_id: + character = find_character_library_item(character_id) + character_label = character.name + for image in character.images[:7]: + character_reference_paths.append(character_library_file(image.filename)) + character_reference_clause = ( + f"Selected built-in creative character reference: {character.name}. " + "Use these planned character images as a high-quality creative direction and anatomy/style bible only; " + "do not copy the exact face, exact pose, exact silhouette, pixels, or make a duplicate. " + "Create a new innovative variation that keeps the same broad role, transparent wellness character language, " + "camera readability, and shoulder/neck product compatibility. " + ) + model_src, tmp_focus = _focus_source_for_element(job_id, idx, el) - sheet_tmp: Path | None = None - if len(source_indices) > 1: - sheet_tmp = job_dir(job_id) / "tmp" / f"subject_refs_{idx:03d}_{element_id}_{uuid.uuid4().hex[:6]}.jpg" - sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp, max_items=12) - if sheet: - model_src = sheet + frame_reference_paths = [p for p in (_source_frame_path(job_id, i) for i in source_indices) if p.exists()] + if character_reference_paths: + remaining = max(0, 10 - len(character_reference_paths)) + model_src = character_reference_paths + frame_reference_paths[:remaining] + elif len(frame_reference_paths) > 1: + model_src = frame_reference_paths[:10] try: with Image.open(_source_frame_path(job_id, idx)) as src_im: @@ -4118,18 +4135,27 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat prompt_extra = req.prompt.strip() prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else "" identity_lock_clause = ( - "Identity lock: these API calls generate a six-view pack for ONE single subject, but each individual output file must show only its one requested view. " + "Identity lock: these API calls generate one high-definition multi-view pack for ONE single subject, but each individual output file must show only its one requested view. " "Before rendering, infer one consistent character bible from the reference image(s): gender presentation, age range, body proportions, head shape, face direction cues, material, silhouette, wardrobe/material style, and commercial mood. " "Keep that same character bible unchanged across every generated view in separate files. " - "If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same six-view pack. " + "If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same pack. " "For transparent humanoids, keep the same transparent skin shell, skeleton proportions, visible spine/rib cage/pelvis/limb bones, and non-horror wellness character style in every view. " ) + neck_product_clause = ( + "This subject pack is for SKG neck-and-shoulder wearable massage device videos. " + "Make the neck, collarbone, shoulder line, upper back, side neck, and shoulder slope clear and product-ready. " + "Avoid bulky collars, scarves, hair, hoods, props, or poses that hide the neck/shoulder placement area. " + "For back and close-up views, prioritize the cervical spine, shoulder blades, upper trapezius, and clean wearable-device contact area. " + ) models = [GPT_IMAGE_MODEL] generated: list[SubjectAsset] = [] try: for view, view_label in _subject_view_labels(req.subject_kind, req.views): + closeup_view = view in {"bust", "back_detail", "bust_front", "bust_left_45", "bust_right_45", "back_neck_detail"} or "detail" in view if req.subject_kind == "living": - if view.startswith("expression_"): + if closeup_view: + view_prompt = f"upper-body shoulder-and-neck close-up character reference, {view_label}" + elif view.startswith("expression_"): emotion = view_label.replace("表情", "") view_prompt = f"full-body upright standing character reference with a clear {emotion} facial expression" elif view.startswith("action_") or view == "side_walk": @@ -4142,8 +4168,14 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat single_view_clause = ( f"Single-image output rule: this output file is ONLY for the {view_label} view ({view_name}). " "Render exactly one subject, one time, in one pose and one camera angle. " - "Do not create a six-view sheet, contact sheet, grid, storyboard, lineup, comparison layout, before/after layout, mirrored pair, duplicate subjects, thumbnails, labels, captions, arrows, view names, panel borders, or multiple versions in the same image. " - "Do not include any of the other five views in this image. " + "Do not create a multi-view sheet, contact sheet, grid, storyboard, lineup, comparison layout, before/after layout, mirrored pair, duplicate subjects, thumbnails, labels, captions, arrows, view names, panel borders, or multiple versions in the same image. " + "Do not include any other views in this image. " + ) + framing_clause = ( + "For this close-up view, intentionally crop as an upper-body asset from head/neck to chest or upper back; the neck, shoulders, collarbone or upper spine area must be large, clear, and useful for placing a neck-and-shoulder massage device. " + "Do not force full-body framing for close-ups. " + if closeup_view and req.subject_kind == "living" + else "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. " ) prompt = ( f"Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. " @@ -4152,15 +4184,16 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat + single_view_clause + identity_clause + identity_lock_clause + + character_reference_clause + + neck_product_clause + canvas_clause + prompt_extra_clause + actor_style_clause - + "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. " - "Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. " + + framing_clause f"Create a high-definition standalone asset on a solid {bg_phrase} background. " "No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. " "If the source is incomplete, partially visible, occluded, or low resolution, reconstruct the missing parts by redrawing a clean complete subject while staying consistent with the reference. " - "For living subjects, keep a normal upright standing pose for the standard views; do not create sitting, walking, medical, horror, or distorted anatomy unless explicitly requested by the view label. " + "For living standard full-body views, keep a normal upright standing pose; do not create sitting, walking, medical, horror, or distorted anatomy unless explicitly requested by the view label. " + transparent_character_clause ) try: @@ -4174,7 +4207,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat generated.append(SubjectAsset( id=asset_id, view=view, - label=f"{el.name_zh} · {view_label}", + label=f"{el.name_zh} · {view_label}" + (f" · {character_label}" if character_label else ""), url=_asset_url(job_id, asset_id), width=width, height=height, @@ -4185,7 +4218,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat created_at=_time.time(), )) finally: - for p in (tmp_focus, sheet_tmp): + for p in (tmp_focus,): if p and p.exists(): try: p.unlink() except OSError: pass diff --git a/docs/source-analysis.html b/docs/source-analysis.html index 4bf944b..69d0dcc 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -569,7 +569,7 @@

业务管线

-

当前产品方向已收窄为“信息流广告快速复刻”:主界面左侧是素材输入列,右侧先完成音频解析,再进入信息流复刻分镜工作台。用户粘贴 TK 链接或上传视频后点击“开始”,系统自动下载源视频;下载完成后优先提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效。分镜规划按逐句时间轴生成;视觉参考改为原版视频下方的关键帧池:显眼保留“自动抽帧 12 张”,也可在竖版播放器内按当前播放点手动补帧;生成 6 视图时未勾选关键帧则默认使用全部帧,勾选后只用已选帧,并按透明骨架人或普通真人两种主体类型生成“类似但不复刻”的统一相似主体。

+

当前产品方向已收窄为“信息流广告快速复刻”:主界面左侧是素材输入列,右侧先完成音频解析,再进入信息流复刻分镜工作台。用户粘贴 TK 链接或上传视频后点击“开始”,系统自动下载源视频;下载完成后优先提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效。分镜规划按逐句时间轴生成;视觉参考改为原版视频下方的关键帧池:显眼保留“自动抽帧 12 张”,也可在竖版播放器内按当前播放点手动补帧;生成相似主体时未勾选关键帧则默认使用全部帧,勾选后只用已选帧,也可选择 5 套内置形象作为创意方向,并按透明骨架人或普通真人两种主体类型生成“类似但不复刻”的高清主体视图包。

1

导入素材

粘贴 TK / 信息流视频链接或上传本地视频;“开始”只把任务放入第一步队列。

2

下载源视频

后端用 yt-dlp 或本地上传文件落 source.mp4,记录时长、尺寸和视频只读地址。

@@ -589,7 +589,7 @@ web/next.config.mjsNext.js 构建配置:静态导出、图片不走优化、禁用开发环境左下角 Next Dev Indicator,并移除 Next 16 已不支持的 eslint 顶层配置,避免本地 dev 出现配置 Issue 提示。 web/app/globals.css全局主题变量、登录页视觉样式、ReactFlow 样式引用,以及本地开发态 nextjs-portal 遮挡隐藏规则。 web/app/page.tsx产品工作台主状态:jobs、activeJobId、生成任务状态;主渲染为全屏素材输入列 + 信息流广告复刻工作表;“开始”编排状态只负责在下载完成后自动触发 triggerTranscribe,不再默认触发抽帧、Vision 扫描或分镜初稿保存;底部吸附音频条不再从主界面渲染。 - web/components/ad-recreation-board.tsx信息流广告复刻工作表:左侧素材输入只负责链接/上传和任务切换,不再重复放横版原视频预览;右侧展示视频下载状态、默认折叠的文案依据,以及源视频工作区。音频解析结果改成默认折叠的辅助信息,展开后同一行看讲话人/节奏/背景音;主工作区左侧是按 9:16 显示的竖版原视频播放器,播放器内覆盖“当前点抽帧”,按当前播放秒数手动补参考帧;右侧上方是音频波形 / 切点参考,下方是逐句时间轴;下一行铺开“关键帧 / 相似主体”。音频波形用参考图式的连续灰色包络显示响度、停顿和密集爆点,顶部同时显示当前播放秒数、总时长和鼠标指针停点秒数。视频播放时通过 requestAnimationFrame 平滑驱动波形播放线,同时同步高亮并滚动当前句;点击音频波形或字幕行会跳转原视频时间。关键帧区的主入口是“自动抽帧 12 张”,一键按动作峰值目标重新抽取 12 张源视频参考帧,优先抓手势、表情变化、节奏点和镜头变化,缩略图按竖版完整比例显示不裁切并用更多列紧凑铺开,鼠标停留会通过固定浮层放大展示完整帧。“生成 6 视图”放在相似主体白底视图区,不和抽参考按钮平齐;如果用户没有勾选帧,默认把全部关键帧作为主体参考,勾选后只传已选帧;生成区可在“透明骨架 / 普通真人”之间切换,并可填写统一主体方向,例如年轻女性、更运动、更高级。关键帧和相似主体白底视图都用更小的竖版缩略图密排;白底视图只展示每个 view 的最新一张,缩略图上提供“重新生成这一张”和“删除这一张”,单张重生会用 replace_views=true 替换同一视角,不追加成第 7 张。前端调用 generateSubjectAssets 时按主体类型传 subject_style=transparent_humansource_actor,均使用 reconstruction_mode=similar;后端会把这些帧视为同一个主体的证据,并锁定同一性别表现、年龄段、体型、材质、风格和视觉身份,避免六视图出现男女性别、老少年龄或样式混杂。音频结果下方是信息流复刻分镜工作台:顶部产品参考区是“同一产品素材池”,不限量上传产品图,不做不同产品身份判断;上传原图推荐长边 1200-2000px、短边至少 600px,但后端会统一生成最长边 1600px、JPEG 92 的 AI 工作副本,并回显尺寸、自动转换和风险标注;上传后按“套在脖子上的 U 形肩颈按摩仪”进行同一产品批量识别,左/右按佩戴者身体左右、上/下按佩戴方向,额外标注内外侧、开口方向、局部结构点、背景类型、用途标签、生成风险和备注,用户只检查备注,鼠标悬停通过固定浮层显示大图预览,能盖过滚动容器和分镜框架;缺视角补图失败时保留重试入口。脚本区在分镜行上方提供“作者想法”和“整片改写”,每行新口播文案可直接编辑并可单段 AI 改写,分镜时间和原内容列压缩为窄摘要列,把横向空间留给新口播、画面规划和视频候选;生成本条视频时使用当前编辑后的新口播文案。每条音频分镜纵向排列,行内从左到右串起原内容、新口播文案、画面规划/产品融入和 6 个候选视频槽;候选视频槽在宽屏下一排显示 6 个竖版预览,避免前面空旷、后面拥挤。单条生成会从全局选中关键帧或 12 张关键帧中取最贴近本句时间点的参考帧。单条生成会从产品素材池按分镜角色、视角优先级、用途标签、置信度和风险自动挑选最多 6 张相关产品图,不会把全部产品图提交给生视频模型,然后把产品坐标系、视角标注、方向、结构点和风险写入 Seedance 提示。ModelTrace 会在音频解析、产品识别/补图、相似主体 6 视图、脚本改写和单条生视频入口旁直接展示模型名;所有生图入口都显示并使用 gpt-image-2,没有其他图片模型 fallback;点击后用固定浮层展示模型链路、输入输出和回退逻辑。旧分镜卡、抽帧控制和视频生成组件仍保留在文件里,但当前主路径不渲染。 + web/components/ad-recreation-board.tsx信息流广告复刻工作表:左侧素材输入只负责链接/上传和任务切换,不再重复放横版原视频预览;右侧展示视频下载状态、默认折叠的文案依据,以及源视频工作区。音频解析结果改成默认折叠的辅助信息,展开后同一行看讲话人/节奏/背景音;主工作区左侧是按 9:16 显示的竖版原视频播放器,播放器内覆盖“当前点抽帧”,按当前播放秒数手动补参考帧;右侧上方是音频波形 / 切点参考,下方是逐句时间轴;下一行铺开“关键帧 / 相似主体”。音频波形用参考图式的连续灰色包络显示响度、停顿和密集爆点,顶部同时显示当前播放秒数、总时长和鼠标指针停点秒数。视频播放时通过 requestAnimationFrame 平滑驱动波形播放线,同时同步高亮并滚动当前句;点击音频波形或字幕行会跳转原视频时间。关键帧区的主入口是“自动抽帧 12 张”,一键按动作峰值目标重新抽取 12 张源视频参考帧,优先抓手势、表情变化、节奏点和镜头变化,缩略图按竖版完整比例显示不裁切并用更多列紧凑铺开,鼠标停留会通过固定浮层放大展示完整帧。“生成 10 张高清图”放在相似主体白底视图区,不和抽参考按钮平齐;如果用户没有勾选帧,默认把全部关键帧作为主体参考,勾选后只传已选帧;生成区可在“透明骨架 / 普通真人”之间切换,可选择桌面导入的 5 套内置形象作为创意方向,并可填写统一主体方向,例如年轻女性、更运动、更高级。关键帧和相似主体白底视图都用更小的竖版缩略图密排;白底视图只展示每个 view 的最新一张,缩略图上提供“重新生成这一张”和“删除这一张”,单张重生会用 replace_views=true 替换同一视角。前端调用 generateSubjectAssets 时按主体类型传 subject_style=transparent_humansource_actor,按需传 character_id,并使用 reconstruction_mode=similar;后端会把关键帧和内置形象视为同一个主体的创意证据,并锁定同一性别表现、年龄段、体型、材质、风格和视觉身份,同时生成全身多视角 + 肩颈正/左右近景 + 后颈肩背特写,避免整套图出现男女性别、老少年龄或样式混杂。音频结果下方是信息流复刻分镜工作台:顶部产品参考区是“同一产品素材池”,不限量上传产品图,不做不同产品身份判断;上传原图推荐长边 1200-2000px、短边至少 600px,但后端会统一生成最长边 1600px、JPEG 92 的 AI 工作副本,并回显尺寸、自动转换和风险标注;上传后按“套在脖子上的 U 形肩颈按摩仪”进行同一产品批量识别,左/右按佩戴者身体左右、上/下按佩戴方向,额外标注内外侧、开口方向、局部结构点、背景类型、用途标签、生成风险和备注,用户只检查备注,鼠标悬停通过固定浮层显示大图预览,能盖过滚动容器和分镜框架;缺视角补图失败时保留重试入口。脚本区在分镜行上方提供“作者想法”和“整片改写”,每行新口播文案可直接编辑并可单段 AI 改写,分镜时间和原内容列压缩为窄摘要列,把横向空间留给新口播、画面规划和视频候选;生成本条视频时使用当前编辑后的新口播文案。每条音频分镜纵向排列,行内从左到右串起原内容、新口播文案、画面规划/产品融入和 6 个候选视频槽;候选视频槽在宽屏下一排显示 6 个竖版预览,避免前面空旷、后面拥挤。单条生成会从全局选中关键帧或 12 张关键帧中取最贴近本句时间点的参考帧。单条生成会从产品素材池按分镜角色、视角优先级、用途标签、置信度和风险自动挑选最多 6 张相关产品图,不会把全部产品图提交给生视频模型,然后把产品坐标系、视角标注、方向、结构点和风险写入 Seedance 提示。ModelTrace 会在音频解析、产品识别/补图、相似主体高清视图包、脚本改写和单条生视频入口旁直接展示模型名;所有生图入口都显示并使用 gpt-image-2,没有其他图片模型 fallback;点击后用固定浮层展示模型链路、输入输出和回退逻辑。旧分镜卡、抽帧控制和视频生成组件仍保留在文件里,但当前主路径不渲染。 web/app/login/page.tsx生产登录页:访问账号/访问密钥表单、保持登录、错误/成功状态;当前只在原版 Digital Oasis 动态背景上叠加一个组合登录框,桌面端左侧是动态角色,右侧是图标化登录表单;面板左上角展示官网 SKG 字标和中文“营销内容工作台”系统标识。 web/app/login/layout.tsx登录路由专属 layout:覆盖全站默认网页标题和描述为空,避免 /login 继承工作台 metadata 后在页面源码里继续出现登录界面文字以外的文案。 web/components/login/oasis-canvas.tsx登录页全屏动态视觉层:用 iframe 直接承载下载包 web/public/oasis-source/index.html 的原 WebGPU / Three.js 草场源码;父级登录页只覆盖自己的文案和表单,并在捕获阶段把全局鼠标坐标同时用原生事件和 postMessage 转发给 iframe,避免登录面板或输入框遮挡时草地失去鼠标响应。 diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx index 83d4f22..21fe242 100644 --- a/web/components/ad-recreation-board.tsx +++ b/web/components/ad-recreation-board.tsx @@ -13,6 +13,7 @@ import { type FrameObject, type GeneratedVideo, type ImageRef, + type CharacterLibraryItem, type Job, type KeyElement, type KeyFrame, @@ -27,6 +28,7 @@ import { analyzeJob, analyzeProductViews, apiAssetUrl, + characterLibraryImageUrl, cutoutElement, deleteSubjectAsset, effectiveFrameUrl, @@ -35,6 +37,7 @@ import { generatedImageUrl, getRuntimeHealth, hasCutout, + listCharacterLibrary, representativeCutoutUrl, resolveImageRefUrl, rewriteStoryboardScript, @@ -98,7 +101,26 @@ type AudioStoryboardRow = { type ProductRefItem = ProductRefStateItem type SubjectStyleMode = "transparent_human" | "source_actor" -const SUBJECT_VIEW_ORDER = ["front", "three_quarter_left", "left", "back", "right", "three_quarter_right"] +const SUBJECT_ASSET_VIEWS = [ + { value: "front", label: "正面" }, + { value: "three_quarter_left", label: "左前45" }, + { value: "left", label: "左侧" }, + { value: "back", label: "背面" }, + { value: "right", label: "右侧" }, + { value: "three_quarter_right", label: "右前45" }, + { value: "bust_front", label: "肩颈正近" }, + { value: "bust_left_45", label: "肩颈左近" }, + { value: "bust_right_45", label: "肩颈右近" }, + { value: "back_neck_detail", label: "后颈肩背" }, +] as const + +const SUBJECT_VIEW_ORDER = [ + ...SUBJECT_ASSET_VIEWS.map((view) => view.value), + "bust", + "back_detail", +] + +const SUBJECT_ASSET_SIZE = "2048" as const type ModelTraceSpec = { title: string @@ -301,14 +323,22 @@ function findSimilarActorSource(preferredFrames: KeyFrame[], allFrames: KeyFrame return null } -function buildSimilarSubjectPrompt(subjectStyle: SubjectStyleMode, direction: string) { +function buildSimilarSubjectPrompt(subjectStyle: SubjectStyleMode, direction: string, selectedCharacter?: CharacterLibraryItem | null) { const base = [ "Create a new similar but non-identical information-feed ad subject from the selected reference frames.", "Treat all selected frames as evidence for ONE same subject, not multiple different subjects.", - "Lock one consistent character bible before generating: same gender presentation, age range, body proportions, head shape, material, silhouette, commercial style, and visual identity across all six views.", - "If the user direction asks to change gender, age, or style, apply that single change uniformly to every view; never mix male/female, young/old, or multiple style identities inside one six-view set.", + "Lock one consistent character bible before generating: same gender presentation, age range, body proportions, head shape, material, silhouette, commercial style, and visual identity across the full multi-view set.", + "If the user direction asks to change gender, age, or style, apply that single change uniformly to every view; never mix male/female, young/old, or multiple style identities inside one set.", "Keep the pose vocabulary, camera-readability, creator-ad energy, and commercial clarity, but do not copy the exact source identity, face, watermark, captions, platform UI, or pixels.", + "This is for SKG neck-and-shoulder wearable massage device videos: keep neck, collarbone, shoulders, side neck, upper back, shoulder blades, and product placement area clean and visible.", + "Output high-definition assets suitable for downstream video generation.", ] + if (selectedCharacter) { + base.push( + `Built-in creative character selected: ${selectedCharacter.name}.`, + "Use the built-in images as planned creative direction only; generate an innovative variation, not a duplicate of that character pack.", + ) + } if (subjectStyle === "transparent_human") { base.push( "The subject must be a transparent humanoid: transparent or translucent skin/body shell wrapping a clean visible white skeleton inside the same body.", @@ -323,7 +353,7 @@ function buildSimilarSubjectPrompt(subjectStyle: SubjectStyleMode, direction: st } const trimmed = direction.trim() if (trimmed) base.push(`User unified subject direction: ${trimmed}`) - base.push("Output separate pure white background six-view assets; each image is one view of the same unified subject.") + base.push("Output separate pure white background multi-view assets; each image is one view of the same unified subject.") return base.join(" ") } @@ -331,6 +361,13 @@ function subjectAssetUrl(job: Job, asset: SubjectAsset) { return apiAssetUrl(asset.url) || resolveImageRefUrl(job.id, { kind: "asset", frame_idx: 0, element_id: asset.id }) } +function characterPreviewImage(character?: CharacterLibraryItem | null) { + if (!character?.images?.length) return null + return character.images.find((image) => image.id === character.primary_image) + ?? character.images.find((image) => image.view === "front") + ?? character.images[0] +} + function modelValue(value?: string) { return value?.trim() || "待配置" } @@ -383,12 +420,12 @@ function similarSubjectModelTrace(models: RuntimeModels | undefined, subjectStyl title: subjectStyle === "transparent_human" ? "相似透明骨架主体" : "相似普通真人主体", model: subjectImageModelChain(models), chain: [ - "参考帧策略:未勾选时使用全部关键帧,勾选后只使用已选关键帧", + "参考策略:未勾选关键帧时使用全部关键帧,勾选后只使用已选关键帧;也可叠加内置形象作为创意参考", `主体类型:${subjectStyle === "transparent_human" ? "透明/半透明皮肤包裹可见白色骨架" : "普通商业广告真人"}`, - `图像生成:${subjectImageModelChain(models)} 逐张生成正、背、左、右、左前 45、右前 45`, - "身份锁定:六张必须是同一个主体,性别表现、年龄段、体型、材质和风格保持一致", + `图像生成:${subjectImageModelChain(models)} 逐张生成 10 张高清图,包含全身多视角和肩颈/后背特写`, + "身份锁定:整套图必须是同一个主体,性别表现、年龄段、体型、材质和风格保持一致", ], - note: "这是生成类似主体,不是复制、抠出或复刻源视频人物身份。", + note: "这是生成类似但创新的主体,不是复制、抠出或复刻源视频人物身份;内置形象也只作为方向参考。", } } @@ -945,7 +982,7 @@ export function AdRecreationBoard({ workingJob = await generateSubjectAssets(job.id, frame.index, element.id, { subject_kind: guessSubjectKind(name), background: "white", - size: "1024", + size: SUBJECT_ASSET_SIZE, source_frame_indices: framesForSegments.slice(0, 6).map((item) => item.index), }) data.onJobUpdate(workingJob) @@ -967,13 +1004,13 @@ export function AdRecreationBoard({ const updated = await generateSubjectAssets(job.id, frame.index, element.id, { subject_kind: guessSubjectKind(element.name_zh || element.name_en || "主体"), background: "white", - size: "1024", + size: SUBJECT_ASSET_SIZE, source_frame_indices: framesForSegments.slice(0, 6).map((item) => item.index), }) data.onJobUpdate(updated) - toast.success(`6 视图已生成:${element.name_zh || element.name_en}`) + toast.success(`高清视图已生成:${element.name_zh || element.name_en}`) } catch (e) { - toast.error("6 视图生成失败:" + (e instanceof Error ? e.message : String(e))) + toast.error("高清视图生成失败:" + (e instanceof Error ? e.message : String(e))) } finally { setSixViewBusyKey(null) } @@ -1493,6 +1530,8 @@ function SourceReferenceBuildPanel({ const [subjectAssetPreview, setSubjectAssetPreview] = useState<{ id: string; left: number; top: number } | null>(null) const [subjectStyle, setSubjectStyle] = useState("transparent_human") const [subjectDirection, setSubjectDirection] = useState("") + const [characterLibrary, setCharacterLibrary] = useState([]) + const [selectedCharacterId, setSelectedCharacterId] = useState("") const frames = useMemo(() => [...job.frames].sort((a, b) => a.timestamp - b.timestamp), [job.frames]) const selectedReferenceFrames = useMemo( () => frames.filter((frame) => selectedFrames.has(frame.index)), @@ -1506,6 +1545,10 @@ function SourceReferenceBuildPanel({ return findSimilarActorSource(subjectReferenceFrames, frames) }, [frames, subjectReferenceFrames]) const actorAssets = actorSource?.element.subject_assets ?? [] + const selectedCharacter = useMemo( + () => characterLibrary.find((character) => character.id === selectedCharacterId) ?? null, + [characterLibrary, selectedCharacterId], + ) const visibleActorAssets = useMemo(() => { const latestByView = new Map() for (const asset of actorAssets) { @@ -1528,6 +1571,18 @@ function SourceReferenceBuildPanel({ ? `默认使用全部 ${frames.length} 张` : "待抽帧" + useEffect(() => { + let cancelled = false + listCharacterLibrary() + .then((items) => { + if (!cancelled) setCharacterLibrary(items) + }) + .catch((e) => { + if (!cancelled) toast.error("内置形象读取失败:" + (e instanceof Error ? e.message : String(e))) + }) + return () => { cancelled = true } + }, []) + const extractKeyframes = async () => { setExtracting(true) try { @@ -1558,8 +1613,12 @@ function SourceReferenceBuildPanel({ let element = workingFrame.elements?.find(isSimilarActorElement) if (!element) { workingJob = await addElement(job.id, baseFrame.index, { - name_zh: subjectStyle === "transparent_human" ? "相似透明骨架主体" : "相似广告主角", - name_en: subjectStyle === "transparent_human" ? "similar transparent skeleton humanoid subject" : "similar ad actor", + name_zh: selectedCharacter + ? `相似透明骨架主体 · ${selectedCharacter.name}` + : subjectStyle === "transparent_human" ? "相似透明骨架主体" : "相似广告主角", + name_en: selectedCharacter + ? `similar innovative transparent skeleton humanoid subject based on ${selectedCharacter.name}` + : subjectStyle === "transparent_human" ? "similar transparent skeleton humanoid subject" : "similar ad actor", position: "source-video main subject selected from global keyframes", source: "manual", }) @@ -1575,14 +1634,15 @@ function SourceReferenceBuildPanel({ subject_style: subjectStyle, reconstruction_mode: "similar", background: "white", - size: "1024", + size: SUBJECT_ASSET_SIZE, source_frame_indices: subjectReferenceFrames.slice(0, 12).map((frame) => frame.index), - views: ["front", "back", "left", "right", "three_quarter_left", "three_quarter_right"], - prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection), + views: SUBJECT_ASSET_VIEWS.map((view) => view.value), + character_id: selectedCharacterId, + prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection, selectedCharacter), replace_views: true, }) onJobUpdate(updated) - toast.success("相似主体 6 张白底视图已生成") + toast.success("相似主体 10 张高清白底图已生成") } catch (e) { toast.error("相似主体重构失败:" + (e instanceof Error ? e.message : String(e))) } finally { @@ -1612,10 +1672,11 @@ function SourceReferenceBuildPanel({ subject_style: subjectStyle, reconstruction_mode: "similar", background: asset.background || "white", - size: asset.size || "1024", + size: SUBJECT_ASSET_SIZE, source_frame_indices: sourceIndices, views: [asset.view], - prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection), + character_id: selectedCharacterId, + prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection, selectedCharacter), replace_views: true, }) onJobUpdate(updated) @@ -1800,6 +1861,63 @@ function SourceReferenceBuildPanel({ 相似主体白底视图
+ 内置形象只做创意参考,不照抄 + + +
+
+ 内置形象选择 + {selectedCharacter ? `${selectedCharacter.name} · ${selectedCharacter.images.length} 张参考` : "源视频主角相似创新"} +
+
+ + {characterLibrary.map((character) => { + const preview = characterPreviewImage(character) + const active = selectedCharacterId === character.id + return ( + + ) + })} +
+ {selectedCharacter?.images?.length ? ( +
+ {selectedCharacter.images.slice(0, 7).map((image) => ( +
+ {image.label} +
+ ))} +
+ ) : null} +
+ +
{[ @@ -1816,7 +1934,7 @@ function SourceReferenceBuildPanel({ > {item.label} - ))} + ))}
- {visibleActorAssets.length}/6 + {visibleActorAssets.length}/{SUBJECT_ASSET_VIEWS.length}
@@ -1893,7 +2011,7 @@ function SourceReferenceBuildPanel({ ) : (
- 可直接用全部关键帧生成;勾选关键帧后会只用已选帧。六视图会围绕同一个统一主体生成。 + 可直接用全部关键帧生成;勾选关键帧后会只用已选帧。选择内置形象后,会围绕同一个统一主体生成全身多视角和肩颈/背部特写。
)} @@ -3006,7 +3124,7 @@ function StoryboardSegmentCard({ onClick={() => onGenerateElement(candidate)} disabled={busy} className="rounded-md border border-white/10 bg-white/[0.04] px-1.5 py-1 text-[10px] text-white/55 transition hover:border-cyan-300/35 hover:text-cyan-100 disabled:cursor-not-allowed disabled:opacity-40" - title="选择该元素并生成提取图 + 6 视图" + title="选择该元素并生成提取图 + 高清视图" > {candidate.name} @@ -3027,7 +3145,7 @@ function StoryboardSegmentCard({ className="inline-flex h-7 shrink-0 items-center gap-1 rounded-md border border-white/10 px-2 text-[10px] text-white/55 transition hover:border-cyan-300/35 hover:text-cyan-100 disabled:cursor-not-allowed disabled:opacity-40" > {busySix ? : } - {element.subject_assets?.length ? `${element.subject_assets.length}视图` : "6视图"} + {element.subject_assets?.length ? `${element.subject_assets.length}视图` : "高清视图"} ) diff --git a/web/lib/api.ts b/web/lib/api.ts index a506ac8..b02ae30 100644 --- a/web/lib/api.ts +++ b/web/lib/api.ts @@ -1075,6 +1075,7 @@ export async function generateSubjectAssets( size?: AssetSize source_frame_indices?: number[] views?: string[] + character_id?: string subject_style?: "transparent_human" | "source_actor" reconstruction_mode?: "same" | "similar" prompt?: string @@ -1091,6 +1092,7 @@ export async function generateSubjectAssets( size: body.size ?? "source", source_frame_indices: body.source_frame_indices ?? null, views: body.views ?? null, + character_id: body.character_id ?? "", subject_style: body.subject_style ?? "transparent_human", reconstruction_mode: body.reconstruction_mode ?? "same", prompt: body.prompt ?? "",