auto-save 2026-05-18 07:27 (~6)

This commit is contained in:
2026-05-18 07:27:45 +08:00
parent 4653108baf
commit 9790e5bedb
6 changed files with 213 additions and 60 deletions

View File

@@ -1,18 +1,5 @@
{
"entries": [
{
"files_changed": 6,
"hash": "6c9806c",
"message": "auto-save 2026-05-15 17:11 (~6)",
"ts": "2026-05-15T17:11:52+08:00",
"type": "commit"
},
{
"files_changed": 2,
"message": "Codex 会话活跃 · 最近命令codex · 2 项未提交变更 · 最近提交auto-save 2026-05-15 17:11 (~6)",
"ts": "2026-05-15T09:14:48Z",
"type": "session-heartbeat"
},
{
"files_changed": 2,
"hash": "f590d51",
@@ -3254,6 +3241,19 @@
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 分支 main · 1 项未提交变更 · 最近提交fix: send product angle refs as image inputs",
"files_changed": 1
},
{
"ts": "2026-05-18T07:22:23+08:00",
"type": "commit",
"message": "auto-save 2026-05-18 07:22 (~2)",
"hash": "4653108",
"files_changed": 2
},
{
"ts": "2026-05-17T23:23:45Z",
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 分支 main · 4 项未提交变更 · 最近提交auto-save 2026-05-18 07:22 (~2)",
"files_changed": 4
}
]
}

View File

@@ -11,7 +11,7 @@
- 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解
- 风格:`04-Dark-Gallery-Ambient`(路径:`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`)
- 第一冲刺:步骤 1-4(下载 / 拆轨 / 关键帧 / ASR+翻译)
- 当前产品方向(2026-05-17 再确认):先解决信息流广告快速复刻的第一步,不再沿用“开始后自动抽帧、分镜、元素生成、合成”的默认做法。主界面为“左侧素材输入列 + 右侧音频解析工作表”。用户粘贴 TK 链接或上传视频后点击“开始”,系统自动下载源视频;下载完成后优先提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效。抽帧、分镜规划、产品融入、元素 6 视图和视频合成暂作为后续能力保留,不在当前第一步自动触发。
- 当前产品方向(2026-05-17 再确认):先解决信息流广告快速复刻的第一步,不再沿用“开始后自动抽帧、分镜、元素生成、合成”的默认做法。主界面为“左侧素材输入列 + 右侧音频解析工作表”。用户粘贴 TK 链接或上传视频后点击“开始”,系统自动下载源视频;下载完成后优先提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效。抽帧、分镜规划、产品融入、相似主体高清视图包(最多 10 张,含肩颈/后背特写)和视频合成暂作为后续能力保留,不在当前第一步自动触发。
## 部署事实
- 平台:VPS `76.13.31.179`(Ubuntu 24.04 / Docker Compose / Coolify Traefik)

View File

@@ -4056,7 +4056,7 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
@app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/subject-assets", response_model=Job)
def generate_subject_assets(job_id: str, idx: int, element_id: str, req: GenerateSubjectAssetsReq) -> Job:
"""为一个主体生成多视角资产包。
如果传入 source_frame_indices,则把多张已选关键帧拼成参考板,表示这些帧都在服务同一个主体"""
如果传入 source_frame_indices 或内置 character_id则把多张参考图作为独立 image[] 证据提交"""
import time as _time
job = JOBS.get(job_id)
if not job:
@@ -4071,13 +4071,30 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
source_indices = [idx] + source_indices
source_indices = list(dict.fromkeys(source_indices))[:12]
character_reference_paths: list[Path] = []
character_reference_clause = ""
character_label = ""
character_id = (req.character_id or "").strip()
if character_id:
character = find_character_library_item(character_id)
character_label = character.name
for image in character.images[:7]:
character_reference_paths.append(character_library_file(image.filename))
character_reference_clause = (
f"Selected built-in creative character reference: {character.name}. "
"Use these planned character images as a high-quality creative direction and anatomy/style bible only; "
"do not copy the exact face, exact pose, exact silhouette, pixels, or make a duplicate. "
"Create a new innovative variation that keeps the same broad role, transparent wellness character language, "
"camera readability, and shoulder/neck product compatibility. "
)
model_src, tmp_focus = _focus_source_for_element(job_id, idx, el)
sheet_tmp: Path | None = None
if len(source_indices) > 1:
sheet_tmp = job_dir(job_id) / "tmp" / f"subject_refs_{idx:03d}_{element_id}_{uuid.uuid4().hex[:6]}.jpg"
sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp, max_items=12)
if sheet:
model_src = sheet
frame_reference_paths = [p for p in (_source_frame_path(job_id, i) for i in source_indices) if p.exists()]
if character_reference_paths:
remaining = max(0, 10 - len(character_reference_paths))
model_src = character_reference_paths + frame_reference_paths[:remaining]
elif len(frame_reference_paths) > 1:
model_src = frame_reference_paths[:10]
try:
with Image.open(_source_frame_path(job_id, idx)) as src_im:
@@ -4118,18 +4135,27 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
prompt_extra = req.prompt.strip()
prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else ""
identity_lock_clause = (
"Identity lock: these API calls generate a six-view pack for ONE single subject, but each individual output file must show only its one requested view. "
"Identity lock: these API calls generate one high-definition multi-view pack for ONE single subject, but each individual output file must show only its one requested view. "
"Before rendering, infer one consistent character bible from the reference image(s): gender presentation, age range, body proportions, head shape, face direction cues, material, silhouette, wardrobe/material style, and commercial mood. "
"Keep that same character bible unchanged across every generated view in separate files. "
"If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same six-view pack. "
"If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same pack. "
"For transparent humanoids, keep the same transparent skin shell, skeleton proportions, visible spine/rib cage/pelvis/limb bones, and non-horror wellness character style in every view. "
)
neck_product_clause = (
"This subject pack is for SKG neck-and-shoulder wearable massage device videos. "
"Make the neck, collarbone, shoulder line, upper back, side neck, and shoulder slope clear and product-ready. "
"Avoid bulky collars, scarves, hair, hoods, props, or poses that hide the neck/shoulder placement area. "
"For back and close-up views, prioritize the cervical spine, shoulder blades, upper trapezius, and clean wearable-device contact area. "
)
models = [GPT_IMAGE_MODEL]
generated: list[SubjectAsset] = []
try:
for view, view_label in _subject_view_labels(req.subject_kind, req.views):
closeup_view = view in {"bust", "back_detail", "bust_front", "bust_left_45", "bust_right_45", "back_neck_detail"} or "detail" in view
if req.subject_kind == "living":
if view.startswith("expression_"):
if closeup_view:
view_prompt = f"upper-body shoulder-and-neck close-up character reference, {view_label}"
elif view.startswith("expression_"):
emotion = view_label.replace("表情", "")
view_prompt = f"full-body upright standing character reference with a clear {emotion} facial expression"
elif view.startswith("action_") or view == "side_walk":
@@ -4142,8 +4168,14 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
single_view_clause = (
f"Single-image output rule: this output file is ONLY for the {view_label} view ({view_name}). "
"Render exactly one subject, one time, in one pose and one camera angle. "
"Do not create a six-view sheet, contact sheet, grid, storyboard, lineup, comparison layout, before/after layout, mirrored pair, duplicate subjects, thumbnails, labels, captions, arrows, view names, panel borders, or multiple versions in the same image. "
"Do not include any of the other five views in this image. "
"Do not create a multi-view sheet, contact sheet, grid, storyboard, lineup, comparison layout, before/after layout, mirrored pair, duplicate subjects, thumbnails, labels, captions, arrows, view names, panel borders, or multiple versions in the same image. "
"Do not include any other views in this image. "
)
framing_clause = (
"For this close-up view, intentionally crop as an upper-body asset from head/neck to chest or upper back; the neck, shoulders, collarbone or upper spine area must be large, clear, and useful for placing a neck-and-shoulder massage device. "
"Do not force full-body framing for close-ups. "
if closeup_view and req.subject_kind == "living"
else "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
)
prompt = (
f"Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. "
@@ -4152,15 +4184,16 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
+ single_view_clause
+ identity_clause
+ identity_lock_clause
+ character_reference_clause
+ neck_product_clause
+ canvas_clause
+ prompt_extra_clause
+ actor_style_clause
+ "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. "
"Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
+ framing_clause
f"Create a high-definition standalone asset on a solid {bg_phrase} background. "
"No extra objects, no props, no additional products, no background elements, no original scene fragments, no shadows from the original scene, no text, no watermark, no UI. "
"If the source is incomplete, partially visible, occluded, or low resolution, reconstruct the missing parts by redrawing a clean complete subject while staying consistent with the reference. "
"For living subjects, keep a normal upright standing pose for the standard views; do not create sitting, walking, medical, horror, or distorted anatomy unless explicitly requested by the view label. "
"For living standard full-body views, keep a normal upright standing pose; do not create sitting, walking, medical, horror, or distorted anatomy unless explicitly requested by the view label. "
+ transparent_character_clause
)
try:
@@ -4174,7 +4207,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
generated.append(SubjectAsset(
id=asset_id,
view=view,
label=f"{el.name_zh} · {view_label}",
label=f"{el.name_zh} · {view_label}" + (f" · {character_label}" if character_label else ""),
url=_asset_url(job_id, asset_id),
width=width,
height=height,
@@ -4185,7 +4218,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
created_at=_time.time(),
))
finally:
for p in (tmp_focus, sheet_tmp):
for p in (tmp_focus,):
if p and p.exists():
try: p.unlink()
except OSError: pass

View File

@@ -569,7 +569,7 @@
<section id="pipeline" data-search>
<h2>业务管线</h2>
<p>当前产品方向已收窄为“信息流广告快速复刻”:主界面左侧是素材输入列,右侧先完成音频解析,再进入信息流复刻分镜工作台。用户粘贴 TK 链接或上传视频后点击“开始”,系统自动下载源视频;下载完成后优先提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效。分镜规划按逐句时间轴生成;视觉参考改为原版视频下方的关键帧池:显眼保留“自动抽帧 12 张”,也可在竖版播放器内按当前播放点手动补帧;生成 6 视图时未勾选关键帧则默认使用全部帧,勾选后只用已选帧,并按透明骨架人或普通真人两种主体类型生成“类似但不复刻”的统一相似主体</p>
<p>当前产品方向已收窄为“信息流广告快速复刻”:主界面左侧是素材输入列,右侧先完成音频解析,再进入信息流复刻分镜工作台。用户粘贴 TK 链接或上传视频后点击“开始”,系统自动下载源视频;下载完成后优先提取原音频文案/字幕,并分析讲话人、语速节奏、背景音乐/环境声/音效。分镜规划按逐句时间轴生成;视觉参考改为原版视频下方的关键帧池:显眼保留“自动抽帧 12 张”,也可在竖版播放器内按当前播放点手动补帧;生成相似主体时未勾选关键帧则默认使用全部帧,勾选后只用已选帧,也可选择 5 套内置形象作为创意方向,并按透明骨架人或普通真人两种主体类型生成“类似但不复刻”的高清主体视图包</p>
<div class="pipeline">
<div class="step"><div class="num">1</div><h3>导入素材</h3><p>粘贴 TK / 信息流视频链接或上传本地视频;“开始”只把任务放入第一步队列。</p></div>
<div class="step"><div class="num">2</div><h3>下载源视频</h3><p>后端用 yt-dlp 或本地上传文件落 <code>source.mp4</code>,记录时长、尺寸和视频只读地址。</p></div>
@@ -589,7 +589,7 @@
<tr><td><code>web/next.config.mjs</code></td><td>Next.js 构建配置:静态导出、图片不走优化、禁用开发环境左下角 Next Dev Indicator并移除 Next 16 已不支持的 <code>eslint</code> 顶层配置,避免本地 dev 出现配置 Issue 提示。</td></tr>
<tr><td><code>web/app/globals.css</code></td><td>全局主题变量、登录页视觉样式、ReactFlow 样式引用,以及本地开发态 <code>nextjs-portal</code> 遮挡隐藏规则。</td></tr>
<tr><td><code>web/app/page.tsx</code></td><td>产品工作台主状态jobs、activeJobId、生成任务状态主渲染为全屏素材输入列 + 信息流广告复刻工作表;“开始”编排状态只负责在下载完成后自动触发 <code>triggerTranscribe</code>不再默认触发抽帧、Vision 扫描或分镜初稿保存;底部吸附音频条不再从主界面渲染。</td></tr>
<tr><td><code>web/components/ad-recreation-board.tsx</code></td><td>信息流广告复刻工作表:左侧素材输入只负责链接/上传和任务切换,不再重复放横版原视频预览;右侧展示视频下载状态、默认折叠的文案依据,以及源视频工作区。音频解析结果改成默认折叠的辅助信息,展开后同一行看讲话人/节奏/背景音;主工作区左侧是按 9:16 显示的竖版原视频播放器,播放器内覆盖“当前点抽帧”,按当前播放秒数手动补参考帧;右侧上方是音频波形 / 切点参考,下方是逐句时间轴;下一行铺开“关键帧 / 相似主体”。音频波形用参考图式的连续灰色包络显示响度、停顿和密集爆点,顶部同时显示当前播放秒数、总时长和鼠标指针停点秒数。视频播放时通过 <code>requestAnimationFrame</code> 平滑驱动波形播放线,同时同步高亮并滚动当前句;点击音频波形或字幕行会跳转原视频时间。关键帧区的主入口是“自动抽帧 12 张”,一键按动作峰值目标重新抽取 12 张源视频参考帧,优先抓手势、表情变化、节奏点和镜头变化,缩略图按竖版完整比例显示不裁切并用更多列紧凑铺开,鼠标停留会通过固定浮层放大展示完整帧。“生成 6 视图”放在相似主体白底视图区,不和抽参考按钮平齐;如果用户没有勾选帧,默认把全部关键帧作为主体参考,勾选后只传已选帧;生成区可在“透明骨架 / 普通真人”之间切换,并可填写统一主体方向,例如年轻女性、更运动、更高级。关键帧和相似主体白底视图都用更小的竖版缩略图密排;白底视图只展示每个 view 的最新一张,缩略图上提供“重新生成这一张”和“删除这一张”,单张重生会用 <code>replace_views=true</code> 替换同一视角,不追加成第 7 张。前端调用 <code>generateSubjectAssets</code> 时按主体类型传 <code>subject_style=transparent_human</code><code>source_actor</code>使用 <code>reconstruction_mode=similar</code>;后端会把这些帧视为同一个主体的证据,并锁定同一性别表现、年龄段、体型、材质、风格和视觉身份,避免六视图出现男女性别、老少年龄或样式混杂。音频结果下方是信息流复刻分镜工作台:顶部产品参考区是“同一产品素材池”,不限量上传产品图,不做不同产品身份判断;上传原图推荐长边 1200-2000px、短边至少 600px但后端会统一生成最长边 1600px、JPEG 92 的 AI 工作副本,并回显尺寸、自动转换和风险标注;上传后按“套在脖子上的 U 形肩颈按摩仪”进行同一产品批量识别,左/右按佩戴者身体左右、上/下按佩戴方向,额外标注内外侧、开口方向、局部结构点、背景类型、用途标签、生成风险和备注,用户只检查备注,鼠标悬停通过固定浮层显示大图预览,能盖过滚动容器和分镜框架;缺视角补图失败时保留重试入口。脚本区在分镜行上方提供“作者想法”和“整片改写”,每行新口播文案可直接编辑并可单段 AI 改写,分镜时间和原内容列压缩为窄摘要列,把横向空间留给新口播、画面规划和视频候选;生成本条视频时使用当前编辑后的新口播文案。每条音频分镜纵向排列,行内从左到右串起原内容、新口播文案、画面规划/产品融入和 6 个候选视频槽;候选视频槽在宽屏下一排显示 6 个竖版预览,避免前面空旷、后面拥挤。单条生成会从全局选中关键帧或 12 张关键帧中取最贴近本句时间点的参考帧。单条生成会从产品素材池按分镜角色、视角优先级、用途标签、置信度和风险自动挑选最多 6 张相关产品图,不会把全部产品图提交给生视频模型,然后把产品坐标系、视角标注、方向、结构点和风险写入 Seedance 提示。<code>ModelTrace</code> 会在音频解析、产品识别/补图、相似主体 6 视图、脚本改写和单条生视频入口旁直接展示模型名;所有生图入口都显示并使用 <code>gpt-image-2</code>,没有其他图片模型 fallback点击后用固定浮层展示模型链路、输入输出和回退逻辑。旧分镜卡、抽帧控制和视频生成组件仍保留在文件里但当前主路径不渲染。</td></tr>
<tr><td><code>web/components/ad-recreation-board.tsx</code></td><td>信息流广告复刻工作表:左侧素材输入只负责链接/上传和任务切换,不再重复放横版原视频预览;右侧展示视频下载状态、默认折叠的文案依据,以及源视频工作区。音频解析结果改成默认折叠的辅助信息,展开后同一行看讲话人/节奏/背景音;主工作区左侧是按 9:16 显示的竖版原视频播放器,播放器内覆盖“当前点抽帧”,按当前播放秒数手动补参考帧;右侧上方是音频波形 / 切点参考,下方是逐句时间轴;下一行铺开“关键帧 / 相似主体”。音频波形用参考图式的连续灰色包络显示响度、停顿和密集爆点,顶部同时显示当前播放秒数、总时长和鼠标指针停点秒数。视频播放时通过 <code>requestAnimationFrame</code> 平滑驱动波形播放线,同时同步高亮并滚动当前句;点击音频波形或字幕行会跳转原视频时间。关键帧区的主入口是“自动抽帧 12 张”,一键按动作峰值目标重新抽取 12 张源视频参考帧,优先抓手势、表情变化、节奏点和镜头变化,缩略图按竖版完整比例显示不裁切并用更多列紧凑铺开,鼠标停留会通过固定浮层放大展示完整帧。“生成 10 张高清图”放在相似主体白底视图区,不和抽参考按钮平齐;如果用户没有勾选帧,默认把全部关键帧作为主体参考,勾选后只传已选帧;生成区可在“透明骨架 / 普通真人”之间切换,可选择桌面导入的 5 套内置形象作为创意方向,并可填写统一主体方向,例如年轻女性、更运动、更高级。关键帧和相似主体白底视图都用更小的竖版缩略图密排;白底视图只展示每个 view 的最新一张,缩略图上提供“重新生成这一张”和“删除这一张”,单张重生会用 <code>replace_views=true</code> 替换同一视角。前端调用 <code>generateSubjectAssets</code> 时按主体类型传 <code>subject_style=transparent_human</code><code>source_actor</code>按需传 <code>character_id</code>,并使用 <code>reconstruction_mode=similar</code>;后端会把关键帧和内置形象视为同一个主体的创意证据,并锁定同一性别表现、年龄段、体型、材质、风格和视觉身份,同时生成全身多视角 + 肩颈正/左右近景 + 后颈肩背特写,避免整套图出现男女性别、老少年龄或样式混杂。音频结果下方是信息流复刻分镜工作台:顶部产品参考区是“同一产品素材池”,不限量上传产品图,不做不同产品身份判断;上传原图推荐长边 1200-2000px、短边至少 600px但后端会统一生成最长边 1600px、JPEG 92 的 AI 工作副本,并回显尺寸、自动转换和风险标注;上传后按“套在脖子上的 U 形肩颈按摩仪”进行同一产品批量识别,左/右按佩戴者身体左右、上/下按佩戴方向,额外标注内外侧、开口方向、局部结构点、背景类型、用途标签、生成风险和备注,用户只检查备注,鼠标悬停通过固定浮层显示大图预览,能盖过滚动容器和分镜框架;缺视角补图失败时保留重试入口。脚本区在分镜行上方提供“作者想法”和“整片改写”,每行新口播文案可直接编辑并可单段 AI 改写,分镜时间和原内容列压缩为窄摘要列,把横向空间留给新口播、画面规划和视频候选;生成本条视频时使用当前编辑后的新口播文案。每条音频分镜纵向排列,行内从左到右串起原内容、新口播文案、画面规划/产品融入和 6 个候选视频槽;候选视频槽在宽屏下一排显示 6 个竖版预览,避免前面空旷、后面拥挤。单条生成会从全局选中关键帧或 12 张关键帧中取最贴近本句时间点的参考帧。单条生成会从产品素材池按分镜角色、视角优先级、用途标签、置信度和风险自动挑选最多 6 张相关产品图,不会把全部产品图提交给生视频模型,然后把产品坐标系、视角标注、方向、结构点和风险写入 Seedance 提示。<code>ModelTrace</code> 会在音频解析、产品识别/补图、相似主体高清视图、脚本改写和单条生视频入口旁直接展示模型名;所有生图入口都显示并使用 <code>gpt-image-2</code>,没有其他图片模型 fallback点击后用固定浮层展示模型链路、输入输出和回退逻辑。旧分镜卡、抽帧控制和视频生成组件仍保留在文件里但当前主路径不渲染。</td></tr>
<tr><td><code>web/app/login/page.tsx</code></td><td>生产登录页:访问账号/访问密钥表单、保持登录、错误/成功状态;当前只在原版 Digital Oasis 动态背景上叠加一个组合登录框,桌面端左侧是动态角色,右侧是图标化登录表单;面板左上角展示官网 SKG 字标和中文“营销内容工作台”系统标识。</td></tr>
<tr><td><code>web/app/login/layout.tsx</code></td><td>登录路由专属 layout覆盖全站默认网页标题和描述为空避免 <code>/login</code> 继承工作台 metadata 后在页面源码里继续出现登录界面文字以外的文案。</td></tr>
<tr><td><code>web/components/login/oasis-canvas.tsx</code></td><td>登录页全屏动态视觉层:用 iframe 直接承载下载包 <code>web/public/oasis-source/index.html</code> 的原 WebGPU / Three.js 草场源码;父级登录页只覆盖自己的文案和表单,并在捕获阶段把全局鼠标坐标同时用原生事件和 <code>postMessage</code> 转发给 iframe避免登录面板或输入框遮挡时草地失去鼠标响应。</td></tr>

View File

@@ -13,6 +13,7 @@ import {
type FrameObject,
type GeneratedVideo,
type ImageRef,
type CharacterLibraryItem,
type Job,
type KeyElement,
type KeyFrame,
@@ -27,6 +28,7 @@ import {
analyzeJob,
analyzeProductViews,
apiAssetUrl,
characterLibraryImageUrl,
cutoutElement,
deleteSubjectAsset,
effectiveFrameUrl,
@@ -35,6 +37,7 @@ import {
generatedImageUrl,
getRuntimeHealth,
hasCutout,
listCharacterLibrary,
representativeCutoutUrl,
resolveImageRefUrl,
rewriteStoryboardScript,
@@ -98,7 +101,26 @@ type AudioStoryboardRow = {
type ProductRefItem = ProductRefStateItem
type SubjectStyleMode = "transparent_human" | "source_actor"
const SUBJECT_VIEW_ORDER = ["front", "three_quarter_left", "left", "back", "right", "three_quarter_right"]
// Views requested when generating a similar-subject pack (their `value`s are sent as `views`):
// six full-body angles followed by four neck/shoulder close-ups. `label` holds the Chinese name of each view.
const SUBJECT_ASSET_VIEWS = [
{ value: "front", label: "正面" },
{ value: "three_quarter_left", label: "左前45" },
{ value: "left", label: "左侧" },
{ value: "back", label: "背面" },
{ value: "right", label: "右侧" },
{ value: "three_quarter_right", label: "右前45" },
{ value: "bust_front", label: "肩颈正近" },
{ value: "bust_left_45", label: "肩颈左近" },
{ value: "bust_right_45", label: "肩颈右近" },
{ value: "back_neck_detail", label: "后颈肩背" },
] as const
// View ids in pack order, followed by the extra ids "bust" and "back_detail".
// NOTE(review): the two extra ids look like older close-up names — confirm they are still produced.
const SUBJECT_VIEW_ORDER = [
...SUBJECT_ASSET_VIEWS.map((view) => view.value),
"bust",
"back_detail",
]
// Value sent as `size` in subject asset generation requests.
const SUBJECT_ASSET_SIZE = "2048" as const
type ModelTraceSpec = {
title: string
@@ -301,14 +323,22 @@ function findSimilarActorSource(preferredFrames: KeyFrame[], allFrames: KeyFrame
return null
}
function buildSimilarSubjectPrompt(subjectStyle: SubjectStyleMode, direction: string) {
function buildSimilarSubjectPrompt(subjectStyle: SubjectStyleMode, direction: string, selectedCharacter?: CharacterLibraryItem | null) {
const base = [
"Create a new similar but non-identical information-feed ad subject from the selected reference frames.",
"Treat all selected frames as evidence for ONE same subject, not multiple different subjects.",
"Lock one consistent character bible before generating: same gender presentation, age range, body proportions, head shape, material, silhouette, commercial style, and visual identity across all six views.",
"If the user direction asks to change gender, age, or style, apply that single change uniformly to every view; never mix male/female, young/old, or multiple style identities inside one six-view set.",
"Lock one consistent character bible before generating: same gender presentation, age range, body proportions, head shape, material, silhouette, commercial style, and visual identity across the full multi-view set.",
"If the user direction asks to change gender, age, or style, apply that single change uniformly to every view; never mix male/female, young/old, or multiple style identities inside one set.",
"Keep the pose vocabulary, camera-readability, creator-ad energy, and commercial clarity, but do not copy the exact source identity, face, watermark, captions, platform UI, or pixels.",
"This is for SKG neck-and-shoulder wearable massage device videos: keep neck, collarbone, shoulders, side neck, upper back, shoulder blades, and product placement area clean and visible.",
"Output high-definition assets suitable for downstream video generation.",
]
if (selectedCharacter) {
base.push(
`Built-in creative character selected: ${selectedCharacter.name}.`,
"Use the built-in images as planned creative direction only; generate an innovative variation, not a duplicate of that character pack.",
)
}
if (subjectStyle === "transparent_human") {
base.push(
"The subject must be a transparent humanoid: transparent or translucent skin/body shell wrapping a clean visible white skeleton inside the same body.",
@@ -323,7 +353,7 @@ function buildSimilarSubjectPrompt(subjectStyle: SubjectStyleMode, direction: st
}
const trimmed = direction.trim()
if (trimmed) base.push(`User unified subject direction: ${trimmed}`)
base.push("Output separate pure white background six-view assets; each image is one view of the same unified subject.")
base.push("Output separate pure white background multi-view assets; each image is one view of the same unified subject.")
return base.join(" ")
}
@@ -331,6 +361,13 @@ function subjectAssetUrl(job: Job, asset: SubjectAsset) {
return apiAssetUrl(asset.url) || resolveImageRefUrl(job.id, { kind: "asset", frame_idx: 0, element_id: asset.id })
}
/**
 * Picks the image used as a character's preview.
 *
 * Preference order: the image whose id equals `primary_image`, then the first
 * image whose view is "front", then the first image in the list.
 * Returns null when no character is given or it has no images.
 */
function characterPreviewImage(character?: CharacterLibraryItem | null) {
  const images = character?.images
  if (!images || images.length === 0) return null

  const primary = images.find((candidate) => candidate.id === character?.primary_image)
  if (primary != null) return primary

  const frontal = images.find((candidate) => candidate.view === "front")
  if (frontal != null) return frontal

  return images[0]
}
/**
 * Returns the model name with surrounding whitespace removed, or the
 * "待配置" placeholder when the value is missing or blank.
 */
function modelValue(value?: string) {
  const trimmed = value?.trim()
  if (trimmed) return trimmed
  return "待配置"
}
@@ -383,12 +420,12 @@ function similarSubjectModelTrace(models: RuntimeModels | undefined, subjectStyl
title: subjectStyle === "transparent_human" ? "相似透明骨架主体" : "相似普通真人主体",
model: subjectImageModelChain(models),
chain: [
"参考策略:未勾选时使用全部关键帧,勾选后只使用已选关键帧",
"参考策略:未勾选关键帧时使用全部关键帧,勾选后只使用已选关键帧;也可叠加内置形象作为创意参考",
`主体类型:${subjectStyle === "transparent_human" ? "透明/半透明皮肤包裹可见白色骨架" : "普通商业广告真人"}`,
`图像生成:${subjectImageModelChain(models)} 逐张生成正、背、左、右、左前 45、右前 45`,
"身份锁定:六张必须是同一个主体,性别表现、年龄段、体型、材质和风格保持一致",
`图像生成:${subjectImageModelChain(models)} 逐张生成 10 张高清图,包含全身多视角和肩颈/后背特写`,
"身份锁定:整套图必须是同一个主体,性别表现、年龄段、体型、材质和风格保持一致",
],
note: "这是生成类似主体,不是复制、抠出或复刻源视频人物身份。",
note: "这是生成类似但创新的主体,不是复制、抠出或复刻源视频人物身份;内置形象也只作为方向参考。",
}
}
@@ -945,7 +982,7 @@ export function AdRecreationBoard({
workingJob = await generateSubjectAssets(job.id, frame.index, element.id, {
subject_kind: guessSubjectKind(name),
background: "white",
size: "1024",
size: SUBJECT_ASSET_SIZE,
source_frame_indices: framesForSegments.slice(0, 6).map((item) => item.index),
})
data.onJobUpdate(workingJob)
@@ -967,13 +1004,13 @@ export function AdRecreationBoard({
const updated = await generateSubjectAssets(job.id, frame.index, element.id, {
subject_kind: guessSubjectKind(element.name_zh || element.name_en || "主体"),
background: "white",
size: "1024",
size: SUBJECT_ASSET_SIZE,
source_frame_indices: framesForSegments.slice(0, 6).map((item) => item.index),
})
data.onJobUpdate(updated)
toast.success(`6 视图已生成:${element.name_zh || element.name_en}`)
toast.success(`高清视图已生成:${element.name_zh || element.name_en}`)
} catch (e) {
toast.error("6 视图生成失败:" + (e instanceof Error ? e.message : String(e)))
toast.error("高清视图生成失败:" + (e instanceof Error ? e.message : String(e)))
} finally {
setSixViewBusyKey(null)
}
@@ -1493,6 +1530,8 @@ function SourceReferenceBuildPanel({
const [subjectAssetPreview, setSubjectAssetPreview] = useState<{ id: string; left: number; top: number } | null>(null)
const [subjectStyle, setSubjectStyle] = useState<SubjectStyleMode>("transparent_human")
const [subjectDirection, setSubjectDirection] = useState("")
const [characterLibrary, setCharacterLibrary] = useState<CharacterLibraryItem[]>([])
const [selectedCharacterId, setSelectedCharacterId] = useState("")
const frames = useMemo(() => [...job.frames].sort((a, b) => a.timestamp - b.timestamp), [job.frames])
const selectedReferenceFrames = useMemo(
() => frames.filter((frame) => selectedFrames.has(frame.index)),
@@ -1506,6 +1545,10 @@ function SourceReferenceBuildPanel({
return findSimilarActorSource(subjectReferenceFrames, frames)
}, [frames, subjectReferenceFrames])
const actorAssets = actorSource?.element.subject_assets ?? []
const selectedCharacter = useMemo(
() => characterLibrary.find((character) => character.id === selectedCharacterId) ?? null,
[characterLibrary, selectedCharacterId],
)
const visibleActorAssets = useMemo(() => {
const latestByView = new Map<string, SubjectAsset>()
for (const asset of actorAssets) {
@@ -1528,6 +1571,18 @@ function SourceReferenceBuildPanel({
? `默认使用全部 ${frames.length}`
: "待抽帧"
useEffect(() => {
let cancelled = false
listCharacterLibrary()
.then((items) => {
if (!cancelled) setCharacterLibrary(items)
})
.catch((e) => {
if (!cancelled) toast.error("内置形象读取失败:" + (e instanceof Error ? e.message : String(e)))
})
return () => { cancelled = true }
}, [])
const extractKeyframes = async () => {
setExtracting(true)
try {
@@ -1558,8 +1613,12 @@ function SourceReferenceBuildPanel({
let element = workingFrame.elements?.find(isSimilarActorElement)
if (!element) {
workingJob = await addElement(job.id, baseFrame.index, {
name_zh: subjectStyle === "transparent_human" ? "相似透明骨架主体" : "相似广告主角",
name_en: subjectStyle === "transparent_human" ? "similar transparent skeleton humanoid subject" : "similar ad actor",
name_zh: selectedCharacter
? `相似透明骨架主体 · ${selectedCharacter.name}`
: subjectStyle === "transparent_human" ? "相似透明骨架主体" : "相似广告主角",
name_en: selectedCharacter
? `similar innovative transparent skeleton humanoid subject based on ${selectedCharacter.name}`
: subjectStyle === "transparent_human" ? "similar transparent skeleton humanoid subject" : "similar ad actor",
position: "source-video main subject selected from global keyframes",
source: "manual",
})
@@ -1575,14 +1634,15 @@ function SourceReferenceBuildPanel({
subject_style: subjectStyle,
reconstruction_mode: "similar",
background: "white",
size: "1024",
size: SUBJECT_ASSET_SIZE,
source_frame_indices: subjectReferenceFrames.slice(0, 12).map((frame) => frame.index),
views: ["front", "back", "left", "right", "three_quarter_left", "three_quarter_right"],
prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection),
views: SUBJECT_ASSET_VIEWS.map((view) => view.value),
character_id: selectedCharacterId,
prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection, selectedCharacter),
replace_views: true,
})
onJobUpdate(updated)
toast.success("相似主体 6 张白底图已生成")
toast.success("相似主体 10 张高清白底图已生成")
} catch (e) {
toast.error("相似主体重构失败:" + (e instanceof Error ? e.message : String(e)))
} finally {
@@ -1612,10 +1672,11 @@ function SourceReferenceBuildPanel({
subject_style: subjectStyle,
reconstruction_mode: "similar",
background: asset.background || "white",
size: asset.size || "1024",
size: SUBJECT_ASSET_SIZE,
source_frame_indices: sourceIndices,
views: [asset.view],
prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection),
character_id: selectedCharacterId,
prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection, selectedCharacter),
replace_views: true,
})
onJobUpdate(updated)
@@ -1800,6 +1861,63 @@ function SourceReferenceBuildPanel({
<span></span>
<ModelTrace trace={similarSubjectModelTrace(runtimeModels, subjectStyle)} compact />
</div>
<span className="text-[10px] text-white/32"></span>
</div>
<div className="mb-2">
<div className="mb-1 flex items-center justify-between gap-2">
<span className="text-[10px] text-white/38"></span>
<span className="text-[9px] text-white/28">{selectedCharacter ? `${selectedCharacter.name} · ${selectedCharacter.images.length} 张参考` : "源视频主角相似创新"}</span>
</div>
<div className="grid grid-cols-[repeat(auto-fill,minmax(86px,1fr))] gap-1.5">
<button
type="button"
onClick={() => setSelectedCharacterId("")}
className={`min-h-[58px] rounded-md border px-2 py-1.5 text-left transition ${
!selectedCharacterId ? "border-cyan-200/55 bg-cyan-300/12 text-cyan-50" : "border-white/10 bg-black/25 text-white/45 hover:border-white/22 hover:text-white/70"
}`}
>
<span className="block text-[10.5px] font-semibold"></span>
<span className="mt-1 block text-[9px] leading-tight opacity-70"></span>
</button>
{characterLibrary.map((character) => {
const preview = characterPreviewImage(character)
const active = selectedCharacterId === character.id
return (
<button
key={character.id}
type="button"
onClick={() => {
setSelectedCharacterId(character.id)
setSubjectStyle("transparent_human")
}}
className={`group flex min-h-[58px] items-center gap-1.5 rounded-md border px-1.5 py-1 text-left transition ${
active ? "border-emerald-200/65 bg-emerald-300/12 text-emerald-50" : "border-white/10 bg-black/25 text-white/50 hover:border-emerald-200/35 hover:text-white/80"
}`}
>
<span className="h-12 w-9 shrink-0 overflow-hidden rounded border border-white/10 bg-white">
{preview ? <img src={characterLibraryImageUrl(preview.filename)} alt={character.name} className="h-full w-full object-cover" /> : null}
</span>
<span className="min-w-0">
<span className="block truncate text-[10px] font-semibold">{character.name}</span>
<span className="mt-0.5 block text-[8.5px] opacity-58">7 </span>
</span>
</button>
)
})}
</div>
{selectedCharacter?.images?.length ? (
<div className="mt-1.5 flex gap-1 overflow-x-auto pb-0.5">
{selectedCharacter.images.slice(0, 7).map((image) => (
<div key={image.id} className="h-12 w-9 shrink-0 overflow-hidden rounded border border-white/10 bg-white" title={image.label}>
<img src={characterLibraryImageUrl(image.filename)} alt={image.label} className="h-full w-full object-cover" />
</div>
))}
</div>
) : null}
</div>
<div className="mb-1.5 flex flex-wrap items-center justify-end gap-2 text-[10px] text-white/36">
<div className="flex min-w-0 flex-wrap items-center justify-end gap-2">
<div className="flex rounded-md border border-white/10 bg-black/28 p-0.5">
{[
@@ -1816,7 +1934,7 @@ function SourceReferenceBuildPanel({
>
{item.label}
</button>
))}
))}
</div>
<input
value={subjectDirection}
@@ -1824,7 +1942,7 @@ function SourceReferenceBuildPanel({
placeholder="统一方向:如年轻女性 / 更运动 / 更高级"
className="h-7 w-[240px] min-w-[180px] rounded-md border border-white/10 bg-black/35 px-2 text-[10.5px] text-white outline-none placeholder:text-white/28 focus:border-cyan-300/50"
/>
<span>{visibleActorAssets.length}/6</span>
<span>{visibleActorAssets.length}/{SUBJECT_ASSET_VIEWS.length}</span>
<button
type="button"
onClick={() => void generateSimilarActor()}
@@ -1832,7 +1950,7 @@ function SourceReferenceBuildPanel({
className="inline-flex h-7 items-center justify-center gap-1 rounded-md bg-white px-2 text-[10.5px] font-semibold text-black transition hover:bg-white/90 disabled:cursor-not-allowed disabled:opacity-40"
>
{subjectBusy ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Sparkles className="h-3.5 w-3.5" />}
6
10
</button>
</div>
</div>
@@ -1893,7 +2011,7 @@ function SourceReferenceBuildPanel({
</div>
) : (
<div className="rounded border border-dashed border-white/12 px-2 py-2 text-[10.5px] leading-snug text-white/32">
/
</div>
)}
</div>
@@ -3006,7 +3124,7 @@ function StoryboardSegmentCard({
onClick={() => onGenerateElement(candidate)}
disabled={busy}
className="rounded-md border border-white/10 bg-white/[0.04] px-1.5 py-1 text-[10px] text-white/55 transition hover:border-cyan-300/35 hover:text-cyan-100 disabled:cursor-not-allowed disabled:opacity-40"
title="选择该元素并生成提取图 + 6 视图"
title="选择该元素并生成提取图 + 高清视图"
>
{candidate.name}
</button>
@@ -3027,7 +3145,7 @@ function StoryboardSegmentCard({
className="inline-flex h-7 shrink-0 items-center gap-1 rounded-md border border-white/10 px-2 text-[10px] text-white/55 transition hover:border-cyan-300/35 hover:text-cyan-100 disabled:cursor-not-allowed disabled:opacity-40"
>
{busySix ? <Loader2 className="h-3 w-3 animate-spin" /> : <ImageIcon className="h-3 w-3" />}
{element.subject_assets?.length ? `${element.subject_assets.length}视图` : "6视图"}
{element.subject_assets?.length ? `${element.subject_assets.length}视图` : "高清视图"}
</button>
</div>
)

View File

@@ -1075,6 +1075,7 @@ export async function generateSubjectAssets(
size?: AssetSize
source_frame_indices?: number[]
views?: string[]
character_id?: string
subject_style?: "transparent_human" | "source_actor"
reconstruction_mode?: "same" | "similar"
prompt?: string
@@ -1091,6 +1092,7 @@ export async function generateSubjectAssets(
size: body.size ?? "source",
source_frame_indices: body.source_frame_indices ?? null,
views: body.views ?? null,
character_id: body.character_id ?? "",
subject_style: body.subject_style ?? "transparent_human",
reconstruction_mode: body.reconstruction_mode ?? "same",
prompt: body.prompt ?? "",