fix: lock subject packs and upscale assets

2026-05-20 09:52:03 +08:00
parent 75666f151f
commit 2366662d33
3 changed files with 36 additions and 8 deletions
--- a/RULES.md
+++ b/RULES.md
@@ -11,7 +11,7 @@
 - 详见 `CLAUDE.md` 立项决策段 + `.memory/plan.md` 七步管线拆解
 - 风格：`04-Dark-Gallery-Ambient`（路径：`~/Projects/research/20260305-网页风格库/04-Dark-Gallery-Ambient.md`）
 - 第一冲刺：步骤 1-4（下载 / 拆轨 / 关键帧 / ASR+翻译）
- 当前产品方向（2026-05-20 再确认）：信息流广告快速复刻默认进入“三字段候选生成”工作流。主界面为“左侧素材输入列 + 右侧信息流复刻工作表”。用户粘贴 TK 链接或上传视频后点击“开始分析”，系统自动下载源视频；下载完成后并行启动两条路：音频文案路提取原音频文案/字幕，并分析讲话人、语速节奏、背景音乐/环境声/音效；视频视觉路自动抽取参考帧。源视频工作区右侧主体链路是“参考帧池 → 转换层 → 主体元素”：参考帧池竖向排列；转换层只保留真人重构、卡通重构、元素重构、自主描述四个入口，每个入口最多拖入 3 张参考帧，拖入只加入参考队列，不自动生成；用户放好参考和文字后点击生成，右侧主体元素区按每次生成的套图文件夹展示全新 6 视图主体，当前套图在最上层展开，其他套图顺位进入下方可滚动列表，同一重构方向允许保留多套。转换层可直接选择自动 / GPT / Gemini 生图模型，偏好只影响主体套图生成；提示词输入有本地记忆，会把上次常用词生成可点击小按键。主体重构默认继承参考图里的性别、人种/肤色、年龄体态和角色气质这些广义特征，但生成同一个全新主体；同一套 6 视图必须统一脸部设定、发型、体态、服装类型、配色、材质、剪裁和配饰，避免一套图里每张衣服不同。这四类都属于参考重构，不抠图、不复制原人、不复刻原画面。旧下方“相似主体 / 主体模板库”不再作为主路径。波形下方的画面胶片只是临时预览，点击只跳转原视频时间点，双击或拖进参考帧池才正式加入关键帧，已加入的胶片直接显示“已添加”。产品图上传后独立形成产品资产包，自动识别视角/结构/比例并补缺角度。分镜工作台按逐句时间轴默认只露“文案 / 场景一句话 / 人物+产品+动作”，产品素材池、批量控制、三字段、视频候选和高级区都必须可折叠；视频候选无内容时默认不占大面积，有候选时默认只显示迷你缩略条，展开后才显示 4-grid。单条默认生成 4 个视频候选，顶部支持整片批量生成候选；首尾帧、视觉规划、产品出现方式和旧 6 字段保留在“高级”抽屉与后端 quick-plan 自动展开中，不能再作为客户默认闸门。
+- 当前产品方向（2026-05-20 再确认）：信息流广告快速复刻默认进入“三字段候选生成”工作流。主界面为“左侧素材输入列 + 右侧信息流复刻工作表”。用户粘贴 TK 链接或上传视频后点击“开始分析”，系统自动下载源视频；下载完成后并行启动两条路：音频文案路提取原音频文案/字幕，并分析讲话人、语速节奏、背景音乐/环境声/音效；视频视觉路自动抽取参考帧。源视频工作区右侧主体链路是“参考帧池 → 转换层 → 主体元素”：参考帧池竖向排列；转换层只保留真人重构、卡通重构、元素重构、自主描述四个入口，每个入口最多拖入 3 张参考帧，拖入只加入参考队列，不自动生成；用户放好参考和文字后点击生成，右侧主体元素区按每次生成的套图文件夹展示全新 6 视图主体，当前套图在最上层展开，其他套图顺位进入下方可滚动列表，同一重构方向允许保留多套。转换层可直接选择自动 / GPT / Gemini 生图模型，偏好只影响主体套图生成；提示词输入有本地记忆，会把上次常用词生成可点击小按键。主体重构默认继承参考图里的性别、人种/肤色、年龄体态和角色气质这些广义特征，但生成同一个全新主体；后端会给每套 6 视图注入同一份 pack bible，锁定脸部设定、发型、体态、服装类型、配色、材质、剪裁和配饰，并在保存时裁白边后允许放大主体到画布高度约 88-94%，避免一套图里每张衣服不同或人物太小。这四类都属于参考重构，不抠图、不复制原人、不复刻原画面。旧下方“相似主体 / 主体模板库”不再作为主路径。波形下方的画面胶片只是临时预览，点击只跳转原视频时间点，双击或拖进参考帧池才正式加入关键帧，已加入的胶片直接显示“已添加”。产品图上传后独立形成产品资产包，自动识别视角/结构/比例并补缺角度。分镜工作台按逐句时间轴默认只露“文案 / 场景一句话 / 人物+产品+动作”，产品素材池、批量控制、三字段、视频候选和高级区都必须可折叠；视频候选无内容时默认不占大面积，有候选时默认只显示迷你缩略条，展开后才显示 4-grid。单条默认生成 4 个视频候选，顶部支持整片批量生成候选；首尾帧、视觉规划、产品出现方式和旧 6 字段保留在“高级”抽屉与后端 quick-plan 自动展开中，不能再作为客户默认闸门。

 ## 部署事实
 - 平台：VPS `76.13.31.179`（Ubuntu 24.04 / Docker Compose / Coolify Traefik）
--- a/api/main.py
+++ b/api/main.py
@@ -1995,8 +1995,8 @@ def _normalize_asset_image(
            bbox = mask.getbbox()
            if bbox:
                left, top, right, bottom = bbox
-                pad_x = round((right - left) * 0.06)
-                pad_y = round((bottom - top) * 0.06)
+                pad_x = round((right - left) * 0.04)
+                pad_y = round((bottom - top) * 0.03)
                img = img.crop((
                    max(0, left - pad_x),
                    max(0, top - pad_y),
@@ -2004,8 +2004,13 @@ def _normalize_asset_image(
                    min(img.height, bottom + pad_y),
                ))
            max_w = max(1, round(target_w * 0.92))
-            max_h = max(1, round(target_h * 0.94))
-            img.thumbnail((max_w, max_h), Image.Resampling.LANCZOS)
+            max_h = max(1, round(target_h * 0.96))
+            if img.width and img.height:
+                scale = min(max_w / img.width, max_h / img.height)
+                if scale > 0:
+                    next_size = (max(1, round(img.width * scale)), max(1, round(img.height * scale)))
+                    if next_size != img.size:
+                        img = img.resize(next_size, Image.Resampling.LANCZOS)
        else:
            img.thumbnail((target_w, target_h), Image.Resampling.LANCZOS)
        canvas = Image.new("RGB", (target_w, target_h), bg)
@@ -5817,6 +5822,16 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
        "Do not change clothing between views; do not switch from sportswear to casualwear, dress, coat, hoodie, uniform, or underwear unless the user explicitly requests that single outfit for the whole pack. "
        "If the reference outfit is useful, inherit its broad wardrobe category and color family, but redraw it as a new non-identical clean commercial outfit. "
    )
+    pack_bible_clause = (
+        "PACK BIBLE - this exact bible applies to every view in this generated set. "
+        "Subject bible: one newly designed commercial wellness-ad subject; inherit only broad non-identifying casting traits from the source such as gender presentation, regional/ethnic appearance category, skin-tone family, age range, body-proportion category, hair-length family, posture energy, and neck/shoulder readability. "
+        "Do not copy the source person's biometric identity, exact face, exact hairstyle, marks, tattoos, captions, logos, or watermarks. "
+        "Keep the same new face design, same head shape, same hair color and hair silhouette, same skin tone, same body proportions, same height impression, and same character age across front, side, three-quarter, and back views. "
+        "Wardrobe bible: if the user direction names a specific outfit, use that one outfit uniformly across every view. Otherwise use one clean SKG wellness-ad activewear outfit for the entire pack: fitted short-sleeve performance top with a visible neck/collarbone area, slim athletic pants, and low-profile sneakers. "
+        "Lock the exact top color, bottom color, shoe color, neckline shape, sleeve/strap structure, seams, trim, fabric finish, fit, and accessories before rendering the first view, then repeat those same clothing decisions in every other view. "
+        "Never add or remove a jacket, blazer, hoodie, coat, dress, skirt, scarf, hat, bag, jewelry, logo, stripe pattern, or extra layer in only one view. "
+        "Back and side views must show the same garment wrapping around the same body, not a redesigned outfit. "
+    )
    neck_product_clause = (
        "This subject pack is for SKG neck-and-shoulder wearable massage device videos. "
        "Make the neck, collarbone, shoulder line, upper back, side neck, and shoulder slope clear and product-ready. "
@@ -5855,7 +5870,7 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
                "For this close-up view, intentionally crop as an upper-body asset from head/neck to chest or upper back; the neck, shoulders, collarbone or upper spine area must be large, clear, and useful for placing a neck-and-shoulder massage device. "
                "Do not force full-body framing for close-ups. "
                if closeup_view and req.subject_kind == "living"
-                else "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
+                else "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. Make the subject large and readable: it should occupy about 88-94% of the image height, with the head close to the top margin and feet close to the bottom margin. No tiny character, no miniature person, no distant full-body figure, no large empty white margins. "
            )
            reference_strategy_clause = (
                "Text-only generation mode: no source image is attached to this image request. Use only the written source/video/template briefs below as creative constraints. "
@@ -5874,6 +5889,7 @@ def _generate_subject_assets_sync(job_id: str, idx: int, element_id: str, req: G
                + identity_clause
                + identity_lock_clause
                + wardrobe_lock_clause
+                + pack_bible_clause
                + neck_product_clause
                + canvas_clause
                + prompt_extra_clause
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html