diff --git a/RULES.md b/RULES.md
index 08e63a5..a0ae43f 100644
--- a/RULES.md
+++ b/RULES.md
@@ -56,19 +56,19 @@
 - `ASR_TIMEOUT_SECONDS`：远端 ASR / 音频分析单次请求超时，默认 45 秒，避免第一步长时间停在转录中
 - `LOCAL_ASR_BIN` / `LOCAL_ASR_MODEL` / `LOCAL_ASR_TIMEOUT_SECONDS`：本机 ASR 兜底，默认使用 `/opt/homebrew/bin/mlx_whisper` + `mlx-community/whisper-tiny`，用于当前 SKG 网关 `/audio/transcriptions` 不可用时生成真实逐句时间轴
 - `TRANSLATE_MODEL`：字幕翻译模型，默认 `gemini-2.5-flash`
-- `REWRITE_MODEL`：通用改写/分镜描述模型，默认 `gemini-2.5-pro`
-- `AUDIO_REWRITE_MODEL`：后续音频口播改写模型，默认跟随 `REWRITE_MODEL`；当前第一步不默认调用口播改写，只保留原文案和声音分析
+- `GPT_TEXT_MODEL`：GPT 文本 / 视觉默认模型，默认 `gpt-4o`；当 `REWRITE_MODEL` / `VISION_MODEL` 未设置、为空或仍写 `gemini-*` 时，后端回退到该值
+- `REWRITE_MODEL`：通用改写/分镜描述模型，默认 `gpt-4o`；如果旧环境仍写 `gemini-*`，后端会自动改用 `GPT_TEXT_MODEL`
+- `VISION_MODEL`：关键帧画面理解模型，默认 `gpt-4o`；如果旧环境仍写 `gemini-*`，后端会自动改用 `GPT_TEXT_MODEL`
+- `AUDIO_REWRITE_MODEL`：后续音频口播改写模型，默认跟随 `REWRITE_MODEL`；如果旧环境仍写 `gemini-*`，后端会自动改用 `REWRITE_MODEL`
 - `AUDIO_PRODUCT_BRIEF`：音频口播改写时注入的 SKG 产品卖点
 - `PRODUCT_VIEW_MODEL`：同一产品素材池的视角标注/自动识别模型；当前按项目要求强制使用 `gpt-image-2`
 - `IMAGE_BASE_URL` / `IMAGE_API_KEY` / `IMAGE_MODEL`：OpenAI 兼容生图网关；当前所有生图入口一律强制使用 `gpt-image-2`，不做其他图片模型 fallback
 - `GPT_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODELS`：保留兼容旧环境变量名，但服务端会强制主体 6 视图和所有其他生图入口都只使用 `gpt-image-2`
 - `AI_HTTP_PROXY` / `IMAGE_HTTP_PROXY`：可选的 AI 网关出站代理；本地 launchd 后台进程不一定继承 shell 的 `http_proxy/https_proxy`，如生图报 DNS / ConnectError，可在本地 `api/.env` 配置后重启后端。`/health` 只回传是否配置代理，不回传代理地址。
-- `VOICE_PROVIDER`：配音通道，当前固定使用 `azure_openai`
+- `YTDLP_COOKIES_FILE` / `YTDLP_COOKIES_FROM_BROWSER`：可选 TikTok 下载登录态；优先使用 cookies 文件，其次读取本机浏览器 cookies。cookies 文件属于敏感登录态，只能放本机或服务器私有路径，不允许入库。
+- `VOICE_PROVIDER`：配音通道，服务端固定使用 `azure_openai`；旧环境若写 `minimax` 会被忽略
 - `AZURE_OPENAI_BASE_URL` / `AZURE_OPENAI_API_KEY`：微软 Azure OpenAI 协议配音网关；本地未单独配置 Key 时回退复用 `LLM_API_KEY`
-- `AZURE_TTS_MODEL` / `AZURE_TTS_VOICE_ID` / `AZURE_TTS_VOICE_POOL` / `AZURE_TTS_PATH`：Azure OpenAI TTS 模型、默认音色、音色池和 OpenAI 协议语音路径
-- `MINIMAX_API_KEY`：MiniMax T2A 配音 Key，只能放本地 `api/.env`，不能入库；当前第一步暂不默认调用
-- `MINIMAX_TTS_BASE_URL` / `MINIMAX_TTS_MODEL` / `MINIMAX_TTS_VOICE_ID`：MiniMax 旧配音端点、模型和兜底音色配置，仅作为保留兼容；当前不作为默认语音通道
-- `MINIMAX_TTS_VOICE_POOL`：MiniMax 英文随机音色池；当前默认男声 `English_magnetic_voiced_man`、女声 `English_Upbeat_Woman`、成熟声 `English_MaturePartner`，供后续新配音阶段使用
+- `AZURE_TTS_MODEL` / `AZURE_TTS_VOICE_ID` / `AZURE_TTS_VOICE_POOL` / `AZURE_TTS_PATH` / `AZURE_TTS_PATHS`：Azure OpenAI TTS 模型、默认音色、音色池和 OpenAI 协议语音路径；后端会按 `AZURE_TTS_PATHS` 依次尝试，仅在请求异常或返回 HTTP 404 / 405 时才切换到下一条路径，其他错误直接返回，便于区分路径不对和整条语音服务不可用
 - `POE_API_KEY` / `VIDEO_API_KEY`：视频生成通道 Key，只能放本地环境变量
 - `WEB_AUTH_USERNAME` / `WEB_AUTH_PASSWORD` / `WEB_AUTH_SESSION_SECRET`：生产网页登录和会话签名配置；密码和 session secret 只放服务器环境变量，不入库
 - `FFMPEG_BIN` / `FFPROBE_BIN`：可选本地媒体二进制路径；本机 Homebrew ffmpeg 动态库损坏时，后端会自动跳过不可用的 PATH 版本并尝试本机静态 ffmpeg 备选，生产仍建议使用系统 ffmpeg/ffprobe
diff --git a/api/.env.example b/api/.env.example
index d8a0b43..2cd984d 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -17,7 +17,9 @@ LOCAL_ASR_BIN=/opt/homebrew/bin/mlx_whisper
 LOCAL_ASR_MODEL=mlx-community/whisper-tiny
 LOCAL_ASR_TIMEOUT_SECONDS=180
 TRANSLATE_MODEL=gemini-2.5-flash
-REWRITE_MODEL=gemini-2.5-pro
+GPT_TEXT_MODEL=gpt-4o
+REWRITE_MODEL=gpt-4o
+VISION_MODEL=gpt-4o
 PRODUCT_VIEW_MODEL=gpt-image-2
 IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
 IMAGE_API_KEY=
@@ -27,6 +29,8 @@ SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2
 SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2
 # 可选：本地网络需要代理访问 ai.skg.com 时配置；launchd 不一定继承 shell 代理变量。
 AI_HTTP_PROXY=
+YTDLP_COOKIES_FILE=
+YTDLP_COOKIES_FROM_BROWSER=
 VIDEO_MODEL=seedance
 VIDEO_MODEL_SEEDANCE=seedance-2-fast
 VIDEO_MODEL_KLING=kling-omni
@@ -35,6 +39,7 @@ VIDEO_MODEL_VEO3=veo-3.1-fast
 # 音频文案改写 + Azure OpenAI 配音
-AUDIO_REWRITE_MODEL=gemini-2.5-pro
+AUDIO_REWRITE_MODEL=gpt-4o
 AUDIO_PRODUCT_BRIEF="SKG 智能按摩产品，主打日常肩颈、腰背、眼部、膝盖或足部放松；广告表达要高级、干净、可信，不做医疗疗效承诺。"
+# 语音通道服务端固定为 Azure OpenAI。
 VOICE_PROVIDER=azure_openai
 AZURE_OPENAI_BASE_URL=https://ai.skg.com/azure
 AZURE_OPENAI_API_KEY=
@@ -42,13 +47,7 @@ AZURE_TTS_MODEL=gpt-4o-mini-tts
 AZURE_TTS_VOICE_ID=alloy
 AZURE_TTS_VOICE_POOL=alloy,verse,shimmer
 AZURE_TTS_PATH=/audio/speech
-
-# MiniMax 旧配音通道，保留兼容；默认不走
-MINIMAX_API_KEY=
-MINIMAX_TTS_BASE_URL=https://api.minimax.io
-MINIMAX_TTS_MODEL=speech-2.8-turbo
-MINIMAX_TTS_VOICE_ID=English_expressive_narrator
-MINIMAX_TTS_VOICE_POOL=English_magnetic_voiced_man,English_Upbeat_Woman,English_MaturePartner
+AZURE_TTS_PATHS=/audio/speech,/v1/audio/speech
 
 # Poe 视频 API（优先用于 Seedance / Kling / Veo）
 POE_API_BASE_URL=https://api.poe.com/v1
diff --git a/api/README.md b/api/README.md
index 3390690..6c21794 100644
--- a/api/README.md
+++ b/api/README.md
@@ -1,6 +1,6 @@
 # SKG TK 二创 API
 
-FastAPI 后端，跑 yt-dlp + ffmpeg + ASR/翻译/英文 SKG 产品介绍文案 + MiniMax 英文配音管线。
+FastAPI 后端，跑 yt-dlp + ffmpeg + ASR/翻译/英文 SKG 产品介绍文案 + Azure OpenAI 英文配音管线。
 
 ## 启动
 
@@ -9,7 +9,7 @@ cd api
 python3 -m venv .venv
 source .venv/bin/activate
 pip install -r requirements.txt
-cp .env.example .env  # 按需填 LLM_API_KEY / MINIMAX_API_KEY
+cp .env.example .env  # 按需填 LLM_API_KEY / AZURE_OPENAI_API_KEY
 uvicorn main:app --host 127.0.0.1 --port 4291
 ```
 
@@ -20,19 +20,19 @@ uvicorn main:app --host 127.0.0.1 --port 4291
 - `GET  /health` — 健康检查 + 配置状态
 - `POST /jobs` `{url}` — 创建 job，后台下载源视频，视频就绪后可手动解析或提取音频
 - `GET  /jobs/{id}` — 当前状态 + 产物；若原始音轨已拆出，会返回 `source_audio_url`
-- `POST /jobs/{id}/transcribe` — 触发音频提取 + ASR + 翻译 + SKG 英文产品介绍文案；文案长度按原音频时长估算，配置 MiniMax 后从英文随机音色池生成配音。前端 Audio 节点提供“提取音频 / 重新提取音频”按钮，可与抽帧并行，不自动触发
+- `POST /jobs/{id}/transcribe` — 触发音频提取 + ASR + 翻译 + SKG 英文产品介绍文案；文案长度按原音频时长估算，配置 Azure OpenAI TTS 后从 Azure 音色池生成配音。前端 Audio 节点提供“提取音频 / 重新提取音频”按钮，可与抽帧并行，不自动触发
 - `GET  /jobs/{id}/video.mp4` — 原视频
 - `GET  /jobs/{id}/audio.wav` — 拆轨后的原始音频，供前端底部音频条生成波形
-- `GET  /jobs/{id}/audio-script.mp3` — 英文改写文案的 MiniMax 配音
+- `GET  /jobs/{id}/audio-script.mp3` — 英文改写文案的 Azure OpenAI TTS 配音
 - `GET  /jobs/{id}/frames/{i}.jpg` — 第 i 张关键帧（0-9）
 
 ## Mock 模式
 
-未设 `LLM_API_KEY` 时，转录走本地 mock，便于 UI 联调；未设 `MINIMAX_API_KEY` 时只生成改写文案，不生成配音文件。
+未设 `LLM_API_KEY` 时，转录走本地 mock，便于 UI 联调；未设 `AZURE_OPENAI_API_KEY` 且无法复用 `LLM_API_KEY` 时只生成改写文案，不生成配音文件。
 
 ## 依赖
 
 - `ffmpeg` 系统二进制（拆轨 / 抽帧）
 - `yt-dlp` 系统二进制（也可走 Python 包）
 - OpenAI 兼容 LLM 网关（ASR / 翻译 / 文案改写）；如果 `/audio/transcriptions` 不可用，会用 `ASR_FALLBACK_MODEL` 走 Gemini 多模态音频识别
-- MiniMax T2A HTTP（英文产品介绍文案配音，使用 `MINIMAX_API_KEY`；默认随机音色池 `English_magnetic_voiced_man,English_Upbeat_Woman,English_MaturePartner`）
+- Azure OpenAI TTS（英文产品介绍文案配音，使用 `AZURE_OPENAI_API_KEY` 或回退复用 `LLM_API_KEY`；默认音色池 `alloy,verse,shimmer`）
diff --git a/api/character_library/skg-characters/manifest.json b/api/character_library/skg-characters/manifest.json
index 1905b50..4a65ad5 100644
--- a/api/character_library/skg-characters/manifest.json
+++ b/api/character_library/skg-characters/manifest.json
@@ -8,6 +8,7 @@
       "name": "运动阳光男",
       "folder": "01_运动阳光男",
       "description": "运动阳光男透明骨架人角色，含正面、左右45度、侧面、背面、半身近景和背部特写参考。",
+      "prompt_brief": "Athletic sunny male transparent wellness character, young adult energy, lean fit proportions, open and upbeat posture, clean translucent skin shell with visible white skeleton. The character should feel friendly, active, outdoor-sport inspired, bright, healthy, and suitable for premium SKG neck-and-shoulder wearable device ads. Keep neck, collarbone, shoulders, upper back, and cervical spine readable without bulky clothing or props.",
       "primary_image": "character-01-front",
       "images": [
         {
@@ -80,6 +81,7 @@
       "name": "都市型男",
       "folder": "02_都市型男",
       "description": "都市型男透明骨架人角色，含正面、左右45度、侧面、背面、半身近景和背部特写参考。",
+      "prompt_brief": "Urban stylish male transparent wellness character, adult metropolitan feel, clean confident posture, refined proportions, translucent body shell with visible white skeleton. The commercial mood is premium city lifestyle, composed, sharp, and modern, suitable for office or commute-oriented SKG neck-and-shoulder massage ads. Keep shoulder line, side neck, collarbone, and upper back clear for wearable device placement.",
       "primary_image": "character-02-front",
       "images": [
         {
@@ -152,6 +154,7 @@
       "name": "优雅白领女",
       "folder": "03_优雅白领女",
       "description": "优雅白领女透明骨架人角色，含正面、左右45度、侧面、背面、半身近景和背部特写参考。",
+      "prompt_brief": "Elegant professional female transparent wellness character, young adult to adult office-worker mood, slim balanced proportions, calm poised posture, translucent outer body with a clean visible white skeleton. The style should feel premium, gentle, trustworthy, and workplace-friendly for SKG neck-and-shoulder wearable device ads. Keep hair, collars, and accessories from hiding the neck, shoulders, collarbone, upper back, and cervical spine.",
       "primary_image": "character-03-front",
       "images": [
         {
@@ -224,6 +227,7 @@
       "name": "运动辣妹",
       "folder": "04_运动辣妹",
       "description": "运动辣妹透明骨架人角色，含正面、左右45度、侧面、背面、半身近景和背部特写参考。",
+      "prompt_brief": "Sporty confident female transparent wellness character, energetic young adult fitness mood, toned proportions, expressive posture, translucent skin shell with visible white skeleton. The character should feel active, fashionable, bright, and creator-ad friendly while remaining premium and non-horror. Keep the neck, side neck, shoulders, collarbone, upper trapezius, and upper back open and readable for SKG wearable massage device scenes.",
       "primary_image": "character-04-front",
       "images": [
         {
@@ -296,6 +300,7 @@
       "name": "绅士大叔",
       "folder": "05_绅士大叔",
       "description": "绅士大叔透明骨架人角色，含正面、左右45度、侧面、背面、半身近景和背部特写参考。",
+      "prompt_brief": "Mature gentleman transparent wellness character, adult to middle-aged presence without exact age, steady confident posture, slightly stronger build, translucent body shell with a clean visible white skeleton. The commercial mood is calm, trustworthy, premium, and lifestyle-oriented for SKG neck-and-shoulder wearable device ads. Keep collars and styling minimal so the neck, shoulders, upper back, cervical spine, and shoulder blades remain visible.",
       "primary_image": "character-05-front",
       "images": [
         {
@@ -364,4 +369,4 @@
       ]
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/api/main.py b/api/main.py
index 54409f4..41a08f8 100644
--- a/api/main.py
+++ b/api/main.py
@@ -52,8 +52,18 @@ LOCAL_ASR_BIN = os.getenv("LOCAL_ASR_BIN", "").strip()
 LOCAL_ASR_MODEL = os.getenv("LOCAL_ASR_MODEL", "mlx-community/whisper-tiny").strip() or "mlx-community/whisper-tiny"
 LOCAL_ASR_TIMEOUT_SECONDS = max(30, int(os.getenv("LOCAL_ASR_TIMEOUT_SECONDS", "180")))
 TRANSLATE_MODEL = os.getenv("TRANSLATE_MODEL", "gemini-2.5-flash")
-REWRITE_MODEL = os.getenv("REWRITE_MODEL", "gemini-2.5-pro")
-VISION_MODEL = os.getenv("VISION_MODEL", "gemini-2.5-flash")
+DEFAULT_GPT_TEXT_MODEL = os.getenv("GPT_TEXT_MODEL", "gpt-4o").strip() or "gpt-4o"  # blank GPT_TEXT_MODEL also resolves to gpt-4o
+
+
+def gpt_model_env(name: str, default: str | None = None) -> str:  # read a model name from env var `name`, replacing empty or gemini-* values
+    value = os.getenv(name, default or DEFAULT_GPT_TEXT_MODEL).strip()
+    if not value or value.lower().startswith("gemini-"):  # case-insensitive prefix match on "gemini-"
+        return default or DEFAULT_GPT_TEXT_MODEL  # caller-supplied default wins over the global GPT default
+    return value
+
+
+REWRITE_MODEL = gpt_model_env("REWRITE_MODEL")  # no explicit default, so the fallback is DEFAULT_GPT_TEXT_MODEL
+VISION_MODEL = gpt_model_env("VISION_MODEL")  # same fallback rule as REWRITE_MODEL
 IMAGE_BASE_URL = os.getenv("IMAGE_BASE_URL", LLM_BASE_URL).strip()
 IMAGE_API_KEY = os.getenv("IMAGE_API_KEY", LLM_API_KEY).strip()
 AI_HTTP_PROXY = (
@@ -77,29 +87,14 @@ PRODUCT_ASSET_MIN_LONG_SIDE = max(512, int(os.getenv("PRODUCT_ASSET_MIN_LONG_SID
 PRODUCT_ASSET_MIN_SHORT_SIDE = max(320, int(os.getenv("PRODUCT_ASSET_MIN_SHORT_SIDE", "600")))
 PRODUCT_ASSET_JPEG_QUALITY = max(80, min(95, int(os.getenv("PRODUCT_ASSET_JPEG_QUALITY", "92"))))
 VIDEO_MODEL = os.getenv("VIDEO_MODEL", "seedance").strip() or "seedance"
+YTDLP_COOKIES_FILE = os.getenv("YTDLP_COOKIES_FILE", "").strip()  # optional cookies file path; empty string means not configured
+YTDLP_COOKIES_FROM_BROWSER = os.getenv("YTDLP_COOKIES_FROM_BROWSER", "").strip()  # optional value passed to yt-dlp --cookies-from-browser
 AUDIO_PRODUCT_BRIEF = os.getenv(
     "AUDIO_PRODUCT_BRIEF",
     "SKG 智能按摩产品，主打日常肩颈、腰背、眼部、膝盖或足部放松；广告表达要高级、干净、可信，不做医疗疗效承诺。",
 ).strip()
-AUDIO_REWRITE_MODEL = os.getenv("AUDIO_REWRITE_MODEL", REWRITE_MODEL).strip() or REWRITE_MODEL
-MINIMAX_API_KEY = os.getenv("MINIMAX_API_KEY", "").strip()
-MINIMAX_TTS_BASE_URL = os.getenv("MINIMAX_TTS_BASE_URL", "https://api.minimax.io").strip().rstrip("/")
-MINIMAX_TTS_MODEL = os.getenv("MINIMAX_TTS_MODEL", "speech-2.8-turbo").strip() or "speech-2.8-turbo"
-MINIMAX_TTS_VOICE_ID = os.getenv(
-    "MINIMAX_TTS_VOICE_ID",
-    "English_expressive_narrator",
-).strip() or "English_expressive_narrator"
-DEFAULT_MINIMAX_TTS_VOICE_POOL = [
-    "English_magnetic_voiced_man",
-    "English_Upbeat_Woman",
-    "English_MaturePartner",
-]
-MINIMAX_TTS_VOICE_POOL = [
-    v.strip()
-    for v in os.getenv("MINIMAX_TTS_VOICE_POOL", ",".join(DEFAULT_MINIMAX_TTS_VOICE_POOL)).split(",")
-    if v.strip()
-]
-VOICE_PROVIDER = os.getenv("VOICE_PROVIDER", "azure_openai").strip().lower() or "azure_openai"
+AUDIO_REWRITE_MODEL = gpt_model_env("AUDIO_REWRITE_MODEL", REWRITE_MODEL)  # unset, empty or gemini-* values resolve to REWRITE_MODEL
+VOICE_PROVIDER = "azure_openai"  # hard-coded constant; the VOICE_PROVIDER environment variable is no longer read here
 AZURE_OPENAI_BASE_URL = os.getenv("AZURE_OPENAI_BASE_URL", "https://ai.skg.com/azure").strip().rstrip("/")
 AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY", LLM_API_KEY).strip()
 AZURE_TTS_MODEL = os.getenv("AZURE_TTS_MODEL", "gpt-4o-mini-tts").strip() or "gpt-4o-mini-tts"
@@ -111,6 +106,11 @@ AZURE_TTS_VOICE_POOL = [
     if v.strip()
 ]
 AZURE_TTS_PATH = os.getenv("AZURE_TTS_PATH", "/audio/speech").strip() or "/audio/speech"
+AZURE_TTS_PATHS = list(dict.fromkeys(  # ordered de-dup: the default repeats AZURE_TTS_PATH when it is already /audio/speech
+    p.strip()
+    for p in os.getenv("AZURE_TTS_PATHS", f"{AZURE_TTS_PATH},/audio/speech,/v1/audio/speech").split(",")
+    if p.strip()
+))
 
 POE_API_BASE_URL = os.getenv("POE_API_BASE_URL", "https://api.poe.com/v1").strip() or "https://api.poe.com/v1"
 POE_API_KEY = os.getenv("POE_API_KEY", "").strip()
@@ -452,6 +452,7 @@ class CharacterLibraryItem(BaseModel):
     name: str
     folder: str = ""
     description: str = ""
+    prompt_brief: str = ""
     primary_image: str = ""
     images: list[CharacterLibraryImage] = Field(default_factory=list)
 
@@ -477,6 +478,7 @@ class SubjectTemplateItem(BaseModel):
     name: str
     description: str = ""
     note: str = ""
+    prompt_brief: str = ""
     source: Literal["database"] = "database"
     source_job_id: str = ""
     source_frame_idx: int = -1
@@ -1075,6 +1077,35 @@ def run(cmd: list[str], cwd: Path | None = None) -> str:
     return res.stdout
 
 
+def ytdlp_cookie_args() -> list[str]:  # extra yt-dlp CLI args carrying login state; [] when nothing is configured
+    if YTDLP_COOKIES_FILE:  # an explicit cookies file takes priority over browser extraction
+        cookies = Path(YTDLP_COOKIES_FILE).expanduser()
+        if not cookies.is_file():  # is_file() also rejects directories, which exists() would have passed on to yt-dlp
+            raise RuntimeError("TikTok cookies 文件不可用，请检查 YTDLP_COOKIES_FILE 配置。")
+        return ["--cookies", str(cookies)]
+    if YTDLP_COOKIES_FROM_BROWSER:
+        return ["--cookies-from-browser", YTDLP_COOKIES_FROM_BROWSER]
+    return []
+
+
+def normalize_download_error(error: Exception) -> str:  # prefix login-related download failures with an actionable hint
+    raw = str(error)
+    lowered = raw.lower()
+    needs_login = (  # parentheses only spell out the grouping that `and`/`or` precedence already gives
+        ("log in for access" in lowered)
+        or ("login" in lowered and "cookies" in lowered)
+        or ("cookies-from-browser" in lowered)
+        or ("sign in" in lowered and "tiktok" in lowered)
+    )
+    if not needs_login:
+        return raw
+    return (
+        "TikTok 下载需要登录态。请上传视频文件，或在后端配置 "
+        "YTDLP_COOKIES_FILE / YTDLP_COOKIES_FROM_BROWSER 后重试。"
+        f"原始错误：{raw}"
+    )
+
+
 # ---- 启发式选帧工具 ----
 import imagehash
 import numpy as np
@@ -1728,13 +1759,15 @@ def pipeline_download(job_id: str) -> None:
             update(job, status="downloading", message="本地上传 · 跳过下载", progress=15)
         else:
             update(job, status="downloading", message="yt-dlp 下载中…", progress=5)
-            run([
+            cmd = [
                 "yt-dlp", "-f", "best[ext=mp4]/best",
                 "-o", str(mp4),
                 "--no-warnings", "--no-playlist",
                 "--retries", "3",
+                *ytdlp_cookie_args(),
                 job.url,
-            ])
+            ]
+            run(cmd)
             if not mp4.exists():
                 raise RuntimeError("下载完成但找不到 source.mp4")
 
@@ -1757,7 +1790,7 @@ def pipeline_download(job_id: str) -> None:
         )
     except Exception as e:
         message = "视频元数据解析失败" if stage == "metadata" else "下载失败"
-        update(job, status="failed", error=str(e), message=message)
+        update(job, status="failed", error=normalize_download_error(e), message=message)
 
 
 def pipeline_analyze(
@@ -1929,7 +1962,7 @@ def analyze_queue_worker() -> None:
         ANALYZE_WORKER_RUNNING = False
 
 
-# ---------- 音频转写 + 翻译 + SKG 改写 + MiniMax 配音 ----------
+# ---------- 音频转写 + 翻译 + SKG 改写 + Azure OpenAI 配音 ----------
 
 class TranscriptionUnavailable(RuntimeError):
     pass
@@ -2385,18 +2418,6 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds
         return fallback, f"改写失败，使用本地模板：{e}"
 
 
-def _minimax_tts_url() -> str:
-    if MINIMAX_TTS_BASE_URL.endswith("/v1/t2a_v2"):
-        return MINIMAX_TTS_BASE_URL
-    return f"{MINIMAX_TTS_BASE_URL}/v1/t2a_v2"
-
-
-def _choose_minimax_voice_id() -> str:
-    if MINIMAX_TTS_VOICE_POOL:
-        return random.choice(MINIMAX_TTS_VOICE_POOL)
-    return MINIMAX_TTS_VOICE_ID
-
-
 def _choose_azure_voice_id() -> str:
     if AZURE_TTS_VOICE_POOL:
         return random.choice(AZURE_TTS_VOICE_POOL)
@@ -2404,9 +2425,7 @@ def _choose_azure_voice_id() -> str:
 
 
 def _choose_tts_voice_id() -> str:
-    if VOICE_PROVIDER == "azure_openai":
-        return _choose_azure_voice_id()
-    return _choose_minimax_voice_id()
+    return _choose_azure_voice_id()  # only the Azure voice chooser remains after the MiniMax branch was removed
 
 
 def _voice_speed_for(voice_id: str, target_seconds: float, text: str) -> float:
@@ -2423,60 +2442,22 @@ def _voice_speed_for(voice_id: str, target_seconds: float, text: str) -> float:
     return 0.99
 
 
-def _minimax_tts_sync(job_id: str, text: str, voice_id: str, target_seconds: float = 12.0) -> str:
-    if not MINIMAX_API_KEY:
-        raise RuntimeError("MINIMAX_API_KEY 未配置，未生成配音")
-    if not text.strip():
-        raise RuntimeError("改写文案为空，未生成配音")
-    payload = {
-        "model": MINIMAX_TTS_MODEL,
-        "text": text.strip()[:9500],
-        "stream": False,
-        "language_boost": "English",
-        "output_format": "hex",
-        "voice_setting": {
-            "voice_id": voice_id,
-            "speed": _voice_speed_for(voice_id, target_seconds, text),
-            "vol": 1,
-            "pitch": 0,
-        },
-        "audio_setting": {
-            "sample_rate": 32000,
-            "bitrate": 128000,
-            "format": "mp3",
-            "channel": 1,
-        },
-    }
-    resp = httpx.post(
-        _minimax_tts_url(),
-        headers={"Authorization": f"Bearer {MINIMAX_API_KEY}", "Content-Type": "application/json"},
-        json=payload,
-        timeout=90,
-    )
-    resp.raise_for_status()
-    data = resp.json()
-    base_resp = data.get("base_resp") or {}
-    if int(base_resp.get("status_code", 0) or 0) != 0:
-        raise RuntimeError(base_resp.get("status_msg") or "MiniMax TTS 返回失败")
-    audio_hex = ((data.get("data") or {}).get("audio") or "").strip()
-    if not audio_hex:
-        raise RuntimeError("MiniMax TTS 未返回 audio hex")
-    try:
-        audio_bytes = bytes.fromhex(audio_hex)
-    except ValueError as e:
-        raise RuntimeError(f"MiniMax TTS audio hex 无法解析：{e}") from e
-    out = job_dir(job_id) / "audio_script.mp3"
-    out.write_bytes(audio_bytes)
-    return f"/jobs/{job_id}/audio-script.mp3"
-
-
-def _azure_tts_url() -> str:
-    path = AZURE_TTS_PATH if AZURE_TTS_PATH.startswith("/") else f"/{AZURE_TTS_PATH}"
+def _azure_tts_url_for(path_value: str) -> str:  # build the full TTS endpoint URL for one candidate path
+    path = path_value if path_value.startswith("/") else f"/{path_value}"  # ensure a leading slash before joining
     if AZURE_OPENAI_BASE_URL.endswith(path):
         return AZURE_OPENAI_BASE_URL
     return f"{AZURE_OPENAI_BASE_URL}{path}"
 
 
+def _azure_tts_urls() -> list[str]:  # candidate TTS endpoint URLs in configured order, without repeats
+    candidates = (
+        _azure_tts_url_for(path)
+        for path in AZURE_TTS_PATHS or [AZURE_TTS_PATH]
+    )
+    # dict.fromkeys keeps first-seen order while dropping repeated URLs
+    return list(dict.fromkeys(candidates))
+
+
 def _azure_openai_tts_sync(job_id: str, text: str, voice_id: str, target_seconds: float = 12.0) -> str:
     if not AZURE_OPENAI_API_KEY:
         raise RuntimeError("AZURE_OPENAI_API_KEY 或 LLM_API_KEY 未配置，未生成配音")
@@ -2489,18 +2470,32 @@ def _azure_openai_tts_sync(job_id: str, text: str, voice_id: str, target_seconds
         "response_format": "mp3",
         "speed": _voice_speed_for(voice_id, target_seconds, text),
     }
-    resp = httpx.post(
-        _azure_tts_url(),
-        headers={
-            "Authorization": f"Bearer {AZURE_OPENAI_API_KEY}",
-            "api-key": AZURE_OPENAI_API_KEY,
-            "Content-Type": "application/json",
-        },
-        json=payload,
-        timeout=120,
-    )
+    headers = {
+        "Authorization": f"Bearer {AZURE_OPENAI_API_KEY}",
+        "api-key": AZURE_OPENAI_API_KEY,
+        "Content-Type": "application/json",
+    }
+    resp: httpx.Response | None = None
+    errors: list[str] = []
+    with ai_http_client(timeout=120) as client:
+        for url in _azure_tts_urls():
+            try:
+                current = client.post(url, headers=headers, json=payload)
+            except Exception as e:
+                errors.append(f"{url}: {type(e).__name__}: {e}")
+                continue
+            if current.status_code < 400:
+                resp = current
+                break
+            errors.append(f"{url}: HTTP {current.status_code}: {current.text[:180]}")
+            if current.status_code not in {404, 405}:
+                resp = current
+                break
+    if resp is None:
+        raise RuntimeError("Azure OpenAI TTS 不可用；已尝试 " + " | ".join(errors))
     if resp.status_code >= 400:
-        raise RuntimeError(f"Azure OpenAI TTS HTTP {resp.status_code}: {resp.text[:300]}")
+        detail = " | ".join(errors) or resp.text[:300]
+        raise RuntimeError(f"Azure OpenAI TTS HTTP {resp.status_code}: {detail[:600]}")
     audio_bytes = resp.content
     if not audio_bytes:
         raise RuntimeError("Azure OpenAI TTS 未返回音频内容")
@@ -2517,9 +2512,7 @@ def _azure_openai_tts_sync(job_id: str, text: str, voice_id: str, target_seconds
 
 
 def _tts_sync(job_id: str, text: str, voice_id: str, target_seconds: float = 12.0) -> tuple[str, str, str]:
-    if VOICE_PROVIDER == "azure_openai":
-        return _azure_openai_tts_sync(job_id, text, voice_id, target_seconds), "azure_openai", AZURE_TTS_MODEL
-    return _minimax_tts_sync(job_id, text, voice_id, target_seconds), "minimax", MINIMAX_TTS_MODEL
+    return _azure_openai_tts_sync(job_id, text, voice_id, target_seconds), "azure_openai", AZURE_TTS_MODEL  # tuple is (result of the Azure call, provider name, model name)
 
 
 def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], target_seconds: float = 12.0) -> AudioScript:
@@ -2531,8 +2524,8 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
     speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id)
     voice_url = ""
     voice_error = ""
-    voice_provider = "azure_openai" if VOICE_PROVIDER == "azure_openai" else "minimax"
-    voice_model = AZURE_TTS_MODEL if voice_provider == "azure_openai" else MINIMAX_TTS_MODEL
+    voice_provider = "azure_openai"
+    voice_model = AZURE_TTS_MODEL
     try:
         voice_url, voice_provider, voice_model = _tts_sync(job_id, rewritten, selected_voice_id, duration)
     except Exception as e:
@@ -2944,6 +2937,83 @@ def _image_text_call(
     raise RuntimeError(_image_failure_message("image text", max_attempts, last_err, capacity_seen))
 
 
+def _image_path_to_data_url(path: Path) -> str:  # read the file and return it as a base64 data: URL
+    media_type = {".png": "image/png", ".webp": "image/webp", ".gif": "image/gif"}.get(path.suffix.lower(), "image/jpeg")  # unknown suffixes keep the previous image/jpeg label
+    return f"data:{media_type};base64,{base64.b64encode(path.read_bytes()).decode('ascii')}"
+
+
+def _vision_brief_from_images(image_paths: list[Path], prompt: str, max_images: int = 8) -> str:  # best-effort brief from images; returns "" instead of raising
+    paths = [path for path in image_paths if path.exists()][:max_images]  # drop missing files first, then cap the count
+    if not paths:
+        return ""
+    if not LLM_API_KEY:  # no key configured: skip the vision call entirely
+        return ""
+    content: list[dict] = [{"type": "text", "text": prompt}]
+    for path in paths:  # each image is attached inline as a base64 data URL
+        content.append({"type": "image_url", "image_url": {"url": _image_path_to_data_url(path)}})
+    try:
+        resp = llm().chat.completions.create(
+            model=VISION_MODEL,
+            messages=[{"role": "user", "content": content}],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+            max_tokens=1400,
+        )
+        raw = (resp.choices[0].message.content or "").strip()
+        if not raw:  # fall back to reasoning_content when the message content is empty
+            raw = (getattr(resp.choices[0].message, "reasoning_content", "") or "").strip()
+        match = re.search(r"\{[\s\S]*\}", raw)  # greedy match: first "{" through the last "}"
+        raw = match.group(0) if match else raw
+        data = json.loads(raw)
+    except Exception as e:  # any request or JSON parse failure is logged and yields ""
+        print(f"[vision brief failed] {e}", flush=True)
+        return ""
+
+    if isinstance(data, dict):
+        if isinstance(data.get("brief"), str) and data["brief"].strip():  # prefer the "brief" field when present and non-blank
+            return data["brief"].strip()[:1800]
+        parts: list[str] = []
+        for key in (  # otherwise stitch a brief from whichever of these fields are non-blank strings
+            "gender_presentation", "age_range", "body_proportion", "hair", "skin_tone",
+            "wardrobe_style", "pose_language", "camera_visibility", "commercial_mood",
+            "neck_shoulder_readiness", "style_constraints",
+        ):
+            value = data.get(key)
+            if isinstance(value, str) and value.strip():
+                parts.append(f"{key.replace('_', ' ')}: {value.strip()}")
+        if parts:
+            return "; ".join(parts)[:1800]  # same 1800-character cap as the direct "brief" path
+    return ""
+
+
+def _describe_source_subject(job_id: str, source_indices: list[int]) -> str:
+    """Turn source keyframes into a non-identifying visual brief for similar-subject text generation."""
+    paths = [_source_frame_path(job_id, idx) for idx in source_indices]  # one keyframe path per requested index
+    prompt = (
+        "You are preparing a non-identifying character brief for generating a NEW similar but non-identical ad subject. "
+        "Look at these source video keyframes as evidence of one role and style, not as a person to identify. "
+        "Do NOT identify the person, do NOT estimate exact age, do NOT describe biometric identity, and do NOT mention celebrity or real-person likeness. "
+        "Output strict JSON only. Use broad style traits suitable for text-to-image generation.\n"
+        "Required keys: gender_presentation, age_range, body_proportion, hair, skin_tone, wardrobe_style, "
+        "pose_language, camera_visibility, commercial_mood, neck_shoulder_readiness, style_constraints, brief.\n"
+        "The brief should be 80-140 words and should preserve category, role, energy, camera readability, and commercial atmosphere while explicitly allowing a new non-identical subject."
+    )
+    return _vision_brief_from_images(paths, prompt, max_images=8)  # "" whenever the helper cannot produce a brief
+
+
+def _describe_subject_template_from_images(name: str, subject_style: str, image_paths: list[Path], note: str = "") -> str:  # vision-generated brief for a saved subject template
+    prompt = (
+        f"You are summarizing a saved SKG subject template named '{name}' for future text-to-image generation. "
+        f"Subject style: {subject_style}. User note: {note[:500]}. "
+        "Look at the subject views and describe the reusable creative direction without copying identity or pixels. "
+        "Do NOT identify a person and do NOT describe exact facial identity. "
+        "Output strict JSON only with keys: gender_presentation, age_range, body_proportion, material_or_skin, "
+        "wardrobe_or_surface_style, pose_language, camera_readability, neck_shoulder_readiness, commercial_mood, brief. "
+        "The brief should be 80-140 words and must be useful as a reference character brief for creating a new innovative variation."
+    )
+    return _vision_brief_from_images(image_paths, prompt, max_images=10)  # "" whenever the helper cannot produce a brief
+
+
 # ---------- API 路由 ----------
 
 class CreateJobReq(BaseModel):
@@ -3130,7 +3200,7 @@ def health() -> dict:
         "auth_configured": WEB_AUTH_CONFIGURED,
         "base_url": LLM_BASE_URL or "openai-default",
         "image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
-        "voice_base_url": AZURE_OPENAI_BASE_URL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_BASE_URL,
+        "voice_base_url": AZURE_OPENAI_BASE_URL,
         "models": {
             "asr": ASR_MODEL,
             "local_asr": LOCAL_ASR_MODEL,
@@ -3147,15 +3217,12 @@ def health() -> dict:
             "subject_image": SUBJECT_ASSET_IMAGE_MODEL,
             "subject_image_fallbacks": SUBJECT_ASSET_IMAGE_MODELS,
             "voice_provider": VOICE_PROVIDER,
-            "voice_base_url": AZURE_OPENAI_BASE_URL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_BASE_URL,
-            "voice_tts": AZURE_TTS_MODEL if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_MODEL,
-            "voice_id": AZURE_TTS_VOICE_ID if VOICE_PROVIDER == "azure_openai" else MINIMAX_TTS_VOICE_ID,
-            "voice_pool": AZURE_TTS_VOICE_POOL if VOICE_PROVIDER == "azure_openai" else (MINIMAX_TTS_VOICE_POOL or [MINIMAX_TTS_VOICE_ID]),
-            "voice_configured": bool(AZURE_OPENAI_API_KEY) if VOICE_PROVIDER == "azure_openai" else bool(MINIMAX_API_KEY),
-            "minimax_tts": MINIMAX_TTS_MODEL,
-            "minimax_voice": MINIMAX_TTS_VOICE_ID,
-            "minimax_voice_pool": MINIMAX_TTS_VOICE_POOL or [MINIMAX_TTS_VOICE_ID],
-            "minimax_configured": bool(MINIMAX_API_KEY),
+            "voice_base_url": AZURE_OPENAI_BASE_URL,
+            "voice_tts": AZURE_TTS_MODEL,
+            "voice_tts_paths": AZURE_TTS_PATHS,
+            "voice_id": AZURE_TTS_VOICE_ID,
+            "voice_pool": AZURE_TTS_VOICE_POOL,
+            "voice_configured": bool(AZURE_OPENAI_API_KEY),
             "video": VIDEO_MODEL,
             "video_aliases": VIDEO_MODEL_ALIASES,
             "video_provider": video_provider_name(),
@@ -3225,6 +3292,31 @@ async def create_job(req: CreateJobReq, bg: BackgroundTasks) -> Job:
     return job
 
 
+@app.post("/jobs/{job_id}/download/retry", response_model=Job)
+async def retry_job_download(job_id: str, bg: BackgroundTasks) -> Job:  # re-queue pipeline_download for an existing URL-based job
+    job = JOBS.get(job_id)
+    if not job:
+        raise HTTPException(404, "job not found")
+    if job.source_kind == "upload" or job.url.startswith("upload://"):  # uploaded sources are rejected with 409
+        raise HTTPException(409, "uploaded videos cannot be redownloaded; upload the file again")
+    if job.status in {"downloading", "splitting", "transcribing"}:  # refuse while the job is in one of these statuses
+        raise HTTPException(409, f"job is busy: {job.status}")
+
+    mp4 = job_dir(job_id) / "source.mp4"
+    if mp4.exists() and mp4.stat().st_size == 0:  # NOTE(review): a non-empty source.mp4 is kept; yt-dlp does not overwrite an existing video by default — confirm retry really re-downloads
+        mp4.unlink()
+    update(
+        job,
+        status="downloading",
+        progress=1,
+        error="",
+        message="重新提交下载…",
+        video_url="",
+    )
+    bg.add_task(pipeline_download, job_id)  # the download itself is scheduled as a background task
+    return job
+
+
 @app.post("/jobs/upload", response_model=Job)
 async def create_job_from_upload(bg: BackgroundTasks, file: UploadFile = File(...)) -> Job:
     if not file.filename:
@@ -4308,43 +4400,56 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
         source_indices = [idx] + source_indices
     source_indices = list(dict.fromkeys(source_indices))[:12]
 
+    similar_mode = req.reconstruction_mode == "similar"
     character_reference_paths: list[Path] = []
-    character_reference_clause = ""
+    template_brief_clause = ""
     character_label = ""
     subject_template_id = (req.subject_template_id or "").strip()
     character_id = (req.character_id or "").strip()
     if subject_template_id:
         template = find_subject_template_item(subject_template_id)
         character_label = template.name
-        for image in template.images[:10]:
-            character_reference_paths.append(subject_template_image_file(image.filename))
-        character_reference_clause = (
-            f"Selected reusable subject template from database: {template.name}. "
-            "Use these saved generated subject views as a high-quality creative direction and identity bible only; "
-            "do not copy pixels, file artifacts, exact pose, labels, or accidental defects. "
-            "Create a new innovative variation that keeps the same broad subject type, transparent wellness character language, "
-            "camera readability, shoulder/neck product compatibility, and commercial role. "
+        template_paths = [subject_template_image_file(image.filename) for image in template.images[:10]]
+        character_reference_paths.extend(template_paths)
+        brief = template.prompt_brief.strip() or template.note.strip() or template.description.strip()
+        if similar_mode and not brief:
+            brief = _describe_subject_template_from_images(template.name, template.subject_style, template_paths, template.note)
+        template_brief_clause = (
+            f"Reference character brief from saved database template '{template.name}': {brief}. "
+            "Use this as a high-quality creative direction and identity bible only; do not copy a face, exact pose, pixels, file artifacts, labels, or accidental defects. "
+            "Create a new innovative variation that keeps the same broad subject type, transparent wellness character language, camera readability, shoulder/neck product compatibility, and commercial role. "
+            if brief else
+            f"Selected reusable subject template from database: {template.name}. Create a new innovative variation, not a duplicate. "
         )
     elif character_id:
         character = find_character_library_item(character_id)
         character_label = character.name
-        for image in character.images[:7]:
-            character_reference_paths.append(character_library_file(image.filename))
-        character_reference_clause = (
-            f"Selected built-in creative character reference: {character.name}. "
-            "Use these planned character images as a high-quality creative direction and anatomy/style bible only; "
+        character_reference_paths.extend(character_library_file(image.filename) for image in character.images[:7])
+        brief = character.prompt_brief.strip() or character.description.strip()
+        template_brief_clause = (
+            f"Reference character brief from built-in creative character '{character.name}': {brief}. "
+            "Use this planned character brief as a high-quality creative direction and anatomy/style bible only; "
             "do not copy the exact face, exact pose, exact silhouette, pixels, or make a duplicate. "
-            "Create a new innovative variation that keeps the same broad role, transparent wellness character language, "
-            "camera readability, and shoulder/neck product compatibility. "
+            "Create a new innovative variation that keeps the same broad role, transparent wellness character language, camera readability, and shoulder/neck product compatibility. "
         )
 
-    model_src, tmp_focus = _focus_source_for_element(job_id, idx, el)
+    tmp_focus: Path | None = None
+    model_src: Path | list[Path] | None = None
     frame_reference_paths = [p for p in (_source_frame_path(job_id, i) for i in source_indices) if p.exists()]
-    if character_reference_paths:
-        remaining = max(0, 10 - len(character_reference_paths))
-        model_src = character_reference_paths + frame_reference_paths[:remaining]
-    elif len(frame_reference_paths) > 1:
-        model_src = frame_reference_paths[:10]
+    source_subject_brief = _describe_source_subject(job_id, source_indices) if similar_mode else ""
+    source_subject_clause = (
+        f"Source video role brief from selected keyframes: {source_subject_brief}. "
+        "Use this brief to preserve role category, creator-ad energy, camera readability, and broad styling, while creating a new non-identical subject. "
+        if source_subject_brief else
+        "Source video role brief unavailable; create a new non-identical ad subject guided by the user direction, template brief, and requested view. "
+    )
+    if not similar_mode:
+        model_src, tmp_focus = _focus_source_for_element(job_id, idx, el)
+        if character_reference_paths:
+            remaining = max(0, 10 - len(character_reference_paths))
+            model_src = character_reference_paths + frame_reference_paths[:remaining]
+        elif len(frame_reference_paths) > 1:
+            model_src = frame_reference_paths[:10]
 
     try:
         with Image.open(_source_frame_path(job_id, idx)) as src_im:
@@ -4371,7 +4476,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
     )
     actor_style_clause = (
         "Generate a believable normal commercial video actor, not a transparent or skeleton character. "
-        "Use the references to understand the source video's casting direction, age range, gender presentation, body proportion, wardrobe category, gesture vocabulary, framing, energy, lighting, and creator-ad style. "
+        "Use the text briefs to understand the source video's casting direction, age range, gender presentation, body proportion, wardrobe category, gesture vocabulary, framing, energy, lighting, and creator-ad style. "
         "Do not recreate the exact person's face, biometric identity, unique likeness, tattoos, scars, logos, watermarks, captions, or platform UI. "
         "The output must be a newly designed similar actor that could play the same role in a new ad, with consistent identity across all views. "
         if similar_actor
@@ -4386,7 +4491,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
     prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else ""
     identity_lock_clause = (
         "Identity lock: these API calls generate one high-definition multi-view pack for ONE single subject, but each individual output file must show only its one requested view. "
-        "Before rendering, infer one consistent character bible from the reference image(s): gender presentation, age range, body proportions, head shape, face direction cues, material, silhouette, wardrobe/material style, and commercial mood. "
+        "Before rendering, infer one consistent character bible from the supplied text brief and generation instructions: gender presentation, age range, body proportions, head shape, face direction cues, material, silhouette, wardrobe/material style, and commercial mood. "
         "Keep that same character bible unchanged across every generated view in separate files. "
         "If user direction requests a gender, age, or style change, apply that one change uniformly to all views; never mix male/female, young/old, or multiple style identities inside the same pack. "
         "For transparent humanoids, keep the same transparent skin shell, skeleton proportions, visible spine/rib cage/pelvis/limb bones, and non-horror wellness character style in every view. "
@@ -4427,14 +4532,22 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
                 if closeup_view and req.subject_kind == "living"
                 else "The subject must be complete, centered, full body or full object, head-to-feet visible when applicable, not cropped by the canvas. Make the subject large and readable: it should occupy about 85-95% of the image height with only small margins. "
             )
+            reference_strategy_clause = (
+                "Text-only generation mode: no source image is attached to this image request. Use only the written source/video/template briefs below as creative constraints. "
+                "This is intentionally NOT image editing and NOT identity replication. "
+                + source_subject_clause
+                + template_brief_clause
+                if similar_mode else
+                "Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. "
+            )
             prompt = (
-                f"Use the reference image(s) only as visual evidence; do not crop, cut out, paste, trace, or extract pixels from the source. "
+                reference_strategy_clause
+                +
                 f"Generate one newly rendered {view_prompt} for {target}. "
-                f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. "
+                f"The subject is a {kind_phrase}. Treat all source evidence as one role and one consistent subject bible, not multiple subjects. "
                 + single_view_clause
                 + identity_clause
                 + identity_lock_clause
-                + character_reference_clause
                 + neck_product_clause
                 + canvas_clause
                 + prompt_extra_clause
@@ -4447,7 +4560,16 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
                 + transparent_character_clause
             )
             try:
-                img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
+                if similar_mode:
+                    print(
+                        f"[subject assets] reconstruction_mode=similar endpoint=/images/generations view={view} image_refs=0 model={GPT_IMAGE_MODEL}",
+                        flush=True,
+                    )
+                    img_bytes, _mode = _image_text_call(prompt, models=models, max_attempts=3)
+                else:
+                    if model_src is None:
+                        raise RuntimeError("subject asset edit reference image missing")
+                    img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
             except RuntimeError as e:
                 raise HTTPException(_image_error_status(e), f"subject asset {view} failed: {e}")
 
@@ -5026,6 +5148,7 @@ def save_subject_template(job_id: str, req: SaveSubjectTemplateReq) -> SubjectTe
     template_dir.mkdir(parents=True, exist_ok=True)
     now = _time.time()
     images: list[SubjectTemplateImage] = []
+    saved_image_paths: list[Path] = []
     for asset in selected_assets:
         src = job_dir(job_id) / "assets" / f"{asset.id}.jpg"
         if not src.exists():
@@ -5034,6 +5157,7 @@ def save_subject_template(job_id: str, req: SaveSubjectTemplateReq) -> SubjectTe
         filename = f"{template_id}/{image_id}.jpg"
         dst = SUBJECT_TEMPLATE_IMAGE_DIR / filename
         shutil.copy2(src, dst)
+        saved_image_paths.append(dst)
         images.append(SubjectTemplateImage(
             id=image_id,
             view=asset.view,
@@ -5053,11 +5177,18 @@ def save_subject_template(job_id: str, req: SaveSubjectTemplateReq) -> SubjectTe
         raise HTTPException(404, "subject asset files missing")
 
     primary = next((image.id for image in images if image.view == "front"), images[0].id)
+    prompt_brief = _describe_subject_template_from_images(
+        name,
+        req.subject_style,
+        saved_image_paths,
+        req.note.strip(),
+    ) or req.note.strip()
     item = SubjectTemplateItem(
         id=template_id,
         name=name,
         description=req.note.strip(),
         note=req.note.strip(),
+        prompt_brief=prompt_brief,
         source_job_id=job_id,
         source_frame_idx=frame.index,
         source_element_id=element.id,
diff --git a/deploy/.env.production.example b/deploy/.env.production.example
index 98f966f..d8f1bfe 100644
--- a/deploy/.env.production.example
+++ b/deploy/.env.production.example
@@ -22,7 +22,9 @@ LLM_API_KEY=
 ASR_MODEL=whisper-1
 ASR_FALLBACK_MODEL=gemini-2.5-flash
 TRANSLATE_MODEL=gemini-2.5-flash
-REWRITE_MODEL=gemini-2.5-pro
+GPT_TEXT_MODEL=gpt-4o
+REWRITE_MODEL=gpt-4o
+VISION_MODEL=gpt-4o
 PRODUCT_VIEW_MODEL=gpt-image-2
 IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
 IMAGE_API_KEY=
@@ -33,9 +35,14 @@ SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2
 # Optional outbound proxy for AI gateway calls. Leave blank on normal VPS networking.
 AI_HTTP_PROXY=
 
+# Optional TikTok download login state for yt-dlp. Keep cookies files private.
+YTDLP_COOKIES_FILE=
+YTDLP_COOKIES_FROM_BROWSER=
+
 # Audio rewrite and Azure OpenAI TTS
 AUDIO_REWRITE_MODEL=gemini-2.5-pro
 AUDIO_PRODUCT_BRIEF="SKG smart massage products for daily neck, shoulder, back, eye, knee, and foot relaxation. Keep claims premium, clean, credible, and non-medical."
+# Voice is fixed to Azure OpenAI in the backend.
 VOICE_PROVIDER=azure_openai
 AZURE_OPENAI_BASE_URL=https://ai.skg.com/azure
 AZURE_OPENAI_API_KEY=
@@ -43,13 +50,7 @@ AZURE_TTS_MODEL=gpt-4o-mini-tts
 AZURE_TTS_VOICE_ID=alloy
 AZURE_TTS_VOICE_POOL=alloy,verse,shimmer
 AZURE_TTS_PATH=/audio/speech
-
-# Legacy MiniMax TTS fallback; not the default voice provider.
-MINIMAX_API_KEY=
-MINIMAX_TTS_BASE_URL=https://api.minimax.io
-MINIMAX_TTS_MODEL=speech-2.8-turbo
-MINIMAX_TTS_VOICE_ID=English_expressive_narrator
-MINIMAX_TTS_VOICE_POOL=English_magnetic_voiced_man,English_Upbeat_Woman,English_MaturePartner
+AZURE_TTS_PATHS=/audio/speech,/v1/audio/speech
 
 # Video generation. Use SKG Doubao / Seedance gateway in production.
 POE_API_BASE_URL=https://api.poe.com/v1
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index 2bc379c..37e0bcb 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -572,7 +572,7 @@
         <p>当前产品方向已收窄为“信息流广告快速复刻”：主界面左侧是素材输入列，右侧是信息流复刻工作表。顶部固定显示 01-09 流程顺序和每一步的判定依据，编号不再是装饰文本，而是按素材任务、源视频、音频文案、抽帧、主体资产、产品资产、分镜文案、首尾帧和视频候选这些状态解锁。用户粘贴 TK 链接或上传视频后点击“开始分析”，系统自动下载源视频；下载完成后并行启动音频文案路和视频视觉路。音频文案路提取原音频文案/字幕，分析讲话人、语速节奏、背景音乐/环境声/音效，并为后续新口播和分镜文案提供时间轴；视频视觉路同步抽取参考帧，参考帧只用于人工选择主体并生成相似主体白底视图。产品图上传后独立形成产品资产包：自动识别视角、左右/上下/内外侧、结构点、比例和风险，并补缺角度。最终分镜规划按逐句时间轴把文案、相似主体资产和产品资产汇合；当前暂停直接调视频模型，先逐条生成并审核首帧/尾帧，确认后再决定哪些分镜进入视频候选。</p>
         <div class="pipeline">
           <div class="step"><div class="num">01</div><h3>素材输入</h3><p>有当前素材任务即通过；输入框只负责创建或切换任务。</p></div>
-          <div class="step"><div class="num">02</div><h3>源视频下载</h3><p><code>job.video_url</code> 存在即通过；<code>created/downloading</code> 视为运行中。</p></div>
+          <div class="step"><div class="num">02</div><h3>源视频下载</h3><p><code>job.video_url</code> 存在即通过；<code>created/downloading</code> 视为运行中。TikTok 受限视频可通过 <code>YTDLP_COOKIES_FILE</code> 或 <code>YTDLP_COOKIES_FROM_BROWSER</code> 提供登录态，失败后可对同一素材重新下载。</p></div>
           <div class="step"><div class="num">03</div><h3>音频文案</h3><p><code>audio_script.source_text</code> 或 <code>transcript</code> 逐句时间轴有内容即通过。</p></div>
           <div class="step"><div class="num">04</div><h3>抽帧参考</h3><p><code>job.frames.length &gt; 0</code> 即通过；参考帧只做主体重构证据。</p></div>
           <div class="step"><div class="num">05</div><h3>相似主体</h3><p>关键帧里存在 <code>subject_assets</code> 即通过；生成类似创新主体，不复刻原人。</p></div>
@@ -595,7 +595,7 @@
                 <tr><td><code>web/app/page.tsx</code></td><td>产品工作台主状态：jobs、activeJobId、生成任务状态；主渲染为全屏素材输入列 + 信息流广告复刻工作表；“开始分析”会把 job 放入并行素材分析队列，下载完成后触发 <code>triggerTranscribe</code> 解析音频，并触发 <code>analyzeJob</code> 自动抽 12 张参考帧，形成“音频文案路 + 视频视觉路”同步推进；底部吸附音频条和旧全局浮动主题按钮不再从主界面渲染，避免和工作台内的明暗模式切换重复。</td></tr>
                 <tr><td><code>web/components/ad-recreation-board.tsx</code></td><td>信息流广告复刻工作表：顶部由 <code>buildWorkflowSteps</code> 统一生成 01-09 流程顺序、状态和判定依据，<code>WorkflowOrderBar</code> 展示完整顺序，<code>WorkflowStepBadge</code> / <code>PipelineLane</code> / 分镜列标题共用同一套编号。左侧素材输入只负责链接/上传和任务切换，不再重复放横版原视频预览；右侧顶部用“音频文案、抽帧参考、相似主体、产品素材池”四个状态条显示后台并行进度。源视频工作区展示视频下载状态和默认折叠的文案依据。音频解析结果改成默认折叠的辅助信息，展开后同一行看讲话人/节奏/背景音；主工作区左侧放大为按 9:16 显示的竖版原视频播放器，播放器内覆盖“当前点抽帧”，按当前播放秒数手动补参考帧；右侧上方是音频波形 / 切点参考，下方左侧是参考帧池，右侧是逐句时间轴；下一行只保留“相似主体 / 主体模板”。音频波形用参考图式的连续灰色包络显示响度、停顿和密集爆点，顶部同时显示当前播放秒数、总时长和鼠标指针停点秒数。视频播放时通过 <code>requestAnimationFrame</code> 平滑驱动波形播放线，同时同步高亮并滚动当前句；点击音频波形或字幕行会跳转原视频时间。逐句时间轴左侧参考帧池的主入口是“自动抽帧 12 张”，一键按动作峰值目标重新抽取 12 张源视频参考帧，优先抓手势、表情变化、节奏点和镜头变化；缩略图按竖版完整比例显示不裁切并用更多列紧凑铺开，点选状态直接叠在参考帧池缩略图上，鼠标停留会通过固定浮层放大展示完整帧。“生成 10 张高清图”放在下方相似主体白底视图区，不和抽参考按钮平齐；如果用户没有勾选帧，默认把全部关键帧作为主体参考，勾选后只传已选帧；生成区可在“透明骨架 / 普通真人”之间切换，可选择桌面导入的 5 套内置形象作为创意方向，并可填写统一主体方向，例如年轻女性、更运动、更高级。关键帧和相似主体白底视图都用更小的竖版缩略图密排；白底视图只展示每个 view 的最新一张，缩略图上提供“重新生成这一张”和“删除这一张”，单张重生会用 <code>replace_views=true</code> 替换同一视角。前端调用 <code>generateSubjectAssets</code> 时按主体类型传 <code>subject_style=transparent_human</code> 或 <code>source_actor</code>，按需传 <code>character_id</code>，并使用 <code>reconstruction_mode=similar</code>；后端会把关键帧和内置形象视为同一个主体的创意证据，并锁定同一性别表现、年龄段、体型、材质、风格和视觉身份，同时生成全身多视角 + 肩颈正/左右近景 + 后颈肩背特写，避免整套图出现男女性别、老少年龄或样式混杂。音频结果下方是信息流复刻分镜工作台：顶部产品参考区是“同一产品素材池”，不限量上传产品图，不做不同产品身份判断；上传原图推荐长边 1200-2000px、短边至少 600px，但后端会统一生成最长边 1600px、JPEG 92 的 AI 工作副本，并回显尺寸、自动转换和风险标注；上传后按“套在脖子上的 U 形肩颈按摩仪”进行同一产品批量识别，左/右按佩戴者身体左右、上/下按佩戴方向，额外标注内外侧、开口方向、局部结构点、背景类型、用途标签、生成风险和备注，用户只检查备注，鼠标悬停通过固定浮层显示大图预览，能盖过滚动容器和分镜框架；缺视角补图失败时保留重试入口。脚本区在分镜行上方提供“作者想法”和“整片改写”，每行新口播文案可直接编辑并可单段 AI 改写，分镜时间和原内容列压缩为窄摘要列，新口播列进一步收窄，把横向空间留给画面规划和首尾帧。每条音频分镜纵向排列，行内从左到右串起原内容、新口播文案、画面规划/产品融入和历史候选视频槽；画面规划区先选择镜头类型（人物/情绪、人物+产品、产品特写、场景过渡），再用人物/产品开关、首帧规划、尾帧规划和产品出现方式决定这一条到底需不需要产品图或相似主体参考。当前主流程暂停直接调用视频模型，不再提供“生成本条 · Seedance”或“一键提交全部”视频入口；行内新增“首尾帧闸门”，分别显示/生成首帧和尾帧，旧 keyframe 类型首尾帧会被忽略，只认真正的 asset 首尾帧。生成首尾帧时调用 
<code>generateSceneAsset</code>，先按人物描述、镜头类型、首尾状态和产品佩戴需求，从相似主体 6/10 视图里自动挑选最多 5 张最相关主体视角，再传入 <code>subject_images</code> 和该行自动挑选的产品图 <code>product_images</code>；关键帧只作为前置主体重构证据和行数据承载位置，不再作为后续视频首尾帧参考。视频候选槽只展示历史候选和待生成占位，按钮改为“保存本条规划 / 保存全部规划”。只有该行勾选“产品”时，首尾帧生成才会从产品素材池按分镜角色、视角优先级、用途标签、置信度和风险自动挑选最多 6 张相关产品图；未勾选产品时不会把产品图提交给首尾帧/后续生视频模型。只有该行勾选“人物”时，才会传按需筛选后的相似主体参考图；否则 prompt 会明确禁止强行添加主角式透明骨架人，后端也不会再给产品特写强加透明骨架人约束。<code>ModelTrace</code> 会在音频解析、产品识别/补图、相似主体高清视图包、脚本改写等入口旁直接展示模型名；所有生图入口都显示并使用 <code>gpt-image-2</code>，没有其他图片模型 fallback；点击后用固定浮层展示模型链路、输入输出和回退逻辑。旧分镜卡、抽帧控制和视频生成组件仍保留在文件里，但当前主路径不渲染。</td></tr>
                 <tr><td><code>AdRecreationBoard</code> 主题切换</td><td>顶部指标区左侧有“明亮/暗色”按钮，使用 <code>Sun</code> / <code>Moon</code> 图标切换 <code>skg-board-theme--light</code> 类名，并把选择写入 <code>localStorage["skg-board-theme"]</code>。暗色仍是默认模式；明亮模式只改变工作台外观，不改变任务、素材、分镜、模型调用或接口数据。</td></tr>
-                <tr><td><code>SourceReferenceBuildPanel</code></td><td>“相似主体 / 主体模板”当前承担主体资产生成和主体模板复用的前端入口：面板先分成“主体模板库”和“本次生成 / 入库草稿”。模板库优先读取 <code>GET /subject-templates</code> 数据库模板，并保留 <code>GET /character-library/skg</code> 的内置形象作为策划初始模板；入库草稿显示本次来源、生成数量、模板命名和备注，点击保存会调用 <code>saveSubjectTemplate</code> 把当前主体视图复制到主体模板库。选择数据库模板后，后续 <code>generateSubjectAssets</code> 会传 <code>subject_template_id</code>，让后端以已保存模板视图作为新主体参考。</td></tr>
+                <tr><td><code>SourceReferenceBuildPanel</code></td><td>“相似主体 / 主体模板”当前承担主体资产生成和主体模板复用的前端入口：顶部用 radio 区分“用模板生成”和“不用模板（从源视频关键帧创新）”，<code>源视频相似</code> 不再作为模板卡混进网格。模板库把 <code>GET /subject-templates</code> 数据库模板和 <code>GET /character-library/skg</code> 内置形象合并成 120px 竖排卡片，选中态统一用 cyan；保存为主体模板的名称、备注和按钮固定在模板区底部一行。下方“生成主体视图”独立显示 <code>gpt-image-2</code> 链路，支持透明骨架/真人、全部 10 / 常用 4 / 自定义视图，并把生成结果缩略图放大为可单张重生、删除和 hover 放大的媒体卡。前端仍传 <code>reconstruction_mode=similar</code>，但后端会先把关键帧/模板转成文字 brief，再走文字生图，不再把参考图作为强 image-edit 锚点。</td></tr>
                 <tr><td><code>web/components/media-asset-tile.tsx</code></td><td>项目内媒体素材缩略图基底组件：图片、视频、抽帧、产品图、相似主体图、首尾帧和视频候选默认从这里获得统一交互。组件负责缩略图显示、顶层固定浮层 hover 放大、删除按钮、重新生成等操作按钮、忙碌遮罩和图片/视频共用预览，避免每个新板块重复手写不同的媒体交互。</td></tr>
                 <tr><td><code>web/app/login/page.tsx</code></td><td>生产登录页：访问账号/访问密钥表单、保持登录、错误/成功状态；当前只在原版 Digital Oasis 动态背景上叠加一个组合登录框，桌面端左侧是动态角色，右侧是图标化登录表单；面板左上角展示官网 SKG 字标和中文“营销内容工作台”系统标识。</td></tr>
                 <tr><td><code>web/app/login/layout.tsx</code></td><td>登录路由专属 layout：覆盖全站默认网页标题和描述为空，避免 <code>/login</code> 继承工作台 metadata 后在页面源码里继续出现登录界面文字以外的文案。</td></tr>
@@ -619,7 +619,7 @@
               <tbody>
                 <tr><td><code>api/main.py</code></td><td>FastAPI 单文件后端：登录会话、状态模型、任务恢复、下载、抽帧、Vision、清洗、元素、分镜、原音频转写/翻译、声音与背景音分析、后续口播改写/TTS、文件返回。</td></tr>
                 <tr><td><code>api/product_library/skg-products</code></td><td>内置 SKG 白底产品图库：<code>manifest.json</code> 记录从桌面产品图筛出的 gallery 白底图和桌面 4 张产品角度图，<code>images/</code> 存 45 张参考图。</td></tr>
-                <tr><td><code>api/character_library/skg-characters</code></td><td>内置相似主体形象库：从桌面 5 套策划形象导入，<code>manifest.json</code> 记录运动阳光男、都市型男、优雅白领女、运动辣妹、绅士大叔，每套含 7 张透明骨架参考图，用于相似主体高清视图包的创意方向选择。</td></tr>
+                <tr><td><code>api/character_library/skg-characters</code></td><td>内置相似主体形象库：从桌面 5 套策划形象导入，<code>manifest.json</code> 记录运动阳光男、都市型男、优雅白领女、运动辣妹、绅士大叔，每套含 7 张透明骨架参考图和一段 <code>prompt_brief</code>。相似主体生成时优先使用文字 brief 作为创意方向，避免把内置图作为强参考图复制。</td></tr>
                 <tr><td><code>jobs/&lt;jobId&gt;/state.json</code></td><td>运行时状态文件，不在源码列表里，但刷新恢复依赖它。</td></tr>
                 <tr><td><code>jobs/&lt;jobId&gt;/audio.wav</code></td><td>拆轨得到的原始音频，当前只作为后端分析和后续必要预览的只读文件来源；主界面不再默认渲染底部音频条。</td></tr>
                 <tr><td><code>jobs/&lt;jobId&gt;/frames</code></td><td>关键帧 jpg。注意 frame.index 是稳定 ID，不等于数组下标。</td></tr>
@@ -644,7 +644,7 @@ web/app/page.tsx
 后端主链路：
 api/main.py
   -> Job / KeyFrame / KeyElement / StoryboardScene / AudioScript
-  -> 下载 / 上传 / 音频提取 / ASR / 翻译 / 声音背景音分析 / 抽帧 / Vision / 清洗 / 元素提取 / 分镜保存 / 后续音频改写与 MiniMax 英文配音
+  -> 下载 / 上传 / 音频提取 / ASR / 翻译 / 声音背景音分析 / 抽帧 / Vision / 清洗 / 元素提取 / 分镜保存 / 后续音频改写与 Azure OpenAI 英文配音
   -> jobs/&lt;jobId&gt;/state.json + 图片文件落盘</pre>
       </section>
 
@@ -752,7 +752,7 @@ api/main.py
   background_audio_profile,
   product_brief,
   rewrite_model,
-  voice_provider: azure_openai | minimax,
+  voice_provider: azure_openai,
   voice_model,
   voice_id,
   voice_url,
@@ -775,6 +775,14 @@ SubjectAsset {
   background: white | black,
   width, height, size,
   source_frame_indices[]
+}</pre>
+            <p><code>SubjectTemplateItem</code> 保存用户确认过的主体视图包。<code>prompt_brief</code> 是后端从模板图反推的文字特征，后续相似生成优先读取它，而不是再次把模板图作为强参考图传给 image-edit。</p>
+            <pre>SubjectTemplateItem {
+  id, name, description, note,
+  prompt_brief,
+  subject_style: transparent_human | source_actor,
+  primary_image,
+  images: SubjectTemplateImage[]
 }</pre>
           </div>
           <div class="card">
@@ -889,16 +897,17 @@ ProductRefStateItem {
           </thead>
           <tbody>
             <tr><td>网页登录</td><td><code>POST /auth/login</code>、<code>GET /auth/check</code>、<code>POST /auth/logout</code></td><td><code>web/app/login/page.tsx</code>、Nginx <code>auth_request</code></td><td>登录页提交账号密码到 <code>/api/auth/login</code>，后端设置 HttpOnly 会话 Cookie；生产 Nginx 对工作台和 <code>/api/</code> 调 <code>/auth/check</code> 做统一校验，未登录页面跳 <code>/login/</code>，API 返回 JSON 401。</td></tr>
-            <tr><td>运行配置 / 模型标注</td><td><code>GET /health</code></td><td><code>getRuntimeHealth</code>、<code>ModelTrace</code></td><td>返回 <code>models</code>：ASR、本机 ASR、ASR fallback、翻译、改写、通用 Vision、产品视角识别 <code>product_view</code>、GPT 图像模型、主体 6 视图 GPT 图像模型、Azure OpenAI TTS、视频别名和 Seedance 服务商。前端所有当前主路径里会调用模型的按钮旁显示模型名，点击弹出小窗口查看模型链路和输入输出逻辑；不返回 API Key 或敏感凭证。</td></tr>
+            <tr><td>运行配置 / 模型标注</td><td><code>GET /health</code></td><td><code>getRuntimeHealth</code>、<code>ModelTrace</code></td><td>返回 <code>models</code>：ASR、本机 ASR、ASR fallback、翻译、GPT 改写、GPT 画面理解、产品视角识别 <code>product_view</code>、GPT 图像模型、主体 6 视图 GPT 图像模型、Azure OpenAI TTS、视频别名和 Seedance 服务商。当前 <code>REWRITE_MODEL</code>、<code>AUDIO_REWRITE_MODEL</code> 和 <code>VISION_MODEL</code> 默认使用 <code>gpt-4o</code>；如果旧环境变量仍写 <code>gemini-*</code>，后端会把 <code>REWRITE_MODEL</code> 和 <code>VISION_MODEL</code> 归一化为 <code>GPT_TEXT_MODEL</code>，把 <code>AUDIO_REWRITE_MODEL</code> 归一化为 <code>REWRITE_MODEL</code>。语音只走 Azure OpenAI TTS，<code>models.voice_tts_paths</code> 会回传当前尝试的语音路径，方便区分路径错误和语音服务不可用。前端所有当前主路径里会调用模型的按钮旁显示模型名，点击弹出小窗口查看模型链路和输入输出逻辑；不返回 API Key 或敏感凭证。</td></tr>
             <tr><td>历史列表</td><td><code>GET /jobs</code></td><td><code>listJobs</code></td><td>所有 job 精简列表（id/url/status/thumbnail/mtime…），按 state.json mtime 倒序。前端 URL 无 <code>?job=</code> 时拉它回填全部历史；带 <code>limit</code> 可截断。</td></tr>
-            <tr><td>创建任务</td><td><code>POST /jobs</code></td><td><code>createJob</code></td><td>提交 TK 链接，后台开始下载；前端“开始”队列会在 downloaded 后自动触发音频解析。</td></tr>
+            <tr><td>创建任务</td><td><code>POST /jobs</code></td><td><code>createJob</code></td><td>提交 TK 链接，后台开始下载；前端“开始”队列会在 downloaded 后自动触发音频解析。下载阶段优先使用 <code>YTDLP_COOKIES_FILE</code>，其次使用 <code>YTDLP_COOKIES_FROM_BROWSER</code>，TikTok 要求登录态时会提示上传 MP4 或配置后端 cookies。</td></tr>
+            <tr><td>重试下载</td><td><code>POST /jobs/{id}/download/retry</code></td><td><code>retryJobDownload</code></td><td>用于 TK 链接下载失败且没有 <code>video_url</code> 的素材；清空错误、重新进入下载状态，并在后台再次执行 <code>pipeline_download</code>。上传视频不能重下载，需要重新上传文件。</td></tr>
             <tr><td>上传视频</td><td><code>POST /jobs/upload</code></td><td><code>uploadJob</code></td><td>保存 source.mp4，然后同样进入下载完成状态；当前上传后也加入第一步队列，下载完成后自动解析音频。</td></tr>
             <tr><td>删除输入视频</td><td><code>DELETE /jobs/{id}</code></td><td><code>deleteJob</code></td><td>从任务队列、URL 和磁盘 <code>jobs/&lt;id&gt;</code> 目录移除整个 job，包括源视频、关键帧、元素提取图和生成视频。</td></tr>
             <tr><td>解析视频</td><td><code>POST /jobs/{id}/analyze?frames=&amp;target=&amp;mode=&amp;quality=</code></td><td><code>analyzeJob</code></td><td>后续阶段保留的抽帧能力。默认 <code>frames=12</code>；<code>target</code> 支持透明骨架人、综合、清晰主体、转场变化、表情瞬间、动作峰值。当前第一步主流程不自动调用该接口；原版视频旁的“抽参考 12 帧”会显式用 <code>target=motion</code>、<code>quality=accurate</code>、<code>mode=replace</code> 重新生成全局动作/节奏参考帧池。</td></tr>
             <tr><td>音频文案轨</td><td><code>POST /jobs/{id}/transcribe</code></td><td><code>triggerTranscribe</code></td><td>若尚未拆轨，先从 <code>source.mp4</code> 提取 <code>audio.wav</code> 并回填 <code>source_audio_url</code>；随后用 ASR 提取原始文案，翻译成中文，写入 <code>audio_script.source_text</code>、<code>source_zh</code> 和逐句 <code>transcript</code>。远端 <code>ASR_MODEL</code> 失败后先走本机 <code>LOCAL_ASR_BIN</code>/<code>LOCAL_ASR_MODEL</code>（默认 <code>mlx_whisper</code>），再尝试 <code>ASR_FALLBACK_MODEL</code>。后端会拒绝重复文本、逐秒假字幕或覆盖率过低的结果，不再把不可听的多模态输出写进时间轴。再用 <code>ASR_FALLBACK_MODEL</code> 多模态音频分析讲话人、语速节奏、停顿、背景音乐/环境声/音效，写入 <code>speaker_profile</code>、<code>rhythm_profile</code>、<code>background_audio_profile</code>。当前第一步不默认生成 SKG 新口播和 Azure OpenAI 配音。</td></tr>
             <tr><td>分镜脚本改写</td><td><code>POST /jobs/{id}/script/rewrite</code></td><td><code>rewriteStoryboardScript</code></td><td>根据原参考文案、当前新口播、分镜角色、时间段和作者想法改写中文口播。<code>mode=segment</code> 只改一段；<code>mode=all</code> 一次改完整片，要求整片前后连贯。接口只返回 <code>items[index,text]</code>，前端暂存在当前页面状态里，保存规划或生成首尾帧时写入 <code>StoryboardScene.action</code>。</td></tr>
             <tr><td>原始音频文件</td><td><code>GET /jobs/{id}/audio.wav</code></td><td><code>sourceAudioUrl</code></td><td>返回拆轨得到的 wav；当前主界面不再渲染底部吸附音频条，右侧复刻工作表会读取该文件生成参考图式横向响度波形，并和原视频、逐句时间轴联动；波形标题栏显示当前播放秒数、总时长和鼠标指针停点秒数。</td></tr>
-            <tr><td>改写配音文件</td><td><code>GET /jobs/{id}/audio-script.mp3</code></td><td><code>apiAssetUrl(job.audio_script.voice_url)</code></td><td>后续新配音阶段保留的 TTS 产物；默认走 <code>VOICE_PROVIDER=azure_openai</code>，通过 <code>AZURE_OPENAI_BASE_URL</code> 的 OpenAI 协议 <code>/audio/speech</code> 生成 mp3。当前第一步不默认生成该文件。</td></tr>
+            <tr><td>改写配音文件</td><td><code>GET /jobs/{id}/audio-script.mp3</code></td><td><code>apiAssetUrl(job.audio_script.voice_url)</code></td><td>后续新配音阶段保留的 TTS 产物；服务端固定走 <code>VOICE_PROVIDER=azure_openai</code>，通过 <code>AZURE_OPENAI_BASE_URL</code> 的 OpenAI 协议生成 mp3，并按 <code>AZURE_TTS_PATHS</code> 依次尝试 <code>/audio/speech</code>、<code>/v1/audio/speech</code> 等路径。当前第一步不默认生成该文件。</td></tr>
             <tr><td>手动加帧</td><td><code>POST /jobs/{id}/frames?t=</code></td><td><code>addManualFrame</code></td><td>按视频时间戳抽一帧，index 递增但 frames 按 timestamp 排序。当前主界面会把原版视频播放器的播放秒数传给 <code>AudioIntakePanel</code> 标题栏右侧的“当前点抽帧”。</td></tr>
             <tr><td>删除关键帧</td><td><code>DELETE /jobs/{id}/frames/{idx}</code></td><td><code>deleteFrame</code></td><td>删除单张关键帧并清掉对应选择态；当前主界面每张缩略图右下角提供删除入口，方便手动抽错后直接修正。</td></tr>
             <tr><td>Vision 识别</td><td><code>POST /frames/{idx}/describe</code></td><td><code>describeFrame</code></td><td>写入 frame.description，后续可从 objects 加候选元素。</td></tr>
@@ -907,15 +916,15 @@ ProductRefStateItem {
             <tr><td>应用清洗</td><td><code>POST /cleanup/apply</code></td><td><code>applyCleanedFrame</code></td><td>物理覆盖 frames/{idx}.jpg，并备份原图。</td></tr>
             <tr><td>元素增改删</td><td><code>POST/PATCH/DELETE /elements</code></td><td><code>addElement/updateElement/deleteElement</code></td><td>让用户修正 Vision 错误，避免候选结果锁死。</td></tr>
             <tr><td>元素提取</td><td><code>POST /elements/{element_id}/cutout</code></td><td><code>cutoutElement</code></td><td>调用图像模型生成独立白底素材图，每次累积一张 cutout。</td></tr>
-            <tr><td>主体资产包</td><td><code>POST /elements/{element_id}/subject-assets</code><br><code>DELETE /elements/{element_id}/subject-assets/{asset_id}</code></td><td><code>generateSubjectAssets</code><br><code>deleteSubjectAsset</code></td><td>根据参考帧、可选内置形象或数据库主体模板重新绘制一个统一主体资产包；前端默认把全部关键帧作为 <code>source_frame_indices</code>，如果用户手动选择了关键帧则只传已选帧，也可传 <code>character_id</code> 选择 5 套内置透明骨架形象之一，或传 <code>subject_template_id</code> 使用已保存的主体模板库视图。当前源视频工作区支持 <code>subject_style=transparent_human</code> 和 <code>source_actor</code> 两种相似主体：透明骨架人会保持透明/半透明皮肤包裹可见白色骨架，普通真人会保持正常广告演员方向。两种模式都使用 <code>reconstruction_mode=similar</code>，后端最多把 10 张参考图作为独立 <code>image[]</code> 提交给 <code>gpt-image-2</code>，生成默认 10 张 2048 高清白底图：正面、左右 45、左右侧、背面、肩颈正/左右近景、后颈肩背特写。Prompt 明确这是肩颈按摩设备视频素材，要求脖颈、锁骨、肩线、上背和肩胛区域清晰无遮挡；内置形象和数据库模板都只作为创意方向，不照抄。后端强制使用 <code>gpt-image-2</code>，不再接受前端或环境变量切到其他图片模型，也不做图片模型 fallback；后端会加身份锁定约束，统一性别表现、年龄段、体型、材质、风格和视觉身份，避免整套图混成不同人物。如果参考帧是竖屏，prompt 会明确要求竖版 9:16 风格画布，落盘也按源帧纵横比归一化。前端白底视图缩略图和关键帧一样，鼠标停留会用顶层浮层放大预览，点击仍打开原图；后端每个 <code>view</code> 单独调用一次生图，并明确禁止多视图拼图、contact sheet、多主体、多面板、标签或对比排版，保证一个视角一张照片。图片调用走统一 <code>ai_http_client</code>，可用 <code>AI_HTTP_PROXY</code> / <code>IMAGE_HTTP_PROXY</code> 处理本地 launchd 代理继承问题；网络/DNS 失败返回 503 并提示检查代理配置。<code>replace_views=true</code> 时会替换同一视角旧图；删除接口会移除对应 subject asset 记录并删除本地 jpg 文件。</td></tr>
+            <tr><td>主体资产包</td><td><code>POST /elements/{element_id}/subject-assets</code><br><code>DELETE /elements/{element_id}/subject-assets/{asset_id}</code></td><td><code>generateSubjectAssets</code><br><code>deleteSubjectAsset</code></td><td>根据参考帧、可选内置形象或数据库主体模板重新绘制一个统一主体资产包；前端默认把全部关键帧作为 <code>source_frame_indices</code>，如果用户手动选择了关键帧则只传已选帧，也可传 <code>character_id</code> 选择 5 套内置透明骨架形象之一，或传 <code>subject_template_id</code> 使用已保存的主体模板。当前源视频工作区支持 <code>subject_style=transparent_human</code> 和 <code>source_actor</code> 两种相似主体。<code>reconstruction_mode=similar</code> 是创新路径：后端先用 <code>VISION_MODEL</code> 把关键帧、内置形象或数据库模板反推成非身份化文字 brief，再调用 <code>gpt-image-2</code> 的 <code>/images/generations</code> 文字生图，日志会显示 <code>image_refs=0</code>；不再把 10 张同一人物实拍图上传给 <code>/images/edits</code>。<code>reconstruction_mode=same</code> 仍保留旧 image-edit 路径，用于确实需要精确复刻且有授权的场景。生成视图可由前端传 <code>views</code> 控制：全部 10、常用 4 或自定义；每个 <code>view</code> 单独调用一次生图，明确禁止多视图拼图、contact sheet、多主体、多面板、标签或对比排版。<code>replace_views=true</code> 时会替换同一视角旧图；删除接口会移除对应 subject asset 记录并删除本地 jpg 文件。</td></tr>
             <tr><td>首尾帧资产</td><td><code>POST /frames/{idx}/scene-asset</code></td><td><code>generateSceneAsset</code></td><td>同一接口兼容旧场景图和新首尾帧；当前信息流复刻流程传 <code>asset_role=first_frame/last_frame</code>、<code>subject_images</code> 和 <code>product_images</code>。后端优先把相似主体白底视图与产品素材拼成 asset contact sheet 给 <code>gpt-image-2</code> 做图像编辑，关键帧只作为行数据承载位置。生成结果保存在 <code>scene_assets</code>，前端再写入 <code>StoryboardScene.first_image/last_image</code>。</td></tr>
             <tr><td>产品图库</td><td><code>GET /product-library/skg</code></td><td><code>listProductLibrary</code></td><td>读取内置 SKG 白底图库 manifest，返回产品标题、品类、尺寸、白底评分和预览图 URL。</td></tr>
             <tr><td>产品图入库到 job</td><td><code>POST /jobs/{id}/assets</code>、<code>POST /jobs/{id}/assets/product-library</code></td><td><code>uploadStoryboardAsset</code>、<code>copyProductLibraryAsset</code></td><td>上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本，透明底铺白，过大/过小图片会在 <code>ImageRef.asset_meta</code> 里返回转换动作和风险；黑底/白底背景本身不强行转换。注意该接口只写图片文件，产品素材池列表另由 <code>PUT /jobs/{id}/product-refs</code> 持久化。</td></tr>
             <tr><td>产品素材池保存</td><td><code>PUT /jobs/{id}/product-refs</code></td><td><code>saveProductRefs</code></td><td>把当前 job 的产品素材池列表、识别视角、用途标签、方向、结构点、备注、AI 补图和删除结果保存到 <code>Job.product_refs</code> / <code>state.json</code>。前端上传、识别完成、补角度、编辑备注和删除时都会同步保存；刷新页面或热更新后从 job 恢复，不再要求重新上传和重新识别。</td></tr>
             <tr><td>产品视角识别</td><td><code>POST /jobs/{id}/assets/product-views/analyze</code></td><td><code>analyzeProductViews</code></td><td>读取同一产品素材池，按批次把多张图一次性提交给 <code>PRODUCT_VIEW_MODEL=gpt-image-2</code> 做视角标注，不限制只看前 6 张；识别对象被固定为套在脖子上的 U 形肩颈按摩仪。返回 <code>view</code>、<code>background</code>、<code>use_tags</code>、<code>orientation</code>、<code>landmarks</code>、中文备注、生成风险和置信度；<code>orientation</code> 明确佩戴者左/右、上/下、内外侧和开口方向对应图中哪边，避免把图片左右误当产品左右。前端不再要求用户手动选择视角，也不做不同产品身份判断。</td></tr>
             <tr><td>产品缺角度补图</td><td><code>POST /jobs/{id}/assets/product-angle</code></td><td><code>generateProductAngleAsset</code></td><td>用当前同一产品素材池作为参考，通过 <code>gpt-image-2</code> 自动补全缺失视角，输出新的 <code>ImageRef(kind="asset")</code>。前端不再固定传第一张图，而是按目标视角给已上传/已标注参考图打分，优先选择真实上传图、目标相邻视角、侧厚/触点/底部对应用途标签和低风险高置信图，最多传 6 张；后端通过 <code>/images/edits</code> multipart 的多张 <code>image[]</code> 直接提交给 <code>gpt-image-2</code>，不再把参考图拼成一张板，降低模型误解成拼图/多产品的概率。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例，并禁止输出拼图/多产品；遇到上游 429 / saturated 会按退避节奏重试，最终仍失败时返回 503 和可读提示；遇到 DNS / ConnectError 也返回 503，并提示配置 <code>AI_HTTP_PROXY</code> / <code>IMAGE_HTTP_PROXY</code>。</td></tr>
-            <tr><td>角色库</td><td><code>GET /character-library/skg</code></td><td><code>listCharacterLibrary</code></td><td>读取内置 5 个透明骨架人角色 manifest，每个角色含正面、左右 45 度、侧面、背面、半身近景和背部特写 7 张参考图。</td></tr>
-            <tr><td>主体模板库</td><td><code>GET /subject-templates</code><br><code>GET /subject-templates/images/{filename}</code><br><code>POST /jobs/{id}/subject-templates</code></td><td><code>listSubjectTemplates</code><br><code>subjectTemplateImageUrl</code><br><code>saveSubjectTemplate</code></td><td>数据库化可复用主体库。前端左侧模板库优先展示这里保存的主体模板；右侧“本次生成 / 入库草稿”会把当前 job 的相似主体白底视图按名称、备注、主体类型、原 job/frame/element 和 asset 列表复制到 <code>JOBS_DIR/_subject_templates</code>，以后生成相似主体时通过 <code>subject_template_id</code> 作为参考图来源。</td></tr>
+            <tr><td>角色库</td><td><code>GET /character-library/skg</code></td><td><code>listCharacterLibrary</code></td><td>读取内置 5 个透明骨架人角色 manifest，每个角色含正面、左右 45 度、侧面、背面、半身近景和背部特写 7 张参考图，以及用于相似主体文字生图的 <code>prompt_brief</code>。</td></tr>
+            <tr><td>主体模板库</td><td><code>GET /subject-templates</code><br><code>GET /subject-templates/images/{filename}</code><br><code>POST /jobs/{id}/subject-templates</code></td><td><code>listSubjectTemplates</code><br><code>subjectTemplateImageUrl</code><br><code>saveSubjectTemplate</code></td><td>数据库化可复用主体库。前端模板库展示这里保存的主体模板；“保存为主体模板”会把当前 job 的相似主体白底视图按名称、备注、主体类型、原 job/frame/element 和 asset 列表复制到 <code>JOBS_DIR/_subject_templates</code>，并由后端用 Vision LLM 从这些图反推 <code>prompt_brief</code>。以后相似生成通过 <code>subject_template_id</code> 读取这个 brief 作为文字创意方向，不再把模板图直接上传给 image-edit。</td></tr>
             <tr><td>角色图入库到 job</td><td><code>POST /jobs/{id}/assets/character-library</code></td><td><code>copyCharacterLibraryAssets</code></td><td>把所选角色的 7 张参考图复制为当前 job asset，返回 <code>subject_images</code>，产品融合生成视频时作为人物身份参考图提交。</td></tr>
             <tr><td>产品融合引导图</td><td><code>POST /jobs/{id}/product-fusion/guide</code></td><td><code>createProductFusionGuide</code></td><td>旧流程兼容接口：读取产品图和白底人物图，按 <code>product_region</code> 合成位置引导图。当前内置角色 + 产品 + 描述流程不再主动调用它。</td></tr>
             <tr><td>产品融合描述词</td><td><code>POST /jobs/{id}/product-fusion/descriptions</code></td><td><code>generateProductFusionDescriptions</code></td><td>兼容接口：可生成产品融合动作描述库。当前前端默认直接用本地 36 条镜头语言模板预填 6 行镜头，并通过“换一组”按钮按 6 条一组轮换。</td></tr>
@@ -947,7 +956,7 @@ ProductRefStateItem {
             <tr>
               <td><span class="tag gray">音频条</span></td>
               <td>复刻工作表顶部触发音频解析；全文文案依据默认折叠，音频解析结果也默认折叠为辅助信息；主展示以源视频工作区为准：竖版原视频在左，音频波形和逐句时间轴在右；底部 <code>AudioStrip</code> 当前不渲染。</td>
-              <td>当前第一步不要默认展示底部音频条、新配音播放器、独立原文案提取大卡片，或把 MiniMax 配音当作已完成结果。</td>
+              <td>当前第一步不要默认展示底部音频条、新配音播放器、独立原文案提取大卡片，或把未生成的 Azure OpenAI 配音当作已完成结果。</td>
               <td><code>web/components/audio-strip.tsx</code>、<code>pipeline_transcribe</code>、<code>AudioScript</code></td>
             </tr>
             <tr>
@@ -982,7 +991,7 @@ ProductRefStateItem {
             <h3>阻塞 / 占位</h3>
             <ul>
               <li>ASR：优先走当前 OpenAI-compatible 音频转写入口；如果该网关没有 <code>/audio/transcriptions</code>，自动 fallback 到 <code>ASR_FALLBACK_MODEL</code>（默认 <code>gemini-2.5-flash</code>）的多模态音频识别。</li>
-              <li>Voice：当前默认语音通道是 <code>VOICE_PROVIDER=azure_openai</code>，通过 <code>AZURE_OPENAI_BASE_URL=https://ai.skg.com/azure</code> 的 OpenAI 协议生成 TTS；第一步暂不默认调用。MiniMax 仅保留为兼容旧配置。</li>
+              <li>Voice：当前语音通道固定是 <code>VOICE_PROVIDER=azure_openai</code>，通过 <code>AZURE_OPENAI_BASE_URL=https://ai.skg.com/azure</code> 的 OpenAI 协议生成 TTS；后端按 <code>AZURE_TTS_PATHS</code> 依次尝试路径。第一步暂不默认调用。</li>
               <li>Audio Product Brief：默认是通用 SKG 放松产品卖点；当前第一步只保留配置，后续分镜/新配音阶段再使用。</li>
               <li>Video Gen：当前视频通道固定优先 Seedance；<code>VIDEO_API_BASE_URL=https://ai.skg.com/doubao</code> 走 content JSON 异步任务，提交后写入候选片段并轮询到完成。</li>
               <li>Compose：还没做本地 ffmpeg 字幕/TTS 合成。</li>
@@ -1024,6 +1033,54 @@ ProductRefStateItem {
         <h2>变更记录</h2>
         <p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
         <div class="changelog">
+          <article class="change">
+            <header>
+              <h3>2026-05-18 · 删除个人语音通道残留</h3>
+              <span class="tag violet">API</span>
+              <span class="tag cyan">Model</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>旧个人语音通道虽然已不再作为活动路径，但仓库里仍有健康检查字段、前端类型、环境注释和文档历史文字残留，容易被误认为可用模型方案。</p>
+              <p><strong>改动：</strong><code>/health</code> 不再返回旧语音通道相关字段；<code>web/lib/api.ts</code> 删除对应类型；<code>api/.env.example</code>、<code>deploy/.env.production.example</code>、<code>RULES.md</code>、<code>.memory/status.md</code> 和本页移除相关名称和配置引用。语音只保留 Azure OpenAI TTS。</p>
+              <p><strong>影响：</strong>后续模型分工里只描述 Azure OpenAI TTS；不要再把个人语音通道作为候选、fallback 或环境变量方案。</p>
+            </div>
+          </article>
+          <article class="change">
+            <header>
+              <h3>2026-05-18 · 语音通道固定 Azure OpenAI TTS</h3>
+              <span class="tag violet">API</span>
+              <span class="tag cyan">Model</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>语音生成测试失败时无法区分是 Azure OpenAI TTS 路径配置不对，还是整条语音服务不可用；同时 TTS 分支仍可能被环境变量或前端文案误导。</p>
+              <p><strong>改动：</strong>后端固定 <code>VOICE_PROVIDER=azure_openai</code>，删除旧 TTS 活动配置和 fallback；<code>_azure_openai_tts_sync</code> 改为按 <code>AZURE_TTS_PATHS</code> 依次尝试多个 OpenAI-compatible 语音路径，失败时返回每个 URL 的 HTTP 状态或连接错误。<code>/health</code> 新增 <code>models.voice_tts_paths</code>。</p>
+              <p><strong>影响：</strong>后续排查语音时先看失败信息里的路径和状态码：如果所有路径 404/405，优先调 <code>AZURE_TTS_PATHS</code>；如果连接/鉴权/上游错误，则按 Azure 语音服务可用性或 Key 排查。前端和状态文档不再展示旧语音方案。</p>
+            </div>
+          </article>
+          <article class="change">
+            <header>
+              <h3>2026-05-18 · 画面理解和文案改写改用 GPT</h3>
+              <span class="tag violet">API</span>
+              <span class="tag cyan">Model</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>关键帧画面理解和分镜/口播改写仍可能被旧环境变量覆盖到 Gemini，不符合后端模型分工。</p>
+              <p><strong>改动：</strong><code>api/main.py</code> 新增 <code>GPT_TEXT_MODEL</code> / <code>gpt_model_env</code>，默认 <code>VISION_MODEL</code>、<code>REWRITE_MODEL</code> 和 <code>AUDIO_REWRITE_MODEL</code> 为 <code>gpt-4o</code>；旧 <code>gemini-*</code> 覆盖值会自动归一化回 GPT。</p>
+              <p><strong>影响：</strong><code>/health</code> 会向前端模型标注暴露 GPT 模型名；Gemini 仅保留在 ASR fallback / 翻译链路。</p>
+            </div>
+          </article>
+          <article class="change">
+            <header>
+              <h3>2026-05-18 · 相似主体改为文字 brief 创新生成</h3>
+              <span class="tag violet">API</span>
+              <span class="tag rose">UI</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>相似主体生成把最多 10 张源视频帧或模板图作为 <code>image[]</code> 上传给 <code>/images/edits</code>，参考图视觉权重过强，结果容易变成源视频人物或内置模板的复刻品。</p>
+              <p><strong>改动：</strong><code>generate_subject_assets</code> 在 <code>reconstruction_mode=similar</code> 时先用 <code>VISION_MODEL</code> 把关键帧、内置形象或数据库模板转成非身份化文字 brief，再调用 <code>gpt-image-2</code> 的文字生图路径；<code>same</code> 模式保留旧 image-edit 精确复刻路径。<code>SubjectTemplateItem</code> 和内置形象 manifest 新增 <code>prompt_brief</code>，保存模板时会从生成视图反推 brief。前端 <code>SourceReferenceBuildPanel</code> 改成“用模板 / 不用模板”模式开关、120px 竖排模板卡、底部一行保存表单，以及独立的“生成主体视图”区域，支持全部 10 / 常用 4 / 自定义视图。</p>
+              <p><strong>影响：</strong>后续说“相似主体”应理解为文字 brief 驱动的创新生图，不是把关键帧或模板图送去 image edit 复制。需要精确复刻时必须显式走 <code>reconstruction_mode=same</code>。</p>
+            </div>
+          </article>
           <article class="change">
             <header>
               <h3>2026-05-18 · 主工作台增加明亮模式</h3>
@@ -1036,6 +1093,18 @@ ProductRefStateItem {
               <p><strong>影响：</strong>只改变工作台视觉模式，不改变素材下载、音频解析、抽帧、主体模板、产品素材池、首尾帧或模型链路；<code>web/app/page.tsx</code> 同步移除旧全局浮动主题按钮，避免右下角出现第二套不相关的主题入口。后续新增图片/视频板块仍应复用同一套媒体悬停放大和删除逻辑。</p>
             </div>
           </article>
+          <article class="change">
+            <header>
+              <h3>2026-05-18 · TK 受限视频支持 cookies 和失败重试</h3>
+              <span class="tag violet">API</span>
+              <span class="tag rose">UI</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>部分 TikTok 链接会返回“Log in for access”，后端已经接到任务并创建 job，但裸 <code>yt-dlp</code> 下载没有登录态，任务只能失败；失败素材再次点击“开始分析”也没有重新触发下载。</p>
+              <p><strong>改动：</strong><code>api/main.py</code> 新增 <code>YTDLP_COOKIES_FILE</code> / <code>YTDLP_COOKIES_FROM_BROWSER</code> 和 <code>POST /jobs/{id}/download/retry</code>。<code>web/lib/api.ts</code> 新增失败文案格式化函数 <code>formatJobError</code> 和 <code>retryJobDownload</code>；<code>web/app/page.tsx</code> 在失败素材上清掉自动触发标记，无 <code>video_url</code> 时重新下载，有 <code>video_url</code> 时重新跑音频文案路和视觉抽帧路；<code>AdRecreationBoard</code> 按状态显示“重新下载 / 重新解析”。</p>
+              <p><strong>影响：</strong>用户无需删除素材再粘贴同一链接；配置 cookies 后可直接对原失败素材重新下载。cookies 属于敏感登录态，只能放本机或服务器私有路径。</p>
+            </div>
+          </article>
           <article class="change">
             <header>
               <h3>2026-05-18 · 主工作台视觉质感对齐登录页</h3>
@@ -1106,6 +1175,7 @@ ProductRefStateItem {
               <p><strong>问题：</strong>页面里的 01、02 只是硬编码视觉标签，用户无法判断编号代表什么流程，也看不出每一步是按什么条件通过、运行、阻塞或暂停。</p>
               <p><strong>改动：</strong><code>AdRecreationBoard</code> 新增 <code>buildWorkflowSteps</code>，统一定义 01 素材输入、02 源视频下载、03 音频文案、04 抽帧参考、05 相似主体、06 产品素材池、07 分镜文案、08 画面首尾帧、09 视频候选。顶部新增 <code>WorkflowOrderBar</code> 展示顺序、状态和判定依据；素材输入、源视频区、四路状态条、产品素材池、分镜文案、画面规划和视频候选列共用 <code>WorkflowStepBadge</code> / <code>PipelineLane</code> 的同一编号。</p>
               <p><strong>影响：</strong>以后说“改 03 音频文案”或“08 首尾帧怎么判定”时，可以直接定位到这套步骤配置；编号不再散落在 JSX 里。</p>
+
             </div>
           </article>
           <article class="change">
@@ -1268,7 +1338,7 @@ ProductRefStateItem {
               <span class="tag amber">Config</span>
             </header>
             <div class="body">
-              <p><strong>问题：</strong>之前图片、文本、音频分析共用 <code>LLM_BASE_URL</code>，配音默认仍是 MiniMax，视频虽然已接豆包/Seedance，但模型标注没有把“生图 GPT / 语音 Azure / 视频 Seedance”三条高优先级链路清楚拆开。</p>
+              <p><strong>问题：</strong>之前图片、文本、音频分析共用 <code>LLM_BASE_URL</code>，语音通道不够清晰，视频虽然已接豆包/Seedance，但模型标注没有把“生图 GPT / 语音 Azure / 视频 Seedance”三条高优先级链路清楚拆开。</p>
               <p><strong>改动：</strong><code>api/main.py</code> 新增 <code>IMAGE_BASE_URL</code>、<code>IMAGE_API_KEY</code>、<code>VOICE_PROVIDER</code>、<code>AZURE_OPENAI_BASE_URL</code>、<code>AZURE_OPENAI_API_KEY</code>、<code>AZURE_TTS_MODEL</code> 等配置；图片调用改走图片专用 OpenAI-compatible client，文字生图走 <code>/images/generations</code>，图生图后续已收敛到 <code>/images/edits</code>，默认 <code>gpt-image-2</code>；TTS 新增 Azure OpenAI 协议 <code>/audio/speech</code> 通道，默认 <code>VOICE_PROVIDER=azure_openai</code>；<code>GET /health</code> 回传图片、主体、语音和视频的实际模型与 base URL 供前端模型标注使用。</p>
               <p><strong>影响：</strong><code>api/main.py</code>、<code>web/lib/api.ts</code>、<code>RULES.md</code>、<code>.project.json</code>、<code>docs/source-analysis.html</code>。真实 key 仍只写本地 <code>api/.env</code> / 生产环境变量，不能入库。</p>
             </div>
@@ -1629,7 +1699,7 @@ ProductRefStateItem {
             </header>
             <div class="body">
               <p><strong>问题：</strong>用户明确否定前一版“开始后自动抽帧、分镜、元素、合成”的推进方式，当前只需要把信息流广告快速复刻的第一步跑通：粘贴 TK 链接或上传视频，下载源视频，提取原音频文案，并分析讲话人、节奏和背景音。</p>
-              <p><strong>改动：</strong><code>web/app/page.tsx</code> 的开始流程改为下载完成后只自动触发 <code>triggerTranscribe</code>；上传视频也加入同一音频解析队列。<code>AdRecreationBoard</code> 主渲染改成左侧素材输入 + 右侧音频解析工作表，不再显示“追加分镜”“开始抽帧”“生成全部视频”。<code>AudioStrip</code> 右侧改为原文案/翻译/声音背景音分析。<code>AudioScript</code> 新增 <code>background_audio_profile</code>，后端 <code>pipeline_transcribe</code> 先保存原文案、中文翻译、讲话人、节奏和背景音分析，当前第一步不默认生成 SKG 新口播或 MiniMax 配音。</p>
+              <p><strong>改动：</strong><code>web/app/page.tsx</code> 的开始流程改为下载完成后只自动触发 <code>triggerTranscribe</code>；上传视频也加入同一音频解析队列。<code>AdRecreationBoard</code> 主渲染改成左侧素材输入 + 右侧音频解析工作表，不再显示“追加分镜”“开始抽帧”“生成全部视频”。<code>AudioStrip</code> 右侧改为原文案/翻译/声音背景音分析。<code>AudioScript</code> 新增 <code>background_audio_profile</code>，后端 <code>pipeline_transcribe</code> 先保存原文案、中文翻译、讲话人、节奏和背景音分析，当前第一步不默认生成 SKG 新口播或 TTS 配音。</p>
               <p><strong>影响：</strong><code>web/app/page.tsx</code>、<code>web/components/ad-recreation-board.tsx</code>、<code>web/components/audio-strip.tsx</code>、<code>web/lib/api.ts</code>、<code>api/main.py</code>、<code>RULES.md</code>、<code>.project.json</code>、<code>docs/source-analysis.html</code>。后续需求应先描述“音频解析完成后是否进入下一步”，不要默认把抽帧/分镜/合成塞进开始动作。</p>
             </div>
           </article>
@@ -2064,11 +2134,11 @@ ProductRefStateItem {
             <header>
               <h3>2026-05-14 · 音频提取直接生成英文产品口播</h3>
               <span class="tag gray">Audio</span>
-              <span class="tag green">MiniMax</span>
+              <span class="tag green">TTS</span>
             </header>
             <div class="body">
               <p><strong>问题：</strong>“提取音频”不能只做原音频转文字再改写，用户需要点击后直接得到介绍 SKG 产品的英文文案和配音，长度尽量贴近原音频，并且声音不能生硬。</p>
-              <p><strong>改动：</strong><code>pipeline_transcribe</code> 提取 <code>audio.wav</code> 后读取原音频时长，用该时长估算英文口播词数；<code>_rewrite_audio_script_sync</code> 改为生成自然、有趣、可直接 TTS 的 SKG 英文产品介绍。ASR/翻译保留为对照和节奏参考，ASR 不可用时仍继续生成产品口播。MiniMax voice_id 改为从 <code>MINIMAX_TTS_VOICE_POOL</code> 随机选择男声、女声或成熟声。</p>
+              <p><strong>改动：</strong><code>pipeline_transcribe</code> 提取 <code>audio.wav</code> 后读取原音频时长，用该时长估算英文口播词数；<code>_rewrite_audio_script_sync</code> 改为生成自然、有趣、可直接 TTS 的 SKG 英文产品介绍。ASR/翻译保留为对照和节奏参考，ASR 不可用时仍继续生成产品口播。TTS voice_id 改为从配置的语音音色池中随机选择男声、女声或成熟声。</p>
               <p><strong>影响：</strong><code>api/main.py</code>、<code>api/.env.example</code>、<code>api/README.md</code>、<code>RULES.md</code>、<code>web/components/nodes/index.tsx</code>、<code>web/components/audio-strip.tsx</code>、<code>web/components/dashboard.tsx</code>、<code>docs/source-analysis.html</code>。</p>
             </div>
           </article>
@@ -2130,7 +2200,7 @@ ProductRefStateItem {
             </header>
             <div class="body">
               <p><strong>问题：</strong>等待抽帧完成后自动启动音频，不符合“先把声音文案拿出来审核”的工作流；用户需要在音频卡片上直接触发。</p>
-              <p><strong>改动：</strong>移除前端抽帧完成后的自动转写逻辑；<code>AudioNode</code> 保留并固定显示“提取音频 / 重新提取音频”按钮，点击音频卡片也会立即打开底部音频条。后端 <code>/transcribe</code> 不再要求 <code>frames_extracted</code>，视频就绪后可直接从 <code>source.mp4</code> 拆出 <code>audio.wav</code>，并按原音频时长生成 SKG 英文产品介绍和 MiniMax 随机英文配音；抽帧中触发时不抢主状态，而是用 <code>audio_script.status</code> 表示音频处理中。当当前网关的 <code>whisper-1</code> audio endpoint 返回 404 时，会 fallback 到 Gemini 多模态音频识别；ASR 不可用时也会继续按原音频时长生成产品口播，不把可用文案标成前端错误。</p>
+              <p><strong>改动：</strong>移除前端抽帧完成后的自动转写逻辑；<code>AudioNode</code> 保留并固定显示“提取音频 / 重新提取音频”按钮，点击音频卡片也会立即打开底部音频条。后端 <code>/transcribe</code> 不再要求 <code>frames_extracted</code>，视频就绪后可直接从 <code>source.mp4</code> 拆出 <code>audio.wav</code>，并按原音频时长生成 SKG 英文产品介绍和 TTS 随机英文配音；抽帧中触发时不抢主状态，而是用 <code>audio_script.status</code> 表示音频处理中。当当前网关的 <code>whisper-1</code> audio endpoint 返回 404 时，会 fallback 到 Gemini 多模态音频识别；ASR 不可用时也会继续按原音频时长生成产品口播，不把可用文案标成前端错误。</p>
               <p><strong>影响：</strong><code>web/app/page.tsx</code>、<code>web/components/nodes/index.tsx</code>、<code>api/main.py</code>、<code>docs/source-analysis.html</code>。</p>
             </div>
           </article>
@@ -2142,7 +2212,7 @@ ProductRefStateItem {
             </header>
             <div class="body">
               <p><strong>问题：</strong>音频和文案只在节点或侧栏里展示，审核时缺少“文字和声音时间轴对应”的空间；英文口播和中文翻译也没有上下对齐。</p>
-              <p><strong>改动：</strong>新增 <code>web/components/audio-strip.tsx</code>，在主工作台底部吸附显示，可拖拽调整高度、可收起。每个音频段按时间横向排列，上方显示英文，中间显示中文翻译，下方显示对应波形条；底部原音频播放器驱动时间轴，播放时绿色指针会沿全局波形移动，并在当前字幕节点内同步走过该段。右侧显示按原音频时长生成的 SKG 英文产品口播、MiniMax 随机英文配音和产品依据。后端新增 <code>source_audio_url</code> 和 <code>GET /jobs/{id}/audio.wav</code> 只读接口，前端用 Web Audio API 解码生成波形峰值。</p>
+              <p><strong>改动：</strong>新增 <code>web/components/audio-strip.tsx</code>，在主工作台底部吸附显示，可拖拽调整高度、可收起。每个音频段按时间横向排列，上方显示英文，中间显示中文翻译，下方显示对应波形条；底部原音频播放器驱动时间轴，播放时绿色指针会沿全局波形移动，并在当前字幕节点内同步走过该段。右侧显示按原音频时长生成的 SKG 英文产品口播、TTS 随机英文配音和产品依据。后端新增 <code>source_audio_url</code> 和 <code>GET /jobs/{id}/audio.wav</code> 只读接口，前端用 Web Audio API 解码生成波形峰值。</p>
               <p><strong>影响：</strong><code>web/app/page.tsx</code>、<code>web/components/audio-strip.tsx</code>、<code>web/lib/api.ts</code>、<code>api/main.py</code>、<code>docs/source-analysis.html</code>。</p>
             </div>
           </article>
@@ -2154,7 +2224,7 @@ ProductRefStateItem {
             </header>
             <div class="body">
               <p><strong>问题：</strong>音频识别成功后只显示改写文案，用户看不到它和原音频之间的变化关系，难以判断“是不是把参考视频转成我们自己的话”。</p>
-              <p><strong>改动：</strong><code>AudioNode</code> 增加轻量对照摘要：改前显示原音频识别/翻译预览，改后显示 SKG 英文产品口播；侧栏 <code>Rewrite</code> 面板改为完整审核视图，先列原音频逐段 ASR/翻译，再列英文产品介绍稿、产品卖点依据和 MiniMax 英文配音播放器。</p>
+              <p><strong>改动：</strong><code>AudioNode</code> 增加轻量对照摘要：改前显示原音频识别/翻译预览，改后显示 SKG 英文产品口播；侧栏 <code>Rewrite</code> 面板改为完整审核视图，先列原音频逐段 ASR/翻译，再列英文产品介绍稿、产品卖点依据和 TTS 英文配音播放器。</p>
               <p><strong>影响：</strong><code>web/components/nodes/index.tsx</code>、<code>web/components/dashboard.tsx</code>、<code>docs/source-analysis.html</code>。</p>
             </div>
           </article>
@@ -2197,14 +2267,14 @@ ProductRefStateItem {
           </article>
           <article class="change">
             <header>
-              <h3>2026-05-14 · 音频处理接入 SKG 英文产品口播与 MiniMax 配音</h3>
+              <h3>2026-05-14 · 音频处理接入 SKG 英文产品口播与 TTS 配音</h3>
               <span class="tag gray">Audio</span>
-              <span class="tag green">MiniMax</span>
+              <span class="tag green">TTS</span>
             </header>
             <div class="body">
               <p><strong>问题：</strong>音频处理节点之前只说明“音轨 → ASR → 翻译 → 改写”，没有按原音频时长生成的产品介绍产物，也没有配音输出；用户无法直接拿到符合 SKG 产品语境的英文口播。</p>
-              <p><strong>改动：</strong><code>Job</code> 新增 <code>audio_script</code>，<code>pipeline_transcribe</code> 提取 <code>audio.wav</code> 后按原音频秒数生成 SKG 英文产品介绍文案，并在配置 <code>MINIMAX_API_KEY</code> 时调用 MiniMax T2A 输出 <code>/jobs/{id}/audio-script.mp3</code>。MiniMax voice_id 从英文男声、女声、成熟声池随机选择；前端 <code>AudioNode</code> 和侧栏 Rewrite 区显示模型链路、英文产品文案和配音播放器。</p>
-              <p><strong>边界：</strong>MiniMax 官方 Speech API 当前接入的是 TTS 配音，不替代 ASR；原始音频文案提取仍走现有 OpenAI-compatible audio transcription 入口。</p>
+              <p><strong>改动：</strong><code>Job</code> 新增 <code>audio_script</code>，<code>pipeline_transcribe</code> 提取 <code>audio.wav</code> 后按原音频秒数生成 SKG 英文产品介绍文案，并在已配置语音 API Key 时调用 TTS 接口输出 <code>/jobs/{id}/audio-script.mp3</code>。TTS voice_id 从英文男声、女声、成熟声音色池中随机选择；前端 <code>AudioNode</code> 和侧栏 Rewrite 区显示模型链路、英文产品文案和配音播放器。</p>
+              <p><strong>边界：</strong>当前接入的语音 Speech API 只负责 TTS 配音，不替代 ASR；原始音频文案提取仍走现有 OpenAI-compatible audio transcription 入口。</p>
               <p><strong>影响：</strong><code>api/main.py</code>、<code>api/.env.example</code>、<code>api/README.md</code>、<code>web/lib/api.ts</code>、<code>web/components/nodes/index.tsx</code>、<code>web/components/dashboard.tsx</code>、<code>web/app/page.tsx</code>、<code>docs/source-analysis.html</code>。</p>
             </div>
           </article>
diff --git a/web/app/page.tsx b/web/app/page.tsx
index 375a34e..8b32bb6 100644
--- a/web/app/page.tsx
+++ b/web/app/page.tsx
@@ -17,6 +17,7 @@ import { AdRecreationBoard } from "@/components/ad-recreation-board"
 import {
   addManualFrame, analyzeJob, createJob, getJob, listJobs, uploadJob, deleteJob, deleteFrame, deleteGeneratedImage,
   deleteGeneratedVideo, deleteCutout, generateStoryboardVideo, triggerTranscribe, describeFrame, updateStoryboard, copyProductLibraryAsset,
+  formatJobError, retryJobDownload,
   type Job, type ImageRef, type KeyFrame, type ProductFusionShot, type StoryboardScene, type FrameExtractMode, type FrameExtractQuality, type FrameExtractTarget,
 } from "@/lib/api"
 import { TRANSPARENT_HUMAN_NEGATIVE_PROMPT, TRANSPARENT_HUMAN_VIDEO_PROMPT } from "@/lib/workflow-target"
@@ -569,15 +570,30 @@ export default function Home() {
   const handleStartProduction = useCallback(async (inputUrl?: string) => {
     const trimmed = inputUrl?.trim()
     const created = trimmed ? await handleSubmit(trimmed) : undefined
-    const target = created ?? job
+    let target = created ?? job
     if (!target) {
       toast.info("先粘贴视频链接或选择一个素材任务")
       return
     }
+    if (!created && target.status === "failed") {
+      autoTriggeredRef.current.delete(`${target.id}:audio`)
+      autoTriggeredRef.current.delete(`${target.id}:visual`)
+    }
+    if (!created && target.status === "failed" && !target.video_url) {
+      try {
+        target = await retryJobDownload(target.id)
+        updateJobInList(target)
+        toast.info("已重新提交下载；下载完成后会自动跑音频文案路和视觉抽帧路")
+      } catch (e) {
+        toast.error("重新下载失败：" + (e instanceof Error ? e.message : String(e)))
+        return
+      }
+    }
     setProductionJobIds((prev) => new Set(prev).add(target.id))
-    toast.success("已进入并行素材分析：下载完成后自动跑音频文案路和视觉抽帧路")
+    if (target.video_url) toast.success("已进入并行素材分析：音频文案路和视觉抽帧路会同步推进")
+    else toast.success("已进入并行素材分析：下载完成后自动跑音频文案路和视觉抽帧路")
     void startProductionLanesForJob(target)
-  }, [handleSubmit, job, startProductionLanesForJob])
+  }, [handleSubmit, job, startProductionLanesForJob, updateJobInList])
 
   useEffect(() => {
     if (productionJobIds.size === 0) return
@@ -860,6 +876,9 @@ export default function Home() {
     if (job?.status === "downloaded" && prevStatusRef.current !== "downloaded") {
       toast.info("视频已下载，音频解析会自动开始；也可以在右侧手动重试", { duration: 6000 })
     }
+    if (job?.status === "failed" && prevStatusRef.current !== "failed") {
+      toast.error(formatJobError(job.error) || "任务失败", { duration: 10000 })
+    }
     prevStatusRef.current = job?.status ?? null
 
     const TERMINAL: Job["status"][] = ["downloaded", "frames_extracted", "transcribed", "failed"]
diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx
index aa70202..7709a50 100644
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -33,6 +33,7 @@ import {
   cutoutElement,
   deleteSubjectAsset,
   effectiveFrameUrl,
+  formatJobError,
   generateSceneAsset,
   generateProductAngleAsset,
   generateSubjectAssets,
@@ -117,6 +118,8 @@ type AudioStoryboardRow = {
 type ProductRefItem = ProductRefStateItem
 type SubjectPlanningRef = ImageRef & { view: string; roleHint: string }
 type SubjectStyleMode = "transparent_human" | "source_actor"
+type SubjectMode = "template" | "source_similar"
+type SubjectViewMode = "all" | "common" | "custom"
 type StoryboardVisualMode = NonNullable<StoryboardScene["visual_mode"]>
 type RowPlanPatch = Partial<Pick<AudioStoryboardRow, "visualMode" | "needsProduct" | "needsSubject" | "subjectDescription" | "visualPlan" | "firstFramePlan" | "lastFramePlan" | "productIntegration" | "productPlacement">>
 type WorkflowStepId = "input" | "source" | "audio" | "visual" | "subject" | "product" | "script" | "scene" | "video"
@@ -156,6 +159,8 @@ const SUBJECT_VIEW_ORDER = [
   "back_detail",
 ]
 
+const COMMON_SUBJECT_VIEW_VALUES = ["front", "three_quarter_left", "three_quarter_right", "bust_front"]
+
 const SUBJECT_ASSET_SIZE = "2048" as const
 
 type ModelTraceSpec = {
@@ -591,9 +596,9 @@ function similarSubjectModelTrace(models: RuntimeModels | undefined, subjectStyl
     title: subjectStyle === "transparent_human" ? "相似透明骨架主体" : "相似普通真人主体",
     model: subjectImageModelChain(models),
     chain: [
-      "参考策略：未勾选关键帧时使用全部关键帧，勾选后只使用已选关键帧；也可叠加内置形象作为创意参考",
+      "参考策略：先用视觉模型把关键帧/模板转成非身份化文字 brief，生图请求不再上传参考图",
       `主体类型：${subjectStyle === "transparent_human" ? "透明/半透明皮肤包裹可见白色骨架" : "普通商业广告真人"}`,
-      `图像生成：${subjectImageModelChain(models)} 逐张生成 10 张高清图，包含全身多视角和肩颈/后背特写`,
+      `图像生成：${subjectImageModelChain(models)} 走 /images/generations 逐张生成高清图，视图数量由“全部/常用/自定义”决定`,
       "身份锁定：整套图必须是同一个主体，性别表现、年龄段、体型、材质和风格保持一致",
     ],
     note: "这是生成类似但创新的主体，不是复制、抠出或复刻源视频人物身份；内置形象也只作为方向参考。",
@@ -1650,6 +1655,9 @@ function MaterialColumn({
   onSubmitUrl: () => void
   onStartProduction: () => void
 }) {
+  const actionLabel = !url.trim() && job?.status === "failed"
+    ? job.video_url ? "重新解析" : "重新下载"
+    : "开始分析"
   return (
     <section className="skg-board-panel flex min-h-0 flex-col gap-3 rounded-lg border border-white/10 bg-white/[0.035] p-3 shadow-2xl">
       <header className="shrink-0 border-b border-white/10 pb-3">
@@ -1675,7 +1683,7 @@ function MaterialColumn({
           disabled={data.submitting || (!url.trim() && !job)}
           className="inline-flex h-10 items-center justify-center rounded-md bg-[#f0ead8] px-3 text-[13px] font-semibold text-black shadow-[0_14px_28px_rgba(0,0,0,0.28)] transition hover:bg-[#fff7df] disabled:cursor-not-allowed disabled:opacity-45"
         >
-          开始分析
+          {actionLabel}
         </button>
         <button
           type="button"
@@ -2138,7 +2146,10 @@ function SourceReferenceBuildPanel({
 }) {
   const [subjectBusy, setSubjectBusy] = useState(false)
   const [subjectAssetBusy, setSubjectAssetBusy] = useState<string | null>(null)
+  const [subjectMode, setSubjectMode] = useState<SubjectMode>("source_similar")
   const [subjectStyle, setSubjectStyle] = useState<SubjectStyleMode>("transparent_human")
+  const [subjectViewMode, setSubjectViewMode] = useState<SubjectViewMode>("all")
+  const [customSubjectViews, setCustomSubjectViews] = useState<string[]>(COMMON_SUBJECT_VIEW_VALUES)
   const [subjectDirection, setSubjectDirection] = useState("")
   const [characterLibrary, setCharacterLibrary] = useState<CharacterLibraryItem[]>([])
   const [selectedCharacterId, setSelectedCharacterId] = useState("")
@@ -2169,11 +2180,16 @@ function SourceReferenceBuildPanel({
     () => subjectTemplateLibrary.find((template) => template.id === selectedSubjectTemplateId) ?? null,
     [subjectTemplateLibrary, selectedSubjectTemplateId],
   )
-  const selectedTemplatePrompt = selectedSubjectTemplate
+  const selectedTemplatePrompt = subjectMode === "template" && selectedSubjectTemplate
     ? { name: selectedSubjectTemplate.name, sourceLabel: "数据库主体模板" }
-    : selectedCharacter
+    : subjectMode === "template" && selectedCharacter
       ? { name: selectedCharacter.name, sourceLabel: "内置策划形象" }
       : null
+  const selectedSubjectViews = useMemo(() => {
+    if (subjectViewMode === "common") return COMMON_SUBJECT_VIEW_VALUES
+    if (subjectViewMode === "custom") return customSubjectViews.length ? customSubjectViews : COMMON_SUBJECT_VIEW_VALUES
+    return SUBJECT_ASSET_VIEWS.map((view) => view.value)
+  }, [customSubjectViews, subjectViewMode])
   const visibleActorAssets = useMemo(() => {
     const latestByView = new Map<string, SubjectAsset>()
     for (const asset of actorAssets) {
@@ -2195,14 +2211,18 @@ function SourceReferenceBuildPanel({
       : "待抽帧"
   const templateSaveHint = visibleActorAssets.length
     ? templateDraftName.trim()
-      ? "保存后会进入左侧主体模板库，后续任务可直接复用"
+      ? "保存后会进入主体模板库，后续任务可直接复用"
       : "先给这套主体命名，再保存到主体模板库"
     : "先生成本次主体视图，再决定是否入库"
-  const templateSourceLabel = selectedSubjectTemplate
+  const templateSourceLabel = subjectMode === "template" && selectedSubjectTemplate
     ? `${selectedSubjectTemplate.name} · 数据库模板`
-    : selectedCharacter
+    : subjectMode === "template" && selectedCharacter
     ? `${selectedCharacter.name} · 模板参考`
     : "源视频关键帧 · 相似创新"
+  const templateRequired = subjectMode === "template" && !selectedSubjectTemplate && !selectedCharacter
+  const generationCtaLabel = subjectMode === "template"
+    ? `用模板生成 ${selectedSubjectViews.length} 张主体视图`
+    : `从源视频创新生成 ${selectedSubjectViews.length} 张主体视图`
 
   const loadSubjectTemplateLibrary = async (silent = false) => {
     setTemplateLibraryBusy(true)
@@ -2239,6 +2259,10 @@ function SourceReferenceBuildPanel({
       toast.warning("请先自动抽帧 12 张，或在原版视频上手动补帧。")
       return
     }
+    if (templateRequired) {
+      toast.warning("请先选择一个内置或数据库主体模板。")
+      return
+    }
     const baseFrame = subjectReferenceFrames[0]
     if (!baseFrame) return
     setSubjectBusy(true)
@@ -2271,14 +2295,14 @@ function SourceReferenceBuildPanel({
         background: "white",
         size: SUBJECT_ASSET_SIZE,
         source_frame_indices: subjectReferenceFrames.slice(0, 12).map((frame) => frame.index),
-        views: SUBJECT_ASSET_VIEWS.map((view) => view.value),
-        character_id: selectedCharacterId,
-        subject_template_id: selectedSubjectTemplateId,
+        views: selectedSubjectViews,
+        character_id: subjectMode === "template" ? selectedCharacterId : "",
+        subject_template_id: subjectMode === "template" ? selectedSubjectTemplateId : "",
         prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection, selectedTemplatePrompt),
         replace_views: true,
       })
       onJobUpdate(updated)
-      toast.success("相似主体 10 张高清白底图已生成")
+      toast.success(`相似主体 ${selectedSubjectViews.length} 张高清白底图已生成`)
     } catch (e) {
       toast.error("相似主体重构失败：" + (e instanceof Error ? e.message : String(e)))
     } finally {
@@ -2301,8 +2325,8 @@ function SourceReferenceBuildPanel({
         size: SUBJECT_ASSET_SIZE,
         source_frame_indices: sourceIndices,
         views: [asset.view],
-        character_id: selectedCharacterId,
-        subject_template_id: selectedSubjectTemplateId,
+        character_id: subjectMode === "template" ? selectedCharacterId : "",
+        subject_template_id: subjectMode === "template" ? selectedSubjectTemplateId : "",
         prompt: buildSimilarSubjectPrompt(subjectStyle, subjectDirection, selectedTemplatePrompt),
         replace_views: true,
       })
@@ -2373,207 +2397,234 @@ function SourceReferenceBuildPanel({
         </div>
       </div>
       <div className="rounded-md border border-white/10 bg-black/32 p-2">
-        <div className="mb-2 grid gap-2 lg:grid-cols-[minmax(360px,1fr)_minmax(300px,0.8fr)]">
-          <div className="rounded-md border border-white/10 bg-black/28 p-2">
-            <div className="mb-1.5 flex items-center justify-between gap-2">
-              <div>
-                <div className="text-[10.5px] font-semibold text-white/70">主体模板库</div>
-                <div className="mt-0.5 text-[9px] text-white/32">数据库模板优先复用；内置形象只作为初始策划模板。</div>
-              </div>
-              <button
-                type="button"
-                onClick={() => void loadSubjectTemplateLibrary()}
-                disabled={templateLibraryBusy}
-                className="inline-flex h-6 items-center gap-1 rounded border border-emerald-200/20 bg-emerald-300/10 px-1.5 text-[9px] font-semibold text-emerald-100/80 transition hover:border-emerald-200/40 disabled:cursor-wait disabled:opacity-50"
-              >
-                {templateLibraryBusy ? <Loader2 className="h-3 w-3 animate-spin" /> : <RefreshCw className="h-3 w-3" />}
-                数据库 {subjectTemplateLibrary.length} 套
-              </button>
+        <div className="rounded-md border border-white/10 bg-black/28 p-2.5">
+          <div className="mb-2 flex flex-wrap items-start justify-between gap-2">
+            <div>
+              <div className="text-[11px] font-semibold text-white/72">主体模板库</div>
+              <div className="mt-0.5 text-[9.5px] text-white/34">先决定是否用模板，再选择内置或数据库主体；源视频相似不再混在模板网格里。</div>
             </div>
-            <div className="grid grid-cols-[repeat(auto-fill,minmax(86px,1fr))] gap-1.5">
+            <button
+              type="button"
+              onClick={() => void loadSubjectTemplateLibrary()}
+              disabled={templateLibraryBusy}
+              className="inline-flex h-7 items-center gap-1 rounded border border-white/10 bg-white/[0.045] px-2 text-[10px] font-semibold text-white/58 transition hover:border-cyan-300/35 hover:text-cyan-100 disabled:cursor-wait disabled:opacity-50"
+            >
+              {templateLibraryBusy ? <Loader2 className="h-3 w-3 animate-spin" /> : <RefreshCw className="h-3 w-3" />}
+              数据库 {subjectTemplateLibrary.length} 套
+            </button>
+          </div>
+
+          <div className="mb-2 grid gap-1.5 sm:grid-cols-2">
+            {[
+              { value: "template" as const, label: "用模板生成", desc: "从内置形象或数据库模板延展新主体" },
+              { value: "source_similar" as const, label: "不用模板（从源视频关键帧创新）", desc: "只读取源视频角色文字特征，不上传参考图做复制" },
+            ].map((item) => (
               <button
+                key={item.value}
                 type="button"
-                onClick={() => {
-                  setSelectedCharacterId("")
-                  setSelectedSubjectTemplateId("")
-                }}
-                className={`min-h-[58px] rounded-md border px-2 py-1.5 text-left transition ${
-                  !selectedCharacterId && !selectedSubjectTemplateId ? "border-cyan-200/55 bg-cyan-300/12 text-cyan-50" : "border-white/10 bg-black/25 text-white/45 hover:border-white/22 hover:text-white/70"
+                onClick={() => setSubjectMode(item.value)}
+                className={`flex min-h-[48px] items-start gap-2 rounded-md border px-2.5 py-2 text-left transition ${
+                  subjectMode === item.value
+                    ? "border-cyan-200/65 bg-cyan-300/12 text-cyan-50"
+                    : "border-white/10 bg-black/24 text-white/50 hover:border-cyan-200/30 hover:text-white/78"
                 }`}
               >
-                <span className="block text-[10.5px] font-semibold">源视频相似</span>
-                <span className="mt-1 block text-[9px] leading-tight opacity-70">根据关键帧创新</span>
+                <span className="mt-0.5 shrink-0">{subjectMode === item.value ? <Check className="h-3.5 w-3.5" /> : <Circle className="h-3.5 w-3.5" />}</span>
+                <span className="min-w-0">
+                  <span className="block text-[11px] font-semibold">{item.label}</span>
+                  <span className="mt-0.5 block text-[9.5px] leading-snug opacity-65">{item.desc}</span>
+                </span>
               </button>
+            ))}
+          </div>
+
+          <div className={`transition ${subjectMode === "source_similar" ? "pointer-events-none opacity-38 grayscale" : ""}`}>
+            <div className="grid grid-cols-[repeat(auto-fill,minmax(120px,1fr))] gap-2">
               {subjectTemplateLibrary.map((template) => {
                 const preview = characterPreviewImage(template)
-                const active = selectedSubjectTemplateId === template.id
+                const active = subjectMode === "template" && selectedSubjectTemplateId === template.id
                 return (
                   <button
                     key={template.id}
                     type="button"
                     onClick={() => {
+                      setSubjectMode("template")
                       setSelectedSubjectTemplateId(template.id)
                       setSelectedCharacterId("")
                       setSubjectStyle(template.subject_style || "transparent_human")
                     }}
-                    className={`group flex min-h-[58px] items-center gap-1.5 rounded-md border px-1.5 py-1 text-left transition ${
-                      active ? "border-cyan-200/65 bg-cyan-300/12 text-cyan-50" : "border-white/10 bg-black/25 text-white/50 hover:border-cyan-200/35 hover:text-white/80"
+                    className={`group relative rounded-md border p-1.5 text-left transition ${
+                      active ? "border-cyan-200/75 bg-cyan-300/12 text-cyan-50" : "border-white/10 bg-black/24 text-white/58 hover:border-cyan-200/35 hover:text-white/82"
                     }`}
                   >
-                    <span className="h-12 w-9 shrink-0 overflow-hidden rounded border border-white/10 bg-white">
+                    {active ? <span className="absolute right-2 top-2 z-10 rounded-full bg-cyan-200 p-0.5 text-black"><Check className="h-3 w-3" /></span> : null}
+                    <span className="block aspect-[4/5] overflow-hidden rounded border border-white/10 bg-white">
                       {preview ? <img src={subjectTemplateImageUrl(preview.filename)} alt={template.name} className="h-full w-full object-cover" /> : null}
                     </span>
-                    <span className="min-w-0">
-                      <span className="block truncate text-[10px] font-semibold">{template.name}</span>
-                      <span className="mt-0.5 block text-[8.5px] opacity-58">数据库 · {template.images.length} 图</span>
-                    </span>
+                    <span className="mt-1 block truncate text-[10.5px] font-semibold">{template.name}</span>
+                    <span className="mt-0.5 block truncate text-[9px] opacity-58">数据库 · {template.images.length} 图</span>
                   </button>
                 )
               })}
               {characterLibrary.map((character) => {
                 const preview = characterPreviewImage(character)
-                const active = selectedCharacterId === character.id
+                const active = subjectMode === "template" && selectedCharacterId === character.id
                 return (
                   <button
                     key={character.id}
                     type="button"
                     onClick={() => {
+                      setSubjectMode("template")
                       setSelectedCharacterId(character.id)
                       setSelectedSubjectTemplateId("")
                       setSubjectStyle("transparent_human")
                     }}
-                    className={`group flex min-h-[58px] items-center gap-1.5 rounded-md border px-1.5 py-1 text-left transition ${
-                      active ? "border-emerald-200/65 bg-emerald-300/12 text-emerald-50" : "border-white/10 bg-black/25 text-white/50 hover:border-emerald-200/35 hover:text-white/80"
+                    className={`group relative rounded-md border p-1.5 text-left transition ${
+                      active ? "border-cyan-200/75 bg-cyan-300/12 text-cyan-50" : "border-white/10 bg-black/24 text-white/58 hover:border-cyan-200/35 hover:text-white/82"
                     }`}
                   >
-                    <span className="h-12 w-9 shrink-0 overflow-hidden rounded border border-white/10 bg-white">
+                    {active ? <span className="absolute right-2 top-2 z-10 rounded-full bg-cyan-200 p-0.5 text-black"><Check className="h-3 w-3" /></span> : null}
+                    <span className="block aspect-[4/5] overflow-hidden rounded border border-white/10 bg-white">
                       {preview ? <img src={characterLibraryImageUrl(preview.filename)} alt={character.name} className="h-full w-full object-cover" /> : null}
                     </span>
-                    <span className="min-w-0">
-                      <span className="block truncate text-[10px] font-semibold">{character.name}</span>
-                      <span className="mt-0.5 block text-[8.5px] opacity-58">内置 · 7 图</span>
-                    </span>
+                    <span className="mt-1 block truncate text-[10.5px] font-semibold">{character.name}</span>
+                    <span className="mt-0.5 block truncate text-[9px] opacity-58">内置 · {character.images.length} 图</span>
                   </button>
                 )
               })}
             </div>
-            {!subjectTemplateLibrary.length ? (
-              <div className="mt-1.5 rounded border border-dashed border-white/10 px-2 py-1.5 text-[9px] leading-snug text-white/28">
-                数据库暂未保存主体。生成满意的相似主体后，在右侧命名并保存，后续会出现在这里。
-              </div>
-            ) : null}
-            {selectedSubjectTemplate?.images?.length ? (
-              <div className="mt-1.5 flex gap-1 overflow-x-auto pb-0.5">
-                {selectedSubjectTemplate.images.slice(0, 10).map((image) => (
-                  <div key={image.id} className="h-12 w-9 shrink-0 overflow-hidden rounded border border-white/10 bg-white" title={image.label}>
-                    <img src={subjectTemplateImageUrl(image.filename)} alt={image.label} className="h-full w-full object-cover" />
-                  </div>
-                ))}
-              </div>
-            ) : selectedCharacter?.images?.length ? (
-              <div className="mt-1.5 flex gap-1 overflow-x-auto pb-0.5">
-                {selectedCharacter.images.slice(0, 7).map((image) => (
-                  <div key={image.id} className="h-12 w-9 shrink-0 overflow-hidden rounded border border-white/10 bg-white" title={image.label}>
-                    <img src={characterLibraryImageUrl(image.filename)} alt={image.label} className="h-full w-full object-cover" />
-                  </div>
-                ))}
-              </div>
-            ) : null}
           </div>
 
-          <div className="rounded-md border border-white/10 bg-black/28 p-2">
-            <div className="mb-1.5 flex flex-wrap items-start justify-between gap-2">
-              <div>
-                <div className="flex items-center gap-2 text-[10.5px] font-semibold text-white/70">
-                  <span>本次生成 / 入库草稿</span>
-                  <ModelTrace trace={similarSubjectModelTrace(runtimeModels, subjectStyle)} compact />
+          {subjectMode === "template" && (selectedSubjectTemplate?.images?.length || selectedCharacter?.images?.length) ? (
+            <div className="mt-2 flex gap-1.5 overflow-x-auto pb-0.5">
+              {(selectedSubjectTemplate?.images ?? selectedCharacter?.images ?? []).slice(0, 10).map((image) => (
+                <div key={image.id} className="h-16 w-12 shrink-0 overflow-hidden rounded border border-white/10 bg-white" title={image.label}>
+                  <img
+                    src={selectedSubjectTemplate ? subjectTemplateImageUrl(image.filename) : characterLibraryImageUrl(image.filename)}
+                    alt={image.label}
+                    className="h-full w-full object-cover"
+                  />
                 </div>
-                <div className="mt-0.5 text-[9px] text-white/32">{templateSourceLabel} · {visibleActorAssets.length}/{SUBJECT_ASSET_VIEWS.length} 张</div>
-              </div>
-              <span className={`rounded border px-1.5 py-0.5 text-[9px] font-semibold ${
-                visibleActorAssets.length ? "border-emerald-200/25 bg-emerald-300/10 text-emerald-100/80" : "border-white/10 bg-white/5 text-white/36"
-              }`}>
-                {visibleActorAssets.length ? "可命名待入库" : "未生成"}
-              </span>
-            </div>
-            <div className="grid gap-1.5">
-              <input
-                value={templateDraftName}
-                onChange={(event) => setTemplateDraftName(event.target.value)}
-                placeholder="模板命名：如透明骨架女性 01"
-                className="h-7 rounded-md border border-white/10 bg-black/35 px-2 text-[10.5px] text-white outline-none placeholder:text-white/28 focus:border-cyan-300/50"
-              />
-              <textarea
-                value={templateDraftNote}
-                onChange={(event) => setTemplateDraftNote(event.target.value)}
-                placeholder="备注：适合什么广告、人物年龄/性别/材质、禁用点"
-                className="min-h-[46px] resize-none rounded-md border border-white/10 bg-black/35 px-2 py-1.5 text-[10.5px] leading-snug text-white outline-none placeholder:text-white/28 focus:border-cyan-300/50"
-              />
-              <div className="flex items-center justify-between gap-2">
-                <span className="min-w-0 text-[9px] leading-snug text-white/32">{templateSaveHint}</span>
-                <button
-                  type="button"
-                  onClick={() => void saveGeneratedSubjectTemplate()}
-                  disabled={!visibleActorAssets.length || !templateDraftName.trim() || templateSaveBusy}
-                  title={!visibleActorAssets.length ? "先生成主体视图" : !templateDraftName.trim() ? "先填写模板名称" : "保存到主体模板库"}
-                  className="inline-flex h-7 shrink-0 items-center justify-center gap-1 rounded-md border border-emerald-200/25 bg-emerald-300/12 px-2 text-[10px] font-semibold text-emerald-50 transition hover:border-emerald-200/45 hover:bg-emerald-300/18 disabled:cursor-not-allowed disabled:border-white/10 disabled:bg-white/6 disabled:text-white/32"
-                >
-                  {templateSaveBusy ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Plus className="h-3.5 w-3.5" />}
-                  保存到主体库
-                </button>
-              </div>
+              ))}
             </div>
+          ) : null}
+
+          <div className="my-2 h-px bg-white/10" />
+          <div className="grid gap-2 lg:grid-cols-[1fr_1.6fr_auto]">
+            <input
+              value={templateDraftName}
+              onChange={(event) => setTemplateDraftName(event.target.value)}
+              placeholder={visibleActorAssets.length ? "模板名称" : "生成主体视图后可命名保存"}
+              className="h-8 rounded-md border border-white/10 bg-black/35 px-2 text-[10.5px] text-white outline-none placeholder:text-white/28 focus:border-cyan-300/50"
+            />
+            <input
+              value={templateDraftNote}
+              onChange={(event) => setTemplateDraftNote(event.target.value)}
+              placeholder="保存为主体模板备注：适用广告、人物风格、禁用点"
+              className="h-8 rounded-md border border-white/10 bg-black/35 px-2 text-[10.5px] text-white outline-none placeholder:text-white/28 focus:border-cyan-300/50"
+            />
+            <button
+              type="button"
+              onClick={() => void saveGeneratedSubjectTemplate()}
+              disabled={!visibleActorAssets.length || !templateDraftName.trim() || templateSaveBusy}
+              title={!visibleActorAssets.length ? "先生成主体视图" : !templateDraftName.trim() ? "先填写模板名称" : "保存到主体模板库"}
+              className="inline-flex h-8 items-center justify-center gap-1 rounded-md border border-white/10 bg-white/[0.055] px-3 text-[10.5px] font-semibold text-white/62 transition hover:border-cyan-300/35 hover:text-cyan-100 disabled:cursor-not-allowed disabled:opacity-35"
+            >
+              {templateSaveBusy ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Plus className="h-3.5 w-3.5" />}
+              保存到主体库
+            </button>
           </div>
+          <div className="mt-1 text-[9px] text-white/32">{templateSaveHint}</div>
         </div>
 
-        <div>
-          <div className="mb-1.5 flex flex-wrap items-center justify-between gap-2 text-[10px] text-white/36">
-            <div className="flex items-center gap-2">
-              <span>主体生成设置</span>
-              <span className="text-white/28">{referenceCountLabel}</span>
+        <div className="mt-2 rounded-md border border-white/10 bg-black/28 p-2.5">
+          <div className="mb-2 flex flex-wrap items-center justify-between gap-2">
+            <div>
+              <div className="text-[11px] font-semibold text-white/72">生成主体视图</div>
+              <div className="mt-0.5 text-[9.5px] text-white/34">{templateSourceLabel}</div>
             </div>
-            <span className="text-[10px] text-white/32">模板只做创意参考；生成后人工确认，再决定是否入库复用。</span>
+            <ModelTrace trace={similarSubjectModelTrace(runtimeModels, subjectStyle)} compact />
           </div>
 
-          <div className="mb-1.5 flex flex-wrap items-center justify-end gap-2 text-[10px] text-white/36">
-            <div className="flex min-w-0 flex-wrap items-center justify-end gap-2">
-              <div className="flex rounded-md border border-white/10 bg-black/28 p-0.5">
-                {[
-                  { value: "transparent_human" as const, label: "透明骨架" },
-                  { value: "source_actor" as const, label: "普通真人" },
-                ].map((item) => (
+          <div className="grid gap-2 xl:grid-cols-[auto_auto_minmax(220px,1fr)_auto] xl:items-start">
+            <div className="flex rounded-md border border-white/10 bg-black/28 p-0.5">
+              {[
+                { value: "transparent_human" as const, label: "透明骨架" },
+                { value: "source_actor" as const, label: "真人" },
+              ].map((item) => (
+                <button
+                  key={item.value}
+                  type="button"
+                  onClick={() => setSubjectStyle(item.value)}
+                  className={`h-8 rounded px-2.5 text-[10.5px] font-semibold transition ${
+                    subjectStyle === item.value ? "bg-white text-black" : "text-white/45 hover:text-white"
+                  }`}
+                >
+                  {item.label}
+                </button>
+              ))}
+            </div>
+            <div className="flex flex-wrap rounded-md border border-white/10 bg-black/28 p-0.5">
+              {[
+                { value: "all" as const, label: `全部 ${SUBJECT_ASSET_VIEWS.length}` },
+                { value: "common" as const, label: `常用 ${COMMON_SUBJECT_VIEW_VALUES.length}` },
+                { value: "custom" as const, label: "自定义" },
+              ].map((item) => (
+                <button
+                  key={item.value}
+                  type="button"
+                  onClick={() => setSubjectViewMode(item.value)}
+                  className={`h-8 rounded px-2.5 text-[10.5px] font-semibold transition ${
+                    subjectViewMode === item.value ? "bg-white text-black" : "text-white/45 hover:text-white"
+                  }`}
+                >
+                  {item.label}
+                </button>
+              ))}
+            </div>
+            <input
+              value={subjectDirection}
+              onChange={(event) => setSubjectDirection(event.target.value)}
+              placeholder="统一方向：如年轻女性 / 更运动 / 更高级"
+              className="h-9 rounded-md border border-white/10 bg-black/35 px-2.5 text-[11px] text-white outline-none placeholder:text-white/28 focus:border-cyan-300/50"
+            />
+            <button
+              type="button"
+              onClick={() => void generateSimilarActor()}
+              disabled={!frames.length || subjectBusy || templateRequired || !selectedSubjectViews.length}
+              className="inline-flex h-9 min-w-[170px] items-center justify-center gap-1 rounded-md bg-white px-3 text-[11px] font-semibold text-black transition hover:bg-white/90 disabled:cursor-not-allowed disabled:opacity-40"
+            >
+              {subjectBusy ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Sparkles className="h-3.5 w-3.5" />}
+              {generationCtaLabel}
+            </button>
+          </div>
+
+          {subjectViewMode === "custom" ? (
+            <div className="mt-2 flex flex-wrap gap-1.5">
+              {SUBJECT_ASSET_VIEWS.map((view) => {
+                const checked = customSubjectViews.includes(view.value)
+                return (
                   <button
-                    key={item.value}
+                    key={view.value}
                     type="button"
-                    onClick={() => setSubjectStyle(item.value)}
-                    className={`h-6 rounded px-2 text-[10px] font-semibold transition ${
-                      subjectStyle === item.value ? "bg-white text-black" : "text-white/45 hover:text-white"
+                    onClick={() => setCustomSubjectViews((current) =>
+                      current.includes(view.value)
+                        ? current.filter((item) => item !== view.value)
+                        : [...current, view.value],
+                    )}
+                    className={`h-7 rounded-md border px-2 text-[10px] font-semibold transition ${
+                      checked ? "border-cyan-200/60 bg-cyan-300/12 text-cyan-50" : "border-white/10 bg-black/24 text-white/45 hover:border-cyan-200/28 hover:text-white/75"
                     }`}
                   >
-                    {item.label}
+                    {checked ? "✓ " : ""}{view.label}
                   </button>
-                  ))}
-              </div>
-              <input
-                value={subjectDirection}
-                onChange={(event) => setSubjectDirection(event.target.value)}
-                placeholder="统一方向：如年轻女性 / 更运动 / 更高级"
-                className="h-7 w-[240px] min-w-[180px] rounded-md border border-white/10 bg-black/35 px-2 text-[10.5px] text-white outline-none placeholder:text-white/28 focus:border-cyan-300/50"
-              />
-              <span>{visibleActorAssets.length}/{SUBJECT_ASSET_VIEWS.length}</span>
-              <button
-                type="button"
-                onClick={() => void generateSimilarActor()}
-                disabled={!frames.length || subjectBusy}
-                className="inline-flex h-7 items-center justify-center gap-1 rounded-md bg-white px-2 text-[10.5px] font-semibold text-black transition hover:bg-white/90 disabled:cursor-not-allowed disabled:opacity-40"
-              >
-                {subjectBusy ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Sparkles className="h-3.5 w-3.5" />}
-                生成 10 张高清图
-              </button>
+                )
+              })}
             </div>
-          </div>
+          ) : null}
+
           {visibleActorAssets.length ? (
-            <div className="flex flex-wrap gap-1.5">
+            <div className="mt-2 grid grid-cols-[repeat(auto-fill,minmax(96px,1fr))] gap-2">
               {visibleActorAssets.map((asset) => {
                 const busyMode = subjectAssetBusy?.endsWith(asset.id) ? subjectAssetBusy.split(":")[0] : ""
                 return (
@@ -2584,7 +2635,7 @@ function SourceReferenceBuildPanel({
                     alt={asset.label || asset.view}
                     label={asset.label || asset.view || "主体视图预览"}
                     meta={asset.width && asset.height ? `${asset.width}x${asset.height}` : undefined}
-                    className="aspect-[9/16] w-12 bg-white 2xl:w-14"
+                    className="aspect-[9/16] w-20 bg-white 2xl:w-24"
                     objectFit="contain"
                     title={asset.label || asset.view}
                     actions={[{
@@ -2605,8 +2656,10 @@ function SourceReferenceBuildPanel({
               })}
             </div>
           ) : (
-            <div className="rounded border border-dashed border-white/12 px-2 py-2 text-[10.5px] leading-snug text-white/32">
-              可直接用全部关键帧生成；勾选关键帧后会只用已选帧。选择内置形象后，会围绕同一个统一主体生成全身多视角和肩颈/背部特写。
+            <div className="mt-2 rounded border border-dashed border-white/12 px-2 py-2 text-[10.5px] leading-snug text-white/32">
+              {subjectMode === "template"
+                ? "先选主体模板，再生成新主体视图；模板只作为文字化创意方向，不再作为强参考图复制。"
+                : "直接使用关键帧的文字化主体特征生成创新主体；后端不会上传源图给生图端点。"}
             </div>
           )}
         </div>
@@ -4299,6 +4352,7 @@ function MaterialCard({
   onDelete?: () => void
 }) {
   const tone = statusTone(job)
+  const errorText = formatJobError(job.error)
   return (
     <button
       type="button"
@@ -4320,6 +4374,12 @@ function MaterialCard({
         <Metric label="文案" value={job.audio_script?.source_text || job.transcript.length ? "ready" : "-"} compact />
         <Metric label="段落" value={`${job.transcript.length}`} compact />
       </div>
+      {job.status === "failed" && errorText && (
+        <div className="mt-2 flex gap-1.5 rounded-md border border-rose-300/18 bg-rose-500/[0.08] px-2 py-1.5 text-[11px] leading-snug text-rose-100/82">
+          <AlertTriangle className="mt-0.5 h-3.5 w-3.5 shrink-0" />
+          <span className="line-clamp-3">{errorText}</span>
+        </div>
+      )}
       {onDelete && (
         <span
           role="button"
diff --git a/web/components/dashboard.tsx b/web/components/dashboard.tsx
index ea6d0ff..792f4d1 100644
--- a/web/components/dashboard.tsx
+++ b/web/components/dashboard.tsx
@@ -641,15 +641,15 @@ export const Dashboard = forwardRef<DashboardHandle, Props>(function Dashboard({
                   </div>
                 </KanbanCard>
 
-                <KanbanCard tone="green" tags={["配音"]} title={job?.audio_script?.voice_model || "MiniMax T2A"}>
+                <KanbanCard tone="green" tags={["配音"]} title={job?.audio_script?.voice_model || "Azure OpenAI TTS"}>
                   {job?.audio_script?.voice_url ? (
                     <audio controls className="h-8 w-full" src={apiAssetUrl(job.audio_script.voice_url)} />
                   ) : (
                     <div className="text-[11px] text-[var(--text-soft)]">
-                      {job?.audio_script?.error || "配置 MiniMax 后自动生成配音文件"}
+                      {job?.audio_script?.error || "配置 Azure OpenAI TTS 后自动生成配音文件"}
                     </div>
                   )}
-                  <div className="kanban-meta">{job?.audio_script?.voice_id || "random English voice"}</div>
+                  <div className="kanban-meta">{job?.audio_script?.voice_id || "Azure voice"}</div>
                 </KanbanCard>
               </>
             )}
diff --git a/web/components/nodes/index.tsx b/web/components/nodes/index.tsx
index 9245cea..295db24 100644
--- a/web/components/nodes/index.tsx
+++ b/web/components/nodes/index.tsx
@@ -2102,7 +2102,7 @@ export function RewriteNode({ data, selected }: any) {
 }
 
 /* ============================================================
-   5b. AudioNode — 合并 ASR + 翻译 + 改写 + MiniMax 配音
+   5b. AudioNode — 合并 ASR + 翻译 + 改写 + Azure OpenAI 配音
    ============================================================ */
 export function AudioNode({ data, selected }: any) {
   const d: NodeData = data
@@ -2152,9 +2152,9 @@ export function AudioNode({ data, selected }: any) {
         }}
       >
         <div>
-          音轨 → 取时长/节奏 → SKG 英文产品口播 → MiniMax 随机英文配音<br />
+          音轨 → 取时长/节奏 → SKG 英文产品口播 → Azure OpenAI 英文配音<br />
           <span className="text-[var(--text-faint)] font-mono">
-            {audioScript?.rewrite_model || "AUDIO_REWRITE_MODEL"} → {audioScript?.voice_model || "MiniMax T2A"}
+            {audioScript?.rewrite_model || "AUDIO_REWRITE_MODEL"} → {audioScript?.voice_model || "Azure OpenAI TTS"}
           </span>
         </div>
         {job && (
@@ -2195,7 +2195,7 @@ export function AudioNode({ data, selected }: any) {
             )}
           </div>
         )}
-        {voiceUrl && <div className="text-[10.5px] text-emerald-200/85">MiniMax natural English voice ready · 底部音频条播放</div>}
+        {voiceUrl && <div className="text-[10.5px] text-emerald-200/85">Azure OpenAI English voice ready · 底部音频条播放</div>}
         {isRewriting && (
           <div className="text-[10.5px] text-[var(--text-faint)]">正在按原音频时长生成英文产品口播和配音…</div>
         )}
diff --git a/web/lib/api.ts b/web/lib/api.ts
index 531bf51..d2386c9 100644
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -172,10 +172,7 @@ export interface RuntimeModels {
   voice_id?: string
   voice_pool?: string[]
   voice_configured?: boolean
-  minimax_tts?: string
-  minimax_voice?: string
-  minimax_voice_pool?: string[]
-  minimax_configured?: boolean
+  voice_tts_paths?: string[]
   video?: string
   video_aliases?: Record<string, string>
   video_provider?: string
@@ -559,6 +556,7 @@ export interface CharacterLibraryItem {
   name: string
   folder: string
   description: string
+  prompt_brief?: string
   primary_image: string
   images: CharacterLibraryImage[]
 }
@@ -584,6 +582,7 @@ export interface SubjectTemplateItem {
   name: string
   description: string
   note: string
+  prompt_brief?: string
   source: "database"
   source_job_id: string
   source_frame_idx: number
@@ -676,9 +675,7 @@ export interface BackendHealth {
     translate?: string
     rewrite?: string
     audio_rewrite?: string
-    minimax_tts?: string
-    minimax_voice?: string
-    minimax_configured?: boolean
+    voice_tts_paths?: string[]
     video?: string
     video_aliases?: Record<string, string>
     video_base_url?: string
@@ -692,6 +689,25 @@ export function apiAssetUrl(path?: string | null): string {
   return `${API_BASE}${path.startsWith("/") ? "" : "/"}${path}`
 }
 
+/** True when `error` contains (case-insensitively) a known login/cookies marker; null, undefined and "" give false. */
+export function isRestrictedDownloadError(error?: string | null): boolean {
+  const message = (error ?? "").toLowerCase()
+  // Markers are written in lower case because `message` has already been lower-cased.
+  const markers = ["tiktok 下载需要登录态", "log in for access", "cookies-from-browser", "ytdlp_cookies_file"]
+  for (const marker of markers) {
+    if (message.includes(marker)) return true
+  }
+  return message.includes("tiktok") && message.includes("cookies")
+}
+
+/** Display text for a job error: "" when absent/empty, a fixed hint when isRestrictedDownloadError matches, else the raw text. */
+export function formatJobError(error?: string | null): string {
+  if (error == null || error === "") return ""
+  const loginHint =
+    "这个 TikTok 视频需要登录态。请上传 MP4，或让后端配置 YTDLP_COOKIES_FROM_BROWSER / YTDLP_COOKIES_FILE 后重试。"
+  return isRestrictedDownloadError(error) ? loginHint : error
+}
+
 export async function getHealth(): Promise<BackendHealth> {
   const res = await fetch(`${API_BASE}/health`)
   if (!res.ok) throw new Error(`health ${res.status}`)
@@ -708,6 +724,15 @@ export async function createJob(tkUrl: string): Promise<Job> {
   return res.json()
 }
 
+/** POSTs to /jobs/{id}/download/retry; resolves with the parsed JSON body, throws apiError(...) on a non-OK status. */
+export async function retryJobDownload(id: string): Promise<Job> {
+  // Encode the id so characters such as "/", "?" or "#" cannot alter the request path.
+  const url = `${API_BASE}/jobs/${encodeURIComponent(id)}/download/retry`
+  const res = await fetch(url, { method: "POST" })
+  if (!res.ok) throw apiError("retryJobDownload", res.status, await res.text().catch(() => "")) // body read is best-effort
+  return res.json()
+}
+
 export async function uploadJob(file: File): Promise<Job> {
   const fd = new FormData()
   fd.append("file", file)